All of lore.kernel.org
 help / color / mirror / Atom feed
* RFC Breaking up GEM struct_mutex for async-pages
@ 2019-03-06 14:24 Chris Wilson
  2019-03-06 14:24 ` [PATCH 01/43] drm/i915/selftests: Canonicalise gen8 addresses Chris Wilson
                   ` (45 more replies)
  0 siblings, 46 replies; 78+ messages in thread
From: Chris Wilson @ 2019-03-06 14:24 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld

Preparation changes to adapt some of the GEM locking in preparation for
async-pages. Feedback on the GEM changes appreciated.
-Chris


_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 78+ messages in thread

* [PATCH 01/43] drm/i915/selftests: Canonicalise gen8 addresses
  2019-03-06 14:24 RFC Breaking up GEM struct_mutex for async-pages Chris Wilson
@ 2019-03-06 14:24 ` Chris Wilson
  2019-03-06 14:24 ` [PATCH 02/43] drm/i915: Force GPU idle on suspend Chris Wilson
                   ` (44 subsequent siblings)
  45 siblings, 0 replies; 78+ messages in thread
From: Chris Wilson @ 2019-03-06 14:24 UTC (permalink / raw)
  To: intel-gfx; +Cc: Mika Kuoppala, matthew.auld

For igt_vm_isolation, we write into the whole 48b address space, and not
just our usual low 4G of global GTT. For these MI operations, play safe
and ensure we use the canonical address form.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Matthew Auld <matthew.auld@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c    | 19 -----------------
 drivers/gpu/drm/i915/intel_gpu_commands.h     | 21 +++++++++++++++++++
 .../gpu/drm/i915/selftests/i915_gem_context.c |  2 ++
 3 files changed, 23 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 943a221acb21..c335c0a4099a 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -289,25 +289,6 @@ struct i915_execbuffer {
 
 #define exec_entry(EB, VMA) (&(EB)->exec[(VMA)->exec_flags - (EB)->flags])
 
-/*
- * Used to convert any address to canonical form.
- * Starting from gen8, some commands (e.g. STATE_BASE_ADDRESS,
- * MI_LOAD_REGISTER_MEM and others, see Broadwell PRM Vol2a) require the
- * addresses to be in a canonical form:
- * "GraphicsAddress[63:48] are ignored by the HW and assumed to be in correct
- * canonical form [63:48] == [47]."
- */
-#define GEN8_HIGH_ADDRESS_BIT 47
-static inline u64 gen8_canonical_addr(u64 address)
-{
-	return sign_extend64(address, GEN8_HIGH_ADDRESS_BIT);
-}
-
-static inline u64 gen8_noncanonical_addr(u64 address)
-{
-	return address & GENMASK_ULL(GEN8_HIGH_ADDRESS_BIT, 0);
-}
-
 static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
 {
 	return intel_engine_needs_cmd_parser(eb->engine) && eb->batch_len;
diff --git a/drivers/gpu/drm/i915/intel_gpu_commands.h b/drivers/gpu/drm/i915/intel_gpu_commands.h
index a34ece53a771..b017bc928d00 100644
--- a/drivers/gpu/drm/i915/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/intel_gpu_commands.h
@@ -7,6 +7,8 @@
 #ifndef _INTEL_GPU_COMMANDS_H_
 #define _INTEL_GPU_COMMANDS_H_
 
+#include <linux/bitops.h>
+
 /*
  * Instruction field definitions used by the command parser
  */
@@ -275,4 +277,23 @@
 #define COLOR_BLT     ((0x2<<29)|(0x40<<22))
 #define SRC_COPY_BLT  ((0x2<<29)|(0x43<<22))
 
+/*
+ * Used to convert any address to canonical form.
+ * Starting from gen8, some commands (e.g. STATE_BASE_ADDRESS,
+ * MI_LOAD_REGISTER_MEM and others, see Broadwell PRM Vol2a) require the
+ * addresses to be in a canonical form:
+ * "GraphicsAddress[63:48] are ignored by the HW and assumed to be in correct
+ * canonical form [63:48] == [47]."
+ */
+#define GEN8_HIGH_ADDRESS_BIT 47
+static inline u64 gen8_canonical_addr(u64 address)
+{
+	return sign_extend64(address, GEN8_HIGH_ADDRESS_BIT);
+}
+
+static inline u64 gen8_noncanonical_addr(u64 address)
+{
+	return address & GENMASK_ULL(GEN8_HIGH_ADDRESS_BIT, 0);
+}
+
 #endif /* _INTEL_GPU_COMMANDS_H_ */
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index 0346ff224d5d..34a8c15273f4 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -1194,6 +1194,7 @@ static int write_to_scratch(struct i915_gem_context *ctx,
 
 	*cmd++ = MI_STORE_DWORD_IMM_GEN4;
 	if (INTEL_GEN(i915) >= 8) {
+		offset = gen8_canonical_addr(offset);
 		*cmd++ = lower_32_bits(offset);
 		*cmd++ = upper_32_bits(offset);
 	} else {
@@ -1284,6 +1285,7 @@ static int read_from_scratch(struct i915_gem_context *ctx,
 	if (INTEL_GEN(i915) >= 8) {
 		*cmd++ = MI_LOAD_REGISTER_MEM_GEN8;
 		*cmd++ = RCS_GPR0;
+		offset = gen8_canonical_addr(offset);
 		*cmd++ = lower_32_bits(offset);
 		*cmd++ = upper_32_bits(offset);
 		*cmd++ = MI_STORE_REGISTER_MEM_GEN8;
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 78+ messages in thread

* [PATCH 02/43] drm/i915: Force GPU idle on suspend
  2019-03-06 14:24 RFC Breaking up GEM struct_mutex for async-pages Chris Wilson
  2019-03-06 14:24 ` [PATCH 01/43] drm/i915/selftests: Canonicalise gen8 addresses Chris Wilson
@ 2019-03-06 14:24 ` Chris Wilson
  2019-03-07  9:38   ` Tvrtko Ursulin
  2019-03-06 14:24 ` [PATCH 03/43] drm/i915/selftests: Improve switch-to-kernel-context checking Chris Wilson
                   ` (43 subsequent siblings)
  45 siblings, 1 reply; 78+ messages in thread
From: Chris Wilson @ 2019-03-06 14:24 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld

To facilitate the next patch to allow preemptible kernels not to incur
the wrath of hangcheck, we need to ensure that we can still suspend and
shutdown. That is we will not be able to rely on hangcheck to terminate
a blocking kernel and instead must manually do so ourselves. The
advantage is that we can apply more pressure!

As we now perform a GPU reset to clean up any residual kernels, we leave
the GPU in an unknown state and in particular can not talk to the GuC
before we reinitialise it following resume. For example, we no longer
need to tell the GuC to suspend itself, as it is already reset.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c | 20 +++++---------------
 1 file changed, 5 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 0b23cf3be718..c10be23d959e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3123,13 +3123,6 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915,
 
 		lockdep_assert_held(&i915->drm.struct_mutex);
 
-		if (GEM_SHOW_DEBUG() && !timeout) {
-			/* Presume that timeout was non-zero to begin with! */
-			dev_warn(&i915->drm.pdev->dev,
-				 "Missed idle-completion interrupt!\n");
-			GEM_TRACE_DUMP();
-		}
-
 		err = wait_for_engines(i915);
 		if (err)
 			return err;
@@ -4379,11 +4372,12 @@ int i915_gem_suspend(struct drm_i915_private *i915)
 					     I915_WAIT_INTERRUPTIBLE |
 					     I915_WAIT_LOCKED |
 					     I915_WAIT_FOR_IDLE_BOOST,
-					     MAX_SCHEDULE_TIMEOUT);
-		if (ret && ret != -EIO)
+					     HZ / 5);
+		if (ret == -EINTR)
 			goto err_unlock;
 
-		assert_kernel_context_is_current(i915);
+		/* Forcibly cancel outstanding work and leave the gpu quiet. */
+		i915_gem_set_wedged(i915);
 	}
 	i915_retire_requests(i915); /* ensure we flush after wedging */
 
@@ -4398,15 +4392,11 @@ int i915_gem_suspend(struct drm_i915_private *i915)
 	 */
 	drain_delayed_work(&i915->gt.idle_work);
 
-	intel_uc_suspend(i915);
-
 	/*
 	 * Assert that we successfully flushed all the work and
 	 * reset the GPU back to its idle, low power state.
 	 */
-	WARN_ON(i915->gt.awake);
-	if (WARN_ON(!intel_engines_are_idle(i915)))
-		i915_gem_set_wedged(i915); /* no hope, discard everything */
+	GEM_BUG_ON(i915->gt.awake);
 
 	intel_runtime_pm_put(i915, wakeref);
 	return 0;
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 78+ messages in thread

* [PATCH 03/43] drm/i915/selftests: Improve switch-to-kernel-context checking
  2019-03-06 14:24 RFC Breaking up GEM struct_mutex for async-pages Chris Wilson
  2019-03-06 14:24 ` [PATCH 01/43] drm/i915/selftests: Canonicalise gen8 addresses Chris Wilson
  2019-03-06 14:24 ` [PATCH 02/43] drm/i915: Force GPU idle on suspend Chris Wilson
@ 2019-03-06 14:24 ` Chris Wilson
  2019-03-07 12:40   ` Tvrtko Ursulin
  2019-03-06 14:24 ` [PATCH 04/43] drm/i915: Do a synchronous switch-to-kernel-context on idling Chris Wilson
                   ` (42 subsequent siblings)
  45 siblings, 1 reply; 78+ messages in thread
From: Chris Wilson @ 2019-03-06 14:24 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld

We can reduce the switch-to-kernel-context selftest to operate as a loop
and so trivially test another state transition (that of idle->busy).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 .../gpu/drm/i915/selftests/i915_gem_context.c | 80 ++++++++-----------
 1 file changed, 35 insertions(+), 45 deletions(-)

diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index 34a8c15273f4..cb3d77c95ddf 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -1495,63 +1495,55 @@ static int __igt_switch_to_kernel_context(struct drm_i915_private *i915,
 {
 	struct intel_engine_cs *engine;
 	unsigned int tmp;
-	int err;
+	int pass;
 
 	GEM_TRACE("Testing %s\n", __engine_name(i915, engines));
-	for_each_engine_masked(engine, i915, engines, tmp) {
-		struct i915_request *rq;
+	for (pass = 0; pass < 4; pass++) { /* Once busy; once idle; repeat */
+		bool from_idle = pass & 1;
+		int err;
 
-		rq = i915_request_alloc(engine, ctx);
-		if (IS_ERR(rq))
-			return PTR_ERR(rq);
+		if (!from_idle) {
+			for_each_engine_masked(engine, i915, engines, tmp) {
+				struct i915_request *rq;
 
-		i915_request_add(rq);
-	}
+				rq = i915_request_alloc(engine, ctx);
+				if (IS_ERR(rq))
+					return PTR_ERR(rq);
 
-	err = i915_gem_switch_to_kernel_context(i915);
-	if (err)
-		return err;
-
-	for_each_engine_masked(engine, i915, engines, tmp) {
-		if (!engine_has_kernel_context_barrier(engine)) {
-			pr_err("kernel context not last on engine %s!\n",
-			       engine->name);
-			return -EINVAL;
+				i915_request_add(rq);
+			}
 		}
-	}
 
-	err = i915_gem_wait_for_idle(i915,
-				     I915_WAIT_LOCKED,
-				     MAX_SCHEDULE_TIMEOUT);
-	if (err)
-		return err;
+		err = i915_gem_switch_to_kernel_context(i915);
+		if (err)
+			return err;
 
-	GEM_BUG_ON(i915->gt.active_requests);
-	for_each_engine_masked(engine, i915, engines, tmp) {
-		if (engine->last_retired_context->gem_context != i915->kernel_context) {
-			pr_err("engine %s not idling in kernel context!\n",
-			       engine->name);
+		if (!from_idle) {
+			err = i915_gem_wait_for_idle(i915,
+						     I915_WAIT_LOCKED,
+						     MAX_SCHEDULE_TIMEOUT);
+			if (err)
+				return err;
+		}
+
+		if (i915->gt.active_requests) {
+			pr_err("%d active requests remain after switching to kernel context, pass %d (%s) on %s engine%s\n",
+			       i915->gt.active_requests,
+			       pass, from_idle ? "idle" : "busy",
+			       __engine_name(i915, engines),
+			       is_power_of_2(engines) ? "" : "s");
 			return -EINVAL;
 		}
-	}
 
-	err = i915_gem_switch_to_kernel_context(i915);
-	if (err)
-		return err;
+		/* XXX Bonus points for proving we are the kernel context! */
 
-	if (i915->gt.active_requests) {
-		pr_err("switch-to-kernel-context emitted %d requests even though it should already be idling in the kernel context\n",
-		       i915->gt.active_requests);
-		return -EINVAL;
+		mutex_unlock(&i915->drm.struct_mutex);
+		drain_delayed_work(&i915->gt.idle_work);
+		mutex_lock(&i915->drm.struct_mutex);
 	}
 
-	for_each_engine_masked(engine, i915, engines, tmp) {
-		if (!intel_engine_has_kernel_context(engine)) {
-			pr_err("kernel context not last on engine %s!\n",
-			       engine->name);
-			return -EINVAL;
-		}
-	}
+	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+		return -EIO;
 
 	return 0;
 }
@@ -1595,8 +1587,6 @@ static int igt_switch_to_kernel_context(void *arg)
 
 out_unlock:
 	GEM_TRACE_DUMP_ON(err);
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
-		err = -EIO;
 
 	intel_runtime_pm_put(i915, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 78+ messages in thread

* [PATCH 04/43] drm/i915: Do a synchronous switch-to-kernel-context on idling
  2019-03-06 14:24 RFC Breaking up GEM struct_mutex for async-pages Chris Wilson
                   ` (2 preceding siblings ...)
  2019-03-06 14:24 ` [PATCH 03/43] drm/i915/selftests: Improve switch-to-kernel-context checking Chris Wilson
@ 2019-03-06 14:24 ` Chris Wilson
  2019-03-07 13:07   ` Tvrtko Ursulin
  2019-03-06 14:24 ` [PATCH 05/43] drm/i915: Refactor common code to load initial power context Chris Wilson
                   ` (41 subsequent siblings)
  45 siblings, 1 reply; 78+ messages in thread
From: Chris Wilson @ 2019-03-06 14:24 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld

When the system idles, we switch to the kernel context as a defensive
measure (no users are harmed if the kernel context is lost). Currently,
we issue a switch to kernel context and then come back later to see if
the kernel context is still current and the system is idle. However,
if we are no longer privy to the runqueue ordering, then we have to
relax our assumptions about the logical state of the GPU and the only
way to ensure that the kernel context is currently loaded is by issuing
a request to run after all others, and wait for it to complete all while
preventing anyone else from issuing their own requests.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.c           |  14 +--
 drivers/gpu/drm/i915/i915_drv.h           |   2 +-
 drivers/gpu/drm/i915/i915_gem.c           | 139 ++++++++--------------
 drivers/gpu/drm/i915/i915_gem_context.c   |   4 +
 drivers/gpu/drm/i915/selftests/i915_gem.c |   9 +-
 5 files changed, 63 insertions(+), 105 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index b548c292738c..a4235a4d4309 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -714,8 +714,7 @@ static int i915_load_modeset_init(struct drm_device *dev)
 	return 0;
 
 cleanup_gem:
-	if (i915_gem_suspend(dev_priv))
-		DRM_ERROR("failed to idle hardware; continuing to unload!\n");
+	i915_gem_suspend(dev_priv);
 	i915_gem_fini(dev_priv);
 cleanup_modeset:
 	intel_modeset_cleanup(dev);
@@ -1787,8 +1786,7 @@ void i915_driver_unload(struct drm_device *dev)
 	/* Flush any external code that still may be under the RCU lock */
 	synchronize_rcu();
 
-	if (i915_gem_suspend(dev_priv))
-		DRM_ERROR("failed to idle hardware; continuing to unload!\n");
+	i915_gem_suspend(dev_priv);
 
 	drm_atomic_helper_shutdown(dev);
 
@@ -1896,7 +1894,6 @@ static bool suspend_to_idle(struct drm_i915_private *dev_priv)
 static int i915_drm_prepare(struct drm_device *dev)
 {
 	struct drm_i915_private *i915 = to_i915(dev);
-	int err;
 
 	/*
 	 * NB intel_display_suspend() may issue new requests after we've
@@ -1904,12 +1901,9 @@ static int i915_drm_prepare(struct drm_device *dev)
 	 * split out that work and pull it forward so that after point,
 	 * the GPU is not woken again.
 	 */
-	err = i915_gem_suspend(i915);
-	if (err)
-		dev_err(&i915->drm.pdev->dev,
-			"GEM idle failed, suspend/resume might fail\n");
+	i915_gem_suspend(i915);
 
-	return err;
+	return 0;
 }
 
 static int i915_drm_suspend(struct drm_device *dev)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 3b8e354ec8b3..de142a268371 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3028,7 +3028,7 @@ void i915_gem_fini(struct drm_i915_private *dev_priv);
 void i915_gem_cleanup_engines(struct drm_i915_private *dev_priv);
 int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv,
 			   unsigned int flags, long timeout);
-int __must_check i915_gem_suspend(struct drm_i915_private *dev_priv);
+void i915_gem_suspend(struct drm_i915_private *dev_priv);
 void i915_gem_suspend_late(struct drm_i915_private *dev_priv);
 void i915_gem_resume(struct drm_i915_private *dev_priv);
 vm_fault_t i915_gem_fault(struct vm_fault *vmf);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index c10be23d959e..0ae375b4e586 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2828,13 +2828,6 @@ i915_gem_retire_work_handler(struct work_struct *work)
 				   round_jiffies_up_relative(HZ));
 }
 
-static inline bool
-new_requests_since_last_retire(const struct drm_i915_private *i915)
-{
-	return (READ_ONCE(i915->gt.active_requests) ||
-		work_pending(&i915->gt.idle_work.work));
-}
-
 static void assert_kernel_context_is_current(struct drm_i915_private *i915)
 {
 	struct intel_engine_cs *engine;
@@ -2843,7 +2836,8 @@ static void assert_kernel_context_is_current(struct drm_i915_private *i915)
 	if (i915_reset_failed(i915))
 		return;
 
-	GEM_BUG_ON(i915->gt.active_requests);
+	i915_retire_requests(i915);
+
 	for_each_engine(engine, i915, id) {
 		GEM_BUG_ON(__i915_active_request_peek(&engine->timeline.last_request));
 		GEM_BUG_ON(engine->last_retired_context !=
@@ -2851,77 +2845,75 @@ static void assert_kernel_context_is_current(struct drm_i915_private *i915)
 	}
 }
 
+static bool switch_to_kernel_context_sync(struct drm_i915_private *i915)
+{
+	if (i915_gem_switch_to_kernel_context(i915))
+		return false;
+
+	if (i915_gem_wait_for_idle(i915,
+				   I915_WAIT_LOCKED |
+				   I915_WAIT_FOR_IDLE_BOOST,
+				   HZ / 10))
+		return false;
+
+	assert_kernel_context_is_current(i915);
+	return true;
+}
+
 static void
 i915_gem_idle_work_handler(struct work_struct *work)
 {
-	struct drm_i915_private *dev_priv =
-		container_of(work, typeof(*dev_priv), gt.idle_work.work);
+	struct drm_i915_private *i915 =
+		container_of(work, typeof(*i915), gt.idle_work.work);
+	typeof(i915->gt) *gt = &i915->gt;
 	bool rearm_hangcheck;
 
-	if (!READ_ONCE(dev_priv->gt.awake))
+	if (!READ_ONCE(gt->awake))
 		return;
 
-	if (READ_ONCE(dev_priv->gt.active_requests))
+	if (READ_ONCE(gt->active_requests))
 		return;
 
-	/*
-	 * Flush out the last user context, leaving only the pinned
-	 * kernel context resident. When we are idling on the kernel_context,
-	 * no more new requests (with a context switch) are emitted and we
-	 * can finally rest. A consequence is that the idle work handler is
-	 * always called at least twice before idling (and if the system is
-	 * idle that implies a round trip through the retire worker).
-	 */
-	mutex_lock(&dev_priv->drm.struct_mutex);
-	i915_gem_switch_to_kernel_context(dev_priv);
-	mutex_unlock(&dev_priv->drm.struct_mutex);
-
-	GEM_TRACE("active_requests=%d (after switch-to-kernel-context)\n",
-		  READ_ONCE(dev_priv->gt.active_requests));
-
-	/*
-	 * Wait for last execlists context complete, but bail out in case a
-	 * new request is submitted. As we don't trust the hardware, we
-	 * continue on if the wait times out. This is necessary to allow
-	 * the machine to suspend even if the hardware dies, and we will
-	 * try to recover in resume (after depriving the hardware of power,
-	 * it may be in a better mmod).
-	 */
-	__wait_for(if (new_requests_since_last_retire(dev_priv)) return,
-		   intel_engines_are_idle(dev_priv),
-		   I915_IDLE_ENGINES_TIMEOUT * 1000,
-		   10, 500);
-
 	rearm_hangcheck =
-		cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
+		cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work);
 
-	if (!mutex_trylock(&dev_priv->drm.struct_mutex)) {
+	if (!mutex_trylock(&i915->drm.struct_mutex)) {
 		/* Currently busy, come back later */
-		mod_delayed_work(dev_priv->wq,
-				 &dev_priv->gt.idle_work,
+		mod_delayed_work(i915->wq,
+				 &gt->idle_work,
 				 msecs_to_jiffies(50));
 		goto out_rearm;
 	}
 
 	/*
-	 * New request retired after this work handler started, extend active
-	 * period until next instance of the work.
+	 * Flush out the last user context, leaving only the pinned
+	 * kernel context resident. Should anything unfortunate happen
+	 * while we are idle (such as the GPU being power cycled), no users
+	 * will be harmed.
 	 */
-	if (new_requests_since_last_retire(dev_priv))
-		goto out_unlock;
-
-	__i915_gem_park(dev_priv);
+	if (!gt->active_requests && !work_pending(&gt->idle_work.work)) {
+		++gt->active_requests; /* don't requeue idle */
+
+		if (!switch_to_kernel_context_sync(i915)) {
+			dev_err(i915->drm.dev,
+				"Failed to idle engines, declaring wedged!\n");
+			GEM_TRACE_DUMP();
+			i915_gem_set_wedged(i915);
+		}
+		i915_retire_requests(i915);
 
-	assert_kernel_context_is_current(dev_priv);
+		if (!--gt->active_requests) {
+			__i915_gem_park(i915);
+			rearm_hangcheck = false;
+		}
+	}
 
-	rearm_hangcheck = false;
-out_unlock:
-	mutex_unlock(&dev_priv->drm.struct_mutex);
+	mutex_unlock(&i915->drm.struct_mutex);
 
 out_rearm:
 	if (rearm_hangcheck) {
-		GEM_BUG_ON(!dev_priv->gt.awake);
-		i915_queue_hangcheck(dev_priv);
+		GEM_BUG_ON(!gt->awake);
+		i915_queue_hangcheck(i915);
 	}
 }
 
@@ -3128,7 +3120,6 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915,
 			return err;
 
 		i915_retire_requests(i915);
-		GEM_BUG_ON(i915->gt.active_requests);
 	}
 
 	return 0;
@@ -4340,10 +4331,9 @@ void i915_gem_sanitize(struct drm_i915_private *i915)
 	mutex_unlock(&i915->drm.struct_mutex);
 }
 
-int i915_gem_suspend(struct drm_i915_private *i915)
+void i915_gem_suspend(struct drm_i915_private *i915)
 {
 	intel_wakeref_t wakeref;
-	int ret;
 
 	GEM_TRACE("\n");
 
@@ -4363,19 +4353,7 @@ int i915_gem_suspend(struct drm_i915_private *i915)
 	 * state. Fortunately, the kernel_context is disposable and we do
 	 * not rely on its state.
 	 */
-	if (!i915_reset_failed(i915)) {
-		ret = i915_gem_switch_to_kernel_context(i915);
-		if (ret)
-			goto err_unlock;
-
-		ret = i915_gem_wait_for_idle(i915,
-					     I915_WAIT_INTERRUPTIBLE |
-					     I915_WAIT_LOCKED |
-					     I915_WAIT_FOR_IDLE_BOOST,
-					     HZ / 5);
-		if (ret == -EINTR)
-			goto err_unlock;
-
+	if (!switch_to_kernel_context_sync(i915)) {
 		/* Forcibly cancel outstanding work and leave the gpu quiet. */
 		i915_gem_set_wedged(i915);
 	}
@@ -4399,12 +4377,6 @@ int i915_gem_suspend(struct drm_i915_private *i915)
 	GEM_BUG_ON(i915->gt.awake);
 
 	intel_runtime_pm_put(i915, wakeref);
-	return 0;
-
-err_unlock:
-	mutex_unlock(&i915->drm.struct_mutex);
-	intel_runtime_pm_put(i915, wakeref);
-	return ret;
 }
 
 void i915_gem_suspend_late(struct drm_i915_private *i915)
@@ -4670,18 +4642,11 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
 			goto err_active;
 	}
 
-	err = i915_gem_switch_to_kernel_context(i915);
-	if (err)
-		goto err_active;
-
-	if (i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED, HZ / 5)) {
-		i915_gem_set_wedged(i915);
+	if (!switch_to_kernel_context_sync(i915)) {
 		err = -EIO; /* Caller will declare us wedged */
 		goto err_active;
 	}
 
-	assert_kernel_context_is_current(i915);
-
 	/*
 	 * Immediately park the GPU so that we enable powersaving and
 	 * treat it as idle. The next time we issue a request, we will
@@ -4925,7 +4890,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 err_init_hw:
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 
-	WARN_ON(i915_gem_suspend(dev_priv));
+	i915_gem_suspend(dev_priv);
 	i915_gem_suspend_late(dev_priv);
 
 	i915_gem_drain_workqueue(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index b9f321947982..9a3eb4f66d85 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -767,6 +767,10 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915)
 	lockdep_assert_held(&i915->drm.struct_mutex);
 	GEM_BUG_ON(!i915->kernel_context);
 
+	/* Inoperable, so presume the GPU is safely pointing into the void! */
+	if (i915_terminally_wedged(i915))
+		return 0;
+
 	i915_retire_requests(i915);
 
 	for_each_engine(engine, i915, id) {
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c
index e77b7ed449ae..50bb7bbd26d3 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem.c
@@ -84,14 +84,9 @@ static void simulate_hibernate(struct drm_i915_private *i915)
 
 static int pm_prepare(struct drm_i915_private *i915)
 {
-	int err = 0;
-
-	if (i915_gem_suspend(i915)) {
-		pr_err("i915_gem_suspend failed\n");
-		err = -EINVAL;
-	}
+	i915_gem_suspend(i915);
 
-	return err;
+	return 0;
 }
 
 static void pm_suspend(struct drm_i915_private *i915)
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 78+ messages in thread

* [PATCH 05/43] drm/i915: Refactor common code to load initial power context
  2019-03-06 14:24 RFC Breaking up GEM struct_mutex for async-pages Chris Wilson
                   ` (3 preceding siblings ...)
  2019-03-06 14:24 ` [PATCH 04/43] drm/i915: Do a synchronous switch-to-kernel-context on idling Chris Wilson
@ 2019-03-06 14:24 ` Chris Wilson
  2019-03-07 13:19   ` Tvrtko Ursulin
  2019-03-06 14:24 ` [PATCH 06/43] drm/i915: Reduce presumption of request ordering for barriers Chris Wilson
                   ` (40 subsequent siblings)
  45 siblings, 1 reply; 78+ messages in thread
From: Chris Wilson @ 2019-03-06 14:24 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld

We load a context (the kernel context) on both module load and resume in
order to initialise some logical state onto the GPU. We can use the same
routine for both operations, which will become more useful as we
refactor rc6/rps enabling.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c | 48 ++++++++++++++++-----------------
 1 file changed, 24 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 0ae375b4e586..d4800e7d6f2c 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2860,6 +2860,22 @@ static bool switch_to_kernel_context_sync(struct drm_i915_private *i915)
 	return true;
 }
 
+static bool load_power_context(struct drm_i915_private *i915)
+{
+	if (!switch_to_kernel_context_sync(i915))
+		return false;
+
+	/*
+	 * Immediately park the GPU so that we enable powersaving and
+	 * treat it as idle. The next time we issue a request, we will
+	 * unpark and start using the engine->pinned_default_state, otherwise
+	 * it is in limbo and an early reset may fail.
+	 */
+	__i915_gem_park(i915);
+
+	return true;
+}
+
 static void
 i915_gem_idle_work_handler(struct work_struct *work)
 {
@@ -4444,7 +4460,7 @@ void i915_gem_resume(struct drm_i915_private *i915)
 	intel_uc_resume(i915);
 
 	/* Always reload a context for powersaving. */
-	if (i915_gem_switch_to_kernel_context(i915))
+	if (!load_power_context(i915))
 		goto err_wedged;
 
 out_unlock:
@@ -4609,7 +4625,7 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
 	struct i915_gem_context *ctx;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
-	int err;
+	int err = 0;
 
 	/*
 	 * As we reset the gpu during very early sanitisation, the current
@@ -4642,19 +4658,12 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
 			goto err_active;
 	}
 
-	if (!switch_to_kernel_context_sync(i915)) {
-		err = -EIO; /* Caller will declare us wedged */
+	/* Flush the default context image to memory, and enable powersaving. */
+	if (!load_power_context(i915)) {
+		err = -EIO;
 		goto err_active;
 	}
 
-	/*
-	 * Immediately park the GPU so that we enable powersaving and
-	 * treat it as idle. The next time we issue a request, we will
-	 * unpark and start using the engine->pinned_default_state, otherwise
-	 * it is in limbo and an early reset may fail.
-	 */
-	__i915_gem_park(i915);
-
 	for_each_engine(engine, i915, id) {
 		struct i915_vma *state;
 		void *vaddr;
@@ -4720,19 +4729,10 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
 err_active:
 	/*
 	 * If we have to abandon now, we expect the engines to be idle
-	 * and ready to be torn-down. First try to flush any remaining
-	 * request, ensure we are pointing at the kernel context and
-	 * then remove it.
+	 * and ready to be torn-down. The quickest way we can accomplish
+	 * this is by declaring ourselves wedged.
 	 */
-	if (WARN_ON(i915_gem_switch_to_kernel_context(i915)))
-		goto out_ctx;
-
-	if (WARN_ON(i915_gem_wait_for_idle(i915,
-					   I915_WAIT_LOCKED,
-					   MAX_SCHEDULE_TIMEOUT)))
-		goto out_ctx;
-
-	i915_gem_contexts_lost(i915);
+	i915_gem_set_wedged(i915);
 	goto out_ctx;
 }
 
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 78+ messages in thread

* [PATCH 06/43] drm/i915: Reduce presumption of request ordering for barriers
  2019-03-06 14:24 RFC Breaking up GEM struct_mutex for async-pages Chris Wilson
                   ` (4 preceding siblings ...)
  2019-03-06 14:24 ` [PATCH 05/43] drm/i915: Refactor common code to load initial power context Chris Wilson
@ 2019-03-06 14:24 ` Chris Wilson
  2019-03-07 17:26   ` Tvrtko Ursulin
  2019-03-06 14:24 ` [PATCH 07/43] drm/i915: Remove has-kernel-context Chris Wilson
                   ` (39 subsequent siblings)
  45 siblings, 1 reply; 78+ messages in thread
From: Chris Wilson @ 2019-03-06 14:24 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld

Currently we assume that we know the order in which requests run and so
can determine if we need to reissue a switch-to-kernel-context prior to
idling. That assumption does not hold for the future, so instead of
tracking which barriers have been used, simply determine if we have ever
switched away from the kernel context by using the engine and before
idling ensure that all engines that have been used since the last idle
are synchronously switched back to the kernel context for safety (and
else of shrinking memory while idle).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h               |  1 +
 drivers/gpu/drm/i915/i915_gem.c               | 13 ++--
 drivers/gpu/drm/i915/i915_gem_context.c       | 66 +------------------
 drivers/gpu/drm/i915/i915_gem_context.h       |  3 +-
 drivers/gpu/drm/i915/i915_gem_evict.c         |  2 +-
 drivers/gpu/drm/i915/i915_request.c           |  1 +
 drivers/gpu/drm/i915/intel_engine_cs.c        |  5 ++
 .../gpu/drm/i915/selftests/i915_gem_context.c |  3 +-
 .../gpu/drm/i915/selftests/igt_flush_test.c   |  2 +-
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  4 ++
 10 files changed, 28 insertions(+), 72 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index de142a268371..59d041eb72d7 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1994,6 +1994,7 @@ struct drm_i915_private {
 
 		struct list_head active_rings;
 		struct list_head closed_vma;
+		unsigned long active_engines;
 		u32 active_requests;
 
 		/**
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index d4800e7d6f2c..ed7695fd444a 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2845,9 +2845,10 @@ static void assert_kernel_context_is_current(struct drm_i915_private *i915)
 	}
 }
 
-static bool switch_to_kernel_context_sync(struct drm_i915_private *i915)
+static bool switch_to_kernel_context_sync(struct drm_i915_private *i915,
+					  unsigned long mask)
 {
-	if (i915_gem_switch_to_kernel_context(i915))
+	if (i915_gem_switch_to_kernel_context(i915, mask))
 		return false;
 
 	if (i915_gem_wait_for_idle(i915,
@@ -2862,7 +2863,8 @@ static bool switch_to_kernel_context_sync(struct drm_i915_private *i915)
 
 static bool load_power_context(struct drm_i915_private *i915)
 {
-	if (!switch_to_kernel_context_sync(i915))
+	/* Force loading the kernel context on all engines */
+	if (!switch_to_kernel_context_sync(i915, -1))
 		return false;
 
 	/*
@@ -2910,7 +2912,8 @@ i915_gem_idle_work_handler(struct work_struct *work)
 	if (!gt->active_requests && !work_pending(&gt->idle_work.work)) {
 		++gt->active_requests; /* don't requeue idle */
 
-		if (!switch_to_kernel_context_sync(i915)) {
+		if (!switch_to_kernel_context_sync(i915,
+						   i915->gt.active_engines)) {
 			dev_err(i915->drm.dev,
 				"Failed to idle engines, declaring wedged!\n");
 			GEM_TRACE_DUMP();
@@ -4369,7 +4372,7 @@ void i915_gem_suspend(struct drm_i915_private *i915)
 	 * state. Fortunately, the kernel_context is disposable and we do
 	 * not rely on its state.
 	 */
-	if (!switch_to_kernel_context_sync(i915)) {
+	if (!switch_to_kernel_context_sync(i915, i915->gt.active_engines)) {
 		/* Forcibly cancel outstanding work and leave the gpu quiet. */
 		i915_gem_set_wedged(i915);
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 9a3eb4f66d85..486203e9d205 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -704,63 +704,10 @@ last_request_on_engine(struct i915_timeline *timeline,
 	return NULL;
 }
 
-static bool engine_has_kernel_context_barrier(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *i915 = engine->i915;
-	const struct intel_context * const ce =
-		to_intel_context(i915->kernel_context, engine);
-	struct i915_timeline *barrier = ce->ring->timeline;
-	struct intel_ring *ring;
-	bool any_active = false;
-
-	lockdep_assert_held(&i915->drm.struct_mutex);
-	list_for_each_entry(ring, &i915->gt.active_rings, active_link) {
-		struct i915_request *rq;
-
-		rq = last_request_on_engine(ring->timeline, engine);
-		if (!rq)
-			continue;
-
-		any_active = true;
-
-		if (rq->hw_context == ce)
-			continue;
-
-		/*
-		 * Was this request submitted after the previous
-		 * switch-to-kernel-context?
-		 */
-		if (!i915_timeline_sync_is_later(barrier, &rq->fence)) {
-			GEM_TRACE("%s needs barrier for %llx:%lld\n",
-				  ring->timeline->name,
-				  rq->fence.context,
-				  rq->fence.seqno);
-			return false;
-		}
-
-		GEM_TRACE("%s has barrier after %llx:%lld\n",
-			  ring->timeline->name,
-			  rq->fence.context,
-			  rq->fence.seqno);
-	}
-
-	/*
-	 * If any other timeline was still active and behind the last barrier,
-	 * then our last switch-to-kernel-context must still be queued and
-	 * will run last (leaving the engine in the kernel context when it
-	 * eventually idles).
-	 */
-	if (any_active)
-		return true;
-
-	/* The engine is idle; check that it is idling in the kernel context. */
-	return engine->last_retired_context == ce;
-}
-
-int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915)
+int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915,
+				      unsigned long mask)
 {
 	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
 
 	GEM_TRACE("awake?=%s\n", yesno(i915->gt.awake));
 
@@ -771,17 +718,11 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915)
 	if (i915_terminally_wedged(i915))
 		return 0;
 
-	i915_retire_requests(i915);
-
-	for_each_engine(engine, i915, id) {
+	for_each_engine_masked(engine, i915, mask, mask) {
 		struct intel_ring *ring;
 		struct i915_request *rq;
 
 		GEM_BUG_ON(!to_intel_context(i915->kernel_context, engine));
-		if (engine_has_kernel_context_barrier(engine))
-			continue;
-
-		GEM_TRACE("emit barrier on %s\n", engine->name);
 
 		rq = i915_request_alloc(engine, i915->kernel_context);
 		if (IS_ERR(rq))
@@ -805,7 +746,6 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915)
 			i915_sw_fence_await_sw_fence_gfp(&rq->submit,
 							 &prev->submit,
 							 I915_FENCE_GFP);
-			i915_timeline_sync_set(rq->timeline, &prev->fence);
 		}
 
 		i915_request_add(rq);
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index 2f9ef333acaa..e1188d77a23d 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -372,7 +372,8 @@ int i915_gem_context_open(struct drm_i915_private *i915,
 void i915_gem_context_close(struct drm_file *file);
 
 int i915_switch_context(struct i915_request *rq);
-int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv);
+int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915,
+				      unsigned long engine_mask);
 
 void i915_gem_context_release(struct kref *ctx_ref);
 struct i915_gem_context *
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 68d74c50ac39..7d8e90dfca84 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -62,7 +62,7 @@ static int ggtt_flush(struct drm_i915_private *i915)
 	 * the hopes that we can then remove contexts and the like only
 	 * bound by their active reference.
 	 */
-	err = i915_gem_switch_to_kernel_context(i915);
+	err = i915_gem_switch_to_kernel_context(i915, i915->gt.active_engines);
 	if (err)
 		return err;
 
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index f8a63495114c..9533a85cb0b3 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1068,6 +1068,7 @@ void i915_request_add(struct i915_request *request)
 		GEM_TRACE("marking %s as active\n", ring->timeline->name);
 		list_add(&ring->active_link, &request->i915->gt.active_rings);
 	}
+	request->i915->gt.active_engines |= request->engine->mask;
 	request->emitted_jiffies = jiffies;
 
 	/*
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 555a4590fa23..18174f808fd8 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1106,6 +1106,9 @@ bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine)
 
 	lockdep_assert_held(&engine->i915->drm.struct_mutex);
 
+	if (!engine->context_size)
+		return true;
+
 	/*
 	 * Check the last context seen by the engine. If active, it will be
 	 * the last request that remains in the timeline. When idle, it is
@@ -1205,6 +1208,8 @@ void intel_engines_park(struct drm_i915_private *i915)
 		i915_gem_batch_pool_fini(&engine->batch_pool);
 		engine->execlists.no_priolist = false;
 	}
+
+	i915->gt.active_engines = 0;
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index cb3d77c95ddf..00ac34007582 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -1514,7 +1514,8 @@ static int __igt_switch_to_kernel_context(struct drm_i915_private *i915,
 			}
 		}
 
-		err = i915_gem_switch_to_kernel_context(i915);
+		err = i915_gem_switch_to_kernel_context(i915,
+							i915->gt.active_engines);
 		if (err)
 			return err;
 
diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.c b/drivers/gpu/drm/i915/selftests/igt_flush_test.c
index e0d3122fd35a..94aee4071a66 100644
--- a/drivers/gpu/drm/i915/selftests/igt_flush_test.c
+++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.c
@@ -14,7 +14,7 @@ int igt_flush_test(struct drm_i915_private *i915, unsigned int flags)
 	cond_resched();
 
 	if (flags & I915_WAIT_LOCKED &&
-	    i915_gem_switch_to_kernel_context(i915)) {
+	    i915_gem_switch_to_kernel_context(i915, i915->gt.active_engines)) {
 		pr_err("Failed to switch back to kernel context; declaring wedged\n");
 		i915_gem_set_wedged(i915);
 	}
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index b2c7808e0595..54cfb611c0aa 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -109,6 +109,10 @@ static void mock_retire_work_handler(struct work_struct *work)
 
 static void mock_idle_work_handler(struct work_struct *work)
 {
+	struct drm_i915_private *i915 =
+		container_of(work, typeof(*i915), gt.idle_work.work);
+
+	i915->gt.active_engines = 0;
 }
 
 static int pm_domain_resume(struct device *dev)
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 78+ messages in thread

* [PATCH 07/43] drm/i915: Remove has-kernel-context
  2019-03-06 14:24 RFC Breaking up GEM struct_mutex for async-pages Chris Wilson
                   ` (5 preceding siblings ...)
  2019-03-06 14:24 ` [PATCH 06/43] drm/i915: Reduce presumption of request ordering for barriers Chris Wilson
@ 2019-03-06 14:24 ` Chris Wilson
  2019-03-07 17:29   ` Tvrtko Ursulin
  2019-03-06 14:24 ` [PATCH 08/43] drm/i915: Introduce the i915_user_extension_method Chris Wilson
                   ` (38 subsequent siblings)
  45 siblings, 1 reply; 78+ messages in thread
From: Chris Wilson @ 2019-03-06 14:24 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld

We can no longer assume execution ordering, and in particular we cannot
assume which context will execute last. One side-effect of this is that
we cannot determine if the kernel-context is resident on the GPU, so
remove the routines that claimed to do so.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_active.h      | 13 -----------
 drivers/gpu/drm/i915/i915_gem.c         | 18 --------------
 drivers/gpu/drm/i915/i915_gem_evict.c   | 16 +++----------
 drivers/gpu/drm/i915/intel_engine_cs.c  | 31 -------------------------
 drivers/gpu/drm/i915/intel_ringbuffer.h |  1 -
 5 files changed, 3 insertions(+), 76 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
index 8142a334b37b..7d758719ce39 100644
--- a/drivers/gpu/drm/i915/i915_active.h
+++ b/drivers/gpu/drm/i915/i915_active.h
@@ -108,19 +108,6 @@ i915_active_request_set_retire_fn(struct i915_active_request *active,
 	active->retire = fn ?: i915_active_retire_noop;
 }
 
-static inline struct i915_request *
-__i915_active_request_peek(const struct i915_active_request *active)
-{
-	/*
-	 * Inside the error capture (running with the driver in an unknown
-	 * state), we want to bend the rules slightly (a lot).
-	 *
-	 * Work is in progress to make it safer, in the meantime this keeps
-	 * the known issue from spamming the logs.
-	 */
-	return rcu_dereference_protected(active->request, 1);
-}
-
 /**
  * i915_active_request_raw - return the active request
  * @active - the active tracker
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ed7695fd444a..8d5918b99fc9 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2828,23 +2828,6 @@ i915_gem_retire_work_handler(struct work_struct *work)
 				   round_jiffies_up_relative(HZ));
 }
 
-static void assert_kernel_context_is_current(struct drm_i915_private *i915)
-{
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-
-	if (i915_reset_failed(i915))
-		return;
-
-	i915_retire_requests(i915);
-
-	for_each_engine(engine, i915, id) {
-		GEM_BUG_ON(__i915_active_request_peek(&engine->timeline.last_request));
-		GEM_BUG_ON(engine->last_retired_context !=
-			   to_intel_context(i915->kernel_context, engine));
-	}
-}
-
 static bool switch_to_kernel_context_sync(struct drm_i915_private *i915,
 					  unsigned long mask)
 {
@@ -2857,7 +2840,6 @@ static bool switch_to_kernel_context_sync(struct drm_i915_private *i915,
 				   HZ / 10))
 		return false;
 
-	assert_kernel_context_is_current(i915);
 	return true;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 7d8e90dfca84..060f5903544a 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -38,25 +38,15 @@ I915_SELFTEST_DECLARE(static struct igt_evict_ctl {
 
 static bool ggtt_is_idle(struct drm_i915_private *i915)
 {
-       struct intel_engine_cs *engine;
-       enum intel_engine_id id;
-
-       if (i915->gt.active_requests)
-	       return false;
-
-       for_each_engine(engine, i915, id) {
-	       if (!intel_engine_has_kernel_context(engine))
-		       return false;
-       }
-
-       return true;
+	return !i915->gt.active_requests;
 }
 
 static int ggtt_flush(struct drm_i915_private *i915)
 {
 	int err;
 
-	/* Not everything in the GGTT is tracked via vma (otherwise we
+	/*
+	 * Not everything in the GGTT is tracked via vma (otherwise we
 	 * could evict as required with minimal stalling) so we are forced
 	 * to idle the GPU and explicitly retire outstanding requests in
 	 * the hopes that we can then remove contexts and the like only
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 18174f808fd8..8e326556499e 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1090,37 +1090,6 @@ bool intel_engines_are_idle(struct drm_i915_private *i915)
 	return true;
 }
 
-/**
- * intel_engine_has_kernel_context:
- * @engine: the engine
- *
- * Returns true if the last context to be executed on this engine, or has been
- * executed if the engine is already idle, is the kernel context
- * (#i915.kernel_context).
- */
-bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine)
-{
-	const struct intel_context *kernel_context =
-		to_intel_context(engine->i915->kernel_context, engine);
-	struct i915_request *rq;
-
-	lockdep_assert_held(&engine->i915->drm.struct_mutex);
-
-	if (!engine->context_size)
-		return true;
-
-	/*
-	 * Check the last context seen by the engine. If active, it will be
-	 * the last request that remains in the timeline. When idle, it is
-	 * the last executed context as tracked by retirement.
-	 */
-	rq = __i915_active_request_peek(&engine->timeline.last_request);
-	if (rq)
-		return rq->hw_context == kernel_context;
-	else
-		return engine->last_retired_context == kernel_context;
-}
-
 void intel_engines_reset_default_submission(struct drm_i915_private *i915)
 {
 	struct intel_engine_cs *engine;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 84b7047e2df5..9ccbe63d46e3 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -935,7 +935,6 @@ void intel_engines_sanitize(struct drm_i915_private *i915, bool force);
 bool intel_engine_is_idle(struct intel_engine_cs *engine);
 bool intel_engines_are_idle(struct drm_i915_private *dev_priv);
 
-bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine);
 void intel_engine_lost_context(struct intel_engine_cs *engine);
 
 void intel_engines_park(struct drm_i915_private *i915);
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 78+ messages in thread

* [PATCH 08/43] drm/i915: Introduce the i915_user_extension_method
  2019-03-06 14:24 RFC Breaking up GEM struct_mutex for async-pages Chris Wilson
                   ` (6 preceding siblings ...)
  2019-03-06 14:24 ` [PATCH 07/43] drm/i915: Remove has-kernel-context Chris Wilson
@ 2019-03-06 14:24 ` Chris Wilson
  2019-03-06 14:24 ` [PATCH 09/43] drm/i915: Track active engines within a context Chris Wilson
                   ` (37 subsequent siblings)
  45 siblings, 0 replies; 78+ messages in thread
From: Chris Wilson @ 2019-03-06 14:24 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld

An idea for extending uABI inspired by Vulkan's extension chains.
Instead of expanding the data struct for each ioctl every time we need
to add a new feature, define an extension chain instead. As we add
optional interfaces to control the ioctl, we define a new extension
struct that can be linked into the ioctl data only when required by the
user. The key advantage being able to ignore large control structs for
optional interfaces/extensions, while being able to process them in a
consistent manner.

In comparison to other extensible ioctls, the key difference is the
use of a linked chain of extension structs vs an array of tagged
pointers. For example,

struct drm_amdgpu_cs_chunk {
        __u32           chunk_id;
        __u32           length_dw;
        __u64           chunk_data;
};

struct drm_amdgpu_cs_in {
        __u32           ctx_id;
        __u32           bo_list_handle;
        __u32           num_chunks;
        __u32           _pad;
        __u64           chunks;
};

allows userspace to pass in array of pointers to extension structs, but
must therefore keep constructing that array along side the command stream.
In dynamic situations like that, a linked list is preferred and does not
similar from extra cache line misses as the extension structs themselves
must still be loaded separate to the chunks array.

v2: Apply the tail call optimisation directly to nip the worry of stack
overflow in the bud.
v3: Defend against recursion.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Makefile               |  1 +
 drivers/gpu/drm/i915/i915_user_extensions.c | 43 +++++++++++++++++++++
 drivers/gpu/drm/i915/i915_user_extensions.h | 20 ++++++++++
 drivers/gpu/drm/i915/i915_utils.h           |  7 ++++
 include/uapi/drm/i915_drm.h                 | 20 ++++++++++
 5 files changed, 91 insertions(+)
 create mode 100644 drivers/gpu/drm/i915/i915_user_extensions.c
 create mode 100644 drivers/gpu/drm/i915/i915_user_extensions.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index a1d834068765..89105b1aaf12 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -46,6 +46,7 @@ i915-y := i915_drv.o \
 	  i915_sw_fence.o \
 	  i915_syncmap.o \
 	  i915_sysfs.o \
+	  i915_user_extensions.o \
 	  intel_csr.o \
 	  intel_device_info.o \
 	  intel_pm.o \
diff --git a/drivers/gpu/drm/i915/i915_user_extensions.c b/drivers/gpu/drm/i915/i915_user_extensions.c
new file mode 100644
index 000000000000..879b4094b2d7
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_user_extensions.c
@@ -0,0 +1,43 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2018 Intel Corporation
+ */
+
+#include <linux/sched/signal.h>
+#include <linux/uaccess.h>
+#include <uapi/drm/i915_drm.h>
+
+#include "i915_user_extensions.h"
+
+int i915_user_extensions(struct i915_user_extension __user *ext,
+			 const i915_user_extension_fn *tbl,
+			 unsigned long count,
+			 void *data)
+{
+	unsigned int stackdepth = 512;
+
+	while (ext) {
+		int err;
+		u64 x;
+
+		if (!stackdepth--) /* recursion vs useful flexibility */
+			return -EINVAL;
+
+		if (get_user(x, &ext->name))
+			return -EFAULT;
+
+		err = -EINVAL;
+		if (x < count && tbl[x])
+			err = tbl[x](ext, data);
+		if (err)
+			return err;
+
+		if (get_user(x, &ext->next_extension))
+			return -EFAULT;
+
+		ext = u64_to_user_ptr(x);
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/i915/i915_user_extensions.h b/drivers/gpu/drm/i915/i915_user_extensions.h
new file mode 100644
index 000000000000..313a510b068a
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_user_extensions.h
@@ -0,0 +1,20 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2018 Intel Corporation
+ */
+
+#ifndef I915_USER_EXTENSIONS_H
+#define I915_USER_EXTENSIONS_H
+
+struct i915_user_extension;
+
+typedef int (*i915_user_extension_fn)(struct i915_user_extension __user *ext,
+				      void *data);
+
+int i915_user_extensions(struct i915_user_extension __user *ext,
+			 const i915_user_extension_fn *tbl,
+			 unsigned long count,
+			 void *data);
+
+#endif /* I915_USER_EXTENSIONS_H */
diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
index 9726df37c4c4..fcc751aa1ea8 100644
--- a/drivers/gpu/drm/i915/i915_utils.h
+++ b/drivers/gpu/drm/i915/i915_utils.h
@@ -105,6 +105,13 @@
 	__T;								\
 })
 
+#define container_of_user(ptr, type, member) ({				\
+	void __user *__mptr = (void __user *)(ptr);			\
+	BUILD_BUG_ON_MSG(!__same_type(*(ptr), ((type *)0)->member) &&	\
+			 !__same_type(*(ptr), void),			\
+			 "pointer type mismatch in container_of()");	\
+	((type __user *)(__mptr - offsetof(type, member))); })
+
 static inline u64 ptr_to_u64(const void *ptr)
 {
 	return (uintptr_t)ptr;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index aa2d4c73a97d..39835793722b 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -62,6 +62,26 @@ extern "C" {
 #define I915_ERROR_UEVENT		"ERROR"
 #define I915_RESET_UEVENT		"RESET"
 
+/*
+ * i915_user_extension: Base class for defining a chain of extensions
+ *
+ * Many interfaces need to grow over time. In most cases we can simply
+ * extend the struct and have userspace pass in more data. Another option,
+ * as demonstrated by Vulkan's approach to providing extensions for forward
+ * and backward compatibility, is to use a list of optional structs to
+ * provide those extra details.
+ *
+ * The key advantage to using an extension chain is that it allows us to
+ * redefine the interface more easily than an ever growing struct of
+ * increasing complexity, and for large parts of that interface to be
+ * entirely optional. The downside is more pointer chasing; chasing across
+ * the __user boundary with pointers encapsulated inside u64.
+ */
+struct i915_user_extension {
+	__u64 next_extension;
+	__u64 name;
+};
+
 /*
  * MOCS indexes used for GPU surfaces, defining the cacheability of the
  * surface data and the coherency for this data wrt. CPU vs. GPU accesses.
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 78+ messages in thread

* [PATCH 09/43] drm/i915: Track active engines within a context
  2019-03-06 14:24 RFC Breaking up GEM struct_mutex for async-pages Chris Wilson
                   ` (7 preceding siblings ...)
  2019-03-06 14:24 ` [PATCH 08/43] drm/i915: Introduce the i915_user_extension_method Chris Wilson
@ 2019-03-06 14:24 ` Chris Wilson
  2019-03-06 14:24 ` [PATCH 10/43] drm/i915: Introduce a context barrier callback Chris Wilson
                   ` (36 subsequent siblings)
  45 siblings, 0 replies; 78+ messages in thread
From: Chris Wilson @ 2019-03-06 14:24 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld

For use in the next patch, if we track which engines have been used by
the HW, we can reduce the work required to flush our state off the HW to
those engines.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c           | 18 +++++----------
 drivers/gpu/drm/i915/i915_gem_context.c       |  5 +++++
 drivers/gpu/drm/i915/i915_gem_context.h       |  5 +++++
 drivers/gpu/drm/i915/intel_lrc.c              | 22 +++++++++----------
 drivers/gpu/drm/i915/intel_ringbuffer.c       | 14 +++++++-----
 drivers/gpu/drm/i915/selftests/mock_context.c |  2 ++
 drivers/gpu/drm/i915/selftests/mock_engine.c  |  6 +++++
 7 files changed, 43 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 0a6348ad7c98..6a90558de213 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -388,12 +388,9 @@ static void print_context_stats(struct seq_file *m,
 	struct i915_gem_context *ctx;
 
 	list_for_each_entry(ctx, &i915->contexts.list, link) {
-		struct intel_engine_cs *engine;
-		enum intel_engine_id id;
-
-		for_each_engine(engine, i915, id) {
-			struct intel_context *ce = to_intel_context(ctx, engine);
+		struct intel_context *ce;
 
+		list_for_each_entry(ce, &ctx->active_engines, active_link) {
 			if (ce->state)
 				per_file_stats(0, ce->state->obj, &kstats);
 			if (ce->ring)
@@ -1880,9 +1877,7 @@ static int i915_context_status(struct seq_file *m, void *unused)
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
 	struct drm_device *dev = &dev_priv->drm;
-	struct intel_engine_cs *engine;
 	struct i915_gem_context *ctx;
-	enum intel_engine_id id;
 	int ret;
 
 	ret = mutex_lock_interruptible(&dev->struct_mutex);
@@ -1890,6 +1885,8 @@ static int i915_context_status(struct seq_file *m, void *unused)
 		return ret;
 
 	list_for_each_entry(ctx, &dev_priv->contexts.list, link) {
+		struct intel_context *ce;
+
 		seq_puts(m, "HW context ");
 		if (!list_empty(&ctx->hw_id_link))
 			seq_printf(m, "%x [pin %u]", ctx->hw_id,
@@ -1912,11 +1909,8 @@ static int i915_context_status(struct seq_file *m, void *unused)
 		seq_putc(m, ctx->remap_slice ? 'R' : 'r');
 		seq_putc(m, '\n');
 
-		for_each_engine(engine, dev_priv, id) {
-			struct intel_context *ce =
-				to_intel_context(ctx, engine);
-
-			seq_printf(m, "%s: ", engine->name);
+		list_for_each_entry(ce, &ctx->active_engines, active_link) {
+			seq_printf(m, "%s: ", ce->engine->name);
 			if (ce->state)
 				describe_obj(m, ce->state->obj);
 			if (ce->ring)
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 486203e9d205..d997695a4f77 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -226,6 +226,7 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
 
 	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
 	GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
+	GEM_BUG_ON(!list_empty(&ctx->active_engines));
 
 	release_hw_id(ctx);
 	i915_ppgtt_put(ctx->ppgtt);
@@ -241,6 +242,7 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
 	put_pid(ctx->pid);
 
 	list_del(&ctx->link);
+	mutex_destroy(&ctx->mutex);
 
 	kfree_rcu(ctx, rcu);
 }
@@ -353,6 +355,7 @@ intel_context_init(struct intel_context *ce,
 		   struct intel_engine_cs *engine)
 {
 	ce->gem_context = ctx;
+	ce->engine = engine;
 
 	INIT_LIST_HEAD(&ce->signal_link);
 	INIT_LIST_HEAD(&ce->signals);
@@ -381,6 +384,8 @@ __create_hw_context(struct drm_i915_private *dev_priv,
 	list_add_tail(&ctx->link, &dev_priv->contexts.list);
 	ctx->i915 = dev_priv;
 	ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_NORMAL);
+	INIT_LIST_HEAD(&ctx->active_engines);
+	mutex_init(&ctx->mutex);
 
 	for (n = 0; n < ARRAY_SIZE(ctx->__engine); n++)
 		intel_context_init(&ctx->__engine[n], ctx, dev_priv->engine[n]);
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index e1188d77a23d..124c2a082b99 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -163,6 +163,9 @@ struct i915_gem_context {
 	atomic_t hw_id_pin_count;
 	struct list_head hw_id_link;
 
+	struct list_head active_engines;
+	struct mutex mutex;
+
 	/**
 	 * @user_handle: userspace identifier
 	 *
@@ -176,7 +179,9 @@ struct i915_gem_context {
 	/** engine: per-engine logical HW state */
 	struct intel_context {
 		struct i915_gem_context *gem_context;
+		struct intel_engine_cs *engine;
 		struct intel_engine_cs *active;
+		struct list_head active_link;
 		struct list_head signal_link;
 		struct list_head signals;
 		struct i915_vma *state;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index f0ba20f2b41d..a9a47dbeac88 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1282,6 +1282,7 @@ static void execlists_context_unpin(struct intel_context *ce)
 	i915_gem_object_unpin_map(ce->state->obj);
 	i915_vma_unpin(ce->state);
 
+	list_del(&ce->active_link);
 	i915_gem_context_put(ce->gem_context);
 }
 
@@ -1366,6 +1367,11 @@ __execlists_context_pin(struct intel_engine_cs *engine,
 	__execlists_update_reg_state(engine, ce);
 
 	ce->state->obj->pin_global++;
+
+	mutex_lock(&ctx->mutex);
+	list_add(&ce->active_link, &ctx->active_engines);
+	mutex_unlock(&ctx->mutex);
+
 	i915_gem_context_get(ctx);
 	return ce;
 
@@ -2887,9 +2893,8 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
 
 void intel_lr_context_resume(struct drm_i915_private *i915)
 {
-	struct intel_engine_cs *engine;
 	struct i915_gem_context *ctx;
-	enum intel_engine_id id;
+	struct intel_context *ce;
 
 	/*
 	 * Because we emit WA_TAIL_DWORDS there may be a disparity
@@ -2903,17 +2908,10 @@ void intel_lr_context_resume(struct drm_i915_private *i915)
 	 * simplicity, we just zero everything out.
 	 */
 	list_for_each_entry(ctx, &i915->contexts.list, link) {
-		for_each_engine(engine, i915, id) {
-			struct intel_context *ce =
-				to_intel_context(ctx, engine);
-
-			if (!ce->state)
-				continue;
-
+		list_for_each_entry(ce, &ctx->active_engines, active_link) {
+			GEM_BUG_ON(!ce->ring);
 			intel_ring_reset(ce->ring, 0);
-
-			if (ce->pin_count) /* otherwise done in context_pin */
-				__execlists_update_reg_state(engine, ce);
+			__execlists_update_reg_state(ce->engine, ce);
 		}
 	}
 }
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 1dd0c99b893f..82f33ff94dc8 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1431,6 +1431,7 @@ static void intel_ring_context_unpin(struct intel_context *ce)
 	__context_unpin_ppgtt(ce->gem_context);
 	__context_unpin(ce);
 
+	list_del(&ce->active_link);
 	i915_gem_context_put(ce->gem_context);
 }
 
@@ -1510,6 +1511,10 @@ __ring_context_pin(struct intel_engine_cs *engine,
 {
 	int err;
 
+	/* One ringbuffer to rule them all */
+	GEM_BUG_ON(!engine->buffer);
+	ce->ring = engine->buffer;
+
 	if (!ce->state && engine->context_size) {
 		struct i915_vma *vma;
 
@@ -1530,12 +1535,11 @@ __ring_context_pin(struct intel_engine_cs *engine,
 	if (err)
 		goto err_unpin;
 
-	i915_gem_context_get(ctx);
-
-	/* One ringbuffer to rule them all */
-	GEM_BUG_ON(!engine->buffer);
-	ce->ring = engine->buffer;
+	mutex_lock(&ctx->mutex);
+	list_add(&ce->active_link, &ctx->active_engines);
+	mutex_unlock(&ctx->mutex);
 
+	i915_gem_context_get(ctx);
 	return ce;
 
 err_unpin:
diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c
index b646cdcdd602..353b37b9f78e 100644
--- a/drivers/gpu/drm/i915/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/selftests/mock_context.c
@@ -44,6 +44,8 @@ mock_context(struct drm_i915_private *i915,
 	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
 	INIT_LIST_HEAD(&ctx->handles_list);
 	INIT_LIST_HEAD(&ctx->hw_id_link);
+	INIT_LIST_HEAD(&ctx->active_engines);
+	mutex_init(&ctx->mutex);
 
 	for (n = 0; n < ARRAY_SIZE(ctx->__engine); n++)
 		intel_context_init(&ctx->__engine[n], ctx, i915->engine[n]);
diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c
index c2c954f64226..8032a8a9542f 100644
--- a/drivers/gpu/drm/i915/selftests/mock_engine.c
+++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
@@ -125,6 +125,7 @@ static void hw_delay_complete(struct timer_list *t)
 static void mock_context_unpin(struct intel_context *ce)
 {
 	mock_timeline_unpin(ce->ring->timeline);
+	list_del(&ce->active_link);
 	i915_gem_context_put(ce->gem_context);
 }
 
@@ -160,6 +161,11 @@ mock_context_pin(struct intel_engine_cs *engine,
 	mock_timeline_pin(ce->ring->timeline);
 
 	ce->ops = &mock_context_ops;
+
+	mutex_lock(&ctx->mutex);
+	list_add(&ce->active_link, &ctx->active_engines);
+	mutex_unlock(&ctx->mutex);
+
 	i915_gem_context_get(ctx);
 	return ce;
 
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 78+ messages in thread

* [PATCH 10/43] drm/i915: Introduce a context barrier callback
  2019-03-06 14:24 RFC Breaking up GEM struct_mutex for async-pages Chris Wilson
                   ` (8 preceding siblings ...)
  2019-03-06 14:24 ` [PATCH 09/43] drm/i915: Track active engines within a context Chris Wilson
@ 2019-03-06 14:24 ` Chris Wilson
  2019-03-06 14:24 ` [PATCH 11/43] drm/i915: Create/destroy VM (ppGTT) for use with contexts Chris Wilson
                   ` (35 subsequent siblings)
  45 siblings, 0 replies; 78+ messages in thread
From: Chris Wilson @ 2019-03-06 14:24 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld

In the next patch, we will want to update live state within a context.
As this state may be in use by the GPU and we haven't been explicitly
tracking its activity, we instead attach it to a request we send down
the context setup with its new state and on retiring that request
cleanup the old state as we then know that it is no longer live.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_context.c       |  74 +++++++++++++
 .../gpu/drm/i915/selftests/i915_gem_context.c | 103 ++++++++++++++++++
 2 files changed, 177 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index d997695a4f77..97a021e0c55e 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -709,6 +709,80 @@ last_request_on_engine(struct i915_timeline *timeline,
 	return NULL;
 }
 
+struct context_barrier_task {
+	struct i915_active base;
+	void (*task)(void *data);
+	void *data;
+};
+
+static void cb_retire(struct i915_active *base)
+{
+	struct context_barrier_task *cb = container_of(base, typeof(*cb), base);
+
+	if (cb->task)
+		cb->task(cb->data);
+
+	i915_active_fini(&cb->base);
+	kfree(cb);
+}
+
+I915_SELFTEST_DECLARE(static unsigned long context_barrier_inject_fault);
+static int context_barrier_task(struct i915_gem_context *ctx,
+				unsigned long engines,
+				void (*task)(void *data),
+				void *data)
+{
+	struct drm_i915_private *i915 = ctx->i915;
+	struct context_barrier_task *cb;
+	struct intel_context *ce;
+	intel_wakeref_t wakeref;
+	int err = 0;
+
+	lockdep_assert_held(&i915->drm.struct_mutex);
+	GEM_BUG_ON(!task);
+
+	cb = kmalloc(sizeof(*cb), GFP_KERNEL);
+	if (!cb)
+		return -ENOMEM;
+
+	i915_active_init(i915, &cb->base, cb_retire);
+	i915_active_acquire(&cb->base);
+
+	wakeref = intel_runtime_pm_get(i915);
+	list_for_each_entry(ce, &ctx->active_engines, active_link) {
+		struct intel_engine_cs *engine = ce->engine;
+		struct i915_request *rq;
+
+		if (!(ce->engine->mask & engines))
+			continue;
+
+		if (I915_SELFTEST_ONLY(context_barrier_inject_fault &
+				       engine->mask)) {
+			err = -ENXIO;
+			break;
+		}
+
+		rq = i915_request_alloc(engine, ctx);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			break;
+		}
+
+		err = i915_active_ref(&cb->base, rq->fence.context, rq);
+		i915_request_add(rq);
+		if (err)
+			break;
+	}
+	intel_runtime_pm_put(i915, wakeref);
+
+	cb->task = err ? NULL : task; /* caller needs to unwind instead */
+	cb->data = data;
+
+	i915_active_release(&cb->base);
+
+	return err;
+}
+
 int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915,
 				      unsigned long mask)
 {
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index 00ac34007582..94da39fb7f2f 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -1596,10 +1596,113 @@ static int igt_switch_to_kernel_context(void *arg)
 	return err;
 }
 
+static void mock_barrier_task(void *data)
+{
+	unsigned int *counter = data;
+
+	++*counter;
+}
+
+static int mock_context_barrier(void *arg)
+{
+#undef pr_fmt
+#define pr_fmt(x) "context_barrier_task():" # x
+	struct drm_i915_private *i915 = arg;
+	struct i915_gem_context *ctx;
+	struct i915_request *rq;
+	intel_wakeref_t wakeref;
+	unsigned int counter;
+	int err;
+
+	/*
+	 * The context barrier provides us with a callback after it emits
+	 * a request; useful for retiring old state after loading new.
+	 */
+
+	mutex_lock(&i915->drm.struct_mutex);
+
+	ctx = mock_context(i915, "mock");
+	if (IS_ERR(ctx)) {
+		err = PTR_ERR(ctx);
+		goto unlock;
+	}
+
+	counter = 0;
+	err = context_barrier_task(ctx, 0, mock_barrier_task, &counter);
+	if (err) {
+		pr_err("Failed at line %d, err=%d\n", __LINE__, err);
+		goto out;
+	}
+	if (counter == 0) {
+		pr_err("Did not retire immediately with 0 engines\n");
+		err = -EINVAL;
+		goto out;
+	}
+
+	counter = 0;
+	err = context_barrier_task(ctx, -1, mock_barrier_task, &counter);
+	if (err) {
+		pr_err("Failed at line %d, err=%d\n", __LINE__, err);
+		goto out;
+	}
+	if (counter == 0) {
+		pr_err("Did not retire immediately for all inactive engines\n");
+		err = -EINVAL;
+		goto out;
+	}
+
+	rq = ERR_PTR(-ENODEV);
+	with_intel_runtime_pm(i915, wakeref)
+		rq = i915_request_alloc(i915->engine[RCS0], ctx);
+	if (IS_ERR(rq)) {
+		pr_err("Request allocation failed!\n");
+		goto out;
+	}
+	i915_request_add(rq);
+	GEM_BUG_ON(list_empty(&ctx->active_engines));
+
+	counter = 0;
+	context_barrier_inject_fault = BIT(RCS0);
+	err = context_barrier_task(ctx, -1, mock_barrier_task, &counter);
+	context_barrier_inject_fault = 0;
+	if (err == -ENXIO)
+		err = 0;
+	else
+		pr_err("Did not hit fault injection!\n");
+	if (counter != 0) {
+		pr_err("Invoked callback on error!\n");
+		err = -EIO;
+	}
+	if (err)
+		goto out;
+
+	counter = 0;
+	err = context_barrier_task(ctx, -1, mock_barrier_task, &counter);
+	if (err) {
+		pr_err("Failed at line %d, err=%d\n", __LINE__, err);
+		goto out;
+	}
+	mock_device_flush(i915);
+	if (counter == 0) {
+		pr_err("Did not retire on each active engines\n");
+		err = -EINVAL;
+		goto out;
+	}
+
+out:
+	mock_context_close(ctx);
+unlock:
+	mutex_unlock(&i915->drm.struct_mutex);
+	return err;
+#undef pr_fmt
+#define pr_fmt(x) x
+}
+
 int i915_gem_context_mock_selftests(void)
 {
 	static const struct i915_subtest tests[] = {
 		SUBTEST(igt_switch_to_kernel_context),
+		SUBTEST(mock_context_barrier),
 	};
 	struct drm_i915_private *i915;
 	int err;
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 78+ messages in thread

* [PATCH 11/43] drm/i915: Create/destroy VM (ppGTT) for use with contexts
  2019-03-06 14:24 RFC Breaking up GEM struct_mutex for async-pages Chris Wilson
                   ` (9 preceding siblings ...)
  2019-03-06 14:24 ` [PATCH 10/43] drm/i915: Introduce a context barrier callback Chris Wilson
@ 2019-03-06 14:24 ` Chris Wilson
  2019-03-06 14:24 ` [PATCH 12/43] drm/i915: Extend CONTEXT_CREATE to set parameters upon construction Chris Wilson
                   ` (34 subsequent siblings)
  45 siblings, 0 replies; 78+ messages in thread
From: Chris Wilson @ 2019-03-06 14:24 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld

In preparation to making the ppGTT binding for a context explicit (to
facilitate reusing the same ppGTT between different contexts), allow the
user to create and destroy named ppGTT.

v2: Replace global barrier for swapping over the ppgtt and tlbs with a
local context barrier (Tvrtko)
v3: serialise with struct_mutex; it's lazy but required dammit
v4: Rewrite igt_ctx_shared_exec to be more different (aimed to be more
similarly, turned out different!)

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.c               |   2 +
 drivers/gpu/drm/i915/i915_drv.h               |   3 +
 drivers/gpu/drm/i915/i915_gem_context.c       | 254 +++++++++++++++++-
 drivers/gpu/drm/i915/i915_gem_context.h       |   5 +
 drivers/gpu/drm/i915/i915_gem_gtt.c           |  17 +-
 drivers/gpu/drm/i915/i915_gem_gtt.h           |  16 +-
 drivers/gpu/drm/i915/selftests/huge_pages.c   |   1 -
 .../gpu/drm/i915/selftests/i915_gem_context.c | 215 ++++++++++++---
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c |   1 -
 drivers/gpu/drm/i915/selftests/mock_context.c |   8 +-
 include/uapi/drm/i915_drm.h                   |  36 +++
 11 files changed, 497 insertions(+), 61 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index a4235a4d4309..14d9b1dd5ccd 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -3008,6 +3008,8 @@ static const struct drm_ioctl_desc i915_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(I915_PERF_ADD_CONFIG, i915_perf_add_config_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_PERF_REMOVE_CONFIG, i915_perf_remove_config_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_QUERY, i915_query_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(I915_GEM_VM_CREATE, i915_gem_vm_create_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(I915_GEM_VM_DESTROY, i915_gem_vm_destroy_ioctl, DRM_RENDER_ALLOW),
 };
 
 static struct drm_driver driver = {
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 59d041eb72d7..9de4080c47a6 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -218,6 +218,9 @@ struct drm_i915_file_private {
 	} mm;
 	struct idr context_idr;
 
+	struct mutex vm_lock;
+	struct idr vm_idr;
+
 	unsigned int bsd_engine;
 
 /*
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 97a021e0c55e..5e580e79163e 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -126,6 +126,8 @@ static void lut_close(struct i915_gem_context *ctx)
 		struct i915_vma *vma = rcu_dereference_raw(*slot);
 
 		radix_tree_iter_delete(&ctx->handles_vma, &iter, slot);
+
+		vma->open_count--;
 		__i915_gem_object_release_unless_active(vma->obj);
 	}
 	rcu_read_unlock();
@@ -310,7 +312,7 @@ static void context_close(struct i915_gem_context *ctx)
 	 */
 	lut_close(ctx);
 	if (ctx->ppgtt)
-		i915_ppgtt_close(&ctx->ppgtt->vm);
+		i915_ppgtt_close(ctx->ppgtt);
 
 	ctx->file_priv = ERR_PTR(-EBADF);
 	i915_gem_context_put(ctx);
@@ -449,6 +451,32 @@ static void __destroy_hw_context(struct i915_gem_context *ctx,
 	context_close(ctx);
 }
 
+static struct i915_hw_ppgtt *
+__set_ppgtt(struct i915_gem_context *ctx, struct i915_hw_ppgtt *ppgtt)
+{
+	struct i915_hw_ppgtt *old = ctx->ppgtt;
+
+	i915_ppgtt_open(ppgtt);
+	ctx->ppgtt = i915_ppgtt_get(ppgtt);
+
+	ctx->desc_template = default_desc_template(ctx->i915, ppgtt);
+
+	return old;
+}
+
+static void __assign_ppgtt(struct i915_gem_context *ctx,
+			   struct i915_hw_ppgtt *ppgtt)
+{
+	if (ppgtt == ctx->ppgtt)
+		return;
+
+	ppgtt = __set_ppgtt(ctx, ppgtt);
+	if (ppgtt) {
+		i915_ppgtt_close(ppgtt);
+		i915_ppgtt_put(ppgtt);
+	}
+}
+
 static struct i915_gem_context *
 i915_gem_create_context(struct drm_i915_private *dev_priv,
 			struct drm_i915_file_private *file_priv)
@@ -475,8 +503,8 @@ i915_gem_create_context(struct drm_i915_private *dev_priv,
 			return ERR_CAST(ppgtt);
 		}
 
-		ctx->ppgtt = ppgtt;
-		ctx->desc_template = default_desc_template(dev_priv, ppgtt);
+		__assign_ppgtt(ctx, ppgtt);
+		i915_ppgtt_put(ppgtt);
 	}
 
 	trace_i915_context_create(ctx);
@@ -657,19 +685,29 @@ static int context_idr_cleanup(int id, void *p, void *data)
 	return 0;
 }
 
+static int vm_idr_cleanup(int id, void *p, void *data)
+{
+	i915_ppgtt_put(p);
+	return 0;
+}
+
 int i915_gem_context_open(struct drm_i915_private *i915,
 			  struct drm_file *file)
 {
 	struct drm_i915_file_private *file_priv = file->driver_priv;
 	struct i915_gem_context *ctx;
 
+	mutex_init(&file_priv->vm_lock);
+
 	idr_init(&file_priv->context_idr);
+	idr_init_base(&file_priv->vm_idr, 1);
 
 	mutex_lock(&i915->drm.struct_mutex);
 	ctx = i915_gem_create_context(i915, file_priv);
 	mutex_unlock(&i915->drm.struct_mutex);
 	if (IS_ERR(ctx)) {
 		idr_destroy(&file_priv->context_idr);
+		idr_destroy(&file_priv->vm_idr);
 		return PTR_ERR(ctx);
 	}
 
@@ -686,6 +724,89 @@ void i915_gem_context_close(struct drm_file *file)
 
 	idr_for_each(&file_priv->context_idr, context_idr_cleanup, NULL);
 	idr_destroy(&file_priv->context_idr);
+
+	idr_for_each(&file_priv->vm_idr, vm_idr_cleanup, NULL);
+	idr_destroy(&file_priv->vm_idr);
+
+	mutex_destroy(&file_priv->vm_lock);
+}
+
+int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data,
+			     struct drm_file *file)
+{
+	struct drm_i915_private *i915 = to_i915(dev);
+	struct drm_i915_gem_vm_control *args = data;
+	struct drm_i915_file_private *file_priv = file->driver_priv;
+	struct i915_hw_ppgtt *ppgtt;
+	int err;
+
+	if (!HAS_FULL_PPGTT(i915))
+		return -ENODEV;
+
+	if (args->flags)
+		return -EINVAL;
+
+	if (args->extensions)
+		return -EINVAL;
+
+	ppgtt = i915_ppgtt_create(i915, file_priv);
+	if (IS_ERR(ppgtt))
+		return PTR_ERR(ppgtt);
+
+	err = mutex_lock_interruptible(&file_priv->vm_lock);
+	if (err)
+		goto err_put;
+
+	err = idr_alloc(&file_priv->vm_idr, ppgtt, 0, 0, GFP_KERNEL);
+	mutex_unlock(&file_priv->vm_lock);
+	if (err < 0)
+		goto err_put;
+
+	GEM_BUG_ON(err == 0); /* reserved for default/unassigned ppgtt */
+	ppgtt->user_handle = err;
+	args->id = err;
+	return 0;
+
+err_put:
+	i915_ppgtt_put(ppgtt);
+	return err;
+}
+
+int i915_gem_vm_destroy_ioctl(struct drm_device *dev, void *data,
+			      struct drm_file *file)
+{
+	struct drm_i915_file_private *file_priv = file->driver_priv;
+	struct drm_i915_gem_vm_control *args = data;
+	struct i915_hw_ppgtt *ppgtt;
+	int err;
+	u32 id;
+
+	if (args->flags)
+		return -EINVAL;
+
+	if (args->extensions)
+		return -EINVAL;
+
+	id = args->id;
+	if (!id)
+		return -ENOENT;
+
+	err = mutex_lock_interruptible(&file_priv->vm_lock);
+	if (err)
+		return err;
+
+	ppgtt = idr_remove(&file_priv->vm_idr, id);
+	if (ppgtt) {
+		GEM_BUG_ON(!ppgtt->user_handle);
+		ppgtt->user_handle = 0;
+	}
+
+	mutex_unlock(&file_priv->vm_lock);
+	if (!ppgtt)
+		return -ENOENT;
+
+	i915_ppgtt_put(ppgtt);
+	return 0;
 }
 
 static struct i915_request *
@@ -833,6 +954,120 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915,
 	return 0;
 }
 
+static int get_ppgtt(struct i915_gem_context *ctx,
+		     struct drm_i915_gem_context_param *args)
+{
+	struct drm_i915_file_private *file_priv = ctx->file_priv;
+	struct i915_hw_ppgtt *ppgtt;
+	int ret;
+
+	if (!ctx->ppgtt)
+		return -ENODEV;
+
+	/* XXX rcu acquire? */
+	ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex);
+	if (ret)
+		return ret;
+
+	ppgtt = i915_ppgtt_get(ctx->ppgtt);
+	mutex_unlock(&ctx->i915->drm.struct_mutex);
+
+	ret = mutex_lock_interruptible(&file_priv->vm_lock);
+	if (ret)
+		goto err_put;
+
+	if (!ppgtt->user_handle) {
+		ret = idr_alloc(&file_priv->vm_idr, ppgtt, 0, 0, GFP_KERNEL);
+		GEM_BUG_ON(!ret);
+		if (ret < 0)
+			goto err_unlock;
+
+		ppgtt->user_handle = ret;
+		i915_ppgtt_get(ppgtt);
+	}
+
+	args->size = 0;
+	args->value = ppgtt->user_handle;
+
+	ret = 0;
+err_unlock:
+	mutex_unlock(&file_priv->vm_lock);
+err_put:
+	i915_ppgtt_put(ppgtt);
+	return ret;
+}
+
+static void set_ppgtt_barrier(void *data)
+{
+	struct i915_hw_ppgtt *old = data;
+
+	i915_ppgtt_close(old);
+	i915_ppgtt_put(old);
+}
+
+static int set_ppgtt(struct i915_gem_context *ctx,
+		     struct drm_i915_gem_context_param *args)
+{
+	struct drm_i915_file_private *file_priv = ctx->file_priv;
+	struct i915_hw_ppgtt *ppgtt, *old;
+	int err;
+
+	if (args->size)
+		return -EINVAL;
+
+	if (upper_32_bits(args->value))
+		return -EINVAL;
+
+	if (!ctx->ppgtt)
+		return -ENODEV;
+
+	err = mutex_lock_interruptible(&file_priv->vm_lock);
+	if (err)
+		return err;
+
+	ppgtt = idr_find(&file_priv->vm_idr, args->value);
+	if (ppgtt) {
+		GEM_BUG_ON(ppgtt->user_handle != args->value);
+		i915_ppgtt_get(ppgtt);
+	}
+	mutex_unlock(&file_priv->vm_lock);
+	if (!ppgtt)
+		return -ENOENT;
+
+	err = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex);
+	if (err)
+		goto out;
+
+	if (ppgtt == ctx->ppgtt)
+		goto unlock;
+
+	/* Teardown the existing obj:vma cache, it will have to be rebuilt. */
+	lut_close(ctx);
+
+	old = __set_ppgtt(ctx, ppgtt);
+
+	/*
+	 * We need to flush any requests using the current ppgtt before
+	 * we release it as the requests do not hold a reference themselves,
+	 * only indirectly through the context.
+	 */
+	err = context_barrier_task(ctx, ALL_ENGINES, set_ppgtt_barrier, old);
+	if (err) {
+		ctx->ppgtt = old;
+		ctx->desc_template = default_desc_template(ctx->i915, old);
+
+		i915_ppgtt_close(ppgtt);
+		i915_ppgtt_put(ppgtt);
+	}
+
+unlock:
+	mutex_unlock(&ctx->i915->drm.struct_mutex);
+
+out:
+	i915_ppgtt_put(ppgtt);
+	return err;
+}
+
 static bool client_is_banned(struct drm_i915_file_private *file_priv)
 {
 	return atomic_read(&file_priv->ban_score) >= I915_CLIENT_SCORE_BANNED;
@@ -1011,6 +1246,9 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 	case I915_CONTEXT_PARAM_SSEU:
 		ret = get_sseu(ctx, args);
 		break;
+	case I915_CONTEXT_PARAM_VM:
+		ret = get_ppgtt(ctx, args);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
@@ -1308,9 +1546,6 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 		return -ENOENT;
 
 	switch (args->param) {
-	case I915_CONTEXT_PARAM_BAN_PERIOD:
-		ret = -EINVAL;
-		break;
 	case I915_CONTEXT_PARAM_NO_ZEROMAP:
 		if (args->size)
 			ret = -EINVAL;
@@ -1366,9 +1601,16 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 					I915_USER_PRIORITY(priority);
 		}
 		break;
+
 	case I915_CONTEXT_PARAM_SSEU:
 		ret = set_sseu(ctx, args);
 		break;
+
+	case I915_CONTEXT_PARAM_VM:
+		ret = set_ppgtt(ctx, args);
+		break;
+
+	case I915_CONTEXT_PARAM_BAN_PERIOD:
 	default:
 		ret = -EINVAL;
 		break;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index 124c2a082b99..92c2eecae12e 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -384,6 +384,11 @@ void i915_gem_context_release(struct kref *ctx_ref);
 struct i915_gem_context *
 i915_gem_context_create_gvt(struct drm_device *dev);
 
+int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data,
+			     struct drm_file *file);
+int i915_gem_vm_destroy_ioctl(struct drm_device *dev, void *data,
+			      struct drm_file *file);
+
 int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
 				  struct drm_file *file);
 int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index dac08d9c3fab..d717952cc430 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2099,10 +2099,21 @@ i915_ppgtt_create(struct drm_i915_private *i915,
 	return ppgtt;
 }
 
-void i915_ppgtt_close(struct i915_address_space *vm)
+void i915_ppgtt_open(struct i915_hw_ppgtt *ppgtt)
 {
-	GEM_BUG_ON(vm->closed);
-	vm->closed = true;
+	GEM_BUG_ON(ppgtt->vm.closed);
+
+	ppgtt->open_count++;
+}
+
+void i915_ppgtt_close(struct i915_hw_ppgtt *ppgtt)
+{
+	GEM_BUG_ON(!ppgtt->open_count);
+	if (--ppgtt->open_count)
+		return;
+
+	GEM_BUG_ON(ppgtt->vm.closed);
+	ppgtt->vm.closed = true;
 }
 
 static void ppgtt_destroy_vma(struct i915_address_space *vm)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index a47e11e6fc1b..25d5f7682bda 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -391,11 +391,15 @@ struct i915_hw_ppgtt {
 	struct kref ref;
 
 	unsigned long pd_dirty_engines;
+	unsigned int open_count;
+
 	union {
 		struct i915_pml4 pml4;		/* GEN8+ & 48b PPGTT */
 		struct i915_page_directory_pointer pdp;	/* GEN8+ */
 		struct i915_page_directory pd;		/* GEN6-7 */
 	};
+
+	u32 user_handle;
 };
 
 struct gen6_hw_ppgtt {
@@ -606,12 +610,16 @@ int i915_ppgtt_init_hw(struct drm_i915_private *dev_priv);
 void i915_ppgtt_release(struct kref *kref);
 struct i915_hw_ppgtt *i915_ppgtt_create(struct drm_i915_private *dev_priv,
 					struct drm_i915_file_private *fpriv);
-void i915_ppgtt_close(struct i915_address_space *vm);
-static inline void i915_ppgtt_get(struct i915_hw_ppgtt *ppgtt)
+
+void i915_ppgtt_open(struct i915_hw_ppgtt *ppgtt);
+void i915_ppgtt_close(struct i915_hw_ppgtt *ppgtt);
+
+static inline struct i915_hw_ppgtt *i915_ppgtt_get(struct i915_hw_ppgtt *ppgtt)
 {
-	if (ppgtt)
-		kref_get(&ppgtt->ref);
+	kref_get(&ppgtt->ref);
+	return ppgtt;
 }
+
 static inline void i915_ppgtt_put(struct i915_hw_ppgtt *ppgtt)
 {
 	if (ppgtt)
diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c
index 1e66cff985f8..0b7740dc18cb 100644
--- a/drivers/gpu/drm/i915/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/selftests/huge_pages.c
@@ -1734,7 +1734,6 @@ int i915_gem_huge_page_mock_selftests(void)
 	err = i915_subtests(tests, ppgtt);
 
 out_close:
-	i915_ppgtt_close(&ppgtt->vm);
 	i915_ppgtt_put(ppgtt);
 
 out_unlock:
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index 94da39fb7f2f..1c6ce9e0195a 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -372,7 +372,8 @@ static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
 	return 0;
 }
 
-static int cpu_check(struct drm_i915_gem_object *obj, unsigned int max)
+static noinline int cpu_check(struct drm_i915_gem_object *obj,
+			      unsigned int idx, unsigned int max)
 {
 	unsigned int n, m, needs_flush;
 	int err;
@@ -390,8 +391,10 @@ static int cpu_check(struct drm_i915_gem_object *obj, unsigned int max)
 
 		for (m = 0; m < max; m++) {
 			if (map[m] != m) {
-				pr_err("Invalid value at page %d, offset %d: found %x expected %x\n",
-				       n, m, map[m], m);
+				pr_err("%pS: Invalid value at object %d page %d/%ld, offset %d/%d: found %x expected %x\n",
+				       __builtin_return_address(0), idx,
+				       n, real_page_count(obj), m, max,
+				       map[m], m);
 				err = -EINVAL;
 				goto out_unmap;
 			}
@@ -399,8 +402,9 @@ static int cpu_check(struct drm_i915_gem_object *obj, unsigned int max)
 
 		for (; m < DW_PER_PAGE; m++) {
 			if (map[m] != STACK_MAGIC) {
-				pr_err("Invalid value at page %d, offset %d: found %x expected %x\n",
-				       n, m, map[m], STACK_MAGIC);
+				pr_err("%pS: Invalid value at object %d page %d, offset %d: found %x expected %x (uninitialised)\n",
+				       __builtin_return_address(0), idx, n, m,
+				       map[m], STACK_MAGIC);
 				err = -EINVAL;
 				goto out_unmap;
 			}
@@ -478,12 +482,8 @@ static unsigned long max_dwords(struct drm_i915_gem_object *obj)
 static int igt_ctx_exec(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
-	struct drm_i915_gem_object *obj = NULL;
-	unsigned long ncontexts, ndwords, dw;
-	struct igt_live_test t;
-	struct drm_file *file;
-	IGT_TIMEOUT(end_time);
-	LIST_HEAD(objects);
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
 	int err = -ENODEV;
 
 	/*
@@ -495,41 +495,166 @@ static int igt_ctx_exec(void *arg)
 	if (!DRIVER_CAPS(i915)->has_logical_contexts)
 		return 0;
 
+	for_each_engine(engine, i915, id) {
+		struct drm_i915_gem_object *obj = NULL;
+		unsigned long ncontexts, ndwords, dw;
+		struct igt_live_test t;
+		struct drm_file *file;
+		IGT_TIMEOUT(end_time);
+		LIST_HEAD(objects);
+
+		if (!intel_engine_can_store_dword(engine))
+			continue;
+
+		if (!engine->context_size)
+			continue; /* No logical context support in HW */
+
+		file = mock_file(i915);
+		if (IS_ERR(file))
+			return PTR_ERR(file);
+
+		mutex_lock(&i915->drm.struct_mutex);
+
+		err = igt_live_test_begin(&t, i915, __func__, engine->name);
+		if (err)
+			goto out_unlock;
+
+		ncontexts = 0;
+		ndwords = 0;
+		dw = 0;
+		while (!time_after(jiffies, end_time)) {
+			struct i915_gem_context *ctx;
+			intel_wakeref_t wakeref;
+
+			ctx = i915_gem_create_context(i915, file->driver_priv);
+			if (IS_ERR(ctx)) {
+				err = PTR_ERR(ctx);
+				goto out_unlock;
+			}
+
+			if (!obj) {
+				obj = create_test_object(ctx, file, &objects);
+				if (IS_ERR(obj)) {
+					err = PTR_ERR(obj);
+					goto out_unlock;
+				}
+			}
+
+			with_intel_runtime_pm(i915, wakeref)
+				err = gpu_fill(obj, ctx, engine, dw);
+			if (err) {
+				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
+				       ndwords, dw, max_dwords(obj),
+				       engine->name, ctx->hw_id,
+				       yesno(!!ctx->ppgtt), err);
+				goto out_unlock;
+			}
+
+			if (++dw == max_dwords(obj)) {
+				obj = NULL;
+				dw = 0;
+			}
+
+			ndwords++;
+			ncontexts++;
+		}
+
+		pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
+			ncontexts, engine->name, ndwords);
+
+		ncontexts = dw = 0;
+		list_for_each_entry(obj, &objects, st_link) {
+			unsigned int rem =
+				min_t(unsigned int, ndwords - dw, max_dwords(obj));
+
+			err = cpu_check(obj, ncontexts++, rem);
+			if (err)
+				break;
+
+			dw += rem;
+		}
+
+out_unlock:
+		if (igt_live_test_end(&t))
+			err = -EIO;
+		mutex_unlock(&i915->drm.struct_mutex);
+
+		mock_file_free(i915, file);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static int igt_shared_ctx_exec(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct i915_gem_context *parent;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	struct igt_live_test t;
+	struct drm_file *file;
+	int err = 0;
+
+	/*
+	 * Create a few different contexts with the same mm and write
+	 * through each ctx using the GPU making sure those writes end
+	 * up in the expected pages of our obj.
+	 */
+	if (!DRIVER_CAPS(i915)->has_logical_contexts)
+		return 0;
+
 	file = mock_file(i915);
 	if (IS_ERR(file))
 		return PTR_ERR(file);
 
 	mutex_lock(&i915->drm.struct_mutex);
 
+	parent = i915_gem_create_context(i915, file->driver_priv);
+	if (IS_ERR(parent)) {
+		err = PTR_ERR(parent);
+		goto out_unlock;
+	}
+
+	if (!parent->ppgtt) {
+		err = -ENODEV;
+		goto out_unlock;
+	}
+
 	err = igt_live_test_begin(&t, i915, __func__, "");
 	if (err)
 		goto out_unlock;
 
-	ncontexts = 0;
-	ndwords = 0;
-	dw = 0;
-	while (!time_after(jiffies, end_time)) {
-		struct intel_engine_cs *engine;
-		struct i915_gem_context *ctx;
-		unsigned int id;
+	for_each_engine(engine, i915, id) {
+		unsigned long ncontexts, ndwords, dw;
+		struct drm_i915_gem_object *obj = NULL;
+		struct i915_gem_context *ctx = NULL;
+		IGT_TIMEOUT(end_time);
+		LIST_HEAD(objects);
 
-		ctx = i915_gem_create_context(i915, file->driver_priv);
-		if (IS_ERR(ctx)) {
-			err = PTR_ERR(ctx);
-			goto out_unlock;
-		}
+		if (!intel_engine_can_store_dword(engine))
+			continue;
 
-		for_each_engine(engine, i915, id) {
+		dw = 0;
+		ndwords = 0;
+		ncontexts = 0;
+		while (!time_after(jiffies, end_time)) {
 			intel_wakeref_t wakeref;
 
-			if (!engine->context_size)
-				continue; /* No logical context support in HW */
+			if (ctx)
+				__destroy_hw_context(ctx, file->driver_priv);
 
-			if (!intel_engine_can_store_dword(engine))
-				continue;
+			ctx = i915_gem_create_context(i915, file->driver_priv);
+			if (IS_ERR(ctx)) {
+				err = PTR_ERR(ctx);
+				goto out_unlock;
+			}
+
+			__assign_ppgtt(ctx, parent->ppgtt);
 
 			if (!obj) {
-				obj = create_test_object(ctx, file, &objects);
+				obj = create_test_object(parent, file, &objects);
 				if (IS_ERR(obj)) {
 					err = PTR_ERR(obj);
 					goto out_unlock;
@@ -551,25 +676,25 @@ static int igt_ctx_exec(void *arg)
 				obj = NULL;
 				dw = 0;
 			}
+
 			ndwords++;
+			ncontexts++;
 		}
-		ncontexts++;
-	}
-	pr_info("Submitted %lu contexts (across %u engines), filling %lu dwords\n",
-		ncontexts, RUNTIME_INFO(i915)->num_engines, ndwords);
+		pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
+			ncontexts, engine->name, ndwords);
 
-	dw = 0;
-	list_for_each_entry(obj, &objects, st_link) {
-		unsigned int rem =
-			min_t(unsigned int, ndwords - dw, max_dwords(obj));
+		ncontexts = dw = 0;
+		list_for_each_entry(obj, &objects, st_link) {
+			unsigned int rem =
+				min_t(unsigned int, ndwords - dw, max_dwords(obj));
 
-		err = cpu_check(obj, rem);
-		if (err)
-			break;
+			err = cpu_check(obj, ncontexts++, rem);
+			if (err)
+				goto out_unlock;
 
-		dw += rem;
+			dw += rem;
+		}
 	}
-
 out_unlock:
 	if (igt_live_test_end(&t))
 		err = -EIO;
@@ -1048,7 +1173,7 @@ static int igt_ctx_readonly(void *arg)
 	struct drm_i915_gem_object *obj = NULL;
 	struct i915_gem_context *ctx;
 	struct i915_hw_ppgtt *ppgtt;
-	unsigned long ndwords, dw;
+	unsigned long idx, ndwords, dw;
 	struct igt_live_test t;
 	struct drm_file *file;
 	I915_RND_STATE(prng);
@@ -1129,6 +1254,7 @@ static int igt_ctx_readonly(void *arg)
 		ndwords, RUNTIME_INFO(i915)->num_engines);
 
 	dw = 0;
+	idx = 0;
 	list_for_each_entry(obj, &objects, st_link) {
 		unsigned int rem =
 			min_t(unsigned int, ndwords - dw, max_dwords(obj));
@@ -1138,7 +1264,7 @@ static int igt_ctx_readonly(void *arg)
 		if (i915_gem_object_is_readonly(obj))
 			num_writes = 0;
 
-		err = cpu_check(obj, num_writes);
+		err = cpu_check(obj, idx++, num_writes);
 		if (err)
 			break;
 
@@ -1725,6 +1851,7 @@ int i915_gem_context_live_selftests(struct drm_i915_private *dev_priv)
 		SUBTEST(igt_ctx_exec),
 		SUBTEST(igt_ctx_readonly),
 		SUBTEST(igt_ctx_sseu),
+		SUBTEST(igt_shared_ctx_exec),
 		SUBTEST(igt_vm_isolation),
 	};
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
index 826fd51c331e..57b3d9867070 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
@@ -1020,7 +1020,6 @@ static int exercise_ppgtt(struct drm_i915_private *dev_priv,
 
 	err = func(dev_priv, &ppgtt->vm, 0, ppgtt->vm.total, end_time);
 
-	i915_ppgtt_close(&ppgtt->vm);
 	i915_ppgtt_put(ppgtt);
 out_unlock:
 	mutex_unlock(&dev_priv->drm.struct_mutex);
diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c
index 353b37b9f78e..5eddf9fcfe8a 100644
--- a/drivers/gpu/drm/i915/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/selftests/mock_context.c
@@ -55,13 +55,17 @@ mock_context(struct drm_i915_private *i915,
 		goto err_handles;
 
 	if (name) {
+		struct i915_hw_ppgtt *ppgtt;
+
 		ctx->name = kstrdup(name, GFP_KERNEL);
 		if (!ctx->name)
 			goto err_put;
 
-		ctx->ppgtt = mock_ppgtt(i915, name);
-		if (!ctx->ppgtt)
+		ppgtt = mock_ppgtt(i915, name);
+		if (!ppgtt)
 			goto err_put;
+
+		__set_ppgtt(ctx, ppgtt);
 	}
 
 	return ctx;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 39835793722b..6575470755d0 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -341,6 +341,8 @@ typedef struct _drm_i915_sarea {
 #define DRM_I915_PERF_ADD_CONFIG	0x37
 #define DRM_I915_PERF_REMOVE_CONFIG	0x38
 #define DRM_I915_QUERY			0x39
+#define DRM_I915_GEM_VM_CREATE		0x3a
+#define DRM_I915_GEM_VM_DESTROY		0x3b
 /* Must be kept compact -- no holes */
 
 #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
@@ -400,6 +402,8 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_PERF_ADD_CONFIG	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_ADD_CONFIG, struct drm_i915_perf_oa_config)
 #define DRM_IOCTL_I915_PERF_REMOVE_CONFIG	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_REMOVE_CONFIG, __u64)
 #define DRM_IOCTL_I915_QUERY			DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_QUERY, struct drm_i915_query)
+#define DRM_IOCTL_I915_GEM_VM_CREATE	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_VM_CREATE, struct drm_i915_gem_vm_control)
+#define DRM_IOCTL_I915_GEM_VM_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_VM_DESTROY, struct drm_i915_gem_vm_control)
 
 /* Allow drivers to submit batchbuffers directly to hardware, relying
  * on the security mechanisms provided by hardware.
@@ -1451,6 +1455,26 @@ struct drm_i915_gem_context_destroy {
 	__u32 pad;
 };
 
+/*
+ * DRM_I915_GEM_VM_CREATE -
+ *
+ * Create a new virtual memory address space (ppGTT) for use within a context
+ * on the same file. Extensions can be provided to configure exactly how the
+ * address space is setup upon creation.
+ *
+ * The id of new VM (bound to the fd) for use with I915_CONTEXT_PARAM_VM is
+ * returned.
+ *
+ * DRM_I915_GEM_VM_DESTROY -
+ *
+ * Destroys a previously created VM id.
+ */
+struct drm_i915_gem_vm_control {
+	__u64 extensions;
+	__u32 flags;
+	__u32 id;
+};
+
 struct drm_i915_reg_read {
 	/*
 	 * Register offset.
@@ -1540,7 +1564,19 @@ struct drm_i915_gem_context_param {
  * On creation, all new contexts are marked as recoverable.
  */
 #define I915_CONTEXT_PARAM_RECOVERABLE	0x8
+
+	/*
+	 * The id of the associated virtual memory address space (ppGTT) of
+	 * this context. Can be retrieved and passed to another context
+	 * (on the same fd) for both to use the same ppGTT and so share
+	 * address layouts, and avoid reloading the page tables on context
+	 * switches between themselves.
+	 *
+	 * See DRM_I915_GEM_VM_CREATE and DRM_I915_GEM_VM_DESTROY.
+	 */
+#define I915_CONTEXT_PARAM_VM		0x9
 /* Must be kept compact -- no holes and well documented */
+
 	__u64 value;
 };
 
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 78+ messages in thread

* [PATCH 12/43] drm/i915: Extend CONTEXT_CREATE to set parameters upon construction
  2019-03-06 14:24 RFC Breaking up GEM struct_mutex for async-pages Chris Wilson
                   ` (10 preceding siblings ...)
  2019-03-06 14:24 ` [PATCH 11/43] drm/i915: Create/destroy VM (ppGTT) for use with contexts Chris Wilson
@ 2019-03-06 14:24 ` Chris Wilson
  2019-03-06 14:24 ` [PATCH 13/43] drm/i915: Allow contexts to share a single timeline across all engines Chris Wilson
                   ` (33 subsequent siblings)
  45 siblings, 0 replies; 78+ messages in thread
From: Chris Wilson @ 2019-03-06 14:24 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld

It can be useful to have a single ioctl to create a context with all
the initial parameters instead of a series of create + setparam + setparam
ioctls. This extension to create context allows any of the parameters
to be passed in as a linked list to be applied to the newly constructed
context.

v2: Make a local copy of user setparam (Tvrtko)
v3: Use flags to detect availability of extension interface

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.c         |   2 +-
 drivers/gpu/drm/i915/i915_gem_context.c | 447 +++++++++++++-----------
 include/uapi/drm/i915_drm.h             | 166 +++++----
 3 files changed, 339 insertions(+), 276 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 14d9b1dd5ccd..a54030d60f84 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -2997,7 +2997,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(I915_SET_SPRITE_COLORKEY, intel_sprite_set_colorkey_ioctl, DRM_MASTER),
 	DRM_IOCTL_DEF_DRV(I915_GET_SPRITE_COLORKEY, drm_noop, DRM_MASTER),
 	DRM_IOCTL_DEF_DRV(I915_GEM_WAIT, i915_gem_wait_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
-	DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_CREATE, i915_gem_context_create_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_CREATE_EXT, i915_gem_context_create_ioctl, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_DESTROY, i915_gem_context_destroy_ioctl, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_REG_READ, i915_reg_read_ioctl, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_GET_RESET_STATS, i915_gem_context_reset_stats_ioctl, DRM_RENDER_ALLOW),
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 5e580e79163e..bc042b0e0082 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -90,6 +90,7 @@
 #include "i915_drv.h"
 #include "i915_globals.h"
 #include "i915_trace.h"
+#include "i915_user_extensions.h"
 #include "intel_lrc_reg.h"
 #include "intel_workarounds.h"
 
@@ -1068,196 +1069,6 @@ static int set_ppgtt(struct i915_gem_context *ctx,
 	return err;
 }
 
-static bool client_is_banned(struct drm_i915_file_private *file_priv)
-{
-	return atomic_read(&file_priv->ban_score) >= I915_CLIENT_SCORE_BANNED;
-}
-
-int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
-				  struct drm_file *file)
-{
-	struct drm_i915_private *i915 = to_i915(dev);
-	struct drm_i915_gem_context_create *args = data;
-	struct drm_i915_file_private *file_priv = file->driver_priv;
-	struct i915_gem_context *ctx;
-	int ret;
-
-	if (!DRIVER_CAPS(i915)->has_logical_contexts)
-		return -ENODEV;
-
-	if (args->pad != 0)
-		return -EINVAL;
-
-	ret = i915_terminally_wedged(i915);
-	if (ret)
-		return ret;
-
-	if (client_is_banned(file_priv)) {
-		DRM_DEBUG("client %s[%d] banned from creating ctx\n",
-			  current->comm,
-			  pid_nr(get_task_pid(current, PIDTYPE_PID)));
-
-		return -EIO;
-	}
-
-	ret = i915_mutex_lock_interruptible(dev);
-	if (ret)
-		return ret;
-
-	ctx = i915_gem_create_context(i915, file_priv);
-	mutex_unlock(&dev->struct_mutex);
-	if (IS_ERR(ctx))
-		return PTR_ERR(ctx);
-
-	GEM_BUG_ON(i915_gem_context_is_kernel(ctx));
-
-	args->ctx_id = ctx->user_handle;
-	DRM_DEBUG("HW context %d created\n", args->ctx_id);
-
-	return 0;
-}
-
-int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
-				   struct drm_file *file)
-{
-	struct drm_i915_gem_context_destroy *args = data;
-	struct drm_i915_file_private *file_priv = file->driver_priv;
-	struct i915_gem_context *ctx;
-	int ret;
-
-	if (args->pad != 0)
-		return -EINVAL;
-
-	if (args->ctx_id == DEFAULT_CONTEXT_HANDLE)
-		return -ENOENT;
-
-	ctx = i915_gem_context_lookup(file_priv, args->ctx_id);
-	if (!ctx)
-		return -ENOENT;
-
-	ret = mutex_lock_interruptible(&dev->struct_mutex);
-	if (ret)
-		goto out;
-
-	__destroy_hw_context(ctx, file_priv);
-	mutex_unlock(&dev->struct_mutex);
-
-out:
-	i915_gem_context_put(ctx);
-	return 0;
-}
-
-static int get_sseu(struct i915_gem_context *ctx,
-		    struct drm_i915_gem_context_param *args)
-{
-	struct drm_i915_gem_context_param_sseu user_sseu;
-	struct intel_engine_cs *engine;
-	struct intel_context *ce;
-	int ret;
-
-	if (args->size == 0)
-		goto out;
-	else if (args->size < sizeof(user_sseu))
-		return -EINVAL;
-
-	if (copy_from_user(&user_sseu, u64_to_user_ptr(args->value),
-			   sizeof(user_sseu)))
-		return -EFAULT;
-
-	if (user_sseu.flags || user_sseu.rsvd)
-		return -EINVAL;
-
-	engine = intel_engine_lookup_user(ctx->i915,
-					  user_sseu.engine_class,
-					  user_sseu.engine_instance);
-	if (!engine)
-		return -EINVAL;
-
-	/* Only use for mutex here is to serialize get_param and set_param. */
-	ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex);
-	if (ret)
-		return ret;
-
-	ce = to_intel_context(ctx, engine);
-
-	user_sseu.slice_mask = ce->sseu.slice_mask;
-	user_sseu.subslice_mask = ce->sseu.subslice_mask;
-	user_sseu.min_eus_per_subslice = ce->sseu.min_eus_per_subslice;
-	user_sseu.max_eus_per_subslice = ce->sseu.max_eus_per_subslice;
-
-	mutex_unlock(&ctx->i915->drm.struct_mutex);
-
-	if (copy_to_user(u64_to_user_ptr(args->value), &user_sseu,
-			 sizeof(user_sseu)))
-		return -EFAULT;
-
-out:
-	args->size = sizeof(user_sseu);
-
-	return 0;
-}
-
-int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
-				    struct drm_file *file)
-{
-	struct drm_i915_file_private *file_priv = file->driver_priv;
-	struct drm_i915_gem_context_param *args = data;
-	struct i915_gem_context *ctx;
-	int ret = 0;
-
-	ctx = i915_gem_context_lookup(file_priv, args->ctx_id);
-	if (!ctx)
-		return -ENOENT;
-
-	switch (args->param) {
-	case I915_CONTEXT_PARAM_BAN_PERIOD:
-		ret = -EINVAL;
-		break;
-	case I915_CONTEXT_PARAM_NO_ZEROMAP:
-		args->size = 0;
-		args->value = test_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags);
-		break;
-	case I915_CONTEXT_PARAM_GTT_SIZE:
-		args->size = 0;
-
-		if (ctx->ppgtt)
-			args->value = ctx->ppgtt->vm.total;
-		else if (to_i915(dev)->mm.aliasing_ppgtt)
-			args->value = to_i915(dev)->mm.aliasing_ppgtt->vm.total;
-		else
-			args->value = to_i915(dev)->ggtt.vm.total;
-		break;
-	case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
-		args->size = 0;
-		args->value = i915_gem_context_no_error_capture(ctx);
-		break;
-	case I915_CONTEXT_PARAM_BANNABLE:
-		args->size = 0;
-		args->value = i915_gem_context_is_bannable(ctx);
-		break;
-	case I915_CONTEXT_PARAM_RECOVERABLE:
-		args->size = 0;
-		args->value = i915_gem_context_is_recoverable(ctx);
-		break;
-	case I915_CONTEXT_PARAM_PRIORITY:
-		args->size = 0;
-		args->value = ctx->sched.priority >> I915_USER_PRIORITY_SHIFT;
-		break;
-	case I915_CONTEXT_PARAM_SSEU:
-		ret = get_sseu(ctx, args);
-		break;
-	case I915_CONTEXT_PARAM_VM:
-		ret = get_ppgtt(ctx, args);
-		break;
-	default:
-		ret = -EINVAL;
-		break;
-	}
-
-	i915_gem_context_put(ctx);
-	return ret;
-}
-
 static int gen8_emit_rpcs_config(struct i915_request *rq,
 				 struct intel_context *ce,
 				 struct intel_sseu sseu)
@@ -1533,18 +1344,11 @@ static int set_sseu(struct i915_gem_context *ctx,
 	return 0;
 }
 
-int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
-				    struct drm_file *file)
+static int ctx_setparam(struct i915_gem_context *ctx,
+			struct drm_i915_gem_context_param *args)
 {
-	struct drm_i915_file_private *file_priv = file->driver_priv;
-	struct drm_i915_gem_context_param *args = data;
-	struct i915_gem_context *ctx;
 	int ret = 0;
 
-	ctx = i915_gem_context_lookup(file_priv, args->ctx_id);
-	if (!ctx)
-		return -ENOENT;
-
 	switch (args->param) {
 	case I915_CONTEXT_PARAM_NO_ZEROMAP:
 		if (args->size)
@@ -1554,6 +1358,7 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 		else
 			clear_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags);
 		break;
+
 	case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
 		if (args->size)
 			ret = -EINVAL;
@@ -1562,6 +1367,7 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 		else
 			i915_gem_context_clear_no_error_capture(ctx);
 		break;
+
 	case I915_CONTEXT_PARAM_BANNABLE:
 		if (args->size)
 			ret = -EINVAL;
@@ -1588,7 +1394,7 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 
 			if (args->size)
 				ret = -EINVAL;
-			else if (!(to_i915(dev)->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY))
+			else if (!(ctx->i915->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY))
 				ret = -ENODEV;
 			else if (priority > I915_CONTEXT_MAX_USER_PRIORITY ||
 				 priority < I915_CONTEXT_MIN_USER_PRIORITY)
@@ -1616,6 +1422,247 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 		break;
 	}
 
+	return ret;
+}
+
+static int create_setparam(struct i915_user_extension __user *ext, void *data)
+{
+	struct drm_i915_gem_context_create_ext_setparam local;
+
+	if (copy_from_user(&local, ext, sizeof(local)))
+		return -EFAULT;
+
+	if (local.setparam.ctx_id)
+		return -EINVAL;
+
+	return ctx_setparam(data, &local.setparam);
+}
+
+static const i915_user_extension_fn create_extensions[] = {
+	[I915_CONTEXT_CREATE_EXT_SETPARAM] = create_setparam,
+};
+
+static bool client_is_banned(struct drm_i915_file_private *file_priv)
+{
+	return atomic_read(&file_priv->ban_score) >= I915_CLIENT_SCORE_BANNED;
+}
+
+int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
+				  struct drm_file *file)
+{
+	struct drm_i915_private *i915 = to_i915(dev);
+	struct drm_i915_gem_context_create_ext *args = data;
+	struct drm_i915_file_private *file_priv = file->driver_priv;
+	struct i915_gem_context *ctx;
+	int ret;
+
+	if (!DRIVER_CAPS(i915)->has_logical_contexts)
+		return -ENODEV;
+
+	if (args->flags & I915_CONTEXT_CREATE_FLAGS_UNKNOWN)
+		return -EINVAL;
+
+	ret = i915_terminally_wedged(i915);
+	if (ret)
+		return ret;
+
+	if (client_is_banned(file_priv)) {
+		DRM_DEBUG("client %s[%d] banned from creating ctx\n",
+			  current->comm,
+			  pid_nr(get_task_pid(current, PIDTYPE_PID)));
+
+		return -EIO;
+	}
+
+	ret = i915_mutex_lock_interruptible(dev);
+	if (ret)
+		return ret;
+
+	ctx = i915_gem_create_context(i915, file_priv);
+	mutex_unlock(&dev->struct_mutex);
+	if (IS_ERR(ctx))
+		return PTR_ERR(ctx);
+
+	GEM_BUG_ON(i915_gem_context_is_kernel(ctx));
+
+	if (args->flags & I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS) {
+		ret = i915_user_extensions(u64_to_user_ptr(args->extensions),
+					   create_extensions,
+					   ARRAY_SIZE(create_extensions),
+					   ctx);
+		if (ret) {
+			idr_remove(&file_priv->context_idr, ctx->user_handle);
+			context_close(ctx);
+			return ret;
+		}
+	}
+
+	args->ctx_id = ctx->user_handle;
+	DRM_DEBUG("HW context %d created\n", args->ctx_id);
+
+	return 0;
+}
+
+int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
+				   struct drm_file *file)
+{
+	struct drm_i915_gem_context_destroy *args = data;
+	struct drm_i915_file_private *file_priv = file->driver_priv;
+	struct i915_gem_context *ctx;
+	int ret;
+
+	if (args->pad != 0)
+		return -EINVAL;
+
+	if (args->ctx_id == DEFAULT_CONTEXT_HANDLE)
+		return -ENOENT;
+
+	ctx = i915_gem_context_lookup(file_priv, args->ctx_id);
+	if (!ctx)
+		return -ENOENT;
+
+	ret = mutex_lock_interruptible(&dev->struct_mutex);
+	if (ret)
+		goto out;
+
+	__destroy_hw_context(ctx, file_priv);
+	mutex_unlock(&dev->struct_mutex);
+
+out:
+	i915_gem_context_put(ctx);
+	return 0;
+}
+
+static int get_sseu(struct i915_gem_context *ctx,
+		    struct drm_i915_gem_context_param *args)
+{
+	struct drm_i915_gem_context_param_sseu user_sseu;
+	struct intel_engine_cs *engine;
+	struct intel_context *ce;
+	int ret;
+
+	if (args->size == 0)
+		goto out;
+	else if (args->size < sizeof(user_sseu))
+		return -EINVAL;
+
+	if (copy_from_user(&user_sseu, u64_to_user_ptr(args->value),
+			   sizeof(user_sseu)))
+		return -EFAULT;
+
+	if (user_sseu.flags || user_sseu.rsvd)
+		return -EINVAL;
+
+	engine = intel_engine_lookup_user(ctx->i915,
+					  user_sseu.engine_class,
+					  user_sseu.engine_instance);
+	if (!engine)
+		return -EINVAL;
+
+	/* Only use for mutex here is to serialize get_param and set_param. */
+	ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex);
+	if (ret)
+		return ret;
+
+	ce = to_intel_context(ctx, engine);
+
+	user_sseu.slice_mask = ce->sseu.slice_mask;
+	user_sseu.subslice_mask = ce->sseu.subslice_mask;
+	user_sseu.min_eus_per_subslice = ce->sseu.min_eus_per_subslice;
+	user_sseu.max_eus_per_subslice = ce->sseu.max_eus_per_subslice;
+
+	mutex_unlock(&ctx->i915->drm.struct_mutex);
+
+	if (copy_to_user(u64_to_user_ptr(args->value), &user_sseu,
+			 sizeof(user_sseu)))
+		return -EFAULT;
+
+out:
+	args->size = sizeof(user_sseu);
+
+	return 0;
+}
+
+int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
+				    struct drm_file *file)
+{
+	struct drm_i915_file_private *file_priv = file->driver_priv;
+	struct drm_i915_gem_context_param *args = data;
+	struct i915_gem_context *ctx;
+	int ret = 0;
+
+	ctx = i915_gem_context_lookup(file_priv, args->ctx_id);
+	if (!ctx)
+		return -ENOENT;
+
+	switch (args->param) {
+	case I915_CONTEXT_PARAM_NO_ZEROMAP:
+		args->size = 0;
+		args->value = test_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags);
+		break;
+
+	case I915_CONTEXT_PARAM_GTT_SIZE:
+		args->size = 0;
+		if (ctx->ppgtt)
+			args->value = ctx->ppgtt->vm.total;
+		else if (to_i915(dev)->mm.aliasing_ppgtt)
+			args->value = to_i915(dev)->mm.aliasing_ppgtt->vm.total;
+		else
+			args->value = to_i915(dev)->ggtt.vm.total;
+		break;
+
+	case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
+		args->size = 0;
+		args->value = i915_gem_context_no_error_capture(ctx);
+		break;
+
+	case I915_CONTEXT_PARAM_BANNABLE:
+		args->size = 0;
+		args->value = i915_gem_context_is_bannable(ctx);
+		break;
+
+	case I915_CONTEXT_PARAM_RECOVERABLE:
+		args->size = 0;
+		args->value = i915_gem_context_is_recoverable(ctx);
+		break;
+
+	case I915_CONTEXT_PARAM_PRIORITY:
+		args->size = 0;
+		args->value = ctx->sched.priority >> I915_USER_PRIORITY_SHIFT;
+		break;
+
+	case I915_CONTEXT_PARAM_SSEU:
+		ret = get_sseu(ctx, args);
+		break;
+
+	case I915_CONTEXT_PARAM_VM:
+		ret = get_ppgtt(ctx, args);
+		break;
+
+	case I915_CONTEXT_PARAM_BAN_PERIOD:
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	i915_gem_context_put(ctx);
+	return ret;
+}
+
+int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
+				    struct drm_file *file)
+{
+	struct drm_i915_file_private *file_priv = file->driver_priv;
+	struct drm_i915_gem_context_param *args = data;
+	struct i915_gem_context *ctx;
+	int ret;
+
+	ctx = i915_gem_context_lookup(file_priv, args->ctx_id);
+	if (!ctx)
+		return -ENOENT;
+
+	ret = ctx_setparam(ctx, args);
+
 	i915_gem_context_put(ctx);
 	return ret;
 }
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 6575470755d0..0db92a4153c8 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -392,6 +392,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_GET_SPRITE_COLORKEY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GET_SPRITE_COLORKEY, struct drm_intel_sprite_colorkey)
 #define DRM_IOCTL_I915_GEM_WAIT		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_WAIT, struct drm_i915_gem_wait)
 #define DRM_IOCTL_I915_GEM_CONTEXT_CREATE	DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct drm_i915_gem_context_create)
+#define DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT	DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct drm_i915_gem_context_create_ext)
 #define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy)
 #define DRM_IOCTL_I915_REG_READ			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read)
 #define DRM_IOCTL_I915_GET_RESET_STATS		DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GET_RESET_STATS, struct drm_i915_reset_stats)
@@ -1445,85 +1446,17 @@ struct drm_i915_gem_wait {
 };
 
 struct drm_i915_gem_context_create {
-	/*  output: id of new context*/
-	__u32 ctx_id;
-	__u32 pad;
-};
-
-struct drm_i915_gem_context_destroy {
-	__u32 ctx_id;
+	__u32 ctx_id; /* output: id of new context*/
 	__u32 pad;
 };
 
-/*
- * DRM_I915_GEM_VM_CREATE -
- *
- * Create a new virtual memory address space (ppGTT) for use within a context
- * on the same file. Extensions can be provided to configure exactly how the
- * address space is setup upon creation.
- *
- * The id of new VM (bound to the fd) for use with I915_CONTEXT_PARAM_VM is
- * returned.
- *
- * DRM_I915_GEM_VM_DESTROY -
- *
- * Destroys a previously created VM id.
- */
-struct drm_i915_gem_vm_control {
-	__u64 extensions;
+struct drm_i915_gem_context_create_ext {
+	__u32 ctx_id; /* output: id of new context*/
 	__u32 flags;
-	__u32 id;
-};
-
-struct drm_i915_reg_read {
-	/*
-	 * Register offset.
-	 * For 64bit wide registers where the upper 32bits don't immediately
-	 * follow the lower 32bits, the offset of the lower 32bits must
-	 * be specified
-	 */
-	__u64 offset;
-#define I915_REG_READ_8B_WA (1ul << 0)
-
-	__u64 val; /* Return value */
-};
-/* Known registers:
- *
- * Render engine timestamp - 0x2358 + 64bit - gen7+
- * - Note this register returns an invalid value if using the default
- *   single instruction 8byte read, in order to workaround that pass
- *   flag I915_REG_READ_8B_WA in offset field.
- *
- */
-
-struct drm_i915_reset_stats {
-	__u32 ctx_id;
-	__u32 flags;
-
-	/* All resets since boot/module reload, for all contexts */
-	__u32 reset_count;
-
-	/* Number of batches lost when active in GPU, for this context */
-	__u32 batch_active;
-
-	/* Number of batches lost pending for execution, for this context */
-	__u32 batch_pending;
-
-	__u32 pad;
-};
-
-struct drm_i915_gem_userptr {
-	__u64 user_ptr;
-	__u64 user_size;
-	__u32 flags;
-#define I915_USERPTR_READ_ONLY 0x1
-#define I915_USERPTR_UNSYNCHRONIZED 0x80000000
-	/**
-	 * Returned handle for the object.
-	 *
-	 * Object handles are nonzero.
-	 */
-	__u32 handle;
+#define I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS	(1u << 0)
+#define I915_CONTEXT_CREATE_FLAGS_UNKNOWN \
+	(-(I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS << 1))
+	__u64 extensions;
 };
 
 struct drm_i915_gem_context_param {
@@ -1639,6 +1572,89 @@ struct drm_i915_gem_context_param_sseu {
 	__u32 rsvd;
 };
 
+struct drm_i915_gem_context_create_ext_setparam {
+#define I915_CONTEXT_CREATE_EXT_SETPARAM 0
+	struct i915_user_extension base;
+	struct drm_i915_gem_context_param setparam;
+};
+
+struct drm_i915_gem_context_destroy {
+	__u32 ctx_id;
+	__u32 pad;
+};
+
+/*
+ * DRM_I915_GEM_VM_CREATE -
+ *
+ * Create a new virtual memory address space (ppGTT) for use within a context
+ * on the same file. Extensions can be provided to configure exactly how the
+ * address space is setup upon creation.
+ *
+ * The id of new VM (bound to the fd) for use with I915_CONTEXT_PARAM_VM is
+ * returned.
+ *
+ * DRM_I915_GEM_VM_DESTROY -
+ *
+ * Destroys a previously created VM id.
+ */
+struct drm_i915_gem_vm_control {
+	__u64 extensions;
+	__u32 flags;
+	__u32 id;
+};
+
+struct drm_i915_reg_read {
+	/*
+	 * Register offset.
+	 * For 64bit wide registers where the upper 32bits don't immediately
+	 * follow the lower 32bits, the offset of the lower 32bits must
+	 * be specified
+	 */
+	__u64 offset;
+#define I915_REG_READ_8B_WA (1ul << 0)
+
+	__u64 val; /* Return value */
+};
+
+/* Known registers:
+ *
+ * Render engine timestamp - 0x2358 + 64bit - gen7+
+ * - Note this register returns an invalid value if using the default
+ *   single instruction 8byte read, in order to workaround that pass
+ *   flag I915_REG_READ_8B_WA in offset field.
+ *
+ */
+
+struct drm_i915_reset_stats {
+	__u32 ctx_id;
+	__u32 flags;
+
+	/* All resets since boot/module reload, for all contexts */
+	__u32 reset_count;
+
+	/* Number of batches lost when active in GPU, for this context */
+	__u32 batch_active;
+
+	/* Number of batches lost pending for execution, for this context */
+	__u32 batch_pending;
+
+	__u32 pad;
+};
+
+struct drm_i915_gem_userptr {
+	__u64 user_ptr;
+	__u64 user_size;
+	__u32 flags;
+#define I915_USERPTR_READ_ONLY 0x1
+#define I915_USERPTR_UNSYNCHRONIZED 0x80000000
+	/**
+	 * Returned handle for the object.
+	 *
+	 * Object handles are nonzero.
+	 */
+	__u32 handle;
+};
+
 enum drm_i915_oa_format {
 	I915_OA_FORMAT_A13 = 1,	    /* HSW only */
 	I915_OA_FORMAT_A29,	    /* HSW only */
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 78+ messages in thread

* [PATCH 13/43] drm/i915: Allow contexts to share a single timeline across all engines
  2019-03-06 14:24 RFC Breaking up GEM struct_mutex for async-pages Chris Wilson
                   ` (11 preceding siblings ...)
  2019-03-06 14:24 ` [PATCH 12/43] drm/i915: Extend CONTEXT_CREATE to set parameters upon construction Chris Wilson
@ 2019-03-06 14:24 ` Chris Wilson
  2019-03-06 14:24 ` [PATCH 14/43] drm/i915: Allow userspace to clone contexts on creation Chris Wilson
                   ` (32 subsequent siblings)
  45 siblings, 0 replies; 78+ messages in thread
From: Chris Wilson @ 2019-03-06 14:24 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld

Previously, our view has been always to run the engines independently
within a context. (Multiple engines happened before we had contexts and
timelines, so they always operated independently and that behaviour
persisted into contexts.) However, at the user level the context often
represents a single timeline (e.g. GL contexts) and userspace must
ensure that the individual engines are serialised to present that
ordering to the client (or forgot about this detail entirely and hope no
one notices - a fair ploy if the client can only directly control one
engine themselves ;)

In the next patch, we will want to construct a set of engines that
operate as one, that have a single timeline interwoven between them, to
present a single virtual engine to the user. (They submit to the virtual
engine, then we decide which engine to execute on based.)

To that end, we want to be able to create contexts which have a single
timeline (fence context) shared between all engines, rather than multiple
timelines.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_context.c       | 32 ++++++++++++---
 drivers/gpu/drm/i915/i915_gem_context.h       |  3 ++
 drivers/gpu/drm/i915/i915_request.c           | 10 ++++-
 drivers/gpu/drm/i915/i915_request.h           |  5 ++-
 drivers/gpu/drm/i915/i915_sw_fence.c          | 39 ++++++++++++++++---
 drivers/gpu/drm/i915/i915_sw_fence.h          | 13 ++++++-
 drivers/gpu/drm/i915/intel_lrc.c              |  5 ++-
 .../gpu/drm/i915/selftests/i915_gem_context.c | 18 +++++----
 drivers/gpu/drm/i915/selftests/mock_context.c |  2 +-
 include/uapi/drm/i915_drm.h                   |  3 +-
 10 files changed, 103 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index bc042b0e0082..9927f1296fd6 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -241,6 +241,9 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
 			ce->ops->destroy(ce);
 	}
 
+	if (ctx->timeline)
+		i915_timeline_put(ctx->timeline);
+
 	kfree(ctx->name);
 	put_pid(ctx->pid);
 
@@ -480,12 +483,17 @@ static void __assign_ppgtt(struct i915_gem_context *ctx,
 
 static struct i915_gem_context *
 i915_gem_create_context(struct drm_i915_private *dev_priv,
-			struct drm_i915_file_private *file_priv)
+			struct drm_i915_file_private *file_priv,
+			unsigned int flags)
 {
 	struct i915_gem_context *ctx;
 
 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
 
+	if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE &&
+	    !HAS_EXECLISTS(dev_priv))
+		return ERR_PTR(-EINVAL);
+
 	/* Reap the most stale context */
 	contexts_free_first(dev_priv);
 
@@ -508,6 +516,18 @@ i915_gem_create_context(struct drm_i915_private *dev_priv,
 		i915_ppgtt_put(ppgtt);
 	}
 
+	if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE) {
+		struct i915_timeline *timeline;
+
+		timeline = i915_timeline_create(dev_priv, ctx->name, NULL);
+		if (IS_ERR(timeline)) {
+			__destroy_hw_context(ctx, file_priv);
+			return ERR_CAST(timeline);
+		}
+
+		ctx->timeline = timeline;
+	}
+
 	trace_i915_context_create(ctx);
 
 	return ctx;
@@ -536,7 +556,7 @@ i915_gem_context_create_gvt(struct drm_device *dev)
 	if (ret)
 		return ERR_PTR(ret);
 
-	ctx = i915_gem_create_context(to_i915(dev), NULL);
+	ctx = i915_gem_create_context(to_i915(dev), NULL, 0);
 	if (IS_ERR(ctx))
 		goto out;
 
@@ -572,7 +592,7 @@ i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio)
 	struct i915_gem_context *ctx;
 	int err;
 
-	ctx = i915_gem_create_context(i915, NULL);
+	ctx = i915_gem_create_context(i915, NULL, 0);
 	if (IS_ERR(ctx))
 		return ctx;
 
@@ -704,7 +724,7 @@ int i915_gem_context_open(struct drm_i915_private *i915,
 	idr_init_base(&file_priv->vm_idr, 1);
 
 	mutex_lock(&i915->drm.struct_mutex);
-	ctx = i915_gem_create_context(i915, file_priv);
+	ctx = i915_gem_create_context(i915, file_priv, 0);
 	mutex_unlock(&i915->drm.struct_mutex);
 	if (IS_ERR(ctx)) {
 		idr_destroy(&file_priv->context_idr);
@@ -820,7 +840,7 @@ last_request_on_engine(struct i915_timeline *timeline,
 
 	rq = i915_active_request_raw(&timeline->last_request,
 				     &engine->i915->drm.struct_mutex);
-	if (rq && rq->engine == engine) {
+	if (rq && rq->engine->mask & engine->mask) {
 		GEM_TRACE("last request for %s on engine %s: %llx:%llu\n",
 			  timeline->name, engine->name,
 			  rq->fence.context, rq->fence.seqno);
@@ -1478,7 +1498,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
 	if (ret)
 		return ret;
 
-	ctx = i915_gem_create_context(i915, file_priv);
+	ctx = i915_gem_create_context(i915, file_priv, args->flags);
 	mutex_unlock(&dev->struct_mutex);
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index 92c2eecae12e..755b2d97c32c 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -43,6 +43,7 @@ struct drm_i915_private;
 struct drm_i915_file_private;
 struct i915_hw_ppgtt;
 struct i915_request;
+struct i915_timeline;
 struct i915_vma;
 struct intel_ring;
 
@@ -78,6 +79,8 @@ struct i915_gem_context {
 	/** file_priv: owning file descriptor */
 	struct drm_i915_file_private *file_priv;
 
+	struct i915_timeline *timeline;
+
 	/**
 	 * @ppgtt: unique address space (GTT)
 	 *
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 9533a85cb0b3..18f49132a526 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1047,8 +1047,14 @@ void i915_request_add(struct i915_request *request)
 	prev = i915_active_request_raw(&timeline->last_request,
 				       &request->i915->drm.struct_mutex);
 	if (prev && !i915_request_completed(prev)) {
-		i915_sw_fence_await_sw_fence(&request->submit, &prev->submit,
-					     &request->submitq);
+		if (is_power_of_2(prev->engine->mask | engine->mask))
+			i915_sw_fence_await_sw_fence(&request->submit,
+						     &prev->submit,
+						     &request->submitq);
+		else
+			__i915_sw_fence_await_dma_fence(&request->submit,
+							&prev->fence,
+							&request->dmaq);
 		if (engine->schedule)
 			__i915_sched_node_add_dependency(&request->sched,
 							 &prev->sched,
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 8c8fa5010644..cd6c130964cd 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -128,7 +128,10 @@ struct i915_request {
 	 * It is used by the driver to then queue the request for execution.
 	 */
 	struct i915_sw_fence submit;
-	wait_queue_entry_t submitq;
+	union {
+		wait_queue_entry_t submitq;
+		struct i915_sw_dma_fence_cb dmaq;
+	};
 	struct list_head execute_cb;
 
 	/*
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c
index 8d1400d378d7..5387aafd3424 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.c
+++ b/drivers/gpu/drm/i915/i915_sw_fence.c
@@ -359,11 +359,6 @@ int i915_sw_fence_await_sw_fence_gfp(struct i915_sw_fence *fence,
 	return __i915_sw_fence_await_sw_fence(fence, signaler, NULL, gfp);
 }
 
-struct i915_sw_dma_fence_cb {
-	struct dma_fence_cb base;
-	struct i915_sw_fence *fence;
-};
-
 struct i915_sw_dma_fence_cb_timer {
 	struct i915_sw_dma_fence_cb base;
 	struct dma_fence *dma;
@@ -480,6 +475,40 @@ int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
 	return ret;
 }
 
+static void __dma_i915_sw_fence_wake(struct dma_fence *dma,
+				     struct dma_fence_cb *data)
+{
+	struct i915_sw_dma_fence_cb *cb = container_of(data, typeof(*cb), base);
+
+	i915_sw_fence_complete(cb->fence);
+}
+
+int __i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
+				    struct dma_fence *dma,
+				    struct i915_sw_dma_fence_cb *cb)
+{
+	int ret;
+
+	debug_fence_assert(fence);
+
+	if (dma_fence_is_signaled(dma))
+		return 0;
+
+	cb->fence = fence;
+	i915_sw_fence_await(fence);
+
+	ret = dma_fence_add_callback(dma, &cb->base, __dma_i915_sw_fence_wake);
+	if (ret == 0) {
+		ret = 1;
+	} else {
+		i915_sw_fence_complete(fence);
+		if (ret == -ENOENT) /* fence already signaled */
+			ret = 0;
+	}
+
+	return ret;
+}
+
 int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,
 				    struct reservation_object *resv,
 				    const struct dma_fence_ops *exclude,
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.h b/drivers/gpu/drm/i915/i915_sw_fence.h
index 6dec9e1d1102..9cb5c3b307a6 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.h
+++ b/drivers/gpu/drm/i915/i915_sw_fence.h
@@ -9,14 +9,13 @@
 #ifndef _I915_SW_FENCE_H_
 #define _I915_SW_FENCE_H_
 
+#include <linux/dma-fence.h>
 #include <linux/gfp.h>
 #include <linux/kref.h>
 #include <linux/notifier.h> /* for NOTIFY_DONE */
 #include <linux/wait.h>
 
 struct completion;
-struct dma_fence;
-struct dma_fence_ops;
 struct reservation_object;
 
 struct i915_sw_fence {
@@ -68,10 +67,20 @@ int i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
 int i915_sw_fence_await_sw_fence_gfp(struct i915_sw_fence *fence,
 				     struct i915_sw_fence *after,
 				     gfp_t gfp);
+
+struct i915_sw_dma_fence_cb {
+	struct dma_fence_cb base;
+	struct i915_sw_fence *fence;
+};
+
+int __i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
+				    struct dma_fence *dma,
+				    struct i915_sw_dma_fence_cb *cb);
 int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
 				  struct dma_fence *dma,
 				  unsigned long timeout,
 				  gfp_t gfp);
+
 int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,
 				    struct reservation_object *resv,
 				    const struct dma_fence_ops *exclude,
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index a9a47dbeac88..0bd837b64ade 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -2860,7 +2860,10 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
 		goto error_deref_obj;
 	}
 
-	timeline = i915_timeline_create(ctx->i915, ctx->name, NULL);
+	if (ctx->timeline)
+		timeline = i915_timeline_get(ctx->timeline);
+	else
+		timeline = i915_timeline_create(ctx->i915, ctx->name, NULL);
 	if (IS_ERR(timeline)) {
 		ret = PTR_ERR(timeline);
 		goto error_deref_obj;
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index 1c6ce9e0195a..df1380498612 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -76,7 +76,7 @@ static int live_nop_switch(void *arg)
 	}
 
 	for (n = 0; n < nctx; n++) {
-		ctx[n] = i915_gem_create_context(i915, file->driver_priv);
+		ctx[n] = i915_gem_create_context(i915, file->driver_priv, 0);
 		if (IS_ERR(ctx[n])) {
 			err = PTR_ERR(ctx[n]);
 			goto out_unlock;
@@ -526,7 +526,8 @@ static int igt_ctx_exec(void *arg)
 			struct i915_gem_context *ctx;
 			intel_wakeref_t wakeref;
 
-			ctx = i915_gem_create_context(i915, file->driver_priv);
+			ctx = i915_gem_create_context(i915,
+						      file->driver_priv, 0);
 			if (IS_ERR(ctx)) {
 				err = PTR_ERR(ctx);
 				goto out_unlock;
@@ -611,7 +612,7 @@ static int igt_shared_ctx_exec(void *arg)
 
 	mutex_lock(&i915->drm.struct_mutex);
 
-	parent = i915_gem_create_context(i915, file->driver_priv);
+	parent = i915_gem_create_context(i915, file->driver_priv, 0);
 	if (IS_ERR(parent)) {
 		err = PTR_ERR(parent);
 		goto out_unlock;
@@ -645,7 +646,8 @@ static int igt_shared_ctx_exec(void *arg)
 			if (ctx)
 				__destroy_hw_context(ctx, file->driver_priv);
 
-			ctx = i915_gem_create_context(i915, file->driver_priv);
+			ctx = i915_gem_create_context(i915,
+						      file->driver_priv, 0);
 			if (IS_ERR(ctx)) {
 				err = PTR_ERR(ctx);
 				goto out_unlock;
@@ -1087,7 +1089,7 @@ __igt_ctx_sseu(struct drm_i915_private *i915,
 
 	mutex_lock(&i915->drm.struct_mutex);
 
-	ctx = i915_gem_create_context(i915, file->driver_priv);
+	ctx = i915_gem_create_context(i915, file->driver_priv, 0);
 	if (IS_ERR(ctx)) {
 		ret = PTR_ERR(ctx);
 		goto out_unlock;
@@ -1197,7 +1199,7 @@ static int igt_ctx_readonly(void *arg)
 	if (err)
 		goto out_unlock;
 
-	ctx = i915_gem_create_context(i915, file->driver_priv);
+	ctx = i915_gem_create_context(i915, file->driver_priv, 0);
 	if (IS_ERR(ctx)) {
 		err = PTR_ERR(ctx);
 		goto out_unlock;
@@ -1525,13 +1527,13 @@ static int igt_vm_isolation(void *arg)
 	if (err)
 		goto out_unlock;
 
-	ctx_a = i915_gem_create_context(i915, file->driver_priv);
+	ctx_a = i915_gem_create_context(i915, file->driver_priv, 0);
 	if (IS_ERR(ctx_a)) {
 		err = PTR_ERR(ctx_a);
 		goto out_unlock;
 	}
 
-	ctx_b = i915_gem_create_context(i915, file->driver_priv);
+	ctx_b = i915_gem_create_context(i915, file->driver_priv, 0);
 	if (IS_ERR(ctx_b)) {
 		err = PTR_ERR(ctx_b);
 		goto out_unlock;
diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c
index 5eddf9fcfe8a..5d0ff2293abc 100644
--- a/drivers/gpu/drm/i915/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/selftests/mock_context.c
@@ -95,7 +95,7 @@ live_context(struct drm_i915_private *i915, struct drm_file *file)
 {
 	lockdep_assert_held(&i915->drm.struct_mutex);
 
-	return i915_gem_create_context(i915, file->driver_priv);
+	return i915_gem_create_context(i915, file->driver_priv, 0);
 }
 
 struct i915_gem_context *
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 0db92a4153c8..007d77ff7295 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1454,8 +1454,9 @@ struct drm_i915_gem_context_create_ext {
 	__u32 ctx_id; /* output: id of new context*/
 	__u32 flags;
 #define I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS	(1u << 0)
+#define I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE	(1u << 1)
 #define I915_CONTEXT_CREATE_FLAGS_UNKNOWN \
-	(-(I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS << 1))
+	(-(I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE << 1))
 	__u64 extensions;
 };
 
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 78+ messages in thread

* [PATCH 14/43] drm/i915: Allow userspace to clone contexts on creation
  2019-03-06 14:24 RFC Breaking up GEM struct_mutex for async-pages Chris Wilson
                   ` (12 preceding siblings ...)
  2019-03-06 14:24 ` [PATCH 13/43] drm/i915: Allow contexts to share a single timeline across all engines Chris Wilson
@ 2019-03-06 14:24 ` Chris Wilson
  2019-03-06 14:24 ` [PATCH 15/43] drm/i915: Allow a context to define its set of engines Chris Wilson
                   ` (31 subsequent siblings)
  45 siblings, 0 replies; 78+ messages in thread
From: Chris Wilson @ 2019-03-06 14:24 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld

A usecase arose out of handling context recovery in mesa, whereby they
wish to recreate a context with fresh logical state but preserving all
other details of the original. Currently, they create a new context and
iterate over which bits they want to copy across, but it would much more
convenient if they were able to just pass in a target context to clone
during creation. This essentially extends the setparam during creation
to pull the details from a target context instead of the user supplied
parameters.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_context.c | 71 +++++++++++++++++++++++++
 include/uapi/drm/i915_drm.h             | 14 +++++
 2 files changed, 85 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 9927f1296fd6..e211c5dc9e79 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -1458,8 +1458,79 @@ static int create_setparam(struct i915_user_extension __user *ext, void *data)
 	return ctx_setparam(data, &local.setparam);
 }
 
+static void clone_sseu(struct i915_gem_context *dst,
+		       struct i915_gem_context *src)
+{
+	const struct intel_sseu default_sseu =
+		intel_device_default_sseu(dst->i915);
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	for_each_engine(engine, dst->i915, id) {
+		struct intel_context *ce;
+		struct intel_sseu sseu;
+
+		ce = to_intel_context(src, engine);
+		if (!ce)
+			continue;
+
+		sseu = ce->sseu;
+		if (!memcmp(&sseu, &default_sseu, sizeof(sseu)))
+			continue;
+
+		ce = to_intel_context(dst, engine);
+		ce->sseu = sseu;
+	}
+}
+
+static int create_clone(struct i915_user_extension __user *ext, void *data)
+{
+	struct drm_i915_gem_context_create_ext_clone local;
+	struct i915_gem_context *dst = data;
+	struct i915_gem_context *src;
+
+	if (copy_from_user(&local, ext, sizeof(local)))
+		return -EFAULT;
+
+	if (local.flags & I915_CONTEXT_CLONE_UNKNOWN)
+		return -EINVAL;
+
+	if (local.rsvd)
+		return -EINVAL;
+
+	rcu_read_lock();
+	src = __i915_gem_context_lookup_rcu(dst->file_priv, local.clone);
+	rcu_read_unlock();
+	if (!src)
+		return -ENOENT;
+
+	if (local.flags & I915_CONTEXT_CLONE_FLAGS)
+		dst->user_flags = src->user_flags;
+
+	if (local.flags & I915_CONTEXT_CLONE_SCHED)
+		dst->sched = src->sched;
+
+	if (local.flags & I915_CONTEXT_CLONE_SSEU)
+		clone_sseu(dst, src);
+
+	if (local.flags & I915_CONTEXT_CLONE_TIMELINE && src->timeline) {
+		if (dst->timeline)
+			i915_timeline_put(dst->timeline);
+		dst->timeline = i915_timeline_get(src->timeline);
+	}
+
+	if (local.flags & I915_CONTEXT_CLONE_VM && src->ppgtt) {
+		if (dst->ppgtt)
+			i915_ppgtt_put(dst->ppgtt);
+		dst->ppgtt = i915_ppgtt_get(src->ppgtt);
+	}
+
+	return 0;
+}
+
 static const i915_user_extension_fn create_extensions[] = {
 	[I915_CONTEXT_CREATE_EXT_SETPARAM] = create_setparam,
+	[I915_CONTEXT_CREATE_EXT_CLONE] = create_clone,
 };
 
 static bool client_is_banned(struct drm_i915_file_private *file_priv)
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 007d77ff7295..50d154954d5f 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1579,6 +1579,20 @@ struct drm_i915_gem_context_create_ext_setparam {
 	struct drm_i915_gem_context_param setparam;
 };
 
+struct drm_i915_gem_context_create_ext_clone {
+#define I915_CONTEXT_CREATE_EXT_CLONE 1
+	struct i915_user_extension base;
+	__u32 clone;
+	__u32 flags;
+#define I915_CONTEXT_CLONE_FLAGS	(1u << 0)
+#define I915_CONTEXT_CLONE_SCHED	(1u << 1)
+#define I915_CONTEXT_CLONE_SSEU		(1u << 2)
+#define I915_CONTEXT_CLONE_TIMELINE	(1u << 3)
+#define I915_CONTEXT_CLONE_VM		(1u << 4)
+#define I915_CONTEXT_CLONE_UNKNOWN -(I915_CONTEXT_CLONE_VM << 1)
+	__u64 rsvd;
+};
+
 struct drm_i915_gem_context_destroy {
 	__u32 ctx_id;
 	__u32 pad;
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 78+ messages in thread

* [PATCH 15/43] drm/i915: Allow a context to define its set of engines
  2019-03-06 14:24 RFC Breaking up GEM struct_mutex for async-pages Chris Wilson
                   ` (13 preceding siblings ...)
  2019-03-06 14:24 ` [PATCH 14/43] drm/i915: Allow userspace to clone contexts on creation Chris Wilson
@ 2019-03-06 14:24 ` Chris Wilson
  2019-03-06 14:24 ` [PATCH 16/43] drm/i915: Extend I915_CONTEXT_PARAM_SSEU to support local ctx->engine[] Chris Wilson
                   ` (30 subsequent siblings)
  45 siblings, 0 replies; 78+ messages in thread
From: Chris Wilson @ 2019-03-06 14:24 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld

Over the last few years, we have debated how to extend the user API to
support an increase in the number of engines, that may be sparse and
even be heterogeneous within a class (not all video decoders created
equal). We settled on using (class, instance) tuples to identify a
specific engine, with an API for the user to construct a map of engines
to capabilities. Into this picture, we then add a challenge of virtual
engines; one user engine that maps behind the scenes to any number of
physical engines. To keep it general, we want the user to have full
control over that mapping. To that end, we allow the user to constrain a
context to define the set of engines that it can access, order fully
controlled by the user via (class, instance). With such precise control
in context setup, we can continue to use the existing execbuf uABI of
specifying a single index; only now it doesn't automagically map onto
the engines, it uses the user defined engine map from the context.

The I915_EXEC_DEFAULT slot is left empty, and invalid for use by
execbuf. It's use will be revealed in the next patch.

v2: Fixup freeing of local on success of get_engines()

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_context.c    | 205 ++++++++++++++++++++-
 drivers/gpu/drm/i915/i915_gem_context.h    |   4 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  22 ++-
 include/uapi/drm/i915_drm.h                |  34 +++-
 4 files changed, 252 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index e211c5dc9e79..eab203b80e5c 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -101,6 +101,21 @@ static struct i915_global_context {
 	struct kmem_cache *slab_luts;
 } global;
 
+static struct intel_engine_cs *
+lookup_user_engine(struct i915_gem_context *ctx,
+		   unsigned long flags, u16 class, u16 instance)
+#define LOOKUP_USER_INDEX BIT(0)
+{
+	if (flags & LOOKUP_USER_INDEX) {
+		if (instance >= ctx->nengine)
+			return NULL;
+
+		return ctx->engines[instance];
+	}
+
+	return intel_engine_lookup_user(ctx->i915, class, instance);
+}
+
 struct i915_lut_handle *i915_lut_handle_alloc(void)
 {
 	return kmem_cache_alloc(global.slab_luts, GFP_KERNEL);
@@ -234,6 +249,8 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
 	release_hw_id(ctx);
 	i915_ppgtt_put(ctx->ppgtt);
 
+	kfree(ctx->engines);
+
 	for (n = 0; n < ARRAY_SIZE(ctx->__engine); n++) {
 		struct intel_context *ce = &ctx->__engine[n];
 
@@ -1341,9 +1358,9 @@ static int set_sseu(struct i915_gem_context *ctx,
 	if (user_sseu.flags || user_sseu.rsvd)
 		return -EINVAL;
 
-	engine = intel_engine_lookup_user(i915,
-					  user_sseu.engine_class,
-					  user_sseu.engine_instance);
+	engine = lookup_user_engine(ctx, 0,
+				    user_sseu.engine_class,
+				    user_sseu.engine_instance);
 	if (!engine)
 		return -EINVAL;
 
@@ -1361,9 +1378,154 @@ static int set_sseu(struct i915_gem_context *ctx,
 
 	args->size = sizeof(user_sseu);
 
+	return 0;
+};
+
+struct set_engines {
+	struct i915_gem_context *ctx;
+	struct intel_engine_cs **engines;
+	unsigned int nengine;
+};
+
+static const i915_user_extension_fn set_engines__extensions[] = {
+};
+
+static int
+set_engines(struct i915_gem_context *ctx,
+	    const struct drm_i915_gem_context_param *args)
+{
+	struct i915_context_param_engines __user *user;
+	struct set_engines set = { .ctx = ctx };
+	u64 size, extensions;
+	unsigned int n;
+	int err;
+
+	user = u64_to_user_ptr(args->value);
+	size = args->size;
+	if (!size)
+		goto out;
+
+	BUILD_BUG_ON(!IS_ALIGNED(sizeof(*user), sizeof(*user->class_instance)));
+	if (size < sizeof(*user) || size % sizeof(*user->class_instance))
+		return -EINVAL;
+
+	set.nengine = (size - sizeof(*user)) / sizeof(*user->class_instance);
+	if (set.nengine == 0 || set.nengine > I915_EXEC_RING_MASK)
+		return -EINVAL;
+
+	set.engines = kmalloc_array(set.nengine,
+				    sizeof(*set.engines),
+				    GFP_KERNEL);
+	if (!set.engines)
+		return -ENOMEM;
+
+	for (n = 0; n < set.nengine; n++) {
+		u16 class, inst;
+
+		if (get_user(class, &user->class_instance[n].engine_class) ||
+		    get_user(inst, &user->class_instance[n].engine_instance)) {
+			kfree(set.engines);
+			return -EFAULT;
+		}
+
+		if (class == (u16)I915_ENGINE_CLASS_INVALID &&
+		    inst == (u16)I915_ENGINE_CLASS_INVALID_NONE) {
+			set.engines[n] = NULL;
+			continue;
+		}
+
+		set.engines[n] = lookup_user_engine(ctx, 0, class, inst);
+		if (!set.engines[n]) {
+			kfree(set.engines);
+			return -ENOENT;
+		}
+	}
+
+	err = -EFAULT;
+	if (!get_user(extensions, &user->extensions))
+		err = i915_user_extensions(u64_to_user_ptr(extensions),
+					   set_engines__extensions,
+					   ARRAY_SIZE(set_engines__extensions),
+					   &set);
+	if (err) {
+		kfree(set.engines);
+		return err;
+	}
+
+out:
+	mutex_lock(&ctx->i915->drm.struct_mutex);
+	kfree(ctx->engines);
+	ctx->engines = set.engines;
+	ctx->nengine = set.nengine;
+	mutex_unlock(&ctx->i915->drm.struct_mutex);
+
 	return 0;
 }
 
+static int
+get_engines(struct i915_gem_context *ctx,
+	    struct drm_i915_gem_context_param *args)
+{
+	struct i915_context_param_engines *local;
+	unsigned int n, count, size;
+	int err = 0;
+
+restart:
+	count = READ_ONCE(ctx->nengine);
+	if (count > (INT_MAX - sizeof(*local)) / sizeof(*local->class_instance))
+		return -ENOMEM; /* unrepresentable! */
+
+	size = sizeof(*local) + count * sizeof(*local->class_instance);
+	if (!args->size) {
+		args->size = size;
+		return 0;
+	}
+	if (args->size < size)
+		return -EINVAL;
+
+	local = kmalloc(size, GFP_KERNEL);
+	if (!local)
+		return -ENOMEM;
+
+	if (mutex_lock_interruptible(&ctx->i915->drm.struct_mutex)) {
+		err = -EINTR;
+		goto out;
+	}
+
+	if (READ_ONCE(ctx->nengine) != count) {
+		mutex_unlock(&ctx->i915->drm.struct_mutex);
+		kfree(local);
+		goto restart;
+	}
+
+	local->extensions = 0;
+	for (n = 0; n < count; n++) {
+		if (ctx->engines[n]) {
+			local->class_instance[n].engine_class =
+				ctx->engines[n]->uabi_class;
+			local->class_instance[n].engine_instance =
+				ctx->engines[n]->instance;
+		} else {
+			local->class_instance[n].engine_class =
+				I915_ENGINE_CLASS_INVALID;
+			local->class_instance[n].engine_instance =
+				I915_ENGINE_CLASS_INVALID_NONE;
+		}
+	}
+
+	mutex_unlock(&ctx->i915->drm.struct_mutex);
+
+	if (copy_to_user(u64_to_user_ptr(args->value), local, size)) {
+		err = -EFAULT;
+		goto out;
+	}
+	args->size = size;
+
+out:
+	kfree(local);
+	return err;
+}
+
 static int ctx_setparam(struct i915_gem_context *ctx,
 			struct drm_i915_gem_context_param *args)
 {
@@ -1436,6 +1598,10 @@ static int ctx_setparam(struct i915_gem_context *ctx,
 		ret = set_ppgtt(ctx, args);
 		break;
 
+	case I915_CONTEXT_PARAM_ENGINES:
+		ret = set_engines(ctx, args);
+		break;
+
 	case I915_CONTEXT_PARAM_BAN_PERIOD:
 	default:
 		ret = -EINVAL;
@@ -1483,11 +1649,28 @@ static void clone_sseu(struct i915_gem_context *dst,
 	}
 }
 
+static int clone_engines(struct i915_gem_context *dst,
+			 struct i915_gem_context *src)
+{
+	struct intel_engine_cs **engines;
+
+	engines = kmemdup(src->engines,
+			  sizeof(*src->engines) * src->nengine,
+			  GFP_KERNEL);
+	if (!engines)
+		return -ENOMEM;
+
+	dst->engines = engines;
+	dst->nengine = src->nengine;
+	return 0;
+}
+
 static int create_clone(struct i915_user_extension __user *ext, void *data)
 {
 	struct drm_i915_gem_context_create_ext_clone local;
 	struct i915_gem_context *dst = data;
 	struct i915_gem_context *src;
+	int err;
 
 	if (copy_from_user(&local, ext, sizeof(local)))
 		return -EFAULT;
@@ -1525,6 +1708,12 @@ static int create_clone(struct i915_user_extension __user *ext, void *data)
 		dst->ppgtt = i915_ppgtt_get(src->ppgtt);
 	}
 
+	if (local.flags & I915_CONTEXT_CLONE_ENGINES && src->nengine) {
+		err = clone_engines(dst, src);
+		if (err)
+			return err;
+	}
+
 	return 0;
 }
 
@@ -1644,9 +1833,9 @@ static int get_sseu(struct i915_gem_context *ctx,
 	if (user_sseu.flags || user_sseu.rsvd)
 		return -EINVAL;
 
-	engine = intel_engine_lookup_user(ctx->i915,
-					  user_sseu.engine_class,
-					  user_sseu.engine_instance);
+	engine = lookup_user_engine(ctx, 0,
+				    user_sseu.engine_class,
+				    user_sseu.engine_instance);
 	if (!engine)
 		return -EINVAL;
 
@@ -1730,6 +1919,10 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 		ret = get_ppgtt(ctx, args);
 		break;
 
+	case I915_CONTEXT_PARAM_ENGINES:
+		ret = get_engines(ctx, args);
+		break;
+
 	case I915_CONTEXT_PARAM_BAN_PERIOD:
 	default:
 		ret = -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index 755b2d97c32c..5415eb7191e6 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -79,6 +79,8 @@ struct i915_gem_context {
 	/** file_priv: owning file descriptor */
 	struct drm_i915_file_private *file_priv;
 
+	struct intel_engine_cs **engines;
+
 	struct i915_timeline *timeline;
 
 	/**
@@ -148,6 +150,8 @@ struct i915_gem_context {
 #define CONTEXT_CLOSED			1
 #define CONTEXT_FORCE_SINGLE_SUBMISSION	2
 
+	unsigned int nengine;
+
 	/**
 	 * @hw_id: - unique identifier for the context
 	 *
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index c335c0a4099a..4b0af9a8000f 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -2071,13 +2071,23 @@ static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = {
 };
 
 static struct intel_engine_cs *
-eb_select_engine(struct drm_i915_private *dev_priv,
+eb_select_engine(struct i915_execbuffer *eb,
 		 struct drm_file *file,
 		 struct drm_i915_gem_execbuffer2 *args)
 {
 	unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK;
 	struct intel_engine_cs *engine;
 
+	if (eb->ctx->engines) {
+		if (user_ring_id >= eb->ctx->nengine) {
+			DRM_DEBUG("execbuf with unknown ring: %u\n",
+				  user_ring_id);
+			return NULL;
+		}
+
+		return eb->ctx->engines[user_ring_id];
+	}
+
 	if (user_ring_id > I915_USER_RINGS) {
 		DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id);
 		return NULL;
@@ -2090,11 +2100,11 @@ eb_select_engine(struct drm_i915_private *dev_priv,
 		return NULL;
 	}
 
-	if (user_ring_id == I915_EXEC_BSD && HAS_ENGINE(dev_priv, VCS1)) {
+	if (user_ring_id == I915_EXEC_BSD && HAS_ENGINE(eb->i915, VCS1)) {
 		unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;
 
 		if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
-			bsd_idx = gen8_dispatch_bsd_engine(dev_priv, file);
+			bsd_idx = gen8_dispatch_bsd_engine(eb->i915, file);
 		} else if (bsd_idx >= I915_EXEC_BSD_RING1 &&
 			   bsd_idx <= I915_EXEC_BSD_RING2) {
 			bsd_idx >>= I915_EXEC_BSD_SHIFT;
@@ -2105,9 +2115,9 @@ eb_select_engine(struct drm_i915_private *dev_priv,
 			return NULL;
 		}
 
-		engine = dev_priv->engine[_VCS(bsd_idx)];
+		engine = eb->i915->engine[_VCS(bsd_idx)];
 	} else {
-		engine = dev_priv->engine[user_ring_map[user_ring_id]];
+		engine = eb->i915->engine[user_ring_map[user_ring_id]];
 	}
 
 	if (!engine) {
@@ -2317,7 +2327,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	if (unlikely(err))
 		goto err_destroy;
 
-	eb.engine = eb_select_engine(eb.i915, file, args);
+	eb.engine = eb_select_engine(&eb, file, args);
 	if (!eb.engine) {
 		err = -EINVAL;
 		goto err_engine;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 50d154954d5f..85ccba4f04e8 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -124,6 +124,8 @@ enum drm_i915_gem_engine_class {
 	I915_ENGINE_CLASS_INVALID	= -1
 };
 
+#define I915_ENGINE_CLASS_INVALID_NONE -1
+
 /**
  * DOC: perf_events exposed by i915 through /sys/bus/event_sources/drivers/i915
  *
@@ -1509,6 +1511,26 @@ struct drm_i915_gem_context_param {
 	 * See DRM_I915_GEM_VM_CREATE and DRM_I915_GEM_VM_DESTROY.
 	 */
 #define I915_CONTEXT_PARAM_VM		0x9
+
+/*
+ * I915_CONTEXT_PARAM_ENGINES:
+ *
+ * Bind this context to operate on this subset of available engines. Henceforth,
+ * the I915_EXEC_RING selector for DRM_IOCTL_I915_GEM_EXECBUFFER2 operates as
+ * an index into this array of engines; I915_EXEC_DEFAULT selecting engine[0]
+ * and upwards. Slots 0...N are filled in using the specified (class, instance).
+ * Use
+ *	engine_class: I915_ENGINE_CLASS_INVALID,
+ *	engine_instance: I915_ENGINE_CLASS_INVALID_NONE
+ * to specify a gap in the array that can be filled in later, e.g. by a
+ * virtual engine used for load balancing.
+ *
+ * Setting the number of engines bound to the context to 0, by passing a zero
+ * sized argument, will revert back to default settings.
+ *
+ * See struct i915_context_param_engines.
+ */
+#define I915_CONTEXT_PARAM_ENGINES	0xa
 /* Must be kept compact -- no holes and well documented */
 
 	__u64 value;
@@ -1573,6 +1595,15 @@ struct drm_i915_gem_context_param_sseu {
 	__u32 rsvd;
 };
 
+struct i915_context_param_engines {
+	__u64 extensions; /* linked chain of extension blocks, 0 terminates */
+
+	struct {
+		__u16 engine_class; /* see enum drm_i915_gem_engine_class */
+		__u16 engine_instance;
+	} class_instance[0];
+} __attribute__((packed));
+
 struct drm_i915_gem_context_create_ext_setparam {
 #define I915_CONTEXT_CREATE_EXT_SETPARAM 0
 	struct i915_user_extension base;
@@ -1589,7 +1620,8 @@ struct drm_i915_gem_context_create_ext_clone {
 #define I915_CONTEXT_CLONE_SSEU		(1u << 2)
 #define I915_CONTEXT_CLONE_TIMELINE	(1u << 3)
 #define I915_CONTEXT_CLONE_VM		(1u << 4)
-#define I915_CONTEXT_CLONE_UNKNOWN -(I915_CONTEXT_CLONE_VM << 1)
+#define I915_CONTEXT_CLONE_ENGINES	(1u << 5)
+#define I915_CONTEXT_CLONE_UNKNOWN -(I915_CONTEXT_CLONE_ENGINES << 1)
 	__u64 rsvd;
 };
 
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 78+ messages in thread

* [PATCH 16/43] drm/i915: Extend I915_CONTEXT_PARAM_SSEU to support local ctx->engine[]
  2019-03-06 14:24 RFC Breaking up GEM struct_mutex for async-pages Chris Wilson
                   ` (14 preceding siblings ...)
  2019-03-06 14:24 ` [PATCH 15/43] drm/i915: Allow a context to define its set of engines Chris Wilson
@ 2019-03-06 14:24 ` Chris Wilson
  2019-03-06 14:24 ` [PATCH 17/43] drm/i915: Split struct intel_context definition to its own header Chris Wilson
                   ` (29 subsequent siblings)
  45 siblings, 0 replies; 78+ messages in thread
From: Chris Wilson @ 2019-03-06 14:24 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld

Allow the user to specify a local engine index (as opposed to
class:index) that they can use to refer to a preset engine inside the
ctx->engine[] array defined by an earlier I915_CONTEXT_PARAM_ENGINES.
This will be useful for setting SSEU parameters on virtual engines that
are local to the context and do not have a valid global class:instance
lookup.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_context.c | 24 ++++++++++++++++++++----
 include/uapi/drm/i915_drm.h             |  3 ++-
 2 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index eab203b80e5c..8037d0efbb4a 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -1343,6 +1343,7 @@ static int set_sseu(struct i915_gem_context *ctx,
 	struct drm_i915_gem_context_param_sseu user_sseu;
 	struct intel_engine_cs *engine;
 	struct intel_sseu sseu;
+	unsigned long lookup;
 	int ret;
 
 	if (args->size < sizeof(user_sseu))
@@ -1355,10 +1356,17 @@ static int set_sseu(struct i915_gem_context *ctx,
 			   sizeof(user_sseu)))
 		return -EFAULT;
 
-	if (user_sseu.flags || user_sseu.rsvd)
+	if (user_sseu.rsvd)
 		return -EINVAL;
 
-	engine = lookup_user_engine(ctx, 0,
+	if (user_sseu.flags & ~(I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX))
+		return -EINVAL;
+
+	lookup = 0;
+	if (user_sseu.flags & I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX)
+		lookup |= LOOKUP_USER_INDEX;
+
+	engine = lookup_user_engine(ctx, lookup,
 				    user_sseu.engine_class,
 				    user_sseu.engine_instance);
 	if (!engine)
@@ -1819,6 +1827,7 @@ static int get_sseu(struct i915_gem_context *ctx,
 	struct drm_i915_gem_context_param_sseu user_sseu;
 	struct intel_engine_cs *engine;
 	struct intel_context *ce;
+	unsigned long lookup;
 	int ret;
 
 	if (args->size == 0)
@@ -1830,10 +1839,17 @@ static int get_sseu(struct i915_gem_context *ctx,
 			   sizeof(user_sseu)))
 		return -EFAULT;
 
-	if (user_sseu.flags || user_sseu.rsvd)
+	if (user_sseu.rsvd)
 		return -EINVAL;
 
-	engine = lookup_user_engine(ctx, 0,
+	if (user_sseu.flags & ~(I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX))
+		return -EINVAL;
+
+	lookup = 0;
+	if (user_sseu.flags & I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX)
+		lookup |= LOOKUP_USER_INDEX;
+
+	engine = lookup_user_engine(ctx, lookup,
 				    user_sseu.engine_class,
 				    user_sseu.engine_instance);
 	if (!engine)
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 85ccba4f04e8..b68e1ad12c0f 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1565,9 +1565,10 @@ struct drm_i915_gem_context_param_sseu {
 	__u16 engine_instance;
 
 	/*
-	 * Unused for now. Must be cleared to zero.
+	 * Unknown flags must be cleared to zero.
 	 */
 	__u32 flags;
+#define I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX (1u << 0)
 
 	/*
 	 * Mask of slices to enable for the context. Valid values are a subset
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 78+ messages in thread

* [PATCH 17/43] drm/i915: Split struct intel_context definition to its own header
  2019-03-06 14:24 RFC Breaking up GEM struct_mutex for async-pages Chris Wilson
                   ` (15 preceding siblings ...)
  2019-03-06 14:24 ` [PATCH 16/43] drm/i915: Extend I915_CONTEXT_PARAM_SSEU to support local ctx->engine[] Chris Wilson
@ 2019-03-06 14:24 ` Chris Wilson
  2019-03-06 14:24 ` [PATCH 18/43] drm/i915: Store the intel_context_ops in the intel_engine_cs Chris Wilson
                   ` (28 subsequent siblings)
  45 siblings, 0 replies; 78+ messages in thread
From: Chris Wilson @ 2019-03-06 14:24 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld

This complex struct pulling in half the driver deserves its own
isolation in preparation for intel_context becoming an outright
complicated class of its own.

In order to split this beast into its own header also requests splitting
several of its dependent types and their dependencies into their own
headers as well.

v2: Add standalone compilation tests

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/Makefile                 |   9 +
 drivers/gpu/drm/i915/i915_gem_context.h       | 245 +-------
 drivers/gpu/drm/i915/i915_gem_context_types.h | 187 +++++++
 drivers/gpu/drm/i915/i915_timeline.h          |  70 +--
 drivers/gpu/drm/i915/i915_timeline_types.h    |  80 +++
 drivers/gpu/drm/i915/intel_context.h          |  47 ++
 drivers/gpu/drm/i915/intel_context_types.h    |  60 ++
 drivers/gpu/drm/i915/intel_engine_types.h     | 521 ++++++++++++++++++
 drivers/gpu/drm/i915/intel_guc.h              |   1 +
 drivers/gpu/drm/i915/intel_ringbuffer.h       | 502 +----------------
 drivers/gpu/drm/i915/intel_workarounds.h      |  13 +-
 .../gpu/drm/i915/intel_workarounds_types.h    |  27 +
 .../i915/test_i915_active_types_standalone.c  |   7 +
 .../test_i915_gem_context_types_standalone.c  |   7 +
 .../test_i915_timeline_types_standalone.c     |   7 +
 .../test_intel_context_types_standalone.c     |   7 +
 .../i915/test_intel_engine_types_standalone.c |   7 +
 .../test_intel_workarounds_types_standalone.c |   7 +
 18 files changed, 980 insertions(+), 824 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_gem_context_types.h
 create mode 100644 drivers/gpu/drm/i915/i915_timeline_types.h
 create mode 100644 drivers/gpu/drm/i915/intel_context.h
 create mode 100644 drivers/gpu/drm/i915/intel_context_types.h
 create mode 100644 drivers/gpu/drm/i915/intel_engine_types.h
 create mode 100644 drivers/gpu/drm/i915/intel_workarounds_types.h
 create mode 100644 drivers/gpu/drm/i915/test_i915_active_types_standalone.c
 create mode 100644 drivers/gpu/drm/i915/test_i915_gem_context_types_standalone.c
 create mode 100644 drivers/gpu/drm/i915/test_i915_timeline_types_standalone.c
 create mode 100644 drivers/gpu/drm/i915/test_intel_context_types_standalone.c
 create mode 100644 drivers/gpu/drm/i915/test_intel_engine_types_standalone.c
 create mode 100644 drivers/gpu/drm/i915/test_intel_workarounds_types_standalone.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 89105b1aaf12..524cace37d85 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -57,6 +57,15 @@ i915-$(CONFIG_COMPAT)   += i915_ioc32.o
 i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o intel_pipe_crc.o
 i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o
 
+# Test the headers are compilable as standalone units
+i915-$(CONFIG_DRM_I915_WERROR) += \
+	test_i915_active_types_standalone.o \
+	test_i915_gem_context_types_standalone.o \
+	test_i915_timeline_types_standalone.o \
+	test_intel_context_types_standalone.o \
+	test_intel_engine_types_standalone.o \
+	test_intel_workarounds_types_standalone.o
+
 # GEM code
 i915-y += \
 	  i915_active.o \
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index 5415eb7191e6..ed9a2447bba9 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -25,225 +25,17 @@
 #ifndef __I915_GEM_CONTEXT_H__
 #define __I915_GEM_CONTEXT_H__
 
-#include <linux/bitops.h>
-#include <linux/list.h>
-#include <linux/radix-tree.h>
+#include "i915_gem_context_types.h"
 
 #include "i915_gem.h"
 #include "i915_scheduler.h"
+#include "intel_context.h"
 #include "intel_device_info.h"
 #include "intel_ringbuffer.h"
 
-struct pid;
-
 struct drm_device;
 struct drm_file;
 
-struct drm_i915_private;
-struct drm_i915_file_private;
-struct i915_hw_ppgtt;
-struct i915_request;
-struct i915_timeline;
-struct i915_vma;
-struct intel_ring;
-
-#define DEFAULT_CONTEXT_HANDLE 0
-
-struct intel_context;
-
-struct intel_context_ops {
-	void (*unpin)(struct intel_context *ce);
-	void (*destroy)(struct intel_context *ce);
-};
-
-/*
- * Powergating configuration for a particular (context,engine).
- */
-struct intel_sseu {
-	u8 slice_mask;
-	u8 subslice_mask;
-	u8 min_eus_per_subslice;
-	u8 max_eus_per_subslice;
-};
-
-/**
- * struct i915_gem_context - client state
- *
- * The struct i915_gem_context represents the combined view of the driver and
- * logical hardware state for a particular client.
- */
-struct i915_gem_context {
-	/** i915: i915 device backpointer */
-	struct drm_i915_private *i915;
-
-	/** file_priv: owning file descriptor */
-	struct drm_i915_file_private *file_priv;
-
-	struct intel_engine_cs **engines;
-
-	struct i915_timeline *timeline;
-
-	/**
-	 * @ppgtt: unique address space (GTT)
-	 *
-	 * In full-ppgtt mode, each context has its own address space ensuring
-	 * complete seperation of one client from all others.
-	 *
-	 * In other modes, this is a NULL pointer with the expectation that
-	 * the caller uses the shared global GTT.
-	 */
-	struct i915_hw_ppgtt *ppgtt;
-
-	/**
-	 * @pid: process id of creator
-	 *
-	 * Note that who created the context may not be the principle user,
-	 * as the context may be shared across a local socket. However,
-	 * that should only affect the default context, all contexts created
-	 * explicitly by the client are expected to be isolated.
-	 */
-	struct pid *pid;
-
-	/**
-	 * @name: arbitrary name
-	 *
-	 * A name is constructed for the context from the creator's process
-	 * name, pid and user handle in order to uniquely identify the
-	 * context in messages.
-	 */
-	const char *name;
-
-	/** link: place with &drm_i915_private.context_list */
-	struct list_head link;
-	struct llist_node free_link;
-
-	/**
-	 * @ref: reference count
-	 *
-	 * A reference to a context is held by both the client who created it
-	 * and on each request submitted to the hardware using the request
-	 * (to ensure the hardware has access to the state until it has
-	 * finished all pending writes). See i915_gem_context_get() and
-	 * i915_gem_context_put() for access.
-	 */
-	struct kref ref;
-
-	/**
-	 * @rcu: rcu_head for deferred freeing.
-	 */
-	struct rcu_head rcu;
-
-	/**
-	 * @user_flags: small set of booleans controlled by the user
-	 */
-	unsigned long user_flags;
-#define UCONTEXT_NO_ZEROMAP		0
-#define UCONTEXT_NO_ERROR_CAPTURE	1
-#define UCONTEXT_BANNABLE		2
-#define UCONTEXT_RECOVERABLE		3
-
-	/**
-	 * @flags: small set of booleans
-	 */
-	unsigned long flags;
-#define CONTEXT_BANNED			0
-#define CONTEXT_CLOSED			1
-#define CONTEXT_FORCE_SINGLE_SUBMISSION	2
-
-	unsigned int nengine;
-
-	/**
-	 * @hw_id: - unique identifier for the context
-	 *
-	 * The hardware needs to uniquely identify the context for a few
-	 * functions like fault reporting, PASID, scheduling. The
-	 * &drm_i915_private.context_hw_ida is used to assign a unqiue
-	 * id for the lifetime of the context.
-	 *
-	 * @hw_id_pin_count: - number of times this context had been pinned
-	 * for use (should be, at most, once per engine).
-	 *
-	 * @hw_id_link: - all contexts with an assigned id are tracked
-	 * for possible repossession.
-	 */
-	unsigned int hw_id;
-	atomic_t hw_id_pin_count;
-	struct list_head hw_id_link;
-
-	struct list_head active_engines;
-	struct mutex mutex;
-
-	/**
-	 * @user_handle: userspace identifier
-	 *
-	 * A unique per-file identifier is generated from
-	 * &drm_i915_file_private.contexts.
-	 */
-	u32 user_handle;
-
-	struct i915_sched_attr sched;
-
-	/** engine: per-engine logical HW state */
-	struct intel_context {
-		struct i915_gem_context *gem_context;
-		struct intel_engine_cs *engine;
-		struct intel_engine_cs *active;
-		struct list_head active_link;
-		struct list_head signal_link;
-		struct list_head signals;
-		struct i915_vma *state;
-		struct intel_ring *ring;
-		u32 *lrc_reg_state;
-		u64 lrc_desc;
-		int pin_count;
-
-		/**
-		 * active_tracker: Active tracker for the external rq activity
-		 * on this intel_context object.
-		 */
-		struct i915_active_request active_tracker;
-
-		const struct intel_context_ops *ops;
-
-		/** sseu: Control eu/slice partitioning */
-		struct intel_sseu sseu;
-	} __engine[I915_NUM_ENGINES];
-
-	/** ring_size: size for allocating the per-engine ring buffer */
-	u32 ring_size;
-	/** desc_template: invariant fields for the HW context descriptor */
-	u32 desc_template;
-
-	/** guilty_count: How many times this context has caused a GPU hang. */
-	atomic_t guilty_count;
-	/**
-	 * @active_count: How many times this context was active during a GPU
-	 * hang, but did not cause it.
-	 */
-	atomic_t active_count;
-
-	/**
-	 * @hang_timestamp: The last time(s) this context caused a GPU hang
-	 */
-	unsigned long hang_timestamp[2];
-#define CONTEXT_FAST_HANG_JIFFIES (120 * HZ) /* 3 hangs within 120s? Banned! */
-
-	/** remap_slice: Bitmask of cache lines that need remapping */
-	u8 remap_slice;
-
-	/** handles_vma: rbtree to look up our context specific obj/vma for
-	 * the user handle. (user handles are per fd, but the binding is
-	 * per vm, which may be one per context or shared with the global GTT)
-	 */
-	struct radix_tree_root handles_vma;
-
-	/** handles_list: reverse list of all the rbtree entries in use for
-	 * this context, which allows us to free all the allocations on
-	 * context close.
-	 */
-	struct list_head handles_list;
-};
-
 static inline bool i915_gem_context_is_closed(const struct i915_gem_context *ctx)
 {
 	return test_bit(CONTEXT_CLOSED, &ctx->flags);
@@ -345,35 +137,6 @@ static inline bool i915_gem_context_is_kernel(struct i915_gem_context *ctx)
 	return !ctx->file_priv;
 }
 
-static inline struct intel_context *
-to_intel_context(struct i915_gem_context *ctx,
-		 const struct intel_engine_cs *engine)
-{
-	return &ctx->__engine[engine->id];
-}
-
-static inline struct intel_context *
-intel_context_pin(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
-{
-	return engine->context_pin(engine, ctx);
-}
-
-static inline void __intel_context_pin(struct intel_context *ce)
-{
-	GEM_BUG_ON(!ce->pin_count);
-	ce->pin_count++;
-}
-
-static inline void intel_context_unpin(struct intel_context *ce)
-{
-	GEM_BUG_ON(!ce->pin_count);
-	if (--ce->pin_count)
-		return;
-
-	GEM_BUG_ON(!ce->ops);
-	ce->ops->unpin(ce);
-}
-
 /* i915_gem_context.c */
 int __must_check i915_gem_contexts_init(struct drm_i915_private *dev_priv);
 void i915_gem_contexts_lost(struct drm_i915_private *dev_priv);
@@ -422,10 +185,6 @@ static inline void i915_gem_context_put(struct i915_gem_context *ctx)
 	kref_put(&ctx->ref, i915_gem_context_release);
 }
 
-void intel_context_init(struct intel_context *ce,
-			struct i915_gem_context *ctx,
-			struct intel_engine_cs *engine);
-
 struct i915_lut_handle *i915_lut_handle_alloc(void);
 void i915_lut_handle_free(struct i915_lut_handle *lut);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_context_types.h b/drivers/gpu/drm/i915/i915_gem_context_types.h
new file mode 100644
index 000000000000..16e2518439a3
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_context_types.h
@@ -0,0 +1,187 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __I915_GEM_CONTEXT_TYPES_H__
+#define __I915_GEM_CONTEXT_TYPES_H__
+
+#include <linux/atomic.h>
+#include <linux/list.h>
+#include <linux/llist.h>
+#include <linux/kref.h>
+#include <linux/mutex.h>
+#include <linux/radix-tree.h>
+#include <linux/rcupdate.h>
+#include <linux/types.h>
+
+#include "i915_gem.h" /* I915_NUM_ENGINES */
+#include "i915_scheduler.h"
+#include "intel_context_types.h"
+
+struct pid;
+
+struct drm_i915_private;
+struct drm_i915_file_private;
+struct i915_hw_ppgtt;
+struct i915_timeline;
+struct intel_ring;
+
+/**
+ * struct i915_gem_context - client state
+ *
+ * The struct i915_gem_context represents the combined view of the driver and
+ * logical hardware state for a particular client.
+ */
+struct i915_gem_context {
+	/** i915: i915 device backpointer */
+	struct drm_i915_private *i915;
+
+	/** file_priv: owning file descriptor */
+	struct drm_i915_file_private *file_priv;
+
+	struct intel_engine_cs **engines;
+
+	struct i915_timeline *timeline;
+
+	/**
+	 * @ppgtt: unique address space (GTT)
+	 *
+	 * In full-ppgtt mode, each context has its own address space ensuring
+	 * complete seperation of one client from all others.
+	 *
+	 * In other modes, this is a NULL pointer with the expectation that
+	 * the caller uses the shared global GTT.
+	 */
+	struct i915_hw_ppgtt *ppgtt;
+
+	/**
+	 * @pid: process id of creator
+	 *
+	 * Note that who created the context may not be the principle user,
+	 * as the context may be shared across a local socket. However,
+	 * that should only affect the default context, all contexts created
+	 * explicitly by the client are expected to be isolated.
+	 */
+	struct pid *pid;
+
+	/**
+	 * @name: arbitrary name
+	 *
+	 * A name is constructed for the context from the creator's process
+	 * name, pid and user handle in order to uniquely identify the
+	 * context in messages.
+	 */
+	const char *name;
+
+	/** link: place with &drm_i915_private.context_list */
+	struct list_head link;
+	struct llist_node free_link;
+
+	/**
+	 * @ref: reference count
+	 *
+	 * A reference to a context is held by both the client who created it
+	 * and on each request submitted to the hardware using the request
+	 * (to ensure the hardware has access to the state until it has
+	 * finished all pending writes). See i915_gem_context_get() and
+	 * i915_gem_context_put() for access.
+	 */
+	struct kref ref;
+
+	/**
+	 * @rcu: rcu_head for deferred freeing.
+	 */
+	struct rcu_head rcu;
+
+	/**
+	 * @user_flags: small set of booleans controlled by the user
+	 */
+	unsigned long user_flags;
+#define UCONTEXT_NO_ZEROMAP		0
+#define UCONTEXT_NO_ERROR_CAPTURE	1
+#define UCONTEXT_BANNABLE		2
+#define UCONTEXT_RECOVERABLE		3
+
+	/**
+	 * @flags: small set of booleans
+	 */
+	unsigned long flags;
+#define CONTEXT_BANNED			0
+#define CONTEXT_CLOSED			1
+#define CONTEXT_FORCE_SINGLE_SUBMISSION	2
+
+	unsigned int nengine;
+
+	/**
+	 * @hw_id: - unique identifier for the context
+	 *
+	 * The hardware needs to uniquely identify the context for a few
+	 * functions like fault reporting, PASID, scheduling. The
+	 * &drm_i915_private.context_hw_ida is used to assign a unqiue
+	 * id for the lifetime of the context.
+	 *
+	 * @hw_id_pin_count: - number of times this context had been pinned
+	 * for use (should be, at most, once per engine).
+	 *
+	 * @hw_id_link: - all contexts with an assigned id are tracked
+	 * for possible repossession.
+	 */
+	unsigned int hw_id;
+	atomic_t hw_id_pin_count;
+	struct list_head hw_id_link;
+
+	struct list_head active_engines;
+	struct mutex mutex;
+
+	/**
+	 * @user_handle: userspace identifier
+	 *
+	 * A unique per-file identifier is generated from
+	 * &drm_i915_file_private.contexts.
+	 */
+	u32 user_handle;
+#define DEFAULT_CONTEXT_HANDLE 0
+
+	struct i915_sched_attr sched;
+
+	/** engine: per-engine logical HW state */
+	struct intel_context __engine[I915_NUM_ENGINES];
+
+	/** ring_size: size for allocating the per-engine ring buffer */
+	u32 ring_size;
+	/** desc_template: invariant fields for the HW context descriptor */
+	u32 desc_template;
+
+	/** guilty_count: How many times this context has caused a GPU hang. */
+	atomic_t guilty_count;
+	/**
+	 * @active_count: How many times this context was active during a GPU
+	 * hang, but did not cause it.
+	 */
+	atomic_t active_count;
+
+	/**
+	 * @hang_timestamp: The last time(s) this context caused a GPU hang
+	 */
+	unsigned long hang_timestamp[2];
+#define CONTEXT_FAST_HANG_JIFFIES (120 * HZ) /* 3 hangs within 120s? Banned! */
+
+	/** remap_slice: Bitmask of cache lines that need remapping */
+	u8 remap_slice;
+
+	/** handles_vma: rbtree to look up our context specific obj/vma for
+	 * the user handle. (user handles are per fd, but the binding is
+	 * per vm, which may be one per context or shared with the global GTT)
+	 */
+	struct radix_tree_root handles_vma;
+
+	/** handles_list: reverse list of all the rbtree entries in use for
+	 * this context, which allows us to free all the allocations on
+	 * context close.
+	 */
+	struct list_head handles_list;
+};
+
+#endif /* __I915_GEM_CONTEXT_TYPES_H__ */
diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h
index 60b1dfad93ed..9126c8206490 100644
--- a/drivers/gpu/drm/i915/i915_timeline.h
+++ b/drivers/gpu/drm/i915/i915_timeline.h
@@ -25,76 +25,10 @@
 #ifndef I915_TIMELINE_H
 #define I915_TIMELINE_H
 
-#include <linux/list.h>
-#include <linux/kref.h>
+#include <linux/lockdep.h>
 
-#include "i915_active.h"
-#include "i915_request.h"
 #include "i915_syncmap.h"
-#include "i915_utils.h"
-
-struct i915_vma;
-struct i915_timeline_cacheline;
-
-struct i915_timeline {
-	u64 fence_context;
-	u32 seqno;
-
-	spinlock_t lock;
-#define TIMELINE_CLIENT 0 /* default subclass */
-#define TIMELINE_ENGINE 1
-
-	struct mutex mutex; /* protects the flow of requests */
-
-	unsigned int pin_count;
-	const u32 *hwsp_seqno;
-	struct i915_vma *hwsp_ggtt;
-	u32 hwsp_offset;
-
-	struct i915_timeline_cacheline *hwsp_cacheline;
-
-	bool has_initial_breadcrumb;
-
-	/**
-	 * List of breadcrumbs associated with GPU requests currently
-	 * outstanding.
-	 */
-	struct list_head requests;
-
-	/* Contains an RCU guarded pointer to the last request. No reference is
-	 * held to the request, users must carefully acquire a reference to
-	 * the request using i915_active_request_get_request_rcu(), or hold the
-	 * struct_mutex.
-	 */
-	struct i915_active_request last_request;
-
-	/**
-	 * We track the most recent seqno that we wait on in every context so
-	 * that we only have to emit a new await and dependency on a more
-	 * recent sync point. As the contexts may be executed out-of-order, we
-	 * have to track each individually and can not rely on an absolute
-	 * global_seqno. When we know that all tracked fences are completed
-	 * (i.e. when the driver is idle), we know that the syncmap is
-	 * redundant and we can discard it without loss of generality.
-	 */
-	struct i915_syncmap *sync;
-
-	/**
-	 * Barrier provides the ability to serialize ordering between different
-	 * timelines.
-	 *
-	 * Users can call i915_timeline_set_barrier which will make all
-	 * subsequent submissions to this timeline be executed only after the
-	 * barrier has been completed.
-	 */
-	struct i915_active_request barrier;
-
-	struct list_head link;
-	const char *name;
-	struct drm_i915_private *i915;
-
-	struct kref kref;
-};
+#include "i915_timeline_types.h"
 
 int i915_timeline_init(struct drm_i915_private *i915,
 		       struct i915_timeline *tl,
diff --git a/drivers/gpu/drm/i915/i915_timeline_types.h b/drivers/gpu/drm/i915/i915_timeline_types.h
new file mode 100644
index 000000000000..8ff146dc05ba
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_timeline_types.h
@@ -0,0 +1,80 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#ifndef __I915_TIMELINE_TYPES_H__
+#define __I915_TIMELINE_TYPES_H__
+
+#include <linux/list.h>
+#include <linux/kref.h>
+#include <linux/types.h>
+
+#include "i915_active.h"
+
+struct drm_i915_private;
+struct i915_vma;
+struct i915_timeline_cacheline;
+struct i915_syncmap;
+
+struct i915_timeline {
+	u64 fence_context;
+	u32 seqno;
+
+	spinlock_t lock;
+#define TIMELINE_CLIENT 0 /* default subclass */
+#define TIMELINE_ENGINE 1
+	struct mutex mutex; /* protects the flow of requests */
+
+	unsigned int pin_count;
+	const u32 *hwsp_seqno;
+	struct i915_vma *hwsp_ggtt;
+	u32 hwsp_offset;
+
+	struct i915_timeline_cacheline *hwsp_cacheline;
+
+	bool has_initial_breadcrumb;
+
+	/**
+	 * List of breadcrumbs associated with GPU requests currently
+	 * outstanding.
+	 */
+	struct list_head requests;
+
+	/* Contains an RCU guarded pointer to the last request. No reference is
+	 * held to the request, users must carefully acquire a reference to
+	 * the request using i915_active_request_get_request_rcu(), or hold the
+	 * struct_mutex.
+	 */
+	struct i915_active_request last_request;
+
+	/**
+	 * We track the most recent seqno that we wait on in every context so
+	 * that we only have to emit a new await and dependency on a more
+	 * recent sync point. As the contexts may be executed out-of-order, we
+	 * have to track each individually and can not rely on an absolute
+	 * global_seqno. When we know that all tracked fences are completed
+	 * (i.e. when the driver is idle), we know that the syncmap is
+	 * redundant and we can discard it without loss of generality.
+	 */
+	struct i915_syncmap *sync;
+
+	/**
+	 * Barrier provides the ability to serialize ordering between different
+	 * timelines.
+	 *
+	 * Users can call i915_timeline_set_barrier which will make all
+	 * subsequent submissions to this timeline be executed only after the
+	 * barrier has been completed.
+	 */
+	struct i915_active_request barrier;
+
+	struct list_head link;
+	const char *name;
+	struct drm_i915_private *i915;
+
+	struct kref kref;
+};
+
+#endif /* __I915_TIMELINE_TYPES_H__ */
diff --git a/drivers/gpu/drm/i915/intel_context.h b/drivers/gpu/drm/i915/intel_context.h
new file mode 100644
index 000000000000..dd947692bb0b
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_context.h
@@ -0,0 +1,47 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __INTEL_CONTEXT_H__
+#define __INTEL_CONTEXT_H__
+
+#include "i915_gem_context_types.h"
+#include "intel_context_types.h"
+#include "intel_engine_types.h"
+
+void intel_context_init(struct intel_context *ce,
+			struct i915_gem_context *ctx,
+			struct intel_engine_cs *engine);
+
+static inline struct intel_context *
+to_intel_context(struct i915_gem_context *ctx,
+		 const struct intel_engine_cs *engine)
+{
+	return &ctx->__engine[engine->id];
+}
+
+static inline struct intel_context *
+intel_context_pin(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
+{
+	return engine->context_pin(engine, ctx);
+}
+
+static inline void __intel_context_pin(struct intel_context *ce)
+{
+	GEM_BUG_ON(!ce->pin_count);
+	ce->pin_count++;
+}
+
+static inline void intel_context_unpin(struct intel_context *ce)
+{
+	GEM_BUG_ON(!ce->pin_count);
+	if (--ce->pin_count)
+		return;
+
+	GEM_BUG_ON(!ce->ops);
+	ce->ops->unpin(ce);
+}
+
+#endif /* __INTEL_CONTEXT_H__ */
diff --git a/drivers/gpu/drm/i915/intel_context_types.h b/drivers/gpu/drm/i915/intel_context_types.h
new file mode 100644
index 000000000000..16e1306e9595
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_context_types.h
@@ -0,0 +1,60 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __INTEL_CONTEXT_TYPES__
+#define __INTEL_CONTEXT_TYPES__
+
+#include <linux/list.h>
+#include <linux/types.h>
+
+#include "i915_active_types.h"
+
+struct i915_gem_context;
+struct i915_vma;
+struct intel_context;
+struct intel_ring;
+
+struct intel_context_ops {
+	void (*unpin)(struct intel_context *ce);
+	void (*destroy)(struct intel_context *ce);
+};
+
+/*
+ * Powergating configuration for a particular (context,engine).
+ */
+struct intel_sseu {
+	u8 slice_mask;
+	u8 subslice_mask;
+	u8 min_eus_per_subslice;
+	u8 max_eus_per_subslice;
+};
+
+struct intel_context {
+	struct i915_gem_context *gem_context;
+	struct intel_engine_cs *engine;
+	struct intel_engine_cs *active;
+	struct list_head active_link;
+	struct list_head signal_link;
+	struct list_head signals;
+	struct i915_vma *state;
+	struct intel_ring *ring;
+	u32 *lrc_reg_state;
+	u64 lrc_desc;
+	int pin_count;
+
+	/**
+	 * active_tracker: Active tracker for the external rq activity
+	 * on this intel_context object.
+	 */
+	struct i915_active_request active_tracker;
+
+	const struct intel_context_ops *ops;
+
+	/** sseu: Control eu/slice partitioning */
+	struct intel_sseu sseu;
+};
+
+#endif /* __INTEL_CONTEXT_TYPES__ */
diff --git a/drivers/gpu/drm/i915/intel_engine_types.h b/drivers/gpu/drm/i915/intel_engine_types.h
new file mode 100644
index 000000000000..f7ceda334169
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_engine_types.h
@@ -0,0 +1,521 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __INTEL_ENGINE_TYPES__
+#define __INTEL_ENGINE_TYPES__
+
+#include <linux/hashtable.h>
+#include <linux/irq_work.h>
+#include <linux/list.h>
+#include <linux/types.h>
+
+#include "i915_timeline_types.h"
+#include "intel_device_info.h"
+#include "intel_workarounds_types.h"
+
+#include "i915_gem_batch_pool.h"
+#include "i915_pmu.h"
+
+#define I915_MAX_SLICES	3
+#define I915_MAX_SUBSLICES 8
+
+#define I915_CMD_HASH_ORDER 9
+
+struct drm_i915_reg_table;
+struct i915_gem_context;
+struct i915_request;
+struct i915_sched_attr;
+
+struct intel_hw_status_page {
+	struct i915_vma *vma;
+	u32 *addr;
+};
+
+struct intel_instdone {
+	u32 instdone;
+	/* The following exist only in the RCS engine */
+	u32 slice_common;
+	u32 sampler[I915_MAX_SLICES][I915_MAX_SUBSLICES];
+	u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES];
+};
+
+struct intel_engine_hangcheck {
+	u64 acthd;
+	u32 last_seqno;
+	u32 next_seqno;
+	unsigned long action_timestamp;
+	struct intel_instdone instdone;
+};
+
+struct intel_ring {
+	struct i915_vma *vma;
+	void *vaddr;
+
+	struct i915_timeline *timeline;
+	struct list_head request_list;
+	struct list_head active_link;
+
+	u32 head;
+	u32 tail;
+	u32 emit;
+
+	u32 space;
+	u32 size;
+	u32 effective_size;
+};
+
+/*
+ * we use a single page to load ctx workarounds so all of these
+ * values are referred in terms of dwords
+ *
+ * struct i915_wa_ctx_bb:
+ *  offset: specifies batch starting position, also helpful in case
+ *    if we want to have multiple batches at different offsets based on
+ *    some criteria. It is not a requirement at the moment but provides
+ *    an option for future use.
+ *  size: size of the batch in DWORDS
+ */
+struct i915_ctx_workarounds {
+	struct i915_wa_ctx_bb {
+		u32 offset;
+		u32 size;
+	} indirect_ctx, per_ctx;
+	struct i915_vma *vma;
+};
+
+#define I915_MAX_VCS	4
+#define I915_MAX_VECS	2
+
+/*
+ * Engine IDs definitions.
+ * Keep instances of the same type engine together.
+ */
+enum intel_engine_id {
+	RCS0 = 0,
+	BCS0,
+	VCS0,
+	VCS1,
+	VCS2,
+	VCS3,
+#define _VCS(n) (VCS0 + (n))
+	VECS0,
+	VECS1
+#define _VECS(n) (VECS0 + (n))
+};
+
+struct st_preempt_hang {
+	struct completion completion;
+	unsigned int count;
+	bool inject_hang;
+};
+
+/**
+ * struct intel_engine_execlists - execlist submission queue and port state
+ *
+ * The struct intel_engine_execlists represents the combined logical state of
+ * driver and the hardware state for execlist mode of submission.
+ */
+struct intel_engine_execlists {
+	/**
+	 * @tasklet: softirq tasklet for bottom handler
+	 */
+	struct tasklet_struct tasklet;
+
+	/**
+	 * @default_priolist: priority list for I915_PRIORITY_NORMAL
+	 */
+	struct i915_priolist default_priolist;
+
+	/**
+	 * @no_priolist: priority lists disabled
+	 */
+	bool no_priolist;
+
+	/**
+	 * @submit_reg: gen-specific execlist submission register
+	 * set to the ExecList Submission Port (elsp) register pre-Gen11 and to
+	 * the ExecList Submission Queue Contents register array for Gen11+
+	 */
+	u32 __iomem *submit_reg;
+
+	/**
+	 * @ctrl_reg: the enhanced execlists control register, used to load the
+	 * submit queue on the HW and to request preemptions to idle
+	 */
+	u32 __iomem *ctrl_reg;
+
+	/**
+	 * @port: execlist port states
+	 *
+	 * For each hardware ELSP (ExecList Submission Port) we keep
+	 * track of the last request and the number of times we submitted
+	 * that port to hw. We then count the number of times the hw reports
+	 * a context completion or preemption. As only one context can
+	 * be active on hw, we limit resubmission of context to port[0]. This
+	 * is called Lite Restore, of the context.
+	 */
+	struct execlist_port {
+		/**
+		 * @request_count: combined request and submission count
+		 */
+		struct i915_request *request_count;
+#define EXECLIST_COUNT_BITS 2
+#define port_request(p) ptr_mask_bits((p)->request_count, EXECLIST_COUNT_BITS)
+#define port_count(p) ptr_unmask_bits((p)->request_count, EXECLIST_COUNT_BITS)
+#define port_pack(rq, count) ptr_pack_bits(rq, count, EXECLIST_COUNT_BITS)
+#define port_unpack(p, count) ptr_unpack_bits((p)->request_count, count, EXECLIST_COUNT_BITS)
+#define port_set(p, packed) ((p)->request_count = (packed))
+#define port_isset(p) ((p)->request_count)
+#define port_index(p, execlists) ((p) - (execlists)->port)
+
+		/**
+		 * @context_id: context ID for port
+		 */
+		GEM_DEBUG_DECL(u32 context_id);
+
+#define EXECLIST_MAX_PORTS 2
+	} port[EXECLIST_MAX_PORTS];
+
+	/**
+	 * @active: is the HW active? We consider the HW as active after
+	 * submitting any context for execution and until we have seen the
+	 * last context completion event. After that, we do not expect any
+	 * more events until we submit, and so can park the HW.
+	 *
+	 * As we have a small number of different sources from which we feed
+	 * the HW, we track the state of each inside a single bitfield.
+	 */
+	unsigned int active;
+#define EXECLISTS_ACTIVE_USER 0
+#define EXECLISTS_ACTIVE_PREEMPT 1
+#define EXECLISTS_ACTIVE_HWACK 2
+
+	/**
+	 * @port_mask: number of execlist ports - 1
+	 */
+	unsigned int port_mask;
+
+	/**
+	 * @queue_priority_hint: Highest pending priority.
+	 *
+	 * When we add requests into the queue, or adjust the priority of
+	 * executing requests, we compute the maximum priority of those
+	 * pending requests. We can then use this value to determine if
+	 * we need to preempt the executing requests to service the queue.
+	 * However, since the we may have recorded the priority of an inflight
+	 * request we wanted to preempt but since completed, at the time of
+	 * dequeuing the priority hint may no longer may match the highest
+	 * available request priority.
+	 */
+	int queue_priority_hint;
+
+	/**
+	 * @queue: queue of requests, in priority lists
+	 */
+	struct rb_root_cached queue;
+
+	/**
+	 * @csb_write: control register for Context Switch buffer
+	 *
+	 * Note this register may be either mmio or HWSP shadow.
+	 */
+	u32 *csb_write;
+
+	/**
+	 * @csb_status: status array for Context Switch buffer
+	 *
+	 * Note these register may be either mmio or HWSP shadow.
+	 */
+	u32 *csb_status;
+
+	/**
+	 * @preempt_complete_status: expected CSB upon completing preemption
+	 */
+	u32 preempt_complete_status;
+
+	/**
+	 * @csb_head: context status buffer head
+	 */
+	u8 csb_head;
+
+	I915_SELFTEST_DECLARE(struct st_preempt_hang preempt_hang;)
+};
+
+#define INTEL_ENGINE_CS_MAX_NAME 8
+
+struct intel_engine_cs {
+	struct drm_i915_private *i915;
+	char name[INTEL_ENGINE_CS_MAX_NAME];
+
+	enum intel_engine_id id;
+	unsigned int hw_id;
+	unsigned int guc_id;
+	intel_engine_mask_t mask;
+
+	u8 uabi_class;
+
+	u8 class;
+	u8 instance;
+	u32 context_size;
+	u32 mmio_base;
+
+	struct intel_ring *buffer;
+
+	struct i915_timeline timeline;
+
+	struct drm_i915_gem_object *default_state;
+	void *pinned_default_state;
+
+	/* Rather than have every client wait upon all user interrupts,
+	 * with the herd waking after every interrupt and each doing the
+	 * heavyweight seqno dance, we delegate the task (of being the
+	 * bottom-half of the user interrupt) to the first client. After
+	 * every interrupt, we wake up one client, who does the heavyweight
+	 * coherent seqno read and either goes back to sleep (if incomplete),
+	 * or wakes up all the completed clients in parallel, before then
+	 * transferring the bottom-half status to the next client in the queue.
+	 *
+	 * Compared to walking the entire list of waiters in a single dedicated
+	 * bottom-half, we reduce the latency of the first waiter by avoiding
+	 * a context switch, but incur additional coherent seqno reads when
+	 * following the chain of request breadcrumbs. Since it is most likely
+	 * that we have a single client waiting on each seqno, then reducing
+	 * the overhead of waking that client is much preferred.
+	 */
+	struct intel_breadcrumbs {
+		spinlock_t irq_lock;
+		struct list_head signalers;
+
+		struct irq_work irq_work; /* for use from inside irq_lock */
+
+		unsigned int irq_enabled;
+
+		bool irq_armed;
+	} breadcrumbs;
+
+	struct intel_engine_pmu {
+		/**
+		 * @enable: Bitmask of enable sample events on this engine.
+		 *
+		 * Bits correspond to sample event types, for instance
+		 * I915_SAMPLE_QUEUED is bit 0 etc.
+		 */
+		u32 enable;
+		/**
+		 * @enable_count: Reference count for the enabled samplers.
+		 *
+		 * Index number corresponds to @enum drm_i915_pmu_engine_sample.
+		 */
+		unsigned int enable_count[I915_ENGINE_SAMPLE_COUNT];
+		/**
+		 * @sample: Counter values for sampling events.
+		 *
+		 * Our internal timer stores the current counters in this field.
+		 *
+		 * Index number corresponds to @enum drm_i915_pmu_engine_sample.
+		 */
+		struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_COUNT];
+	} pmu;
+
+	/*
+	 * A pool of objects to use as shadow copies of client batch buffers
+	 * when the command parser is enabled. Prevents the client from
+	 * modifying the batch contents after software parsing.
+	 */
+	struct i915_gem_batch_pool batch_pool;
+
+	struct intel_hw_status_page status_page;
+	struct i915_ctx_workarounds wa_ctx;
+	struct i915_wa_list ctx_wa_list;
+	struct i915_wa_list wa_list;
+	struct i915_wa_list whitelist;
+
+	u32             irq_keep_mask; /* always keep these interrupts */
+	u32		irq_enable_mask; /* bitmask to enable ring interrupt */
+	void		(*irq_enable)(struct intel_engine_cs *engine);
+	void		(*irq_disable)(struct intel_engine_cs *engine);
+
+	int		(*init_hw)(struct intel_engine_cs *engine);
+
+	struct {
+		void (*prepare)(struct intel_engine_cs *engine);
+		void (*reset)(struct intel_engine_cs *engine, bool stalled);
+		void (*finish)(struct intel_engine_cs *engine);
+	} reset;
+
+	void		(*park)(struct intel_engine_cs *engine);
+	void		(*unpark)(struct intel_engine_cs *engine);
+
+	void		(*set_default_submission)(struct intel_engine_cs *engine);
+
+	struct intel_context *(*context_pin)(struct intel_engine_cs *engine,
+					     struct i915_gem_context *ctx);
+
+	int		(*request_alloc)(struct i915_request *rq);
+	int		(*init_context)(struct i915_request *rq);
+
+	int		(*emit_flush)(struct i915_request *request, u32 mode);
+#define EMIT_INVALIDATE	BIT(0)
+#define EMIT_FLUSH	BIT(1)
+#define EMIT_BARRIER	(EMIT_INVALIDATE | EMIT_FLUSH)
+	int		(*emit_bb_start)(struct i915_request *rq,
+					 u64 offset, u32 length,
+					 unsigned int dispatch_flags);
+#define I915_DISPATCH_SECURE BIT(0)
+#define I915_DISPATCH_PINNED BIT(1)
+	int		 (*emit_init_breadcrumb)(struct i915_request *rq);
+	u32		*(*emit_fini_breadcrumb)(struct i915_request *rq,
+						 u32 *cs);
+	unsigned int	emit_fini_breadcrumb_dw;
+
+	/* Pass the request to the hardware queue (e.g. directly into
+	 * the legacy ringbuffer or to the end of an execlist).
+	 *
+	 * This is called from an atomic context with irqs disabled; must
+	 * be irq safe.
+	 */
+	void		(*submit_request)(struct i915_request *rq);
+
+	/*
+	 * Call when the priority on a request has changed and it and its
+	 * dependencies may need rescheduling. Note the request itself may
+	 * not be ready to run!
+	 */
+	void		(*schedule)(struct i915_request *request,
+				    const struct i915_sched_attr *attr);
+
+	/*
+	 * Cancel all requests on the hardware, or queued for execution.
+	 * This should only cancel the ready requests that have been
+	 * submitted to the engine (via the engine->submit_request callback).
+	 * This is called when marking the device as wedged.
+	 */
+	void		(*cancel_requests)(struct intel_engine_cs *engine);
+
+	void		(*cleanup)(struct intel_engine_cs *engine);
+
+	struct intel_engine_execlists execlists;
+
+	/* Contexts are pinned whilst they are active on the GPU. The last
+	 * context executed remains active whilst the GPU is idle - the
+	 * switch away and write to the context object only occurs on the
+	 * next execution.  Contexts are only unpinned on retirement of the
+	 * following request ensuring that we can always write to the object
+	 * on the context switch even after idling. Across suspend, we switch
+	 * to the kernel context and trash it as the save may not happen
+	 * before the hardware is powered down.
+	 */
+	struct intel_context *last_retired_context;
+
+	/* status_notifier: list of callbacks for context-switch changes */
+	struct atomic_notifier_head context_status_notifier;
+
+	struct intel_engine_hangcheck hangcheck;
+
+#define I915_ENGINE_NEEDS_CMD_PARSER BIT(0)
+#define I915_ENGINE_SUPPORTS_STATS   BIT(1)
+#define I915_ENGINE_HAS_PREEMPTION   BIT(2)
+#define I915_ENGINE_HAS_SEMAPHORES   BIT(3)
+	unsigned int flags;
+
+	/*
+	 * Table of commands the command parser needs to know about
+	 * for this engine.
+	 */
+	DECLARE_HASHTABLE(cmd_hash, I915_CMD_HASH_ORDER);
+
+	/*
+	 * Table of registers allowed in commands that read/write registers.
+	 */
+	const struct drm_i915_reg_table *reg_tables;
+	int reg_table_count;
+
+	/*
+	 * Returns the bitmask for the length field of the specified command.
+	 * Return 0 for an unrecognized/invalid command.
+	 *
+	 * If the command parser finds an entry for a command in the engine's
+	 * cmd_tables, it gets the command's length based on the table entry.
+	 * If not, it calls this function to determine the per-engine length
+	 * field encoding for the command (i.e. different opcode ranges use
+	 * certain bits to encode the command length in the header).
+	 */
+	u32 (*get_cmd_length_mask)(u32 cmd_header);
+
+	struct {
+		/**
+		 * @lock: Lock protecting the below fields.
+		 */
+		seqlock_t lock;
+		/**
+		 * @enabled: Reference count indicating number of listeners.
+		 */
+		unsigned int enabled;
+		/**
+		 * @active: Number of contexts currently scheduled in.
+		 */
+		unsigned int active;
+		/**
+		 * @enabled_at: Timestamp when busy stats were enabled.
+		 */
+		ktime_t enabled_at;
+		/**
+		 * @start: Timestamp of the last idle to active transition.
+		 *
+		 * Idle is defined as active == 0, active is active > 0.
+		 */
+		ktime_t start;
+		/**
+		 * @total: Total time this engine was busy.
+		 *
+		 * Accumulated time not counting the most recent block in cases
+		 * where engine is currently busy (active > 0).
+		 */
+		ktime_t total;
+	} stats;
+};
+
+static inline bool
+intel_engine_needs_cmd_parser(const struct intel_engine_cs *engine)
+{
+	return engine->flags & I915_ENGINE_NEEDS_CMD_PARSER;
+}
+
+static inline bool
+intel_engine_supports_stats(const struct intel_engine_cs *engine)
+{
+	return engine->flags & I915_ENGINE_SUPPORTS_STATS;
+}
+
+static inline bool
+intel_engine_has_preemption(const struct intel_engine_cs *engine)
+{
+	return engine->flags & I915_ENGINE_HAS_PREEMPTION;
+}
+
+static inline bool
+intel_engine_has_semaphores(const struct intel_engine_cs *engine)
+{
+	return engine->flags & I915_ENGINE_HAS_SEMAPHORES;
+}
+
+#define instdone_slice_mask(dev_priv__) \
+	(IS_GEN(dev_priv__, 7) ? \
+	 1 : RUNTIME_INFO(dev_priv__)->sseu.slice_mask)
+
+#define instdone_subslice_mask(dev_priv__) \
+	(IS_GEN(dev_priv__, 7) ? \
+	 1 : RUNTIME_INFO(dev_priv__)->sseu.subslice_mask[0])
+
+#define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \
+	for ((slice__) = 0, (subslice__) = 0; \
+	     (slice__) < I915_MAX_SLICES; \
+	     (subslice__) = ((subslice__) + 1) < I915_MAX_SUBSLICES ? (subslice__) + 1 : 0, \
+	       (slice__) += ((subslice__) == 0)) \
+		for_each_if((BIT(slice__) & instdone_slice_mask(dev_priv__)) && \
+			    (BIT(subslice__) & instdone_subslice_mask(dev_priv__)))
+
+#endif /* __INTEL_ENGINE_TYPES_H__ */
diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
index 744220296653..77ec1bd4df5a 100644
--- a/drivers/gpu/drm/i915/intel_guc.h
+++ b/drivers/gpu/drm/i915/intel_guc.h
@@ -32,6 +32,7 @@
 #include "intel_guc_log.h"
 #include "intel_guc_reg.h"
 #include "intel_uc_fw.h"
+#include "i915_utils.h"
 #include "i915_vma.h"
 
 struct guc_preempt_work {
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 9ccbe63d46e3..e612bdca9fd9 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -15,14 +15,11 @@
 #include "i915_request.h"
 #include "i915_selftest.h"
 #include "i915_timeline.h"
-#include "intel_device_info.h"
+#include "intel_engine_types.h"
 #include "intel_gpu_commands.h"
 #include "intel_workarounds.h"
 
 struct drm_printer;
-struct i915_sched_attr;
-
-#define I915_CMD_HASH_ORDER 9
 
 /* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill,
  * but keeps the logic simple. Indeed, the whole purpose of this macro is just
@@ -32,11 +29,6 @@ struct i915_sched_attr;
 #define CACHELINE_BYTES 64
 #define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(u32))
 
-struct intel_hw_status_page {
-	struct i915_vma *vma;
-	u32 *addr;
-};
-
 #define I915_READ_TAIL(engine) I915_READ(RING_TAIL((engine)->mmio_base))
 #define I915_WRITE_TAIL(engine, val) I915_WRITE(RING_TAIL((engine)->mmio_base), val)
 
@@ -91,498 +83,6 @@ hangcheck_action_to_str(const enum intel_engine_hangcheck_action a)
 	return "unknown";
 }
 
-#define I915_MAX_SLICES	3
-#define I915_MAX_SUBSLICES 8
-
-#define instdone_slice_mask(dev_priv__) \
-	(IS_GEN(dev_priv__, 7) ? \
-	 1 : RUNTIME_INFO(dev_priv__)->sseu.slice_mask)
-
-#define instdone_subslice_mask(dev_priv__) \
-	(IS_GEN(dev_priv__, 7) ? \
-	 1 : RUNTIME_INFO(dev_priv__)->sseu.subslice_mask[0])
-
-#define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \
-	for ((slice__) = 0, (subslice__) = 0; \
-	     (slice__) < I915_MAX_SLICES; \
-	     (subslice__) = ((subslice__) + 1) < I915_MAX_SUBSLICES ? (subslice__) + 1 : 0, \
-	       (slice__) += ((subslice__) == 0)) \
-		for_each_if((BIT(slice__) & instdone_slice_mask(dev_priv__)) && \
-			    (BIT(subslice__) & instdone_subslice_mask(dev_priv__)))
-
-struct intel_instdone {
-	u32 instdone;
-	/* The following exist only in the RCS engine */
-	u32 slice_common;
-	u32 sampler[I915_MAX_SLICES][I915_MAX_SUBSLICES];
-	u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES];
-};
-
-struct intel_engine_hangcheck {
-	u64 acthd;
-	u32 last_seqno;
-	u32 next_seqno;
-	unsigned long action_timestamp;
-	struct intel_instdone instdone;
-};
-
-struct intel_ring {
-	struct i915_vma *vma;
-	void *vaddr;
-
-	struct i915_timeline *timeline;
-	struct list_head request_list;
-	struct list_head active_link;
-
-	u32 head;
-	u32 tail;
-	u32 emit;
-
-	u32 space;
-	u32 size;
-	u32 effective_size;
-};
-
-struct i915_gem_context;
-struct drm_i915_reg_table;
-
-/*
- * we use a single page to load ctx workarounds so all of these
- * values are referred in terms of dwords
- *
- * struct i915_wa_ctx_bb:
- *  offset: specifies batch starting position, also helpful in case
- *    if we want to have multiple batches at different offsets based on
- *    some criteria. It is not a requirement at the moment but provides
- *    an option for future use.
- *  size: size of the batch in DWORDS
- */
-struct i915_ctx_workarounds {
-	struct i915_wa_ctx_bb {
-		u32 offset;
-		u32 size;
-	} indirect_ctx, per_ctx;
-	struct i915_vma *vma;
-};
-
-struct i915_request;
-
-#define I915_MAX_VCS	4
-#define I915_MAX_VECS	2
-
-/*
- * Engine IDs definitions.
- * Keep instances of the same type engine together.
- */
-enum intel_engine_id {
-	RCS0 = 0,
-	BCS0,
-	VCS0,
-	VCS1,
-	VCS2,
-	VCS3,
-#define _VCS(n) (VCS0 + (n))
-	VECS0,
-	VECS1
-#define _VECS(n) (VECS0 + (n))
-};
-
-struct st_preempt_hang {
-	struct completion completion;
-	unsigned int count;
-	bool inject_hang;
-};
-
-/**
- * struct intel_engine_execlists - execlist submission queue and port state
- *
- * The struct intel_engine_execlists represents the combined logical state of
- * driver and the hardware state for execlist mode of submission.
- */
-struct intel_engine_execlists {
-	/**
-	 * @tasklet: softirq tasklet for bottom handler
-	 */
-	struct tasklet_struct tasklet;
-
-	/**
-	 * @default_priolist: priority list for I915_PRIORITY_NORMAL
-	 */
-	struct i915_priolist default_priolist;
-
-	/**
-	 * @no_priolist: priority lists disabled
-	 */
-	bool no_priolist;
-
-	/**
-	 * @submit_reg: gen-specific execlist submission register
-	 * set to the ExecList Submission Port (elsp) register pre-Gen11 and to
-	 * the ExecList Submission Queue Contents register array for Gen11+
-	 */
-	u32 __iomem *submit_reg;
-
-	/**
-	 * @ctrl_reg: the enhanced execlists control register, used to load the
-	 * submit queue on the HW and to request preemptions to idle
-	 */
-	u32 __iomem *ctrl_reg;
-
-	/**
-	 * @port: execlist port states
-	 *
-	 * For each hardware ELSP (ExecList Submission Port) we keep
-	 * track of the last request and the number of times we submitted
-	 * that port to hw. We then count the number of times the hw reports
-	 * a context completion or preemption. As only one context can
-	 * be active on hw, we limit resubmission of context to port[0]. This
-	 * is called Lite Restore, of the context.
-	 */
-	struct execlist_port {
-		/**
-		 * @request_count: combined request and submission count
-		 */
-		struct i915_request *request_count;
-#define EXECLIST_COUNT_BITS 2
-#define port_request(p) ptr_mask_bits((p)->request_count, EXECLIST_COUNT_BITS)
-#define port_count(p) ptr_unmask_bits((p)->request_count, EXECLIST_COUNT_BITS)
-#define port_pack(rq, count) ptr_pack_bits(rq, count, EXECLIST_COUNT_BITS)
-#define port_unpack(p, count) ptr_unpack_bits((p)->request_count, count, EXECLIST_COUNT_BITS)
-#define port_set(p, packed) ((p)->request_count = (packed))
-#define port_isset(p) ((p)->request_count)
-#define port_index(p, execlists) ((p) - (execlists)->port)
-
-		/**
-		 * @context_id: context ID for port
-		 */
-		GEM_DEBUG_DECL(u32 context_id);
-
-#define EXECLIST_MAX_PORTS 2
-	} port[EXECLIST_MAX_PORTS];
-
-	/**
-	 * @active: is the HW active? We consider the HW as active after
-	 * submitting any context for execution and until we have seen the
-	 * last context completion event. After that, we do not expect any
-	 * more events until we submit, and so can park the HW.
-	 *
-	 * As we have a small number of different sources from which we feed
-	 * the HW, we track the state of each inside a single bitfield.
-	 */
-	unsigned int active;
-#define EXECLISTS_ACTIVE_USER 0
-#define EXECLISTS_ACTIVE_PREEMPT 1
-#define EXECLISTS_ACTIVE_HWACK 2
-
-	/**
-	 * @port_mask: number of execlist ports - 1
-	 */
-	unsigned int port_mask;
-
-	/**
-	 * @queue_priority_hint: Highest pending priority.
-	 *
-	 * When we add requests into the queue, or adjust the priority of
-	 * executing requests, we compute the maximum priority of those
-	 * pending requests. We can then use this value to determine if
-	 * we need to preempt the executing requests to service the queue.
-	 * However, since the we may have recorded the priority of an inflight
-	 * request we wanted to preempt but since completed, at the time of
-	 * dequeuing the priority hint may no longer may match the highest
-	 * available request priority.
-	 */
-	int queue_priority_hint;
-
-	/**
-	 * @queue: queue of requests, in priority lists
-	 */
-	struct rb_root_cached queue;
-
-	/**
-	 * @csb_write: control register for Context Switch buffer
-	 *
-	 * Note this register may be either mmio or HWSP shadow.
-	 */
-	u32 *csb_write;
-
-	/**
-	 * @csb_status: status array for Context Switch buffer
-	 *
-	 * Note these register may be either mmio or HWSP shadow.
-	 */
-	u32 *csb_status;
-
-	/**
-	 * @preempt_complete_status: expected CSB upon completing preemption
-	 */
-	u32 preempt_complete_status;
-
-	/**
-	 * @csb_head: context status buffer head
-	 */
-	u8 csb_head;
-
-	I915_SELFTEST_DECLARE(struct st_preempt_hang preempt_hang;)
-};
-
-#define INTEL_ENGINE_CS_MAX_NAME 8
-
-struct intel_engine_cs {
-	struct drm_i915_private *i915;
-	char name[INTEL_ENGINE_CS_MAX_NAME];
-
-	enum intel_engine_id id;
-	unsigned int hw_id;
-	unsigned int guc_id;
-	intel_engine_mask_t mask;
-
-	u8 uabi_class;
-
-	u8 class;
-	u8 instance;
-	u32 context_size;
-	u32 mmio_base;
-
-	struct intel_ring *buffer;
-
-	struct i915_timeline timeline;
-
-	struct drm_i915_gem_object *default_state;
-	void *pinned_default_state;
-
-	/* Rather than have every client wait upon all user interrupts,
-	 * with the herd waking after every interrupt and each doing the
-	 * heavyweight seqno dance, we delegate the task (of being the
-	 * bottom-half of the user interrupt) to the first client. After
-	 * every interrupt, we wake up one client, who does the heavyweight
-	 * coherent seqno read and either goes back to sleep (if incomplete),
-	 * or wakes up all the completed clients in parallel, before then
-	 * transferring the bottom-half status to the next client in the queue.
-	 *
-	 * Compared to walking the entire list of waiters in a single dedicated
-	 * bottom-half, we reduce the latency of the first waiter by avoiding
-	 * a context switch, but incur additional coherent seqno reads when
-	 * following the chain of request breadcrumbs. Since it is most likely
-	 * that we have a single client waiting on each seqno, then reducing
-	 * the overhead of waking that client is much preferred.
-	 */
-	struct intel_breadcrumbs {
-		spinlock_t irq_lock;
-		struct list_head signalers;
-
-		struct irq_work irq_work; /* for use from inside irq_lock */
-
-		unsigned int irq_enabled;
-
-		bool irq_armed;
-	} breadcrumbs;
-
-	struct intel_engine_pmu {
-		/**
-		 * @enable: Bitmask of enable sample events on this engine.
-		 *
-		 * Bits correspond to sample event types, for instance
-		 * I915_SAMPLE_QUEUED is bit 0 etc.
-		 */
-		u32 enable;
-		/**
-		 * @enable_count: Reference count for the enabled samplers.
-		 *
-		 * Index number corresponds to @enum drm_i915_pmu_engine_sample.
-		 */
-		unsigned int enable_count[I915_ENGINE_SAMPLE_COUNT];
-		/**
-		 * @sample: Counter values for sampling events.
-		 *
-		 * Our internal timer stores the current counters in this field.
-		 *
-		 * Index number corresponds to @enum drm_i915_pmu_engine_sample.
-		 */
-		struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_COUNT];
-	} pmu;
-
-	/*
-	 * A pool of objects to use as shadow copies of client batch buffers
-	 * when the command parser is enabled. Prevents the client from
-	 * modifying the batch contents after software parsing.
-	 */
-	struct i915_gem_batch_pool batch_pool;
-
-	struct intel_hw_status_page status_page;
-	struct i915_ctx_workarounds wa_ctx;
-	struct i915_wa_list ctx_wa_list;
-	struct i915_wa_list wa_list;
-	struct i915_wa_list whitelist;
-
-	u32             irq_keep_mask; /* always keep these interrupts */
-	u32		irq_enable_mask; /* bitmask to enable ring interrupt */
-	void		(*irq_enable)(struct intel_engine_cs *engine);
-	void		(*irq_disable)(struct intel_engine_cs *engine);
-
-	int		(*init_hw)(struct intel_engine_cs *engine);
-
-	struct {
-		void (*prepare)(struct intel_engine_cs *engine);
-		void (*reset)(struct intel_engine_cs *engine, bool stalled);
-		void (*finish)(struct intel_engine_cs *engine);
-	} reset;
-
-	void		(*park)(struct intel_engine_cs *engine);
-	void		(*unpark)(struct intel_engine_cs *engine);
-
-	void		(*set_default_submission)(struct intel_engine_cs *engine);
-
-	struct intel_context *(*context_pin)(struct intel_engine_cs *engine,
-					     struct i915_gem_context *ctx);
-
-	int		(*request_alloc)(struct i915_request *rq);
-	int		(*init_context)(struct i915_request *rq);
-
-	int		(*emit_flush)(struct i915_request *request, u32 mode);
-#define EMIT_INVALIDATE	BIT(0)
-#define EMIT_FLUSH	BIT(1)
-#define EMIT_BARRIER	(EMIT_INVALIDATE | EMIT_FLUSH)
-	int		(*emit_bb_start)(struct i915_request *rq,
-					 u64 offset, u32 length,
-					 unsigned int dispatch_flags);
-#define I915_DISPATCH_SECURE BIT(0)
-#define I915_DISPATCH_PINNED BIT(1)
-	int		 (*emit_init_breadcrumb)(struct i915_request *rq);
-	u32		*(*emit_fini_breadcrumb)(struct i915_request *rq,
-						 u32 *cs);
-	unsigned int	emit_fini_breadcrumb_dw;
-
-	/* Pass the request to the hardware queue (e.g. directly into
-	 * the legacy ringbuffer or to the end of an execlist).
-	 *
-	 * This is called from an atomic context with irqs disabled; must
-	 * be irq safe.
-	 */
-	void		(*submit_request)(struct i915_request *rq);
-
-	/*
-	 * Call when the priority on a request has changed and it and its
-	 * dependencies may need rescheduling. Note the request itself may
-	 * not be ready to run!
-	 */
-	void		(*schedule)(struct i915_request *request,
-				    const struct i915_sched_attr *attr);
-
-	/*
-	 * Cancel all requests on the hardware, or queued for execution.
-	 * This should only cancel the ready requests that have been
-	 * submitted to the engine (via the engine->submit_request callback).
-	 * This is called when marking the device as wedged.
-	 */
-	void		(*cancel_requests)(struct intel_engine_cs *engine);
-
-	void		(*cleanup)(struct intel_engine_cs *engine);
-
-	struct intel_engine_execlists execlists;
-
-	/* Contexts are pinned whilst they are active on the GPU. The last
-	 * context executed remains active whilst the GPU is idle - the
-	 * switch away and write to the context object only occurs on the
-	 * next execution.  Contexts are only unpinned on retirement of the
-	 * following request ensuring that we can always write to the object
-	 * on the context switch even after idling. Across suspend, we switch
-	 * to the kernel context and trash it as the save may not happen
-	 * before the hardware is powered down.
-	 */
-	struct intel_context *last_retired_context;
-
-	/* status_notifier: list of callbacks for context-switch changes */
-	struct atomic_notifier_head context_status_notifier;
-
-	struct intel_engine_hangcheck hangcheck;
-
-#define I915_ENGINE_NEEDS_CMD_PARSER BIT(0)
-#define I915_ENGINE_SUPPORTS_STATS   BIT(1)
-#define I915_ENGINE_HAS_PREEMPTION   BIT(2)
-#define I915_ENGINE_HAS_SEMAPHORES   BIT(3)
-	unsigned int flags;
-
-	/*
-	 * Table of commands the command parser needs to know about
-	 * for this engine.
-	 */
-	DECLARE_HASHTABLE(cmd_hash, I915_CMD_HASH_ORDER);
-
-	/*
-	 * Table of registers allowed in commands that read/write registers.
-	 */
-	const struct drm_i915_reg_table *reg_tables;
-	int reg_table_count;
-
-	/*
-	 * Returns the bitmask for the length field of the specified command.
-	 * Return 0 for an unrecognized/invalid command.
-	 *
-	 * If the command parser finds an entry for a command in the engine's
-	 * cmd_tables, it gets the command's length based on the table entry.
-	 * If not, it calls this function to determine the per-engine length
-	 * field encoding for the command (i.e. different opcode ranges use
-	 * certain bits to encode the command length in the header).
-	 */
-	u32 (*get_cmd_length_mask)(u32 cmd_header);
-
-	struct {
-		/**
-		 * @lock: Lock protecting the below fields.
-		 */
-		seqlock_t lock;
-		/**
-		 * @enabled: Reference count indicating number of listeners.
-		 */
-		unsigned int enabled;
-		/**
-		 * @active: Number of contexts currently scheduled in.
-		 */
-		unsigned int active;
-		/**
-		 * @enabled_at: Timestamp when busy stats were enabled.
-		 */
-		ktime_t enabled_at;
-		/**
-		 * @start: Timestamp of the last idle to active transition.
-		 *
-		 * Idle is defined as active == 0, active is active > 0.
-		 */
-		ktime_t start;
-		/**
-		 * @total: Total time this engine was busy.
-		 *
-		 * Accumulated time not counting the most recent block in cases
-		 * where engine is currently busy (active > 0).
-		 */
-		ktime_t total;
-	} stats;
-};
-
-static inline bool
-intel_engine_needs_cmd_parser(const struct intel_engine_cs *engine)
-{
-	return engine->flags & I915_ENGINE_NEEDS_CMD_PARSER;
-}
-
-static inline bool
-intel_engine_supports_stats(const struct intel_engine_cs *engine)
-{
-	return engine->flags & I915_ENGINE_SUPPORTS_STATS;
-}
-
-static inline bool
-intel_engine_has_preemption(const struct intel_engine_cs *engine)
-{
-	return engine->flags & I915_ENGINE_HAS_PREEMPTION;
-}
-
-static inline bool
-intel_engine_has_semaphores(const struct intel_engine_cs *engine)
-{
-	return engine->flags & I915_ENGINE_HAS_SEMAPHORES;
-}
-
 void intel_engines_set_scheduler_caps(struct drm_i915_private *i915);
 
 static inline bool __execlists_need_preempt(int prio, int last)
diff --git a/drivers/gpu/drm/i915/intel_workarounds.h b/drivers/gpu/drm/i915/intel_workarounds.h
index 7c734714b05e..a1bf51c611a9 100644
--- a/drivers/gpu/drm/i915/intel_workarounds.h
+++ b/drivers/gpu/drm/i915/intel_workarounds.h
@@ -9,18 +9,7 @@
 
 #include <linux/slab.h>
 
-struct i915_wa {
-	i915_reg_t	  reg;
-	u32		  mask;
-	u32		  val;
-};
-
-struct i915_wa_list {
-	const char	*name;
-	struct i915_wa	*list;
-	unsigned int	count;
-	unsigned int	wa_count;
-};
+#include "intel_workarounds_types.h"
 
 static inline void intel_wa_list_free(struct i915_wa_list *wal)
 {
diff --git a/drivers/gpu/drm/i915/intel_workarounds_types.h b/drivers/gpu/drm/i915/intel_workarounds_types.h
new file mode 100644
index 000000000000..30918da180ff
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_workarounds_types.h
@@ -0,0 +1,27 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2018 Intel Corporation
+ */
+
+#ifndef __INTEL_WORKAROUNDS_TYPES_H__
+#define __INTEL_WORKAROUNDS_TYPES_H__
+
+#include <linux/types.h>
+
+#include "i915_reg.h"
+
+struct i915_wa {
+	i915_reg_t	  reg;
+	u32		  mask;
+	u32		  val;
+};
+
+struct i915_wa_list {
+	const char	*name;
+	struct i915_wa	*list;
+	unsigned int	count;
+	unsigned int	wa_count;
+};
+
+#endif /* __INTEL_WORKAROUNDS_TYPES_H__ */
diff --git a/drivers/gpu/drm/i915/test_i915_active_types_standalone.c b/drivers/gpu/drm/i915/test_i915_active_types_standalone.c
new file mode 100644
index 000000000000..144ebd153e57
--- /dev/null
+++ b/drivers/gpu/drm/i915/test_i915_active_types_standalone.c
@@ -0,0 +1,7 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_active_types.h"
diff --git a/drivers/gpu/drm/i915/test_i915_gem_context_types_standalone.c b/drivers/gpu/drm/i915/test_i915_gem_context_types_standalone.c
new file mode 100644
index 000000000000..4e4da4860bc2
--- /dev/null
+++ b/drivers/gpu/drm/i915/test_i915_gem_context_types_standalone.c
@@ -0,0 +1,7 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_gem_context_types.h"
diff --git a/drivers/gpu/drm/i915/test_i915_timeline_types_standalone.c b/drivers/gpu/drm/i915/test_i915_timeline_types_standalone.c
new file mode 100644
index 000000000000..f58e148e8946
--- /dev/null
+++ b/drivers/gpu/drm/i915/test_i915_timeline_types_standalone.c
@@ -0,0 +1,7 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_timeline_types.h"
diff --git a/drivers/gpu/drm/i915/test_intel_context_types_standalone.c b/drivers/gpu/drm/i915/test_intel_context_types_standalone.c
new file mode 100644
index 000000000000..b39e3c4e6551
--- /dev/null
+++ b/drivers/gpu/drm/i915/test_intel_context_types_standalone.c
@@ -0,0 +1,7 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "intel_context_types.h"
diff --git a/drivers/gpu/drm/i915/test_intel_engine_types_standalone.c b/drivers/gpu/drm/i915/test_intel_engine_types_standalone.c
new file mode 100644
index 000000000000..d05e4cdcbcf9
--- /dev/null
+++ b/drivers/gpu/drm/i915/test_intel_engine_types_standalone.c
@@ -0,0 +1,7 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "intel_engine_types.h"
diff --git a/drivers/gpu/drm/i915/test_intel_workarounds_types_standalone.c b/drivers/gpu/drm/i915/test_intel_workarounds_types_standalone.c
new file mode 100644
index 000000000000..4f658bb00825
--- /dev/null
+++ b/drivers/gpu/drm/i915/test_intel_workarounds_types_standalone.c
@@ -0,0 +1,7 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "intel_workarounds_types.h"
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 78+ messages in thread

* [PATCH 18/43] drm/i915: Store the intel_context_ops in the intel_engine_cs
  2019-03-06 14:24 RFC Breaking up GEM struct_mutex for async-pages Chris Wilson
                   ` (16 preceding siblings ...)
  2019-03-06 14:24 ` [PATCH 17/43] drm/i915: Split struct intel_context definition to its own header Chris Wilson
@ 2019-03-06 14:24 ` Chris Wilson
  2019-03-06 14:39   ` Tvrtko Ursulin
  2019-03-06 14:24 ` [PATCH 19/43] drm/i915: Move over to intel_context_lookup() Chris Wilson
                   ` (27 subsequent siblings)
  45 siblings, 1 reply; 78+ messages in thread
From: Chris Wilson @ 2019-03-06 14:24 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld

If we place a pointer to the engine specific intel_context_ops in the
engine itself, we can assign the ops pointer on initialising the
context, and then rely on it being set. This simplifies the code in
later patches.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_context.c      |  1 +
 drivers/gpu/drm/i915/intel_engine_types.h    |  1 +
 drivers/gpu/drm/i915/intel_lrc.c             | 13 ++++++------
 drivers/gpu/drm/i915/intel_ringbuffer.c      | 22 +++++++++-----------
 drivers/gpu/drm/i915/selftests/mock_engine.c | 13 ++++++------
 5 files changed, 24 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 8037d0efbb4a..bc89d01d338d 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -379,6 +379,7 @@ intel_context_init(struct intel_context *ce,
 {
 	ce->gem_context = ctx;
 	ce->engine = engine;
+	ce->ops = engine->cops;
 
 	INIT_LIST_HEAD(&ce->signal_link);
 	INIT_LIST_HEAD(&ce->signals);
diff --git a/drivers/gpu/drm/i915/intel_engine_types.h b/drivers/gpu/drm/i915/intel_engine_types.h
index f7ceda334169..4d2c0ba58e6e 100644
--- a/drivers/gpu/drm/i915/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/intel_engine_types.h
@@ -351,6 +351,7 @@ struct intel_engine_cs {
 
 	void		(*set_default_submission)(struct intel_engine_cs *engine);
 
+	const struct intel_context_ops *cops;
 	struct intel_context *(*context_pin)(struct intel_engine_cs *engine,
 					     struct i915_gem_context *ctx);
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 0bd837b64ade..31cc4a82b928 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1386,11 +1386,6 @@ __execlists_context_pin(struct intel_engine_cs *engine,
 	return ERR_PTR(ret);
 }
 
-static const struct intel_context_ops execlists_context_ops = {
-	.unpin = execlists_context_unpin,
-	.destroy = execlists_context_destroy,
-};
-
 static struct intel_context *
 execlists_context_pin(struct intel_engine_cs *engine,
 		      struct i915_gem_context *ctx)
@@ -1404,11 +1399,14 @@ execlists_context_pin(struct intel_engine_cs *engine,
 		return ce;
 	GEM_BUG_ON(!ce->pin_count); /* no overflow please! */
 
-	ce->ops = &execlists_context_ops;
-
 	return __execlists_context_pin(engine, ctx, ce);
 }
 
+static const struct intel_context_ops execlists_context_ops = {
+	.unpin = execlists_context_unpin,
+	.destroy = execlists_context_destroy,
+};
+
 static int gen8_emit_init_breadcrumb(struct i915_request *rq)
 {
 	u32 *cs;
@@ -2352,6 +2350,7 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
 	engine->reset.reset = execlists_reset;
 	engine->reset.finish = execlists_reset_finish;
 
+	engine->cops = &execlists_context_ops;
 	engine->context_pin = execlists_context_pin;
 	engine->request_alloc = execlists_request_alloc;
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 82f33ff94dc8..78e3c03aab4e 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1349,7 +1349,7 @@ intel_ring_free(struct intel_ring *ring)
 	kfree(ring);
 }
 
-static void intel_ring_context_destroy(struct intel_context *ce)
+static void ring_context_destroy(struct intel_context *ce)
 {
 	GEM_BUG_ON(ce->pin_count);
 
@@ -1426,7 +1426,7 @@ static void __context_unpin(struct intel_context *ce)
 	i915_vma_unpin(vma);
 }
 
-static void intel_ring_context_unpin(struct intel_context *ce)
+static void ring_context_unpin(struct intel_context *ce)
 {
 	__context_unpin_ppgtt(ce->gem_context);
 	__context_unpin(ce);
@@ -1549,14 +1549,8 @@ __ring_context_pin(struct intel_engine_cs *engine,
 	return ERR_PTR(err);
 }
 
-static const struct intel_context_ops ring_context_ops = {
-	.unpin = intel_ring_context_unpin,
-	.destroy = intel_ring_context_destroy,
-};
-
 static struct intel_context *
-intel_ring_context_pin(struct intel_engine_cs *engine,
-		       struct i915_gem_context *ctx)
+ring_context_pin(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 {
 	struct intel_context *ce = to_intel_context(ctx, engine);
 
@@ -1566,11 +1560,14 @@ intel_ring_context_pin(struct intel_engine_cs *engine,
 		return ce;
 	GEM_BUG_ON(!ce->pin_count); /* no overflow please! */
 
-	ce->ops = &ring_context_ops;
-
 	return __ring_context_pin(engine, ctx, ce);
 }
 
+static const struct intel_context_ops ring_context_ops = {
+	.unpin = ring_context_unpin,
+	.destroy = ring_context_destroy,
+};
+
 static int intel_init_ring_buffer(struct intel_engine_cs *engine)
 {
 	struct i915_timeline *timeline;
@@ -2276,7 +2273,8 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
 	engine->reset.reset = reset_ring;
 	engine->reset.finish = reset_finish;
 
-	engine->context_pin = intel_ring_context_pin;
+	engine->cops = &ring_context_ops;
+	engine->context_pin = ring_context_pin;
 	engine->request_alloc = ring_request_alloc;
 
 	/*
diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c
index 8032a8a9542f..856ec2706f3e 100644
--- a/drivers/gpu/drm/i915/selftests/mock_engine.c
+++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
@@ -137,11 +137,6 @@ static void mock_context_destroy(struct intel_context *ce)
 		mock_ring_free(ce->ring);
 }
 
-static const struct intel_context_ops mock_context_ops = {
-	.unpin = mock_context_unpin,
-	.destroy = mock_context_destroy,
-};
-
 static struct intel_context *
 mock_context_pin(struct intel_engine_cs *engine,
 		 struct i915_gem_context *ctx)
@@ -160,8 +155,6 @@ mock_context_pin(struct intel_engine_cs *engine,
 
 	mock_timeline_pin(ce->ring->timeline);
 
-	ce->ops = &mock_context_ops;
-
 	mutex_lock(&ctx->mutex);
 	list_add(&ce->active_link, &ctx->active_engines);
 	mutex_unlock(&ctx->mutex);
@@ -174,6 +167,11 @@ mock_context_pin(struct intel_engine_cs *engine,
 	return ERR_PTR(err);
 }
 
+static const struct intel_context_ops mock_context_ops = {
+	.unpin = mock_context_unpin,
+	.destroy = mock_context_destroy,
+};
+
 static int mock_request_alloc(struct i915_request *request)
 {
 	INIT_LIST_HEAD(&request->mock.link);
@@ -232,6 +230,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
 	engine->base.mask = BIT(id);
 	engine->base.status_page.addr = (void *)(engine + 1);
 
+	engine->base.cops = &mock_context_ops;
 	engine->base.context_pin = mock_context_pin;
 	engine->base.request_alloc = mock_request_alloc;
 	engine->base.emit_flush = mock_emit_flush;
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 78+ messages in thread

* [PATCH 19/43] drm/i915: Move over to intel_context_lookup()
  2019-03-06 14:24 RFC Breaking up GEM struct_mutex for async-pages Chris Wilson
                   ` (17 preceding siblings ...)
  2019-03-06 14:24 ` [PATCH 18/43] drm/i915: Store the intel_context_ops in the intel_engine_cs Chris Wilson
@ 2019-03-06 14:24 ` Chris Wilson
  2019-03-06 14:40   ` Tvrtko Ursulin
  2019-03-06 14:24 ` [PATCH 20/43] drm/i915: Make context pinning part of intel_context_ops Chris Wilson
                   ` (26 subsequent siblings)
  45 siblings, 1 reply; 78+ messages in thread
From: Chris Wilson @ 2019-03-06 14:24 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld

In preparation for an ever growing number of engines and so ever
increasing static array of HW contexts within the GEM context, move the
array over to an rbtree, allocated upon first use.

Unfortunately, this imposes an rbtree lookup at a few frequent callsites,
but we should be able to mitigate those by moving over to using the HW
context as our primary type and so only incur the lookup on the boundary
with the user GEM context and engines.

v2: Check for no HW context in guc_stage_desc_init

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/Makefile                 |   1 +
 drivers/gpu/drm/i915/gvt/mmio_context.c       |   3 +-
 drivers/gpu/drm/i915/i915_gem.c               |   9 +-
 drivers/gpu/drm/i915/i915_gem_context.c       |  91 ++++-----
 drivers/gpu/drm/i915/i915_gem_context.h       |   1 -
 drivers/gpu/drm/i915/i915_gem_context_types.h |   7 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c    |   4 +-
 drivers/gpu/drm/i915/i915_globals.c           |   1 +
 drivers/gpu/drm/i915/i915_globals.h           |   1 +
 drivers/gpu/drm/i915/i915_perf.c              |   4 +-
 drivers/gpu/drm/i915/intel_context.c          | 180 ++++++++++++++++++
 drivers/gpu/drm/i915/intel_context.h          |  40 +++-
 drivers/gpu/drm/i915/intel_context_types.h    |   2 +
 drivers/gpu/drm/i915/intel_engine_cs.c        |   2 +-
 drivers/gpu/drm/i915/intel_engine_types.h     |   5 +
 drivers/gpu/drm/i915/intel_guc_ads.c          |   4 +-
 drivers/gpu/drm/i915/intel_guc_submission.c   |   6 +-
 drivers/gpu/drm/i915/intel_lrc.c              |  29 ++-
 drivers/gpu/drm/i915/intel_ringbuffer.c       |  23 ++-
 drivers/gpu/drm/i915/selftests/mock_context.c |   7 +-
 drivers/gpu/drm/i915/selftests/mock_engine.c  |   6 +-
 21 files changed, 324 insertions(+), 102 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/intel_context.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 524cace37d85..60de05f3fa60 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -95,6 +95,7 @@ i915-y += \
 	  i915_trace_points.o \
 	  i915_vma.o \
 	  intel_breadcrumbs.o \
+	  intel_context.o \
 	  intel_engine_cs.o \
 	  intel_hangcheck.o \
 	  intel_lrc.o \
diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c
index 0209d27fcaf0..f64c76dd11d4 100644
--- a/drivers/gpu/drm/i915/gvt/mmio_context.c
+++ b/drivers/gpu/drm/i915/gvt/mmio_context.c
@@ -494,7 +494,8 @@ static void switch_mmio(struct intel_vgpu *pre,
 			 * itself.
 			 */
 			if (mmio->in_context &&
-			    !is_inhibit_context(&s->shadow_ctx->__engine[ring_id]))
+			    !is_inhibit_context(intel_context_lookup(s->shadow_ctx,
+								     dev_priv->engine[ring_id])))
 				continue;
 
 			if (mmio->mask)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8d5918b99fc9..71d8ce38f42c 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4650,15 +4650,20 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
 	}
 
 	for_each_engine(engine, i915, id) {
+		struct intel_context *ce;
 		struct i915_vma *state;
 		void *vaddr;
 
-		GEM_BUG_ON(to_intel_context(ctx, engine)->pin_count);
+		ce = intel_context_lookup(ctx, engine);
+		if (!ce)
+			continue;
 
-		state = to_intel_context(ctx, engine)->state;
+		state = ce->state;
 		if (!state)
 			continue;
 
+		GEM_BUG_ON(ce->pin_count);
+
 		/*
 		 * As we will hold a reference to the logical state, it will
 		 * not be torn down with the context, and importantly the
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index bc89d01d338d..f7a075efccad 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -96,7 +96,7 @@
 
 #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
 
-static struct i915_global_context {
+static struct i915_global_gem_context {
 	struct i915_global base;
 	struct kmem_cache *slab_luts;
 } global;
@@ -240,7 +240,7 @@ static void release_hw_id(struct i915_gem_context *ctx)
 
 static void i915_gem_context_free(struct i915_gem_context *ctx)
 {
-	unsigned int n;
+	struct intel_context *it, *n;
 
 	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
 	GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
@@ -251,12 +251,8 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
 
 	kfree(ctx->engines);
 
-	for (n = 0; n < ARRAY_SIZE(ctx->__engine); n++) {
-		struct intel_context *ce = &ctx->__engine[n];
-
-		if (ce->ops)
-			ce->ops->destroy(ce);
-	}
+	rbtree_postorder_for_each_entry_safe(it, n, &ctx->hw_contexts, node)
+		it->ops->destroy(it);
 
 	if (ctx->timeline)
 		i915_timeline_put(ctx->timeline);
@@ -363,40 +359,11 @@ static u32 default_desc_template(const struct drm_i915_private *i915,
 	return desc;
 }
 
-static void intel_context_retire(struct i915_active_request *active,
-				 struct i915_request *rq)
-{
-	struct intel_context *ce =
-		container_of(active, typeof(*ce), active_tracker);
-
-	intel_context_unpin(ce);
-}
-
-void
-intel_context_init(struct intel_context *ce,
-		   struct i915_gem_context *ctx,
-		   struct intel_engine_cs *engine)
-{
-	ce->gem_context = ctx;
-	ce->engine = engine;
-	ce->ops = engine->cops;
-
-	INIT_LIST_HEAD(&ce->signal_link);
-	INIT_LIST_HEAD(&ce->signals);
-
-	/* Use the whole device by default */
-	ce->sseu = intel_device_default_sseu(ctx->i915);
-
-	i915_active_request_init(&ce->active_tracker,
-				 NULL, intel_context_retire);
-}
-
 static struct i915_gem_context *
 __create_hw_context(struct drm_i915_private *dev_priv,
 		    struct drm_i915_file_private *file_priv)
 {
 	struct i915_gem_context *ctx;
-	unsigned int n;
 	int ret;
 	int i;
 
@@ -411,8 +378,8 @@ __create_hw_context(struct drm_i915_private *dev_priv,
 	INIT_LIST_HEAD(&ctx->active_engines);
 	mutex_init(&ctx->mutex);
 
-	for (n = 0; n < ARRAY_SIZE(ctx->__engine); n++)
-		intel_context_init(&ctx->__engine[n], ctx, dev_priv->engine[n]);
+	ctx->hw_contexts = RB_ROOT;
+	spin_lock_init(&ctx->hw_contexts_lock);
 
 	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
 	INIT_LIST_HEAD(&ctx->handles_list);
@@ -961,8 +928,6 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915,
 		struct intel_ring *ring;
 		struct i915_request *rq;
 
-		GEM_BUG_ON(!to_intel_context(i915->kernel_context, engine));
-
 		rq = i915_request_alloc(engine, i915->kernel_context);
 		if (IS_ERR(rq))
 			return PTR_ERR(rq);
@@ -1197,9 +1162,13 @@ __i915_gem_context_reconfigure_sseu(struct i915_gem_context *ctx,
 				    struct intel_engine_cs *engine,
 				    struct intel_sseu sseu)
 {
-	struct intel_context *ce = to_intel_context(ctx, engine);
+	struct intel_context *ce;
 	int ret = 0;
 
+	ce = intel_context_instance(ctx, engine);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
+
 	GEM_BUG_ON(INTEL_GEN(ctx->i915) < 8);
 	GEM_BUG_ON(engine->id != RCS0);
 
@@ -1633,8 +1602,8 @@ static int create_setparam(struct i915_user_extension __user *ext, void *data)
 	return ctx_setparam(data, &local.setparam);
 }
 
-static void clone_sseu(struct i915_gem_context *dst,
-		       struct i915_gem_context *src)
+static int clone_sseu(struct i915_gem_context *dst,
+		      struct i915_gem_context *src)
 {
 	const struct intel_sseu default_sseu =
 		intel_device_default_sseu(dst->i915);
@@ -1645,7 +1614,7 @@ static void clone_sseu(struct i915_gem_context *dst,
 		struct intel_context *ce;
 		struct intel_sseu sseu;
 
-		ce = to_intel_context(src, engine);
+		ce = intel_context_lookup(src, engine);
 		if (!ce)
 			continue;
 
@@ -1653,9 +1622,14 @@ static void clone_sseu(struct i915_gem_context *dst,
 		if (!memcmp(&sseu, &default_sseu, sizeof(sseu)))
 			continue;
 
-		ce = to_intel_context(dst, engine);
+		ce = intel_context_instance(dst, engine);
+		if (IS_ERR(ce))
+			return PTR_ERR(ce);
+
 		ce->sseu = sseu;
 	}
+
+	return 0;
 }
 
 static int clone_engines(struct i915_gem_context *dst,
@@ -1702,8 +1676,11 @@ static int create_clone(struct i915_user_extension __user *ext, void *data)
 	if (local.flags & I915_CONTEXT_CLONE_SCHED)
 		dst->sched = src->sched;
 
-	if (local.flags & I915_CONTEXT_CLONE_SSEU)
-		clone_sseu(dst, src);
+	if (local.flags & I915_CONTEXT_CLONE_SSEU) {
+		err = clone_sseu(dst, src);
+		if (err)
+			return err;
+	}
 
 	if (local.flags & I915_CONTEXT_CLONE_TIMELINE && src->timeline) {
 		if (dst->timeline)
@@ -1856,13 +1833,15 @@ static int get_sseu(struct i915_gem_context *ctx,
 	if (!engine)
 		return -EINVAL;
 
+	ce = intel_context_instance(ctx, engine);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
+
 	/* Only use for mutex here is to serialize get_param and set_param. */
 	ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex);
 	if (ret)
 		return ret;
 
-	ce = to_intel_context(ctx, engine);
-
 	user_sseu.slice_mask = ce->sseu.slice_mask;
 	user_sseu.subslice_mask = ce->sseu.subslice_mask;
 	user_sseu.min_eus_per_subslice = ce->sseu.min_eus_per_subslice;
@@ -2038,22 +2017,22 @@ int __i915_gem_context_pin_hw_id(struct i915_gem_context *ctx)
 #include "selftests/i915_gem_context.c"
 #endif
 
-static void i915_global_context_shrink(void)
+static void i915_global_gem_context_shrink(void)
 {
 	kmem_cache_shrink(global.slab_luts);
 }
 
-static void i915_global_context_exit(void)
+static void i915_global_gem_context_exit(void)
 {
 	kmem_cache_destroy(global.slab_luts);
 }
 
-static struct i915_global_context global = { {
-	.shrink = i915_global_context_shrink,
-	.exit = i915_global_context_exit,
+static struct i915_global_gem_context global = { {
+	.shrink = i915_global_gem_context_shrink,
+	.exit = i915_global_gem_context_exit,
 } };
 
-int __init i915_global_context_init(void)
+int __init i915_global_gem_context_init(void)
 {
 	global.slab_luts = KMEM_CACHE(i915_lut_handle, 0);
 	if (!global.slab_luts)
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index ed9a2447bba9..1e670372892c 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -31,7 +31,6 @@
 #include "i915_scheduler.h"
 #include "intel_context.h"
 #include "intel_device_info.h"
-#include "intel_ringbuffer.h"
 
 struct drm_device;
 struct drm_file;
diff --git a/drivers/gpu/drm/i915/i915_gem_context_types.h b/drivers/gpu/drm/i915/i915_gem_context_types.h
index 16e2518439a3..8a89f3053f73 100644
--- a/drivers/gpu/drm/i915/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/i915_gem_context_types.h
@@ -13,10 +13,10 @@
 #include <linux/kref.h>
 #include <linux/mutex.h>
 #include <linux/radix-tree.h>
+#include <linux/rbtree.h>
 #include <linux/rcupdate.h>
 #include <linux/types.h>
 
-#include "i915_gem.h" /* I915_NUM_ENGINES */
 #include "i915_scheduler.h"
 #include "intel_context_types.h"
 
@@ -146,8 +146,9 @@ struct i915_gem_context {
 
 	struct i915_sched_attr sched;
 
-	/** engine: per-engine logical HW state */
-	struct intel_context __engine[I915_NUM_ENGINES];
+	/** hw_contexts: per-engine logical HW state */
+	struct rb_root hw_contexts;
+	spinlock_t hw_contexts_lock;
 
 	/** ring_size: size for allocating the per-engine ring buffer */
 	u32 ring_size;
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 4b0af9a8000f..8fc5bbdba9fa 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -775,8 +775,8 @@ static int eb_wait_for_ring(const struct i915_execbuffer *eb)
 	 * keeping all of their resources pinned.
 	 */
 
-	ce = to_intel_context(eb->ctx, eb->engine);
-	if (!ce->ring) /* first use, assume empty! */
+	ce = intel_context_lookup(eb->ctx, eb->engine);
+	if (!ce || !ce->ring) /* first use, assume empty! */
 		return 0;
 
 	rq = __eb_wait_for_ring(ce->ring);
diff --git a/drivers/gpu/drm/i915/i915_globals.c b/drivers/gpu/drm/i915/i915_globals.c
index 1cf4e8bc8ec6..2f5c72e2a9d1 100644
--- a/drivers/gpu/drm/i915/i915_globals.c
+++ b/drivers/gpu/drm/i915/i915_globals.c
@@ -36,6 +36,7 @@ static void __i915_globals_cleanup(void)
 static __initconst int (* const initfn[])(void) = {
 	i915_global_active_init,
 	i915_global_context_init,
+	i915_global_gem_context_init,
 	i915_global_objects_init,
 	i915_global_request_init,
 	i915_global_scheduler_init,
diff --git a/drivers/gpu/drm/i915/i915_globals.h b/drivers/gpu/drm/i915/i915_globals.h
index a45529022a42..04c1ce107fc0 100644
--- a/drivers/gpu/drm/i915/i915_globals.h
+++ b/drivers/gpu/drm/i915/i915_globals.h
@@ -26,6 +26,7 @@ void i915_globals_exit(void);
 /* constructors */
 int i915_global_active_init(void);
 int i915_global_context_init(void);
+int i915_global_gem_context_init(void);
 int i915_global_objects_init(void);
 int i915_global_request_init(void);
 int i915_global_scheduler_init(void);
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index a0d145f976ec..e19a89e4df64 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1740,11 +1740,11 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
 
 	/* Update all contexts now that we've stalled the submission. */
 	list_for_each_entry(ctx, &dev_priv->contexts.list, link) {
-		struct intel_context *ce = to_intel_context(ctx, engine);
+		struct intel_context *ce = intel_context_lookup(ctx, engine);
 		u32 *regs;
 
 		/* OA settings will be set upon first use */
-		if (!ce->state)
+		if (!ce || !ce->state)
 			continue;
 
 		regs = i915_gem_object_pin_map(ce->state->obj, map_type);
diff --git a/drivers/gpu/drm/i915/intel_context.c b/drivers/gpu/drm/i915/intel_context.c
new file mode 100644
index 000000000000..d04a1f51a90c
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_context.c
@@ -0,0 +1,180 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "i915_gem_context.h"
+#include "i915_globals.h"
+#include "intel_context.h"
+#include "intel_ringbuffer.h"
+
+static struct i915_global_context {
+	struct i915_global base;
+	struct kmem_cache *slab_ce;
+} global;
+
+struct intel_context *intel_context_alloc(void)
+{
+	return kmem_cache_zalloc(global.slab_ce, GFP_KERNEL);
+}
+
+void intel_context_free(struct intel_context *ce)
+{
+	kmem_cache_free(global.slab_ce, ce);
+}
+
+struct intel_context *
+intel_context_lookup(struct i915_gem_context *ctx,
+		     struct intel_engine_cs *engine)
+{
+	struct intel_context *ce = NULL;
+	struct rb_node *p;
+
+	spin_lock(&ctx->hw_contexts_lock);
+	p = ctx->hw_contexts.rb_node;
+	while (p) {
+		struct intel_context *this =
+			rb_entry(p, struct intel_context, node);
+
+		if (this->engine == engine) {
+			GEM_BUG_ON(this->gem_context != ctx);
+			ce = this;
+			break;
+		}
+
+		if (this->engine < engine)
+			p = p->rb_right;
+		else
+			p = p->rb_left;
+	}
+	spin_unlock(&ctx->hw_contexts_lock);
+
+	return ce;
+}
+
+struct intel_context *
+__intel_context_insert(struct i915_gem_context *ctx,
+		       struct intel_engine_cs *engine,
+		       struct intel_context *ce)
+{
+	struct rb_node **p, *parent;
+	int err = 0;
+
+	spin_lock(&ctx->hw_contexts_lock);
+
+	parent = NULL;
+	p = &ctx->hw_contexts.rb_node;
+	while (*p) {
+		struct intel_context *this;
+
+		parent = *p;
+		this = rb_entry(parent, struct intel_context, node);
+
+		if (this->engine == engine) {
+			err = -EEXIST;
+			ce = this;
+			break;
+		}
+
+		if (this->engine < engine)
+			p = &parent->rb_right;
+		else
+			p = &parent->rb_left;
+	}
+	if (!err) {
+		rb_link_node(&ce->node, parent, p);
+		rb_insert_color(&ce->node, &ctx->hw_contexts);
+	}
+
+	spin_unlock(&ctx->hw_contexts_lock);
+
+	return ce;
+}
+
+void __intel_context_remove(struct intel_context *ce)
+{
+	struct i915_gem_context *ctx = ce->gem_context;
+
+	spin_lock(&ctx->hw_contexts_lock);
+	rb_erase(&ce->node, &ctx->hw_contexts);
+	spin_unlock(&ctx->hw_contexts_lock);
+}
+
+struct intel_context *
+intel_context_instance(struct i915_gem_context *ctx,
+		       struct intel_engine_cs *engine)
+{
+	struct intel_context *ce, *pos;
+
+	ce = intel_context_lookup(ctx, engine);
+	if (likely(ce))
+		return ce;
+
+	ce = intel_context_alloc();
+	if (!ce)
+		return ERR_PTR(-ENOMEM);
+
+	intel_context_init(ce, ctx, engine);
+
+	pos = __intel_context_insert(ctx, engine, ce);
+	if (unlikely(pos != ce)) /* Beaten! Use their HW context instead */
+		intel_context_free(ce);
+
+	GEM_BUG_ON(intel_context_lookup(ctx, engine) != pos);
+	return pos;
+}
+
+static void intel_context_retire(struct i915_active_request *active,
+				 struct i915_request *rq)
+{
+	struct intel_context *ce =
+		container_of(active, typeof(*ce), active_tracker);
+
+	intel_context_unpin(ce);
+}
+
+void
+intel_context_init(struct intel_context *ce,
+		   struct i915_gem_context *ctx,
+		   struct intel_engine_cs *engine)
+{
+	ce->gem_context = ctx;
+	ce->engine = engine;
+	ce->ops = engine->cops;
+
+	INIT_LIST_HEAD(&ce->signal_link);
+	INIT_LIST_HEAD(&ce->signals);
+
+	/* Use the whole device by default */
+	ce->sseu = intel_device_default_sseu(ctx->i915);
+
+	i915_active_request_init(&ce->active_tracker,
+				 NULL, intel_context_retire);
+}
+
+static void i915_global_context_shrink(void)
+{
+	kmem_cache_shrink(global.slab_ce);
+}
+
+static void i915_global_context_exit(void)
+{
+	kmem_cache_destroy(global.slab_ce);
+}
+
+static struct i915_global_context global = { {
+	.shrink = i915_global_context_shrink,
+	.exit = i915_global_context_exit,
+} };
+
+int __init i915_global_context_init(void)
+{
+	global.slab_ce = KMEM_CACHE(intel_context, SLAB_HWCACHE_ALIGN);
+	if (!global.slab_ce)
+		return -ENOMEM;
+
+	i915_global_register(&global.base);
+	return 0;
+}
diff --git a/drivers/gpu/drm/i915/intel_context.h b/drivers/gpu/drm/i915/intel_context.h
index dd947692bb0b..2c910628acaf 100644
--- a/drivers/gpu/drm/i915/intel_context.h
+++ b/drivers/gpu/drm/i915/intel_context.h
@@ -7,20 +7,46 @@
 #ifndef __INTEL_CONTEXT_H__
 #define __INTEL_CONTEXT_H__
 
-#include "i915_gem_context_types.h"
 #include "intel_context_types.h"
 #include "intel_engine_types.h"
 
+struct intel_context *intel_context_alloc(void);
+void intel_context_free(struct intel_context *ce);
+
 void intel_context_init(struct intel_context *ce,
 			struct i915_gem_context *ctx,
 			struct intel_engine_cs *engine);
 
-static inline struct intel_context *
-to_intel_context(struct i915_gem_context *ctx,
-		 const struct intel_engine_cs *engine)
-{
-	return &ctx->__engine[engine->id];
-}
+/**
+ * intel_context_lookup - Find the matching HW context for this (ctx, engine)
+ * @ctx - the parent GEM context
+ * @engine - the target HW engine
+ *
+ * May return NULL if the HW context hasn't been instantiated (i.e. unused).
+ */
+struct intel_context *
+intel_context_lookup(struct i915_gem_context *ctx,
+		     struct intel_engine_cs *engine);
+
+/**
+ * intel_context_instance - Lookup or allocate the HW context for (ctx, engine)
+ * @ctx - the parent GEM context
+ * @engine - the target HW engine
+ *
+ * Returns the existing HW context for this pair of (GEM context, engine), or
+ * allocates and initialises a fresh context. Once allocated, the HW context
+ * remains resident until the GEM context is destroyed.
+ */
+struct intel_context *
+intel_context_instance(struct i915_gem_context *ctx,
+		       struct intel_engine_cs *engine);
+
+struct intel_context *
+__intel_context_insert(struct i915_gem_context *ctx,
+		       struct intel_engine_cs *engine,
+		       struct intel_context *ce);
+void
+__intel_context_remove(struct intel_context *ce);
 
 static inline struct intel_context *
 intel_context_pin(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/intel_context_types.h b/drivers/gpu/drm/i915/intel_context_types.h
index 16e1306e9595..857f5c335324 100644
--- a/drivers/gpu/drm/i915/intel_context_types.h
+++ b/drivers/gpu/drm/i915/intel_context_types.h
@@ -8,6 +8,7 @@
 #define __INTEL_CONTEXT_TYPES__
 
 #include <linux/list.h>
+#include <linux/rbtree.h>
 #include <linux/types.h>
 
 #include "i915_active_types.h"
@@ -52,6 +53,7 @@ struct intel_context {
 	struct i915_active_request active_tracker;
 
 	const struct intel_context_ops *ops;
+	struct rb_node node;
 
 	/** sseu: Control eu/slice partitioning */
 	struct intel_sseu sseu;
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 8e326556499e..1937128ea267 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -644,7 +644,7 @@ void intel_engines_set_scheduler_caps(struct drm_i915_private *i915)
 static void __intel_context_unpin(struct i915_gem_context *ctx,
 				  struct intel_engine_cs *engine)
 {
-	intel_context_unpin(to_intel_context(ctx, engine));
+	intel_context_unpin(intel_context_lookup(ctx, engine));
 }
 
 struct measure_breadcrumb {
diff --git a/drivers/gpu/drm/i915/intel_engine_types.h b/drivers/gpu/drm/i915/intel_engine_types.h
index 4d2c0ba58e6e..6a570e4e51c3 100644
--- a/drivers/gpu/drm/i915/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/intel_engine_types.h
@@ -231,6 +231,11 @@ struct intel_engine_execlists {
 	 */
 	u32 *csb_status;
 
+	/**
+	 * @preempt_context: the HW context for injecting preempt-to-idle
+	 */
+	struct intel_context *preempt_context;
+
 	/**
 	 * @preempt_complete_status: expected CSB upon completing preemption
 	 */
diff --git a/drivers/gpu/drm/i915/intel_guc_ads.c b/drivers/gpu/drm/i915/intel_guc_ads.c
index c51d558fd431..b4ff0045a605 100644
--- a/drivers/gpu/drm/i915/intel_guc_ads.c
+++ b/drivers/gpu/drm/i915/intel_guc_ads.c
@@ -121,8 +121,8 @@ int intel_guc_ads_create(struct intel_guc *guc)
 	 * to find it. Note that we have to skip our header (1 page),
 	 * because our GuC shared data is there.
 	 */
-	kernel_ctx_vma = to_intel_context(dev_priv->kernel_context,
-					  dev_priv->engine[RCS0])->state;
+	kernel_ctx_vma = intel_context_lookup(dev_priv->kernel_context,
+					      dev_priv->engine[RCS0])->state;
 	blob->ads.golden_context_lrca =
 		intel_guc_ggtt_offset(guc, kernel_ctx_vma) + skipped_offset;
 
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index e3afc91f0e7b..4a5727233419 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -382,7 +382,7 @@ static void guc_stage_desc_init(struct intel_guc_client *client)
 	desc->db_id = client->doorbell_id;
 
 	for_each_engine_masked(engine, dev_priv, client->engines, tmp) {
-		struct intel_context *ce = to_intel_context(ctx, engine);
+		struct intel_context *ce = intel_context_lookup(ctx, engine);
 		u32 guc_engine_id = engine->guc_id;
 		struct guc_execlist_context *lrc = &desc->lrc[guc_engine_id];
 
@@ -393,7 +393,7 @@ static void guc_stage_desc_init(struct intel_guc_client *client)
 		 * for now who owns a GuC client. But for future owner of GuC
 		 * client, need to make sure lrc is pinned prior to enter here.
 		 */
-		if (!ce->state)
+		if (!ce || !ce->state)
 			break;	/* XXX: continue? */
 
 		/*
@@ -567,7 +567,7 @@ static void inject_preempt_context(struct work_struct *work)
 					     preempt_work[engine->id]);
 	struct intel_guc_client *client = guc->preempt_client;
 	struct guc_stage_desc *stage_desc = __get_stage_desc(client);
-	struct intel_context *ce = to_intel_context(client->owner, engine);
+	struct intel_context *ce = intel_context_lookup(client->owner, engine);
 	u32 data[7];
 
 	if (!ce->ring->emit) { /* recreate upon load/resume */
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 31cc4a82b928..9231efb7a82d 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -628,8 +628,7 @@ static void port_assign(struct execlist_port *port, struct i915_request *rq)
 static void inject_preempt_context(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists *execlists = &engine->execlists;
-	struct intel_context *ce =
-		to_intel_context(engine->i915->preempt_context, engine);
+	struct intel_context *ce = execlists->preempt_context;
 	unsigned int n;
 
 	GEM_BUG_ON(execlists->preempt_complete_status !=
@@ -1237,19 +1236,24 @@ static void execlists_submit_request(struct i915_request *request)
 	spin_unlock_irqrestore(&engine->timeline.lock, flags);
 }
 
-static void execlists_context_destroy(struct intel_context *ce)
+static void __execlists_context_fini(struct intel_context *ce)
 {
-	GEM_BUG_ON(ce->pin_count);
-
-	if (!ce->state)
-		return;
-
 	intel_ring_free(ce->ring);
 
 	GEM_BUG_ON(i915_gem_object_is_active(ce->state->obj));
 	i915_gem_object_put(ce->state->obj);
 }
 
+static void execlists_context_destroy(struct intel_context *ce)
+{
+	GEM_BUG_ON(ce->pin_count);
+
+	if (ce->state)
+		__execlists_context_fini(ce);
+
+	intel_context_free(ce);
+}
+
 static void execlists_context_unpin(struct intel_context *ce)
 {
 	struct intel_engine_cs *engine;
@@ -1390,7 +1394,11 @@ static struct intel_context *
 execlists_context_pin(struct intel_engine_cs *engine,
 		      struct i915_gem_context *ctx)
 {
-	struct intel_context *ce = to_intel_context(ctx, engine);
+	struct intel_context *ce;
+
+	ce = intel_context_instance(ctx, engine);
+	if (IS_ERR(ce))
+		return ce;
 
 	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
 	GEM_BUG_ON(!ctx->ppgtt);
@@ -2441,8 +2449,9 @@ static int logical_ring_init(struct intel_engine_cs *engine)
 	execlists->preempt_complete_status = ~0u;
 	if (i915->preempt_context) {
 		struct intel_context *ce =
-			to_intel_context(i915->preempt_context, engine);
+			intel_context_lookup(i915->preempt_context, engine);
 
+		execlists->preempt_context = ce;
 		execlists->preempt_complete_status =
 			upper_32_bits(ce->lrc_desc);
 	}
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 78e3c03aab4e..602babbd737f 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1349,15 +1349,20 @@ intel_ring_free(struct intel_ring *ring)
 	kfree(ring);
 }
 
+static void __ring_context_fini(struct intel_context *ce)
+{
+	GEM_BUG_ON(i915_gem_object_is_active(ce->state->obj));
+	i915_gem_object_put(ce->state->obj);
+}
+
 static void ring_context_destroy(struct intel_context *ce)
 {
 	GEM_BUG_ON(ce->pin_count);
 
-	if (!ce->state)
-		return;
+	if (ce->state)
+		__ring_context_fini(ce);
 
-	GEM_BUG_ON(i915_gem_object_is_active(ce->state->obj));
-	i915_gem_object_put(ce->state->obj);
+	intel_context_free(ce);
 }
 
 static int __context_pin_ppgtt(struct i915_gem_context *ctx)
@@ -1552,7 +1557,11 @@ __ring_context_pin(struct intel_engine_cs *engine,
 static struct intel_context *
 ring_context_pin(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 {
-	struct intel_context *ce = to_intel_context(ctx, engine);
+	struct intel_context *ce;
+
+	ce = intel_context_instance(ctx, engine);
+	if (IS_ERR(ce))
+		return ce;
 
 	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
 
@@ -1754,8 +1763,8 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
 		 * placeholder we use to flush other contexts.
 		 */
 		*cs++ = MI_SET_CONTEXT;
-		*cs++ = i915_ggtt_offset(to_intel_context(i915->kernel_context,
-							  engine)->state) |
+		*cs++ = i915_ggtt_offset(intel_context_lookup(i915->kernel_context,
+							      engine)->state) |
 			MI_MM_SPACE_GTT |
 			MI_RESTORE_INHIBIT;
 	}
diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c
index 5d0ff2293abc..1d6dc2fe36ab 100644
--- a/drivers/gpu/drm/i915/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/selftests/mock_context.c
@@ -30,7 +30,6 @@ mock_context(struct drm_i915_private *i915,
 	     const char *name)
 {
 	struct i915_gem_context *ctx;
-	unsigned int n;
 	int ret;
 
 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
@@ -41,15 +40,15 @@ mock_context(struct drm_i915_private *i915,
 	INIT_LIST_HEAD(&ctx->link);
 	ctx->i915 = i915;
 
+	ctx->hw_contexts = RB_ROOT;
+	spin_lock_init(&ctx->hw_contexts_lock);
+
 	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
 	INIT_LIST_HEAD(&ctx->handles_list);
 	INIT_LIST_HEAD(&ctx->hw_id_link);
 	INIT_LIST_HEAD(&ctx->active_engines);
 	mutex_init(&ctx->mutex);
 
-	for (n = 0; n < ARRAY_SIZE(ctx->__engine); n++)
-		intel_context_init(&ctx->__engine[n], ctx, i915->engine[n]);
-
 	ret = i915_gem_context_pin_hw_id(ctx);
 	if (ret < 0)
 		goto err_handles;
diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c
index 856ec2706f3e..d9bbb102677f 100644
--- a/drivers/gpu/drm/i915/selftests/mock_engine.c
+++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
@@ -141,9 +141,13 @@ static struct intel_context *
 mock_context_pin(struct intel_engine_cs *engine,
 		 struct i915_gem_context *ctx)
 {
-	struct intel_context *ce = to_intel_context(ctx, engine);
+	struct intel_context *ce;
 	int err = -ENOMEM;
 
+	ce = intel_context_instance(ctx, engine);
+	if (IS_ERR(ce))
+		return ce;
+
 	if (ce->pin_count++)
 		return ce;
 
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 78+ messages in thread

* [PATCH 20/43] drm/i915: Make context pinning part of intel_context_ops
  2019-03-06 14:24 RFC Breaking up GEM struct_mutex for async-pages Chris Wilson
                   ` (18 preceding siblings ...)
  2019-03-06 14:24 ` [PATCH 19/43] drm/i915: Move over to intel_context_lookup() Chris Wilson
@ 2019-03-06 14:24 ` Chris Wilson
  2019-03-06 14:24 ` [PATCH 21/43] drm/i915: Track the pinned kernel contexts on each engine Chris Wilson
                   ` (25 subsequent siblings)
  45 siblings, 0 replies; 78+ messages in thread
From: Chris Wilson @ 2019-03-06 14:24 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld

Push the intel_context pin callback down from intel_engine_cs onto the
context itself by virtue of having a central caller for
intel_context_pin() being able to lookup the intel_context itself.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/intel_context.c         |  34 ++++++
 drivers/gpu/drm/i915/intel_context.h         |   7 +-
 drivers/gpu/drm/i915/intel_context_types.h   |   1 +
 drivers/gpu/drm/i915/intel_engine_types.h    |   2 -
 drivers/gpu/drm/i915/intel_lrc.c             | 114 ++++++++-----------
 drivers/gpu/drm/i915/intel_ringbuffer.c      |  45 ++------
 drivers/gpu/drm/i915/selftests/mock_engine.c |  32 +-----
 7 files changed, 98 insertions(+), 137 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_context.c b/drivers/gpu/drm/i915/intel_context.c
index d04a1f51a90c..4c1dacb69f51 100644
--- a/drivers/gpu/drm/i915/intel_context.c
+++ b/drivers/gpu/drm/i915/intel_context.c
@@ -126,6 +126,40 @@ intel_context_instance(struct i915_gem_context *ctx,
 	return pos;
 }
 
+struct intel_context *
+intel_context_pin(struct i915_gem_context *ctx,
+		  struct intel_engine_cs *engine)
+{
+	struct intel_context *ce;
+	int err;
+
+	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
+
+	ce = intel_context_instance(ctx, engine);
+	if (IS_ERR(ce))
+		return ce;
+
+	if (unlikely(!ce->pin_count++)) {
+		err = ce->ops->pin(ce);
+		if (err)
+			goto err_unpin;
+
+		mutex_lock(&ctx->mutex);
+		list_add(&ce->active_link, &ctx->active_engines);
+		mutex_unlock(&ctx->mutex);
+
+		i915_gem_context_get(ctx);
+		GEM_BUG_ON(ce->gem_context != ctx);
+	}
+	GEM_BUG_ON(!ce->pin_count); /* no overflow! */
+
+	return ce;
+
+err_unpin:
+	ce->pin_count = 0;
+	return ERR_PTR(err);
+}
+
 static void intel_context_retire(struct i915_active_request *active,
 				 struct i915_request *rq)
 {
diff --git a/drivers/gpu/drm/i915/intel_context.h b/drivers/gpu/drm/i915/intel_context.h
index 2c910628acaf..ea21473891f7 100644
--- a/drivers/gpu/drm/i915/intel_context.h
+++ b/drivers/gpu/drm/i915/intel_context.h
@@ -48,11 +48,8 @@ __intel_context_insert(struct i915_gem_context *ctx,
 void
 __intel_context_remove(struct intel_context *ce);
 
-static inline struct intel_context *
-intel_context_pin(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
-{
-	return engine->context_pin(engine, ctx);
-}
+struct intel_context *
+intel_context_pin(struct i915_gem_context *ctx, struct intel_engine_cs *engine);
 
 static inline void __intel_context_pin(struct intel_context *ce)
 {
diff --git a/drivers/gpu/drm/i915/intel_context_types.h b/drivers/gpu/drm/i915/intel_context_types.h
index 857f5c335324..804fa93de853 100644
--- a/drivers/gpu/drm/i915/intel_context_types.h
+++ b/drivers/gpu/drm/i915/intel_context_types.h
@@ -19,6 +19,7 @@ struct intel_context;
 struct intel_ring;
 
 struct intel_context_ops {
+	int (*pin)(struct intel_context *ce);
 	void (*unpin)(struct intel_context *ce);
 	void (*destroy)(struct intel_context *ce);
 };
diff --git a/drivers/gpu/drm/i915/intel_engine_types.h b/drivers/gpu/drm/i915/intel_engine_types.h
index 6a570e4e51c3..21f3275cc00c 100644
--- a/drivers/gpu/drm/i915/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/intel_engine_types.h
@@ -357,8 +357,6 @@ struct intel_engine_cs {
 	void		(*set_default_submission)(struct intel_engine_cs *engine);
 
 	const struct intel_context_ops *cops;
-	struct intel_context *(*context_pin)(struct intel_engine_cs *engine,
-					     struct i915_gem_context *ctx);
 
 	int		(*request_alloc)(struct i915_request *rq);
 	int		(*init_context)(struct i915_request *rq);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 9231efb7a82d..79107ab771d8 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -166,9 +166,8 @@
 
 #define ACTIVE_PRIORITY (I915_PRIORITY_NEWCLIENT | I915_PRIORITY_NOSEMAPHORE)
 
-static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
-					    struct intel_engine_cs *engine,
-					    struct intel_context *ce);
+static int execlists_context_deferred_alloc(struct intel_context *ce,
+					    struct intel_engine_cs *engine);
 static void execlists_init_reg_state(u32 *reg_state,
 				     struct intel_context *ce,
 				     struct intel_engine_cs *engine,
@@ -330,11 +329,10 @@ assert_priority_queue(const struct i915_request *prev,
  * engine info, SW context ID and SW counter need to form a unique number
  * (Context ID) per lrc.
  */
-static void
-intel_lr_context_descriptor_update(struct i915_gem_context *ctx,
-				   struct intel_engine_cs *engine,
-				   struct intel_context *ce)
+static u64
+lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
 {
+	struct i915_gem_context *ctx = ce->gem_context;
 	u64 desc;
 
 	BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (BIT(GEN8_CTX_ID_WIDTH)));
@@ -352,7 +350,7 @@ intel_lr_context_descriptor_update(struct i915_gem_context *ctx,
 	 * Consider updating oa_get_render_ctx_id in i915_perf.c when changing
 	 * anything below.
 	 */
-	if (INTEL_GEN(ctx->i915) >= 11) {
+	if (INTEL_GEN(engine->i915) >= 11) {
 		GEM_BUG_ON(ctx->hw_id >= BIT(GEN11_SW_CTX_ID_WIDTH));
 		desc |= (u64)ctx->hw_id << GEN11_SW_CTX_ID_SHIFT;
 								/* bits 37-47 */
@@ -369,7 +367,7 @@ intel_lr_context_descriptor_update(struct i915_gem_context *ctx,
 		desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT;	/* bits 32-52 */
 	}
 
-	ce->lrc_desc = desc;
+	return desc;
 }
 
 static void unwind_wa_tail(struct i915_request *rq)
@@ -1290,7 +1288,7 @@ static void execlists_context_unpin(struct intel_context *ce)
 	i915_gem_context_put(ce->gem_context);
 }
 
-static int __context_pin(struct i915_gem_context *ctx, struct i915_vma *vma)
+static int __context_pin(struct i915_vma *vma)
 {
 	unsigned int flags;
 	int err;
@@ -1313,11 +1311,14 @@ static int __context_pin(struct i915_gem_context *ctx, struct i915_vma *vma)
 }
 
 static void
-__execlists_update_reg_state(struct intel_engine_cs *engine,
-			     struct intel_context *ce)
+__execlists_update_reg_state(struct intel_context *ce,
+			     struct intel_engine_cs *engine)
 {
-	u32 *regs = ce->lrc_reg_state;
 	struct intel_ring *ring = ce->ring;
+	u32 *regs = ce->lrc_reg_state;
+
+	GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->head));
+	GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));
 
 	regs[CTX_RING_BUFFER_START + 1] = i915_ggtt_offset(ring->vma);
 	regs[CTX_RING_HEAD + 1] = ring->head;
@@ -1329,25 +1330,26 @@ __execlists_update_reg_state(struct intel_engine_cs *engine,
 			gen8_make_rpcs(engine->i915, &ce->sseu);
 }
 
-static struct intel_context *
-__execlists_context_pin(struct intel_engine_cs *engine,
-			struct i915_gem_context *ctx,
-			struct intel_context *ce)
+static int
+__execlists_context_pin(struct intel_context *ce,
+			struct intel_engine_cs *engine)
 {
 	void *vaddr;
 	int ret;
 
-	ret = execlists_context_deferred_alloc(ctx, engine, ce);
+	GEM_BUG_ON(!ce->gem_context->ppgtt);
+
+	ret = execlists_context_deferred_alloc(ce, engine);
 	if (ret)
 		goto err;
 	GEM_BUG_ON(!ce->state);
 
-	ret = __context_pin(ctx, ce->state);
+	ret = __context_pin(ce->state);
 	if (ret)
 		goto err;
 
 	vaddr = i915_gem_object_pin_map(ce->state->obj,
-					i915_coherent_map_type(ctx->i915) |
+					i915_coherent_map_type(engine->i915) |
 					I915_MAP_OVERRIDE);
 	if (IS_ERR(vaddr)) {
 		ret = PTR_ERR(vaddr);
@@ -1358,26 +1360,16 @@ __execlists_context_pin(struct intel_engine_cs *engine,
 	if (ret)
 		goto unpin_map;
 
-	ret = i915_gem_context_pin_hw_id(ctx);
+	ret = i915_gem_context_pin_hw_id(ce->gem_context);
 	if (ret)
 		goto unpin_ring;
 
-	intel_lr_context_descriptor_update(ctx, engine, ce);
-
-	GEM_BUG_ON(!intel_ring_offset_valid(ce->ring, ce->ring->head));
-
+	ce->lrc_desc = lrc_descriptor(ce, engine);
 	ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
-
-	__execlists_update_reg_state(engine, ce);
+	__execlists_update_reg_state(ce, engine);
 
 	ce->state->obj->pin_global++;
-
-	mutex_lock(&ctx->mutex);
-	list_add(&ce->active_link, &ctx->active_engines);
-	mutex_unlock(&ctx->mutex);
-
-	i915_gem_context_get(ctx);
-	return ce;
+	return 0;
 
 unpin_ring:
 	intel_ring_unpin(ce->ring);
@@ -1386,31 +1378,16 @@ __execlists_context_pin(struct intel_engine_cs *engine,
 unpin_vma:
 	__i915_vma_unpin(ce->state);
 err:
-	ce->pin_count = 0;
-	return ERR_PTR(ret);
+	return ret;
 }
 
-static struct intel_context *
-execlists_context_pin(struct intel_engine_cs *engine,
-		      struct i915_gem_context *ctx)
+static int execlists_context_pin(struct intel_context *ce)
 {
-	struct intel_context *ce;
-
-	ce = intel_context_instance(ctx, engine);
-	if (IS_ERR(ce))
-		return ce;
-
-	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
-	GEM_BUG_ON(!ctx->ppgtt);
-
-	if (likely(ce->pin_count++))
-		return ce;
-	GEM_BUG_ON(!ce->pin_count); /* no overflow please! */
-
-	return __execlists_context_pin(engine, ctx, ce);
+	return __execlists_context_pin(ce, ce->engine);
 }
 
 static const struct intel_context_ops execlists_context_ops = {
+	.pin = execlists_context_pin,
 	.unpin = execlists_context_unpin,
 	.destroy = execlists_context_destroy,
 };
@@ -2034,7 +2011,7 @@ static void execlists_reset(struct intel_engine_cs *engine, bool stalled)
 	intel_ring_update_space(rq->ring);
 
 	execlists_init_reg_state(regs, rq->hw_context, engine, rq->ring);
-	__execlists_update_reg_state(engine, rq->hw_context);
+	__execlists_update_reg_state(rq->hw_context, engine);
 
 out_unlock:
 	spin_unlock_irqrestore(&engine->timeline.lock, flags);
@@ -2359,7 +2336,6 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
 	engine->reset.finish = execlists_reset_finish;
 
 	engine->cops = &execlists_context_ops;
-	engine->context_pin = execlists_context_pin;
 	engine->request_alloc = execlists_request_alloc;
 
 	engine->emit_flush = gen8_emit_flush;
@@ -2836,9 +2812,16 @@ populate_lr_context(struct intel_context *ce,
 	return ret;
 }
 
-static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
-					    struct intel_engine_cs *engine,
-					    struct intel_context *ce)
+static struct i915_timeline *get_timeline(struct i915_gem_context *ctx)
+{
+	if (ctx->timeline)
+		return i915_timeline_get(ctx->timeline);
+	else
+		return i915_timeline_create(ctx->i915, ctx->name, NULL);
+}
+
+static int execlists_context_deferred_alloc(struct intel_context *ce,
+					    struct intel_engine_cs *engine)
 {
 	struct drm_i915_gem_object *ctx_obj;
 	struct i915_vma *vma;
@@ -2858,26 +2841,25 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
 	 */
 	context_size += LRC_HEADER_PAGES * PAGE_SIZE;
 
-	ctx_obj = i915_gem_object_create(ctx->i915, context_size);
+	ctx_obj = i915_gem_object_create(engine->i915, context_size);
 	if (IS_ERR(ctx_obj))
 		return PTR_ERR(ctx_obj);
 
-	vma = i915_vma_instance(ctx_obj, &ctx->i915->ggtt.vm, NULL);
+	vma = i915_vma_instance(ctx_obj, &engine->i915->ggtt.vm, NULL);
 	if (IS_ERR(vma)) {
 		ret = PTR_ERR(vma);
 		goto error_deref_obj;
 	}
 
-	if (ctx->timeline)
-		timeline = i915_timeline_get(ctx->timeline);
-	else
-		timeline = i915_timeline_create(ctx->i915, ctx->name, NULL);
+	timeline = get_timeline(ce->gem_context);
 	if (IS_ERR(timeline)) {
 		ret = PTR_ERR(timeline);
 		goto error_deref_obj;
 	}
 
-	ring = intel_engine_create_ring(engine, timeline, ctx->ring_size);
+	ring = intel_engine_create_ring(engine,
+					timeline,
+					ce->gem_context->ring_size);
 	i915_timeline_put(timeline);
 	if (IS_ERR(ring)) {
 		ret = PTR_ERR(ring);
@@ -2922,7 +2904,7 @@ void intel_lr_context_resume(struct drm_i915_private *i915)
 		list_for_each_entry(ce, &ctx->active_engines, active_link) {
 			GEM_BUG_ON(!ce->ring);
 			intel_ring_reset(ce->ring, 0);
-			__execlists_update_reg_state(ce->engine, ce);
+			__execlists_update_reg_state(ce, ce->engine);
 		}
 	}
 }
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 602babbd737f..d06908c9584c 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1509,11 +1509,9 @@ alloc_context_vma(struct intel_engine_cs *engine)
 	return ERR_PTR(err);
 }
 
-static struct intel_context *
-__ring_context_pin(struct intel_engine_cs *engine,
-		   struct i915_gem_context *ctx,
-		   struct intel_context *ce)
+static int ring_context_pin(struct intel_context *ce)
 {
+	struct intel_engine_cs *engine = ce->engine;
 	int err;
 
 	/* One ringbuffer to rule them all */
@@ -1524,55 +1522,29 @@ __ring_context_pin(struct intel_engine_cs *engine,
 		struct i915_vma *vma;
 
 		vma = alloc_context_vma(engine);
-		if (IS_ERR(vma)) {
-			err = PTR_ERR(vma);
-			goto err;
-		}
+		if (IS_ERR(vma))
+			return PTR_ERR(vma);
 
 		ce->state = vma;
 	}
 
 	err = __context_pin(ce);
 	if (err)
-		goto err;
+		return err;
 
 	err = __context_pin_ppgtt(ce->gem_context);
 	if (err)
 		goto err_unpin;
 
-	mutex_lock(&ctx->mutex);
-	list_add(&ce->active_link, &ctx->active_engines);
-	mutex_unlock(&ctx->mutex);
-
-	i915_gem_context_get(ctx);
-	return ce;
+	return 0;
 
 err_unpin:
 	__context_unpin(ce);
-err:
-	ce->pin_count = 0;
-	return ERR_PTR(err);
-}
-
-static struct intel_context *
-ring_context_pin(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
-{
-	struct intel_context *ce;
-
-	ce = intel_context_instance(ctx, engine);
-	if (IS_ERR(ce))
-		return ce;
-
-	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
-
-	if (likely(ce->pin_count++))
-		return ce;
-	GEM_BUG_ON(!ce->pin_count); /* no overflow please! */
-
-	return __ring_context_pin(engine, ctx, ce);
+	return err;
 }
 
 static const struct intel_context_ops ring_context_ops = {
+	.pin = ring_context_pin,
 	.unpin = ring_context_unpin,
 	.destroy = ring_context_destroy,
 };
@@ -2283,7 +2255,6 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
 	engine->reset.finish = reset_finish;
 
 	engine->cops = &ring_context_ops;
-	engine->context_pin = ring_context_pin;
 	engine->request_alloc = ring_request_alloc;
 
 	/*
diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c
index d9bbb102677f..e483329fd533 100644
--- a/drivers/gpu/drm/i915/selftests/mock_engine.c
+++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
@@ -137,41 +137,20 @@ static void mock_context_destroy(struct intel_context *ce)
 		mock_ring_free(ce->ring);
 }
 
-static struct intel_context *
-mock_context_pin(struct intel_engine_cs *engine,
-		 struct i915_gem_context *ctx)
+static int mock_context_pin(struct intel_context *ce)
 {
-	struct intel_context *ce;
-	int err = -ENOMEM;
-
-	ce = intel_context_instance(ctx, engine);
-	if (IS_ERR(ce))
-		return ce;
-
-	if (ce->pin_count++)
-		return ce;
-
 	if (!ce->ring) {
-		ce->ring = mock_ring(engine);
+		ce->ring = mock_ring(ce->engine);
 		if (!ce->ring)
-			goto err;
+			return -ENOMEM;
 	}
 
 	mock_timeline_pin(ce->ring->timeline);
-
-	mutex_lock(&ctx->mutex);
-	list_add(&ce->active_link, &ctx->active_engines);
-	mutex_unlock(&ctx->mutex);
-
-	i915_gem_context_get(ctx);
-	return ce;
-
-err:
-	ce->pin_count = 0;
-	return ERR_PTR(err);
+	return 0;
 }
 
 static const struct intel_context_ops mock_context_ops = {
+	.pin = mock_context_pin,
 	.unpin = mock_context_unpin,
 	.destroy = mock_context_destroy,
 };
@@ -235,7 +214,6 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
 	engine->base.status_page.addr = (void *)(engine + 1);
 
 	engine->base.cops = &mock_context_ops;
-	engine->base.context_pin = mock_context_pin;
 	engine->base.request_alloc = mock_request_alloc;
 	engine->base.emit_flush = mock_emit_flush;
 	engine->base.emit_fini_breadcrumb = mock_emit_breadcrumb;
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 78+ messages in thread

* [PATCH 21/43] drm/i915: Track the pinned kernel contexts on each engine
  2019-03-06 14:24 RFC Breaking up GEM struct_mutex for async-pages Chris Wilson
                   ` (19 preceding siblings ...)
  2019-03-06 14:24 ` [PATCH 20/43] drm/i915: Make context pinning part of intel_context_ops Chris Wilson
@ 2019-03-06 14:24 ` Chris Wilson
  2019-03-08  9:26   ` Tvrtko Ursulin
  2019-03-06 14:24 ` [PATCH 22/43] drm/i915: Introduce intel_context.pin_mutex for pin management Chris Wilson
                   ` (24 subsequent siblings)
  45 siblings, 1 reply; 78+ messages in thread
From: Chris Wilson @ 2019-03-06 14:24 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld

Each engine acquires a pin on the kernel contexts (normal and preempt)
so that the logical state is always available on demand. Keep track of
each engines pin by storing the returned pointer on the engine for quick
access.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_engine_cs.c       | 65 ++++++++++----------
 drivers/gpu/drm/i915/intel_engine_types.h    |  8 +--
 drivers/gpu/drm/i915/intel_guc_ads.c         |  3 +-
 drivers/gpu/drm/i915/intel_lrc.c             | 13 ++--
 drivers/gpu/drm/i915/intel_ringbuffer.c      |  3 +-
 drivers/gpu/drm/i915/selftests/mock_engine.c |  5 +-
 6 files changed, 45 insertions(+), 52 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 1937128ea267..652c1b3ba190 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -641,12 +641,6 @@ void intel_engines_set_scheduler_caps(struct drm_i915_private *i915)
 		i915->caps.scheduler = 0;
 }
 
-static void __intel_context_unpin(struct i915_gem_context *ctx,
-				  struct intel_engine_cs *engine)
-{
-	intel_context_unpin(intel_context_lookup(ctx, engine));
-}
-
 struct measure_breadcrumb {
 	struct i915_request rq;
 	struct i915_timeline timeline;
@@ -697,6 +691,20 @@ static int measure_breadcrumb_dw(struct intel_engine_cs *engine)
 	return dw;
 }
 
+static int pin_context(struct i915_gem_context *ctx,
+		       struct intel_engine_cs *engine,
+		       struct intel_context **out)
+{
+	struct intel_context *ce;
+
+	ce = intel_context_pin(ctx, engine);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
+
+	*out = ce;
+	return 0;
+}
+
 /**
  * intel_engines_init_common - initialize cengine state which might require hw access
  * @engine: Engine to initialize.
@@ -711,11 +719,8 @@ static int measure_breadcrumb_dw(struct intel_engine_cs *engine)
 int intel_engine_init_common(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *i915 = engine->i915;
-	struct intel_context *ce;
 	int ret;
 
-	engine->set_default_submission(engine);
-
 	/* We may need to do things with the shrinker which
 	 * require us to immediately switch back to the default
 	 * context. This can cause a problem as pinning the
@@ -723,36 +728,34 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
 	 * be available. To avoid this we always pin the default
 	 * context.
 	 */
-	ce = intel_context_pin(i915->kernel_context, engine);
-	if (IS_ERR(ce))
-		return PTR_ERR(ce);
+	ret = pin_context(i915->kernel_context, engine,
+			  &engine->kernel_context);
+	if (ret)
+		return ret;
 
 	/*
 	 * Similarly the preempt context must always be available so that
-	 * we can interrupt the engine at any time.
+	 * we can interrupt the engine at any time. However, as preemption
+	 * is optional, we allow it to fail.
 	 */
-	if (i915->preempt_context) {
-		ce = intel_context_pin(i915->preempt_context, engine);
-		if (IS_ERR(ce)) {
-			ret = PTR_ERR(ce);
-			goto err_unpin_kernel;
-		}
-	}
+	if (i915->preempt_context)
+		pin_context(i915->preempt_context, engine,
+			    &engine->preempt_context);
 
 	ret = measure_breadcrumb_dw(engine);
 	if (ret < 0)
-		goto err_unpin_preempt;
+		goto err_unpin;
 
 	engine->emit_fini_breadcrumb_dw = ret;
 
-	return 0;
+	engine->set_default_submission(engine);
 
-err_unpin_preempt:
-	if (i915->preempt_context)
-		__intel_context_unpin(i915->preempt_context, engine);
+	return 0;
 
-err_unpin_kernel:
-	__intel_context_unpin(i915->kernel_context, engine);
+err_unpin:
+	if (engine->preempt_context)
+		intel_context_unpin(engine->preempt_context);
+	intel_context_unpin(engine->kernel_context);
 	return ret;
 }
 
@@ -765,8 +768,6 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
  */
 void intel_engine_cleanup_common(struct intel_engine_cs *engine)
 {
-	struct drm_i915_private *i915 = engine->i915;
-
 	cleanup_status_page(engine);
 
 	intel_engine_fini_breadcrumbs(engine);
@@ -776,9 +777,9 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
 	if (engine->default_state)
 		i915_gem_object_put(engine->default_state);
 
-	if (i915->preempt_context)
-		__intel_context_unpin(i915->preempt_context, engine);
-	__intel_context_unpin(i915->kernel_context, engine);
+	if (engine->preempt_context)
+		intel_context_unpin(engine->preempt_context);
+	intel_context_unpin(engine->kernel_context);
 
 	i915_timeline_fini(&engine->timeline);
 
diff --git a/drivers/gpu/drm/i915/intel_engine_types.h b/drivers/gpu/drm/i915/intel_engine_types.h
index 21f3275cc00c..b0aa1f0d4e47 100644
--- a/drivers/gpu/drm/i915/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/intel_engine_types.h
@@ -231,11 +231,6 @@ struct intel_engine_execlists {
 	 */
 	u32 *csb_status;
 
-	/**
-	 * @preempt_context: the HW context for injecting preempt-to-idle
-	 */
-	struct intel_context *preempt_context;
-
 	/**
 	 * @preempt_complete_status: expected CSB upon completing preemption
 	 */
@@ -271,6 +266,9 @@ struct intel_engine_cs {
 
 	struct i915_timeline timeline;
 
+	struct intel_context *kernel_context; /* pinned */
+	struct intel_context *preempt_context; /* pinned; optional */
+
 	struct drm_i915_gem_object *default_state;
 	void *pinned_default_state;
 
diff --git a/drivers/gpu/drm/i915/intel_guc_ads.c b/drivers/gpu/drm/i915/intel_guc_ads.c
index b4ff0045a605..bec62f34b15a 100644
--- a/drivers/gpu/drm/i915/intel_guc_ads.c
+++ b/drivers/gpu/drm/i915/intel_guc_ads.c
@@ -121,8 +121,7 @@ int intel_guc_ads_create(struct intel_guc *guc)
 	 * to find it. Note that we have to skip our header (1 page),
 	 * because our GuC shared data is there.
 	 */
-	kernel_ctx_vma = intel_context_lookup(dev_priv->kernel_context,
-					      dev_priv->engine[RCS0])->state;
+	kernel_ctx_vma = dev_priv->engine[RCS0]->kernel_context->state;
 	blob->ads.golden_context_lrca =
 		intel_guc_ggtt_offset(guc, kernel_ctx_vma) + skipped_offset;
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 79107ab771d8..f7134d650187 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -626,7 +626,7 @@ static void port_assign(struct execlist_port *port, struct i915_request *rq)
 static void inject_preempt_context(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists *execlists = &engine->execlists;
-	struct intel_context *ce = execlists->preempt_context;
+	struct intel_context *ce = engine->preempt_context;
 	unsigned int n;
 
 	GEM_BUG_ON(execlists->preempt_complete_status !=
@@ -2321,7 +2321,7 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
 
 	engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
 	engine->flags |= I915_ENGINE_SUPPORTS_STATS;
-	if (engine->i915->preempt_context)
+	if (engine->preempt_context)
 		engine->flags |= I915_ENGINE_HAS_PREEMPTION;
 }
 
@@ -2423,14 +2423,9 @@ static int logical_ring_init(struct intel_engine_cs *engine)
 	}
 
 	execlists->preempt_complete_status = ~0u;
-	if (i915->preempt_context) {
-		struct intel_context *ce =
-			intel_context_lookup(i915->preempt_context, engine);
-
-		execlists->preempt_context = ce;
+	if (engine->preempt_context)
 		execlists->preempt_complete_status =
-			upper_32_bits(ce->lrc_desc);
-	}
+			upper_32_bits(engine->preempt_context->lrc_desc);
 
 	execlists->csb_status =
 		&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index d06908c9584c..03656021d04c 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1735,8 +1735,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
 		 * placeholder we use to flush other contexts.
 		 */
 		*cs++ = MI_SET_CONTEXT;
-		*cs++ = i915_ggtt_offset(intel_context_lookup(i915->kernel_context,
-							      engine)->state) |
+		*cs++ = i915_ggtt_offset(engine->kernel_context->state) |
 			MI_MM_SPACE_GTT |
 			MI_RESTORE_INHIBIT;
 	}
diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c
index e483329fd533..99d7463218d0 100644
--- a/drivers/gpu/drm/i915/selftests/mock_engine.c
+++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
@@ -233,7 +233,8 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
 	timer_setup(&engine->hw_delay, hw_delay_complete, 0);
 	INIT_LIST_HEAD(&engine->hw_queue);
 
-	if (IS_ERR(intel_context_pin(i915->kernel_context, &engine->base)))
+	if (pin_context(i915->kernel_context, &engine->base,
+			&engine->base.kernel_context))
 		goto err_breadcrumbs;
 
 	return &engine->base;
@@ -276,7 +277,7 @@ void mock_engine_free(struct intel_engine_cs *engine)
 	if (ce)
 		intel_context_unpin(ce);
 
-	__intel_context_unpin(engine->i915->kernel_context, engine);
+	intel_context_unpin(engine->kernel_context);
 
 	intel_engine_fini_breadcrumbs(engine);
 	i915_timeline_fini(&engine->timeline);
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 78+ messages in thread

* [PATCH 22/43] drm/i915: Introduce intel_context.pin_mutex for pin management
  2019-03-06 14:24 RFC Breaking up GEM struct_mutex for async-pages Chris Wilson
                   ` (20 preceding siblings ...)
  2019-03-06 14:24 ` [PATCH 21/43] drm/i915: Track the pinned kernel contexts on each engine Chris Wilson
@ 2019-03-06 14:24 ` Chris Wilson
  2019-03-06 14:43   ` Tvrtko Ursulin
  2019-03-06 14:24 ` [PATCH 23/43] drm/i915: Load balancing across a virtual engine Chris Wilson
                   ` (23 subsequent siblings)
  45 siblings, 1 reply; 78+ messages in thread
From: Chris Wilson @ 2019-03-06 14:24 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld

Introduce a mutex to start locking the HW contexts independently of
struct_mutex, with a view to reducing the coarse struct_mutex. The
intel_context.pin_mutex is used to guard the transition to and from being
pinned on the gpu, and so is required before starting to build any
request. The intel_context will then remain pinned until the request
completes, but the mutex can be released immediately unpin completion of
pinning the context.

A slight variant of the above is used by per-context sseu that wants to
inspect the pinned status of the context, and requires that it remains
stable (either !pinned or pinned) across its operation. By using the
pin_mutex to serialise operations while pin_count==0, we can take that
pin_mutex for stabilise the boolean pin status.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c               |  2 +-
 drivers/gpu/drm/i915/i915_gem_context.c       | 76 +++++++------------
 drivers/gpu/drm/i915/intel_context.c          | 56 ++++++++++++--
 drivers/gpu/drm/i915/intel_context.h          | 38 ++++++----
 drivers/gpu/drm/i915/intel_context_types.h    |  8 +-
 drivers/gpu/drm/i915/intel_lrc.c              |  4 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c       |  4 +-
 .../gpu/drm/i915/selftests/i915_gem_context.c |  2 +-
 drivers/gpu/drm/i915/selftests/mock_engine.c  |  2 +-
 9 files changed, 110 insertions(+), 82 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 71d8ce38f42c..8863dd414cde 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4662,7 +4662,7 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
 		if (!state)
 			continue;
 
-		GEM_BUG_ON(ce->pin_count);
+		GEM_BUG_ON(intel_context_is_pinned(ce));
 
 		/*
 		 * As we will hold a reference to the logical state, it will
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index f7a075efccad..35541f99b37d 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -1098,23 +1098,27 @@ static int gen8_emit_rpcs_config(struct i915_request *rq,
 }
 
 static int
-gen8_modify_rpcs_gpu(struct intel_context *ce,
-		     struct intel_engine_cs *engine,
-		     struct intel_sseu sseu)
+gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu)
 {
-	struct drm_i915_private *i915 = engine->i915;
+	struct drm_i915_private *i915 = ce->engine->i915;
 	struct i915_request *rq, *prev;
 	intel_wakeref_t wakeref;
 	int ret;
 
-	GEM_BUG_ON(!ce->pin_count);
+	lockdep_assert_held(&ce->pin_mutex);
 
-	lockdep_assert_held(&i915->drm.struct_mutex);
+	/*
+	 * If context is not idle we have to submit an ordered request to modify
+	 * its context image via the kernel context. Pristine and idle contexts
+	 * will be configured on pinning.
+	 */
+	if (!intel_context_is_pinned(ce))
+		return 0;
 
 	/* Submitting requests etc needs the hw awake. */
 	wakeref = intel_runtime_pm_get(i915);
 
-	rq = i915_request_alloc(engine, i915->kernel_context);
+	rq = i915_request_alloc(ce->engine, i915->kernel_context);
 	if (IS_ERR(rq)) {
 		ret = PTR_ERR(rq);
 		goto out_put;
@@ -1158,53 +1162,30 @@ gen8_modify_rpcs_gpu(struct intel_context *ce,
 }
 
 static int
-__i915_gem_context_reconfigure_sseu(struct i915_gem_context *ctx,
-				    struct intel_engine_cs *engine,
-				    struct intel_sseu sseu)
+i915_gem_context_reconfigure_sseu(struct i915_gem_context *ctx,
+				  struct intel_engine_cs *engine,
+				  struct intel_sseu sseu)
 {
 	struct intel_context *ce;
 	int ret = 0;
 
-	ce = intel_context_instance(ctx, engine);
-	if (IS_ERR(ce))
-		return PTR_ERR(ce);
-
 	GEM_BUG_ON(INTEL_GEN(ctx->i915) < 8);
 	GEM_BUG_ON(engine->id != RCS0);
 
+	ce = intel_context_pin_lock(ctx, engine);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
+
 	/* Nothing to do if unmodified. */
 	if (!memcmp(&ce->sseu, &sseu, sizeof(sseu)))
-		return 0;
-
-	/*
-	 * If context is not idle we have to submit an ordered request to modify
-	 * its context image via the kernel context. Pristine and idle contexts
-	 * will be configured on pinning.
-	 */
-	if (ce->pin_count)
-		ret = gen8_modify_rpcs_gpu(ce, engine, sseu);
+		goto unlock;
 
+	ret = gen8_modify_rpcs(ce, sseu);
 	if (!ret)
 		ce->sseu = sseu;
 
-	return ret;
-}
-
-static int
-i915_gem_context_reconfigure_sseu(struct i915_gem_context *ctx,
-				  struct intel_engine_cs *engine,
-				  struct intel_sseu sseu)
-{
-	int ret;
-
-	ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex);
-	if (ret)
-		return ret;
-
-	ret = __i915_gem_context_reconfigure_sseu(ctx, engine, sseu);
-
-	mutex_unlock(&ctx->i915->drm.struct_mutex);
-
+unlock:
+	intel_context_pin_unlock(ce);
 	return ret;
 }
 
@@ -1622,11 +1603,12 @@ static int clone_sseu(struct i915_gem_context *dst,
 		if (!memcmp(&sseu, &default_sseu, sizeof(sseu)))
 			continue;
 
-		ce = intel_context_instance(dst, engine);
+		ce = intel_context_pin_lock(dst, engine);
 		if (IS_ERR(ce))
 			return PTR_ERR(ce);
 
 		ce->sseu = sseu;
+		intel_context_pin_unlock(ce);
 	}
 
 	return 0;
@@ -1806,7 +1788,6 @@ static int get_sseu(struct i915_gem_context *ctx,
 	struct intel_engine_cs *engine;
 	struct intel_context *ce;
 	unsigned long lookup;
-	int ret;
 
 	if (args->size == 0)
 		goto out;
@@ -1833,21 +1814,16 @@ static int get_sseu(struct i915_gem_context *ctx,
 	if (!engine)
 		return -EINVAL;
 
-	ce = intel_context_instance(ctx, engine);
+	ce = intel_context_pin_lock(ctx, engine); /* serialise with set_sseu */
 	if (IS_ERR(ce))
 		return PTR_ERR(ce);
 
-	/* Only use for mutex here is to serialize get_param and set_param. */
-	ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex);
-	if (ret)
-		return ret;
-
 	user_sseu.slice_mask = ce->sseu.slice_mask;
 	user_sseu.subslice_mask = ce->sseu.subslice_mask;
 	user_sseu.min_eus_per_subslice = ce->sseu.min_eus_per_subslice;
 	user_sseu.max_eus_per_subslice = ce->sseu.max_eus_per_subslice;
 
-	mutex_unlock(&ctx->i915->drm.struct_mutex);
+	intel_context_pin_unlock(ce);
 
 	if (copy_to_user(u64_to_user_ptr(args->value), &user_sseu,
 			 sizeof(user_sseu)))
diff --git a/drivers/gpu/drm/i915/intel_context.c b/drivers/gpu/drm/i915/intel_context.c
index 4c1dacb69f51..8bf0d38d3a08 100644
--- a/drivers/gpu/drm/i915/intel_context.c
+++ b/drivers/gpu/drm/i915/intel_context.c
@@ -102,7 +102,7 @@ void __intel_context_remove(struct intel_context *ce)
 	spin_unlock(&ctx->hw_contexts_lock);
 }
 
-struct intel_context *
+static struct intel_context *
 intel_context_instance(struct i915_gem_context *ctx,
 		       struct intel_engine_cs *engine)
 {
@@ -126,6 +126,21 @@ intel_context_instance(struct i915_gem_context *ctx,
 	return pos;
 }
 
+struct intel_context *
+intel_context_pin_lock(struct i915_gem_context *ctx,
+		       struct intel_engine_cs *engine)
+	__acquires(ce->pin_mutex)
+{
+	struct intel_context *ce;
+
+	ce = intel_context_instance(ctx, engine);
+	if (IS_ERR(ce))
+		return ce;
+
+	mutex_lock(&ce->pin_mutex);
+	return ce;
+}
+
 struct intel_context *
 intel_context_pin(struct i915_gem_context *ctx,
 		  struct intel_engine_cs *engine)
@@ -133,16 +148,20 @@ intel_context_pin(struct i915_gem_context *ctx,
 	struct intel_context *ce;
 	int err;
 
-	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
-
 	ce = intel_context_instance(ctx, engine);
 	if (IS_ERR(ce))
 		return ce;
 
-	if (unlikely(!ce->pin_count++)) {
+	if (likely(atomic_inc_not_zero(&ce->pin_count)))
+		return ce;
+
+	if (mutex_lock_interruptible(&ce->pin_mutex))
+		return ERR_PTR(-EINTR);
+
+	if (likely(!atomic_read(&ce->pin_count))) {
 		err = ce->ops->pin(ce);
 		if (err)
-			goto err_unpin;
+			goto err;
 
 		mutex_lock(&ctx->mutex);
 		list_add(&ce->active_link, &ctx->active_engines);
@@ -150,16 +169,35 @@ intel_context_pin(struct i915_gem_context *ctx,
 
 		i915_gem_context_get(ctx);
 		GEM_BUG_ON(ce->gem_context != ctx);
+
+		smp_mb__before_atomic();
 	}
-	GEM_BUG_ON(!ce->pin_count); /* no overflow! */
 
+	atomic_inc(&ce->pin_count);
+	GEM_BUG_ON(!intel_context_is_pinned(ce)); /* no overflow! */
+
+	mutex_unlock(&ce->pin_mutex);
 	return ce;
 
-err_unpin:
-	ce->pin_count = 0;
+err:
+	mutex_unlock(&ce->pin_mutex);
 	return ERR_PTR(err);
 }
 
+void intel_context_unpin(struct intel_context *ce)
+{
+	if (likely(atomic_add_unless(&ce->pin_count, -1, 1)))
+		return;
+
+	/* We may be called from inside intel_context_pin() to evict another */
+	mutex_lock_nested(&ce->pin_mutex, SINGLE_DEPTH_NESTING);
+
+	if (likely(atomic_dec_and_test(&ce->pin_count)))
+		ce->ops->unpin(ce);
+
+	mutex_unlock(&ce->pin_mutex);
+}
+
 static void intel_context_retire(struct i915_active_request *active,
 				 struct i915_request *rq)
 {
@@ -181,6 +219,8 @@ intel_context_init(struct intel_context *ce,
 	INIT_LIST_HEAD(&ce->signal_link);
 	INIT_LIST_HEAD(&ce->signals);
 
+	mutex_init(&ce->pin_mutex);
+
 	/* Use the whole device by default */
 	ce->sseu = intel_device_default_sseu(ctx->i915);
 
diff --git a/drivers/gpu/drm/i915/intel_context.h b/drivers/gpu/drm/i915/intel_context.h
index ea21473891f7..9546d932406a 100644
--- a/drivers/gpu/drm/i915/intel_context.h
+++ b/drivers/gpu/drm/i915/intel_context.h
@@ -7,6 +7,8 @@
 #ifndef __INTEL_CONTEXT_H__
 #define __INTEL_CONTEXT_H__
 
+#include <linux/lockdep.h>
+
 #include "intel_context_types.h"
 #include "intel_engine_types.h"
 
@@ -29,18 +31,30 @@ intel_context_lookup(struct i915_gem_context *ctx,
 		     struct intel_engine_cs *engine);
 
 /**
- * intel_context_instance - Lookup or allocate the HW context for (ctx, engine)
+ * intel_context_pin_lock - Stablises the 'pinned' status of the HW context
  * @ctx - the parent GEM context
  * @engine - the target HW engine
  *
- * Returns the existing HW context for this pair of (GEM context, engine), or
- * allocates and initialises a fresh context. Once allocated, the HW context
- * remains resident until the GEM context is destroyed.
+ * Acquire a lock on the pinned status of the HW context, such that the context
+ * can neither be bound to the GPU or unbound whilst the lock is held, i.e.
+ * intel_context_is_pinned() remains stable.
  */
 struct intel_context *
-intel_context_instance(struct i915_gem_context *ctx,
+intel_context_pin_lock(struct i915_gem_context *ctx,
 		       struct intel_engine_cs *engine);
 
+static inline bool
+intel_context_is_pinned(struct intel_context *ce)
+{
+	return atomic_read(&ce->pin_count);
+}
+
+static inline void intel_context_pin_unlock(struct intel_context *ce)
+__releases(ce->pin_mutex)
+{
+	mutex_unlock(&ce->pin_mutex);
+}
+
 struct intel_context *
 __intel_context_insert(struct i915_gem_context *ctx,
 		       struct intel_engine_cs *engine,
@@ -53,18 +67,10 @@ intel_context_pin(struct i915_gem_context *ctx, struct intel_engine_cs *engine);
 
 static inline void __intel_context_pin(struct intel_context *ce)
 {
-	GEM_BUG_ON(!ce->pin_count);
-	ce->pin_count++;
+	GEM_BUG_ON(!intel_context_is_pinned(ce));
+	atomic_inc(&ce->pin_count);
 }
 
-static inline void intel_context_unpin(struct intel_context *ce)
-{
-	GEM_BUG_ON(!ce->pin_count);
-	if (--ce->pin_count)
-		return;
-
-	GEM_BUG_ON(!ce->ops);
-	ce->ops->unpin(ce);
-}
+void intel_context_unpin(struct intel_context *ce);
 
 #endif /* __INTEL_CONTEXT_H__ */
diff --git a/drivers/gpu/drm/i915/intel_context_types.h b/drivers/gpu/drm/i915/intel_context_types.h
index 804fa93de853..6dc9b4b9067b 100644
--- a/drivers/gpu/drm/i915/intel_context_types.h
+++ b/drivers/gpu/drm/i915/intel_context_types.h
@@ -8,6 +8,7 @@
 #define __INTEL_CONTEXT_TYPES__
 
 #include <linux/list.h>
+#include <linux/mutex.h>
 #include <linux/rbtree.h>
 #include <linux/types.h>
 
@@ -38,14 +39,19 @@ struct intel_context {
 	struct i915_gem_context *gem_context;
 	struct intel_engine_cs *engine;
 	struct intel_engine_cs *active;
+
 	struct list_head active_link;
 	struct list_head signal_link;
 	struct list_head signals;
+
 	struct i915_vma *state;
 	struct intel_ring *ring;
+
 	u32 *lrc_reg_state;
 	u64 lrc_desc;
-	int pin_count;
+
+	atomic_t pin_count;
+	struct mutex pin_mutex; /* guards pinning and associated on-gpuing */
 
 	/**
 	 * active_tracker: Active tracker for the external rq activity
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index f7134d650187..7b938eaff9c5 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1244,7 +1244,7 @@ static void __execlists_context_fini(struct intel_context *ce)
 
 static void execlists_context_destroy(struct intel_context *ce)
 {
-	GEM_BUG_ON(ce->pin_count);
+	GEM_BUG_ON(intel_context_is_pinned(ce));
 
 	if (ce->state)
 		__execlists_context_fini(ce);
@@ -1481,7 +1481,7 @@ static int execlists_request_alloc(struct i915_request *request)
 {
 	int ret;
 
-	GEM_BUG_ON(!request->hw_context->pin_count);
+	GEM_BUG_ON(!intel_context_is_pinned(request->hw_context));
 
 	/*
 	 * Flush enough space to reduce the likelihood of waiting after
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 03656021d04c..cc4fcd89b845 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1357,7 +1357,7 @@ static void __ring_context_fini(struct intel_context *ce)
 
 static void ring_context_destroy(struct intel_context *ce)
 {
-	GEM_BUG_ON(ce->pin_count);
+	GEM_BUG_ON(intel_context_is_pinned(ce));
 
 	if (ce->state)
 		__ring_context_fini(ce);
@@ -1918,7 +1918,7 @@ static int ring_request_alloc(struct i915_request *request)
 {
 	int ret;
 
-	GEM_BUG_ON(!request->hw_context->pin_count);
+	GEM_BUG_ON(!intel_context_is_pinned(request->hw_context));
 	GEM_BUG_ON(request->timeline->has_initial_breadcrumb);
 
 	/*
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index df1380498612..b6ed055abf5c 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -1024,7 +1024,7 @@ __sseu_test(struct drm_i915_private *i915,
 	if (ret)
 		goto out_context;
 
-	ret = __i915_gem_context_reconfigure_sseu(ctx, engine, sseu);
+	ret = i915_gem_context_reconfigure_sseu(ctx, engine, sseu);
 	if (ret)
 		goto out_spin;
 
diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c
index 99d7463218d0..f6d120e05ee4 100644
--- a/drivers/gpu/drm/i915/selftests/mock_engine.c
+++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
@@ -131,7 +131,7 @@ static void mock_context_unpin(struct intel_context *ce)
 
 static void mock_context_destroy(struct intel_context *ce)
 {
-	GEM_BUG_ON(ce->pin_count);
+	GEM_BUG_ON(intel_context_is_pinned(ce));
 
 	if (ce->ring)
 		mock_ring_free(ce->ring);
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 78+ messages in thread

* [PATCH 23/43] drm/i915: Load balancing across a virtual engine
  2019-03-06 14:24 RFC Breaking up GEM struct_mutex for async-pages Chris Wilson
                   ` (21 preceding siblings ...)
  2019-03-06 14:24 ` [PATCH 22/43] drm/i915: Introduce intel_context.pin_mutex for pin management Chris Wilson
@ 2019-03-06 14:24 ` Chris Wilson
  2019-03-06 14:24 ` [PATCH 24/43] drm/i915: Extend execution fence to support a callback Chris Wilson
                   ` (22 subsequent siblings)
  45 siblings, 0 replies; 78+ messages in thread
From: Chris Wilson @ 2019-03-06 14:24 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld

Having allowed the user to define a set of engines that they will want
to only use, we go one step further and allow them to bind those engines
into a single virtual instance. Submitting a batch to the virtual engine
will then forward it to any one of the set in a manner as best to
distribute load.  The virtual engine has a single timeline across all
engines (it operates as a single queue), so it is not able to concurrently
run batches across multiple engines by itself; that is left up to the user
to submit multiple concurrent batches to multiple queues. Multiple users
will be load balanced across the system.

The mechanism used for load balancing in this patch is a late greedy
balancer. When a request is ready for execution, it is added to each
engine's queue, and when an engine is ready for its next request it
claims it from the virtual engine. The first engine to do so, wins, i.e.
the request is executed at the earliest opportunity (idle moment) in the
system.

As not all HW is created equal, the user is still able to skip the
virtual engine and execute the batch on a specific engine, all within the
same queue. It will then be executed in order on the correct engine,
with execution on other virtual engines being moved away due to the load
detection.

A couple of areas for potential improvement left!

- The virtual engine always take priority over equal-priority tasks.
Mostly broken up by applying FQ_CODEL rules for prioritising new clients,
and hopefully the virtual and real engines are not then congested (i.e.
all work is via virtual engines, or all work is to the real engine).

- We require the breadcrumb irq around every virtual engine request. For
normal engines, we eliminate the need for the slow round trip via
interrupt by using the submit fence and queueing in order. For virtual
engines, we have to allow any job to transfer to a new ring, and cannot
coalesce the submissions, so require the completion fence instead,
forcing the persistent use of interrupts.

- We only drip feed single requests through each virtual engine and onto
the physical engines, even if there was enough work to fill all ELSP,
leaving small stalls with an idle CS event at the end of every request.
Could we be greedy and fill both slots? Being lazy is virtuous for load
distribution on less-than-full workloads though.

Other areas of improvement are more general, such as reducing lock
contention, reducing dispatch overhead, looking at direct submission
rather than bouncing around tasklets etc.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.h            |   5 +
 drivers/gpu/drm/i915/i915_gem_context.c    | 153 +++++-
 drivers/gpu/drm/i915/i915_scheduler.c      |  17 +-
 drivers/gpu/drm/i915/i915_timeline_types.h |   1 +
 drivers/gpu/drm/i915/intel_engine_types.h  |   8 +
 drivers/gpu/drm/i915/intel_lrc.c           | 521 ++++++++++++++++++++-
 drivers/gpu/drm/i915/intel_lrc.h           |  11 +
 drivers/gpu/drm/i915/selftests/intel_lrc.c | 165 +++++++
 include/uapi/drm/i915_drm.h                |  30 ++
 9 files changed, 895 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h
index b0e4b976880c..9905fcdd33c8 100644
--- a/drivers/gpu/drm/i915/i915_gem.h
+++ b/drivers/gpu/drm/i915/i915_gem.h
@@ -89,4 +89,9 @@ static inline bool __tasklet_is_enabled(const struct tasklet_struct *t)
 	return !atomic_read(&t->count);
 }
 
+static inline bool __tasklet_is_scheduled(struct tasklet_struct *t)
+{
+	return test_bit(TASKLET_STATE_SCHED, &t->state);
+}
+
 #endif /* __I915_GEM_H__ */
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 35541f99b37d..aa8076c06006 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -86,12 +86,16 @@
  */
 
 #include <linux/log2.h>
+#include <linux/nospec.h>
+
 #include <drm/i915_drm.h>
+
 #include "i915_drv.h"
 #include "i915_globals.h"
 #include "i915_trace.h"
 #include "i915_user_extensions.h"
 #include "intel_lrc_reg.h"
+#include "intel_lrc.h"
 #include "intel_workarounds.h"
 
 #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
@@ -238,6 +242,20 @@ static void release_hw_id(struct i915_gem_context *ctx)
 	mutex_unlock(&i915->contexts.mutex);
 }
 
+static void free_engines(struct intel_engine_cs **engines, int count)
+{
+	int i;
+
+	if (!engines)
+		return;
+
+	/* We own the veng we created; regular engines are ignored */
+	for (i = 0; i < count; i++)
+		intel_virtual_engine_destroy(engines[i]);
+
+	kfree(engines);
+}
+
 static void i915_gem_context_free(struct i915_gem_context *ctx)
 {
 	struct intel_context *it, *n;
@@ -248,8 +266,7 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
 
 	release_hw_id(ctx);
 	i915_ppgtt_put(ctx->ppgtt);
-
-	kfree(ctx->engines);
+	free_engines(ctx->engines, ctx->nengine);
 
 	rbtree_postorder_for_each_entry_safe(it, n, &ctx->hw_contexts, node)
 		it->ops->destroy(it);
@@ -1340,13 +1357,116 @@ static int set_sseu(struct i915_gem_context *ctx,
 	return 0;
 };
 
+static int check_user_mbz16(u16 __user *user)
+{
+	u16 mbz;
+
+	if (get_user(mbz, user))
+		return -EFAULT;
+
+	return mbz ? -EINVAL : 0;
+}
+
+static int check_user_mbz32(u32 __user *user)
+{
+	u32 mbz;
+
+	if (get_user(mbz, user))
+		return -EFAULT;
+
+	return mbz ? -EINVAL : 0;
+}
+
+static int check_user_mbz64(u64 __user *user)
+{
+	u64 mbz;
+
+	if (get_user(mbz, user))
+		return -EFAULT;
+
+	return mbz ? -EINVAL : 0;
+}
+
 struct set_engines {
 	struct i915_gem_context *ctx;
 	struct intel_engine_cs **engines;
 	unsigned int nengine;
 };
 
+static int
+set_engines__load_balance(struct i915_user_extension __user *base, void *data)
+{
+	struct i915_context_engines_load_balance __user *ext =
+		container_of_user(base, typeof(*ext), base);
+	const struct set_engines *set = data;
+	struct intel_engine_cs *ve;
+	unsigned int n;
+	u64 mask;
+	u16 idx;
+	int err;
+
+	if (!HAS_EXECLISTS(set->ctx->i915))
+		return -ENODEV;
+
+	if (USES_GUC_SUBMISSION(set->ctx->i915))
+		return -ENODEV; /* not implement yet */
+
+	if (get_user(idx, &ext->engine_index))
+		return -EFAULT;
+
+	if (idx >= set->nengine)
+		return -EINVAL;
+
+	idx = array_index_nospec(idx, set->nengine);
+	if (set->engines[idx])
+		return -EEXIST;
+
+	err = check_user_mbz16(&ext->mbz16);
+	if (err)
+		return err;
+
+	err = check_user_mbz32(&ext->flags);
+	if (err)
+		return err;
+
+	for (n = 0; n < ARRAY_SIZE(ext->mbz64); n++) {
+		err = check_user_mbz64(&ext->mbz64[n]);
+		if (err)
+			return err;
+	}
+
+	if (get_user(mask, &ext->engines_mask))
+		return -EFAULT;
+
+	mask &= GENMASK_ULL(set->nengine - 1, 0) & ~BIT_ULL(idx);
+	if (!mask)
+		return -EINVAL;
+
+	if (is_power_of_2(mask)) {
+		ve = set->engines[__ffs64(mask)];
+	} else {
+		struct intel_engine_cs *stack[64];
+		int bit;
+
+		n = 0;
+		for_each_set_bit(bit, (unsigned long *)&mask, set->nengine)
+			stack[n++] = set->engines[bit];
+
+		ve = intel_execlists_create_virtual(set->ctx, stack, n);
+	}
+	if (IS_ERR(ve))
+		return PTR_ERR(ve);
+
+	if (cmpxchg(&set->engines[idx], NULL, ve)) {
+		intel_virtual_engine_destroy(ve);
+		return -EEXIST;
+	}
+
+	return 0;
+}
+
 static const i915_user_extension_fn set_engines__extensions[] = {
+	[I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE] = set_engines__load_balance,
 };
 
 static int
@@ -1407,13 +1527,13 @@ set_engines(struct i915_gem_context *ctx,
 					   ARRAY_SIZE(set_engines__extensions),
 					   &set);
 	if (err) {
-		kfree(set.engines);
+		free_engines(set.engines, set.nengine);
 		return err;
 	}
 
 out:
 	mutex_lock(&ctx->i915->drm.struct_mutex);
-	kfree(ctx->engines);
+	free_engines(ctx->engines, ctx->nengine);
 	ctx->engines = set.engines;
 	ctx->nengine = set.nengine;
 	mutex_unlock(&ctx->i915->drm.struct_mutex);
@@ -1618,6 +1738,7 @@ static int clone_engines(struct i915_gem_context *dst,
 			 struct i915_gem_context *src)
 {
 	struct intel_engine_cs **engines;
+	int i;
 
 	engines = kmemdup(src->engines,
 			  sizeof(*src->engines) * src->nengine,
@@ -1625,6 +1746,30 @@ static int clone_engines(struct i915_gem_context *dst,
 	if (!engines)
 		return -ENOMEM;
 
+	/*
+	 * Virtual engines are singletons; they can only exist
+	 * inside a single context, because they embed their
+	 * HW context... As each virtual context implies a single
+	 * timeline (each engine can only dequeue a single request
+	 * at any time), it would be surprising for two contexts
+	 * to use the same engine. So let's create a copy of
+	 * the virtual engine instead.
+	 */
+	for (i = 0; i < src->nengine; i++) {
+		struct intel_engine_cs *engine = engines[i];
+
+		if (!intel_engine_is_virtual(engine))
+			continue;
+
+		engine = intel_execlists_clone_virtual(dst, engine);
+		if (IS_ERR(engine)) {
+			free_engines(engines, i);
+			return PTR_ERR(engine);
+		}
+
+		engines[i] = engine;
+	}
+
 	dst->engines = engines;
 	dst->nengine = src->nengine;
 	return 0;
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index e0f609d01564..bb9819dbe313 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -247,17 +247,25 @@ sched_lock_engine(const struct i915_sched_node *node,
 		  struct intel_engine_cs *locked,
 		  struct sched_cache *cache)
 {
-	struct intel_engine_cs *engine = node_to_request(node)->engine;
+	const struct i915_request *rq = node_to_request(node);
+	struct intel_engine_cs *engine;
 
 	GEM_BUG_ON(!locked);
 
-	if (engine != locked) {
+	/*
+	 * Virtual engines complicate acquiring the engine timeline lock,
+	 * as their rq->engine pointer is not stable until under that
+	 * engine lock. The simple ploy we use is to take the lock then
+	 * check that the rq still belongs to the newly locked engine.
+	 */
+	while (locked != (engine = READ_ONCE(rq->engine))) {
 		spin_unlock(&locked->timeline.lock);
 		memset(cache, 0, sizeof(*cache));
 		spin_lock(&engine->timeline.lock);
+		locked = engine;
 	}
 
-	return engine;
+	return locked;
 }
 
 static bool inflight(const struct i915_request *rq,
@@ -370,8 +378,11 @@ static void __i915_schedule(struct i915_request *rq,
 		if (prio <= node->attr.priority || node_signaled(node))
 			continue;
 
+		GEM_BUG_ON(node_to_request(node)->engine != engine);
+
 		node->attr.priority = prio;
 		if (!list_empty(&node->link)) {
+			GEM_BUG_ON(intel_engine_is_virtual(engine));
 			if (!cache.priolist)
 				cache.priolist =
 					i915_sched_lookup_priolist(engine,
diff --git a/drivers/gpu/drm/i915/i915_timeline_types.h b/drivers/gpu/drm/i915/i915_timeline_types.h
index 8ff146dc05ba..5e445f145eb1 100644
--- a/drivers/gpu/drm/i915/i915_timeline_types.h
+++ b/drivers/gpu/drm/i915/i915_timeline_types.h
@@ -25,6 +25,7 @@ struct i915_timeline {
 	spinlock_t lock;
 #define TIMELINE_CLIENT 0 /* default subclass */
 #define TIMELINE_ENGINE 1
+#define TIMELINE_VIRTUAL 2
 	struct mutex mutex; /* protects the flow of requests */
 
 	unsigned int pin_count;
diff --git a/drivers/gpu/drm/i915/intel_engine_types.h b/drivers/gpu/drm/i915/intel_engine_types.h
index b0aa1f0d4e47..d54d2a1840cc 100644
--- a/drivers/gpu/drm/i915/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/intel_engine_types.h
@@ -216,6 +216,7 @@ struct intel_engine_execlists {
 	 * @queue: queue of requests, in priority lists
 	 */
 	struct rb_root_cached queue;
+	struct rb_root_cached virtual;
 
 	/**
 	 * @csb_write: control register for Context Switch buffer
@@ -421,6 +422,7 @@ struct intel_engine_cs {
 #define I915_ENGINE_SUPPORTS_STATS   BIT(1)
 #define I915_ENGINE_HAS_PREEMPTION   BIT(2)
 #define I915_ENGINE_HAS_SEMAPHORES   BIT(3)
+#define I915_ENGINE_IS_VIRTUAL       BIT(4)
 	unsigned int flags;
 
 	/*
@@ -504,6 +506,12 @@ intel_engine_has_semaphores(const struct intel_engine_cs *engine)
 	return engine->flags & I915_ENGINE_HAS_SEMAPHORES;
 }
 
+static inline bool
+intel_engine_is_virtual(const struct intel_engine_cs *engine)
+{
+	return engine->flags & I915_ENGINE_IS_VIRTUAL;
+}
+
 #define instdone_slice_mask(dev_priv__) \
 	(IS_GEN(dev_priv__, 7) ? \
 	 1 : RUNTIME_INFO(dev_priv__)->sseu.slice_mask)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 7b938eaff9c5..0c97e8f30223 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -166,6 +166,28 @@
 
 #define ACTIVE_PRIORITY (I915_PRIORITY_NEWCLIENT | I915_PRIORITY_NOSEMAPHORE)
 
+struct virtual_engine {
+	struct intel_engine_cs base;
+
+	struct intel_context context;
+	struct kref kref;
+	struct rcu_head rcu;
+
+	struct i915_request *request;
+	struct ve_node {
+		struct rb_node rb;
+		int prio;
+	} nodes[I915_NUM_ENGINES];
+
+	unsigned int count;
+	struct intel_engine_cs *siblings[0];
+};
+
+static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
+{
+	return container_of(engine, struct virtual_engine, base);
+}
+
 static int execlists_context_deferred_alloc(struct intel_context *ce,
 					    struct intel_engine_cs *engine);
 static void execlists_init_reg_state(u32 *reg_state,
@@ -235,7 +257,8 @@ static int queue_prio(const struct intel_engine_execlists *execlists)
 }
 
 static inline bool need_preempt(const struct intel_engine_cs *engine,
-				const struct i915_request *rq)
+				const struct i915_request *rq,
+				struct rb_node *rb)
 {
 	int last_prio;
 
@@ -270,6 +293,22 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
 	    rq_prio(list_next_entry(rq, link)) > last_prio)
 		return true;
 
+	if (rb) { /* XXX virtual precedence */
+		struct virtual_engine *ve =
+			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
+		bool preempt = false;
+
+		if (engine == ve->siblings[0]) { /* only preempt one sibling */
+			spin_lock(&ve->base.timeline.lock);
+			if (ve->request)
+				preempt = rq_prio(ve->request) > last_prio;
+			spin_unlock(&ve->base.timeline.lock);
+		}
+
+		if (preempt)
+			return preempt;
+	}
+
 	/*
 	 * If the inflight context did not trigger the preemption, then maybe
 	 * it was the set of queued requests? Pick the highest priority in
@@ -388,6 +427,8 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
 	list_for_each_entry_safe_reverse(rq, rn,
 					 &engine->timeline.requests,
 					 link) {
+		struct intel_engine_cs *owner;
+
 		if (i915_request_completed(rq))
 			break;
 
@@ -396,14 +437,23 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
 
 		GEM_BUG_ON(rq->hw_context->active);
 
-		GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
-		if (rq_prio(rq) != prio) {
-			prio = rq_prio(rq);
-			pl = i915_sched_lookup_priolist(engine, prio);
-		}
-		GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
+		owner = rq->hw_context->engine;
+		if (likely(owner == engine)) {
+			GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
+			if (rq_prio(rq) != prio) {
+				prio = rq_prio(rq);
+				pl = i915_sched_lookup_priolist(engine, prio);
+			}
+			GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
+
+			list_add(&rq->sched.link, pl);
+		} else {
+			if (__i915_request_has_started(rq))
+				rq->sched.attr.priority |= ACTIVE_PRIORITY;
 
-		list_add(&rq->sched.link, pl);
+			rq->engine = owner;
+			owner->submit_request(rq);
+		}
 
 		active = rq;
 	}
@@ -665,6 +715,50 @@ static void complete_preempt_context(struct intel_engine_execlists *execlists)
 						  execlists));
 }
 
+static void virtual_update_register_offsets(u32 *regs,
+					    struct intel_engine_cs *engine)
+{
+	u32 base = engine->mmio_base;
+
+	regs[CTX_CONTEXT_CONTROL] =
+		i915_mmio_reg_offset(RING_CONTEXT_CONTROL(engine));
+	regs[CTX_RING_HEAD] = i915_mmio_reg_offset(RING_HEAD(base));
+	regs[CTX_RING_TAIL] = i915_mmio_reg_offset(RING_TAIL(base));
+	regs[CTX_RING_BUFFER_START] = i915_mmio_reg_offset(RING_START(base));
+	regs[CTX_RING_BUFFER_CONTROL] = i915_mmio_reg_offset(RING_CTL(base));
+
+	regs[CTX_BB_HEAD_U] = i915_mmio_reg_offset(RING_BBADDR_UDW(base));
+	regs[CTX_BB_HEAD_L] = i915_mmio_reg_offset(RING_BBADDR(base));
+	regs[CTX_BB_STATE] = i915_mmio_reg_offset(RING_BBSTATE(base));
+	regs[CTX_SECOND_BB_HEAD_U] =
+		i915_mmio_reg_offset(RING_SBBADDR_UDW(base));
+	regs[CTX_SECOND_BB_HEAD_L] = i915_mmio_reg_offset(RING_SBBADDR(base));
+	regs[CTX_SECOND_BB_STATE] = i915_mmio_reg_offset(RING_SBBSTATE(base));
+
+	regs[CTX_CTX_TIMESTAMP] =
+		i915_mmio_reg_offset(RING_CTX_TIMESTAMP(base));
+	regs[CTX_PDP3_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, 3));
+	regs[CTX_PDP3_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, 3));
+	regs[CTX_PDP2_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, 2));
+	regs[CTX_PDP2_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, 2));
+	regs[CTX_PDP1_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, 1));
+	regs[CTX_PDP1_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, 1));
+	regs[CTX_PDP0_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, 0));
+	regs[CTX_PDP0_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, 0));
+
+	if (engine->class == RENDER_CLASS) {
+		regs[CTX_RCS_INDIRECT_CTX] =
+			i915_mmio_reg_offset(RING_INDIRECT_CTX(base));
+		regs[CTX_RCS_INDIRECT_CTX_OFFSET] =
+			i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(base));
+		regs[CTX_BB_PER_CTX_PTR] =
+			i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(base));
+
+		regs[CTX_R_PWR_CLK_STATE] =
+			i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE);
+	}
+}
+
 static void execlists_dequeue(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -697,6 +791,28 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 	 * and context switches) submission.
 	 */
 
+	for (rb = rb_first_cached(&execlists->virtual); rb; ) {
+		struct virtual_engine *ve =
+			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
+		struct i915_request *rq = READ_ONCE(ve->request);
+		struct intel_engine_cs *active;
+
+		if (!rq) {
+			rb_erase_cached(rb, &execlists->virtual);
+			RB_CLEAR_NODE(rb);
+			rb = rb_first_cached(&execlists->virtual);
+			continue;
+		}
+
+		active = READ_ONCE(ve->context.active);
+		if (active && active != engine) {
+			rb = rb_next(rb);
+			continue;
+		}
+
+		break;
+	}
+
 	if (last) {
 		/*
 		 * Don't resubmit or switch until all outstanding
@@ -718,7 +834,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))
 			return;
 
-		if (need_preempt(engine, last)) {
+		if (need_preempt(engine, last, rb)) {
 			inject_preempt_context(engine);
 			return;
 		}
@@ -758,6 +874,72 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		last->tail = last->wa_tail;
 	}
 
+	while (rb) { /* XXX virtual is always taking precedence */
+		struct virtual_engine *ve =
+			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
+		struct i915_request *rq;
+
+		spin_lock(&ve->base.timeline.lock);
+
+		rq = ve->request;
+		if (unlikely(!rq)) { /* lost the race to a sibling */
+			spin_unlock(&ve->base.timeline.lock);
+			rb_erase_cached(rb, &execlists->virtual);
+			RB_CLEAR_NODE(rb);
+			rb = rb_first_cached(&execlists->virtual);
+			continue;
+		}
+
+		if (rq_prio(rq) >= queue_prio(execlists)) {
+			if (last && !can_merge_rq(last, rq)) {
+				spin_unlock(&ve->base.timeline.lock);
+				return; /* leave this rq for another engine */
+			}
+
+			GEM_BUG_ON(rq->engine != &ve->base);
+			ve->request = NULL;
+			ve->base.execlists.queue_priority_hint = INT_MIN;
+			rb_erase_cached(rb, &execlists->virtual);
+			RB_CLEAR_NODE(rb);
+
+			GEM_BUG_ON(rq->hw_context != &ve->context);
+			rq->engine = engine;
+
+			if (engine != ve->siblings[0]) {
+				u32 *regs = ve->context.lrc_reg_state;
+				unsigned int n;
+
+				GEM_BUG_ON(READ_ONCE(ve->context.active));
+				virtual_update_register_offsets(regs, engine);
+
+				/*
+				 * Move the bound engine to the top of the list
+				 * for future execution. We then kick this
+				 * tasklet first before checking others, so that
+				 * we preferentially reuse this set of bound
+				 * registers.
+				 */
+				for (n = 1; n < ve->count; n++) {
+					if (ve->siblings[n] == engine) {
+						swap(ve->siblings[n],
+						     ve->siblings[0]);
+						break;
+					}
+				}
+
+				GEM_BUG_ON(ve->siblings[0] != engine);
+			}
+
+			__i915_request_submit(rq);
+			trace_i915_request_in(rq, port_index(port, execlists));
+			submit = true;
+			last = rq;
+		}
+
+		spin_unlock(&ve->base.timeline.lock);
+		break;
+	}
+
 	while ((rb = rb_first_cached(&execlists->queue))) {
 		struct i915_priolist *p = to_priolist(rb);
 		struct i915_request *rq, *rn;
@@ -2904,6 +3086,304 @@ void intel_lr_context_resume(struct drm_i915_private *i915)
 	}
 }
 
+static void __virtual_engine_free(struct rcu_head *rcu)
+{
+	struct virtual_engine *ve = container_of(rcu, typeof(*ve), rcu);
+
+	kfree(ve);
+}
+
+static void virtual_engine_free(struct kref *kref)
+{
+	struct virtual_engine *ve = container_of(kref, typeof(*ve), kref);
+	unsigned int n;
+
+	GEM_BUG_ON(ve->request);
+	GEM_BUG_ON(ve->context.active);
+
+	for (n = 0; n < ve->count; n++) {
+		struct intel_engine_cs *sibling = ve->siblings[n];
+		struct rb_node *node = &ve->nodes[sibling->id].rb;
+
+		if (RB_EMPTY_NODE(node))
+			continue;
+
+		spin_lock_irq(&sibling->timeline.lock);
+
+		if (!RB_EMPTY_NODE(node))
+			rb_erase_cached(node, &sibling->execlists.virtual);
+
+		spin_unlock_irq(&sibling->timeline.lock);
+	}
+	GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
+
+	if (ve->context.state)
+		__execlists_context_fini(&ve->context);
+
+	i915_timeline_fini(&ve->base.timeline);
+	call_rcu(&ve->rcu, __virtual_engine_free);
+}
+
+static void virtual_context_unpin(struct intel_context *ce)
+{
+	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
+
+	execlists_context_unpin(ce);
+
+	kref_put(&ve->kref, virtual_engine_free);
+}
+
+static void virtual_engine_initial_hint(struct virtual_engine *ve)
+{
+	int swp;
+
+	/*
+	 * Pick a random sibling on starting to help spread the load around.
+	 *
+	 * New contexts are typically created with exactly the same order
+	 * of siblings, and often started in batches. Due to the way we iterate
+	 * the array of sibling when submitting requests, sibling[0] is
+	 * prioritised for dequeuing. If we make sure that sibling[0] is fairly
+	 * randomised across the system, we also help spread the load by the
+	 * first engine we inspect being different each time.
+	 *
+	 * NB This does not force us to execute on this engine, it will just
+	 * typically be the first we inspect for submission.
+	 */
+	swp = prandom_u32_max(ve->count);
+	if (!swp)
+		return;
+
+	swap(ve->siblings[swp], ve->siblings[0]);
+	virtual_update_register_offsets(ve->context.lrc_reg_state,
+					ve->siblings[0]);
+}
+
+static int virtual_context_pin(struct intel_context *ce)
+{
+	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
+	int err;
+
+	/* Note: we must use a real engine class for setting up reg state */
+	err = __execlists_context_pin(ce, ve->siblings[0]);
+	if (err)
+		return err;
+
+	virtual_engine_initial_hint(ve);
+
+	kref_get(&ve->kref);
+	return 0;
+}
+
+static const struct intel_context_ops virtual_context_ops = {
+	.pin = virtual_context_pin,
+	.unpin = virtual_context_unpin,
+};
+
+static void virtual_submission_tasklet(unsigned long data)
+{
+	struct virtual_engine * const ve = (struct virtual_engine *)data;
+	unsigned int n;
+	int prio;
+
+	prio = READ_ONCE(ve->base.execlists.queue_priority_hint);
+	if (prio == INT_MIN)
+		return;
+
+	local_irq_disable();
+	for (n = 0; READ_ONCE(ve->request) && n < ve->count; n++) {
+		struct intel_engine_cs *sibling = ve->siblings[n];
+		struct ve_node * const node = &ve->nodes[sibling->id];
+		struct rb_node **parent, *rb;
+		bool first;
+
+		spin_lock(&sibling->timeline.lock);
+
+		if (!RB_EMPTY_NODE(&node->rb)) {
+			first = rb_first_cached(&sibling->execlists.virtual) == &node->rb;
+			if (prio == node->prio || (prio > node->prio && first))
+				goto submit_engine;
+
+			rb_erase_cached(&node->rb, &sibling->execlists.virtual);
+		}
+
+		rb = NULL;
+		first = true;
+		parent = &sibling->execlists.virtual.rb_root.rb_node;
+		while (*parent) {
+			struct ve_node *other;
+
+			rb = *parent;
+			other = rb_entry(rb, typeof(*other), rb);
+			if (prio > other->prio) {
+				parent = &rb->rb_left;
+			} else {
+				parent = &rb->rb_right;
+				first = false;
+			}
+		}
+
+		rb_link_node(&node->rb, rb, parent);
+		rb_insert_color_cached(&node->rb,
+				       &sibling->execlists.virtual,
+				       first);
+
+submit_engine:
+		GEM_BUG_ON(RB_EMPTY_NODE(&node->rb));
+		node->prio = prio;
+		if (first && prio > sibling->execlists.queue_priority_hint) {
+			sibling->execlists.queue_priority_hint = prio;
+			tasklet_hi_schedule(&sibling->execlists.tasklet);
+		}
+
+		spin_unlock(&sibling->timeline.lock);
+	}
+	local_irq_enable();
+}
+
+static void virtual_submit_request(struct i915_request *request)
+{
+	struct virtual_engine *ve = to_virtual_engine(request->engine);
+
+	GEM_BUG_ON(ve->base.submit_request != virtual_submit_request);
+
+	GEM_BUG_ON(ve->request);
+	ve->base.execlists.queue_priority_hint = rq_prio(request);
+	WRITE_ONCE(ve->request, request);
+
+	tasklet_schedule(&ve->base.execlists.tasklet);
+}
+
+struct intel_engine_cs *
+intel_execlists_create_virtual(struct i915_gem_context *ctx,
+			       struct intel_engine_cs **siblings,
+			       unsigned int count)
+{
+	struct virtual_engine *ve;
+	unsigned int n;
+	int err;
+
+	if (!count)
+		return ERR_PTR(-EINVAL);
+
+	ve = kzalloc(struct_size(ve, siblings, count), GFP_KERNEL);
+	if (!ve)
+		return ERR_PTR(-ENOMEM);
+
+	kref_init(&ve->kref);
+	rcu_head_init(&ve->rcu);
+	ve->base.i915 = ctx->i915;
+	ve->base.id = -1;
+	ve->base.class = OTHER_CLASS;
+	ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
+	ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
+	ve->base.flags = I915_ENGINE_IS_VIRTUAL;
+
+	snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
+
+	err = i915_timeline_init(ctx->i915,
+				 &ve->base.timeline,
+				 ve->base.name,
+				 NULL);
+	if (err)
+		goto err_put;
+	i915_timeline_set_subclass(&ve->base.timeline, TIMELINE_VIRTUAL);
+
+	ve->base.cops = &virtual_context_ops;
+	ve->base.request_alloc = execlists_request_alloc;
+
+	ve->base.schedule = i915_schedule;
+	ve->base.submit_request = virtual_submit_request;
+
+	ve->base.execlists.queue_priority_hint = INT_MIN;
+	tasklet_init(&ve->base.execlists.tasklet,
+		     virtual_submission_tasklet,
+		     (unsigned long)ve);
+
+	intel_context_init(&ve->context, ctx, &ve->base);
+
+	for (n = 0; n < count; n++) {
+		struct intel_engine_cs *sibling = siblings[n];
+
+		GEM_BUG_ON(!is_power_of_2(sibling->mask));
+		if (sibling->mask & ve->base.mask)
+			continue;
+
+		if (sibling->execlists.tasklet.func != execlists_submission_tasklet) {
+			err = -ENODEV;
+			goto err_put;
+		}
+
+		GEM_BUG_ON(RB_EMPTY_NODE(&ve->nodes[sibling->id].rb));
+		RB_CLEAR_NODE(&ve->nodes[sibling->id].rb);
+
+		ve->siblings[ve->count++] = sibling;
+		ve->base.mask |= sibling->mask;
+
+		if (ve->base.class != OTHER_CLASS) {
+			if (ve->base.class != sibling->class) {
+				err = -EINVAL;
+				goto err_put;
+			}
+			continue;
+		}
+
+		ve->base.class = sibling->class;
+		snprintf(ve->base.name, sizeof(ve->base.name),
+			 "v%dx%d", ve->base.class, count);
+		ve->base.context_size = sibling->context_size;
+
+		ve->base.emit_bb_start = sibling->emit_bb_start;
+		ve->base.emit_flush = sibling->emit_flush;
+		ve->base.emit_init_breadcrumb = sibling->emit_init_breadcrumb;
+		ve->base.emit_fini_breadcrumb = sibling->emit_fini_breadcrumb;
+		ve->base.emit_fini_breadcrumb_dw =
+			sibling->emit_fini_breadcrumb_dw;
+	}
+
+	/* gracefully replace a degenerate virtual engine */
+	if (is_power_of_2(ve->base.mask)) {
+		struct intel_engine_cs *actual = ve->siblings[0];
+		virtual_engine_free(&ve->kref);
+		return actual;
+	}
+
+	__intel_context_insert(ctx, &ve->base, &ve->context);
+	return &ve->base;
+
+err_put:
+	virtual_engine_free(&ve->kref);
+	return ERR_PTR(err);
+}
+
+struct intel_engine_cs *
+intel_execlists_clone_virtual(struct i915_gem_context *ctx,
+			      struct intel_engine_cs *src)
+{
+	struct virtual_engine *se = to_virtual_engine(src);
+	struct intel_engine_cs *dst;
+
+	dst = intel_execlists_create_virtual(ctx,
+					     se->siblings,
+					     se->count);
+	if (IS_ERR(dst))
+		return dst;
+
+	return dst;
+}
+
+void intel_virtual_engine_destroy(struct intel_engine_cs *engine)
+{
+	struct virtual_engine *ve = to_virtual_engine(engine);
+
+	if (!engine || !intel_engine_is_virtual(engine))
+		return;
+
+	__intel_context_remove(&ve->context);
+
+	kref_put(&ve->kref, virtual_engine_free);
+}
+
 void intel_execlists_show_requests(struct intel_engine_cs *engine,
 				   struct drm_printer *m,
 				   void (*show_request)(struct drm_printer *m,
@@ -2961,6 +3441,29 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
 		show_request(m, last, "\t\tQ ");
 	}
 
+	last = NULL;
+	count = 0;
+	for (rb = rb_first_cached(&execlists->virtual); rb; rb = rb_next(rb)) {
+		struct virtual_engine *ve =
+			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
+		struct i915_request *rq = READ_ONCE(ve->request);
+
+		if (rq) {
+			if (count++ < max - 1)
+				show_request(m, rq, "\t\tV ");
+			else
+				last = rq;
+		}
+	}
+	if (last) {
+		if (count > max) {
+			drm_printf(m,
+				   "\t\t...skipping %d virtual requests...\n",
+				   count - max);
+		}
+		show_request(m, last, "\t\tV ");
+	}
+
 	spin_unlock_irqrestore(&engine->timeline.lock, flags);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index f1aec8a6986f..9d90dc68e02b 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -112,6 +112,17 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
 							const char *prefix),
 				   unsigned int max);
 
+struct intel_engine_cs *
+intel_execlists_create_virtual(struct i915_gem_context *ctx,
+			       struct intel_engine_cs **siblings,
+			       unsigned int count);
+
+struct intel_engine_cs *
+intel_execlists_clone_virtual(struct i915_gem_context *ctx,
+			      struct intel_engine_cs *src);
+
+void intel_virtual_engine_destroy(struct intel_engine_cs *engine);
+
 u32 gen8_make_rpcs(struct drm_i915_private *i915, struct intel_sseu *ctx_sseu);
 
 #endif /* _INTEL_LRC_H_ */
diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
index 565e949a4722..7c0e95527bcb 100644
--- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
+++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
@@ -10,6 +10,7 @@
 
 #include "../i915_selftest.h"
 #include "igt_flush_test.h"
+#include "igt_live_test.h"
 #include "igt_spinner.h"
 #include "i915_random.h"
 
@@ -1042,6 +1043,169 @@ static int live_preempt_smoke(void *arg)
 	return err;
 }
 
+static int nop_virtual_engine(struct drm_i915_private *i915,
+			      struct intel_engine_cs **siblings,
+			      unsigned int nsibling,
+			      unsigned int nctx,
+			      unsigned int flags)
+#define CHAIN BIT(0)
+{
+	IGT_TIMEOUT(end_time);
+	struct i915_request *request[16];
+	struct i915_gem_context *ctx[16];
+	struct intel_engine_cs *ve[16];
+	unsigned long n, prime, nc;
+	struct igt_live_test t;
+	ktime_t times[2] = {};
+	int err;
+
+	GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ctx));
+
+	for (n = 0; n < nctx; n++) {
+		ctx[n] = kernel_context(i915);
+		if (!ctx[n])
+			return -ENOMEM;
+
+		ve[n] = intel_execlists_create_virtual(ctx[n],
+						       siblings, nsibling);
+		if (IS_ERR(ve[n]))
+			return PTR_ERR(ve[n]);
+	}
+
+	err = igt_live_test_begin(&t, i915, __func__, ve[0]->name);
+	if (err)
+		goto out;
+
+	for_each_prime_number_from(prime, 1, 8192) {
+		times[1] = ktime_get_raw();
+
+		if (flags & CHAIN) {
+			for (nc = 0; nc < nctx; nc++) {
+				for (n = 0; n < prime; n++) {
+					request[nc] =
+						i915_request_alloc(ve[nc], ctx[nc]);
+					if (IS_ERR(request[nc])) {
+						err = PTR_ERR(request[nc]);
+						goto out;
+					}
+
+					i915_request_add(request[nc]);
+				}
+			}
+		} else {
+			for (n = 0; n < prime; n++) {
+				for (nc = 0; nc < nctx; nc++) {
+					request[nc] =
+						i915_request_alloc(ve[nc], ctx[nc]);
+					if (IS_ERR(request[nc])) {
+						err = PTR_ERR(request[nc]);
+						goto out;
+					}
+
+					i915_request_add(request[nc]);
+				}
+			}
+		}
+
+		for (nc = 0; nc < nctx; nc++) {
+			if (i915_request_wait(request[nc],
+					      I915_WAIT_LOCKED,
+					      HZ / 10) < 0) {
+				pr_err("%s(%s): wait for %llx:%lld timed out\n",
+				       __func__, ve[0]->name,
+				       request[nc]->fence.context,
+				       request[nc]->fence.seqno);
+
+				GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
+					  __func__, ve[0]->name,
+					  request[nc]->fence.context,
+					  request[nc]->fence.seqno);
+				GEM_TRACE_DUMP();
+				i915_gem_set_wedged(i915);
+				break;
+			}
+		}
+
+		times[1] = ktime_sub(ktime_get_raw(), times[1]);
+		if (prime == 1)
+			times[0] = times[1];
+
+		if (__igt_timeout(end_time, NULL))
+			break;
+	}
+
+	err = igt_live_test_end(&t);
+	if (err)
+		goto out;
+
+	pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n",
+		nctx, ve[0]->name, ktime_to_ns(times[0]),
+		prime, div64_u64(ktime_to_ns(times[1]), prime));
+
+out:
+	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+		err = -EIO;
+
+	for (nc = 0; nc < nctx; nc++) {
+		intel_virtual_engine_destroy(ve[nc]);
+		kernel_context_close(ctx[nc]);
+	}
+	return err;
+}
+
+static int live_virtual_engine(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	unsigned int class, inst;
+	int err = -ENODEV;
+
+	if (USES_GUC_SUBMISSION(i915))
+		return 0;
+
+	mutex_lock(&i915->drm.struct_mutex);
+
+	for_each_engine(engine, i915, id) {
+		err = nop_virtual_engine(i915, &engine, 1, 1, 0);
+		if (err) {
+			pr_err("Failed to wrap engine %s: err=%d\n",
+			       engine->name, err);
+			goto out_unlock;
+		}
+	}
+
+	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
+		int nsibling, n;
+
+		nsibling = 0;
+		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
+			if (!i915->engine_class[class][inst])
+				break;
+
+			siblings[nsibling++] = i915->engine_class[class][inst];
+		}
+		if (nsibling < 2)
+			continue;
+
+		for (n = 1; n <= nsibling + 1; n++) {
+			err = nop_virtual_engine(i915, siblings, nsibling,
+						 n, 0);
+			if (err)
+				goto out_unlock;
+		}
+
+		err = nop_virtual_engine(i915, siblings, nsibling, n, CHAIN);
+		if (err)
+			goto out_unlock;
+	}
+
+out_unlock:
+	mutex_unlock(&i915->drm.struct_mutex);
+	return err;
+}
+
 int intel_execlists_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
@@ -1053,6 +1217,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(live_chain_preempt),
 		SUBTEST(live_preempt_hang),
 		SUBTEST(live_preempt_smoke),
+		SUBTEST(live_virtual_engine),
 	};
 
 	if (!HAS_EXECLISTS(i915))
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index b68e1ad12c0f..102a2f9de443 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -125,6 +125,7 @@ enum drm_i915_gem_engine_class {
 };
 
 #define I915_ENGINE_CLASS_INVALID_NONE -1
+#define I915_ENGINE_CLASS_INVALID_VIRTUAL 0
 
 /**
  * DOC: perf_events exposed by i915 through /sys/bus/event_sources/drivers/i915
@@ -1596,8 +1597,37 @@ struct drm_i915_gem_context_param_sseu {
 	__u32 rsvd;
 };
 
+/*
+ * i915_context_engines_load_balance:
+ *
+ * Enable load balancing across this set of engines.
+ *
+ * Into the I915_EXEC_DEFAULT slot [0], a virtual engine is created that when
+ * used will proxy the execbuffer request onto one of the set of engines
+ * in such a way as to distribute the load evenly across the set.
+ *
+ * The set of engines must be compatible (e.g. the same HW class) as they
+ * will share the same logical GPU context and ring.
+ *
+ * To intermix rendering with the virtual engine and direct rendering onto
+ * the backing engines (bypassing the load balancing proxy), the context must
+ * be defined to use a single timeline for all engines.
+ */
+struct i915_context_engines_load_balance {
+	struct i915_user_extension base;
+
+	__u16 engine_index;
+	__u16 mbz16; /* reserved for future use; must be zero */
+	__u32 flags; /* all undefined flags must be zero */
+
+	__u64 engines_mask; /* selection mask of engines[] */
+
+	__u64 mbz64[4]; /* reserved for future use; must be zero */
+};
+
 struct i915_context_param_engines {
 	__u64 extensions; /* linked chain of extension blocks, 0 terminates */
+#define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0
 
 	struct {
 		__u16 engine_class; /* see enum drm_i915_gem_engine_class */
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 78+ messages in thread

* [PATCH 24/43] drm/i915: Extend execution fence to support a callback
  2019-03-06 14:24 RFC Breaking up GEM struct_mutex for async-pages Chris Wilson
                   ` (22 preceding siblings ...)
  2019-03-06 14:24 ` [PATCH 23/43] drm/i915: Load balancing across a virtual engine Chris Wilson
@ 2019-03-06 14:24 ` Chris Wilson
  2019-03-06 14:24 ` [PATCH 25/43] drm/i915/execlists: Virtual engine bonding Chris Wilson
                   ` (21 subsequent siblings)
  45 siblings, 0 replies; 78+ messages in thread
From: Chris Wilson @ 2019-03-06 14:24 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld

In the next patch, we will want to configure the slave request
depending on which physical engine the master request is executed on.
For this, we introduce a callback from the execute fence to convey this
information.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_request.c | 84 +++++++++++++++++++++++++++--
 drivers/gpu/drm/i915/i915_request.h |  4 ++
 2 files changed, 83 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 18f49132a526..4cd334462e21 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -38,6 +38,8 @@ struct execute_cb {
 	struct list_head link;
 	struct irq_work work;
 	struct i915_sw_fence *fence;
+	void (*hook)(struct i915_request *rq, struct dma_fence *signal);
+	struct i915_request *signal;
 };
 
 static struct i915_global_request {
@@ -343,6 +345,17 @@ static void irq_execute_cb(struct irq_work *wrk)
 	kmem_cache_free(global.slab_execute_cbs, cb);
 }
 
+static void irq_execute_cb_hook(struct irq_work *wrk)
+{
+	struct execute_cb *cb = container_of(wrk, typeof(*cb), work);
+
+	cb->hook(container_of(cb->fence, struct i915_request, submit),
+		 &cb->signal->fence);
+	i915_request_put(cb->signal);
+
+	irq_execute_cb(wrk);
+}
+
 static void __notify_execute_cb(struct i915_request *rq)
 {
 	struct execute_cb *cb;
@@ -369,14 +382,19 @@ static void __notify_execute_cb(struct i915_request *rq)
 }
 
 static int
-i915_request_await_execution(struct i915_request *rq,
-			     struct i915_request *signal,
-			     gfp_t gfp)
+__i915_request_await_execution(struct i915_request *rq,
+			       struct i915_request *signal,
+			       void (*hook)(struct i915_request *rq,
+					    struct dma_fence *signal),
+			       gfp_t gfp)
 {
 	struct execute_cb *cb;
 
-	if (i915_request_is_active(signal))
+	if (i915_request_is_active(signal)) {
+		if (hook)
+			hook(rq, &signal->fence);
 		return 0;
+	}
 
 	cb = kmem_cache_alloc(global.slab_execute_cbs, gfp);
 	if (!cb)
@@ -386,8 +404,18 @@ i915_request_await_execution(struct i915_request *rq,
 	i915_sw_fence_await(cb->fence);
 	init_irq_work(&cb->work, irq_execute_cb);
 
+	if (hook) {
+		cb->hook = hook;
+		cb->signal = i915_request_get(signal);
+		cb->work.func = irq_execute_cb_hook;
+	}
+
 	spin_lock_irq(&signal->lock);
 	if (i915_request_is_active(signal)) {
+		if (hook) {
+			hook(rq, &signal->fence);
+			i915_request_put(signal);
+		}
 		i915_sw_fence_complete(cb->fence);
 		kmem_cache_free(global.slab_execute_cbs, cb);
 	} else {
@@ -790,7 +818,7 @@ emit_semaphore_wait(struct i915_request *to,
 		return err;
 
 	/* Only submit our spinner after the signaler is running! */
-	err = i915_request_await_execution(to, from, gfp);
+	err = __i915_request_await_execution(to, from, NULL, gfp);
 	if (err)
 		return err;
 
@@ -910,6 +938,52 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
 	return 0;
 }
 
+int
+i915_request_await_execution(struct i915_request *rq,
+			     struct dma_fence *fence,
+			     void (*hook)(struct i915_request *rq,
+					  struct dma_fence *signal))
+{
+	struct dma_fence **child = &fence;
+	unsigned int nchild = 1;
+	int ret;
+
+	if (dma_fence_is_array(fence)) {
+		struct dma_fence_array *array = to_dma_fence_array(fence);
+
+		/* XXX Error for signal-on-any fence arrays */
+
+		child = array->fences;
+		nchild = array->num_fences;
+		GEM_BUG_ON(!nchild);
+	}
+
+	do {
+		fence = *child++;
+		if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+			continue;
+
+		/*
+		 * We don't squash repeated fence dependencies here as we
+		 * want to run our callback in all cases.
+		 */
+
+		if (dma_fence_is_i915(fence))
+			ret = __i915_request_await_execution(rq,
+							     to_request(fence),
+							     hook,
+							     I915_FENCE_GFP);
+		else
+			ret = i915_sw_fence_await_dma_fence(&rq->submit, fence,
+							    I915_FENCE_TIMEOUT,
+							    GFP_KERNEL);
+		if (ret < 0)
+			return ret;
+	} while (--nchild);
+
+	return 0;
+}
+
 /**
  * i915_request_await_object - set this request to (async) wait upon a bo
  * @to: request we are wishing to use
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index cd6c130964cd..d4f6b2940130 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -265,6 +265,10 @@ int i915_request_await_object(struct i915_request *to,
 			      bool write);
 int i915_request_await_dma_fence(struct i915_request *rq,
 				 struct dma_fence *fence);
+int i915_request_await_execution(struct i915_request *rq,
+				 struct dma_fence *fence,
+				 void (*hook)(struct i915_request *rq,
+					      struct dma_fence *signal));
 
 void i915_request_add(struct i915_request *rq);
 
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 78+ messages in thread

* [PATCH 25/43] drm/i915/execlists: Virtual engine bonding
  2019-03-06 14:24 RFC Breaking up GEM struct_mutex for async-pages Chris Wilson
                   ` (23 preceding siblings ...)
  2019-03-06 14:24 ` [PATCH 24/43] drm/i915: Extend execution fence to support a callback Chris Wilson
@ 2019-03-06 14:24 ` Chris Wilson
  2019-03-06 14:25 ` [PATCH 26/43] drm/i915: Allow specification of parallel execbuf Chris Wilson
                   ` (20 subsequent siblings)
  45 siblings, 0 replies; 78+ messages in thread
From: Chris Wilson @ 2019-03-06 14:24 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld

Some users require that when a master batch is executed on one particular
engine, a companion batch is run simultaneously on a specific slave
engine. For this purpose, we introduce virtual engine bonding, allowing
maps of master:slaves to be constructed to constrain which physical
engines a virtual engine may select given a fence on a master engine.

For the moment, we continue to ignore the issue of preemption deferring
the master request for later. Ideally, we would like to then also remove
the slave and run something else rather than have it stall the pipeline.
With load balancing, we should be able to move workload around it, but
there is a similar stall on the master pipeline while it may wait for
the slave to be executed. At the cost of more latency for the bonded
request, it may be interesting to launch both on their engines in
lockstep. (Bubbles abound.)

Opens: Also what about bonding an engine as its own master? It doesn't
break anything internally, so allow the silliness.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_context.c    |  50 ++++++
 drivers/gpu/drm/i915/i915_request.c        |   1 +
 drivers/gpu/drm/i915/i915_request.h        |   1 +
 drivers/gpu/drm/i915/intel_engine_types.h  |   7 +
 drivers/gpu/drm/i915/intel_lrc.c           | 111 ++++++++++++++
 drivers/gpu/drm/i915/intel_lrc.h           |   4 +
 drivers/gpu/drm/i915/selftests/intel_lrc.c | 167 +++++++++++++++++++++
 include/uapi/drm/i915_drm.h                |  22 +++
 8 files changed, 363 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index aa8076c06006..f6a7ecef392b 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -1465,8 +1465,58 @@ set_engines__load_balance(struct i915_user_extension __user *base, void *data)
 	return 0;
 }
 
+static int
+set_engines__bond(struct i915_user_extension __user *base, void *data)
+{
+	struct i915_context_engines_bond __user *ext =
+		container_of_user(base, typeof(*ext), base);
+	const struct set_engines *set = data;
+	struct intel_engine_cs *master;
+	u32 class, instance, siblings;
+	u16 idx;
+	int err;
+
+	if (get_user(idx, &ext->engine_index))
+		return -EFAULT;
+
+	if (idx >= set->nengine)
+		return -EINVAL;
+
+	idx = array_index_nospec(idx, set->nengine);
+	if (!set->engines[idx])
+		return -EINVAL;
+
+	/*
+	 * A non-virtual engine has 0 siblings to choose between; and submit
+	 * fence will always be directed to the one engine.
+	 */
+	if (!intel_engine_is_virtual(set->engines[idx]))
+		return 0;
+
+	err = check_user_mbz16(&ext->mbz);
+	if (err)
+		return err;
+
+	if (get_user(class, &ext->master_class))
+		return -EFAULT;
+
+	if (get_user(instance, &ext->master_instance))
+		return -EFAULT;
+
+	master = intel_engine_lookup_user(set->ctx->i915, class, instance);
+	if (!master)
+		return -EINVAL;
+
+	if (get_user(siblings, &ext->sibling_mask))
+		return -EFAULT;
+
+	return intel_virtual_engine_attach_bond(set->engines[idx],
+						master, siblings);
+}
+
 static const i915_user_extension_fn set_engines__extensions[] = {
 	[I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE] = set_engines__load_balance,
+	[I915_CONTEXT_ENGINES_EXT_BOND] = set_engines__bond,
 };
 
 static int
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 4cd334462e21..d8c943984bd2 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -743,6 +743,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	rq->batch = NULL;
 	rq->capture_list = NULL;
 	rq->waitboost = false;
+	rq->execution_mask = ~0u;
 
 	/*
 	 * Reserve space in the ring buffer for all the commands required to
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index d4f6b2940130..862b25930de0 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -145,6 +145,7 @@ struct i915_request {
 	 */
 	struct i915_sched_node sched;
 	struct i915_dependency dep;
+	unsigned int execution_mask;
 
 	/*
 	 * A convenience pointer to the current breadcrumb value stored in
diff --git a/drivers/gpu/drm/i915/intel_engine_types.h b/drivers/gpu/drm/i915/intel_engine_types.h
index d54d2a1840cc..6dfcf5cc08c1 100644
--- a/drivers/gpu/drm/i915/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/intel_engine_types.h
@@ -382,6 +382,13 @@ struct intel_engine_cs {
 	 */
 	void		(*submit_request)(struct i915_request *rq);
 
+	/*
+	 * Called on signaling of a SUBMIT_FENCE, passing along the signaling
+	 * request down to the bonded pairs.
+	 */
+	void            (*bond_execute)(struct i915_request *rq,
+					struct dma_fence *signal);
+
 	/*
 	 * Call when the priority on a request has changed and it and its
 	 * dependencies may need rescheduling. Note the request itself may
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 0c97e8f30223..f06312d185af 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -179,6 +179,12 @@ struct virtual_engine {
 		int prio;
 	} nodes[I915_NUM_ENGINES];
 
+	struct ve_bond {
+		struct intel_engine_cs *master;
+		unsigned int sibling_mask;
+	} *bonds;
+	unsigned int nbond;
+
 	unsigned int count;
 	struct intel_engine_cs *siblings[0];
 };
@@ -3183,6 +3189,7 @@ static const struct intel_context_ops virtual_context_ops = {
 static void virtual_submission_tasklet(unsigned long data)
 {
 	struct virtual_engine * const ve = (struct virtual_engine *)data;
+	unsigned int mask;
 	unsigned int n;
 	int prio;
 
@@ -3191,12 +3198,30 @@ static void virtual_submission_tasklet(unsigned long data)
 		return;
 
 	local_irq_disable();
+
+	mask = 0;
+	spin_lock(&ve->base.timeline.lock);
+	if (ve->request)
+		mask = ve->request->execution_mask;
+	spin_unlock(&ve->base.timeline.lock);
+
 	for (n = 0; READ_ONCE(ve->request) && n < ve->count; n++) {
 		struct intel_engine_cs *sibling = ve->siblings[n];
 		struct ve_node * const node = &ve->nodes[sibling->id];
 		struct rb_node **parent, *rb;
 		bool first;
 
+		if (unlikely(!(mask & sibling->mask))) {
+			if (!RB_EMPTY_NODE(&node->rb)) {
+				spin_lock(&sibling->timeline.lock);
+				rb_erase_cached(&node->rb,
+						&sibling->execlists.virtual);
+				RB_CLEAR_NODE(&node->rb);
+				spin_unlock(&sibling->timeline.lock);
+			}
+			continue;
+		}
+
 		spin_lock(&sibling->timeline.lock);
 
 		if (!RB_EMPTY_NODE(&node->rb)) {
@@ -3254,6 +3279,30 @@ static void virtual_submit_request(struct i915_request *request)
 	tasklet_schedule(&ve->base.execlists.tasklet);
 }
 
+static struct ve_bond *
+virtual_find_bond(struct virtual_engine *ve, struct intel_engine_cs *master)
+{
+	int i;
+
+	for (i = 0; i < ve->nbond; i++) {
+		if (ve->bonds[i].master == master)
+			return &ve->bonds[i];
+	}
+
+	return NULL;
+}
+
+static void
+virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
+{
+	struct virtual_engine *ve = to_virtual_engine(rq->engine);
+	struct ve_bond *bond;
+
+	bond = virtual_find_bond(ve, to_request(signal)->engine);
+	if (bond) /* XXX serialise with rq->lock? */
+		rq->execution_mask &= bond->sibling_mask;
+}
+
 struct intel_engine_cs *
 intel_execlists_create_virtual(struct i915_gem_context *ctx,
 			       struct intel_engine_cs **siblings,
@@ -3294,6 +3343,7 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx,
 
 	ve->base.schedule = i915_schedule;
 	ve->base.submit_request = virtual_submit_request;
+	ve->base.bond_execute = virtual_bond_execute;
 
 	ve->base.execlists.queue_priority_hint = INT_MIN;
 	tasklet_init(&ve->base.execlists.tasklet,
@@ -3369,9 +3419,70 @@ intel_execlists_clone_virtual(struct i915_gem_context *ctx,
 	if (IS_ERR(dst))
 		return dst;
 
+	if (se->nbond) {
+		struct virtual_engine *de = to_virtual_engine(dst);
+
+		de->bonds = kmemdup(se->bonds,
+				    sizeof(*se->bonds) * se->nbond,
+				    GFP_KERNEL);
+		if (!de->bonds) {
+			intel_virtual_engine_destroy(dst);
+			return ERR_PTR(-ENOMEM);
+		}
+
+		de->nbond = se->nbond;
+	}
+
 	return dst;
 }
 
+static unsigned long
+virtual_execution_mask(struct virtual_engine *ve, unsigned long mask)
+{
+	unsigned long emask = 0;
+	int bit;
+
+	for_each_set_bit(bit, &mask, ve->count)
+		emask |= ve->siblings[bit]->mask;
+
+	return emask;
+}
+
+int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
+				     struct intel_engine_cs *master,
+				     unsigned long mask)
+{
+	struct virtual_engine *ve = to_virtual_engine(engine);
+	struct ve_bond *bond;
+
+	if (mask >> ve->count)
+		return -EINVAL;
+
+	mask = virtual_execution_mask(ve, mask);
+	if (!mask)
+		return -EINVAL;
+
+	bond = virtual_find_bond(ve, master);
+	if (bond) {
+		bond->sibling_mask |= mask;
+		return 0;
+	}
+
+	bond = krealloc(ve->bonds,
+			sizeof(*bond) * (ve->nbond + 1),
+			GFP_KERNEL);
+	if (!bond)
+		return -ENOMEM;
+
+	bond[ve->nbond].master = master;
+	bond[ve->nbond].sibling_mask = mask;
+
+	ve->bonds = bond;
+	ve->nbond++;
+
+	return 0;
+}
+
 void intel_virtual_engine_destroy(struct intel_engine_cs *engine)
 {
 	struct virtual_engine *ve = to_virtual_engine(engine);
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 9d90dc68e02b..77b85648045a 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -121,6 +121,10 @@ struct intel_engine_cs *
 intel_execlists_clone_virtual(struct i915_gem_context *ctx,
 			      struct intel_engine_cs *src);
 
+int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
+				     struct intel_engine_cs *master,
+				     unsigned long mask);
+
 void intel_virtual_engine_destroy(struct intel_engine_cs *engine);
 
 u32 gen8_make_rpcs(struct drm_i915_private *i915, struct intel_sseu *ctx_sseu);
diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
index 7c0e95527bcb..b6329280a59f 100644
--- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
+++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
@@ -13,6 +13,7 @@
 #include "igt_live_test.h"
 #include "igt_spinner.h"
 #include "i915_random.h"
+#include "lib_sw_fence.h"
 
 #include "mock_context.h"
 
@@ -1206,6 +1207,171 @@ static int live_virtual_engine(void *arg)
 	return err;
 }
 
+static int bond_virtual_engine(struct drm_i915_private *i915,
+			       unsigned int class,
+			       struct intel_engine_cs **siblings,
+			       unsigned int nsibling,
+			       unsigned int flags)
+#define BOND_SCHEDULE BIT(0)
+{
+	struct intel_engine_cs *master;
+	struct i915_gem_context *ctx;
+	struct i915_request *rq[16];
+	enum intel_engine_id id;
+	unsigned long n;
+	int err;
+
+	GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1);
+
+	ctx = kernel_context(i915);
+	if (!ctx)
+		return -ENOMEM;
+
+	err = 0;
+	rq[0] = ERR_PTR(-ENOMEM);
+	for_each_engine(master, i915, id) {
+		struct i915_sw_fence fence;
+
+		if (master->class == class)
+			continue;
+
+		rq[0] = i915_request_alloc(master, ctx);
+		if (IS_ERR(rq[0])) {
+			err = PTR_ERR(rq[0]);
+			goto out;
+		}
+
+		if (flags & BOND_SCHEDULE)
+			onstack_fence_init(&fence);
+
+		i915_request_get(rq[0]);
+		i915_request_add(rq[0]);
+
+		for (n = 0; n < nsibling; n++) {
+			struct intel_engine_cs *engine;
+
+			engine = intel_execlists_create_virtual(ctx,
+								siblings,
+								nsibling);
+			if (IS_ERR(engine)) {
+				err = PTR_ERR(engine);
+				goto out;
+			}
+
+			err = intel_virtual_engine_attach_bond(engine,
+							       master,
+							       BIT(n));
+			if (err) {
+				intel_virtual_engine_destroy(engine);
+				goto out;
+			}
+
+			rq[n + 1] = i915_request_alloc(engine, ctx);
+			if (IS_ERR(rq[n + 1])) {
+				err = PTR_ERR(rq[n + 1]);
+				intel_virtual_engine_destroy(engine);
+				goto out;
+			}
+			i915_request_get(rq[n + 1]);
+
+			err = i915_request_await_execution(rq[n + 1],
+							   &rq[0]->fence,
+							   engine->bond_execute);
+			i915_request_add(rq[n + 1]);
+			intel_virtual_engine_destroy(engine);
+			if (err < 0)
+				goto out;
+		}
+		rq[n + 1] = ERR_PTR(-EINVAL);
+
+		if (flags & BOND_SCHEDULE)
+			onstack_fence_fini(&fence);
+
+		for (n = 0; n < nsibling; n++) {
+			if (i915_request_wait(rq[n + 1],
+					      I915_WAIT_LOCKED,
+					      MAX_SCHEDULE_TIMEOUT) < 0) {
+				err = -EIO;
+				goto out;
+			}
+
+			if (rq[n + 1]->engine != siblings[n]) {
+				pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n",
+				       siblings[n]->name,
+				       rq[n + 1]->engine->name,
+				       rq[0]->engine->name);
+				err = -EINVAL;
+				goto out;
+			}
+		}
+
+		for (n = 0; !IS_ERR(rq[n]); n++)
+			i915_request_put(rq[n]);
+		rq[0] = ERR_PTR(-ENOMEM);
+	}
+
+out:
+	for (n = 0; !IS_ERR(rq[n]); n++)
+		i915_request_put(rq[n]);
+	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+		err = -EIO;
+
+	kernel_context_close(ctx);
+	return err;
+}
+
+static int live_virtual_bond(void *arg)
+{
+	static const struct phase {
+		const char *name;
+		unsigned int flags;
+	} phases[] = {
+		{ "", 0 },
+		{ "schedule", BOND_SCHEDULE },
+		{ },
+	};
+	struct drm_i915_private *i915 = arg;
+	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
+	unsigned int class, inst;
+	int err = 0;
+
+	if (USES_GUC_SUBMISSION(i915))
+		return 0;
+
+	mutex_lock(&i915->drm.struct_mutex);
+
+	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
+		const struct phase *p;
+		int nsibling;
+
+		nsibling = 0;
+		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
+			if (!i915->engine_class[class][inst])
+				break;
+
+			GEM_BUG_ON(nsibling == ARRAY_SIZE(siblings));
+			siblings[nsibling++] = i915->engine_class[class][inst];
+		}
+		if (nsibling < 2)
+			continue;
+
+		for (p = phases; p->name; p++) {
+			err = bond_virtual_engine(i915,
+						  class, siblings, nsibling,
+						  p->flags);
+			if (err) {
+				pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n",
+				       __func__, p->name, class, nsibling, err);
+				goto out_unlock;
+			}
+		}
+	}
+
+out_unlock:
+	mutex_unlock(&i915->drm.struct_mutex);
+	return err;
+}
+
 int intel_execlists_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
@@ -1218,6 +1384,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(live_preempt_hang),
 		SUBTEST(live_preempt_smoke),
 		SUBTEST(live_virtual_engine),
+		SUBTEST(live_virtual_bond),
 	};
 
 	if (!HAS_EXECLISTS(i915))
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 102a2f9de443..c9683b19aaf2 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1530,6 +1530,10 @@ struct drm_i915_gem_context_param {
  * sized argument, will revert back to default settings.
  *
  * See struct i915_context_param_engines.
+ *
+ * Extensions:
+ *   i915_context_engines_load_balance (I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE)
+ *   i915_context_engines_bond (I915_CONTEXT_ENGINES_EXT_BOND)
  */
 #define I915_CONTEXT_PARAM_ENGINES	0xa
 /* Must be kept compact -- no holes and well documented */
@@ -1625,9 +1629,27 @@ struct i915_context_engines_load_balance {
 	__u64 mbz64[4]; /* reserved for future use; must be zero */
 };
 
+/*
+ * i915_context_engines_bond:
+ *
+ */
+struct i915_context_engines_bond {
+	struct i915_user_extension base;
+
+	__u16 engine_index;
+	__u16 mbz;
+
+	__u16 master_class;
+	__u16 master_instance;
+
+	__u64 sibling_mask;
+	__u64 flags; /* all undefined flags must be zero */
+};
+
 struct i915_context_param_engines {
 	__u64 extensions; /* linked chain of extension blocks, 0 terminates */
 #define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0
+#define I915_CONTEXT_ENGINES_EXT_BOND 1
 
 	struct {
 		__u16 engine_class; /* see enum drm_i915_gem_engine_class */
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 78+ messages in thread

* [PATCH 26/43] drm/i915: Allow specification of parallel execbuf
  2019-03-06 14:24 RFC Breaking up GEM struct_mutex for async-pages Chris Wilson
                   ` (24 preceding siblings ...)
  2019-03-06 14:24 ` [PATCH 25/43] drm/i915/execlists: Virtual engine bonding Chris Wilson
@ 2019-03-06 14:25 ` Chris Wilson
  2019-03-06 14:25 ` [PATCH 27/43] drm/i915/selftests: Check preemption support on each engine Chris Wilson
                   ` (19 subsequent siblings)
  45 siblings, 0 replies; 78+ messages in thread
From: Chris Wilson @ 2019-03-06 14:25 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld

There is a desire to split a task onto two engines and have them run at
the same time, e.g. scanline interleaving to spread the workload evenly.
Through the use of the out-fence from the first execbuf, we can
coordinate secondary execbuf to only become ready simultaneously with
the first, so that with all things idle the second execbufs are executed
in parallel with the first. The key difference here between the new
EXEC_FENCE_SUBMIT and the existing EXEC_FENCE_IN is that the in-fence
waits for the completion of the first request (so that all of its
rendering results are visible to the second execbuf, the more common
userspace fence requirement).

Since we only have a single input fence slot, userspace cannot mix an
in-fence and a submit-fence. It has to use one or the other! This is not
such a harsh requirement, since by virtue of the submit-fence, the
secondary execbuf inherit all of the dependencies from the first
request, and for the application the dependencies should be common
between the primary and secondary execbuf.

Suggested-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Testcase: igt/gem_exec_fence/parallel
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.c            |  1 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 25 +++++++++++++++++++++-
 include/uapi/drm/i915_drm.h                | 17 ++++++++++++++-
 3 files changed, 41 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index a54030d60f84..8e65def8bcc5 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -421,6 +421,7 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data,
 	case I915_PARAM_HAS_EXEC_CAPTURE:
 	case I915_PARAM_HAS_EXEC_BATCH_FIRST:
 	case I915_PARAM_HAS_EXEC_FENCE_ARRAY:
+	case I915_PARAM_HAS_EXEC_SUBMIT_FENCE:
 		/* For the time being all of these are always true;
 		 * if some supported hardware does not have one of these
 		 * features this value needs to be provided from
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 8fc5bbdba9fa..87f059e418ab 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -2266,6 +2266,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 {
 	struct i915_execbuffer eb;
 	struct dma_fence *in_fence = NULL;
+	struct dma_fence *exec_fence = NULL;
 	struct sync_file *out_fence = NULL;
 	intel_wakeref_t wakeref;
 	int out_fence_fd = -1;
@@ -2309,11 +2310,24 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 			return -EINVAL;
 	}
 
+	if (args->flags & I915_EXEC_FENCE_SUBMIT) {
+		if (in_fence) {
+			err = -EINVAL;
+			goto err_in_fence;
+		}
+
+		exec_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
+		if (!exec_fence) {
+			err = -EINVAL;
+			goto err_in_fence;
+		}
+	}
+
 	if (args->flags & I915_EXEC_FENCE_OUT) {
 		out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
 		if (out_fence_fd < 0) {
 			err = out_fence_fd;
-			goto err_in_fence;
+			goto err_exec_fence;
 		}
 	}
 
@@ -2445,6 +2459,13 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 			goto err_request;
 	}
 
+	if (exec_fence) {
+		err = i915_request_await_execution(eb.request, exec_fence,
+						   eb.engine->bond_execute);
+		if (err < 0)
+			goto err_request;
+	}
+
 	if (fences) {
 		err = await_fence_array(&eb, fences);
 		if (err)
@@ -2505,6 +2526,8 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 err_out_fence:
 	if (out_fence_fd != -1)
 		put_unused_fd(out_fence_fd);
+err_exec_fence:
+	dma_fence_put(exec_fence);
 err_in_fence:
 	dma_fence_put(in_fence);
 	return err;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index c9683b19aaf2..677d435ac64b 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -591,6 +591,12 @@ typedef struct drm_i915_irq_wait {
  */
 #define I915_PARAM_MMAP_GTT_COHERENT	52
 
+/*
+ * Query whether DRM_I915_GEM_EXECBUFFER2 supports coordination of parallel
+ * execution through use of explicit fence support.
+ * See I915_EXEC_FENCE_OUT and I915_EXEC_FENCE_SUBMIT.
+ */
+#define I915_PARAM_HAS_EXEC_SUBMIT_FENCE 53
 /* Must be kept compact -- no holes and well documented */
 
 typedef struct drm_i915_getparam {
@@ -1113,7 +1119,16 @@ struct drm_i915_gem_execbuffer2 {
  */
 #define I915_EXEC_FENCE_ARRAY   (1<<19)
 
-#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_ARRAY<<1))
+/*
+ * Setting I915_EXEC_FENCE_SUBMIT implies that lower_32_bits(rsvd2) represent
+ * a sync_file fd to wait upon (in a nonblocking manner) prior to executing
+ * the batch.
+ *
+ * Returns -EINVAL if the sync_file fd cannot be found.
+ */
+#define I915_EXEC_FENCE_SUBMIT		(1 << 20)
+
+#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_SUBMIT << 1))
 
 #define I915_EXEC_CONTEXT_ID_MASK	(0xffffffff)
 #define i915_execbuffer2_set_context_id(eb2, context) \
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 78+ messages in thread

* [PATCH 27/43] drm/i915/selftests: Check preemption support on each engine
  2019-03-06 14:24 RFC Breaking up GEM struct_mutex for async-pages Chris Wilson
                   ` (25 preceding siblings ...)
  2019-03-06 14:25 ` [PATCH 26/43] drm/i915: Allow specification of parallel execbuf Chris Wilson
@ 2019-03-06 14:25 ` Chris Wilson
  2019-03-08  9:31   ` Tvrtko Ursulin
  2019-03-06 14:25 ` [PATCH 28/43] drm/i915/execlists: Skip direct submission if only lite-restore Chris Wilson
                   ` (18 subsequent siblings)
  45 siblings, 1 reply; 78+ messages in thread
From: Chris Wilson @ 2019-03-06 14:25 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld

Check that we have setup on preemption for the engine before testing,
instead warn if it is not enabled on supported HW.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/selftests/intel_lrc.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
index b6329280a59f..a7de7a8fc24a 100644
--- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
+++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
@@ -90,6 +90,9 @@ static int live_preempt(void *arg)
 	if (!HAS_LOGICAL_RING_PREEMPTION(i915))
 		return 0;
 
+	if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION))
+		pr_err("Logical preemption supported, but not exposed\n");
+
 	mutex_lock(&i915->drm.struct_mutex);
 	wakeref = intel_runtime_pm_get(i915);
 
@@ -114,6 +117,9 @@ static int live_preempt(void *arg)
 	for_each_engine(engine, i915, id) {
 		struct i915_request *rq;
 
+		if (!intel_engine_has_preemption(engine))
+			continue;
+
 		rq = igt_spinner_create_request(&spin_lo, ctx_lo, engine,
 						MI_ARB_CHECK);
 		if (IS_ERR(rq)) {
@@ -205,6 +211,9 @@ static int live_late_preempt(void *arg)
 	for_each_engine(engine, i915, id) {
 		struct i915_request *rq;
 
+		if (!intel_engine_has_preemption(engine))
+			continue;
+
 		rq = igt_spinner_create_request(&spin_lo, ctx_lo, engine,
 						MI_ARB_CHECK);
 		if (IS_ERR(rq)) {
@@ -337,6 +346,9 @@ static int live_suppress_self_preempt(void *arg)
 		struct i915_request *rq_a, *rq_b;
 		int depth;
 
+		if (!intel_engine_has_preemption(engine))
+			continue;
+
 		engine->execlists.preempt_hang.count = 0;
 
 		rq_a = igt_spinner_create_request(&a.spin,
@@ -483,6 +495,9 @@ static int live_suppress_wait_preempt(void *arg)
 	for_each_engine(engine, i915, id) {
 		int depth;
 
+		if (!intel_engine_has_preemption(engine))
+			continue;
+
 		if (!engine->emit_init_breadcrumb)
 			continue;
 
@@ -604,6 +619,9 @@ static int live_chain_preempt(void *arg)
 		};
 		int count, i;
 
+		if (!intel_engine_has_preemption(engine))
+			continue;
+
 		for_each_prime_number_from(count, 1, 32) { /* must fit ring! */
 			struct i915_request *rq;
 
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 78+ messages in thread

* [PATCH 28/43] drm/i915/execlists: Skip direct submission if only lite-restore
  2019-03-06 14:24 RFC Breaking up GEM struct_mutex for async-pages Chris Wilson
                   ` (26 preceding siblings ...)
  2019-03-06 14:25 ` [PATCH 27/43] drm/i915/selftests: Check preemption support on each engine Chris Wilson
@ 2019-03-06 14:25 ` Chris Wilson
  2019-03-06 14:25 ` [PATCH 29/43] drm/i915: Split GEM object type definition to its own header Chris Wilson
                   ` (17 subsequent siblings)
  45 siblings, 0 replies; 78+ messages in thread
From: Chris Wilson @ 2019-03-06 14:25 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld

If resubmitting the active context, simply skip the submission as
performing the submission from the interrupt handler has higher
throughput than continually provoking lite-restores. If however, we find
ourselves with a new client, we check whether or not we can dequeue into
the second port or to resolve preemption.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_lrc.c | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index f06312d185af..93701a8a79be 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1396,12 +1396,26 @@ static void __submit_queue_imm(struct intel_engine_cs *engine)
 		tasklet_hi_schedule(&execlists->tasklet);
 }
 
-static void submit_queue(struct intel_engine_cs *engine, int prio)
+static bool inflight(const struct intel_engine_execlists *execlists,
+		     const struct i915_request *rq)
 {
-	if (prio > engine->execlists.queue_priority_hint) {
-		engine->execlists.queue_priority_hint = prio;
+	const struct i915_request *active = port_request(execlists->port);
+
+	return active && active->hw_context == rq->hw_context;
+}
+
+static void submit_queue(struct intel_engine_cs *engine,
+			 const struct i915_request *rq)
+{
+	struct intel_engine_execlists *execlists = &engine->execlists;
+
+	if (rq_prio(rq) <= execlists->queue_priority_hint)
+		return;
+
+	execlists->queue_priority_hint = rq_prio(rq);
+
+	if (!inflight(execlists, rq))
 		__submit_queue_imm(engine);
-	}
 }
 
 static void execlists_submit_request(struct i915_request *request)
@@ -1417,7 +1431,7 @@ static void execlists_submit_request(struct i915_request *request)
 	GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
 	GEM_BUG_ON(list_empty(&request->sched.link));
 
-	submit_queue(engine, rq_prio(request));
+	submit_queue(engine, request);
 
 	spin_unlock_irqrestore(&engine->timeline.lock, flags);
 }
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 78+ messages in thread

* [PATCH 29/43] drm/i915: Split GEM object type definition to its own header
  2019-03-06 14:24 RFC Breaking up GEM struct_mutex for async-pages Chris Wilson
                   ` (27 preceding siblings ...)
  2019-03-06 14:25 ` [PATCH 28/43] drm/i915/execlists: Skip direct submission if only lite-restore Chris Wilson
@ 2019-03-06 14:25 ` Chris Wilson
  2019-03-06 15:46   ` Matthew Auld
  2019-03-06 14:25 ` [PATCH 30/43] drm/i915: Pull GEM ioctls interface to its own file Chris Wilson
                   ` (16 subsequent siblings)
  45 siblings, 1 reply; 78+ messages in thread
From: Chris Wilson @ 2019-03-06 14:25 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld

For convenience in avoiding inline spaghetti, keep the type definition
as a separate header.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Makefile                 |   3 +-
 .../gpu/drm/i915/gem/i915_gem_object_types.h  | 285 +++++++++++++++++
 .../test_i915_gem_object_types_standalone.c   |   7 +
 drivers/gpu/drm/i915/i915_drv.h               |   3 +-
 drivers/gpu/drm/i915/i915_gem_batch_pool.h    |   3 +-
 drivers/gpu/drm/i915/i915_gem_gtt.h           |   1 +
 drivers/gpu/drm/i915/i915_gem_object.h        | 295 +-----------------
 drivers/gpu/drm/i915/intel_engine_types.h     |   1 +
 8 files changed, 303 insertions(+), 295 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_object_types.h
 create mode 100644 drivers/gpu/drm/i915/gem/test_i915_gem_object_types_standalone.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 60de05f3fa60..87fb8c21510e 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -59,6 +59,7 @@ i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o
 
 # Test the headers are compilable as standalone units
 i915-$(CONFIG_DRM_I915_WERROR) += \
+	gem/test_i915_gem_object_types_standalone.o \
 	test_i915_active_types_standalone.o \
 	test_i915_gem_context_types_standalone.o \
 	test_i915_timeline_types_standalone.o \
@@ -102,7 +103,7 @@ i915-y += \
 	  intel_mocs.o \
 	  intel_ringbuffer.o \
 	  intel_uncore.o \
-	  intel_wopcm.o
+	  intel_wopcm.o \
 
 # general-purpose microcontroller (GuC) support
 i915-y += intel_uc.o \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
new file mode 100644
index 000000000000..e4b50944f553
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -0,0 +1,285 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#ifndef __I915_GEM_OBJECT_TYPES_H__
+#define __I915_GEM_OBJECT_TYPES_H__
+
+#include <linux/reservation.h>
+
+#include <drm/drm_gem.h>
+
+#include "../i915_active.h"
+#include "../i915_selftest.h"
+
+struct drm_i915_gem_object;
+
+/*
+ * struct i915_lut_handle tracks the fast lookups from handle to vma used
+ * for execbuf. Although we use a radixtree for that mapping, in order to
+ * remove them as the object or context is closed, we need a secondary list
+ * and a translation entry (i915_lut_handle).
+ */
+struct i915_lut_handle {
+	struct list_head obj_link;
+	struct list_head ctx_link;
+	struct i915_gem_context *ctx;
+	u32 handle;
+};
+
+struct drm_i915_gem_object_ops {
+	unsigned int flags;
+#define I915_GEM_OBJECT_HAS_STRUCT_PAGE	BIT(0)
+#define I915_GEM_OBJECT_IS_SHRINKABLE	BIT(1)
+#define I915_GEM_OBJECT_IS_PROXY	BIT(2)
+#define I915_GEM_OBJECT_ASYNC_CANCEL	BIT(3)
+
+	/* Interface between the GEM object and its backing storage.
+	 * get_pages() is called once prior to the use of the associated set
+	 * of pages before to binding them into the GTT, and put_pages() is
+	 * called after we no longer need them. As we expect there to be
+	 * associated cost with migrating pages between the backing storage
+	 * and making them available for the GPU (e.g. clflush), we may hold
+	 * onto the pages after they are no longer referenced by the GPU
+	 * in case they may be used again shortly (for example migrating the
+	 * pages to a different memory domain within the GTT). put_pages()
+	 * will therefore most likely be called when the object itself is
+	 * being released or under memory pressure (where we attempt to
+	 * reap pages for the shrinker).
+	 */
+	int (*get_pages)(struct drm_i915_gem_object *obj);
+	void (*put_pages)(struct drm_i915_gem_object *obj,
+			  struct sg_table *pages);
+
+	int (*pwrite)(struct drm_i915_gem_object *obj,
+		      const struct drm_i915_gem_pwrite *arg);
+
+	int (*dmabuf_export)(struct drm_i915_gem_object *obj);
+	void (*release)(struct drm_i915_gem_object *obj);
+};
+
+struct drm_i915_gem_object {
+	struct drm_gem_object base;
+
+	const struct drm_i915_gem_object_ops *ops;
+
+	struct {
+		/**
+		 * @vma.lock: protect the list/tree of vmas
+		 */
+		spinlock_t lock;
+
+		/**
+		 * @vma.list: List of VMAs backed by this object
+		 *
+		 * The VMA on this list are ordered by type, all GGTT vma are
+		 * placed at the head and all ppGTT vma are placed at the tail.
+		 * The different types of GGTT vma are unordered between
+		 * themselves, use the @vma.tree (which has a defined order
+		 * between all VMA) to quickly find an exact match.
+		 */
+		struct list_head list;
+
+		/**
+		 * @vma.tree: Ordered tree of VMAs backed by this object
+		 *
+		 * All VMA created for this object are placed in the @vma.tree
+		 * for fast retrieval via a binary search in
+		 * i915_vma_instance(). They are also added to @vma.list for
+		 * easy iteration.
+		 */
+		struct rb_root tree;
+	} vma;
+
+	/**
+	 * @lut_list: List of vma lookup entries in use for this object.
+	 *
+	 * If this object is closed, we need to remove all of its VMA from
+	 * the fast lookup index in associated contexts; @lut_list provides
+	 * this translation from object to context->handles_vma.
+	 */
+	struct list_head lut_list;
+
+	/** Stolen memory for this object, instead of being backed by shmem. */
+	struct drm_mm_node *stolen;
+	union {
+		struct rcu_head rcu;
+		struct llist_node freed;
+	};
+
+	/**
+	 * Whether the object is currently in the GGTT mmap.
+	 */
+	unsigned int userfault_count;
+	struct list_head userfault_link;
+
+	struct list_head batch_pool_link;
+	I915_SELFTEST_DECLARE(struct list_head st_link);
+
+	unsigned long flags;
+
+	/**
+	 * Have we taken a reference for the object for incomplete GPU
+	 * activity?
+	 */
+#define I915_BO_ACTIVE_REF 0
+
+	/*
+	 * Is the object to be mapped as read-only to the GPU
+	 * Only honoured if hardware has relevant pte bit
+	 */
+	unsigned int cache_level:3;
+	unsigned int cache_coherent:2;
+#define I915_BO_CACHE_COHERENT_FOR_READ BIT(0)
+#define I915_BO_CACHE_COHERENT_FOR_WRITE BIT(1)
+	unsigned int cache_dirty:1;
+
+	/**
+	 * @read_domains: Read memory domains.
+	 *
+	 * These monitor which caches contain read/write data related to the
+	 * object. When transitioning from one set of domains to another,
+	 * the driver is called to ensure that caches are suitably flushed and
+	 * invalidated.
+	 */
+	u16 read_domains;
+
+	/**
+	 * @write_domain: Corresponding unique write memory domain.
+	 */
+	u16 write_domain;
+
+	atomic_t frontbuffer_bits;
+	unsigned int frontbuffer_ggtt_origin; /* write once */
+	struct i915_active_request frontbuffer_write;
+
+	/** Current tiling stride for the object, if it's tiled. */
+	unsigned int tiling_and_stride;
+#define FENCE_MINIMUM_STRIDE 128 /* See i915_tiling_ok() */
+#define TILING_MASK (FENCE_MINIMUM_STRIDE - 1)
+#define STRIDE_MASK (~TILING_MASK)
+
+	/** Count of VMA actually bound by this object */
+	unsigned int bind_count;
+	unsigned int active_count;
+	/** Count of how many global VMA are currently pinned for use by HW */
+	unsigned int pin_global;
+
+	struct {
+		struct mutex lock; /* protects the pages and their use */
+		atomic_t pages_pin_count;
+
+		struct sg_table *pages;
+		void *mapping;
+
+		/* TODO: whack some of this into the error state */
+		struct i915_page_sizes {
+			/**
+			 * The sg mask of the pages sg_table. i.e the mask of
+			 * of the lengths for each sg entry.
+			 */
+			unsigned int phys;
+
+			/**
+			 * The gtt page sizes we are allowed to use given the
+			 * sg mask and the supported page sizes. This will
+			 * express the smallest unit we can use for the whole
+			 * object, as well as the larger sizes we may be able
+			 * to use opportunistically.
+			 */
+			unsigned int sg;
+
+			/**
+			 * The actual gtt page size usage. Since we can have
+			 * multiple vma associated with this object we need to
+			 * prevent any trampling of state, hence a copy of this
+			 * struct also lives in each vma, therefore the gtt
+			 * value here should only be read/write through the vma.
+			 */
+			unsigned int gtt;
+		} page_sizes;
+
+		I915_SELFTEST_DECLARE(unsigned int page_mask);
+
+		struct i915_gem_object_page_iter {
+			struct scatterlist *sg_pos;
+			unsigned int sg_idx; /* in pages, but 32bit eek! */
+
+			struct radix_tree_root radix;
+			struct mutex lock; /* protects this cache */
+		} get_page;
+
+		/**
+		 * Element within i915->mm.unbound_list or i915->mm.bound_list,
+		 * locked by i915->mm.obj_lock.
+		 */
+		struct list_head link;
+
+		/**
+		 * Advice: are the backing pages purgeable?
+		 */
+		unsigned int madv:2;
+
+		/**
+		 * This is set if the object has been written to since the
+		 * pages were last acquired.
+		 */
+		bool dirty:1;
+
+		/**
+		 * This is set if the object has been pinned due to unknown
+		 * swizzling.
+		 */
+		bool quirked:1;
+	} mm;
+
+	/** Breadcrumb of last rendering to the buffer.
+	 * There can only be one writer, but we allow for multiple readers.
+	 * If there is a writer that necessarily implies that all other
+	 * read requests are complete - but we may only be lazily clearing
+	 * the read requests. A read request is naturally the most recent
+	 * request on a ring, so we may have two different write and read
+	 * requests on one ring where the write request is older than the
+	 * read request. This allows for the CPU to read from an active
+	 * buffer by only waiting for the write to complete.
+	 */
+	struct reservation_object *resv;
+
+	/** References from framebuffers, locks out tiling changes. */
+	unsigned int framebuffer_references;
+
+	/** Record of address bit 17 of each page at last unbind. */
+	unsigned long *bit_17;
+
+	union {
+		struct i915_gem_userptr {
+			uintptr_t ptr;
+
+			struct i915_mm_struct *mm;
+			struct i915_mmu_object *mmu_object;
+			struct work_struct *work;
+		} userptr;
+
+		unsigned long scratch;
+
+		void *gvt_info;
+	};
+
+	/** for phys allocated objects */
+	struct drm_dma_handle *phys_handle;
+
+	struct reservation_object __builtin_resv;
+};
+
+static inline struct drm_i915_gem_object *
+to_intel_bo(struct drm_gem_object *gem)
+{
+	/* Assert that to_intel_bo(NULL) == NULL */
+	BUILD_BUG_ON(offsetof(struct drm_i915_gem_object, base));
+
+	return container_of(gem, struct drm_i915_gem_object, base);
+}
+
+#endif
diff --git a/drivers/gpu/drm/i915/gem/test_i915_gem_object_types_standalone.c b/drivers/gpu/drm/i915/gem/test_i915_gem_object_types_standalone.c
new file mode 100644
index 000000000000..57b0c9290035
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/test_i915_gem_object_types_standalone.c
@@ -0,0 +1,7 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_gem_object_types.h"
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 9de4080c47a6..d32ce1937e68 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -77,7 +77,6 @@
 #include "i915_gem.h"
 #include "i915_gem_context.h"
 #include "i915_gem_fence_reg.h"
-#include "i915_gem_object.h"
 #include "i915_gem_gtt.h"
 #include "i915_gpu_error.h"
 #include "i915_request.h"
@@ -134,6 +133,8 @@ bool i915_error_injected(void);
 
 typedef depot_stack_handle_t intel_wakeref_t;
 
+struct drm_i915_gem_object;
+
 enum hpd_pin {
 	HPD_NONE = 0,
 	HPD_TV = HPD_NONE,     /* TV is known to be unreliable */
diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.h b/drivers/gpu/drm/i915/i915_gem_batch_pool.h
index 56947daaaf65..feeeeeaa54d8 100644
--- a/drivers/gpu/drm/i915/i915_gem_batch_pool.h
+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.h
@@ -9,6 +9,7 @@
 
 #include <linux/types.h>
 
+struct drm_i915_gem_object;
 struct intel_engine_cs;
 
 struct i915_gem_batch_pool {
@@ -19,7 +20,7 @@ struct i915_gem_batch_pool {
 void i915_gem_batch_pool_init(struct i915_gem_batch_pool *pool,
 			      struct intel_engine_cs *engine);
 void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool);
-struct drm_i915_gem_object*
+struct drm_i915_gem_object *
 i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, size_t size);
 
 #endif /* I915_GEM_BATCH_POOL_H */
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 25d5f7682bda..ae9dd0c7b3cb 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -61,6 +61,7 @@
 
 struct drm_i915_file_private;
 struct drm_i915_fence_reg;
+struct drm_i915_gem_object;
 struct i915_vma;
 
 typedef u32 gen6_pte_t;
diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
index 1a24dc97e4fd..509210b1945b 100644
--- a/drivers/gpu/drm/i915/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/i915_gem_object.h
@@ -1,308 +1,19 @@
 /*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2016 Intel Corporation
  */
 
 #ifndef __I915_GEM_OBJECT_H__
 #define __I915_GEM_OBJECT_H__
 
-#include <linux/reservation.h>
-
-#include <drm/drm_vma_manager.h>
 #include <drm/drm_gem.h>
 #include <drm/drm_file.h>
 #include <drm/drm_device.h>
 
 #include <drm/i915_drm.h>
 
-#include "i915_request.h"
-#include "i915_selftest.h"
-
-struct drm_i915_gem_object;
-
-/*
- * struct i915_lut_handle tracks the fast lookups from handle to vma used
- * for execbuf. Although we use a radixtree for that mapping, in order to
- * remove them as the object or context is closed, we need a secondary list
- * and a translation entry (i915_lut_handle).
- */
-struct i915_lut_handle {
-	struct list_head obj_link;
-	struct list_head ctx_link;
-	struct i915_gem_context *ctx;
-	u32 handle;
-};
-
-struct drm_i915_gem_object_ops {
-	unsigned int flags;
-#define I915_GEM_OBJECT_HAS_STRUCT_PAGE	BIT(0)
-#define I915_GEM_OBJECT_IS_SHRINKABLE	BIT(1)
-#define I915_GEM_OBJECT_IS_PROXY	BIT(2)
-#define I915_GEM_OBJECT_ASYNC_CANCEL	BIT(3)
-
-	/* Interface between the GEM object and its backing storage.
-	 * get_pages() is called once prior to the use of the associated set
-	 * of pages before to binding them into the GTT, and put_pages() is
-	 * called after we no longer need them. As we expect there to be
-	 * associated cost with migrating pages between the backing storage
-	 * and making them available for the GPU (e.g. clflush), we may hold
-	 * onto the pages after they are no longer referenced by the GPU
-	 * in case they may be used again shortly (for example migrating the
-	 * pages to a different memory domain within the GTT). put_pages()
-	 * will therefore most likely be called when the object itself is
-	 * being released or under memory pressure (where we attempt to
-	 * reap pages for the shrinker).
-	 */
-	int (*get_pages)(struct drm_i915_gem_object *);
-	void (*put_pages)(struct drm_i915_gem_object *, struct sg_table *);
-
-	int (*pwrite)(struct drm_i915_gem_object *,
-		      const struct drm_i915_gem_pwrite *);
-
-	int (*dmabuf_export)(struct drm_i915_gem_object *);
-	void (*release)(struct drm_i915_gem_object *);
-};
-
-struct drm_i915_gem_object {
-	struct drm_gem_object base;
-
-	const struct drm_i915_gem_object_ops *ops;
-
-	struct {
-		/**
-		 * @vma.lock: protect the list/tree of vmas
-		 */
-		spinlock_t lock;
-
-		/**
-		 * @vma.list: List of VMAs backed by this object
-		 *
-		 * The VMA on this list are ordered by type, all GGTT vma are
-		 * placed at the head and all ppGTT vma are placed at the tail.
-		 * The different types of GGTT vma are unordered between
-		 * themselves, use the @vma.tree (which has a defined order
-		 * between all VMA) to quickly find an exact match.
-		 */
-		struct list_head list;
-
-		/**
-		 * @vma.tree: Ordered tree of VMAs backed by this object
-		 *
-		 * All VMA created for this object are placed in the @vma.tree
-		 * for fast retrieval via a binary search in
-		 * i915_vma_instance(). They are also added to @vma.list for
-		 * easy iteration.
-		 */
-		struct rb_root tree;
-	} vma;
-
-	/**
-	 * @lut_list: List of vma lookup entries in use for this object.
-	 *
-	 * If this object is closed, we need to remove all of its VMA from
-	 * the fast lookup index in associated contexts; @lut_list provides
-	 * this translation from object to context->handles_vma.
-	 */
-	struct list_head lut_list;
-
-	/** Stolen memory for this object, instead of being backed by shmem. */
-	struct drm_mm_node *stolen;
-	union {
-		struct rcu_head rcu;
-		struct llist_node freed;
-	};
-
-	/**
-	 * Whether the object is currently in the GGTT mmap.
-	 */
-	unsigned int userfault_count;
-	struct list_head userfault_link;
-
-	struct list_head batch_pool_link;
-	I915_SELFTEST_DECLARE(struct list_head st_link);
-
-	unsigned long flags;
-
-	/**
-	 * Have we taken a reference for the object for incomplete GPU
-	 * activity?
-	 */
-#define I915_BO_ACTIVE_REF 0
-
-	/*
-	 * Is the object to be mapped as read-only to the GPU
-	 * Only honoured if hardware has relevant pte bit
-	 */
-	unsigned int cache_level:3;
-	unsigned int cache_coherent:2;
-#define I915_BO_CACHE_COHERENT_FOR_READ BIT(0)
-#define I915_BO_CACHE_COHERENT_FOR_WRITE BIT(1)
-	unsigned int cache_dirty:1;
-
-	/**
-	 * @read_domains: Read memory domains.
-	 *
-	 * These monitor which caches contain read/write data related to the
-	 * object. When transitioning from one set of domains to another,
-	 * the driver is called to ensure that caches are suitably flushed and
-	 * invalidated.
-	 */
-	u16 read_domains;
-
-	/**
-	 * @write_domain: Corresponding unique write memory domain.
-	 */
-	u16 write_domain;
-
-	atomic_t frontbuffer_bits;
-	unsigned int frontbuffer_ggtt_origin; /* write once */
-	struct i915_active_request frontbuffer_write;
-
-	/** Current tiling stride for the object, if it's tiled. */
-	unsigned int tiling_and_stride;
-#define FENCE_MINIMUM_STRIDE 128 /* See i915_tiling_ok() */
-#define TILING_MASK (FENCE_MINIMUM_STRIDE-1)
-#define STRIDE_MASK (~TILING_MASK)
-
-	/** Count of VMA actually bound by this object */
-	unsigned int bind_count;
-	unsigned int active_count;
-	/** Count of how many global VMA are currently pinned for use by HW */
-	unsigned int pin_global;
-
-	struct {
-		struct mutex lock; /* protects the pages and their use */
-		atomic_t pages_pin_count;
-
-		struct sg_table *pages;
-		void *mapping;
-
-		/* TODO: whack some of this into the error state */
-		struct i915_page_sizes {
-			/**
-			 * The sg mask of the pages sg_table. i.e the mask of
-			 * of the lengths for each sg entry.
-			 */
-			unsigned int phys;
-
-			/**
-			 * The gtt page sizes we are allowed to use given the
-			 * sg mask and the supported page sizes. This will
-			 * express the smallest unit we can use for the whole
-			 * object, as well as the larger sizes we may be able
-			 * to use opportunistically.
-			 */
-			unsigned int sg;
-
-			/**
-			 * The actual gtt page size usage. Since we can have
-			 * multiple vma associated with this object we need to
-			 * prevent any trampling of state, hence a copy of this
-			 * struct also lives in each vma, therefore the gtt
-			 * value here should only be read/write through the vma.
-			 */
-			unsigned int gtt;
-		} page_sizes;
-
-		I915_SELFTEST_DECLARE(unsigned int page_mask);
-
-		struct i915_gem_object_page_iter {
-			struct scatterlist *sg_pos;
-			unsigned int sg_idx; /* in pages, but 32bit eek! */
-
-			struct radix_tree_root radix;
-			struct mutex lock; /* protects this cache */
-		} get_page;
-
-		/**
-		 * Element within i915->mm.unbound_list or i915->mm.bound_list,
-		 * locked by i915->mm.obj_lock.
-		 */
-		struct list_head link;
-
-		/**
-		 * Advice: are the backing pages purgeable?
-		 */
-		unsigned int madv:2;
-
-		/**
-		 * This is set if the object has been written to since the
-		 * pages were last acquired.
-		 */
-		bool dirty:1;
-
-		/**
-		 * This is set if the object has been pinned due to unknown
-		 * swizzling.
-		 */
-		bool quirked:1;
-	} mm;
-
-	/** Breadcrumb of last rendering to the buffer.
-	 * There can only be one writer, but we allow for multiple readers.
-	 * If there is a writer that necessarily implies that all other
-	 * read requests are complete - but we may only be lazily clearing
-	 * the read requests. A read request is naturally the most recent
-	 * request on a ring, so we may have two different write and read
-	 * requests on one ring where the write request is older than the
-	 * read request. This allows for the CPU to read from an active
-	 * buffer by only waiting for the write to complete.
-	 */
-	struct reservation_object *resv;
-
-	/** References from framebuffers, locks out tiling changes. */
-	unsigned int framebuffer_references;
-
-	/** Record of address bit 17 of each page at last unbind. */
-	unsigned long *bit_17;
-
-	union {
-		struct i915_gem_userptr {
-			uintptr_t ptr;
-
-			struct i915_mm_struct *mm;
-			struct i915_mmu_object *mmu_object;
-			struct work_struct *work;
-		} userptr;
-
-		unsigned long scratch;
-
-		void *gvt_info;
-	};
-
-	/** for phys allocated objects */
-	struct drm_dma_handle *phys_handle;
-
-	struct reservation_object __builtin_resv;
-};
-
-static inline struct drm_i915_gem_object *
-to_intel_bo(struct drm_gem_object *gem)
-{
-	/* Assert that to_intel_bo(NULL) == NULL */
-	BUILD_BUG_ON(offsetof(struct drm_i915_gem_object, base));
-
-	return container_of(gem, struct drm_i915_gem_object, base);
-}
+#include "gem/i915_gem_object_types.h"
 
 struct drm_i915_gem_object *i915_gem_object_alloc(void);
 void i915_gem_object_free(struct drm_i915_gem_object *obj);
diff --git a/drivers/gpu/drm/i915/intel_engine_types.h b/drivers/gpu/drm/i915/intel_engine_types.h
index 6dfcf5cc08c1..525e1f1c4479 100644
--- a/drivers/gpu/drm/i915/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/intel_engine_types.h
@@ -24,6 +24,7 @@
 
 #define I915_CMD_HASH_ORDER 9
 
+struct drm_i915_gem_object;
 struct drm_i915_reg_table;
 struct i915_gem_context;
 struct i915_request;
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 78+ messages in thread

* [PATCH 30/43] drm/i915: Pull GEM ioctls interface to its own file
  2019-03-06 14:24 RFC Breaking up GEM struct_mutex for async-pages Chris Wilson
                   ` (28 preceding siblings ...)
  2019-03-06 14:25 ` [PATCH 29/43] drm/i915: Split GEM object type definition to its own header Chris Wilson
@ 2019-03-06 14:25 ` Chris Wilson
  2019-03-06 15:48   ` Matthew Auld
  2019-03-06 14:25 ` [PATCH 31/43] drm/i915: Move object->pages API to i915_gem_object.[ch] Chris Wilson
                   ` (15 subsequent siblings)
  45 siblings, 1 reply; 78+ messages in thread
From: Chris Wilson @ 2019-03-06 14:25 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld

Declutter i915_gem.h by moving the ioctl API into its own header.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Makefile                 |  1 +
 drivers/gpu/drm/i915/gem/i915_gem_ioctls.h    | 52 +++++++++++++++++++
 .../gem/test_i915_gem_ioctls_standalone.c     |  7 +++
 drivers/gpu/drm/i915/i915_drv.c               |  2 +
 drivers/gpu/drm/i915/i915_drv.h               | 38 --------------
 drivers/gpu/drm/i915/i915_gem.c               |  2 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c    |  2 +
 drivers/gpu/drm/i915/i915_gem_tiling.c        |  3 ++
 drivers/gpu/drm/i915/i915_gem_userptr.c       | 12 +++--
 9 files changed, 77 insertions(+), 42 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_ioctls.h
 create mode 100644 drivers/gpu/drm/i915/gem/test_i915_gem_ioctls_standalone.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 87fb8c21510e..2f84fac02578 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -59,6 +59,7 @@ i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o
 
 # Test the headers are compilable as standalone units
 i915-$(CONFIG_DRM_I915_WERROR) += \
+	gem/test_i915_gem_ioctls_standalone.o \
 	gem/test_i915_gem_object_types_standalone.o \
 	test_i915_active_types_standalone.o \
 	test_i915_gem_context_types_standalone.o \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h b/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h
new file mode 100644
index 000000000000..ddc7f2a52b3e
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h
@@ -0,0 +1,52 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef I915_GEM_IOCTLS_H
+#define I915_GEM_IOCTLS_H
+
+struct drm_device;
+struct drm_file;
+
+int i915_gem_busy_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file);
+int i915_gem_create_ioctl(struct drm_device *dev, void *data,
+			  struct drm_file *file);
+int i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data,
+			      struct drm_file *file);
+int i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
+			       struct drm_file *file);
+int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
+				struct drm_file *file);
+int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
+			       struct drm_file *file);
+int i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
+			      struct drm_file *file);
+int i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
+			   struct drm_file *file);
+int i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file);
+int i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
+			    struct drm_file *file);
+int i915_gem_pread_ioctl(struct drm_device *dev, void *data,
+			 struct drm_file *file);
+int i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
+			  struct drm_file *file);
+int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
+			       struct drm_file *file);
+int i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
+			      struct drm_file *file);
+int i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
+			      struct drm_file *file);
+int i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
+			     struct drm_file *file);
+int i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
+			    struct drm_file *file);
+int i915_gem_userptr_ioctl(struct drm_device *dev, void *data,
+			   struct drm_file *file);
+int i915_gem_wait_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file);
+
+#endif
diff --git a/drivers/gpu/drm/i915/gem/test_i915_gem_ioctls_standalone.c b/drivers/gpu/drm/i915/gem/test_i915_gem_ioctls_standalone.c
new file mode 100644
index 000000000000..df5f2b43d81c
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/test_i915_gem_ioctls_standalone.c
@@ -0,0 +1,7 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_gem_ioctls.h"
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 8e65def8bcc5..771b9159e6df 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -47,6 +47,8 @@
 #include <drm/drm_probe_helper.h>
 #include <drm/i915_drm.h>
 
+#include "gem/i915_gem_ioctls.h"
+
 #include "i915_drv.h"
 #include "i915_trace.h"
 #include "i915_pmu.h"
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d32ce1937e68..468232bdb571 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2735,46 +2735,8 @@ ibx_disable_display_interrupt(struct drm_i915_private *dev_priv, u32 bits)
 }
 
 /* i915_gem.c */
-int i915_gem_create_ioctl(struct drm_device *dev, void *data,
-			  struct drm_file *file_priv);
-int i915_gem_pread_ioctl(struct drm_device *dev, void *data,
-			 struct drm_file *file_priv);
-int i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
-			  struct drm_file *file_priv);
-int i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
-			struct drm_file *file_priv);
-int i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
-			struct drm_file *file_priv);
-int i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
-			      struct drm_file *file_priv);
-int i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
-			     struct drm_file *file_priv);
-int i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data,
-			      struct drm_file *file_priv);
-int i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
-			       struct drm_file *file_priv);
-int i915_gem_busy_ioctl(struct drm_device *dev, void *data,
-			struct drm_file *file_priv);
-int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
-			       struct drm_file *file);
-int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
-			       struct drm_file *file);
-int i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
-			    struct drm_file *file_priv);
-int i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
-			   struct drm_file *file_priv);
-int i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
-			      struct drm_file *file_priv);
-int i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
-			      struct drm_file *file_priv);
 int i915_gem_init_userptr(struct drm_i915_private *dev_priv);
 void i915_gem_cleanup_userptr(struct drm_i915_private *dev_priv);
-int i915_gem_userptr_ioctl(struct drm_device *dev, void *data,
-			   struct drm_file *file);
-int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
-				struct drm_file *file_priv);
-int i915_gem_wait_ioctl(struct drm_device *dev, void *data,
-			struct drm_file *file_priv);
 void i915_gem_sanitize(struct drm_i915_private *i915);
 int i915_gem_init_early(struct drm_i915_private *dev_priv);
 void i915_gem_cleanup_early(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8863dd414cde..37413c2e596b 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -39,6 +39,8 @@
 #include <linux/dma-buf.h>
 #include <linux/mman.h>
 
+#include "gem/i915_gem_ioctls.h"
+
 #include "i915_drv.h"
 #include "i915_gem_clflush.h"
 #include "i915_gemfs.h"
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 87f059e418ab..f1b5091ee817 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -34,6 +34,8 @@
 #include <drm/drm_syncobj.h>
 #include <drm/i915_drm.h>
 
+#include "gem/i915_gem_ioctls.h"
+
 #include "i915_drv.h"
 #include "i915_gem_clflush.h"
 #include "i915_trace.h"
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index 16cc9ddbce34..8675a6e24788 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -28,6 +28,9 @@
 #include <linux/string.h>
 #include <linux/bitops.h>
 #include <drm/i915_drm.h>
+
+#include "gem/i915_gem_ioctls.h"
+
 #include "i915_drv.h"
 
 /**
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index ad0087127144..bfe985b36e07 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -22,16 +22,20 @@
  *
  */
 
-#include <drm/i915_drm.h>
-#include "i915_drv.h"
-#include "i915_trace.h"
-#include "intel_drv.h"
 #include <linux/mmu_context.h>
 #include <linux/mmu_notifier.h>
 #include <linux/mempolicy.h>
 #include <linux/swap.h>
 #include <linux/sched/mm.h>
 
+#include <drm/i915_drm.h>
+
+#include "gem/i915_gem_ioctls.h"
+
+#include "i915_drv.h"
+#include "i915_trace.h"
+#include "intel_drv.h"
+
 struct i915_mm_struct {
 	struct mm_struct *mm;
 	struct drm_i915_private *i915;
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 78+ messages in thread

* [PATCH 31/43] drm/i915: Move object->pages API to i915_gem_object.[ch]
  2019-03-06 14:24 RFC Breaking up GEM struct_mutex for async-pages Chris Wilson
                   ` (29 preceding siblings ...)
  2019-03-06 14:25 ` [PATCH 30/43] drm/i915: Pull GEM ioctls interface to its own file Chris Wilson
@ 2019-03-06 14:25 ` Chris Wilson
  2019-03-06 16:23   ` Matthew Auld
  2019-03-06 14:25 ` [PATCH 32/43] drm/i915: Move shmem object setup to its own file Chris Wilson
                   ` (14 subsequent siblings)
  45 siblings, 1 reply; 78+ messages in thread
From: Chris Wilson @ 2019-03-06 14:25 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld

Currently the code for manipulating the pages on an object is still
residing in i915_gem.c, move it to i915_gem_object.c

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/Makefile                 |   3 +-
 .../gpu/drm/i915/{ => gem}/i915_gem_object.c  |   3 +-
 .../gpu/drm/i915/{ => gem}/i915_gem_object.h  | 119 +++++++++++++++-
 .../gem/test_i915_gem_object_standalone.c     |   7 +
 drivers/gpu/drm/i915/i915_drv.h               | 129 +-----------------
 drivers/gpu/drm/i915/i915_globals.c           |   2 +-
 drivers/gpu/drm/i915/i915_vma.h               |   2 +-
 7 files changed, 137 insertions(+), 128 deletions(-)
 rename drivers/gpu/drm/i915/{ => gem}/i915_gem_object.c (99%)
 rename drivers/gpu/drm/i915/{ => gem}/i915_gem_object.h (60%)
 create mode 100644 drivers/gpu/drm/i915/gem/test_i915_gem_object_standalone.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 2f84fac02578..d270f01e1091 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -60,6 +60,7 @@ i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o
 # Test the headers are compilable as standalone units
 i915-$(CONFIG_DRM_I915_WERROR) += \
 	gem/test_i915_gem_ioctls_standalone.o \
+	gem/test_i915_gem_object_standalone.o \
 	gem/test_i915_gem_object_types_standalone.o \
 	test_i915_active_types_standalone.o \
 	test_i915_gem_context_types_standalone.o \
@@ -70,6 +71,7 @@ i915-$(CONFIG_DRM_I915_WERROR) += \
 
 # GEM code
 i915-y += \
+	  gem/i915_gem_object.o \
 	  i915_active.o \
 	  i915_cmd_parser.o \
 	  i915_gem_batch_pool.o \
@@ -82,7 +84,6 @@ i915-y += \
 	  i915_gem_gtt.o \
 	  i915_gem_internal.o \
 	  i915_gem.o \
-	  i915_gem_object.o \
 	  i915_gem_render_state.o \
 	  i915_gem_shrinker.o \
 	  i915_gem_stolen.o \
diff --git a/drivers/gpu/drm/i915/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
similarity index 99%
rename from drivers/gpu/drm/i915/i915_gem_object.c
rename to drivers/gpu/drm/i915/gem/i915_gem_object.c
index ac6a5ab84586..93f4d92c9909 100644
--- a/drivers/gpu/drm/i915/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -22,10 +22,11 @@
  *
  */
 
-#include "i915_drv.h"
 #include "i915_gem_object.h"
 #include "i915_globals.h"
 
+#include "../i915_drv.h"
+
 static struct i915_global_object {
 	struct i915_global base;
 	struct kmem_cache *slab_objects;
diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
similarity index 60%
rename from drivers/gpu/drm/i915/i915_gem_object.h
rename to drivers/gpu/drm/i915/gem/i915_gem_object.h
index 509210b1945b..80d866de34a8 100644
--- a/drivers/gpu/drm/i915/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -13,7 +13,7 @@
 
 #include <drm/i915_drm.h>
 
-#include "gem/i915_gem_object_types.h"
+#include "i915_gem_object_types.h"
 
 struct drm_i915_gem_object *i915_gem_object_alloc(void);
 void i915_gem_object_free(struct drm_i915_gem_object *obj);
@@ -192,6 +192,123 @@ i915_gem_object_get_tile_row_size(const struct drm_i915_gem_object *obj)
 int i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
 			       unsigned int tiling, unsigned int stride);
 
+struct scatterlist *
+i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
+		       unsigned int n, unsigned int *offset);
+
+struct page *
+i915_gem_object_get_page(struct drm_i915_gem_object *obj,
+			 unsigned int n);
+
+struct page *
+i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
+			       unsigned int n);
+
+dma_addr_t
+i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
+				unsigned long n);
+
+void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
+				 struct sg_table *pages,
+				 unsigned int sg_page_sizes);
+int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
+
+static inline int __must_check
+i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
+{
+	might_lock(&obj->mm.lock);
+
+	if (atomic_inc_not_zero(&obj->mm.pages_pin_count))
+		return 0;
+
+	return __i915_gem_object_get_pages(obj);
+}
+
+static inline bool
+i915_gem_object_has_pages(struct drm_i915_gem_object *obj)
+{
+	return !IS_ERR_OR_NULL(READ_ONCE(obj->mm.pages));
+}
+
+static inline void
+__i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
+{
+	GEM_BUG_ON(!i915_gem_object_has_pages(obj));
+
+	atomic_inc(&obj->mm.pages_pin_count);
+}
+
+static inline bool
+i915_gem_object_has_pinned_pages(struct drm_i915_gem_object *obj)
+{
+	return atomic_read(&obj->mm.pages_pin_count);
+}
+
+static inline void
+__i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
+{
+	GEM_BUG_ON(!i915_gem_object_has_pages(obj));
+	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
+
+	atomic_dec(&obj->mm.pages_pin_count);
+}
+
+static inline void
+i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
+{
+	__i915_gem_object_unpin_pages(obj);
+}
+
+enum i915_mm_subclass { /* lockdep subclass for obj->mm.lock/struct_mutex */
+	I915_MM_NORMAL = 0,
+	I915_MM_SHRINKER /* called "recursively" from direct-reclaim-esque */
+};
+
+int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
+				enum i915_mm_subclass subclass);
+void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj);
+
+enum i915_map_type {
+	I915_MAP_WB = 0,
+	I915_MAP_WC,
+#define I915_MAP_OVERRIDE BIT(31)
+	I915_MAP_FORCE_WB = I915_MAP_WB | I915_MAP_OVERRIDE,
+	I915_MAP_FORCE_WC = I915_MAP_WC | I915_MAP_OVERRIDE,
+};
+
+/**
+ * i915_gem_object_pin_map - return a contiguous mapping of the entire object
+ * @obj: the object to map into kernel address space
+ * @type: the type of mapping, used to select pgprot_t
+ *
+ * Calls i915_gem_object_pin_pages() to prevent reaping of the object's
+ * pages and then returns a contiguous mapping of the backing storage into
+ * the kernel address space. Based on the @type of mapping, the PTE will be
+ * set to either WriteBack or WriteCombine (via pgprot_t).
+ *
+ * The caller is responsible for calling i915_gem_object_unpin_map() when the
+ * mapping is no longer required.
+ *
+ * Returns the pointer through which to access the mapped object, or an
+ * ERR_PTR() on error.
+ */
+void *__must_check i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
+					   enum i915_map_type type);
+
+/**
+ * i915_gem_object_unpin_map - releases an earlier mapping
+ * @obj: the object to unmap
+ *
+ * After pinning the object and mapping its pages, once you are finished
+ * with your access, call i915_gem_object_unpin_map() to release the pin
+ * upon the mapping. Once the pin count reaches zero, that mapping may be
+ * removed.
+ */
+static inline void i915_gem_object_unpin_map(struct drm_i915_gem_object *obj)
+{
+	i915_gem_object_unpin_pages(obj);
+}
+
 static inline struct intel_engine_cs *
 i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj)
 {
diff --git a/drivers/gpu/drm/i915/gem/test_i915_gem_object_standalone.c b/drivers/gpu/drm/i915/gem/test_i915_gem_object_standalone.c
new file mode 100644
index 000000000000..6f8897541178
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/test_i915_gem_object_standalone.c
@@ -0,0 +1,7 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_gem_object.h"
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 468232bdb571..90f84d3dca08 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2807,129 +2807,6 @@ static inline int __sg_page_count(const struct scatterlist *sg)
 	return sg->length >> PAGE_SHIFT;
 }
 
-struct scatterlist *
-i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
-		       unsigned int n, unsigned int *offset);
-
-struct page *
-i915_gem_object_get_page(struct drm_i915_gem_object *obj,
-			 unsigned int n);
-
-struct page *
-i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
-			       unsigned int n);
-
-dma_addr_t
-i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
-				unsigned long n);
-
-void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
-				 struct sg_table *pages,
-				 unsigned int sg_page_sizes);
-int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
-
-static inline int __must_check
-i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
-{
-	might_lock(&obj->mm.lock);
-
-	if (atomic_inc_not_zero(&obj->mm.pages_pin_count))
-		return 0;
-
-	return __i915_gem_object_get_pages(obj);
-}
-
-static inline bool
-i915_gem_object_has_pages(struct drm_i915_gem_object *obj)
-{
-	return !IS_ERR_OR_NULL(READ_ONCE(obj->mm.pages));
-}
-
-static inline void
-__i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
-{
-	GEM_BUG_ON(!i915_gem_object_has_pages(obj));
-
-	atomic_inc(&obj->mm.pages_pin_count);
-}
-
-static inline bool
-i915_gem_object_has_pinned_pages(struct drm_i915_gem_object *obj)
-{
-	return atomic_read(&obj->mm.pages_pin_count);
-}
-
-static inline void
-__i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
-{
-	GEM_BUG_ON(!i915_gem_object_has_pages(obj));
-	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
-
-	atomic_dec(&obj->mm.pages_pin_count);
-}
-
-static inline void
-i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
-{
-	__i915_gem_object_unpin_pages(obj);
-}
-
-enum i915_mm_subclass { /* lockdep subclass for obj->mm.lock/struct_mutex */
-	I915_MM_NORMAL = 0,
-	I915_MM_SHRINKER /* called "recursively" from direct-reclaim-esque */
-};
-
-int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
-				enum i915_mm_subclass subclass);
-void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj);
-
-enum i915_map_type {
-	I915_MAP_WB = 0,
-	I915_MAP_WC,
-#define I915_MAP_OVERRIDE BIT(31)
-	I915_MAP_FORCE_WB = I915_MAP_WB | I915_MAP_OVERRIDE,
-	I915_MAP_FORCE_WC = I915_MAP_WC | I915_MAP_OVERRIDE,
-};
-
-static inline enum i915_map_type
-i915_coherent_map_type(struct drm_i915_private *i915)
-{
-	return HAS_LLC(i915) ? I915_MAP_WB : I915_MAP_WC;
-}
-
-/**
- * i915_gem_object_pin_map - return a contiguous mapping of the entire object
- * @obj: the object to map into kernel address space
- * @type: the type of mapping, used to select pgprot_t
- *
- * Calls i915_gem_object_pin_pages() to prevent reaping of the object's
- * pages and then returns a contiguous mapping of the backing storage into
- * the kernel address space. Based on the @type of mapping, the PTE will be
- * set to either WriteBack or WriteCombine (via pgprot_t).
- *
- * The caller is responsible for calling i915_gem_object_unpin_map() when the
- * mapping is no longer required.
- *
- * Returns the pointer through which to access the mapped object, or an
- * ERR_PTR() on error.
- */
-void *__must_check i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
-					   enum i915_map_type type);
-
-/**
- * i915_gem_object_unpin_map - releases an earlier mapping
- * @obj: the object to unmap
- *
- * After pinning the object and mapping its pages, once you are finished
- * with your access, call i915_gem_object_unpin_map() to release the pin
- * upon the mapping. Once the pin count reaches zero, that mapping may be
- * removed.
- */
-static inline void i915_gem_object_unpin_map(struct drm_i915_gem_object *obj)
-{
-	i915_gem_object_unpin_pages(obj);
-}
-
 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
 				    unsigned int *needs_clflush);
 int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
@@ -3589,4 +3466,10 @@ static inline u32 i915_scratch_offset(const struct drm_i915_private *i915)
 	return i915_ggtt_offset(i915->gt.scratch);
 }
 
+static inline enum i915_map_type
+i915_coherent_map_type(struct drm_i915_private *i915)
+{
+	return HAS_LLC(i915) ? I915_MAP_WB : I915_MAP_WC;
+}
+
 #endif
diff --git a/drivers/gpu/drm/i915/i915_globals.c b/drivers/gpu/drm/i915/i915_globals.c
index 2f5c72e2a9d1..f3858d42183f 100644
--- a/drivers/gpu/drm/i915/i915_globals.c
+++ b/drivers/gpu/drm/i915/i915_globals.c
@@ -9,7 +9,7 @@
 
 #include "i915_active.h"
 #include "i915_gem_context.h"
-#include "i915_gem_object.h"
+#include "gem/i915_gem_object.h"
 #include "i915_globals.h"
 #include "i915_request.h"
 #include "i915_scheduler.h"
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 6eab70953a57..7950fa96ee86 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -32,7 +32,7 @@
 
 #include "i915_gem_gtt.h"
 #include "i915_gem_fence_reg.h"
-#include "i915_gem_object.h"
+#include "gem/i915_gem_object.h"
 
 #include "i915_active.h"
 #include "i915_request.h"
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 78+ messages in thread

* [PATCH 32/43] drm/i915: Move shmem object setup to its own file
  2019-03-06 14:24 RFC Breaking up GEM struct_mutex for async-pages Chris Wilson
                   ` (30 preceding siblings ...)
  2019-03-06 14:25 ` [PATCH 31/43] drm/i915: Move object->pages API to i915_gem_object.[ch] Chris Wilson
@ 2019-03-06 14:25 ` Chris Wilson
  2019-03-06 17:05   ` Matthew Auld
  2019-03-06 14:25 ` [PATCH 33/43] drm/i915: Move phys objects " Chris Wilson
                   ` (13 subsequent siblings)
  45 siblings, 1 reply; 78+ messages in thread
From: Chris Wilson @ 2019-03-06 14:25 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld

Split the plain old shmem object into its own file to start decluttering
i915_gem.c

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Makefile                 |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_object.c    | 300 ++++++-
 drivers/gpu/drm/i915/gem/i915_gem_object.h    |  39 +
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c     | 494 +++++++++++
 drivers/gpu/drm/i915/gvt/cmd_parser.c         |   8 +-
 drivers/gpu/drm/i915/i915_drv.h               |  10 -
 drivers/gpu/drm/i915/i915_gem.c               | 811 +-----------------
 drivers/gpu/drm/i915/i915_perf.c              |   2 +-
 drivers/gpu/drm/i915/intel_fbdev.c            |   2 +-
 drivers/gpu/drm/i915/intel_guc.c              |   2 +-
 drivers/gpu/drm/i915/intel_lrc.c              |   4 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c       |   2 +-
 drivers/gpu/drm/i915/intel_uc_fw.c            |   3 +-
 drivers/gpu/drm/i915/selftests/huge_pages.c   |   6 +-
 .../gpu/drm/i915/selftests/i915_gem_dmabuf.c  |   8 +-
 .../gpu/drm/i915/selftests/i915_gem_object.c  |   4 +-
 16 files changed, 860 insertions(+), 836 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_shmem.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index d270f01e1091..54418ce5faac 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -72,6 +72,7 @@ i915-$(CONFIG_DRM_I915_WERROR) += \
 # GEM code
 i915-y += \
 	  gem/i915_gem_object.o \
+	  gem/i915_gem_shmem.o \
 	  i915_active.o \
 	  i915_cmd_parser.o \
 	  i915_gem_batch_pool.o \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 93f4d92c9909..05efce885961 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -23,9 +23,10 @@
  */
 
 #include "i915_gem_object.h"
-#include "i915_globals.h"
 
 #include "../i915_drv.h"
+#include "../i915_globals.h"
+#include "../intel_frontbuffer.h"
 
 static struct i915_global_object {
 	struct i915_global base;
@@ -42,6 +43,64 @@ void i915_gem_object_free(struct drm_i915_gem_object *obj)
 	return kmem_cache_free(global.slab_objects, obj);
 }
 
+/* some bookkeeping */
+static void i915_gem_info_add_obj(struct drm_i915_private *i915,
+				  u64 size)
+{
+	spin_lock(&i915->mm.object_stat_lock);
+	i915->mm.object_count++;
+	i915->mm.object_memory += size;
+	spin_unlock(&i915->mm.object_stat_lock);
+}
+
+static void i915_gem_info_remove_obj(struct drm_i915_private *i915,
+				     u64 size)
+{
+	spin_lock(&i915->mm.object_stat_lock);
+	i915->mm.object_count--;
+	i915->mm.object_memory -= size;
+	spin_unlock(&i915->mm.object_stat_lock);
+}
+
+static void
+frontbuffer_retire(struct i915_active_request *active,
+		   struct i915_request *request)
+{
+	struct drm_i915_gem_object *obj =
+		container_of(active, typeof(*obj), frontbuffer_write);
+
+	intel_fb_obj_flush(obj, ORIGIN_CS);
+}
+
+void i915_gem_object_init(struct drm_i915_gem_object *obj,
+			  const struct drm_i915_gem_object_ops *ops)
+{
+	mutex_init(&obj->mm.lock);
+
+	spin_lock_init(&obj->vma.lock);
+	INIT_LIST_HEAD(&obj->vma.list);
+
+	INIT_LIST_HEAD(&obj->lut_list);
+	INIT_LIST_HEAD(&obj->batch_pool_link);
+
+	init_rcu_head(&obj->rcu);
+
+	obj->ops = ops;
+
+	reservation_object_init(&obj->__builtin_resv);
+	obj->resv = &obj->__builtin_resv;
+
+	obj->frontbuffer_ggtt_origin = ORIGIN_GTT;
+	i915_active_request_init(&obj->frontbuffer_write,
+				 NULL, frontbuffer_retire);
+
+	obj->mm.madv = I915_MADV_WILLNEED;
+	INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN);
+	mutex_init(&obj->mm.get_page.lock);
+
+	i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size);
+}
+
 /**
  * Mark up the object's coherency levels for a given cache_level
  * @obj: #drm_i915_gem_object
@@ -64,6 +123,245 @@ void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
 		!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE);
 }
 
+void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
+{
+	struct drm_i915_private *i915 = to_i915(gem->dev);
+	struct drm_i915_gem_object *obj = to_intel_bo(gem);
+	struct drm_i915_file_private *fpriv = file->driver_priv;
+	struct i915_lut_handle *lut, *ln;
+
+	mutex_lock(&i915->drm.struct_mutex);
+
+	list_for_each_entry_safe(lut, ln, &obj->lut_list, obj_link) {
+		struct i915_gem_context *ctx = lut->ctx;
+		struct i915_vma *vma;
+
+		GEM_BUG_ON(ctx->file_priv == ERR_PTR(-EBADF));
+		if (ctx->file_priv != fpriv)
+			continue;
+
+		vma = radix_tree_delete(&ctx->handles_vma, lut->handle);
+		GEM_BUG_ON(vma->obj != obj);
+
+		/* We allow the process to have multiple handles to the same
+		 * vma, in the same fd namespace, by virtue of flink/open.
+		 */
+		GEM_BUG_ON(!vma->open_count);
+		if (!--vma->open_count && !i915_vma_is_ggtt(vma))
+			i915_vma_close(vma);
+
+		list_del(&lut->obj_link);
+		list_del(&lut->ctx_link);
+
+		i915_lut_handle_free(lut);
+		__i915_gem_object_release_unless_active(obj);
+	}
+
+	mutex_unlock(&i915->drm.struct_mutex);
+}
+
+static bool discard_backing_storage(struct drm_i915_gem_object *obj)
+{
+	/* If we are the last user of the backing storage (be it shmemfs
+	 * pages or stolen etc), we know that the pages are going to be
+	 * immediately released. In this case, we can then skip copying
+	 * back the contents from the GPU.
+	 */
+
+	if (obj->mm.madv != I915_MADV_WILLNEED)
+		return false;
+
+	if (!obj->base.filp)
+		return true;
+
+	/* At first glance, this looks racy, but then again so would be
+	 * userspace racing mmap against close. However, the first external
+	 * reference to the filp can only be obtained through the
+	 * i915_gem_mmap_ioctl() which safeguards us against the user
+	 * acquiring such a reference whilst we are in the middle of
+	 * freeing the object.
+	 */
+	return atomic_long_read(&obj->base.filp->f_count) == 1;
+}
+
+static void __i915_gem_free_objects(struct drm_i915_private *i915,
+				    struct llist_node *freed)
+{
+	struct drm_i915_gem_object *obj, *on;
+	intel_wakeref_t wakeref;
+
+	wakeref = intel_runtime_pm_get(i915);
+	llist_for_each_entry_safe(obj, on, freed, freed) {
+		struct i915_vma *vma, *vn;
+
+		trace_i915_gem_object_destroy(obj);
+
+		mutex_lock(&i915->drm.struct_mutex);
+
+		GEM_BUG_ON(i915_gem_object_is_active(obj));
+		list_for_each_entry_safe(vma, vn, &obj->vma.list, obj_link) {
+			GEM_BUG_ON(i915_vma_is_active(vma));
+			vma->flags &= ~I915_VMA_PIN_MASK;
+			i915_vma_destroy(vma);
+		}
+		GEM_BUG_ON(!list_empty(&obj->vma.list));
+		GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma.tree));
+
+		/* This serializes freeing with the shrinker. Since the free
+		 * is delayed, first by RCU then by the workqueue, we want the
+		 * shrinker to be able to free pages of unreferenced objects,
+		 * or else we may oom whilst there are plenty of deferred
+		 * freed objects.
+		 */
+		if (i915_gem_object_has_pages(obj)) {
+			spin_lock(&i915->mm.obj_lock);
+			list_del_init(&obj->mm.link);
+			spin_unlock(&i915->mm.obj_lock);
+		}
+
+		mutex_unlock(&i915->drm.struct_mutex);
+
+		GEM_BUG_ON(obj->bind_count);
+		GEM_BUG_ON(obj->userfault_count);
+		GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits));
+		GEM_BUG_ON(!list_empty(&obj->lut_list));
+
+		if (obj->ops->release)
+			obj->ops->release(obj);
+
+		if (WARN_ON(i915_gem_object_has_pinned_pages(obj)))
+			atomic_set(&obj->mm.pages_pin_count, 0);
+		__i915_gem_object_put_pages(obj, I915_MM_NORMAL);
+		GEM_BUG_ON(i915_gem_object_has_pages(obj));
+
+		if (obj->base.import_attach)
+			drm_prime_gem_destroy(&obj->base, NULL);
+
+		reservation_object_fini(&obj->__builtin_resv);
+		drm_gem_object_release(&obj->base);
+		i915_gem_info_remove_obj(i915, obj->base.size);
+
+		kfree(obj->bit_17);
+		i915_gem_object_free(obj);
+
+		GEM_BUG_ON(!atomic_read(&i915->mm.free_count));
+		atomic_dec(&i915->mm.free_count);
+
+		if (on)
+			cond_resched();
+	}
+	intel_runtime_pm_put(i915, wakeref);
+}
+
+void i915_gem_flush_free_objects(struct drm_i915_private *i915)
+{
+	struct llist_node *freed;
+
+	/* Free the oldest, most stale object to keep the free_list short */
+	freed = NULL;
+	if (!llist_empty(&i915->mm.free_list)) { /* quick test for hotpath */
+		/* Only one consumer of llist_del_first() allowed */
+		spin_lock(&i915->mm.free_lock);
+		freed = llist_del_first(&i915->mm.free_list);
+		spin_unlock(&i915->mm.free_lock);
+	}
+	if (unlikely(freed)) {
+		freed->next = NULL;
+		__i915_gem_free_objects(i915, freed);
+	}
+}
+
+static void __i915_gem_free_work(struct work_struct *work)
+{
+	struct drm_i915_private *i915 =
+		container_of(work, struct drm_i915_private, mm.free_work);
+	struct llist_node *freed;
+
+	/*
+	 * All file-owned VMA should have been released by this point through
+	 * i915_gem_close_object(), or earlier by i915_gem_context_close().
+	 * However, the object may also be bound into the global GTT (e.g.
+	 * older GPUs without per-process support, or for direct access through
+	 * the GTT either for the user or for scanout). Those VMA still need to
+	 * unbound now.
+	 */
+
+	spin_lock(&i915->mm.free_lock);
+	while ((freed = llist_del_all(&i915->mm.free_list))) {
+		spin_unlock(&i915->mm.free_lock);
+
+		__i915_gem_free_objects(i915, freed);
+		if (need_resched())
+			return;
+
+		spin_lock(&i915->mm.free_lock);
+	}
+	spin_unlock(&i915->mm.free_lock);
+}
+
+static void __i915_gem_free_object_rcu(struct rcu_head *head)
+{
+	struct drm_i915_gem_object *obj =
+		container_of(head, typeof(*obj), rcu);
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+
+	/*
+	 * We reuse obj->rcu for the freed list, so we had better not treat
+	 * it like a rcu_head from this point forwards. And we expect all
+	 * objects to be freed via this path.
+	 */
+	destroy_rcu_head(&obj->rcu);
+
+	/*
+	 * Since we require blocking on struct_mutex to unbind the freed
+	 * object from the GPU before releasing resources back to the
+	 * system, we can not do that directly from the RCU callback (which may
+	 * be a softirq context), but must instead then defer that work onto a
+	 * kthread. We use the RCU callback rather than move the freed object
+	 * directly onto the work queue so that we can mix between using the
+	 * worker and performing frees directly from subsequent allocations for
+	 * crude but effective memory throttling.
+	 */
+	if (llist_add(&obj->freed, &i915->mm.free_list))
+		queue_work(i915->wq, &i915->mm.free_work);
+}
+
+void i915_gem_free_object(struct drm_gem_object *gem_obj)
+{
+	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
+
+	if (obj->mm.quirked)
+		__i915_gem_object_unpin_pages(obj);
+
+	if (discard_backing_storage(obj))
+		obj->mm.madv = I915_MADV_DONTNEED;
+
+	/*
+	 * Before we free the object, make sure any pure RCU-only
+	 * read-side critical sections are complete, e.g.
+	 * i915_gem_busy_ioctl(). For the corresponding synchronized
+	 * lookup see i915_gem_object_lookup_rcu().
+	 */
+	atomic_inc(&to_i915(obj->base.dev)->mm.free_count);
+	call_rcu(&obj->rcu, __i915_gem_free_object_rcu);
+}
+
+void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj)
+{
+	lockdep_assert_held(&obj->base.dev->struct_mutex);
+
+	if (!i915_gem_object_has_active_reference(obj) &&
+	    i915_gem_object_is_active(obj))
+		i915_gem_object_set_active_reference(obj);
+	else
+		i915_gem_object_put(obj);
+}
+
+void i915_gem_init__objects(struct drm_i915_private *i915)
+{
+	INIT_WORK(&i915->mm.free_work, __i915_gem_free_work);
+}
+
 static void i915_global_objects_shrink(void)
 {
 	kmem_cache_shrink(global.slab_objects);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 80d866de34a8..05bbb3f33904 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -15,9 +15,29 @@
 
 #include "i915_gem_object_types.h"
 
+void i915_gem_init__objects(struct drm_i915_private *i915);
+
 struct drm_i915_gem_object *i915_gem_object_alloc(void);
 void i915_gem_object_free(struct drm_i915_gem_object *obj);
 
+void i915_gem_object_init(struct drm_i915_gem_object *obj,
+			  const struct drm_i915_gem_object_ops *ops);
+struct drm_i915_gem_object *
+i915_gem_object_create_shmem(struct drm_i915_private *i915, u64 size);
+struct drm_i915_gem_object *
+i915_gem_object_create_shmem_from_data(struct drm_i915_private *i915,
+				       const void *data, size_t size);
+
+extern const struct drm_i915_gem_object_ops i915_gem_shmem_ops;
+void __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
+				     struct sg_table *pages,
+				     bool needs_clflush);
+
+void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file);
+void i915_gem_free_object(struct drm_gem_object *obj);
+
+void i915_gem_flush_free_objects(struct drm_i915_private *i915);
+
 /**
  * i915_gem_object_lookup_rcu - look up a temporary GEM object from its handle
  * @filp: DRM file private date
@@ -330,4 +350,23 @@ void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
 					 unsigned int cache_level);
 void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj);
 
+static inline bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
+{
+	if (obj->cache_dirty)
+		return false;
+
+	if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
+		return true;
+
+	return obj->pin_global; /* currently in use by HW, keep flushed */
+}
+
+static inline void __start_cpu_write(struct drm_i915_gem_object *obj)
+{
+	obj->read_domains = I915_GEM_DOMAIN_CPU;
+	obj->write_domain = I915_GEM_DOMAIN_CPU;
+	if (cpu_write_needs_clflush(obj))
+		obj->cache_dirty = true;
+}
+
 #endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
new file mode 100644
index 000000000000..bccc0944d177
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -0,0 +1,494 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2016 Intel Corporation
+ */
+
+#include <linux/pagevec.h>
+#include <linux/swap.h>
+
+#include "i915_gem_object.h"
+
+#include "../i915_drv.h"
+
+/*
+ * Move pages to appropriate lru and release the pagevec, decrementing the
+ * ref count of those pages.
+ */
+static void check_release_pagevec(struct pagevec *pvec)
+{
+	check_move_unevictable_pages(pvec);
+	__pagevec_release(pvec);
+	cond_resched();
+}
+
+static int shmem_get_pages_gtt(struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	const unsigned long page_count = obj->base.size / PAGE_SIZE;
+	unsigned long i;
+	struct address_space *mapping;
+	struct sg_table *st;
+	struct scatterlist *sg;
+	struct sgt_iter sgt_iter;
+	struct page *page;
+	unsigned long last_pfn = 0;	/* suppress gcc warning */
+	unsigned int max_segment = i915_sg_segment_size();
+	unsigned int sg_page_sizes;
+	struct pagevec pvec;
+	gfp_t noreclaim;
+	int ret;
+
+	/*
+	 * Assert that the object is not currently in any GPU domain. As it
+	 * wasn't in the GTT, there shouldn't be any way it could have been in
+	 * a GPU cache
+	 */
+	GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
+	GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
+
+	/*
+	 * If there's no chance of allocating enough pages for the whole
+	 * object, bail early.
+	 */
+	if (page_count > totalram_pages())
+		return -ENOMEM;
+
+	st = kmalloc(sizeof(*st), GFP_KERNEL);
+	if (!st)
+		return -ENOMEM;
+
+rebuild_st:
+	if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
+		kfree(st);
+		return -ENOMEM;
+	}
+
+	/*
+	 * Get the list of pages out of our struct file.  They'll be pinned
+	 * at this point until we release them.
+	 *
+	 * Fail silently without starting the shrinker
+	 */
+	mapping = obj->base.filp->f_mapping;
+	mapping_set_unevictable(mapping);
+	noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM);
+	noreclaim |= __GFP_NORETRY | __GFP_NOWARN;
+
+	sg = st->sgl;
+	st->nents = 0;
+	sg_page_sizes = 0;
+	for (i = 0; i < page_count; i++) {
+		const unsigned int shrink[] = {
+			(I915_SHRINK_BOUND |
+			 I915_SHRINK_UNBOUND |
+			 I915_SHRINK_PURGEABLE),
+			0,
+		}, *s = shrink;
+		gfp_t gfp = noreclaim;
+
+		do {
+			cond_resched();
+			page = shmem_read_mapping_page_gfp(mapping, i, gfp);
+			if (!IS_ERR(page))
+				break;
+
+			if (!*s) {
+				ret = PTR_ERR(page);
+				goto err_sg;
+			}
+
+			i915_gem_shrink(i915, 2 * page_count, NULL, *s++);
+
+			/*
+			 * We've tried hard to allocate the memory by reaping
+			 * our own buffer, now let the real VM do its job and
+			 * go down in flames if truly OOM.
+			 *
+			 * However, since graphics tend to be disposable,
+			 * defer the oom here by reporting the ENOMEM back
+			 * to userspace.
+			 */
+			if (!*s) {
+				/* reclaim and warn, but no oom */
+				gfp = mapping_gfp_mask(mapping);
+
+				/*
+				 * Our bo are always dirty and so we require
+				 * kswapd to reclaim our pages (direct reclaim
+				 * does not effectively begin pageout of our
+				 * buffers on its own). However, direct reclaim
+				 * only waits for kswapd when under allocation
+				 * congestion. So as a result __GFP_RECLAIM is
+				 * unreliable and fails to actually reclaim our
+				 * dirty pages -- unless you try over and over
+				 * again with !__GFP_NORETRY. However, we still
+				 * want to fail this allocation rather than
+				 * trigger the out-of-memory killer and for
+				 * this we want __GFP_RETRY_MAYFAIL.
+				 */
+				gfp |= __GFP_RETRY_MAYFAIL;
+			}
+		} while (1);
+
+		if (!i ||
+		    sg->length >= max_segment ||
+		    page_to_pfn(page) != last_pfn + 1) {
+			if (i) {
+				sg_page_sizes |= sg->length;
+				sg = sg_next(sg);
+			}
+			st->nents++;
+			sg_set_page(sg, page, PAGE_SIZE, 0);
+		} else {
+			sg->length += PAGE_SIZE;
+		}
+		last_pfn = page_to_pfn(page);
+
+		/* Check that the i965g/gm workaround works. */
+		WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL));
+	}
+	if (sg) { /* loop terminated early; short sg table */
+		sg_page_sizes |= sg->length;
+		sg_mark_end(sg);
+	}
+
+	/* Trim unused sg entries to avoid wasting memory. */
+	i915_sg_trim(st);
+
+	ret = i915_gem_gtt_prepare_pages(obj, st);
+	if (ret) {
+		/*
+		 * DMA remapping failed? One possible cause is that
+		 * it could not reserve enough large entries, asking
+		 * for PAGE_SIZE chunks instead may be helpful.
+		 */
+		if (max_segment > PAGE_SIZE) {
+			for_each_sgt_page(page, sgt_iter, st)
+				put_page(page);
+			sg_free_table(st);
+
+			max_segment = PAGE_SIZE;
+			goto rebuild_st;
+		} else {
+			dev_warn(&i915->drm.pdev->dev,
+				 "Failed to DMA remap %lu pages\n",
+				 page_count);
+			goto err_pages;
+		}
+	}
+
+	if (i915_gem_object_needs_bit17_swizzle(obj))
+		i915_gem_object_do_bit_17_swizzle(obj, st);
+
+	__i915_gem_object_set_pages(obj, st, sg_page_sizes);
+
+	return 0;
+
+err_sg:
+	sg_mark_end(sg);
+err_pages:
+	mapping_clear_unevictable(mapping);
+	pagevec_init(&pvec);
+	for_each_sgt_page(page, sgt_iter, st) {
+		if (!pagevec_add(&pvec, page))
+			check_release_pagevec(&pvec);
+	}
+	if (pagevec_count(&pvec))
+		check_release_pagevec(&pvec);
+	sg_free_table(st);
+	kfree(st);
+
+	/*
+	 * shmemfs first checks if there is enough memory to allocate the page
+	 * and reports ENOSPC should there be insufficient, along with the usual
+	 * ENOMEM for a genuine allocation failure.
+	 *
+	 * We use ENOSPC in our driver to mean that we have run out of aperture
+	 * space and so want to translate the error from shmemfs back to our
+	 * usual understanding of ENOMEM.
+	 */
+	if (ret == -ENOSPC)
+		ret = -ENOMEM;
+
+	return ret;
+}
+
+void
+__i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
+				struct sg_table *pages,
+				bool needs_clflush)
+{
+	GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED);
+
+	if (obj->mm.madv == I915_MADV_DONTNEED)
+		obj->mm.dirty = false;
+
+	if (needs_clflush &&
+	    (obj->read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
+	    !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
+		drm_clflush_sg(pages);
+
+	__start_cpu_write(obj);
+}
+
+static void
+shmem_put_pages_gtt(struct drm_i915_gem_object *obj, struct sg_table *pages)
+{
+	struct sgt_iter sgt_iter;
+	struct pagevec pvec;
+	struct page *page;
+
+	__i915_gem_object_release_shmem(obj, pages, true);
+
+	i915_gem_gtt_finish_pages(obj, pages);
+
+	if (i915_gem_object_needs_bit17_swizzle(obj))
+		i915_gem_object_save_bit_17_swizzle(obj, pages);
+
+	mapping_clear_unevictable(file_inode(obj->base.filp)->i_mapping);
+
+	pagevec_init(&pvec);
+	for_each_sgt_page(page, sgt_iter, pages) {
+		if (obj->mm.dirty)
+			set_page_dirty(page);
+
+		if (obj->mm.madv == I915_MADV_WILLNEED)
+			mark_page_accessed(page);
+
+		if (!pagevec_add(&pvec, page))
+			check_release_pagevec(&pvec);
+	}
+	if (pagevec_count(&pvec))
+		check_release_pagevec(&pvec);
+	obj->mm.dirty = false;
+
+	sg_free_table(pages);
+	kfree(pages);
+}
+
+static int
+shmem_pwrite_gtt(struct drm_i915_gem_object *obj,
+		 const struct drm_i915_gem_pwrite *arg)
+{
+	struct address_space *mapping = obj->base.filp->f_mapping;
+	char __user *user_data = u64_to_user_ptr(arg->data_ptr);
+	u64 remain, offset;
+	unsigned int pg;
+
+	/* Before we instantiate/pin the backing store for our use, we
+	 * can prepopulate the shmemfs filp efficiently using a write into
+	 * the pagecache. We avoid the penalty of instantiating all the
+	 * pages, important if the user is just writing to a few and never
+	 * uses the object on the GPU, and using a direct write into shmemfs
+	 * allows it to avoid the cost of retrieving a page (either swapin
+	 * or clearing-before-use) before it is overwritten.
+	 */
+	if (i915_gem_object_has_pages(obj))
+		return -ENODEV;
+
+	if (obj->mm.madv != I915_MADV_WILLNEED)
+		return -EFAULT;
+
+	/* Before the pages are instantiated the object is treated as being
+	 * in the CPU domain. The pages will be clflushed as required before
+	 * use, and we can freely write into the pages directly. If userspace
+	 * races pwrite with any other operation; corruption will ensue -
+	 * that is userspace's prerogative!
+	 */
+
+	remain = arg->size;
+	offset = arg->offset;
+	pg = offset_in_page(offset);
+
+	do {
+		unsigned int len, unwritten;
+		struct page *page;
+		void *data, *vaddr;
+		int err;
+
+		len = PAGE_SIZE - pg;
+		if (len > remain)
+			len = remain;
+
+		err = pagecache_write_begin(obj->base.filp, mapping,
+					    offset, len, 0,
+					    &page, &data);
+		if (err < 0)
+			return err;
+
+		vaddr = kmap(page);
+		unwritten = copy_from_user(vaddr + pg, user_data, len);
+		kunmap(page);
+
+		err = pagecache_write_end(obj->base.filp, mapping,
+					  offset, len, len - unwritten,
+					  page, data);
+		if (err < 0)
+			return err;
+
+		if (unwritten)
+			return -EFAULT;
+
+		remain -= len;
+		user_data += len;
+		offset += len;
+		pg = 0;
+	} while (remain);
+
+	return 0;
+}
+
+const struct drm_i915_gem_object_ops i915_gem_shmem_ops = {
+	.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
+		 I915_GEM_OBJECT_IS_SHRINKABLE,
+
+	.get_pages = shmem_get_pages_gtt,
+	.put_pages = shmem_put_pages_gtt,
+
+	.pwrite = shmem_pwrite_gtt,
+};
+
+static int create_shmem(struct drm_i915_private *i915,
+			struct drm_gem_object *obj,
+			size_t size)
+{
+	unsigned long flags = VM_NORESERVE;
+	struct file *filp;
+
+	drm_gem_private_object_init(&i915->drm, obj, size);
+
+	if (i915->mm.gemfs)
+		filp = shmem_file_setup_with_mnt(i915->mm.gemfs, "i915", size,
+						 flags);
+	else
+		filp = shmem_file_setup("i915", size, flags);
+	if (IS_ERR(filp))
+		return PTR_ERR(filp);
+
+	obj->filp = filp;
+	return 0;
+}
+
+struct drm_i915_gem_object *
+i915_gem_object_create_shmem(struct drm_i915_private *i915, u64 size)
+{
+	struct drm_i915_gem_object *obj;
+	struct address_space *mapping;
+	unsigned int cache_level;
+	gfp_t mask;
+	int ret;
+
+	/* There is a prevalence of the assumption that we fit the object's
+	 * page count inside a 32bit _signed_ variable. Let's document this and
+	 * catch if we ever need to fix it. In the meantime, if you do spot
+	 * such a local variable, please consider fixing!
+	 */
+	if (size >> PAGE_SHIFT > INT_MAX)
+		return ERR_PTR(-E2BIG);
+
+	if (overflows_type(size, obj->base.size))
+		return ERR_PTR(-E2BIG);
+
+	obj = i915_gem_object_alloc();
+	if (!obj)
+		return ERR_PTR(-ENOMEM);
+
+	ret = create_shmem(i915, &obj->base, size);
+	if (ret)
+		goto fail;
+
+	mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
+	if (IS_I965GM(i915) || IS_I965G(i915)) {
+		/* 965gm cannot relocate objects above 4GiB. */
+		mask &= ~__GFP_HIGHMEM;
+		mask |= __GFP_DMA32;
+	}
+
+	mapping = obj->base.filp->f_mapping;
+	mapping_set_gfp_mask(mapping, mask);
+	GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM));
+
+	i915_gem_object_init(obj, &i915_gem_shmem_ops);
+
+	obj->write_domain = I915_GEM_DOMAIN_CPU;
+	obj->read_domains = I915_GEM_DOMAIN_CPU;
+
+	if (HAS_LLC(i915))
+		/* On some devices, we can have the GPU use the LLC (the CPU
+		 * cache) for about a 10% performance improvement
+		 * compared to uncached.  Graphics requests other than
+		 * display scanout are coherent with the CPU in
+		 * accessing this cache.  This means in this mode we
+		 * don't need to clflush on the CPU side, and on the
+		 * GPU side we only need to flush internal caches to
+		 * get data visible to the CPU.
+		 *
+		 * However, we maintain the display planes as UC, and so
+		 * need to rebind when first used as such.
+		 */
+		cache_level = I915_CACHE_LLC;
+	else
+		cache_level = I915_CACHE_NONE;
+
+	i915_gem_object_set_cache_coherency(obj, cache_level);
+
+	trace_i915_gem_object_create(obj);
+
+	return obj;
+
+fail:
+	i915_gem_object_free(obj);
+	return ERR_PTR(ret);
+}
+
+/* Allocate a new GEM object and fill it with the supplied data */
+struct drm_i915_gem_object *
+i915_gem_object_create_shmem_from_data(struct drm_i915_private *dev_priv,
+				       const void *data, size_t size)
+{
+	struct drm_i915_gem_object *obj;
+	struct file *file;
+	size_t offset;
+	int err;
+
+	obj = i915_gem_object_create_shmem(dev_priv, round_up(size, PAGE_SIZE));
+	if (IS_ERR(obj))
+		return obj;
+
+	GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU);
+
+	file = obj->base.filp;
+	offset = 0;
+	do {
+		unsigned int len = min_t(typeof(size), size, PAGE_SIZE);
+		struct page *page;
+		void *pgdata, *vaddr;
+
+		err = pagecache_write_begin(file, file->f_mapping,
+					    offset, len, 0,
+					    &page, &pgdata);
+		if (err < 0)
+			goto fail;
+
+		vaddr = kmap(page);
+		memcpy(vaddr, data, len);
+		kunmap(page);
+
+		err = pagecache_write_end(file, file->f_mapping,
+					  offset, len, len,
+					  page, pgdata);
+		if (err < 0)
+			goto fail;
+
+		size -= len;
+		data += len;
+		offset += len;
+	} while (size);
+
+	return obj;
+
+fail:
+	i915_gem_object_put(obj);
+	return ERR_PTR(err);
+}
diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
index cf4a1ecf6853..05d054e0bfe7 100644
--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
@@ -1726,7 +1726,7 @@ static int perform_bb_shadow(struct parser_exec_state *s)
 	if (bb->ppgtt)
 		gma_start_offset = gma & ~I915_GTT_PAGE_MASK;
 
-	bb->obj = i915_gem_object_create(s->vgpu->gvt->dev_priv,
+	bb->obj = i915_gem_object_create_shmem(s->vgpu->gvt->dev_priv,
 			 roundup(bb_size + gma_start_offset, PAGE_SIZE));
 	if (IS_ERR(bb->obj)) {
 		ret = PTR_ERR(bb->obj);
@@ -2799,9 +2799,9 @@ static int shadow_indirect_ctx(struct intel_shadow_wa_ctx *wa_ctx)
 	int ret = 0;
 	void *map;
 
-	obj = i915_gem_object_create(workload->vgpu->gvt->dev_priv,
-				     roundup(ctx_size + CACHELINE_BYTES,
-					     PAGE_SIZE));
+	obj = i915_gem_object_create_shmem(workload->vgpu->gvt->dev_priv,
+					   roundup(ctx_size + CACHELINE_BYTES,
+						   PAGE_SIZE));
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 90f84d3dca08..ffbb8a10a714 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2744,16 +2744,6 @@ void i915_gem_load_init_fences(struct drm_i915_private *dev_priv);
 int i915_gem_freeze(struct drm_i915_private *dev_priv);
 int i915_gem_freeze_late(struct drm_i915_private *dev_priv);
 
-void i915_gem_object_init(struct drm_i915_gem_object *obj,
-			 const struct drm_i915_gem_object_ops *ops);
-struct drm_i915_gem_object *
-i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size);
-struct drm_i915_gem_object *
-i915_gem_object_create_from_data(struct drm_i915_private *dev_priv,
-				 const void *data, size_t size);
-void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file);
-void i915_gem_free_object(struct drm_gem_object *obj);
-
 static inline void i915_gem_drain_freed_objects(struct drm_i915_private *i915)
 {
 	if (!atomic_read(&i915->mm.free_count))
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 37413c2e596b..5816e32fe30b 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -54,19 +54,6 @@
 #include "intel_mocs.h"
 #include "intel_workarounds.h"
 
-static void i915_gem_flush_free_objects(struct drm_i915_private *i915);
-
-static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
-{
-	if (obj->cache_dirty)
-		return false;
-
-	if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
-		return true;
-
-	return obj->pin_global; /* currently in use by HW, keep flushed */
-}
-
 static int
 insert_mappable_node(struct i915_ggtt *ggtt,
                      struct drm_mm_node *node, u32 size)
@@ -84,25 +71,6 @@ remove_mappable_node(struct drm_mm_node *node)
 	drm_mm_remove_node(node);
 }
 
-/* some bookkeeping */
-static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
-				  u64 size)
-{
-	spin_lock(&dev_priv->mm.object_stat_lock);
-	dev_priv->mm.object_count++;
-	dev_priv->mm.object_memory += size;
-	spin_unlock(&dev_priv->mm.object_stat_lock);
-}
-
-static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
-				     u64 size)
-{
-	spin_lock(&dev_priv->mm.object_stat_lock);
-	dev_priv->mm.object_count--;
-	dev_priv->mm.object_memory -= size;
-	spin_unlock(&dev_priv->mm.object_stat_lock);
-}
-
 static void __i915_gem_park(struct drm_i915_private *i915)
 {
 	intel_wakeref_t wakeref;
@@ -302,32 +270,6 @@ static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
 	return err;
 }
 
-static void __start_cpu_write(struct drm_i915_gem_object *obj)
-{
-	obj->read_domains = I915_GEM_DOMAIN_CPU;
-	obj->write_domain = I915_GEM_DOMAIN_CPU;
-	if (cpu_write_needs_clflush(obj))
-		obj->cache_dirty = true;
-}
-
-static void
-__i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
-				struct sg_table *pages,
-				bool needs_clflush)
-{
-	GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED);
-
-	if (obj->mm.madv == I915_MADV_DONTNEED)
-		obj->mm.dirty = false;
-
-	if (needs_clflush &&
-	    (obj->read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
-	    !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
-		drm_clflush_sg(pages);
-
-	__start_cpu_write(obj);
-}
-
 static void
 i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj,
 			       struct sg_table *pages)
@@ -379,8 +321,6 @@ static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
 	.release = i915_gem_object_release_phys,
 };
 
-static const struct drm_i915_gem_object_ops i915_gem_object_ops;
-
 int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
 {
 	struct i915_vma *vma;
@@ -631,12 +571,12 @@ i915_gem_create(struct drm_file *file,
 	int ret;
 	u32 handle;
 
-	size = roundup(size, PAGE_SIZE);
+	size = round_up(size, PAGE_SIZE);
 	if (size == 0)
 		return -EINVAL;
 
 	/* Allocate the new object */
-	obj = i915_gem_object_create(dev_priv, size);
+	obj = i915_gem_object_create_shmem(dev_priv, size);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
@@ -2171,53 +2111,6 @@ void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
 	invalidate_mapping_pages(mapping, 0, (loff_t)-1);
 }
 
-/*
- * Move pages to appropriate lru and release the pagevec, decrementing the
- * ref count of those pages.
- */
-static void check_release_pagevec(struct pagevec *pvec)
-{
-	check_move_unevictable_pages(pvec);
-	__pagevec_release(pvec);
-	cond_resched();
-}
-
-static void
-i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj,
-			      struct sg_table *pages)
-{
-	struct sgt_iter sgt_iter;
-	struct pagevec pvec;
-	struct page *page;
-
-	__i915_gem_object_release_shmem(obj, pages, true);
-
-	i915_gem_gtt_finish_pages(obj, pages);
-
-	if (i915_gem_object_needs_bit17_swizzle(obj))
-		i915_gem_object_save_bit_17_swizzle(obj, pages);
-
-	mapping_clear_unevictable(file_inode(obj->base.filp)->i_mapping);
-
-	pagevec_init(&pvec);
-	for_each_sgt_page(page, sgt_iter, pages) {
-		if (obj->mm.dirty)
-			set_page_dirty(page);
-
-		if (obj->mm.madv == I915_MADV_WILLNEED)
-			mark_page_accessed(page);
-
-		if (!pagevec_add(&pvec, page))
-			check_release_pagevec(&pvec);
-	}
-	if (pagevec_count(&pvec))
-		check_release_pagevec(&pvec);
-	obj->mm.dirty = false;
-
-	sg_free_table(pages);
-	kfree(pages);
-}
-
 static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
 {
 	struct radix_tree_iter iter;
@@ -2333,196 +2226,6 @@ bool i915_sg_trim(struct sg_table *orig_st)
 	return true;
 }
 
-static int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
-{
-	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
-	const unsigned long page_count = obj->base.size / PAGE_SIZE;
-	unsigned long i;
-	struct address_space *mapping;
-	struct sg_table *st;
-	struct scatterlist *sg;
-	struct sgt_iter sgt_iter;
-	struct page *page;
-	unsigned long last_pfn = 0;	/* suppress gcc warning */
-	unsigned int max_segment = i915_sg_segment_size();
-	unsigned int sg_page_sizes;
-	struct pagevec pvec;
-	gfp_t noreclaim;
-	int ret;
-
-	/*
-	 * Assert that the object is not currently in any GPU domain. As it
-	 * wasn't in the GTT, there shouldn't be any way it could have been in
-	 * a GPU cache
-	 */
-	GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
-	GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
-
-	/*
-	 * If there's no chance of allocating enough pages for the whole
-	 * object, bail early.
-	 */
-	if (page_count > totalram_pages())
-		return -ENOMEM;
-
-	st = kmalloc(sizeof(*st), GFP_KERNEL);
-	if (st == NULL)
-		return -ENOMEM;
-
-rebuild_st:
-	if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
-		kfree(st);
-		return -ENOMEM;
-	}
-
-	/*
-	 * Get the list of pages out of our struct file.  They'll be pinned
-	 * at this point until we release them.
-	 *
-	 * Fail silently without starting the shrinker
-	 */
-	mapping = obj->base.filp->f_mapping;
-	mapping_set_unevictable(mapping);
-	noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM);
-	noreclaim |= __GFP_NORETRY | __GFP_NOWARN;
-
-	sg = st->sgl;
-	st->nents = 0;
-	sg_page_sizes = 0;
-	for (i = 0; i < page_count; i++) {
-		const unsigned int shrink[] = {
-			I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_PURGEABLE,
-			0,
-		}, *s = shrink;
-		gfp_t gfp = noreclaim;
-
-		do {
-			cond_resched();
-			page = shmem_read_mapping_page_gfp(mapping, i, gfp);
-			if (!IS_ERR(page))
-				break;
-
-			if (!*s) {
-				ret = PTR_ERR(page);
-				goto err_sg;
-			}
-
-			i915_gem_shrink(dev_priv, 2 * page_count, NULL, *s++);
-
-			/*
-			 * We've tried hard to allocate the memory by reaping
-			 * our own buffer, now let the real VM do its job and
-			 * go down in flames if truly OOM.
-			 *
-			 * However, since graphics tend to be disposable,
-			 * defer the oom here by reporting the ENOMEM back
-			 * to userspace.
-			 */
-			if (!*s) {
-				/* reclaim and warn, but no oom */
-				gfp = mapping_gfp_mask(mapping);
-
-				/*
-				 * Our bo are always dirty and so we require
-				 * kswapd to reclaim our pages (direct reclaim
-				 * does not effectively begin pageout of our
-				 * buffers on its own). However, direct reclaim
-				 * only waits for kswapd when under allocation
-				 * congestion. So as a result __GFP_RECLAIM is
-				 * unreliable and fails to actually reclaim our
-				 * dirty pages -- unless you try over and over
-				 * again with !__GFP_NORETRY. However, we still
-				 * want to fail this allocation rather than
-				 * trigger the out-of-memory killer and for
-				 * this we want __GFP_RETRY_MAYFAIL.
-				 */
-				gfp |= __GFP_RETRY_MAYFAIL;
-			}
-		} while (1);
-
-		if (!i ||
-		    sg->length >= max_segment ||
-		    page_to_pfn(page) != last_pfn + 1) {
-			if (i) {
-				sg_page_sizes |= sg->length;
-				sg = sg_next(sg);
-			}
-			st->nents++;
-			sg_set_page(sg, page, PAGE_SIZE, 0);
-		} else {
-			sg->length += PAGE_SIZE;
-		}
-		last_pfn = page_to_pfn(page);
-
-		/* Check that the i965g/gm workaround works. */
-		WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL));
-	}
-	if (sg) { /* loop terminated early; short sg table */
-		sg_page_sizes |= sg->length;
-		sg_mark_end(sg);
-	}
-
-	/* Trim unused sg entries to avoid wasting memory. */
-	i915_sg_trim(st);
-
-	ret = i915_gem_gtt_prepare_pages(obj, st);
-	if (ret) {
-		/*
-		 * DMA remapping failed? One possible cause is that
-		 * it could not reserve enough large entries, asking
-		 * for PAGE_SIZE chunks instead may be helpful.
-		 */
-		if (max_segment > PAGE_SIZE) {
-			for_each_sgt_page(page, sgt_iter, st)
-				put_page(page);
-			sg_free_table(st);
-
-			max_segment = PAGE_SIZE;
-			goto rebuild_st;
-		} else {
-			dev_warn(&dev_priv->drm.pdev->dev,
-				 "Failed to DMA remap %lu pages\n",
-				 page_count);
-			goto err_pages;
-		}
-	}
-
-	if (i915_gem_object_needs_bit17_swizzle(obj))
-		i915_gem_object_do_bit_17_swizzle(obj, st);
-
-	__i915_gem_object_set_pages(obj, st, sg_page_sizes);
-
-	return 0;
-
-err_sg:
-	sg_mark_end(sg);
-err_pages:
-	mapping_clear_unevictable(mapping);
-	pagevec_init(&pvec);
-	for_each_sgt_page(page, sgt_iter, st) {
-		if (!pagevec_add(&pvec, page))
-			check_release_pagevec(&pvec);
-	}
-	if (pagevec_count(&pvec))
-		check_release_pagevec(&pvec);
-	sg_free_table(st);
-	kfree(st);
-
-	/*
-	 * shmemfs first checks if there is enough memory to allocate the page
-	 * and reports ENOSPC should there be insufficient, along with the usual
-	 * ENOMEM for a genuine allocation failure.
-	 *
-	 * We use ENOSPC in our driver to mean that we have run out of aperture
-	 * space and so want to translate the error from shmemfs back to our
-	 * usual understanding of ENOMEM.
-	 */
-	if (ret == -ENOSPC)
-		ret = -ENOMEM;
-
-	return ret;
-}
-
 void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
 				 struct sg_table *pages,
 				 unsigned int sg_page_sizes)
@@ -2734,78 +2437,6 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
 	goto out_unlock;
 }
 
-static int
-i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj,
-			   const struct drm_i915_gem_pwrite *arg)
-{
-	struct address_space *mapping = obj->base.filp->f_mapping;
-	char __user *user_data = u64_to_user_ptr(arg->data_ptr);
-	u64 remain, offset;
-	unsigned int pg;
-
-	/* Before we instantiate/pin the backing store for our use, we
-	 * can prepopulate the shmemfs filp efficiently using a write into
-	 * the pagecache. We avoid the penalty of instantiating all the
-	 * pages, important if the user is just writing to a few and never
-	 * uses the object on the GPU, and using a direct write into shmemfs
-	 * allows it to avoid the cost of retrieving a page (either swapin
-	 * or clearing-before-use) before it is overwritten.
-	 */
-	if (i915_gem_object_has_pages(obj))
-		return -ENODEV;
-
-	if (obj->mm.madv != I915_MADV_WILLNEED)
-		return -EFAULT;
-
-	/* Before the pages are instantiated the object is treated as being
-	 * in the CPU domain. The pages will be clflushed as required before
-	 * use, and we can freely write into the pages directly. If userspace
-	 * races pwrite with any other operation; corruption will ensue -
-	 * that is userspace's prerogative!
-	 */
-
-	remain = arg->size;
-	offset = arg->offset;
-	pg = offset_in_page(offset);
-
-	do {
-		unsigned int len, unwritten;
-		struct page *page;
-		void *data, *vaddr;
-		int err;
-
-		len = PAGE_SIZE - pg;
-		if (len > remain)
-			len = remain;
-
-		err = pagecache_write_begin(obj->base.filp, mapping,
-					    offset, len, 0,
-					    &page, &data);
-		if (err < 0)
-			return err;
-
-		vaddr = kmap(page);
-		unwritten = copy_from_user(vaddr + pg, user_data, len);
-		kunmap(page);
-
-		err = pagecache_write_end(obj->base.filp, mapping,
-					  offset, len, len - unwritten,
-					  page, data);
-		if (err < 0)
-			return err;
-
-		if (unwritten)
-			return -EFAULT;
-
-		remain -= len;
-		user_data += len;
-		offset += len;
-		pg = 0;
-	} while (remain);
-
-	return 0;
-}
-
 static void
 i915_gem_retire_work_handler(struct work_struct *work)
 {
@@ -2920,43 +2551,6 @@ i915_gem_idle_work_handler(struct work_struct *work)
 	}
 }
 
-void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
-{
-	struct drm_i915_private *i915 = to_i915(gem->dev);
-	struct drm_i915_gem_object *obj = to_intel_bo(gem);
-	struct drm_i915_file_private *fpriv = file->driver_priv;
-	struct i915_lut_handle *lut, *ln;
-
-	mutex_lock(&i915->drm.struct_mutex);
-
-	list_for_each_entry_safe(lut, ln, &obj->lut_list, obj_link) {
-		struct i915_gem_context *ctx = lut->ctx;
-		struct i915_vma *vma;
-
-		GEM_BUG_ON(ctx->file_priv == ERR_PTR(-EBADF));
-		if (ctx->file_priv != fpriv)
-			continue;
-
-		vma = radix_tree_delete(&ctx->handles_vma, lut->handle);
-		GEM_BUG_ON(vma->obj != obj);
-
-		/* We allow the process to have multiple handles to the same
-		 * vma, in the same fd namespace, by virtue of flink/open.
-		 */
-		GEM_BUG_ON(!vma->open_count);
-		if (!--vma->open_count && !i915_vma_is_ggtt(vma))
-			i915_vma_close(vma);
-
-		list_del(&lut->obj_link);
-		list_del(&lut->ctx_link);
-
-		i915_lut_handle_free(lut);
-		__i915_gem_object_release_unless_active(obj);
-	}
-
-	mutex_unlock(&i915->drm.struct_mutex);
-}
-
 static unsigned long to_wait_timeout(s64 timeout_ns)
 {
 	if (timeout_ns < 0)
@@ -3956,348 +3550,6 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
 	return err;
 }
 
-static void
-frontbuffer_retire(struct i915_active_request *active,
-		   struct i915_request *request)
-{
-	struct drm_i915_gem_object *obj =
-		container_of(active, typeof(*obj), frontbuffer_write);
-
-	intel_fb_obj_flush(obj, ORIGIN_CS);
-}
-
-void i915_gem_object_init(struct drm_i915_gem_object *obj,
-			  const struct drm_i915_gem_object_ops *ops)
-{
-	mutex_init(&obj->mm.lock);
-
-	spin_lock_init(&obj->vma.lock);
-	INIT_LIST_HEAD(&obj->vma.list);
-
-	INIT_LIST_HEAD(&obj->lut_list);
-	INIT_LIST_HEAD(&obj->batch_pool_link);
-
-	init_rcu_head(&obj->rcu);
-
-	obj->ops = ops;
-
-	reservation_object_init(&obj->__builtin_resv);
-	obj->resv = &obj->__builtin_resv;
-
-	obj->frontbuffer_ggtt_origin = ORIGIN_GTT;
-	i915_active_request_init(&obj->frontbuffer_write,
-				 NULL, frontbuffer_retire);
-
-	obj->mm.madv = I915_MADV_WILLNEED;
-	INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN);
-	mutex_init(&obj->mm.get_page.lock);
-
-	i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size);
-}
-
-static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
-	.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
-		 I915_GEM_OBJECT_IS_SHRINKABLE,
-
-	.get_pages = i915_gem_object_get_pages_gtt,
-	.put_pages = i915_gem_object_put_pages_gtt,
-
-	.pwrite = i915_gem_object_pwrite_gtt,
-};
-
-static int i915_gem_object_create_shmem(struct drm_device *dev,
-					struct drm_gem_object *obj,
-					size_t size)
-{
-	struct drm_i915_private *i915 = to_i915(dev);
-	unsigned long flags = VM_NORESERVE;
-	struct file *filp;
-
-	drm_gem_private_object_init(dev, obj, size);
-
-	if (i915->mm.gemfs)
-		filp = shmem_file_setup_with_mnt(i915->mm.gemfs, "i915", size,
-						 flags);
-	else
-		filp = shmem_file_setup("i915", size, flags);
-
-	if (IS_ERR(filp))
-		return PTR_ERR(filp);
-
-	obj->filp = filp;
-
-	return 0;
-}
-
-struct drm_i915_gem_object *
-i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
-{
-	struct drm_i915_gem_object *obj;
-	struct address_space *mapping;
-	unsigned int cache_level;
-	gfp_t mask;
-	int ret;
-
-	/* There is a prevalence of the assumption that we fit the object's
-	 * page count inside a 32bit _signed_ variable. Let's document this and
-	 * catch if we ever need to fix it. In the meantime, if you do spot
-	 * such a local variable, please consider fixing!
-	 */
-	if (size >> PAGE_SHIFT > INT_MAX)
-		return ERR_PTR(-E2BIG);
-
-	if (overflows_type(size, obj->base.size))
-		return ERR_PTR(-E2BIG);
-
-	obj = i915_gem_object_alloc();
-	if (obj == NULL)
-		return ERR_PTR(-ENOMEM);
-
-	ret = i915_gem_object_create_shmem(&dev_priv->drm, &obj->base, size);
-	if (ret)
-		goto fail;
-
-	mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
-	if (IS_I965GM(dev_priv) || IS_I965G(dev_priv)) {
-		/* 965gm cannot relocate objects above 4GiB. */
-		mask &= ~__GFP_HIGHMEM;
-		mask |= __GFP_DMA32;
-	}
-
-	mapping = obj->base.filp->f_mapping;
-	mapping_set_gfp_mask(mapping, mask);
-	GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM));
-
-	i915_gem_object_init(obj, &i915_gem_object_ops);
-
-	obj->write_domain = I915_GEM_DOMAIN_CPU;
-	obj->read_domains = I915_GEM_DOMAIN_CPU;
-
-	if (HAS_LLC(dev_priv))
-		/* On some devices, we can have the GPU use the LLC (the CPU
-		 * cache) for about a 10% performance improvement
-		 * compared to uncached.  Graphics requests other than
-		 * display scanout are coherent with the CPU in
-		 * accessing this cache.  This means in this mode we
-		 * don't need to clflush on the CPU side, and on the
-		 * GPU side we only need to flush internal caches to
-		 * get data visible to the CPU.
-		 *
-		 * However, we maintain the display planes as UC, and so
-		 * need to rebind when first used as such.
-		 */
-		cache_level = I915_CACHE_LLC;
-	else
-		cache_level = I915_CACHE_NONE;
-
-	i915_gem_object_set_cache_coherency(obj, cache_level);
-
-	trace_i915_gem_object_create(obj);
-
-	return obj;
-
-fail:
-	i915_gem_object_free(obj);
-	return ERR_PTR(ret);
-}
-
-static bool discard_backing_storage(struct drm_i915_gem_object *obj)
-{
-	/* If we are the last user of the backing storage (be it shmemfs
-	 * pages or stolen etc), we know that the pages are going to be
-	 * immediately released. In this case, we can then skip copying
-	 * back the contents from the GPU.
-	 */
-
-	if (obj->mm.madv != I915_MADV_WILLNEED)
-		return false;
-
-	if (obj->base.filp == NULL)
-		return true;
-
-	/* At first glance, this looks racy, but then again so would be
-	 * userspace racing mmap against close. However, the first external
-	 * reference to the filp can only be obtained through the
-	 * i915_gem_mmap_ioctl() which safeguards us against the user
-	 * acquiring such a reference whilst we are in the middle of
-	 * freeing the object.
-	 */
-	return atomic_long_read(&obj->base.filp->f_count) == 1;
-}
-
-static void __i915_gem_free_objects(struct drm_i915_private *i915,
-				    struct llist_node *freed)
-{
-	struct drm_i915_gem_object *obj, *on;
-	intel_wakeref_t wakeref;
-
-	wakeref = intel_runtime_pm_get(i915);
-	llist_for_each_entry_safe(obj, on, freed, freed) {
-		struct i915_vma *vma, *vn;
-
-		trace_i915_gem_object_destroy(obj);
-
-		mutex_lock(&i915->drm.struct_mutex);
-
-		GEM_BUG_ON(i915_gem_object_is_active(obj));
-		list_for_each_entry_safe(vma, vn, &obj->vma.list, obj_link) {
-			GEM_BUG_ON(i915_vma_is_active(vma));
-			vma->flags &= ~I915_VMA_PIN_MASK;
-			i915_vma_destroy(vma);
-		}
-		GEM_BUG_ON(!list_empty(&obj->vma.list));
-		GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma.tree));
-
-		/* This serializes freeing with the shrinker. Since the free
-		 * is delayed, first by RCU then by the workqueue, we want the
-		 * shrinker to be able to free pages of unreferenced objects,
-		 * or else we may oom whilst there are plenty of deferred
-		 * freed objects.
-		 */
-		if (i915_gem_object_has_pages(obj)) {
-			spin_lock(&i915->mm.obj_lock);
-			list_del_init(&obj->mm.link);
-			spin_unlock(&i915->mm.obj_lock);
-		}
-
-		mutex_unlock(&i915->drm.struct_mutex);
-
-		GEM_BUG_ON(obj->bind_count);
-		GEM_BUG_ON(obj->userfault_count);
-		GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits));
-		GEM_BUG_ON(!list_empty(&obj->lut_list));
-
-		if (obj->ops->release)
-			obj->ops->release(obj);
-
-		if (WARN_ON(i915_gem_object_has_pinned_pages(obj)))
-			atomic_set(&obj->mm.pages_pin_count, 0);
-		__i915_gem_object_put_pages(obj, I915_MM_NORMAL);
-		GEM_BUG_ON(i915_gem_object_has_pages(obj));
-
-		if (obj->base.import_attach)
-			drm_prime_gem_destroy(&obj->base, NULL);
-
-		reservation_object_fini(&obj->__builtin_resv);
-		drm_gem_object_release(&obj->base);
-		i915_gem_info_remove_obj(i915, obj->base.size);
-
-		kfree(obj->bit_17);
-		i915_gem_object_free(obj);
-
-		GEM_BUG_ON(!atomic_read(&i915->mm.free_count));
-		atomic_dec(&i915->mm.free_count);
-
-		if (on)
-			cond_resched();
-	}
-	intel_runtime_pm_put(i915, wakeref);
-}
-
-static void i915_gem_flush_free_objects(struct drm_i915_private *i915)
-{
-	struct llist_node *freed;
-
-	/* Free the oldest, most stale object to keep the free_list short */
-	freed = NULL;
-	if (!llist_empty(&i915->mm.free_list)) { /* quick test for hotpath */
-		/* Only one consumer of llist_del_first() allowed */
-		spin_lock(&i915->mm.free_lock);
-		freed = llist_del_first(&i915->mm.free_list);
-		spin_unlock(&i915->mm.free_lock);
-	}
-	if (unlikely(freed)) {
-		freed->next = NULL;
-		__i915_gem_free_objects(i915, freed);
-	}
-}
-
-static void __i915_gem_free_work(struct work_struct *work)
-{
-	struct drm_i915_private *i915 =
-		container_of(work, struct drm_i915_private, mm.free_work);
-	struct llist_node *freed;
-
-	/*
-	 * All file-owned VMA should have been released by this point through
-	 * i915_gem_close_object(), or earlier by i915_gem_context_close().
-	 * However, the object may also be bound into the global GTT (e.g.
-	 * older GPUs without per-process support, or for direct access through
-	 * the GTT either for the user or for scanout). Those VMA still need to
-	 * unbound now.
-	 */
-
-	spin_lock(&i915->mm.free_lock);
-	while ((freed = llist_del_all(&i915->mm.free_list))) {
-		spin_unlock(&i915->mm.free_lock);
-
-		__i915_gem_free_objects(i915, freed);
-		if (need_resched())
-			return;
-
-		spin_lock(&i915->mm.free_lock);
-	}
-	spin_unlock(&i915->mm.free_lock);
-}
-
-static void __i915_gem_free_object_rcu(struct rcu_head *head)
-{
-	struct drm_i915_gem_object *obj =
-		container_of(head, typeof(*obj), rcu);
-	struct drm_i915_private *i915 = to_i915(obj->base.dev);
-
-	/*
-	 * We reuse obj->rcu for the freed list, so we had better not treat
-	 * it like a rcu_head from this point forwards. And we expect all
-	 * objects to be freed via this path.
-	 */
-	destroy_rcu_head(&obj->rcu);
-
-	/*
-	 * Since we require blocking on struct_mutex to unbind the freed
-	 * object from the GPU before releasing resources back to the
-	 * system, we can not do that directly from the RCU callback (which may
-	 * be a softirq context), but must instead then defer that work onto a
-	 * kthread. We use the RCU callback rather than move the freed object
-	 * directly onto the work queue so that we can mix between using the
-	 * worker and performing frees directly from subsequent allocations for
-	 * crude but effective memory throttling.
-	 */
-	if (llist_add(&obj->freed, &i915->mm.free_list))
-		queue_work(i915->wq, &i915->mm.free_work);
-}
-
-void i915_gem_free_object(struct drm_gem_object *gem_obj)
-{
-	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
-
-	if (obj->mm.quirked)
-		__i915_gem_object_unpin_pages(obj);
-
-	if (discard_backing_storage(obj))
-		obj->mm.madv = I915_MADV_DONTNEED;
-
-	/*
-	 * Before we free the object, make sure any pure RCU-only
-	 * read-side critical sections are complete, e.g.
-	 * i915_gem_busy_ioctl(). For the corresponding synchronized
-	 * lookup see i915_gem_object_lookup_rcu().
-	 */
-	atomic_inc(&to_i915(obj->base.dev)->mm.free_count);
-	call_rcu(&obj->rcu, __i915_gem_free_object_rcu);
-}
-
-void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj)
-{
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
-	if (!i915_gem_object_has_active_reference(obj) &&
-	    i915_gem_object_is_active(obj))
-		i915_gem_object_set_active_reference(obj);
-	else
-		i915_gem_object_put(obj);
-}
-
 void i915_gem_sanitize(struct drm_i915_private *i915)
 {
 	intel_wakeref_t wakeref;
@@ -5030,7 +4282,7 @@ static void i915_gem_init__mm(struct drm_i915_private *i915)
 	INIT_LIST_HEAD(&i915->mm.fence_list);
 	INIT_LIST_HEAD(&i915->mm.userfault_list);
 
-	INIT_WORK(&i915->mm.free_work, __i915_gem_free_work);
+	i915_gem_init__objects(i915);
 }
 
 int i915_gem_init_early(struct drm_i915_private *dev_priv)
@@ -5197,57 +4449,6 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
 	}
 }
 
-/* Allocate a new GEM object and fill it with the supplied data */
-struct drm_i915_gem_object *
-i915_gem_object_create_from_data(struct drm_i915_private *dev_priv,
-			         const void *data, size_t size)
-{
-	struct drm_i915_gem_object *obj;
-	struct file *file;
-	size_t offset;
-	int err;
-
-	obj = i915_gem_object_create(dev_priv, round_up(size, PAGE_SIZE));
-	if (IS_ERR(obj))
-		return obj;
-
-	GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU);
-
-	file = obj->base.filp;
-	offset = 0;
-	do {
-		unsigned int len = min_t(typeof(size), size, PAGE_SIZE);
-		struct page *page;
-		void *pgdata, *vaddr;
-
-		err = pagecache_write_begin(file, file->f_mapping,
-					    offset, len, 0,
-					    &page, &pgdata);
-		if (err < 0)
-			goto fail;
-
-		vaddr = kmap(page);
-		memcpy(vaddr, data, len);
-		kunmap(page);
-
-		err = pagecache_write_end(file, file->f_mapping,
-					  offset, len, len,
-					  page, pgdata);
-		if (err < 0)
-			goto fail;
-
-		size -= len;
-		data += len;
-		offset += len;
-	} while (size);
-
-	return obj;
-
-fail:
-	i915_gem_object_put(obj);
-	return ERR_PTR(err);
-}
-
 struct scatterlist *
 i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
 		       unsigned int n,
@@ -5409,7 +4610,7 @@ int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align)
 	if (obj->ops == &i915_gem_phys_ops)
 		return 0;
 
-	if (obj->ops != &i915_gem_object_ops)
+	if (obj->ops != &i915_gem_shmem_ops)
 		return -EINVAL;
 
 	err = i915_gem_object_unbind(obj);
@@ -5445,12 +4646,12 @@ int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align)
 	__i915_gem_object_pin_pages(obj);
 
 	if (!IS_ERR_OR_NULL(pages))
-		i915_gem_object_ops.put_pages(obj, pages);
+		i915_gem_shmem_ops.put_pages(obj, pages);
 	mutex_unlock(&obj->mm.lock);
 	return 0;
 
 err_xfer:
-	obj->ops = &i915_gem_object_ops;
+	obj->ops = &i915_gem_shmem_ops;
 	if (!IS_ERR_OR_NULL(pages)) {
 		unsigned int sg_page_sizes = i915_sg_page_sizes(pages->sgl);
 
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index e19a89e4df64..3c4f5017cf2a 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1502,7 +1502,7 @@ static int alloc_oa_buffer(struct drm_i915_private *dev_priv)
 	BUILD_BUG_ON_NOT_POWER_OF_2(OA_BUFFER_SIZE);
 	BUILD_BUG_ON(OA_BUFFER_SIZE < SZ_128K || OA_BUFFER_SIZE > SZ_16M);
 
-	bo = i915_gem_object_create(dev_priv, OA_BUFFER_SIZE);
+	bo = i915_gem_object_create_shmem(dev_priv, OA_BUFFER_SIZE);
 	if (IS_ERR(bo)) {
 		DRM_ERROR("Failed to allocate OA buffer\n");
 		ret = PTR_ERR(bo);
diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c
index e8f694b57b8a..25999573dfe2 100644
--- a/drivers/gpu/drm/i915/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/intel_fbdev.c
@@ -143,7 +143,7 @@ static int intelfb_alloc(struct drm_fb_helper *helper,
 	if (size * 2 < dev_priv->stolen_usable_size)
 		obj = i915_gem_object_create_stolen(dev_priv, size);
 	if (obj == NULL)
-		obj = i915_gem_object_create(dev_priv, size);
+		obj = i915_gem_object_create_shmem(dev_priv, size);
 	if (IS_ERR(obj)) {
 		DRM_ERROR("failed to allocate framebuffer\n");
 		ret = PTR_ERR(obj);
diff --git a/drivers/gpu/drm/i915/intel_guc.c b/drivers/gpu/drm/i915/intel_guc.c
index 8ecb47087457..4bb101adbc01 100644
--- a/drivers/gpu/drm/i915/intel_guc.c
+++ b/drivers/gpu/drm/i915/intel_guc.c
@@ -674,7 +674,7 @@ struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size)
 	u64 flags;
 	int ret;
 
-	obj = i915_gem_object_create(dev_priv, size);
+	obj = i915_gem_object_create_shmem(dev_priv, size);
 	if (IS_ERR(obj))
 		return ERR_CAST(obj);
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 93701a8a79be..1b3c0fe8f8b1 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1928,7 +1928,7 @@ static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
 	struct i915_vma *vma;
 	int err;
 
-	obj = i915_gem_object_create(engine->i915, CTX_WA_BB_OBJ_SIZE);
+	obj = i915_gem_object_create_shmem(engine->i915, CTX_WA_BB_OBJ_SIZE);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
@@ -3038,7 +3038,7 @@ static int execlists_context_deferred_alloc(struct intel_context *ce,
 	 */
 	context_size += LRC_HEADER_PAGES * PAGE_SIZE;
 
-	ctx_obj = i915_gem_object_create(engine->i915, context_size);
+	ctx_obj = i915_gem_object_create_shmem(engine->i915, context_size);
 	if (IS_ERR(ctx_obj))
 		return PTR_ERR(ctx_obj);
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index cc4fcd89b845..4ecb79eb147b 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1448,7 +1448,7 @@ alloc_context_vma(struct intel_engine_cs *engine)
 	struct i915_vma *vma;
 	int err;
 
-	obj = i915_gem_object_create(i915, engine->context_size);
+	obj = i915_gem_object_create_shmem(i915, engine->context_size);
 	if (IS_ERR(obj))
 		return ERR_CAST(obj);
 
diff --git a/drivers/gpu/drm/i915/intel_uc_fw.c b/drivers/gpu/drm/i915/intel_uc_fw.c
index becf05ebae4d..e31fa3ac845d 100644
--- a/drivers/gpu/drm/i915/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/intel_uc_fw.c
@@ -159,7 +159,8 @@ void intel_uc_fw_fetch(struct drm_i915_private *dev_priv,
 		goto fail;
 	}
 
-	obj = i915_gem_object_create_from_data(dev_priv, fw->data, fw->size);
+	obj = i915_gem_object_create_shmem_from_data(dev_priv,
+						     fw->data, fw->size);
 	if (IS_ERR(obj)) {
 		err = PTR_ERR(obj);
 		DRM_DEBUG_DRIVER("%s fw object_create err=%d\n",
diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c
index 0b7740dc18cb..68060e5ac78d 100644
--- a/drivers/gpu/drm/i915/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/selftests/huge_pages.c
@@ -1393,7 +1393,7 @@ static int igt_ppgtt_gemfs_huge(void *arg)
 	for (i = 0; i < ARRAY_SIZE(sizes); ++i) {
 		unsigned int size = sizes[i];
 
-		obj = i915_gem_object_create(i915, size);
+		obj = i915_gem_object_create_shmem(i915, size);
 		if (IS_ERR(obj))
 			return PTR_ERR(obj);
 
@@ -1571,7 +1571,7 @@ static int igt_tmpfs_fallback(void *arg)
 
 	i915->mm.gemfs = NULL;
 
-	obj = i915_gem_object_create(i915, PAGE_SIZE);
+	obj = i915_gem_object_create_shmem(i915, PAGE_SIZE);
 	if (IS_ERR(obj)) {
 		err = PTR_ERR(obj);
 		goto out_restore;
@@ -1630,7 +1630,7 @@ static int igt_shrink_thp(void *arg)
 		return 0;
 	}
 
-	obj = i915_gem_object_create(i915, SZ_2M);
+	obj = i915_gem_object_create_shmem(i915, SZ_2M);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c
index a7055b12e53c..37dc22699f1f 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c
@@ -33,7 +33,7 @@ static int igt_dmabuf_export(void *arg)
 	struct drm_i915_gem_object *obj;
 	struct dma_buf *dmabuf;
 
-	obj = i915_gem_object_create(i915, PAGE_SIZE);
+	obj = i915_gem_object_create_shmem(i915, PAGE_SIZE);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
@@ -57,7 +57,7 @@ static int igt_dmabuf_import_self(void *arg)
 	struct dma_buf *dmabuf;
 	int err;
 
-	obj = i915_gem_object_create(i915, PAGE_SIZE);
+	obj = i915_gem_object_create_shmem(i915, PAGE_SIZE);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
@@ -232,7 +232,7 @@ static int igt_dmabuf_export_vmap(void *arg)
 	void *ptr;
 	int err;
 
-	obj = i915_gem_object_create(i915, PAGE_SIZE);
+	obj = i915_gem_object_create_shmem(i915, PAGE_SIZE);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
@@ -279,7 +279,7 @@ static int igt_dmabuf_export_kmap(void *arg)
 	void *ptr;
 	int err;
 
-	obj = i915_gem_object_create(i915, 2*PAGE_SIZE);
+	obj = i915_gem_object_create_shmem(i915, 2 * PAGE_SIZE);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
index 971148fbe6f5..d5bd33c867c0 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
@@ -35,7 +35,7 @@ static int igt_gem_object(void *arg)
 
 	/* Basic test to ensure we can create an object */
 
-	obj = i915_gem_object_create(i915, PAGE_SIZE);
+	obj = i915_gem_object_create_shmem(i915, PAGE_SIZE);
 	if (IS_ERR(obj)) {
 		err = PTR_ERR(obj);
 		pr_err("i915_gem_object_create failed, err=%d\n", err);
@@ -58,7 +58,7 @@ static int igt_phys_object(void *arg)
 	 * i.e. exercise the i915_gem_object_phys API.
 	 */
 
-	obj = i915_gem_object_create(i915, PAGE_SIZE);
+	obj = i915_gem_object_create_shmem(i915, PAGE_SIZE);
 	if (IS_ERR(obj)) {
 		err = PTR_ERR(obj);
 		pr_err("i915_gem_object_create failed, err=%d\n", err);
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 78+ messages in thread

* [PATCH 33/43] drm/i915: Move phys objects to its own file
  2019-03-06 14:24 RFC Breaking up GEM struct_mutex for async-pages Chris Wilson
                   ` (31 preceding siblings ...)
  2019-03-06 14:25 ` [PATCH 32/43] drm/i915: Move shmem object setup to its own file Chris Wilson
@ 2019-03-06 14:25 ` Chris Wilson
  2019-03-07 12:31   ` Matthew Auld
  2019-03-06 14:25 ` [PATCH 34/43] drm/i915: Move mmap and friends " Chris Wilson
                   ` (12 subsequent siblings)
  45 siblings, 1 reply; 78+ messages in thread
From: Chris Wilson @ 2019-03-06 14:25 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld

Continuing the decluttering of i915_gem.c

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Makefile                 |   2 +
 drivers/gpu/drm/i915/gem/i915_gem_object.h    |   8 +
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |   2 +
 drivers/gpu/drm/i915/gem/i915_gem_pages.c     | 482 ++++++++++++
 drivers/gpu/drm/i915/gem/i915_gem_phys.c      | 212 ++++++
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c     |  22 +
 .../drm/i915/gem/selftests/i915_gem_phys.c    |  80 ++
 drivers/gpu/drm/i915/i915_drv.h               |   2 -
 drivers/gpu/drm/i915/i915_gem.c               | 685 ------------------
 .../gpu/drm/i915/selftests/i915_gem_object.c  |  54 --
 .../drm/i915/selftests/i915_mock_selftests.h  |   1 +
 11 files changed, 809 insertions(+), 741 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_pages.c
 create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_phys.c
 create mode 100644 drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 54418ce5faac..8e6ef54f2497 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -72,6 +72,8 @@ i915-$(CONFIG_DRM_I915_WERROR) += \
 # GEM code
 i915-y += \
 	  gem/i915_gem_object.o \
+	  gem/i915_gem_pages.o \
+	  gem/i915_gem_phys.o \
 	  gem/i915_gem_shmem.o \
 	  i915_active.o \
 	  i915_cmd_parser.o \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 05bbb3f33904..ebab3505e51d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -33,11 +33,17 @@ void __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
 				     struct sg_table *pages,
 				     bool needs_clflush);
 
+int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align);
+
 void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file);
 void i915_gem_free_object(struct drm_gem_object *obj);
 
 void i915_gem_flush_free_objects(struct drm_i915_private *i915);
 
+struct sg_table *
+__i915_gem_object_unset_pages(struct drm_i915_gem_object *obj);
+void i915_gem_object_truncate(struct drm_i915_gem_object *obj);
+
 /**
  * i915_gem_object_lookup_rcu - look up a temporary GEM object from its handle
  * @filp: DRM file private date
@@ -231,6 +237,8 @@ i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
 void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
 				 struct sg_table *pages,
 				 unsigned int sg_page_sizes);
+
+int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
 int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
 
 static inline int __must_check
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index e4b50944f553..da6a33e2395f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -52,6 +52,8 @@ struct drm_i915_gem_object_ops {
 	int (*get_pages)(struct drm_i915_gem_object *obj);
 	void (*put_pages)(struct drm_i915_gem_object *obj,
 			  struct sg_table *pages);
+	void (*truncate)(struct drm_i915_gem_object *obj);
+	void (*invalidate)(struct drm_i915_gem_object *obj);
 
 	int (*pwrite)(struct drm_i915_gem_object *obj,
 		      const struct drm_i915_gem_pwrite *arg);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
new file mode 100644
index 000000000000..a594f48db28e
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -0,0 +1,482 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2016 Intel Corporation
+ */
+
+#include "i915_gem_object.h"
+
+#include "../i915_drv.h"
+
+void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
+				 struct sg_table *pages,
+				 unsigned int sg_page_sizes)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	unsigned long supported = INTEL_INFO(i915)->page_sizes;
+	int i;
+
+	lockdep_assert_held(&obj->mm.lock);
+
+	obj->mm.get_page.sg_pos = pages->sgl;
+	obj->mm.get_page.sg_idx = 0;
+
+	obj->mm.pages = pages;
+
+	if (i915_gem_object_is_tiled(obj) &&
+	    i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
+		GEM_BUG_ON(obj->mm.quirked);
+		__i915_gem_object_pin_pages(obj);
+		obj->mm.quirked = true;
+	}
+
+	GEM_BUG_ON(!sg_page_sizes);
+	obj->mm.page_sizes.phys = sg_page_sizes;
+
+	/*
+	 * Calculate the supported page-sizes which fit into the given
+	 * sg_page_sizes. This will give us the page-sizes which we may be able
+	 * to use opportunistically when later inserting into the GTT. For
+	 * example if phys=2G, then in theory we should be able to use 1G, 2M,
+	 * 64K or 4K pages, although in practice this will depend on a number of
+	 * other factors.
+	 */
+	obj->mm.page_sizes.sg = 0;
+	for_each_set_bit(i, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) {
+		if (obj->mm.page_sizes.phys & ~0u << i)
+			obj->mm.page_sizes.sg |= BIT(i);
+	}
+	GEM_BUG_ON(!HAS_PAGE_SIZES(i915, obj->mm.page_sizes.sg));
+
+	spin_lock(&i915->mm.obj_lock);
+	list_add(&obj->mm.link, &i915->mm.unbound_list);
+	spin_unlock(&i915->mm.obj_lock);
+}
+
+int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
+{
+	int err;
+
+	if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) {
+		DRM_DEBUG("Attempting to obtain a purgeable object\n");
+		return -EFAULT;
+	}
+
+	err = obj->ops->get_pages(obj);
+	GEM_BUG_ON(!err && !i915_gem_object_has_pages(obj));
+
+	return err;
+}
+
+/* Ensure that the associated pages are gathered from the backing storage
+ * and pinned into our object. i915_gem_object_pin_pages() may be called
+ * multiple times before they are released by a single call to
+ * i915_gem_object_unpin_pages() - once the pages are no longer referenced
+ * either as a result of memory pressure (reaping pages under the shrinker)
+ * or as the object is itself released.
+ */
+int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
+{
+	int err;
+
+	err = mutex_lock_interruptible(&obj->mm.lock);
+	if (err)
+		return err;
+
+	if (unlikely(!i915_gem_object_has_pages(obj))) {
+		GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
+
+		err = ____i915_gem_object_get_pages(obj);
+		if (err)
+			goto unlock;
+
+		smp_mb__before_atomic();
+	}
+	atomic_inc(&obj->mm.pages_pin_count);
+
+unlock:
+	mutex_unlock(&obj->mm.lock);
+	return err;
+}
+
+/* Immediately discard the backing storage */
+void i915_gem_object_truncate(struct drm_i915_gem_object *obj)
+{
+	drm_gem_free_mmap_offset(&obj->base);
+	if (obj->ops->truncate)
+		obj->ops->truncate(obj);
+}
+
+/* Try to discard unwanted pages */
+void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
+{
+	lockdep_assert_held(&obj->mm.lock);
+	GEM_BUG_ON(i915_gem_object_has_pages(obj));
+
+	switch (obj->mm.madv) {
+	case I915_MADV_DONTNEED:
+		i915_gem_object_truncate(obj);
+	case __I915_MADV_PURGED:
+		return;
+	}
+
+	if (obj->ops->invalidate)
+		obj->ops->invalidate(obj);
+}
+
+static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
+{
+	struct radix_tree_iter iter;
+	void __rcu **slot;
+
+	rcu_read_lock();
+	radix_tree_for_each_slot(slot, &obj->mm.get_page.radix, &iter, 0)
+		radix_tree_delete(&obj->mm.get_page.radix, iter.index);
+	rcu_read_unlock();
+}
+
+struct sg_table *
+__i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	struct sg_table *pages;
+
+	pages = fetch_and_zero(&obj->mm.pages);
+	if (IS_ERR_OR_NULL(pages))
+		return pages;
+
+	spin_lock(&i915->mm.obj_lock);
+	list_del(&obj->mm.link);
+	spin_unlock(&i915->mm.obj_lock);
+
+	if (obj->mm.mapping) {
+		void *ptr;
+
+		ptr = page_mask_bits(obj->mm.mapping);
+		if (is_vmalloc_addr(ptr))
+			vunmap(ptr);
+		else
+			kunmap(kmap_to_page(ptr));
+
+		obj->mm.mapping = NULL;
+	}
+
+	__i915_gem_object_reset_page_iter(obj);
+	obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0;
+
+	return pages;
+}
+
+int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
+				enum i915_mm_subclass subclass)
+{
+	struct sg_table *pages;
+	int err;
+
+	if (i915_gem_object_has_pinned_pages(obj))
+		return -EBUSY;
+
+	GEM_BUG_ON(obj->bind_count);
+
+	/* May be called by shrinker from within get_pages() (on another bo) */
+	mutex_lock_nested(&obj->mm.lock, subclass);
+	if (unlikely(atomic_read(&obj->mm.pages_pin_count))) {
+		err = -EBUSY;
+		goto unlock;
+	}
+
+	/*
+	 * ->put_pages might need to allocate memory for the bit17 swizzle
+	 * array, hence protect them from being reaped by removing them from gtt
+	 * lists early.
+	 */
+	pages = __i915_gem_object_unset_pages(obj);
+
+	/*
+	 * XXX Temporary hijinx to avoid updating all backends to handle
+	 * NULL pages. In the future, when we have more asynchronous
+	 * get_pages backends we should be better able to handle the
+	 * cancellation of the async task in a more uniform manner.
+	 */
+	if (!pages && !i915_gem_object_needs_async_cancel(obj))
+		pages = ERR_PTR(-EINVAL);
+
+	if (!IS_ERR(pages))
+		obj->ops->put_pages(obj, pages);
+
+	err = 0;
+unlock:
+	mutex_unlock(&obj->mm.lock);
+
+	return err;
+}
+
+/* The 'mapping' part of i915_gem_object_pin_map() below */
+static void *i915_gem_object_map(const struct drm_i915_gem_object *obj,
+				 enum i915_map_type type)
+{
+	unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
+	struct sg_table *sgt = obj->mm.pages;
+	struct sgt_iter sgt_iter;
+	struct page *page;
+	struct page *stack_pages[32];
+	struct page **pages = stack_pages;
+	unsigned long i = 0;
+	pgprot_t pgprot;
+	void *addr;
+
+	/* A single page can always be kmapped */
+	if (n_pages == 1 && type == I915_MAP_WB)
+		return kmap(sg_page(sgt->sgl));
+
+	if (n_pages > ARRAY_SIZE(stack_pages)) {
+		/* Too big for stack -- allocate temporary array instead */
+		pages = kvmalloc_array(n_pages, sizeof(*pages), GFP_KERNEL);
+		if (!pages)
+			return NULL;
+	}
+
+	for_each_sgt_page(page, sgt_iter, sgt)
+		pages[i++] = page;
+
+	/* Check that we have the expected number of pages */
+	GEM_BUG_ON(i != n_pages);
+
+	switch (type) {
+	default:
+		MISSING_CASE(type);
+		/* fallthrough to use PAGE_KERNEL anyway */
+	case I915_MAP_WB:
+		pgprot = PAGE_KERNEL;
+		break;
+	case I915_MAP_WC:
+		pgprot = pgprot_writecombine(PAGE_KERNEL_IO);
+		break;
+	}
+	addr = vmap(pages, n_pages, 0, pgprot);
+
+	if (pages != stack_pages)
+		kvfree(pages);
+
+	return addr;
+}
+
+/* get, pin, and map the pages of the object into kernel space */
+void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
+			      enum i915_map_type type)
+{
+	enum i915_map_type has_type;
+	bool pinned;
+	void *ptr;
+	int err;
+
+	if (unlikely(!i915_gem_object_has_struct_page(obj)))
+		return ERR_PTR(-ENXIO);
+
+	err = mutex_lock_interruptible(&obj->mm.lock);
+	if (err)
+		return ERR_PTR(err);
+
+	pinned = !(type & I915_MAP_OVERRIDE);
+	type &= ~I915_MAP_OVERRIDE;
+
+	if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) {
+		if (unlikely(!i915_gem_object_has_pages(obj))) {
+			GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
+
+			err = ____i915_gem_object_get_pages(obj);
+			if (err)
+				goto err_unlock;
+
+			smp_mb__before_atomic();
+		}
+		atomic_inc(&obj->mm.pages_pin_count);
+		pinned = false;
+	}
+	GEM_BUG_ON(!i915_gem_object_has_pages(obj));
+
+	ptr = page_unpack_bits(obj->mm.mapping, &has_type);
+	if (ptr && has_type != type) {
+		if (pinned) {
+			err = -EBUSY;
+			goto err_unpin;
+		}
+
+		if (is_vmalloc_addr(ptr))
+			vunmap(ptr);
+		else
+			kunmap(kmap_to_page(ptr));
+
+		ptr = obj->mm.mapping = NULL;
+	}
+
+	if (!ptr) {
+		ptr = i915_gem_object_map(obj, type);
+		if (!ptr) {
+			err = -ENOMEM;
+			goto err_unpin;
+		}
+
+		obj->mm.mapping = page_pack_bits(ptr, type);
+	}
+
+out_unlock:
+	mutex_unlock(&obj->mm.lock);
+	return ptr;
+
+err_unpin:
+	atomic_dec(&obj->mm.pages_pin_count);
+err_unlock:
+	ptr = ERR_PTR(err);
+	goto out_unlock;
+}
+
+struct scatterlist *
+i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
+		       unsigned int n,
+		       unsigned int *offset)
+{
+	struct i915_gem_object_page_iter *iter = &obj->mm.get_page;
+	struct scatterlist *sg;
+	unsigned int idx, count;
+
+	might_sleep();
+	GEM_BUG_ON(n >= obj->base.size >> PAGE_SHIFT);
+	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
+
+	/* As we iterate forward through the sg, we record each entry in a
+	 * radixtree for quick repeated (backwards) lookups. If we have seen
+	 * this index previously, we will have an entry for it.
+	 *
+	 * Initial lookup is O(N), but this is amortized to O(1) for
+	 * sequential page access (where each new request is consecutive
+	 * to the previous one). Repeated lookups are O(lg(obj->base.size)),
+	 * i.e. O(1) with a large constant!
+	 */
+	if (n < READ_ONCE(iter->sg_idx))
+		goto lookup;
+
+	mutex_lock(&iter->lock);
+
+	/* We prefer to reuse the last sg so that repeated lookup of this
+	 * (or the subsequent) sg are fast - comparing against the last
+	 * sg is faster than going through the radixtree.
+	 */
+
+	sg = iter->sg_pos;
+	idx = iter->sg_idx;
+	count = __sg_page_count(sg);
+
+	while (idx + count <= n) {
+		void *entry;
+		unsigned long i;
+		int ret;
+
+		/* If we cannot allocate and insert this entry, or the
+		 * individual pages from this range, cancel updating the
+		 * sg_idx so that on this lookup we are forced to linearly
+		 * scan onwards, but on future lookups we will try the
+		 * insertion again (in which case we need to be careful of
+		 * the error return reporting that we have already inserted
+		 * this index).
+		 */
+		ret = radix_tree_insert(&iter->radix, idx, sg);
+		if (ret && ret != -EEXIST)
+			goto scan;
+
+		entry = xa_mk_value(idx);
+		for (i = 1; i < count; i++) {
+			ret = radix_tree_insert(&iter->radix, idx + i, entry);
+			if (ret && ret != -EEXIST)
+				goto scan;
+		}
+
+		idx += count;
+		sg = ____sg_next(sg);
+		count = __sg_page_count(sg);
+	}
+
+scan:
+	iter->sg_pos = sg;
+	iter->sg_idx = idx;
+
+	mutex_unlock(&iter->lock);
+
+	if (unlikely(n < idx)) /* insertion completed by another thread */
+		goto lookup;
+
+	/* In case we failed to insert the entry into the radixtree, we need
+	 * to look beyond the current sg.
+	 */
+	while (idx + count <= n) {
+		idx += count;
+		sg = ____sg_next(sg);
+		count = __sg_page_count(sg);
+	}
+
+	*offset = n - idx;
+	return sg;
+
+lookup:
+	rcu_read_lock();
+
+	sg = radix_tree_lookup(&iter->radix, n);
+	GEM_BUG_ON(!sg);
+
+	/* If this index is in the middle of multi-page sg entry,
+	 * the radix tree will contain a value entry that points
+	 * to the start of that range. We will return the pointer to
+	 * the base page and the offset of this page within the
+	 * sg entry's range.
+	 */
+	*offset = 0;
+	if (unlikely(xa_is_value(sg))) {
+		unsigned long base = xa_to_value(sg);
+
+		sg = radix_tree_lookup(&iter->radix, base);
+		GEM_BUG_ON(!sg);
+
+		*offset = n - base;
+	}
+
+	rcu_read_unlock();
+
+	return sg;
+}
+
+struct page *
+i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned int n)
+{
+	struct scatterlist *sg;
+	unsigned int offset;
+
+	GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));
+
+	sg = i915_gem_object_get_sg(obj, n, &offset);
+	return nth_page(sg_page(sg), offset);
+}
+
+/* Like i915_gem_object_get_page(), but mark the returned page dirty */
+struct page *
+i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
+			       unsigned int n)
+{
+	struct page *page;
+
+	page = i915_gem_object_get_page(obj, n);
+	if (!obj->mm.dirty)
+		set_page_dirty(page);
+
+	return page;
+}
+
+dma_addr_t
+i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
+				unsigned long n)
+{
+	struct scatterlist *sg;
+	unsigned int offset;
+
+	sg = i915_gem_object_get_sg(obj, n, &offset);
+	return sg_dma_address(sg) + (offset << PAGE_SHIFT);
+}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/i915_gem_phys.c
new file mode 100644
index 000000000000..1bf3e0afcba2
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_phys.c
@@ -0,0 +1,212 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2016 Intel Corporation
+ */
+
+#include <linux/highmem.h>
+#include <linux/shmem_fs.h>
+#include <linux/swap.h>
+
+#include <drm/drm.h> /* for drm_legacy.h! */
+#include <drm/drm_cache.h>
+#include <drm/drm_legacy.h> /* for drm_pci.h! */
+#include <drm/drm_pci.h>
+
+#include "i915_gem_object.h"
+
+#include "../i915_drv.h"
+
+static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
+{
+	struct address_space *mapping = obj->base.filp->f_mapping;
+	struct drm_dma_handle *phys;
+	struct sg_table *st;
+	struct scatterlist *sg;
+	char *vaddr;
+	int i;
+	int err;
+
+	if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
+		return -EINVAL;
+
+	/* Always aligning to the object size, allows a single allocation
+	 * to handle all possible callers, and given typical object sizes,
+	 * the alignment of the buddy allocation will naturally match.
+	 */
+	phys = drm_pci_alloc(obj->base.dev,
+			     roundup_pow_of_two(obj->base.size),
+			     roundup_pow_of_two(obj->base.size));
+	if (!phys)
+		return -ENOMEM;
+
+	vaddr = phys->vaddr;
+	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
+		struct page *page;
+		char *src;
+
+		page = shmem_read_mapping_page(mapping, i);
+		if (IS_ERR(page)) {
+			err = PTR_ERR(page);
+			goto err_phys;
+		}
+
+		src = kmap_atomic(page);
+		memcpy(vaddr, src, PAGE_SIZE);
+		drm_clflush_virt_range(vaddr, PAGE_SIZE);
+		kunmap_atomic(src);
+
+		put_page(page);
+		vaddr += PAGE_SIZE;
+	}
+
+	i915_gem_chipset_flush(to_i915(obj->base.dev));
+
+	st = kmalloc(sizeof(*st), GFP_KERNEL);
+	if (!st) {
+		err = -ENOMEM;
+		goto err_phys;
+	}
+
+	if (sg_alloc_table(st, 1, GFP_KERNEL)) {
+		kfree(st);
+		err = -ENOMEM;
+		goto err_phys;
+	}
+
+	sg = st->sgl;
+	sg->offset = 0;
+	sg->length = obj->base.size;
+
+	sg_dma_address(sg) = phys->busaddr;
+	sg_dma_len(sg) = obj->base.size;
+
+	obj->phys_handle = phys;
+
+	__i915_gem_object_set_pages(obj, st, sg->length);
+
+	return 0;
+
+err_phys:
+	drm_pci_free(obj->base.dev, phys);
+
+	return err;
+}
+
+static void
+i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj,
+			       struct sg_table *pages)
+{
+	__i915_gem_object_release_shmem(obj, pages, false);
+
+	if (obj->mm.dirty) {
+		struct address_space *mapping = obj->base.filp->f_mapping;
+		char *vaddr = obj->phys_handle->vaddr;
+		int i;
+
+		for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
+			struct page *page;
+			char *dst;
+
+			page = shmem_read_mapping_page(mapping, i);
+			if (IS_ERR(page))
+				continue;
+
+			dst = kmap_atomic(page);
+			drm_clflush_virt_range(vaddr, PAGE_SIZE);
+			memcpy(dst, vaddr, PAGE_SIZE);
+			kunmap_atomic(dst);
+
+			set_page_dirty(page);
+			if (obj->mm.madv == I915_MADV_WILLNEED)
+				mark_page_accessed(page);
+			put_page(page);
+			vaddr += PAGE_SIZE;
+		}
+		obj->mm.dirty = false;
+	}
+
+	sg_free_table(pages);
+	kfree(pages);
+
+	drm_pci_free(obj->base.dev, obj->phys_handle);
+}
+
+static void
+i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
+{
+	i915_gem_object_unpin_pages(obj);
+}
+
+static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
+	.get_pages = i915_gem_object_get_pages_phys,
+	.put_pages = i915_gem_object_put_pages_phys,
+	.release = i915_gem_object_release_phys,
+};
+
+int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align)
+{
+	struct sg_table *pages;
+	int err;
+
+	if (align > obj->base.size)
+		return -EINVAL;
+
+	if (obj->ops == &i915_gem_phys_ops)
+		return 0;
+
+	if (obj->ops != &i915_gem_shmem_ops)
+		return -EINVAL;
+
+	err = i915_gem_object_unbind(obj);
+	if (err)
+		return err;
+
+	mutex_lock(&obj->mm.lock);
+
+	if (obj->mm.madv != I915_MADV_WILLNEED) {
+		err = -EFAULT;
+		goto err_unlock;
+	}
+
+	if (obj->mm.quirked) {
+		err = -EFAULT;
+		goto err_unlock;
+	}
+
+	if (obj->mm.mapping) {
+		err = -EBUSY;
+		goto err_unlock;
+	}
+
+	pages = __i915_gem_object_unset_pages(obj);
+
+	obj->ops = &i915_gem_phys_ops;
+
+	err = ____i915_gem_object_get_pages(obj);
+	if (err)
+		goto err_xfer;
+
+	/* Perma-pin (until release) the physical set of pages */
+	__i915_gem_object_pin_pages(obj);
+
+	if (!IS_ERR_OR_NULL(pages))
+		i915_gem_shmem_ops.put_pages(obj, pages);
+	mutex_unlock(&obj->mm.lock);
+	return 0;
+
+err_xfer:
+	obj->ops = &i915_gem_shmem_ops;
+	if (!IS_ERR_OR_NULL(pages)) {
+		unsigned int sg_page_sizes = i915_sg_page_sizes(pages->sgl);
+
+		__i915_gem_object_set_pages(obj, pages, sg_page_sizes);
+	}
+err_unlock:
+	mutex_unlock(&obj->mm.lock);
+	return err;
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/i915_gem_phys.c"
+#endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index bccc0944d177..9a08aba6005e 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -214,6 +214,26 @@ static int shmem_get_pages_gtt(struct drm_i915_gem_object *obj)
 	return ret;
 }
 
+static void
+shmem_truncate(struct drm_i915_gem_object *obj)
+{
+	/*
+	 * Our goal here is to return as much of the memory as
+	 * is possible back to the system as we are called from OOM.
+	 * To do this we must instruct the shmfs to drop all of its
+	 * backing pages, *now*.
+	 */
+	shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
+	obj->mm.madv = __I915_MADV_PURGED;
+	obj->mm.pages = ERR_PTR(-EFAULT);
+}
+
+static void
+shmem_invalidate(struct drm_i915_gem_object *obj)
+{
+	invalidate_mapping_pages(obj->base.filp->f_mapping, 0, (loff_t)-1);
+}
+
 void
 __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
 				struct sg_table *pages,
@@ -345,6 +365,8 @@ const struct drm_i915_gem_object_ops i915_gem_shmem_ops = {
 
 	.get_pages = shmem_get_pages_gtt,
 	.put_pages = shmem_put_pages_gtt,
+	.truncate = shmem_truncate,
+	.invalidate = shmem_invalidate,
 
 	.pwrite = shmem_pwrite_gtt,
 };
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c
new file mode 100644
index 000000000000..b76b503b3999
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c
@@ -0,0 +1,80 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#include "../../i915_selftest.h"
+
+#include "../../selftests/mock_gem_device.h"
+
+static int mock_phys_object(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct drm_i915_gem_object *obj;
+	int err;
+
+	/* Create an object and bind it to a contiguous set of physical pages,
+	 * i.e. exercise the i915_gem_object_phys API.
+	 */
+
+	obj = i915_gem_object_create_shmem(i915, PAGE_SIZE);
+	if (IS_ERR(obj)) {
+		err = PTR_ERR(obj);
+		pr_err("i915_gem_object_create failed, err=%d\n", err);
+		goto out;
+	}
+
+	mutex_lock(&i915->drm.struct_mutex);
+	err = i915_gem_object_attach_phys(obj, PAGE_SIZE);
+	mutex_unlock(&i915->drm.struct_mutex);
+	if (err) {
+		pr_err("i915_gem_object_attach_phys failed, err=%d\n", err);
+		goto out_obj;
+	}
+
+	if (obj->ops != &i915_gem_phys_ops) {
+		pr_err("i915_gem_object_attach_phys did not create a phys object\n");
+		err = -EINVAL;
+		goto out_obj;
+	}
+
+	if (!atomic_read(&obj->mm.pages_pin_count)) {
+		pr_err("i915_gem_object_attach_phys did not pin its phys pages\n");
+		err = -EINVAL;
+		goto out_obj;
+	}
+
+	/* Make the object dirty so that put_pages must do copy back the data */
+	mutex_lock(&i915->drm.struct_mutex);
+	err = i915_gem_object_set_to_gtt_domain(obj, true);
+	mutex_unlock(&i915->drm.struct_mutex);
+	if (err) {
+		pr_err("i915_gem_object_set_to_gtt_domain failed with err=%d\n",
+		       err);
+		goto out_obj;
+	}
+
+out_obj:
+	i915_gem_object_put(obj);
+out:
+	return err;
+}
+
+int i915_gem_phys_mock_selftests(void)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(mock_phys_object),
+	};
+	struct drm_i915_private *i915;
+	int err;
+
+	i915 = mock_gem_device();
+	if (!i915)
+		return -ENOMEM;
+
+	err = i915_subtests(tests, i915);
+
+	drm_dev_put(&i915->drm);
+	return err;
+}
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index ffbb8a10a714..2d9aaa45f950 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2886,8 +2886,6 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 				     const struct i915_ggtt_view *view,
 				     unsigned int flags);
 void i915_gem_object_unpin_from_display_plane(struct i915_vma *vma);
-int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
-				int align);
 int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file);
 void i915_gem_release(struct drm_device *dev, struct drm_file *file);
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 5816e32fe30b..69a1f3ce19f5 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -26,7 +26,6 @@
  */
 
 #include <drm/drm_vma_manager.h>
-#include <drm/drm_pci.h>
 #include <drm/i915_drm.h>
 #include <linux/dma-fence-array.h>
 #include <linux/kthread.h>
@@ -194,133 +193,6 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 	return 0;
 }
 
-static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
-{
-	struct address_space *mapping = obj->base.filp->f_mapping;
-	drm_dma_handle_t *phys;
-	struct sg_table *st;
-	struct scatterlist *sg;
-	char *vaddr;
-	int i;
-	int err;
-
-	if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
-		return -EINVAL;
-
-	/* Always aligning to the object size, allows a single allocation
-	 * to handle all possible callers, and given typical object sizes,
-	 * the alignment of the buddy allocation will naturally match.
-	 */
-	phys = drm_pci_alloc(obj->base.dev,
-			     roundup_pow_of_two(obj->base.size),
-			     roundup_pow_of_two(obj->base.size));
-	if (!phys)
-		return -ENOMEM;
-
-	vaddr = phys->vaddr;
-	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
-		struct page *page;
-		char *src;
-
-		page = shmem_read_mapping_page(mapping, i);
-		if (IS_ERR(page)) {
-			err = PTR_ERR(page);
-			goto err_phys;
-		}
-
-		src = kmap_atomic(page);
-		memcpy(vaddr, src, PAGE_SIZE);
-		drm_clflush_virt_range(vaddr, PAGE_SIZE);
-		kunmap_atomic(src);
-
-		put_page(page);
-		vaddr += PAGE_SIZE;
-	}
-
-	i915_gem_chipset_flush(to_i915(obj->base.dev));
-
-	st = kmalloc(sizeof(*st), GFP_KERNEL);
-	if (!st) {
-		err = -ENOMEM;
-		goto err_phys;
-	}
-
-	if (sg_alloc_table(st, 1, GFP_KERNEL)) {
-		kfree(st);
-		err = -ENOMEM;
-		goto err_phys;
-	}
-
-	sg = st->sgl;
-	sg->offset = 0;
-	sg->length = obj->base.size;
-
-	sg_dma_address(sg) = phys->busaddr;
-	sg_dma_len(sg) = obj->base.size;
-
-	obj->phys_handle = phys;
-
-	__i915_gem_object_set_pages(obj, st, sg->length);
-
-	return 0;
-
-err_phys:
-	drm_pci_free(obj->base.dev, phys);
-
-	return err;
-}
-
-static void
-i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj,
-			       struct sg_table *pages)
-{
-	__i915_gem_object_release_shmem(obj, pages, false);
-
-	if (obj->mm.dirty) {
-		struct address_space *mapping = obj->base.filp->f_mapping;
-		char *vaddr = obj->phys_handle->vaddr;
-		int i;
-
-		for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
-			struct page *page;
-			char *dst;
-
-			page = shmem_read_mapping_page(mapping, i);
-			if (IS_ERR(page))
-				continue;
-
-			dst = kmap_atomic(page);
-			drm_clflush_virt_range(vaddr, PAGE_SIZE);
-			memcpy(dst, vaddr, PAGE_SIZE);
-			kunmap_atomic(dst);
-
-			set_page_dirty(page);
-			if (obj->mm.madv == I915_MADV_WILLNEED)
-				mark_page_accessed(page);
-			put_page(page);
-			vaddr += PAGE_SIZE;
-		}
-		obj->mm.dirty = false;
-	}
-
-	sg_free_table(pages);
-	kfree(pages);
-
-	drm_pci_free(obj->base.dev, obj->phys_handle);
-}
-
-static void
-i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
-{
-	i915_gem_object_unpin_pages(obj);
-}
-
-static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
-	.get_pages = i915_gem_object_get_pages_phys,
-	.put_pages = i915_gem_object_put_pages_phys,
-	.release = i915_gem_object_release_phys,
-};
-
 int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
 {
 	struct i915_vma *vma;
@@ -2020,11 +1892,6 @@ static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
 	return err;
 }
 
-static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
-{
-	drm_gem_free_mmap_offset(&obj->base);
-}
-
 int
 i915_gem_mmap_gtt(struct drm_file *file,
 		  struct drm_device *dev,
@@ -2070,134 +1937,6 @@ i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
 	return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
 }
 
-/* Immediately discard the backing storage */
-static void
-i915_gem_object_truncate(struct drm_i915_gem_object *obj)
-{
-	i915_gem_object_free_mmap_offset(obj);
-
-	if (obj->base.filp == NULL)
-		return;
-
-	/* Our goal here is to return as much of the memory as
-	 * is possible back to the system as we are called from OOM.
-	 * To do this we must instruct the shmfs to drop all of its
-	 * backing pages, *now*.
-	 */
-	shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
-	obj->mm.madv = __I915_MADV_PURGED;
-	obj->mm.pages = ERR_PTR(-EFAULT);
-}
-
-/* Try to discard unwanted pages */
-void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
-{
-	struct address_space *mapping;
-
-	lockdep_assert_held(&obj->mm.lock);
-	GEM_BUG_ON(i915_gem_object_has_pages(obj));
-
-	switch (obj->mm.madv) {
-	case I915_MADV_DONTNEED:
-		i915_gem_object_truncate(obj);
-	case __I915_MADV_PURGED:
-		return;
-	}
-
-	if (obj->base.filp == NULL)
-		return;
-
-	mapping = obj->base.filp->f_mapping,
-	invalidate_mapping_pages(mapping, 0, (loff_t)-1);
-}
-
-static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
-{
-	struct radix_tree_iter iter;
-	void __rcu **slot;
-
-	rcu_read_lock();
-	radix_tree_for_each_slot(slot, &obj->mm.get_page.radix, &iter, 0)
-		radix_tree_delete(&obj->mm.get_page.radix, iter.index);
-	rcu_read_unlock();
-}
-
-static struct sg_table *
-__i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
-{
-	struct drm_i915_private *i915 = to_i915(obj->base.dev);
-	struct sg_table *pages;
-
-	pages = fetch_and_zero(&obj->mm.pages);
-	if (IS_ERR_OR_NULL(pages))
-		return pages;
-
-	spin_lock(&i915->mm.obj_lock);
-	list_del(&obj->mm.link);
-	spin_unlock(&i915->mm.obj_lock);
-
-	if (obj->mm.mapping) {
-		void *ptr;
-
-		ptr = page_mask_bits(obj->mm.mapping);
-		if (is_vmalloc_addr(ptr))
-			vunmap(ptr);
-		else
-			kunmap(kmap_to_page(ptr));
-
-		obj->mm.mapping = NULL;
-	}
-
-	__i915_gem_object_reset_page_iter(obj);
-	obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0;
-
-	return pages;
-}
-
-int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
-				enum i915_mm_subclass subclass)
-{
-	struct sg_table *pages;
-	int ret;
-
-	if (i915_gem_object_has_pinned_pages(obj))
-		return -EBUSY;
-
-	GEM_BUG_ON(obj->bind_count);
-
-	/* May be called by shrinker from within get_pages() (on another bo) */
-	mutex_lock_nested(&obj->mm.lock, subclass);
-	if (unlikely(atomic_read(&obj->mm.pages_pin_count))) {
-		ret = -EBUSY;
-		goto unlock;
-	}
-
-	/*
-	 * ->put_pages might need to allocate memory for the bit17 swizzle
-	 * array, hence protect them from being reaped by removing them from gtt
-	 * lists early.
-	 */
-	pages = __i915_gem_object_unset_pages(obj);
-
-	/*
-	 * XXX Temporary hijinx to avoid updating all backends to handle
-	 * NULL pages. In the future, when we have more asynchronous
-	 * get_pages backends we should be better able to handle the
-	 * cancellation of the async task in a more uniform manner.
-	 */
-	if (!pages && !i915_gem_object_needs_async_cancel(obj))
-		pages = ERR_PTR(-EINVAL);
-
-	if (!IS_ERR(pages))
-		obj->ops->put_pages(obj, pages);
-
-	ret = 0;
-unlock:
-	mutex_unlock(&obj->mm.lock);
-
-	return ret;
-}
-
 bool i915_sg_trim(struct sg_table *orig_st)
 {
 	struct sg_table new_st;
@@ -2226,217 +1965,6 @@ bool i915_sg_trim(struct sg_table *orig_st)
 	return true;
 }
 
-void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
-				 struct sg_table *pages,
-				 unsigned int sg_page_sizes)
-{
-	struct drm_i915_private *i915 = to_i915(obj->base.dev);
-	unsigned long supported = INTEL_INFO(i915)->page_sizes;
-	int i;
-
-	lockdep_assert_held(&obj->mm.lock);
-
-	obj->mm.get_page.sg_pos = pages->sgl;
-	obj->mm.get_page.sg_idx = 0;
-
-	obj->mm.pages = pages;
-
-	if (i915_gem_object_is_tiled(obj) &&
-	    i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
-		GEM_BUG_ON(obj->mm.quirked);
-		__i915_gem_object_pin_pages(obj);
-		obj->mm.quirked = true;
-	}
-
-	GEM_BUG_ON(!sg_page_sizes);
-	obj->mm.page_sizes.phys = sg_page_sizes;
-
-	/*
-	 * Calculate the supported page-sizes which fit into the given
-	 * sg_page_sizes. This will give us the page-sizes which we may be able
-	 * to use opportunistically when later inserting into the GTT. For
-	 * example if phys=2G, then in theory we should be able to use 1G, 2M,
-	 * 64K or 4K pages, although in practice this will depend on a number of
-	 * other factors.
-	 */
-	obj->mm.page_sizes.sg = 0;
-	for_each_set_bit(i, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) {
-		if (obj->mm.page_sizes.phys & ~0u << i)
-			obj->mm.page_sizes.sg |= BIT(i);
-	}
-	GEM_BUG_ON(!HAS_PAGE_SIZES(i915, obj->mm.page_sizes.sg));
-
-	spin_lock(&i915->mm.obj_lock);
-	list_add(&obj->mm.link, &i915->mm.unbound_list);
-	spin_unlock(&i915->mm.obj_lock);
-}
-
-static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
-{
-	int err;
-
-	if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) {
-		DRM_DEBUG("Attempting to obtain a purgeable object\n");
-		return -EFAULT;
-	}
-
-	err = obj->ops->get_pages(obj);
-	GEM_BUG_ON(!err && !i915_gem_object_has_pages(obj));
-
-	return err;
-}
-
-/* Ensure that the associated pages are gathered from the backing storage
- * and pinned into our object. i915_gem_object_pin_pages() may be called
- * multiple times before they are released by a single call to
- * i915_gem_object_unpin_pages() - once the pages are no longer referenced
- * either as a result of memory pressure (reaping pages under the shrinker)
- * or as the object is itself released.
- */
-int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
-{
-	int err;
-
-	err = mutex_lock_interruptible(&obj->mm.lock);
-	if (err)
-		return err;
-
-	if (unlikely(!i915_gem_object_has_pages(obj))) {
-		GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
-
-		err = ____i915_gem_object_get_pages(obj);
-		if (err)
-			goto unlock;
-
-		smp_mb__before_atomic();
-	}
-	atomic_inc(&obj->mm.pages_pin_count);
-
-unlock:
-	mutex_unlock(&obj->mm.lock);
-	return err;
-}
-
-/* The 'mapping' part of i915_gem_object_pin_map() below */
-static void *i915_gem_object_map(const struct drm_i915_gem_object *obj,
-				 enum i915_map_type type)
-{
-	unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
-	struct sg_table *sgt = obj->mm.pages;
-	struct sgt_iter sgt_iter;
-	struct page *page;
-	struct page *stack_pages[32];
-	struct page **pages = stack_pages;
-	unsigned long i = 0;
-	pgprot_t pgprot;
-	void *addr;
-
-	/* A single page can always be kmapped */
-	if (n_pages == 1 && type == I915_MAP_WB)
-		return kmap(sg_page(sgt->sgl));
-
-	if (n_pages > ARRAY_SIZE(stack_pages)) {
-		/* Too big for stack -- allocate temporary array instead */
-		pages = kvmalloc_array(n_pages, sizeof(*pages), GFP_KERNEL);
-		if (!pages)
-			return NULL;
-	}
-
-	for_each_sgt_page(page, sgt_iter, sgt)
-		pages[i++] = page;
-
-	/* Check that we have the expected number of pages */
-	GEM_BUG_ON(i != n_pages);
-
-	switch (type) {
-	default:
-		MISSING_CASE(type);
-		/* fallthrough to use PAGE_KERNEL anyway */
-	case I915_MAP_WB:
-		pgprot = PAGE_KERNEL;
-		break;
-	case I915_MAP_WC:
-		pgprot = pgprot_writecombine(PAGE_KERNEL_IO);
-		break;
-	}
-	addr = vmap(pages, n_pages, 0, pgprot);
-
-	if (pages != stack_pages)
-		kvfree(pages);
-
-	return addr;
-}
-
-/* get, pin, and map the pages of the object into kernel space */
-void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
-			      enum i915_map_type type)
-{
-	enum i915_map_type has_type;
-	bool pinned;
-	void *ptr;
-	int ret;
-
-	if (unlikely(!i915_gem_object_has_struct_page(obj)))
-		return ERR_PTR(-ENXIO);
-
-	ret = mutex_lock_interruptible(&obj->mm.lock);
-	if (ret)
-		return ERR_PTR(ret);
-
-	pinned = !(type & I915_MAP_OVERRIDE);
-	type &= ~I915_MAP_OVERRIDE;
-
-	if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) {
-		if (unlikely(!i915_gem_object_has_pages(obj))) {
-			GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
-
-			ret = ____i915_gem_object_get_pages(obj);
-			if (ret)
-				goto err_unlock;
-
-			smp_mb__before_atomic();
-		}
-		atomic_inc(&obj->mm.pages_pin_count);
-		pinned = false;
-	}
-	GEM_BUG_ON(!i915_gem_object_has_pages(obj));
-
-	ptr = page_unpack_bits(obj->mm.mapping, &has_type);
-	if (ptr && has_type != type) {
-		if (pinned) {
-			ret = -EBUSY;
-			goto err_unpin;
-		}
-
-		if (is_vmalloc_addr(ptr))
-			vunmap(ptr);
-		else
-			kunmap(kmap_to_page(ptr));
-
-		ptr = obj->mm.mapping = NULL;
-	}
-
-	if (!ptr) {
-		ptr = i915_gem_object_map(obj, type);
-		if (!ptr) {
-			ret = -ENOMEM;
-			goto err_unpin;
-		}
-
-		obj->mm.mapping = page_pack_bits(ptr, type);
-	}
-
-out_unlock:
-	mutex_unlock(&obj->mm.lock);
-	return ptr;
-
-err_unpin:
-	atomic_dec(&obj->mm.pages_pin_count);
-err_unlock:
-	ptr = ERR_PTR(ret);
-	goto out_unlock;
-}
-
 static void
 i915_gem_retire_work_handler(struct work_struct *work)
 {
@@ -4449,219 +3977,6 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
 	}
 }
 
-struct scatterlist *
-i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
-		       unsigned int n,
-		       unsigned int *offset)
-{
-	struct i915_gem_object_page_iter *iter = &obj->mm.get_page;
-	struct scatterlist *sg;
-	unsigned int idx, count;
-
-	might_sleep();
-	GEM_BUG_ON(n >= obj->base.size >> PAGE_SHIFT);
-	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
-
-	/* As we iterate forward through the sg, we record each entry in a
-	 * radixtree for quick repeated (backwards) lookups. If we have seen
-	 * this index previously, we will have an entry for it.
-	 *
-	 * Initial lookup is O(N), but this is amortized to O(1) for
-	 * sequential page access (where each new request is consecutive
-	 * to the previous one). Repeated lookups are O(lg(obj->base.size)),
-	 * i.e. O(1) with a large constant!
-	 */
-	if (n < READ_ONCE(iter->sg_idx))
-		goto lookup;
-
-	mutex_lock(&iter->lock);
-
-	/* We prefer to reuse the last sg so that repeated lookup of this
-	 * (or the subsequent) sg are fast - comparing against the last
-	 * sg is faster than going through the radixtree.
-	 */
-
-	sg = iter->sg_pos;
-	idx = iter->sg_idx;
-	count = __sg_page_count(sg);
-
-	while (idx + count <= n) {
-		void *entry;
-		unsigned long i;
-		int ret;
-
-		/* If we cannot allocate and insert this entry, or the
-		 * individual pages from this range, cancel updating the
-		 * sg_idx so that on this lookup we are forced to linearly
-		 * scan onwards, but on future lookups we will try the
-		 * insertion again (in which case we need to be careful of
-		 * the error return reporting that we have already inserted
-		 * this index).
-		 */
-		ret = radix_tree_insert(&iter->radix, idx, sg);
-		if (ret && ret != -EEXIST)
-			goto scan;
-
-		entry = xa_mk_value(idx);
-		for (i = 1; i < count; i++) {
-			ret = radix_tree_insert(&iter->radix, idx + i, entry);
-			if (ret && ret != -EEXIST)
-				goto scan;
-		}
-
-		idx += count;
-		sg = ____sg_next(sg);
-		count = __sg_page_count(sg);
-	}
-
-scan:
-	iter->sg_pos = sg;
-	iter->sg_idx = idx;
-
-	mutex_unlock(&iter->lock);
-
-	if (unlikely(n < idx)) /* insertion completed by another thread */
-		goto lookup;
-
-	/* In case we failed to insert the entry into the radixtree, we need
-	 * to look beyond the current sg.
-	 */
-	while (idx + count <= n) {
-		idx += count;
-		sg = ____sg_next(sg);
-		count = __sg_page_count(sg);
-	}
-
-	*offset = n - idx;
-	return sg;
-
-lookup:
-	rcu_read_lock();
-
-	sg = radix_tree_lookup(&iter->radix, n);
-	GEM_BUG_ON(!sg);
-
-	/* If this index is in the middle of multi-page sg entry,
-	 * the radix tree will contain a value entry that points
-	 * to the start of that range. We will return the pointer to
-	 * the base page and the offset of this page within the
-	 * sg entry's range.
-	 */
-	*offset = 0;
-	if (unlikely(xa_is_value(sg))) {
-		unsigned long base = xa_to_value(sg);
-
-		sg = radix_tree_lookup(&iter->radix, base);
-		GEM_BUG_ON(!sg);
-
-		*offset = n - base;
-	}
-
-	rcu_read_unlock();
-
-	return sg;
-}
-
-struct page *
-i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned int n)
-{
-	struct scatterlist *sg;
-	unsigned int offset;
-
-	GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));
-
-	sg = i915_gem_object_get_sg(obj, n, &offset);
-	return nth_page(sg_page(sg), offset);
-}
-
-/* Like i915_gem_object_get_page(), but mark the returned page dirty */
-struct page *
-i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
-			       unsigned int n)
-{
-	struct page *page;
-
-	page = i915_gem_object_get_page(obj, n);
-	if (!obj->mm.dirty)
-		set_page_dirty(page);
-
-	return page;
-}
-
-dma_addr_t
-i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
-				unsigned long n)
-{
-	struct scatterlist *sg;
-	unsigned int offset;
-
-	sg = i915_gem_object_get_sg(obj, n, &offset);
-	return sg_dma_address(sg) + (offset << PAGE_SHIFT);
-}
-
-int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align)
-{
-	struct sg_table *pages;
-	int err;
-
-	if (align > obj->base.size)
-		return -EINVAL;
-
-	if (obj->ops == &i915_gem_phys_ops)
-		return 0;
-
-	if (obj->ops != &i915_gem_shmem_ops)
-		return -EINVAL;
-
-	err = i915_gem_object_unbind(obj);
-	if (err)
-		return err;
-
-	mutex_lock(&obj->mm.lock);
-
-	if (obj->mm.madv != I915_MADV_WILLNEED) {
-		err = -EFAULT;
-		goto err_unlock;
-	}
-
-	if (obj->mm.quirked) {
-		err = -EFAULT;
-		goto err_unlock;
-	}
-
-	if (obj->mm.mapping) {
-		err = -EBUSY;
-		goto err_unlock;
-	}
-
-	pages = __i915_gem_object_unset_pages(obj);
-
-	obj->ops = &i915_gem_phys_ops;
-
-	err = ____i915_gem_object_get_pages(obj);
-	if (err)
-		goto err_xfer;
-
-	/* Perma-pin (until release) the physical set of pages */
-	__i915_gem_object_pin_pages(obj);
-
-	if (!IS_ERR_OR_NULL(pages))
-		i915_gem_shmem_ops.put_pages(obj, pages);
-	mutex_unlock(&obj->mm.lock);
-	return 0;
-
-err_xfer:
-	obj->ops = &i915_gem_shmem_ops;
-	if (!IS_ERR_OR_NULL(pages)) {
-		unsigned int sg_page_sizes = i915_sg_page_sizes(pages->sgl);
-
-		__i915_gem_object_set_pages(obj, pages, sg_page_sizes);
-	}
-err_unlock:
-	mutex_unlock(&obj->mm.lock);
-	return err;
-}
-
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftests/scatterlist.c"
 #include "selftests/mock_gem_device.c"
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
index d5bd33c867c0..ab6ae9ecf03b 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
@@ -48,59 +48,6 @@ static int igt_gem_object(void *arg)
 	return err;
 }
 
-static int igt_phys_object(void *arg)
-{
-	struct drm_i915_private *i915 = arg;
-	struct drm_i915_gem_object *obj;
-	int err;
-
-	/* Create an object and bind it to a contiguous set of physical pages,
-	 * i.e. exercise the i915_gem_object_phys API.
-	 */
-
-	obj = i915_gem_object_create_shmem(i915, PAGE_SIZE);
-	if (IS_ERR(obj)) {
-		err = PTR_ERR(obj);
-		pr_err("i915_gem_object_create failed, err=%d\n", err);
-		goto out;
-	}
-
-	mutex_lock(&i915->drm.struct_mutex);
-	err = i915_gem_object_attach_phys(obj, PAGE_SIZE);
-	mutex_unlock(&i915->drm.struct_mutex);
-	if (err) {
-		pr_err("i915_gem_object_attach_phys failed, err=%d\n", err);
-		goto out_obj;
-	}
-
-	if (obj->ops != &i915_gem_phys_ops) {
-		pr_err("i915_gem_object_attach_phys did not create a phys object\n");
-		err = -EINVAL;
-		goto out_obj;
-	}
-
-	if (!atomic_read(&obj->mm.pages_pin_count)) {
-		pr_err("i915_gem_object_attach_phys did not pin its phys pages\n");
-		err = -EINVAL;
-		goto out_obj;
-	}
-
-	/* Make the object dirty so that put_pages must do copy back the data */
-	mutex_lock(&i915->drm.struct_mutex);
-	err = i915_gem_object_set_to_gtt_domain(obj, true);
-	mutex_unlock(&i915->drm.struct_mutex);
-	if (err) {
-		pr_err("i915_gem_object_set_to_gtt_domain failed with err=%d\n",
-		       err);
-		goto out_obj;
-	}
-
-out_obj:
-	i915_gem_object_put(obj);
-out:
-	return err;
-}
-
 static int igt_gem_huge(void *arg)
 {
 	const unsigned int nreal = 509; /* just to be awkward */
@@ -632,7 +579,6 @@ int i915_gem_object_mock_selftests(void)
 {
 	static const struct i915_subtest tests[] = {
 		SUBTEST(igt_gem_object),
-		SUBTEST(igt_phys_object),
 	};
 	struct drm_i915_private *i915;
 	int err;
diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
index 88e5ab586337..510eb176bb2c 100644
--- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
@@ -18,6 +18,7 @@ selftest(engine, intel_engine_cs_mock_selftests)
 selftest(timelines, i915_timeline_mock_selftests)
 selftest(requests, i915_request_mock_selftests)
 selftest(objects, i915_gem_object_mock_selftests)
+selftest(phys, i915_gem_phys_mock_selftests)
 selftest(dmabuf, i915_gem_dmabuf_mock_selftests)
 selftest(vma, i915_vma_mock_selftests)
 selftest(evict, i915_gem_evict_mock_selftests)
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 78+ messages in thread

* [PATCH 34/43] drm/i915: Move mmap and friends to its own file
  2019-03-06 14:24 RFC Breaking up GEM struct_mutex for async-pages Chris Wilson
                   ` (32 preceding siblings ...)
  2019-03-06 14:25 ` [PATCH 33/43] drm/i915: Move phys objects " Chris Wilson
@ 2019-03-06 14:25 ` Chris Wilson
  2019-03-07 12:42   ` Matthew Auld
  2019-03-06 14:25 ` [PATCH 35/43] drm/i915: Move GEM domain management " Chris Wilson
                   ` (11 subsequent siblings)
  45 siblings, 1 reply; 78+ messages in thread
From: Chris Wilson @ 2019-03-06 14:25 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld

Continuing the decluttering of i915_gem.c

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Makefile                 |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_mman.c      | 514 ++++++++++++++++
 drivers/gpu/drm/i915/gem/i915_gem_object.c    |  56 ++
 drivers/gpu/drm/i915/gem/i915_gem_object.h    |   7 +
 .../drm/i915/gem/selftests/i915_gem_mman.c    | 507 ++++++++++++++++
 drivers/gpu/drm/i915/i915_drv.h               |   1 -
 drivers/gpu/drm/i915/i915_gem.c               | 570 +-----------------
 drivers/gpu/drm/i915/i915_gem_tiling.c        |   2 +-
 .../gpu/drm/i915/selftests/i915_gem_object.c  | 489 ---------------
 .../drm/i915/selftests/i915_live_selftests.h  |   1 +
 10 files changed, 1101 insertions(+), 1047 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_mman.c
 create mode 100644 drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 8e6ef54f2497..c2804efe4e5a 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -72,6 +72,7 @@ i915-$(CONFIG_DRM_I915_WERROR) += \
 # GEM code
 i915-y += \
 	  gem/i915_gem_object.o \
+	  gem/i915_gem_mman.o \
 	  gem/i915_gem_pages.o \
 	  gem/i915_gem_phys.o \
 	  gem/i915_gem_shmem.o \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
new file mode 100644
index 000000000000..92a2b9cd879c
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -0,0 +1,514 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2016 Intel Corporation
+ */
+
+#include <linux/mman.h>
+#include <linux/sizes.h>
+
+#include "i915_gem_ioctls.h"
+#include "i915_gem_object.h"
+
+#include "../i915_gem_gtt.h"
+#include "../i915_vma.h"
+#include "../i915_drv.h"
+#include "../intel_drv.h"
+
+static inline bool
+__vma_matches(struct vm_area_struct *vma, struct file *filp,
+	      unsigned long addr, unsigned long size)
+{
+	if (vma->vm_file != filp)
+		return false;
+
+	return vma->vm_start == addr &&
+	       (vma->vm_end - vma->vm_start) == PAGE_ALIGN(size);
+}
+
+/**
+ * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address
+ *			 it is mapped to.
+ * @dev: drm device
+ * @data: ioctl data blob
+ * @file: drm file
+ *
+ * While the mapping holds a reference on the contents of the object, it doesn't
+ * imply a ref on the object itself.
+ *
+ * IMPORTANT:
+ *
+ * DRM driver writers who look a this function as an example for how to do GEM
+ * mmap support, please don't implement mmap support like here. The modern way
+ * to implement DRM mmap support is with an mmap offset ioctl (like
+ * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
+ * That way debug tooling like valgrind will understand what's going on, hiding
+ * the mmap call in a driver private ioctl will break that. The i915 driver only
+ * does cpu mmaps this way because we didn't know better.
+ */
+int
+i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
+		    struct drm_file *file)
+{
+	struct drm_i915_gem_mmap *args = data;
+	struct drm_i915_gem_object *obj;
+	unsigned long addr;
+
+	if (args->flags & ~(I915_MMAP_WC))
+		return -EINVAL;
+
+	if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
+		return -ENODEV;
+
+	obj = i915_gem_object_lookup(file, args->handle);
+	if (!obj)
+		return -ENOENT;
+
+	/* prime objects have no backing filp to GEM mmap
+	 * pages from.
+	 */
+	if (!obj->base.filp) {
+		i915_gem_object_put(obj);
+		return -ENXIO;
+	}
+
+	addr = vm_mmap(obj->base.filp, 0, args->size,
+		       PROT_READ | PROT_WRITE, MAP_SHARED,
+		       args->offset);
+	if (IS_ERR_VALUE(addr))
+		goto err;
+
+	if (args->flags & I915_MMAP_WC) {
+		struct mm_struct *mm = current->mm;
+		struct vm_area_struct *vma;
+
+		if (down_write_killable(&mm->mmap_sem)) {
+			i915_gem_object_put(obj);
+			return -EINTR;
+		}
+		vma = find_vma(mm, addr);
+		if (vma && __vma_matches(vma, obj->base.filp, addr, args->size))
+			vma->vm_page_prot =
+				pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
+		else
+			addr = -ENOMEM;
+		up_write(&mm->mmap_sem);
+		if (IS_ERR_VALUE(addr))
+			goto err;
+
+		/* This may race, but that's ok, it only gets set */
+		WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU);
+	}
+	i915_gem_object_put(obj);
+
+	args->addr_ptr = (u64)addr;
+
+	return 0;
+
+err:
+	i915_gem_object_put(obj);
+
+	return addr;
+}
+
+static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj)
+{
+	return i915_gem_object_get_tile_row_size(obj) >> PAGE_SHIFT;
+}
+
+/**
+ * i915_gem_mmap_gtt_version - report the current feature set for GTT mmaps
+ *
+ * A history of the GTT mmap interface:
+ *
+ * 0 - Everything had to fit into the GTT. Both parties of a memcpy had to
+ *     aligned and suitable for fencing, and still fit into the available
+ *     mappable space left by the pinned display objects. A classic problem
+ *     we called the page-fault-of-doom where we would ping-pong between
+ *     two objects that could not fit inside the GTT and so the memcpy
+ *     would page one object in at the expense of the other between every
+ *     single byte.
+ *
+ * 1 - Objects can be any size, and have any compatible fencing (X Y, or none
+ *     as set via i915_gem_set_tiling() [DRM_I915_GEM_SET_TILING]). If the
+ *     object is too large for the available space (or simply too large
+ *     for the mappable aperture!), a view is created instead and faulted
+ *     into userspace. (This view is aligned and sized appropriately for
+ *     fenced access.)
+ *
+ * 2 - Recognise WC as a separate cache domain so that we can flush the
+ *     delayed writes via GTT before performing direct access via WC.
+ *
+ * Restrictions:
+ *
+ *  * snoopable objects cannot be accessed via the GTT. It can cause machine
+ *    hangs on some architectures, corruption on others. An attempt to service
+ *    a GTT page fault from a snoopable object will generate a SIGBUS.
+ *
+ *  * the object must be able to fit into RAM (physical memory, though no
+ *    limited to the mappable aperture).
+ *
+ *
+ * Caveats:
+ *
+ *  * a new GTT page fault will synchronize rendering from the GPU and flush
+ *    all data to system memory. Subsequent access will not be synchronized.
+ *
+ *  * all mappings are revoked on runtime device suspend.
+ *
+ *  * there are only 8, 16 or 32 fence registers to share between all users
+ *    (older machines require fence register for display and blitter access
+ *    as well). Contention of the fence registers will cause the previous users
+ *    to be unmapped and any new access will generate new page faults.
+ *
+ *  * running out of memory while servicing a fault may generate a SIGBUS,
+ *    rather than the expected SIGSEGV.
+ */
+int i915_gem_mmap_gtt_version(void)
+{
+	return 2;
+}
+
+static inline struct i915_ggtt_view
+compute_partial_view(const struct drm_i915_gem_object *obj,
+		     pgoff_t page_offset,
+		     unsigned int chunk)
+{
+	struct i915_ggtt_view view;
+
+	if (i915_gem_object_is_tiled(obj))
+		chunk = roundup(chunk, tile_row_pages(obj));
+
+	view.type = I915_GGTT_VIEW_PARTIAL;
+	view.partial.offset = rounddown(page_offset, chunk);
+	view.partial.size =
+		min_t(unsigned int, chunk,
+		      (obj->base.size >> PAGE_SHIFT) - view.partial.offset);
+
+	/* If the partial covers the entire object, just create a normal VMA. */
+	if (chunk >= obj->base.size >> PAGE_SHIFT)
+		view.type = I915_GGTT_VIEW_NORMAL;
+
+	return view;
+}
+
+/**
+ * i915_gem_fault - fault a page into the GTT
+ * @vmf: fault info
+ *
+ * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
+ * from userspace.  The fault handler takes care of binding the object to
+ * the GTT (if needed), allocating and programming a fence register (again,
+ * only if needed based on whether the old reg is still valid or the object
+ * is tiled) and inserting a new PTE into the faulting process.
+ *
+ * Note that the faulting process may involve evicting existing objects
+ * from the GTT and/or fence registers to make room.  So performance may
+ * suffer if the GTT working set is large or there are few fence registers
+ * left.
+ *
+ * The current feature set supported by i915_gem_fault() and thus GTT mmaps
+ * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version).
+ */
+vm_fault_t i915_gem_fault(struct vm_fault *vmf)
+{
+#define MIN_CHUNK_PAGES (SZ_1M >> PAGE_SHIFT)
+	struct vm_area_struct *area = vmf->vma;
+	struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data);
+	struct drm_device *dev = obj->base.dev;
+	struct drm_i915_private *i915 = to_i915(dev);
+	struct i915_ggtt *ggtt = &i915->ggtt;
+	bool write = area->vm_flags & VM_WRITE;
+	intel_wakeref_t wakeref;
+	struct i915_vma *vma;
+	pgoff_t page_offset;
+	int srcu;
+	int ret;
+
+	/* Sanity check that we allow writing into this object */
+	if (i915_gem_object_is_readonly(obj) && write)
+		return VM_FAULT_SIGBUS;
+
+	/* We don't use vmf->pgoff since that has the fake offset */
+	page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT;
+
+	trace_i915_gem_object_fault(obj, page_offset, true, write);
+
+	/* Try to flush the object off the GPU first without holding the lock.
+	 * Upon acquiring the lock, we will perform our sanity checks and then
+	 * repeat the flush holding the lock in the normal manner to catch cases
+	 * where we are gazumped.
+	 */
+	ret = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE,
+				   MAX_SCHEDULE_TIMEOUT);
+	if (ret)
+		goto err;
+
+	ret = i915_gem_object_pin_pages(obj);
+	if (ret)
+		goto err;
+
+	wakeref = intel_runtime_pm_get(i915);
+
+	srcu = i915_reset_trylock(i915);
+	if (srcu < 0) {
+		ret = srcu;
+		goto err_rpm;
+	}
+
+	ret = i915_mutex_lock_interruptible(dev);
+	if (ret)
+		goto err_reset;
+
+	/* Access to snoopable pages through the GTT is incoherent. */
+	if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(i915)) {
+		ret = -EFAULT;
+		goto err_unlock;
+	}
+
+	/* Now pin it into the GTT as needed */
+	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
+				       PIN_MAPPABLE |
+				       PIN_NONBLOCK |
+				       PIN_NONFAULT);
+	if (IS_ERR(vma)) {
+		/* Use a partial view if it is bigger than available space */
+		struct i915_ggtt_view view =
+			compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES);
+		unsigned int flags;
+
+		flags = PIN_MAPPABLE;
+		if (view.type == I915_GGTT_VIEW_NORMAL)
+			flags |= PIN_NONBLOCK; /* avoid warnings for pinned */
+
+		/*
+		 * Userspace is now writing through an untracked VMA, abandon
+		 * all hope that the hardware is able to track future writes.
+		 */
+		obj->frontbuffer_ggtt_origin = ORIGIN_CPU;
+
+		vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
+		if (IS_ERR(vma) && !view.type) {
+			flags = PIN_MAPPABLE;
+			view.type = I915_GGTT_VIEW_PARTIAL;
+			vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
+		}
+	}
+	if (IS_ERR(vma)) {
+		ret = PTR_ERR(vma);
+		goto err_unlock;
+	}
+
+	ret = i915_gem_object_set_to_gtt_domain(obj, write);
+	if (ret)
+		goto err_unpin;
+
+	ret = i915_vma_pin_fence(vma);
+	if (ret)
+		goto err_unpin;
+
+	/* Finally, remap it using the new GTT offset */
+	ret = remap_io_mapping(area,
+			       area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT),
+			       (ggtt->gmadr.start + vma->node.start) >> PAGE_SHIFT,
+			       min_t(u64, vma->size, area->vm_end - area->vm_start),
+			       &ggtt->iomap);
+	if (ret)
+		goto err_fence;
+
+	/* Mark as being mmapped into userspace for later revocation */
+	assert_rpm_wakelock_held(i915);
+	if (!i915_vma_set_userfault(vma) && !obj->userfault_count++)
+		list_add(&obj->userfault_link, &i915->mm.userfault_list);
+	GEM_BUG_ON(!obj->userfault_count);
+
+	i915_vma_set_ggtt_write(vma);
+
+err_fence:
+	i915_vma_unpin_fence(vma);
+err_unpin:
+	__i915_vma_unpin(vma);
+err_unlock:
+	mutex_unlock(&dev->struct_mutex);
+err_reset:
+	i915_reset_unlock(i915, srcu);
+err_rpm:
+	intel_runtime_pm_put(i915, wakeref);
+	i915_gem_object_unpin_pages(obj);
+err:
+	switch (ret) {
+	case -EIO:
+		/*
+		 * We eat errors when the gpu is terminally wedged to avoid
+		 * userspace unduly crashing (gl has no provisions for mmaps to
+		 * fail). But any other -EIO isn't ours (e.g. swap in failure)
+		 * and so needs to be reported.
+		 */
+		if (!i915_terminally_wedged(i915))
+			return VM_FAULT_SIGBUS;
+		/* else: fall through */
+	case -EAGAIN:
+		/*
+		 * EAGAIN means the gpu is hung and we'll wait for the error
+		 * handler to reset everything when re-faulting in
+		 * i915_mutex_lock_interruptible.
+		 */
+	case 0:
+	case -ERESTARTSYS:
+	case -EINTR:
+	case -EBUSY:
+		/*
+		 * EBUSY is ok: this just means that another thread
+		 * already did the job.
+		 */
+		return VM_FAULT_NOPAGE;
+	case -ENOMEM:
+		return VM_FAULT_OOM;
+	case -ENOSPC:
+	case -EFAULT:
+		return VM_FAULT_SIGBUS;
+	default:
+		WARN_ONCE(ret, "unhandled error in %s: %i\n", __func__, ret);
+		return VM_FAULT_SIGBUS;
+	}
+}
+
+void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj)
+{
+	struct i915_vma *vma;
+
+	GEM_BUG_ON(!obj->userfault_count);
+
+	obj->userfault_count = 0;
+	list_del(&obj->userfault_link);
+	drm_vma_node_unmap(&obj->base.vma_node,
+			   obj->base.dev->anon_inode->i_mapping);
+
+	for_each_ggtt_vma(vma, obj)
+		i915_vma_unset_userfault(vma);
+}
+
+/**
+ * i915_gem_object_release_mmap - remove physical page mappings
+ * @obj: obj in question
+ *
+ * Preserve the reservation of the mmapping with the DRM core code, but
+ * relinquish ownership of the pages back to the system.
+ *
+ * It is vital that we remove the page mapping if we have mapped a tiled
+ * object through the GTT and then lose the fence register due to
+ * resource pressure. Similarly if the object has been moved out of the
+ * aperture, than pages mapped into userspace must be revoked. Removing the
+ * mapping will then trigger a page fault on the next user access, allowing
+ * fixup by i915_gem_fault().
+ */
+void i915_gem_object_release_mmap(struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	intel_wakeref_t wakeref;
+
+	/* Serialisation between user GTT access and our code depends upon
+	 * revoking the CPU's PTE whilst the mutex is held. The next user
+	 * pagefault then has to wait until we release the mutex.
+	 *
+	 * Note that RPM complicates somewhat by adding an additional
+	 * requirement that operations to the GGTT be made holding the RPM
+	 * wakeref.
+	 */
+	lockdep_assert_held(&i915->drm.struct_mutex);
+	wakeref = intel_runtime_pm_get(i915);
+
+	if (!obj->userfault_count)
+		goto out;
+
+	__i915_gem_object_release_mmap(obj);
+
+	/* Ensure that the CPU's PTE are revoked and there are not outstanding
+	 * memory transactions from userspace before we return. The TLB
+	 * flushing implied above by changing the PTE above *should* be
+	 * sufficient, an extra barrier here just provides us with a bit
+	 * of paranoid documentation about our requirement to serialise
+	 * memory writes before touching registers / GSM.
+	 */
+	wmb();
+
+out:
+	intel_runtime_pm_put(i915, wakeref);
+}
+
+static int create_mmap_offset(struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	int err;
+
+	err = drm_gem_create_mmap_offset(&obj->base);
+	if (likely(!err))
+		return 0;
+
+	/* Attempt to reap some mmap space from dead objects */
+	do {
+		err = i915_gem_wait_for_idle(i915,
+					     I915_WAIT_INTERRUPTIBLE,
+					     MAX_SCHEDULE_TIMEOUT);
+		if (err)
+			break;
+
+		i915_gem_drain_freed_objects(i915);
+		err = drm_gem_create_mmap_offset(&obj->base);
+		if (!err)
+			break;
+
+	} while (flush_delayed_work(&i915->gt.retire_work));
+
+	return err;
+}
+
+int
+i915_gem_mmap_gtt(struct drm_file *file,
+		  struct drm_device *dev,
+		  u32 handle,
+		  u64 *offset)
+{
+	struct drm_i915_gem_object *obj;
+	int ret;
+
+	obj = i915_gem_object_lookup(file, handle);
+	if (!obj)
+		return -ENOENT;
+
+	ret = create_mmap_offset(obj);
+	if (ret == 0)
+		*offset = drm_vma_node_offset_addr(&obj->base.vma_node);
+
+	i915_gem_object_put(obj);
+	return ret;
+}
+
+/**
+ * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
+ * @dev: DRM device
+ * @data: GTT mapping ioctl data
+ * @file: GEM object info
+ *
+ * Simply returns the fake offset to userspace so it can mmap it.
+ * The mmap call will end up in drm_gem_mmap(), which will set things
+ * up so we can get faults in the handler above.
+ *
+ * The fault handler will take care of binding the object into the GTT
+ * (since it may have been evicted to make room for something), allocating
+ * a fence register, and mapping the appropriate aperture address into
+ * userspace.
+ */
+int
+i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file)
+{
+	struct drm_i915_gem_mmap_gtt *args = data;
+
+	return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/i915_gem_mman.c"
+#endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 05efce885961..4a6c5ddd1265 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -26,6 +26,7 @@
 
 #include "../i915_drv.h"
 #include "../i915_globals.h"
+#include "../i915_gem_clflush.h"
 #include "../intel_frontbuffer.h"
 
 static struct i915_global_object {
@@ -357,6 +358,61 @@ void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj)
 		i915_gem_object_put(obj);
 }
 
+static inline enum fb_op_origin
+fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
+{
+	return (domain == I915_GEM_DOMAIN_GTT ?
+		obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
+}
+
+static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
+{
+	return !(obj->cache_level == I915_CACHE_NONE ||
+		 obj->cache_level == I915_CACHE_WT);
+}
+
+void
+i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj,
+				   unsigned int flush_domains)
+{
+	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
+	struct i915_vma *vma;
+
+	if (!(obj->write_domain & flush_domains))
+		return;
+
+	switch (obj->write_domain) {
+	case I915_GEM_DOMAIN_GTT:
+		i915_gem_flush_ggtt_writes(dev_priv);
+
+		intel_fb_obj_flush(obj,
+				   fb_write_origin(obj, I915_GEM_DOMAIN_GTT));
+
+		for_each_ggtt_vma(vma, obj) {
+			if (vma->iomap)
+				continue;
+
+			i915_vma_unset_ggtt_write(vma);
+		}
+		break;
+
+	case I915_GEM_DOMAIN_WC:
+		wmb();
+		break;
+
+	case I915_GEM_DOMAIN_CPU:
+		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
+		break;
+
+	case I915_GEM_DOMAIN_RENDER:
+		if (gpu_write_needs_clflush(obj))
+			obj->cache_dirty = true;
+		break;
+	}
+
+	obj->write_domain = 0;
+}
+
 void i915_gem_init__objects(struct drm_i915_private *i915)
 {
 	INIT_WORK(&i915->mm.free_work, __i915_gem_free_work);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index ebab3505e51d..20b72c16c486 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -337,6 +337,13 @@ static inline void i915_gem_object_unpin_map(struct drm_i915_gem_object *obj)
 	i915_gem_object_unpin_pages(obj);
 }
 
+void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj);
+void i915_gem_object_release_mmap(struct drm_i915_gem_object *obj);
+
+void
+i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj,
+				   unsigned int flush_domains);
+
 static inline struct intel_engine_cs *
 i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj)
 {
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
new file mode 100644
index 000000000000..a99a51a93dcb
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -0,0 +1,507 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#include <linux/prime_numbers.h>
+
+#include "../../i915_selftest.h"
+#include "../../selftests/huge_gem_object.h"
+
+struct tile {
+	unsigned int width;
+	unsigned int height;
+	unsigned int stride;
+	unsigned int size;
+	unsigned int tiling;
+	unsigned int swizzle;
+};
+
+static u64 swizzle_bit(unsigned int bit, u64 offset)
+{
+	return (offset & BIT_ULL(bit)) >> (bit - 6);
+}
+
+static u64 tiled_offset(const struct tile *tile, u64 v)
+{
+	u64 x, y;
+
+	if (tile->tiling == I915_TILING_NONE)
+		return v;
+
+	y = div64_u64_rem(v, tile->stride, &x);
+	v = div64_u64_rem(y, tile->height, &y) * tile->stride * tile->height;
+
+	if (tile->tiling == I915_TILING_X) {
+		v += y * tile->width;
+		v += div64_u64_rem(x, tile->width, &x) << tile->size;
+		v += x;
+	} else if (tile->width == 128) {
+		const unsigned int ytile_span = 16;
+		const unsigned int ytile_height = 512;
+
+		v += y * ytile_span;
+		v += div64_u64_rem(x, ytile_span, &x) * ytile_height;
+		v += x;
+	} else {
+		const unsigned int ytile_span = 32;
+		const unsigned int ytile_height = 256;
+
+		v += y * ytile_span;
+		v += div64_u64_rem(x, ytile_span, &x) * ytile_height;
+		v += x;
+	}
+
+	switch (tile->swizzle) {
+	case I915_BIT_6_SWIZZLE_9:
+		v ^= swizzle_bit(9, v);
+		break;
+	case I915_BIT_6_SWIZZLE_9_10:
+		v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v);
+		break;
+	case I915_BIT_6_SWIZZLE_9_11:
+		v ^= swizzle_bit(9, v) ^ swizzle_bit(11, v);
+		break;
+	case I915_BIT_6_SWIZZLE_9_10_11:
+		v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v) ^ swizzle_bit(11, v);
+		break;
+	}
+
+	return v;
+}
+
+static int check_partial_mapping(struct drm_i915_gem_object *obj,
+				 const struct tile *tile,
+				 unsigned long end_time)
+{
+	const unsigned int nreal = obj->scratch / PAGE_SIZE;
+	const unsigned long npages = obj->base.size / PAGE_SIZE;
+	struct i915_vma *vma;
+	unsigned long page;
+	int err;
+
+	if (igt_timeout(end_time,
+			"%s: timed out before tiling=%d stride=%d\n",
+			__func__, tile->tiling, tile->stride))
+		return -EINTR;
+
+	err = i915_gem_object_set_tiling(obj, tile->tiling, tile->stride);
+	if (err) {
+		pr_err("Failed to set tiling mode=%u, stride=%u, err=%d\n",
+		       tile->tiling, tile->stride, err);
+		return err;
+	}
+
+	GEM_BUG_ON(i915_gem_object_get_tiling(obj) != tile->tiling);
+	GEM_BUG_ON(i915_gem_object_get_stride(obj) != tile->stride);
+
+	for_each_prime_number_from(page, 1, npages) {
+		struct i915_ggtt_view view =
+			compute_partial_view(obj, page, MIN_CHUNK_PAGES);
+		u32 __iomem *io;
+		struct page *p;
+		unsigned int n;
+		u64 offset;
+		u32 *cpu;
+
+		GEM_BUG_ON(view.partial.size > nreal);
+		cond_resched();
+
+		err = i915_gem_object_set_to_gtt_domain(obj, true);
+		if (err) {
+			pr_err("Failed to flush to GTT write domain; err=%d\n",
+			       err);
+			return err;
+		}
+
+		vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
+		if (IS_ERR(vma)) {
+			pr_err("Failed to pin partial view: offset=%lu; err=%d\n",
+			       page, (int)PTR_ERR(vma));
+			return PTR_ERR(vma);
+		}
+
+		n = page - view.partial.offset;
+		GEM_BUG_ON(n >= view.partial.size);
+
+		io = i915_vma_pin_iomap(vma);
+		i915_vma_unpin(vma);
+		if (IS_ERR(io)) {
+			pr_err("Failed to iomap partial view: offset=%lu; err=%d\n",
+			       page, (int)PTR_ERR(io));
+			return PTR_ERR(io);
+		}
+
+		iowrite32(page, io + n * PAGE_SIZE / sizeof(*io));
+		i915_vma_unpin_iomap(vma);
+
+		offset = tiled_offset(tile, page << PAGE_SHIFT);
+		if (offset >= obj->base.size)
+			continue;
+
+		i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+
+		p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT);
+		cpu = kmap(p) + offset_in_page(offset);
+		drm_clflush_virt_range(cpu, sizeof(*cpu));
+		if (*cpu != (u32)page) {
+			pr_err("Partial view for %lu [%u] (offset=%llu, size=%u [%llu, row size %u], fence=%d, tiling=%d, stride=%d) misalignment, expected write to page (%llu + %u [0x%llx]) of 0x%x, found 0x%x\n",
+			       page, n,
+			       view.partial.offset,
+			       view.partial.size,
+			       vma->size >> PAGE_SHIFT,
+			       tile->tiling ? tile_row_pages(obj) : 0,
+			       vma->fence ? vma->fence->id : -1, tile->tiling, tile->stride,
+			       offset >> PAGE_SHIFT,
+			       (unsigned int)offset_in_page(offset),
+			       offset,
+			       (u32)page, *cpu);
+			err = -EINVAL;
+		}
+		*cpu = 0;
+		drm_clflush_virt_range(cpu, sizeof(*cpu));
+		kunmap(p);
+		if (err)
+			return err;
+
+		i915_vma_destroy(vma);
+	}
+
+	return 0;
+}
+
+static int igt_partial_tiling(void *arg)
+{
+	const unsigned int nreal = 1 << 12; /* largest tile row x2 */
+	struct drm_i915_private *i915 = arg;
+	struct drm_i915_gem_object *obj;
+	intel_wakeref_t wakeref;
+	int tiling;
+	int err;
+
+	/* We want to check the page mapping and fencing of a large object
+	 * mmapped through the GTT. The object we create is larger than can
+	 * possibly be mmaped as a whole, and so we must use partial GGTT vma.
+	 * We then check that a write through each partial GGTT vma ends up
+	 * in the right set of pages within the object, and with the expected
+	 * tiling, which we verify by manual swizzling.
+	 */
+
+	obj = huge_gem_object(i915,
+			      nreal << PAGE_SHIFT,
+			      (1 + next_prime_number(i915->ggtt.vm.total >> PAGE_SHIFT)) << PAGE_SHIFT);
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
+
+	err = i915_gem_object_pin_pages(obj);
+	if (err) {
+		pr_err("Failed to allocate %u pages (%lu total), err=%d\n",
+		       nreal, obj->base.size / PAGE_SIZE, err);
+		goto out;
+	}
+
+	mutex_lock(&i915->drm.struct_mutex);
+	wakeref = intel_runtime_pm_get(i915);
+
+	if (1) {
+		IGT_TIMEOUT(end);
+		struct tile tile;
+
+		tile.height = 1;
+		tile.width = 1;
+		tile.size = 0;
+		tile.stride = 0;
+		tile.swizzle = I915_BIT_6_SWIZZLE_NONE;
+		tile.tiling = I915_TILING_NONE;
+
+		err = check_partial_mapping(obj, &tile, end);
+		if (err && err != -EINTR)
+			goto out_unlock;
+	}
+
+	for (tiling = I915_TILING_X; tiling <= I915_TILING_Y; tiling++) {
+		IGT_TIMEOUT(end);
+		unsigned int max_pitch;
+		unsigned int pitch;
+		struct tile tile;
+
+		if (i915->quirks & QUIRK_PIN_SWIZZLED_PAGES)
+			/*
+			 * The swizzling pattern is actually unknown as it
+			 * varies based on physical address of each page.
+			 * See i915_gem_detect_bit_6_swizzle().
+			 */
+			break;
+
+		tile.tiling = tiling;
+		switch (tiling) {
+		case I915_TILING_X:
+			tile.swizzle = i915->mm.bit_6_swizzle_x;
+			break;
+		case I915_TILING_Y:
+			tile.swizzle = i915->mm.bit_6_swizzle_y;
+			break;
+		}
+
+		GEM_BUG_ON(tile.swizzle == I915_BIT_6_SWIZZLE_UNKNOWN);
+		if (tile.swizzle == I915_BIT_6_SWIZZLE_9_17 ||
+		    tile.swizzle == I915_BIT_6_SWIZZLE_9_10_17)
+			continue;
+
+		if (INTEL_GEN(i915) <= 2) {
+			tile.height = 16;
+			tile.width = 128;
+			tile.size = 11;
+		} else if (tile.tiling == I915_TILING_Y &&
+			   HAS_128_BYTE_Y_TILING(i915)) {
+			tile.height = 32;
+			tile.width = 128;
+			tile.size = 12;
+		} else {
+			tile.height = 8;
+			tile.width = 512;
+			tile.size = 12;
+		}
+
+		if (INTEL_GEN(i915) < 4)
+			max_pitch = 8192 / tile.width;
+		else if (INTEL_GEN(i915) < 7)
+			max_pitch = 128 * I965_FENCE_MAX_PITCH_VAL / tile.width;
+		else
+			max_pitch = 128 * GEN7_FENCE_MAX_PITCH_VAL / tile.width;
+
+		for (pitch = max_pitch; pitch; pitch >>= 1) {
+			tile.stride = tile.width * pitch;
+			err = check_partial_mapping(obj, &tile, end);
+			if (err == -EINTR)
+				goto next_tiling;
+			if (err)
+				goto out_unlock;
+
+			if (pitch > 2 && INTEL_GEN(i915) >= 4) {
+				tile.stride = tile.width * (pitch - 1);
+				err = check_partial_mapping(obj, &tile, end);
+				if (err == -EINTR)
+					goto next_tiling;
+				if (err)
+					goto out_unlock;
+			}
+
+			if (pitch < max_pitch && INTEL_GEN(i915) >= 4) {
+				tile.stride = tile.width * (pitch + 1);
+				err = check_partial_mapping(obj, &tile, end);
+				if (err == -EINTR)
+					goto next_tiling;
+				if (err)
+					goto out_unlock;
+			}
+		}
+
+		if (INTEL_GEN(i915) >= 4) {
+			for_each_prime_number(pitch, max_pitch) {
+				tile.stride = tile.width * pitch;
+				err = check_partial_mapping(obj, &tile, end);
+				if (err == -EINTR)
+					goto next_tiling;
+				if (err)
+					goto out_unlock;
+			}
+		}
+
+next_tiling: ;
+	}
+
+out_unlock:
+	intel_runtime_pm_put(i915, wakeref);
+	mutex_unlock(&i915->drm.struct_mutex);
+	i915_gem_object_unpin_pages(obj);
+out:
+	i915_gem_object_put(obj);
+	return err;
+}
+
+static int make_obj_busy(struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	struct i915_request *rq;
+	struct i915_vma *vma;
+	int err;
+
+	vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
+	if (IS_ERR(vma))
+		return PTR_ERR(vma);
+
+	err = i915_vma_pin(vma, 0, 0, PIN_USER);
+	if (err)
+		return err;
+
+	rq = i915_request_alloc(i915->engine[RCS0], i915->kernel_context);
+	if (IS_ERR(rq)) {
+		i915_vma_unpin(vma);
+		return PTR_ERR(rq);
+	}
+
+	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+
+	i915_request_add(rq);
+
+	__i915_gem_object_release_unless_active(obj);
+	i915_vma_unpin(vma);
+
+	return err;
+}
+
+static bool assert_mmap_offset(struct drm_i915_private *i915,
+			       unsigned long size,
+			       int expected)
+{
+	struct drm_i915_gem_object *obj;
+	int err;
+
+	obj = i915_gem_object_create_internal(i915, size);
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
+
+	err = create_mmap_offset(obj);
+	i915_gem_object_put(obj);
+
+	return err == expected;
+}
+
+static void disable_retire_worker(struct drm_i915_private *i915)
+{
+	i915_gem_shrinker_unregister(i915);
+
+	mutex_lock(&i915->drm.struct_mutex);
+	if (!i915->gt.active_requests++) {
+		intel_wakeref_t wakeref;
+
+		with_intel_runtime_pm(i915, wakeref)
+			i915_gem_unpark(i915);
+	}
+	mutex_unlock(&i915->drm.struct_mutex);
+
+	cancel_delayed_work_sync(&i915->gt.retire_work);
+	cancel_delayed_work_sync(&i915->gt.idle_work);
+}
+
+static int igt_mmap_offset_exhaustion(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct drm_mm *mm = &i915->drm.vma_offset_manager->vm_addr_space_mm;
+	struct drm_i915_gem_object *obj;
+	struct drm_mm_node resv, *hole;
+	u64 hole_start, hole_end;
+	int loop, err;
+
+	/* Disable background reaper */
+	disable_retire_worker(i915);
+	GEM_BUG_ON(!i915->gt.awake);
+
+	/* Trim the device mmap space to only a page */
+	memset(&resv, 0, sizeof(resv));
+	drm_mm_for_each_hole(hole, mm, hole_start, hole_end) {
+		resv.start = hole_start;
+		resv.size = hole_end - hole_start - 1; /* PAGE_SIZE units */
+		err = drm_mm_reserve_node(mm, &resv);
+		if (err) {
+			pr_err("Failed to trim VMA manager, err=%d\n", err);
+			goto out_park;
+		}
+		break;
+	}
+
+	/* Just fits! */
+	if (!assert_mmap_offset(i915, PAGE_SIZE, 0)) {
+		pr_err("Unable to insert object into single page hole\n");
+		err = -EINVAL;
+		goto out;
+	}
+
+	/* Too large */
+	if (!assert_mmap_offset(i915, 2 * PAGE_SIZE, -ENOSPC)) {
+		pr_err("Unexpectedly succeeded in inserting too large object into single page hole\n");
+		err = -EINVAL;
+		goto out;
+	}
+
+	/* Fill the hole, further allocation attempts should then fail */
+	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
+	if (IS_ERR(obj)) {
+		err = PTR_ERR(obj);
+		goto out;
+	}
+
+	err = create_mmap_offset(obj);
+	if (err) {
+		pr_err("Unable to insert object into reclaimed hole\n");
+		goto err_obj;
+	}
+
+	if (!assert_mmap_offset(i915, PAGE_SIZE, -ENOSPC)) {
+		pr_err("Unexpectedly succeeded in inserting object into no holes!\n");
+		err = -EINVAL;
+		goto err_obj;
+	}
+
+	i915_gem_object_put(obj);
+
+	/* Now fill with busy dead objects that we expect to reap */
+	for (loop = 0; loop < 3; loop++) {
+		intel_wakeref_t wakeref;
+
+		if (i915_terminally_wedged(i915))
+			break;
+
+		obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
+		if (IS_ERR(obj)) {
+			err = PTR_ERR(obj);
+			goto out;
+		}
+
+		err = 0;
+		mutex_lock(&i915->drm.struct_mutex);
+		with_intel_runtime_pm(i915, wakeref)
+			err = make_obj_busy(obj);
+		mutex_unlock(&i915->drm.struct_mutex);
+		if (err) {
+			pr_err("[loop %d] Failed to busy the object\n", loop);
+			goto err_obj;
+		}
+
+		/* NB we rely on the _active_ reference to access obj now */
+		GEM_BUG_ON(!i915_gem_object_is_active(obj));
+		err = create_mmap_offset(obj);
+		if (err) {
+			pr_err("[loop %d] create_mmap_offset failed with err=%d\n",
+			       loop, err);
+			goto out;
+		}
+	}
+
+out:
+	drm_mm_remove_node(&resv);
+out_park:
+	mutex_lock(&i915->drm.struct_mutex);
+	if (--i915->gt.active_requests)
+		queue_delayed_work(i915->wq, &i915->gt.retire_work, 0);
+	else
+		queue_delayed_work(i915->wq, &i915->gt.idle_work, 0);
+	mutex_unlock(&i915->drm.struct_mutex);
+	i915_gem_shrinker_register(i915);
+	return err;
+err_obj:
+	i915_gem_object_put(obj);
+	goto out;
+}
+
+int i915_gem_mman_live_selftests(struct drm_i915_private *i915)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(igt_partial_tiling),
+		SUBTEST(igt_mmap_offset_exhaustion),
+	};
+
+	return i915_subtests(tests, i915);
+}
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 2d9aaa45f950..095e0da448d3 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2788,7 +2788,6 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 			 u64 flags);
 
 int i915_gem_object_unbind(struct drm_i915_gem_object *obj);
-void i915_gem_release_mmap(struct drm_i915_gem_object *obj);
 
 void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv);
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 69a1f3ce19f5..b78dc34efbaf 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -474,12 +474,6 @@ i915_gem_dumb_create(struct drm_file *file,
 			       args->size, &args->handle);
 }
 
-static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
-{
-	return !(obj->cache_level == I915_CACHE_NONE ||
-		 obj->cache_level == I915_CACHE_WT);
-}
-
 /**
  * Creates a new mm object and returns a handle to it.
  * @dev: drm device pointer
@@ -499,13 +493,6 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data,
 			       args->size, &args->handle);
 }
 
-static inline enum fb_op_origin
-fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
-{
-	return (domain == I915_GEM_DOMAIN_GTT ?
-		obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
-}
-
 void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv)
 {
 	intel_wakeref_t wakeref;
@@ -545,47 +532,6 @@ void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv)
 	}
 }
 
-static void
-flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
-{
-	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
-	struct i915_vma *vma;
-
-	if (!(obj->write_domain & flush_domains))
-		return;
-
-	switch (obj->write_domain) {
-	case I915_GEM_DOMAIN_GTT:
-		i915_gem_flush_ggtt_writes(dev_priv);
-
-		intel_fb_obj_flush(obj,
-				   fb_write_origin(obj, I915_GEM_DOMAIN_GTT));
-
-		for_each_ggtt_vma(vma, obj) {
-			if (vma->iomap)
-				continue;
-
-			i915_vma_unset_ggtt_write(vma);
-		}
-		break;
-
-	case I915_GEM_DOMAIN_WC:
-		wmb();
-		break;
-
-	case I915_GEM_DOMAIN_CPU:
-		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
-		break;
-
-	case I915_GEM_DOMAIN_RENDER:
-		if (gpu_write_needs_clflush(obj))
-			obj->cache_dirty = true;
-		break;
-	}
-
-	obj->write_domain = 0;
-}
-
 /*
  * Pins the specified object's pages and synchronizes the object with
  * GPU accesses. Sets needs_clflush to non-zero if the caller should
@@ -622,7 +568,7 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
 			goto out;
 	}
 
-	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
 
 	/* If we're not in the cpu read domain, set ourself into the gtt
 	 * read domain and manually flush cachelines (if required). This
@@ -674,7 +620,7 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
 			goto out;
 	}
 
-	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
 
 	/* If we're not in the cpu write domain, set ourself into the
 	 * gtt write domain and manually flush cachelines (as required).
@@ -1277,6 +1223,13 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
 	spin_unlock(&i915->mm.obj_lock);
 }
 
+static inline enum fb_op_origin
+fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
+{
+	return (domain == I915_GEM_DOMAIN_GTT ?
+		obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
+}
+
 /**
  * Called when user space prepares to use an object with the CPU, either
  * through the mmap ioctl's mapping or a GTT mapping.
@@ -1400,429 +1353,6 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
 	return 0;
 }
 
-static inline bool
-__vma_matches(struct vm_area_struct *vma, struct file *filp,
-	      unsigned long addr, unsigned long size)
-{
-	if (vma->vm_file != filp)
-		return false;
-
-	return vma->vm_start == addr &&
-	       (vma->vm_end - vma->vm_start) == PAGE_ALIGN(size);
-}
-
-/**
- * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address
- *			 it is mapped to.
- * @dev: drm device
- * @data: ioctl data blob
- * @file: drm file
- *
- * While the mapping holds a reference on the contents of the object, it doesn't
- * imply a ref on the object itself.
- *
- * IMPORTANT:
- *
- * DRM driver writers who look a this function as an example for how to do GEM
- * mmap support, please don't implement mmap support like here. The modern way
- * to implement DRM mmap support is with an mmap offset ioctl (like
- * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
- * That way debug tooling like valgrind will understand what's going on, hiding
- * the mmap call in a driver private ioctl will break that. The i915 driver only
- * does cpu mmaps this way because we didn't know better.
- */
-int
-i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
-		    struct drm_file *file)
-{
-	struct drm_i915_gem_mmap *args = data;
-	struct drm_i915_gem_object *obj;
-	unsigned long addr;
-
-	if (args->flags & ~(I915_MMAP_WC))
-		return -EINVAL;
-
-	if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
-		return -ENODEV;
-
-	obj = i915_gem_object_lookup(file, args->handle);
-	if (!obj)
-		return -ENOENT;
-
-	/* prime objects have no backing filp to GEM mmap
-	 * pages from.
-	 */
-	if (!obj->base.filp) {
-		i915_gem_object_put(obj);
-		return -ENXIO;
-	}
-
-	addr = vm_mmap(obj->base.filp, 0, args->size,
-		       PROT_READ | PROT_WRITE, MAP_SHARED,
-		       args->offset);
-	if (IS_ERR_VALUE(addr))
-		goto err;
-
-	if (args->flags & I915_MMAP_WC) {
-		struct mm_struct *mm = current->mm;
-		struct vm_area_struct *vma;
-
-		if (down_write_killable(&mm->mmap_sem)) {
-			i915_gem_object_put(obj);
-			return -EINTR;
-		}
-		vma = find_vma(mm, addr);
-		if (vma && __vma_matches(vma, obj->base.filp, addr, args->size))
-			vma->vm_page_prot =
-				pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
-		else
-			addr = -ENOMEM;
-		up_write(&mm->mmap_sem);
-		if (IS_ERR_VALUE(addr))
-			goto err;
-
-		/* This may race, but that's ok, it only gets set */
-		WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU);
-	}
-	i915_gem_object_put(obj);
-
-	args->addr_ptr = (u64)addr;
-
-	return 0;
-
-err:
-	i915_gem_object_put(obj);
-
-	return addr;
-}
-
-static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj)
-{
-	return i915_gem_object_get_tile_row_size(obj) >> PAGE_SHIFT;
-}
-
-/**
- * i915_gem_mmap_gtt_version - report the current feature set for GTT mmaps
- *
- * A history of the GTT mmap interface:
- *
- * 0 - Everything had to fit into the GTT. Both parties of a memcpy had to
- *     aligned and suitable for fencing, and still fit into the available
- *     mappable space left by the pinned display objects. A classic problem
- *     we called the page-fault-of-doom where we would ping-pong between
- *     two objects that could not fit inside the GTT and so the memcpy
- *     would page one object in at the expense of the other between every
- *     single byte.
- *
- * 1 - Objects can be any size, and have any compatible fencing (X Y, or none
- *     as set via i915_gem_set_tiling() [DRM_I915_GEM_SET_TILING]). If the
- *     object is too large for the available space (or simply too large
- *     for the mappable aperture!), a view is created instead and faulted
- *     into userspace. (This view is aligned and sized appropriately for
- *     fenced access.)
- *
- * 2 - Recognise WC as a separate cache domain so that we can flush the
- *     delayed writes via GTT before performing direct access via WC.
- *
- * Restrictions:
- *
- *  * snoopable objects cannot be accessed via the GTT. It can cause machine
- *    hangs on some architectures, corruption on others. An attempt to service
- *    a GTT page fault from a snoopable object will generate a SIGBUS.
- *
- *  * the object must be able to fit into RAM (physical memory, though no
- *    limited to the mappable aperture).
- *
- *
- * Caveats:
- *
- *  * a new GTT page fault will synchronize rendering from the GPU and flush
- *    all data to system memory. Subsequent access will not be synchronized.
- *
- *  * all mappings are revoked on runtime device suspend.
- *
- *  * there are only 8, 16 or 32 fence registers to share between all users
- *    (older machines require fence register for display and blitter access
- *    as well). Contention of the fence registers will cause the previous users
- *    to be unmapped and any new access will generate new page faults.
- *
- *  * running out of memory while servicing a fault may generate a SIGBUS,
- *    rather than the expected SIGSEGV.
- */
-int i915_gem_mmap_gtt_version(void)
-{
-	return 2;
-}
-
-static inline struct i915_ggtt_view
-compute_partial_view(const struct drm_i915_gem_object *obj,
-		     pgoff_t page_offset,
-		     unsigned int chunk)
-{
-	struct i915_ggtt_view view;
-
-	if (i915_gem_object_is_tiled(obj))
-		chunk = roundup(chunk, tile_row_pages(obj));
-
-	view.type = I915_GGTT_VIEW_PARTIAL;
-	view.partial.offset = rounddown(page_offset, chunk);
-	view.partial.size =
-		min_t(unsigned int, chunk,
-		      (obj->base.size >> PAGE_SHIFT) - view.partial.offset);
-
-	/* If the partial covers the entire object, just create a normal VMA. */
-	if (chunk >= obj->base.size >> PAGE_SHIFT)
-		view.type = I915_GGTT_VIEW_NORMAL;
-
-	return view;
-}
-
-/**
- * i915_gem_fault - fault a page into the GTT
- * @vmf: fault info
- *
- * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
- * from userspace.  The fault handler takes care of binding the object to
- * the GTT (if needed), allocating and programming a fence register (again,
- * only if needed based on whether the old reg is still valid or the object
- * is tiled) and inserting a new PTE into the faulting process.
- *
- * Note that the faulting process may involve evicting existing objects
- * from the GTT and/or fence registers to make room.  So performance may
- * suffer if the GTT working set is large or there are few fence registers
- * left.
- *
- * The current feature set supported by i915_gem_fault() and thus GTT mmaps
- * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version).
- */
-vm_fault_t i915_gem_fault(struct vm_fault *vmf)
-{
-#define MIN_CHUNK_PAGES (SZ_1M >> PAGE_SHIFT)
-	struct vm_area_struct *area = vmf->vma;
-	struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data);
-	struct drm_device *dev = obj->base.dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct i915_ggtt *ggtt = &dev_priv->ggtt;
-	bool write = area->vm_flags & VM_WRITE;
-	intel_wakeref_t wakeref;
-	struct i915_vma *vma;
-	pgoff_t page_offset;
-	int srcu;
-	int ret;
-
-	/* Sanity check that we allow writing into this object */
-	if (i915_gem_object_is_readonly(obj) && write)
-		return VM_FAULT_SIGBUS;
-
-	/* We don't use vmf->pgoff since that has the fake offset */
-	page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT;
-
-	trace_i915_gem_object_fault(obj, page_offset, true, write);
-
-	/* Try to flush the object off the GPU first without holding the lock.
-	 * Upon acquiring the lock, we will perform our sanity checks and then
-	 * repeat the flush holding the lock in the normal manner to catch cases
-	 * where we are gazumped.
-	 */
-	ret = i915_gem_object_wait(obj,
-				   I915_WAIT_INTERRUPTIBLE,
-				   MAX_SCHEDULE_TIMEOUT);
-	if (ret)
-		goto err;
-
-	ret = i915_gem_object_pin_pages(obj);
-	if (ret)
-		goto err;
-
-	wakeref = intel_runtime_pm_get(dev_priv);
-
-	srcu = i915_reset_trylock(dev_priv);
-	if (srcu < 0) {
-		ret = srcu;
-		goto err_rpm;
-	}
-
-	ret = i915_mutex_lock_interruptible(dev);
-	if (ret)
-		goto err_reset;
-
-	/* Access to snoopable pages through the GTT is incoherent. */
-	if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev_priv)) {
-		ret = -EFAULT;
-		goto err_unlock;
-	}
-
-	/* Now pin it into the GTT as needed */
-	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
-				       PIN_MAPPABLE |
-				       PIN_NONBLOCK |
-				       PIN_NONFAULT);
-	if (IS_ERR(vma)) {
-		/* Use a partial view if it is bigger than available space */
-		struct i915_ggtt_view view =
-			compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES);
-		unsigned int flags;
-
-		flags = PIN_MAPPABLE;
-		if (view.type == I915_GGTT_VIEW_NORMAL)
-			flags |= PIN_NONBLOCK; /* avoid warnings for pinned */
-
-		/*
-		 * Userspace is now writing through an untracked VMA, abandon
-		 * all hope that the hardware is able to track future writes.
-		 */
-		obj->frontbuffer_ggtt_origin = ORIGIN_CPU;
-
-		vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
-		if (IS_ERR(vma) && !view.type) {
-			flags = PIN_MAPPABLE;
-			view.type = I915_GGTT_VIEW_PARTIAL;
-			vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
-		}
-	}
-	if (IS_ERR(vma)) {
-		ret = PTR_ERR(vma);
-		goto err_unlock;
-	}
-
-	ret = i915_gem_object_set_to_gtt_domain(obj, write);
-	if (ret)
-		goto err_unpin;
-
-	ret = i915_vma_pin_fence(vma);
-	if (ret)
-		goto err_unpin;
-
-	/* Finally, remap it using the new GTT offset */
-	ret = remap_io_mapping(area,
-			       area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT),
-			       (ggtt->gmadr.start + vma->node.start) >> PAGE_SHIFT,
-			       min_t(u64, vma->size, area->vm_end - area->vm_start),
-			       &ggtt->iomap);
-	if (ret)
-		goto err_fence;
-
-	/* Mark as being mmapped into userspace for later revocation */
-	assert_rpm_wakelock_held(dev_priv);
-	if (!i915_vma_set_userfault(vma) && !obj->userfault_count++)
-		list_add(&obj->userfault_link, &dev_priv->mm.userfault_list);
-	GEM_BUG_ON(!obj->userfault_count);
-
-	i915_vma_set_ggtt_write(vma);
-
-err_fence:
-	i915_vma_unpin_fence(vma);
-err_unpin:
-	__i915_vma_unpin(vma);
-err_unlock:
-	mutex_unlock(&dev->struct_mutex);
-err_reset:
-	i915_reset_unlock(dev_priv, srcu);
-err_rpm:
-	intel_runtime_pm_put(dev_priv, wakeref);
-	i915_gem_object_unpin_pages(obj);
-err:
-	switch (ret) {
-	case -EIO:
-		/*
-		 * We eat errors when the gpu is terminally wedged to avoid
-		 * userspace unduly crashing (gl has no provisions for mmaps to
-		 * fail). But any other -EIO isn't ours (e.g. swap in failure)
-		 * and so needs to be reported.
-		 */
-		if (!i915_terminally_wedged(dev_priv))
-			return VM_FAULT_SIGBUS;
-		/* else: fall through */
-	case -EAGAIN:
-		/*
-		 * EAGAIN means the gpu is hung and we'll wait for the error
-		 * handler to reset everything when re-faulting in
-		 * i915_mutex_lock_interruptible.
-		 */
-	case 0:
-	case -ERESTARTSYS:
-	case -EINTR:
-	case -EBUSY:
-		/*
-		 * EBUSY is ok: this just means that another thread
-		 * already did the job.
-		 */
-		return VM_FAULT_NOPAGE;
-	case -ENOMEM:
-		return VM_FAULT_OOM;
-	case -ENOSPC:
-	case -EFAULT:
-		return VM_FAULT_SIGBUS;
-	default:
-		WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
-		return VM_FAULT_SIGBUS;
-	}
-}
-
-static void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj)
-{
-	struct i915_vma *vma;
-
-	GEM_BUG_ON(!obj->userfault_count);
-
-	obj->userfault_count = 0;
-	list_del(&obj->userfault_link);
-	drm_vma_node_unmap(&obj->base.vma_node,
-			   obj->base.dev->anon_inode->i_mapping);
-
-	for_each_ggtt_vma(vma, obj)
-		i915_vma_unset_userfault(vma);
-}
-
-/**
- * i915_gem_release_mmap - remove physical page mappings
- * @obj: obj in question
- *
- * Preserve the reservation of the mmapping with the DRM core code, but
- * relinquish ownership of the pages back to the system.
- *
- * It is vital that we remove the page mapping if we have mapped a tiled
- * object through the GTT and then lose the fence register due to
- * resource pressure. Similarly if the object has been moved out of the
- * aperture, than pages mapped into userspace must be revoked. Removing the
- * mapping will then trigger a page fault on the next user access, allowing
- * fixup by i915_gem_fault().
- */
-void
-i915_gem_release_mmap(struct drm_i915_gem_object *obj)
-{
-	struct drm_i915_private *i915 = to_i915(obj->base.dev);
-	intel_wakeref_t wakeref;
-
-	/* Serialisation between user GTT access and our code depends upon
-	 * revoking the CPU's PTE whilst the mutex is held. The next user
-	 * pagefault then has to wait until we release the mutex.
-	 *
-	 * Note that RPM complicates somewhat by adding an additional
-	 * requirement that operations to the GGTT be made holding the RPM
-	 * wakeref.
-	 */
-	lockdep_assert_held(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(i915);
-
-	if (!obj->userfault_count)
-		goto out;
-
-	__i915_gem_object_release_mmap(obj);
-
-	/* Ensure that the CPU's PTE are revoked and there are not outstanding
-	 * memory transactions from userspace before we return. The TLB
-	 * flushing implied above by changing the PTE above *should* be
-	 * sufficient, an extra barrier here just provides us with a bit
-	 * of paranoid documentation about our requirement to serialise
-	 * memory writes before touching registers / GSM.
-	 */
-	wmb();
-
-out:
-	intel_runtime_pm_put(i915, wakeref);
-}
-
 void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
 {
 	struct drm_i915_gem_object *obj, *on;
@@ -1865,78 +1395,6 @@ void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
 	}
 }
 
-static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
-{
-	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
-	int err;
-
-	err = drm_gem_create_mmap_offset(&obj->base);
-	if (likely(!err))
-		return 0;
-
-	/* Attempt to reap some mmap space from dead objects */
-	do {
-		err = i915_gem_wait_for_idle(dev_priv,
-					     I915_WAIT_INTERRUPTIBLE,
-					     MAX_SCHEDULE_TIMEOUT);
-		if (err)
-			break;
-
-		i915_gem_drain_freed_objects(dev_priv);
-		err = drm_gem_create_mmap_offset(&obj->base);
-		if (!err)
-			break;
-
-	} while (flush_delayed_work(&dev_priv->gt.retire_work));
-
-	return err;
-}
-
-int
-i915_gem_mmap_gtt(struct drm_file *file,
-		  struct drm_device *dev,
-		  u32 handle,
-		  u64 *offset)
-{
-	struct drm_i915_gem_object *obj;
-	int ret;
-
-	obj = i915_gem_object_lookup(file, handle);
-	if (!obj)
-		return -ENOENT;
-
-	ret = i915_gem_object_create_mmap_offset(obj);
-	if (ret == 0)
-		*offset = drm_vma_node_offset_addr(&obj->base.vma_node);
-
-	i915_gem_object_put(obj);
-	return ret;
-}
-
-/**
- * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
- * @dev: DRM device
- * @data: GTT mapping ioctl data
- * @file: GEM object info
- *
- * Simply returns the fake offset to userspace so it can mmap it.
- * The mmap call will end up in drm_gem_mmap(), which will set things
- * up so we can get faults in the handler above.
- *
- * The fault handler will take care of binding the object into the GTT
- * (since it may have been evicted to make room for something), allocating
- * a fence register, and mapping the appropriate aperture address into
- * userspace.
- */
-int
-i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
-			struct drm_file *file)
-{
-	struct drm_i915_gem_mmap_gtt *args = data;
-
-	return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
-}
-
 bool i915_sg_trim(struct sg_table *orig_st)
 {
 	struct sg_table new_st;
@@ -2256,7 +1714,7 @@ static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
 	 * We manually flush the CPU domain so that we can override and
 	 * force the flush for the display, and perform it asyncrhonously.
 	 */
-	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
 	if (obj->cache_dirty)
 		i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
 	obj->write_domain = 0;
@@ -2310,7 +1768,7 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
 	if (ret)
 		return ret;
 
-	flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);
 
 	/* Serialise direct access to this object with the barriers for
 	 * coherent writes from the GPU, by effectively invalidating the
@@ -2372,7 +1830,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 	if (ret)
 		return ret;
 
-	flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);
 
 	/* Serialise direct access to this object with the barriers for
 	 * coherent writes from the GPU, by effectively invalidating the
@@ -2481,7 +1939,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 			 * then double check if the GTT mapping is still
 			 * valid for that pointer access.
 			 */
-			i915_gem_release_mmap(obj);
+			i915_gem_object_release_mmap(obj);
 
 			/* As we no longer need a fence for GTT access,
 			 * we can relinquish it now (and so prevent having
@@ -2736,7 +2194,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
 	if (ret)
 		return ret;
 
-	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
 
 	/* Flush the CPU cache if it's still invalid. */
 	if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index 8675a6e24788..8ce3a30fb0a1 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -299,7 +299,7 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
 	i915_gem_object_unlock(obj);
 
 	/* Force the fence to be reacquired for GTT access */
-	i915_gem_release_mmap(obj);
+	i915_gem_object_release_mmap(obj);
 
 	/* Try to preallocate memory required to save swizzling on put-pages */
 	if (i915_gem_object_needs_bit17_swizzle(obj)) {
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
index ab6ae9ecf03b..e4b2ee4c54e3 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
@@ -88,493 +88,6 @@ static int igt_gem_huge(void *arg)
 	return err;
 }
 
-struct tile {
-	unsigned int width;
-	unsigned int height;
-	unsigned int stride;
-	unsigned int size;
-	unsigned int tiling;
-	unsigned int swizzle;
-};
-
-static u64 swizzle_bit(unsigned int bit, u64 offset)
-{
-	return (offset & BIT_ULL(bit)) >> (bit - 6);
-}
-
-static u64 tiled_offset(const struct tile *tile, u64 v)
-{
-	u64 x, y;
-
-	if (tile->tiling == I915_TILING_NONE)
-		return v;
-
-	y = div64_u64_rem(v, tile->stride, &x);
-	v = div64_u64_rem(y, tile->height, &y) * tile->stride * tile->height;
-
-	if (tile->tiling == I915_TILING_X) {
-		v += y * tile->width;
-		v += div64_u64_rem(x, tile->width, &x) << tile->size;
-		v += x;
-	} else if (tile->width == 128) {
-		const unsigned int ytile_span = 16;
-		const unsigned int ytile_height = 512;
-
-		v += y * ytile_span;
-		v += div64_u64_rem(x, ytile_span, &x) * ytile_height;
-		v += x;
-	} else {
-		const unsigned int ytile_span = 32;
-		const unsigned int ytile_height = 256;
-
-		v += y * ytile_span;
-		v += div64_u64_rem(x, ytile_span, &x) * ytile_height;
-		v += x;
-	}
-
-	switch (tile->swizzle) {
-	case I915_BIT_6_SWIZZLE_9:
-		v ^= swizzle_bit(9, v);
-		break;
-	case I915_BIT_6_SWIZZLE_9_10:
-		v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v);
-		break;
-	case I915_BIT_6_SWIZZLE_9_11:
-		v ^= swizzle_bit(9, v) ^ swizzle_bit(11, v);
-		break;
-	case I915_BIT_6_SWIZZLE_9_10_11:
-		v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v) ^ swizzle_bit(11, v);
-		break;
-	}
-
-	return v;
-}
-
-static int check_partial_mapping(struct drm_i915_gem_object *obj,
-				 const struct tile *tile,
-				 unsigned long end_time)
-{
-	const unsigned int nreal = obj->scratch / PAGE_SIZE;
-	const unsigned long npages = obj->base.size / PAGE_SIZE;
-	struct i915_vma *vma;
-	unsigned long page;
-	int err;
-
-	if (igt_timeout(end_time,
-			"%s: timed out before tiling=%d stride=%d\n",
-			__func__, tile->tiling, tile->stride))
-		return -EINTR;
-
-	err = i915_gem_object_set_tiling(obj, tile->tiling, tile->stride);
-	if (err) {
-		pr_err("Failed to set tiling mode=%u, stride=%u, err=%d\n",
-		       tile->tiling, tile->stride, err);
-		return err;
-	}
-
-	GEM_BUG_ON(i915_gem_object_get_tiling(obj) != tile->tiling);
-	GEM_BUG_ON(i915_gem_object_get_stride(obj) != tile->stride);
-
-	for_each_prime_number_from(page, 1, npages) {
-		struct i915_ggtt_view view =
-			compute_partial_view(obj, page, MIN_CHUNK_PAGES);
-		u32 __iomem *io;
-		struct page *p;
-		unsigned int n;
-		u64 offset;
-		u32 *cpu;
-
-		GEM_BUG_ON(view.partial.size > nreal);
-		cond_resched();
-
-		err = i915_gem_object_set_to_gtt_domain(obj, true);
-		if (err) {
-			pr_err("Failed to flush to GTT write domain; err=%d\n",
-			       err);
-			return err;
-		}
-
-		vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
-		if (IS_ERR(vma)) {
-			pr_err("Failed to pin partial view: offset=%lu; err=%d\n",
-			       page, (int)PTR_ERR(vma));
-			return PTR_ERR(vma);
-		}
-
-		n = page - view.partial.offset;
-		GEM_BUG_ON(n >= view.partial.size);
-
-		io = i915_vma_pin_iomap(vma);
-		i915_vma_unpin(vma);
-		if (IS_ERR(io)) {
-			pr_err("Failed to iomap partial view: offset=%lu; err=%d\n",
-			       page, (int)PTR_ERR(io));
-			return PTR_ERR(io);
-		}
-
-		iowrite32(page, io + n * PAGE_SIZE/sizeof(*io));
-		i915_vma_unpin_iomap(vma);
-
-		offset = tiled_offset(tile, page << PAGE_SHIFT);
-		if (offset >= obj->base.size)
-			continue;
-
-		flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
-
-		p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT);
-		cpu = kmap(p) + offset_in_page(offset);
-		drm_clflush_virt_range(cpu, sizeof(*cpu));
-		if (*cpu != (u32)page) {
-			pr_err("Partial view for %lu [%u] (offset=%llu, size=%u [%llu, row size %u], fence=%d, tiling=%d, stride=%d) misalignment, expected write to page (%llu + %u [0x%llx]) of 0x%x, found 0x%x\n",
-			       page, n,
-			       view.partial.offset,
-			       view.partial.size,
-			       vma->size >> PAGE_SHIFT,
-			       tile->tiling ? tile_row_pages(obj) : 0,
-			       vma->fence ? vma->fence->id : -1, tile->tiling, tile->stride,
-			       offset >> PAGE_SHIFT,
-			       (unsigned int)offset_in_page(offset),
-			       offset,
-			       (u32)page, *cpu);
-			err = -EINVAL;
-		}
-		*cpu = 0;
-		drm_clflush_virt_range(cpu, sizeof(*cpu));
-		kunmap(p);
-		if (err)
-			return err;
-
-		i915_vma_destroy(vma);
-	}
-
-	return 0;
-}
-
-static int igt_partial_tiling(void *arg)
-{
-	const unsigned int nreal = 1 << 12; /* largest tile row x2 */
-	struct drm_i915_private *i915 = arg;
-	struct drm_i915_gem_object *obj;
-	intel_wakeref_t wakeref;
-	int tiling;
-	int err;
-
-	/* We want to check the page mapping and fencing of a large object
-	 * mmapped through the GTT. The object we create is larger than can
-	 * possibly be mmaped as a whole, and so we must use partial GGTT vma.
-	 * We then check that a write through each partial GGTT vma ends up
-	 * in the right set of pages within the object, and with the expected
-	 * tiling, which we verify by manual swizzling.
-	 */
-
-	obj = huge_gem_object(i915,
-			      nreal << PAGE_SHIFT,
-			      (1 + next_prime_number(i915->ggtt.vm.total >> PAGE_SHIFT)) << PAGE_SHIFT);
-	if (IS_ERR(obj))
-		return PTR_ERR(obj);
-
-	err = i915_gem_object_pin_pages(obj);
-	if (err) {
-		pr_err("Failed to allocate %u pages (%lu total), err=%d\n",
-		       nreal, obj->base.size / PAGE_SIZE, err);
-		goto out;
-	}
-
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(i915);
-
-	if (1) {
-		IGT_TIMEOUT(end);
-		struct tile tile;
-
-		tile.height = 1;
-		tile.width = 1;
-		tile.size = 0;
-		tile.stride = 0;
-		tile.swizzle = I915_BIT_6_SWIZZLE_NONE;
-		tile.tiling = I915_TILING_NONE;
-
-		err = check_partial_mapping(obj, &tile, end);
-		if (err && err != -EINTR)
-			goto out_unlock;
-	}
-
-	for (tiling = I915_TILING_X; tiling <= I915_TILING_Y; tiling++) {
-		IGT_TIMEOUT(end);
-		unsigned int max_pitch;
-		unsigned int pitch;
-		struct tile tile;
-
-		if (i915->quirks & QUIRK_PIN_SWIZZLED_PAGES)
-			/*
-			 * The swizzling pattern is actually unknown as it
-			 * varies based on physical address of each page.
-			 * See i915_gem_detect_bit_6_swizzle().
-			 */
-			break;
-
-		tile.tiling = tiling;
-		switch (tiling) {
-		case I915_TILING_X:
-			tile.swizzle = i915->mm.bit_6_swizzle_x;
-			break;
-		case I915_TILING_Y:
-			tile.swizzle = i915->mm.bit_6_swizzle_y;
-			break;
-		}
-
-		GEM_BUG_ON(tile.swizzle == I915_BIT_6_SWIZZLE_UNKNOWN);
-		if (tile.swizzle == I915_BIT_6_SWIZZLE_9_17 ||
-		    tile.swizzle == I915_BIT_6_SWIZZLE_9_10_17)
-			continue;
-
-		if (INTEL_GEN(i915) <= 2) {
-			tile.height = 16;
-			tile.width = 128;
-			tile.size = 11;
-		} else if (tile.tiling == I915_TILING_Y &&
-			   HAS_128_BYTE_Y_TILING(i915)) {
-			tile.height = 32;
-			tile.width = 128;
-			tile.size = 12;
-		} else {
-			tile.height = 8;
-			tile.width = 512;
-			tile.size = 12;
-		}
-
-		if (INTEL_GEN(i915) < 4)
-			max_pitch = 8192 / tile.width;
-		else if (INTEL_GEN(i915) < 7)
-			max_pitch = 128 * I965_FENCE_MAX_PITCH_VAL / tile.width;
-		else
-			max_pitch = 128 * GEN7_FENCE_MAX_PITCH_VAL / tile.width;
-
-		for (pitch = max_pitch; pitch; pitch >>= 1) {
-			tile.stride = tile.width * pitch;
-			err = check_partial_mapping(obj, &tile, end);
-			if (err == -EINTR)
-				goto next_tiling;
-			if (err)
-				goto out_unlock;
-
-			if (pitch > 2 && INTEL_GEN(i915) >= 4) {
-				tile.stride = tile.width * (pitch - 1);
-				err = check_partial_mapping(obj, &tile, end);
-				if (err == -EINTR)
-					goto next_tiling;
-				if (err)
-					goto out_unlock;
-			}
-
-			if (pitch < max_pitch && INTEL_GEN(i915) >= 4) {
-				tile.stride = tile.width * (pitch + 1);
-				err = check_partial_mapping(obj, &tile, end);
-				if (err == -EINTR)
-					goto next_tiling;
-				if (err)
-					goto out_unlock;
-			}
-		}
-
-		if (INTEL_GEN(i915) >= 4) {
-			for_each_prime_number(pitch, max_pitch) {
-				tile.stride = tile.width * pitch;
-				err = check_partial_mapping(obj, &tile, end);
-				if (err == -EINTR)
-					goto next_tiling;
-				if (err)
-					goto out_unlock;
-			}
-		}
-
-next_tiling: ;
-	}
-
-out_unlock:
-	intel_runtime_pm_put(i915, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
-	i915_gem_object_unpin_pages(obj);
-out:
-	i915_gem_object_put(obj);
-	return err;
-}
-
-static int make_obj_busy(struct drm_i915_gem_object *obj)
-{
-	struct drm_i915_private *i915 = to_i915(obj->base.dev);
-	struct i915_request *rq;
-	struct i915_vma *vma;
-	int err;
-
-	vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
-	if (IS_ERR(vma))
-		return PTR_ERR(vma);
-
-	err = i915_vma_pin(vma, 0, 0, PIN_USER);
-	if (err)
-		return err;
-
-	rq = i915_request_alloc(i915->engine[RCS0], i915->kernel_context);
-	if (IS_ERR(rq)) {
-		i915_vma_unpin(vma);
-		return PTR_ERR(rq);
-	}
-
-	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
-
-	i915_request_add(rq);
-
-	__i915_gem_object_release_unless_active(obj);
-	i915_vma_unpin(vma);
-
-	return err;
-}
-
-static bool assert_mmap_offset(struct drm_i915_private *i915,
-			       unsigned long size,
-			       int expected)
-{
-	struct drm_i915_gem_object *obj;
-	int err;
-
-	obj = i915_gem_object_create_internal(i915, size);
-	if (IS_ERR(obj))
-		return PTR_ERR(obj);
-
-	err = i915_gem_object_create_mmap_offset(obj);
-	i915_gem_object_put(obj);
-
-	return err == expected;
-}
-
-static void disable_retire_worker(struct drm_i915_private *i915)
-{
-	i915_gem_shrinker_unregister(i915);
-
-	mutex_lock(&i915->drm.struct_mutex);
-	if (!i915->gt.active_requests++) {
-		intel_wakeref_t wakeref;
-
-		with_intel_runtime_pm(i915, wakeref)
-			i915_gem_unpark(i915);
-	}
-	mutex_unlock(&i915->drm.struct_mutex);
-
-	cancel_delayed_work_sync(&i915->gt.retire_work);
-	cancel_delayed_work_sync(&i915->gt.idle_work);
-}
-
-static int igt_mmap_offset_exhaustion(void *arg)
-{
-	struct drm_i915_private *i915 = arg;
-	struct drm_mm *mm = &i915->drm.vma_offset_manager->vm_addr_space_mm;
-	struct drm_i915_gem_object *obj;
-	struct drm_mm_node resv, *hole;
-	u64 hole_start, hole_end;
-	int loop, err;
-
-	/* Disable background reaper */
-	disable_retire_worker(i915);
-	GEM_BUG_ON(!i915->gt.awake);
-
-	/* Trim the device mmap space to only a page */
-	memset(&resv, 0, sizeof(resv));
-	drm_mm_for_each_hole(hole, mm, hole_start, hole_end) {
-		resv.start = hole_start;
-		resv.size = hole_end - hole_start - 1; /* PAGE_SIZE units */
-		err = drm_mm_reserve_node(mm, &resv);
-		if (err) {
-			pr_err("Failed to trim VMA manager, err=%d\n", err);
-			goto out_park;
-		}
-		break;
-	}
-
-	/* Just fits! */
-	if (!assert_mmap_offset(i915, PAGE_SIZE, 0)) {
-		pr_err("Unable to insert object into single page hole\n");
-		err = -EINVAL;
-		goto out;
-	}
-
-	/* Too large */
-	if (!assert_mmap_offset(i915, 2*PAGE_SIZE, -ENOSPC)) {
-		pr_err("Unexpectedly succeeded in inserting too large object into single page hole\n");
-		err = -EINVAL;
-		goto out;
-	}
-
-	/* Fill the hole, further allocation attempts should then fail */
-	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
-	if (IS_ERR(obj)) {
-		err = PTR_ERR(obj);
-		goto out;
-	}
-
-	err = i915_gem_object_create_mmap_offset(obj);
-	if (err) {
-		pr_err("Unable to insert object into reclaimed hole\n");
-		goto err_obj;
-	}
-
-	if (!assert_mmap_offset(i915, PAGE_SIZE, -ENOSPC)) {
-		pr_err("Unexpectedly succeeded in inserting object into no holes!\n");
-		err = -EINVAL;
-		goto err_obj;
-	}
-
-	i915_gem_object_put(obj);
-
-	/* Now fill with busy dead objects that we expect to reap */
-	for (loop = 0; loop < 3; loop++) {
-		intel_wakeref_t wakeref;
-
-		if (i915_terminally_wedged(i915))
-			break;
-
-		obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
-		if (IS_ERR(obj)) {
-			err = PTR_ERR(obj);
-			goto out;
-		}
-
-		err = 0;
-		mutex_lock(&i915->drm.struct_mutex);
-		with_intel_runtime_pm(i915, wakeref)
-			err = make_obj_busy(obj);
-		mutex_unlock(&i915->drm.struct_mutex);
-		if (err) {
-			pr_err("[loop %d] Failed to busy the object\n", loop);
-			goto err_obj;
-		}
-
-		/* NB we rely on the _active_ reference to access obj now */
-		GEM_BUG_ON(!i915_gem_object_is_active(obj));
-		err = i915_gem_object_create_mmap_offset(obj);
-		if (err) {
-			pr_err("[loop %d] i915_gem_object_create_mmap_offset failed with err=%d\n",
-			       loop, err);
-			goto out;
-		}
-	}
-
-out:
-	drm_mm_remove_node(&resv);
-out_park:
-	mutex_lock(&i915->drm.struct_mutex);
-	if (--i915->gt.active_requests)
-		queue_delayed_work(i915->wq, &i915->gt.retire_work, 0);
-	else
-		queue_delayed_work(i915->wq, &i915->gt.idle_work, 0);
-	mutex_unlock(&i915->drm.struct_mutex);
-	i915_gem_shrinker_register(i915);
-	return err;
-err_obj:
-	i915_gem_object_put(obj);
-	goto out;
-}
-
 int i915_gem_object_mock_selftests(void)
 {
 	static const struct i915_subtest tests[] = {
@@ -597,8 +110,6 @@ int i915_gem_object_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
 		SUBTEST(igt_gem_huge),
-		SUBTEST(igt_partial_tiling),
-		SUBTEST(igt_mmap_offset_exhaustion),
 	};
 
 	return i915_subtests(tests, i915);
diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
index 6d766925ad04..bbf387de8db3 100644
--- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
@@ -16,6 +16,7 @@ selftest(timelines, i915_timeline_live_selftests)
 selftest(requests, i915_request_live_selftests)
 selftest(active, i915_active_live_selftests)
 selftest(objects, i915_gem_object_live_selftests)
+selftest(mman, i915_gem_mman_live_selftests)
 selftest(dmabuf, i915_gem_dmabuf_live_selftests)
 selftest(coherency, i915_gem_coherency_live_selftests)
 selftest(gtt, i915_gem_gtt_live_selftests)
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 78+ messages in thread

* [PATCH 35/43] drm/i915: Move GEM domain management to its own file
  2019-03-06 14:24 RFC Breaking up GEM struct_mutex for async-pages Chris Wilson
                   ` (33 preceding siblings ...)
  2019-03-06 14:25 ` [PATCH 34/43] drm/i915: Move mmap and friends " Chris Wilson
@ 2019-03-06 14:25 ` Chris Wilson
  2019-03-07 13:00   ` Matthew Auld
  2019-03-06 14:25 ` [PATCH 36/43] drm/i915: Move more GEM objects under gem/ Chris Wilson
                   ` (10 subsequent siblings)
  45 siblings, 1 reply; 78+ messages in thread
From: Chris Wilson @ 2019-03-06 14:25 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld

Continuing the decluttering of i915_gem.c

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Makefile                 |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_domain.c    | 764 ++++++++++++++++++
 drivers/gpu/drm/i915/gem/i915_gem_object.h    |  29 +
 drivers/gpu/drm/i915/gvt/cmd_parser.c         |   4 +-
 drivers/gpu/drm/i915/gvt/scheduler.c          |   6 +-
 drivers/gpu/drm/i915/i915_cmd_parser.c        |   8 +-
 drivers/gpu/drm/i915/i915_drv.h               |  34 -
 drivers/gpu/drm/i915/i915_gem.c               | 757 +----------------
 drivers/gpu/drm/i915/i915_gem_execbuffer.c    |   4 +-
 drivers/gpu/drm/i915/i915_gem_render_state.c  |   4 +-
 drivers/gpu/drm/i915/intel_drv.h              |   2 +
 drivers/gpu/drm/i915/intel_frontbuffer.h      |   8 +
 drivers/gpu/drm/i915/selftests/huge_pages.c   |   4 +-
 .../drm/i915/selftests/i915_gem_coherency.c   |   8 +-
 .../gpu/drm/i915/selftests/i915_gem_context.c |   8 +-
 15 files changed, 831 insertions(+), 810 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_domain.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index c2804efe4e5a..c4b78634b5ee 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -71,6 +71,7 @@ i915-$(CONFIG_DRM_I915_WERROR) += \
 
 # GEM code
 i915-y += \
+	  gem/i915_gem_domain.o \
 	  gem/i915_gem_object.o \
 	  gem/i915_gem_mman.o \
 	  gem/i915_gem_pages.o \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
new file mode 100644
index 000000000000..c21ceb08f845
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -0,0 +1,764 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2016 Intel Corporation
+ */
+
+#include "i915_gem_ioctls.h"
+#include "i915_gem_object.h"
+
+#include "../i915_drv.h"
+#include "../i915_gem_clflush.h"
+#include "../i915_gem_gtt.h"
+#include "../i915_vma.h"
+
+#include "../intel_frontbuffer.h"
+
+static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
+{
+	/*
+	 * We manually flush the CPU domain so that we can override and
+	 * force the flush for the display, and perform it asyncrhonously.
+	 */
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+	if (obj->cache_dirty)
+		i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
+	obj->write_domain = 0;
+}
+
+void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
+{
+	if (!READ_ONCE(obj->pin_global))
+		return;
+
+	mutex_lock(&obj->base.dev->struct_mutex);
+	__i915_gem_object_flush_for_display(obj);
+	mutex_unlock(&obj->base.dev->struct_mutex);
+}
+
+/**
+ * Moves a single object to the WC read, and possibly write domain.
+ * @obj: object to act on
+ * @write: ask for write access or read only
+ *
+ * This function returns when the move is complete, including waiting on
+ * flushes to occur.
+ */
+int
+i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
+{
+	int ret;
+
+	lockdep_assert_held(&obj->base.dev->struct_mutex);
+
+	ret = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE |
+				   I915_WAIT_LOCKED |
+				   (write ? I915_WAIT_ALL : 0),
+				   MAX_SCHEDULE_TIMEOUT);
+	if (ret)
+		return ret;
+
+	if (obj->write_domain == I915_GEM_DOMAIN_WC)
+		return 0;
+
+	/* Flush and acquire obj->pages so that we are coherent through
+	 * direct access in memory with previous cached writes through
+	 * shmemfs and that our cache domain tracking remains valid.
+	 * For example, if the obj->filp was moved to swap without us
+	 * being notified and releasing the pages, we would mistakenly
+	 * continue to assume that the obj remained out of the CPU cached
+	 * domain.
+	 */
+	ret = i915_gem_object_pin_pages(obj);
+	if (ret)
+		return ret;
+
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);
+
+	/* Serialise direct access to this object with the barriers for
+	 * coherent writes from the GPU, by effectively invalidating the
+	 * WC domain upon first access.
+	 */
+	if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
+		mb();
+
+	/* It should now be out of any other write domains, and we can update
+	 * the domain values for our changes.
+	 */
+	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
+	obj->read_domains |= I915_GEM_DOMAIN_WC;
+	if (write) {
+		obj->read_domains = I915_GEM_DOMAIN_WC;
+		obj->write_domain = I915_GEM_DOMAIN_WC;
+		obj->mm.dirty = true;
+	}
+
+	i915_gem_object_unpin_pages(obj);
+	return 0;
+}
+
+/**
+ * Moves a single object to the GTT read, and possibly write domain.
+ * @obj: object to act on
+ * @write: ask for write access or read only
+ *
+ * This function returns when the move is complete, including waiting on
+ * flushes to occur.
+ */
+int
+i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
+{
+	int ret;
+
+	lockdep_assert_held(&obj->base.dev->struct_mutex);
+
+	ret = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE |
+				   I915_WAIT_LOCKED |
+				   (write ? I915_WAIT_ALL : 0),
+				   MAX_SCHEDULE_TIMEOUT);
+	if (ret)
+		return ret;
+
+	if (obj->write_domain == I915_GEM_DOMAIN_GTT)
+		return 0;
+
+	/* Flush and acquire obj->pages so that we are coherent through
+	 * direct access in memory with previous cached writes through
+	 * shmemfs and that our cache domain tracking remains valid.
+	 * For example, if the obj->filp was moved to swap without us
+	 * being notified and releasing the pages, we would mistakenly
+	 * continue to assume that the obj remained out of the CPU cached
+	 * domain.
+	 */
+	ret = i915_gem_object_pin_pages(obj);
+	if (ret)
+		return ret;
+
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);
+
+	/* Serialise direct access to this object with the barriers for
+	 * coherent writes from the GPU, by effectively invalidating the
+	 * GTT domain upon first access.
+	 */
+	if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
+		mb();
+
+	/* It should now be out of any other write domains, and we can update
+	 * the domain values for our changes.
+	 */
+	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
+	obj->read_domains |= I915_GEM_DOMAIN_GTT;
+	if (write) {
+		obj->read_domains = I915_GEM_DOMAIN_GTT;
+		obj->write_domain = I915_GEM_DOMAIN_GTT;
+		obj->mm.dirty = true;
+	}
+
+	i915_gem_object_unpin_pages(obj);
+	return 0;
+}
+
+/**
+ * Changes the cache-level of an object across all VMA.
+ * @obj: object to act on
+ * @cache_level: new cache level to set for the object
+ *
+ * After this function returns, the object will be in the new cache-level
+ * across all GTT and the contents of the backing storage will be coherent,
+ * with respect to the new cache-level. In order to keep the backing storage
+ * coherent for all users, we only allow a single cache level to be set
+ * globally on the object and prevent it from being changed whilst the
+ * hardware is reading from the object. That is if the object is currently
+ * on the scanout it will be set to uncached (or equivalent display
+ * cache coherency) and all non-MOCS GPU access will also be uncached so
+ * that all direct access to the scanout remains coherent.
+ */
+int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
+				    enum i915_cache_level cache_level)
+{
+	struct i915_vma *vma;
+	int ret;
+
+	lockdep_assert_held(&obj->base.dev->struct_mutex);
+
+	if (obj->cache_level == cache_level)
+		return 0;
+
+	/* Inspect the list of currently bound VMA and unbind any that would
+	 * be invalid given the new cache-level. This is principally to
+	 * catch the issue of the CS prefetch crossing page boundaries and
+	 * reading an invalid PTE on older architectures.
+	 */
+restart:
+	list_for_each_entry(vma, &obj->vma.list, obj_link) {
+		if (!drm_mm_node_allocated(&vma->node))
+			continue;
+
+		if (i915_vma_is_pinned(vma)) {
+			DRM_DEBUG("can not change the cache level of pinned objects\n");
+			return -EBUSY;
+		}
+
+		if (!i915_vma_is_closed(vma) &&
+		    i915_gem_valid_gtt_space(vma, cache_level))
+			continue;
+
+		ret = i915_vma_unbind(vma);
+		if (ret)
+			return ret;
+
+		/* As unbinding may affect other elements in the
+		 * obj->vma_list (due to side-effects from retiring
+		 * an active vma), play safe and restart the iterator.
+		 */
+		goto restart;
+	}
+
+	/* We can reuse the existing drm_mm nodes but need to change the
+	 * cache-level on the PTE. We could simply unbind them all and
+	 * rebind with the correct cache-level on next use. However since
+	 * we already have a valid slot, dma mapping, pages etc, we may as
+	 * rewrite the PTE in the belief that doing so tramples upon less
+	 * state and so involves less work.
+	 */
+	if (obj->bind_count) {
+		/* Before we change the PTE, the GPU must not be accessing it.
+		 * If we wait upon the object, we know that all the bound
+		 * VMA are no longer active.
+		 */
+		ret = i915_gem_object_wait(obj,
+					   I915_WAIT_INTERRUPTIBLE |
+					   I915_WAIT_LOCKED |
+					   I915_WAIT_ALL,
+					   MAX_SCHEDULE_TIMEOUT);
+		if (ret)
+			return ret;
+
+		if (!HAS_LLC(to_i915(obj->base.dev)) &&
+		    cache_level != I915_CACHE_NONE) {
+			/* Access to snoopable pages through the GTT is
+			 * incoherent and on some machines causes a hard
+			 * lockup. Relinquish the CPU mmaping to force
+			 * userspace to refault in the pages and we can
+			 * then double check if the GTT mapping is still
+			 * valid for that pointer access.
+			 */
+			i915_gem_object_release_mmap(obj);
+
+			/* As we no longer need a fence for GTT access,
+			 * we can relinquish it now (and so prevent having
+			 * to steal a fence from someone else on the next
+			 * fence request). Note GPU activity would have
+			 * dropped the fence as all snoopable access is
+			 * supposed to be linear.
+			 */
+			for_each_ggtt_vma(vma, obj) {
+				ret = i915_vma_put_fence(vma);
+				if (ret)
+					return ret;
+			}
+		} else {
+			/* We either have incoherent backing store and
+			 * so no GTT access or the architecture is fully
+			 * coherent. In such cases, existing GTT mmaps
+			 * ignore the cache bit in the PTE and we can
+			 * rewrite it without confusing the GPU or having
+			 * to force userspace to fault back in its mmaps.
+			 */
+		}
+
+		list_for_each_entry(vma, &obj->vma.list, obj_link) {
+			if (!drm_mm_node_allocated(&vma->node))
+				continue;
+
+			ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
+			if (ret)
+				return ret;
+		}
+	}
+
+	list_for_each_entry(vma, &obj->vma.list, obj_link)
+		vma->node.color = cache_level;
+	i915_gem_object_set_cache_coherency(obj, cache_level);
+	obj->cache_dirty = true; /* Always invalidate stale cachelines */
+
+	return 0;
+}
+
+int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
+			       struct drm_file *file)
+{
+	struct drm_i915_gem_caching *args = data;
+	struct drm_i915_gem_object *obj;
+	int err = 0;
+
+	rcu_read_lock();
+	obj = i915_gem_object_lookup_rcu(file, args->handle);
+	if (!obj) {
+		err = -ENOENT;
+		goto out;
+	}
+
+	switch (obj->cache_level) {
+	case I915_CACHE_LLC:
+	case I915_CACHE_L3_LLC:
+		args->caching = I915_CACHING_CACHED;
+		break;
+
+	case I915_CACHE_WT:
+		args->caching = I915_CACHING_DISPLAY;
+		break;
+
+	default:
+		args->caching = I915_CACHING_NONE;
+		break;
+	}
+out:
+	rcu_read_unlock();
+	return err;
+}
+
+int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
+			       struct drm_file *file)
+{
+	struct drm_i915_private *i915 = to_i915(dev);
+	struct drm_i915_gem_caching *args = data;
+	struct drm_i915_gem_object *obj;
+	enum i915_cache_level level;
+	int ret = 0;
+
+	switch (args->caching) {
+	case I915_CACHING_NONE:
+		level = I915_CACHE_NONE;
+		break;
+	case I915_CACHING_CACHED:
+		/*
+		 * Due to a HW issue on BXT A stepping, GPU stores via a
+		 * snooped mapping may leave stale data in a corresponding CPU
+		 * cacheline, whereas normally such cachelines would get
+		 * invalidated.
+		 */
+		if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
+			return -ENODEV;
+
+		level = I915_CACHE_LLC;
+		break;
+	case I915_CACHING_DISPLAY:
+		level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	obj = i915_gem_object_lookup(file, args->handle);
+	if (!obj)
+		return -ENOENT;
+
+	/*
+	 * The caching mode of proxy object is handled by its generator, and
+	 * not allowed to be changed by userspace.
+	 */
+	if (i915_gem_object_is_proxy(obj)) {
+		ret = -ENXIO;
+		goto out;
+	}
+
+	if (obj->cache_level == level)
+		goto out;
+
+	ret = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE,
+				   MAX_SCHEDULE_TIMEOUT);
+	if (ret)
+		goto out;
+
+	ret = i915_mutex_lock_interruptible(dev);
+	if (ret)
+		goto out;
+
+	ret = i915_gem_object_set_cache_level(obj, level);
+	mutex_unlock(&dev->struct_mutex);
+
+out:
+	i915_gem_object_put(obj);
+	return ret;
+}
+
+/*
+ * Prepare buffer for display plane (scanout, cursors, etc). Can be called from
+ * an uninterruptible phase (modesetting) and allows any flushes to be pipelined
+ * (for pageflips). We only flush the caches while preparing the buffer for
+ * display, the callers are responsible for frontbuffer flush.
+ */
+struct i915_vma *
+i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
+				     u32 alignment,
+				     const struct i915_ggtt_view *view,
+				     unsigned int flags)
+{
+	struct i915_vma *vma;
+	int ret;
+
+	lockdep_assert_held(&obj->base.dev->struct_mutex);
+
+	/* Mark the global pin early so that we account for the
+	 * display coherency whilst setting up the cache domains.
+	 */
+	obj->pin_global++;
+
+	/* The display engine is not coherent with the LLC cache on gen6.  As
+	 * a result, we make sure that the pinning that is about to occur is
+	 * done with uncached PTEs. This is lowest common denominator for all
+	 * chipsets.
+	 *
+	 * However for gen6+, we could do better by using the GFDT bit instead
+	 * of uncaching, which would allow us to flush all the LLC-cached data
+	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
+	 */
+	ret = i915_gem_object_set_cache_level(obj,
+					      HAS_WT(to_i915(obj->base.dev)) ?
+					      I915_CACHE_WT : I915_CACHE_NONE);
+	if (ret) {
+		vma = ERR_PTR(ret);
+		goto err_unpin_global;
+	}
+
+	/* As the user may map the buffer once pinned in the display plane
+	 * (e.g. libkms for the bootup splash), we have to ensure that we
+	 * always use map_and_fenceable for all scanout buffers. However,
+	 * it may simply be too big to fit into mappable, in which case
+	 * put it anyway and hope that userspace can cope (but always first
+	 * try to preserve the existing ABI).
+	 */
+	vma = ERR_PTR(-ENOSPC);
+	if ((flags & PIN_MAPPABLE) == 0 &&
+	    (!view || view->type == I915_GGTT_VIEW_NORMAL))
+		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
+					       flags |
+					       PIN_MAPPABLE |
+					       PIN_NONBLOCK);
+	if (IS_ERR(vma))
+		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags);
+	if (IS_ERR(vma))
+		goto err_unpin_global;
+
+	vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
+
+	__i915_gem_object_flush_for_display(obj);
+
+	/* It should now be out of any other write domains, and we can update
+	 * the domain values for our changes.
+	 */
+	obj->read_domains |= I915_GEM_DOMAIN_GTT;
+
+	return vma;
+
+err_unpin_global:
+	obj->pin_global--;
+	return vma;
+}
+
+static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	struct list_head *list;
+	struct i915_vma *vma;
+
+	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
+
+	mutex_lock(&i915->ggtt.vm.mutex);
+	for_each_ggtt_vma(vma, obj) {
+		if (!drm_mm_node_allocated(&vma->node))
+			continue;
+
+		list_move_tail(&vma->vm_link, &vma->vm->bound_list);
+	}
+	mutex_unlock(&i915->ggtt.vm.mutex);
+
+	spin_lock(&i915->mm.obj_lock);
+	list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list;
+	list_move_tail(&obj->mm.link, list);
+	spin_unlock(&i915->mm.obj_lock);
+}
+
+void
+i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
+{
+	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
+
+	if (WARN_ON(vma->obj->pin_global == 0))
+		return;
+
+	if (--vma->obj->pin_global == 0)
+		vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
+
+	/* Bump the LRU to try and avoid premature eviction whilst flipping  */
+	i915_gem_object_bump_inactive_ggtt(vma->obj);
+
+	i915_vma_unpin(vma);
+}
+
+/**
+ * Moves a single object to the CPU read, and possibly write domain.
+ * @obj: object to act on
+ * @write: requesting write or read-only access
+ *
+ * This function returns when the move is complete, including waiting on
+ * flushes to occur.
+ */
+int
+i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
+{
+	int ret;
+
+	lockdep_assert_held(&obj->base.dev->struct_mutex);
+
+	ret = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE |
+				   I915_WAIT_LOCKED |
+				   (write ? I915_WAIT_ALL : 0),
+				   MAX_SCHEDULE_TIMEOUT);
+	if (ret)
+		return ret;
+
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+
+	/* Flush the CPU cache if it's still invalid. */
+	if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
+		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
+		obj->read_domains |= I915_GEM_DOMAIN_CPU;
+	}
+
+	/* It should now be out of any other write domains, and we can update
+	 * the domain values for our changes.
+	 */
+	GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);
+
+	/* If we're writing through the CPU, then the GPU read domains will
+	 * need to be invalidated at next use.
+	 */
+	if (write)
+		__start_cpu_write(obj);
+
+	return 0;
+}
+
+static inline enum fb_op_origin
+fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
+{
+	return (domain == I915_GEM_DOMAIN_GTT ?
+		obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
+}
+
+/**
+ * Called when user space prepares to use an object with the CPU, either
+ * through the mmap ioctl's mapping or a GTT mapping.
+ * @dev: drm device
+ * @data: ioctl data blob
+ * @file: drm file
+ */
+int
+i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
+			  struct drm_file *file)
+{
+	struct drm_i915_gem_set_domain *args = data;
+	struct drm_i915_gem_object *obj;
+	u32 read_domains = args->read_domains;
+	u32 write_domain = args->write_domain;
+	int err;
+
+	/* Only handle setting domains to types used by the CPU. */
+	if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
+		return -EINVAL;
+
+	/* Having something in the write domain implies it's in the read
+	 * domain, and only that read domain.  Enforce that in the request.
+	 */
+	if (write_domain != 0 && read_domains != write_domain)
+		return -EINVAL;
+
+	obj = i915_gem_object_lookup(file, args->handle);
+	if (!obj)
+		return -ENOENT;
+
+	/* Try to flush the object off the GPU without holding the lock.
+	 * We will repeat the flush holding the lock in the normal manner
+	 * to catch cases where we are gazumped.
+	 */
+	err = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE |
+				   I915_WAIT_PRIORITY |
+				   (write_domain ? I915_WAIT_ALL : 0),
+				   MAX_SCHEDULE_TIMEOUT);
+	if (err)
+		goto out;
+
+	/*
+	 * Proxy objects do not control access to the backing storage, ergo
+	 * they cannot be used as a means to manipulate the cache domain
+	 * tracking for that backing storage. The proxy object is always
+	 * considered to be outside of any cache domain.
+	 */
+	if (i915_gem_object_is_proxy(obj)) {
+		err = -ENXIO;
+		goto out;
+	}
+
+	/*
+	 * Flush and acquire obj->pages so that we are coherent through
+	 * direct access in memory with previous cached writes through
+	 * shmemfs and that our cache domain tracking remains valid.
+	 * For example, if the obj->filp was moved to swap without us
+	 * being notified and releasing the pages, we would mistakenly
+	 * continue to assume that the obj remained out of the CPU cached
+	 * domain.
+	 */
+	err = i915_gem_object_pin_pages(obj);
+	if (err)
+		goto out;
+
+	err = i915_mutex_lock_interruptible(dev);
+	if (err)
+		goto out_unpin;
+
+	if (read_domains & I915_GEM_DOMAIN_WC)
+		err = i915_gem_object_set_to_wc_domain(obj, write_domain);
+	else if (read_domains & I915_GEM_DOMAIN_GTT)
+		err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
+	else
+		err = i915_gem_object_set_to_cpu_domain(obj, write_domain);
+
+	/* And bump the LRU for this access */
+	i915_gem_object_bump_inactive_ggtt(obj);
+
+	mutex_unlock(&dev->struct_mutex);
+
+	if (write_domain != 0)
+		intel_fb_obj_invalidate(obj,
+					fb_write_origin(obj, write_domain));
+
+out_unpin:
+	i915_gem_object_unpin_pages(obj);
+out:
+	i915_gem_object_put(obj);
+	return err;
+}
+
+/*
+ * Pins the specified object's pages and synchronizes the object with
+ * GPU accesses. Sets needs_clflush to non-zero if the caller should
+ * flush the object from the CPU cache.
+ */
+int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
+				 unsigned int *needs_clflush)
+{
+	int ret;
+
+	lockdep_assert_held(&obj->base.dev->struct_mutex);
+
+	*needs_clflush = 0;
+	if (!i915_gem_object_has_struct_page(obj))
+		return -ENODEV;
+
+	ret = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE |
+				   I915_WAIT_LOCKED,
+				   MAX_SCHEDULE_TIMEOUT);
+	if (ret)
+		return ret;
+
+	ret = i915_gem_object_pin_pages(obj);
+	if (ret)
+		return ret;
+
+	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
+	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
+		ret = i915_gem_object_set_to_cpu_domain(obj, false);
+		if (ret)
+			goto err_unpin;
+		else
+			goto out;
+	}
+
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+
+	/* If we're not in the cpu read domain, set ourself into the gtt
+	 * read domain and manually flush cachelines (if required). This
+	 * optimizes for the case when the gpu will dirty the data
+	 * anyway again before the next pread happens.
+	 */
+	if (!obj->cache_dirty &&
+	    !(obj->read_domains & I915_GEM_DOMAIN_CPU))
+		*needs_clflush = CLFLUSH_BEFORE;
+
+out:
+	/* return with the pages pinned */
+	return 0;
+
+err_unpin:
+	i915_gem_object_unpin_pages(obj);
+	return ret;
+}
+
+int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
+				  unsigned int *needs_clflush)
+{
+	int ret;
+
+	lockdep_assert_held(&obj->base.dev->struct_mutex);
+
+	*needs_clflush = 0;
+	if (!i915_gem_object_has_struct_page(obj))
+		return -ENODEV;
+
+	ret = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE |
+				   I915_WAIT_LOCKED |
+				   I915_WAIT_ALL,
+				   MAX_SCHEDULE_TIMEOUT);
+	if (ret)
+		return ret;
+
+	ret = i915_gem_object_pin_pages(obj);
+	if (ret)
+		return ret;
+
+	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
+	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
+		ret = i915_gem_object_set_to_cpu_domain(obj, true);
+		if (ret)
+			goto err_unpin;
+		else
+			goto out;
+	}
+
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+
+	/* If we're not in the cpu write domain, set ourself into the
+	 * gtt write domain and manually flush cachelines (as required).
+	 * This optimizes for the case when the gpu will use the data
+	 * right away and we therefore have to clflush anyway.
+	 */
+	if (!obj->cache_dirty) {
+		*needs_clflush |= CLFLUSH_AFTER;
+
+		/*
+		 * Same trick applies to invalidate partially written
+		 * cachelines read before writing.
+		 */
+		if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
+			*needs_clflush |= CLFLUSH_BEFORE;
+	}
+
+out:
+	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
+	obj->mm.dirty = true;
+	/* return with the pages pinned */
+	return 0;
+
+err_unpin:
+	i915_gem_object_unpin_pages(obj);
+	return ret;
+}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 20b72c16c486..1c820ddc5ab6 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -15,6 +15,8 @@
 
 #include "i915_gem_object_types.h"
 
+#include "../i915_gem_gtt.h"
+
 void i915_gem_init__objects(struct drm_i915_private *i915);
 
 struct drm_i915_gem_object *i915_gem_object_alloc(void);
@@ -344,6 +346,20 @@ void
 i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj,
 				   unsigned int flush_domains);
 
+int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
+				 unsigned int *needs_clflush);
+int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
+				  unsigned int *needs_clflush);
+#define CLFLUSH_BEFORE	BIT(0)
+#define CLFLUSH_AFTER	BIT(1)
+#define CLFLUSH_FLAGS	(CLFLUSH_BEFORE | CLFLUSH_AFTER)
+
+static inline void
+i915_gem_object_finish_access(struct drm_i915_gem_object *obj)
+{
+	i915_gem_object_unpin_pages(obj);
+}
+
 static inline struct intel_engine_cs *
 i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj)
 {
@@ -365,6 +381,19 @@ void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
 					 unsigned int cache_level);
 void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj);
 
+int __must_check
+i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write);
+int __must_check
+i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write);
+int __must_check
+i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write);
+struct i915_vma * __must_check
+i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
+				     u32 alignment,
+				     const struct i915_ggtt_view *view,
+				     unsigned int flags);
+void i915_gem_object_unpin_from_display_plane(struct i915_vma *vma);
+
 static inline bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
 {
 	if (obj->cache_dirty)
diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
index 05d054e0bfe7..7cd737e513a0 100644
--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
@@ -1733,7 +1733,7 @@ static int perform_bb_shadow(struct parser_exec_state *s)
 		goto err_free_bb;
 	}
 
-	ret = i915_gem_obj_prepare_shmem_write(bb->obj, &bb->clflush);
+	ret = i915_gem_object_prepare_write(bb->obj, &bb->clflush);
 	if (ret)
 		goto err_free_obj;
 
@@ -1782,7 +1782,7 @@ static int perform_bb_shadow(struct parser_exec_state *s)
 err_unmap:
 	i915_gem_object_unpin_map(bb->obj);
 err_finish_shmem_access:
-	i915_gem_obj_finish_shmem_access(bb->obj);
+	i915_gem_object_finish_access(bb->obj);
 err_free_obj:
 	i915_gem_object_put(bb->obj);
 err_free_bb:
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 709bcaaed765..628b2d3991a5 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -490,7 +490,7 @@ static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)
 						bb->obj->base.size);
 				bb->clflush &= ~CLFLUSH_AFTER;
 			}
-			i915_gem_obj_finish_shmem_access(bb->obj);
+			i915_gem_object_finish_access(bb->obj);
 			bb->accessing = false;
 
 		} else {
@@ -518,7 +518,7 @@ static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)
 			if (ret)
 				goto err;
 
-			i915_gem_obj_finish_shmem_access(bb->obj);
+			i915_gem_object_finish_access(bb->obj);
 			bb->accessing = false;
 
 			ret = i915_vma_move_to_active(bb->vma,
@@ -596,7 +596,7 @@ static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload)
 	list_for_each_entry_safe(bb, pos, &workload->shadow_bb, list) {
 		if (bb->obj) {
 			if (bb->accessing)
-				i915_gem_obj_finish_shmem_access(bb->obj);
+				i915_gem_object_finish_access(bb->obj);
 
 			if (bb->va && !IS_ERR(bb->va))
 				i915_gem_object_unpin_map(bb->obj);
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 503d548a55f7..acc7487cd85d 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -1057,11 +1057,11 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
 	void *dst, *src;
 	int ret;
 
-	ret = i915_gem_obj_prepare_shmem_read(src_obj, &src_needs_clflush);
+	ret = i915_gem_object_prepare_read(src_obj, &src_needs_clflush);
 	if (ret)
 		return ERR_PTR(ret);
 
-	ret = i915_gem_obj_prepare_shmem_write(dst_obj, &dst_needs_clflush);
+	ret = i915_gem_object_prepare_write(dst_obj, &dst_needs_clflush);
 	if (ret) {
 		dst = ERR_PTR(ret);
 		goto unpin_src;
@@ -1119,9 +1119,9 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
 	*needs_clflush_after = dst_needs_clflush & CLFLUSH_AFTER;
 
 unpin_dst:
-	i915_gem_obj_finish_shmem_access(dst_obj);
+	i915_gem_object_finish_access(dst_obj);
 unpin_src:
-	i915_gem_obj_finish_shmem_access(src_obj);
+	i915_gem_object_finish_access(src_obj);
 	return dst;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 095e0da448d3..a7b31dd9b745 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -372,14 +372,6 @@ enum i915_cache_level {
 
 #define I915_COLOR_UNEVICTABLE (-1) /* a non-vma sharing the address space */
 
-enum fb_op_origin {
-	ORIGIN_GTT,
-	ORIGIN_CPU,
-	ORIGIN_CS,
-	ORIGIN_FLIP,
-	ORIGIN_DIRTYFB,
-};
-
 struct intel_fbc {
 	/* This is always the inner lock when overlapping with struct_mutex and
 	 * it's the outer lock when overlapping with stolen_lock. */
@@ -2796,20 +2788,6 @@ static inline int __sg_page_count(const struct scatterlist *sg)
 	return sg->length >> PAGE_SHIFT;
 }
 
-int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
-				    unsigned int *needs_clflush);
-int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
-				     unsigned int *needs_clflush);
-#define CLFLUSH_BEFORE	BIT(0)
-#define CLFLUSH_AFTER	BIT(1)
-#define CLFLUSH_FLAGS	(CLFLUSH_BEFORE | CLFLUSH_AFTER)
-
-static inline void
-i915_gem_obj_finish_shmem_access(struct drm_i915_gem_object *obj)
-{
-	i915_gem_object_unpin_pages(obj);
-}
-
 static inline int __must_check
 i915_mutex_lock_interruptible(struct drm_device *dev)
 {
@@ -2873,18 +2851,6 @@ int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
 				  const struct i915_sched_attr *attr);
 #define I915_PRIORITY_DISPLAY I915_USER_PRIORITY(I915_PRIORITY_MAX)
 
-int __must_check
-i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write);
-int __must_check
-i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write);
-int __must_check
-i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write);
-struct i915_vma * __must_check
-i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
-				     u32 alignment,
-				     const struct i915_ggtt_view *view,
-				     unsigned int flags);
-void i915_gem_object_unpin_from_display_plane(struct i915_vma *vma);
 int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file);
 void i915_gem_release(struct drm_device *dev, struct drm_file *file);
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b78dc34efbaf..8e0c6f7a5e69 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -532,123 +532,6 @@ void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv)
 	}
 }
 
-/*
- * Pins the specified object's pages and synchronizes the object with
- * GPU accesses. Sets needs_clflush to non-zero if the caller should
- * flush the object from the CPU cache.
- */
-int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
-				    unsigned int *needs_clflush)
-{
-	int ret;
-
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
-	*needs_clflush = 0;
-	if (!i915_gem_object_has_struct_page(obj))
-		return -ENODEV;
-
-	ret = i915_gem_object_wait(obj,
-				   I915_WAIT_INTERRUPTIBLE |
-				   I915_WAIT_LOCKED,
-				   MAX_SCHEDULE_TIMEOUT);
-	if (ret)
-		return ret;
-
-	ret = i915_gem_object_pin_pages(obj);
-	if (ret)
-		return ret;
-
-	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
-	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
-		ret = i915_gem_object_set_to_cpu_domain(obj, false);
-		if (ret)
-			goto err_unpin;
-		else
-			goto out;
-	}
-
-	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
-
-	/* If we're not in the cpu read domain, set ourself into the gtt
-	 * read domain and manually flush cachelines (if required). This
-	 * optimizes for the case when the gpu will dirty the data
-	 * anyway again before the next pread happens.
-	 */
-	if (!obj->cache_dirty &&
-	    !(obj->read_domains & I915_GEM_DOMAIN_CPU))
-		*needs_clflush = CLFLUSH_BEFORE;
-
-out:
-	/* return with the pages pinned */
-	return 0;
-
-err_unpin:
-	i915_gem_object_unpin_pages(obj);
-	return ret;
-}
-
-int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
-				     unsigned int *needs_clflush)
-{
-	int ret;
-
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
-	*needs_clflush = 0;
-	if (!i915_gem_object_has_struct_page(obj))
-		return -ENODEV;
-
-	ret = i915_gem_object_wait(obj,
-				   I915_WAIT_INTERRUPTIBLE |
-				   I915_WAIT_LOCKED |
-				   I915_WAIT_ALL,
-				   MAX_SCHEDULE_TIMEOUT);
-	if (ret)
-		return ret;
-
-	ret = i915_gem_object_pin_pages(obj);
-	if (ret)
-		return ret;
-
-	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
-	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
-		ret = i915_gem_object_set_to_cpu_domain(obj, true);
-		if (ret)
-			goto err_unpin;
-		else
-			goto out;
-	}
-
-	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
-
-	/* If we're not in the cpu write domain, set ourself into the
-	 * gtt write domain and manually flush cachelines (as required).
-	 * This optimizes for the case when the gpu will use the data
-	 * right away and we therefore have to clflush anyway.
-	 */
-	if (!obj->cache_dirty) {
-		*needs_clflush |= CLFLUSH_AFTER;
-
-		/*
-		 * Same trick applies to invalidate partially written
-		 * cachelines read before writing.
-		 */
-		if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
-			*needs_clflush |= CLFLUSH_BEFORE;
-	}
-
-out:
-	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
-	obj->mm.dirty = true;
-	/* return with the pages pinned */
-	return 0;
-
-err_unpin:
-	i915_gem_object_unpin_pages(obj);
-	return ret;
-}
-
 static int
 shmem_pread(struct page *page, int offset, int len, char __user *user_data,
 	    bool needs_clflush)
@@ -682,7 +565,7 @@ i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
 	if (ret)
 		return ret;
 
-	ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
+	ret = i915_gem_object_prepare_read(obj, &needs_clflush);
 	mutex_unlock(&obj->base.dev->struct_mutex);
 	if (ret)
 		return ret;
@@ -704,7 +587,7 @@ i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
 		offset = 0;
 	}
 
-	i915_gem_obj_finish_shmem_access(obj);
+	i915_gem_object_finish_access(obj);
 	return ret;
 }
 
@@ -1079,7 +962,7 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
 	if (ret)
 		return ret;
 
-	ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
+	ret = i915_gem_object_prepare_write(obj, &needs_clflush);
 	mutex_unlock(&i915->drm.struct_mutex);
 	if (ret)
 		return ret;
@@ -1111,7 +994,7 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
 	}
 
 	intel_fb_obj_flush(obj, ORIGIN_CPU);
-	i915_gem_obj_finish_shmem_access(obj);
+	i915_gem_object_finish_access(obj);
 	return ret;
 }
 
@@ -1200,130 +1083,6 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 	return ret;
 }
 
-static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
-{
-	struct drm_i915_private *i915 = to_i915(obj->base.dev);
-	struct list_head *list;
-	struct i915_vma *vma;
-
-	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
-
-	mutex_lock(&i915->ggtt.vm.mutex);
-	for_each_ggtt_vma(vma, obj) {
-		if (!drm_mm_node_allocated(&vma->node))
-			continue;
-
-		list_move_tail(&vma->vm_link, &vma->vm->bound_list);
-	}
-	mutex_unlock(&i915->ggtt.vm.mutex);
-
-	spin_lock(&i915->mm.obj_lock);
-	list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list;
-	list_move_tail(&obj->mm.link, list);
-	spin_unlock(&i915->mm.obj_lock);
-}
-
-static inline enum fb_op_origin
-fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
-{
-	return (domain == I915_GEM_DOMAIN_GTT ?
-		obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
-}
-
-/**
- * Called when user space prepares to use an object with the CPU, either
- * through the mmap ioctl's mapping or a GTT mapping.
- * @dev: drm device
- * @data: ioctl data blob
- * @file: drm file
- */
-int
-i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
-			  struct drm_file *file)
-{
-	struct drm_i915_gem_set_domain *args = data;
-	struct drm_i915_gem_object *obj;
-	u32 read_domains = args->read_domains;
-	u32 write_domain = args->write_domain;
-	int err;
-
-	/* Only handle setting domains to types used by the CPU. */
-	if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
-		return -EINVAL;
-
-	/* Having something in the write domain implies it's in the read
-	 * domain, and only that read domain.  Enforce that in the request.
-	 */
-	if (write_domain != 0 && read_domains != write_domain)
-		return -EINVAL;
-
-	obj = i915_gem_object_lookup(file, args->handle);
-	if (!obj)
-		return -ENOENT;
-
-	/* Try to flush the object off the GPU without holding the lock.
-	 * We will repeat the flush holding the lock in the normal manner
-	 * to catch cases where we are gazumped.
-	 */
-	err = i915_gem_object_wait(obj,
-				   I915_WAIT_INTERRUPTIBLE |
-				   I915_WAIT_PRIORITY |
-				   (write_domain ? I915_WAIT_ALL : 0),
-				   MAX_SCHEDULE_TIMEOUT);
-	if (err)
-		goto out;
-
-	/*
-	 * Proxy objects do not control access to the backing storage, ergo
-	 * they cannot be used as a means to manipulate the cache domain
-	 * tracking for that backing storage. The proxy object is always
-	 * considered to be outside of any cache domain.
-	 */
-	if (i915_gem_object_is_proxy(obj)) {
-		err = -ENXIO;
-		goto out;
-	}
-
-	/*
-	 * Flush and acquire obj->pages so that we are coherent through
-	 * direct access in memory with previous cached writes through
-	 * shmemfs and that our cache domain tracking remains valid.
-	 * For example, if the obj->filp was moved to swap without us
-	 * being notified and releasing the pages, we would mistakenly
-	 * continue to assume that the obj remained out of the CPU cached
-	 * domain.
-	 */
-	err = i915_gem_object_pin_pages(obj);
-	if (err)
-		goto out;
-
-	err = i915_mutex_lock_interruptible(dev);
-	if (err)
-		goto out_unpin;
-
-	if (read_domains & I915_GEM_DOMAIN_WC)
-		err = i915_gem_object_set_to_wc_domain(obj, write_domain);
-	else if (read_domains & I915_GEM_DOMAIN_GTT)
-		err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
-	else
-		err = i915_gem_object_set_to_cpu_domain(obj, write_domain);
-
-	/* And bump the LRU for this access */
-	i915_gem_object_bump_inactive_ggtt(obj);
-
-	mutex_unlock(&dev->struct_mutex);
-
-	if (write_domain != 0)
-		intel_fb_obj_invalidate(obj,
-					fb_write_origin(obj, write_domain));
-
-out_unpin:
-	i915_gem_object_unpin_pages(obj);
-out:
-	i915_gem_object_put(obj);
-	return err;
-}
-
 /**
  * Called when user space has done writes to this buffer
  * @dev: drm device
@@ -1708,514 +1467,6 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915,
 	return 0;
 }
 
-static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
-{
-	/*
-	 * We manually flush the CPU domain so that we can override and
-	 * force the flush for the display, and perform it asyncrhonously.
-	 */
-	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
-	if (obj->cache_dirty)
-		i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
-	obj->write_domain = 0;
-}
-
-void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
-{
-	if (!READ_ONCE(obj->pin_global))
-		return;
-
-	mutex_lock(&obj->base.dev->struct_mutex);
-	__i915_gem_object_flush_for_display(obj);
-	mutex_unlock(&obj->base.dev->struct_mutex);
-}
-
-/**
- * Moves a single object to the WC read, and possibly write domain.
- * @obj: object to act on
- * @write: ask for write access or read only
- *
- * This function returns when the move is complete, including waiting on
- * flushes to occur.
- */
-int
-i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
-{
-	int ret;
-
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
-	ret = i915_gem_object_wait(obj,
-				   I915_WAIT_INTERRUPTIBLE |
-				   I915_WAIT_LOCKED |
-				   (write ? I915_WAIT_ALL : 0),
-				   MAX_SCHEDULE_TIMEOUT);
-	if (ret)
-		return ret;
-
-	if (obj->write_domain == I915_GEM_DOMAIN_WC)
-		return 0;
-
-	/* Flush and acquire obj->pages so that we are coherent through
-	 * direct access in memory with previous cached writes through
-	 * shmemfs and that our cache domain tracking remains valid.
-	 * For example, if the obj->filp was moved to swap without us
-	 * being notified and releasing the pages, we would mistakenly
-	 * continue to assume that the obj remained out of the CPU cached
-	 * domain.
-	 */
-	ret = i915_gem_object_pin_pages(obj);
-	if (ret)
-		return ret;
-
-	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);
-
-	/* Serialise direct access to this object with the barriers for
-	 * coherent writes from the GPU, by effectively invalidating the
-	 * WC domain upon first access.
-	 */
-	if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
-		mb();
-
-	/* It should now be out of any other write domains, and we can update
-	 * the domain values for our changes.
-	 */
-	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
-	obj->read_domains |= I915_GEM_DOMAIN_WC;
-	if (write) {
-		obj->read_domains = I915_GEM_DOMAIN_WC;
-		obj->write_domain = I915_GEM_DOMAIN_WC;
-		obj->mm.dirty = true;
-	}
-
-	i915_gem_object_unpin_pages(obj);
-	return 0;
-}
-
-/**
- * Moves a single object to the GTT read, and possibly write domain.
- * @obj: object to act on
- * @write: ask for write access or read only
- *
- * This function returns when the move is complete, including waiting on
- * flushes to occur.
- */
-int
-i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
-{
-	int ret;
-
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
-	ret = i915_gem_object_wait(obj,
-				   I915_WAIT_INTERRUPTIBLE |
-				   I915_WAIT_LOCKED |
-				   (write ? I915_WAIT_ALL : 0),
-				   MAX_SCHEDULE_TIMEOUT);
-	if (ret)
-		return ret;
-
-	if (obj->write_domain == I915_GEM_DOMAIN_GTT)
-		return 0;
-
-	/* Flush and acquire obj->pages so that we are coherent through
-	 * direct access in memory with previous cached writes through
-	 * shmemfs and that our cache domain tracking remains valid.
-	 * For example, if the obj->filp was moved to swap without us
-	 * being notified and releasing the pages, we would mistakenly
-	 * continue to assume that the obj remained out of the CPU cached
-	 * domain.
-	 */
-	ret = i915_gem_object_pin_pages(obj);
-	if (ret)
-		return ret;
-
-	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);
-
-	/* Serialise direct access to this object with the barriers for
-	 * coherent writes from the GPU, by effectively invalidating the
-	 * GTT domain upon first access.
-	 */
-	if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
-		mb();
-
-	/* It should now be out of any other write domains, and we can update
-	 * the domain values for our changes.
-	 */
-	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
-	obj->read_domains |= I915_GEM_DOMAIN_GTT;
-	if (write) {
-		obj->read_domains = I915_GEM_DOMAIN_GTT;
-		obj->write_domain = I915_GEM_DOMAIN_GTT;
-		obj->mm.dirty = true;
-	}
-
-	i915_gem_object_unpin_pages(obj);
-	return 0;
-}
-
-/**
- * Changes the cache-level of an object across all VMA.
- * @obj: object to act on
- * @cache_level: new cache level to set for the object
- *
- * After this function returns, the object will be in the new cache-level
- * across all GTT and the contents of the backing storage will be coherent,
- * with respect to the new cache-level. In order to keep the backing storage
- * coherent for all users, we only allow a single cache level to be set
- * globally on the object and prevent it from being changed whilst the
- * hardware is reading from the object. That is if the object is currently
- * on the scanout it will be set to uncached (or equivalent display
- * cache coherency) and all non-MOCS GPU access will also be uncached so
- * that all direct access to the scanout remains coherent.
- */
-int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
-				    enum i915_cache_level cache_level)
-{
-	struct i915_vma *vma;
-	int ret;
-
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
-	if (obj->cache_level == cache_level)
-		return 0;
-
-	/* Inspect the list of currently bound VMA and unbind any that would
-	 * be invalid given the new cache-level. This is principally to
-	 * catch the issue of the CS prefetch crossing page boundaries and
-	 * reading an invalid PTE on older architectures.
-	 */
-restart:
-	list_for_each_entry(vma, &obj->vma.list, obj_link) {
-		if (!drm_mm_node_allocated(&vma->node))
-			continue;
-
-		if (i915_vma_is_pinned(vma)) {
-			DRM_DEBUG("can not change the cache level of pinned objects\n");
-			return -EBUSY;
-		}
-
-		if (!i915_vma_is_closed(vma) &&
-		    i915_gem_valid_gtt_space(vma, cache_level))
-			continue;
-
-		ret = i915_vma_unbind(vma);
-		if (ret)
-			return ret;
-
-		/* As unbinding may affect other elements in the
-		 * obj->vma_list (due to side-effects from retiring
-		 * an active vma), play safe and restart the iterator.
-		 */
-		goto restart;
-	}
-
-	/* We can reuse the existing drm_mm nodes but need to change the
-	 * cache-level on the PTE. We could simply unbind them all and
-	 * rebind with the correct cache-level on next use. However since
-	 * we already have a valid slot, dma mapping, pages etc, we may as
-	 * rewrite the PTE in the belief that doing so tramples upon less
-	 * state and so involves less work.
-	 */
-	if (obj->bind_count) {
-		/* Before we change the PTE, the GPU must not be accessing it.
-		 * If we wait upon the object, we know that all the bound
-		 * VMA are no longer active.
-		 */
-		ret = i915_gem_object_wait(obj,
-					   I915_WAIT_INTERRUPTIBLE |
-					   I915_WAIT_LOCKED |
-					   I915_WAIT_ALL,
-					   MAX_SCHEDULE_TIMEOUT);
-		if (ret)
-			return ret;
-
-		if (!HAS_LLC(to_i915(obj->base.dev)) &&
-		    cache_level != I915_CACHE_NONE) {
-			/* Access to snoopable pages through the GTT is
-			 * incoherent and on some machines causes a hard
-			 * lockup. Relinquish the CPU mmaping to force
-			 * userspace to refault in the pages and we can
-			 * then double check if the GTT mapping is still
-			 * valid for that pointer access.
-			 */
-			i915_gem_object_release_mmap(obj);
-
-			/* As we no longer need a fence for GTT access,
-			 * we can relinquish it now (and so prevent having
-			 * to steal a fence from someone else on the next
-			 * fence request). Note GPU activity would have
-			 * dropped the fence as all snoopable access is
-			 * supposed to be linear.
-			 */
-			for_each_ggtt_vma(vma, obj) {
-				ret = i915_vma_put_fence(vma);
-				if (ret)
-					return ret;
-			}
-		} else {
-			/* We either have incoherent backing store and
-			 * so no GTT access or the architecture is fully
-			 * coherent. In such cases, existing GTT mmaps
-			 * ignore the cache bit in the PTE and we can
-			 * rewrite it without confusing the GPU or having
-			 * to force userspace to fault back in its mmaps.
-			 */
-		}
-
-		list_for_each_entry(vma, &obj->vma.list, obj_link) {
-			if (!drm_mm_node_allocated(&vma->node))
-				continue;
-
-			ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
-			if (ret)
-				return ret;
-		}
-	}
-
-	list_for_each_entry(vma, &obj->vma.list, obj_link)
-		vma->node.color = cache_level;
-	i915_gem_object_set_cache_coherency(obj, cache_level);
-	obj->cache_dirty = true; /* Always invalidate stale cachelines */
-
-	return 0;
-}
-
-int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
-			       struct drm_file *file)
-{
-	struct drm_i915_gem_caching *args = data;
-	struct drm_i915_gem_object *obj;
-	int err = 0;
-
-	rcu_read_lock();
-	obj = i915_gem_object_lookup_rcu(file, args->handle);
-	if (!obj) {
-		err = -ENOENT;
-		goto out;
-	}
-
-	switch (obj->cache_level) {
-	case I915_CACHE_LLC:
-	case I915_CACHE_L3_LLC:
-		args->caching = I915_CACHING_CACHED;
-		break;
-
-	case I915_CACHE_WT:
-		args->caching = I915_CACHING_DISPLAY;
-		break;
-
-	default:
-		args->caching = I915_CACHING_NONE;
-		break;
-	}
-out:
-	rcu_read_unlock();
-	return err;
-}
-
-int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
-			       struct drm_file *file)
-{
-	struct drm_i915_private *i915 = to_i915(dev);
-	struct drm_i915_gem_caching *args = data;
-	struct drm_i915_gem_object *obj;
-	enum i915_cache_level level;
-	int ret = 0;
-
-	switch (args->caching) {
-	case I915_CACHING_NONE:
-		level = I915_CACHE_NONE;
-		break;
-	case I915_CACHING_CACHED:
-		/*
-		 * Due to a HW issue on BXT A stepping, GPU stores via a
-		 * snooped mapping may leave stale data in a corresponding CPU
-		 * cacheline, whereas normally such cachelines would get
-		 * invalidated.
-		 */
-		if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
-			return -ENODEV;
-
-		level = I915_CACHE_LLC;
-		break;
-	case I915_CACHING_DISPLAY:
-		level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	obj = i915_gem_object_lookup(file, args->handle);
-	if (!obj)
-		return -ENOENT;
-
-	/*
-	 * The caching mode of proxy object is handled by its generator, and
-	 * not allowed to be changed by userspace.
-	 */
-	if (i915_gem_object_is_proxy(obj)) {
-		ret = -ENXIO;
-		goto out;
-	}
-
-	if (obj->cache_level == level)
-		goto out;
-
-	ret = i915_gem_object_wait(obj,
-				   I915_WAIT_INTERRUPTIBLE,
-				   MAX_SCHEDULE_TIMEOUT);
-	if (ret)
-		goto out;
-
-	ret = i915_mutex_lock_interruptible(dev);
-	if (ret)
-		goto out;
-
-	ret = i915_gem_object_set_cache_level(obj, level);
-	mutex_unlock(&dev->struct_mutex);
-
-out:
-	i915_gem_object_put(obj);
-	return ret;
-}
-
-/*
- * Prepare buffer for display plane (scanout, cursors, etc). Can be called from
- * an uninterruptible phase (modesetting) and allows any flushes to be pipelined
- * (for pageflips). We only flush the caches while preparing the buffer for
- * display, the callers are responsible for frontbuffer flush.
- */
-struct i915_vma *
-i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
-				     u32 alignment,
-				     const struct i915_ggtt_view *view,
-				     unsigned int flags)
-{
-	struct i915_vma *vma;
-	int ret;
-
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
-	/* Mark the global pin early so that we account for the
-	 * display coherency whilst setting up the cache domains.
-	 */
-	obj->pin_global++;
-
-	/* The display engine is not coherent with the LLC cache on gen6.  As
-	 * a result, we make sure that the pinning that is about to occur is
-	 * done with uncached PTEs. This is lowest common denominator for all
-	 * chipsets.
-	 *
-	 * However for gen6+, we could do better by using the GFDT bit instead
-	 * of uncaching, which would allow us to flush all the LLC-cached data
-	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
-	 */
-	ret = i915_gem_object_set_cache_level(obj,
-					      HAS_WT(to_i915(obj->base.dev)) ?
-					      I915_CACHE_WT : I915_CACHE_NONE);
-	if (ret) {
-		vma = ERR_PTR(ret);
-		goto err_unpin_global;
-	}
-
-	/* As the user may map the buffer once pinned in the display plane
-	 * (e.g. libkms for the bootup splash), we have to ensure that we
-	 * always use map_and_fenceable for all scanout buffers. However,
-	 * it may simply be too big to fit into mappable, in which case
-	 * put it anyway and hope that userspace can cope (but always first
-	 * try to preserve the existing ABI).
-	 */
-	vma = ERR_PTR(-ENOSPC);
-	if ((flags & PIN_MAPPABLE) == 0 &&
-	    (!view || view->type == I915_GGTT_VIEW_NORMAL))
-		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
-					       flags |
-					       PIN_MAPPABLE |
-					       PIN_NONBLOCK);
-	if (IS_ERR(vma))
-		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags);
-	if (IS_ERR(vma))
-		goto err_unpin_global;
-
-	vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
-
-	__i915_gem_object_flush_for_display(obj);
-
-	/* It should now be out of any other write domains, and we can update
-	 * the domain values for our changes.
-	 */
-	obj->read_domains |= I915_GEM_DOMAIN_GTT;
-
-	return vma;
-
-err_unpin_global:
-	obj->pin_global--;
-	return vma;
-}
-
-void
-i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
-{
-	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
-
-	if (WARN_ON(vma->obj->pin_global == 0))
-		return;
-
-	if (--vma->obj->pin_global == 0)
-		vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
-
-	/* Bump the LRU to try and avoid premature eviction whilst flipping  */
-	i915_gem_object_bump_inactive_ggtt(vma->obj);
-
-	i915_vma_unpin(vma);
-}
-
-/**
- * Moves a single object to the CPU read, and possibly write domain.
- * @obj: object to act on
- * @write: requesting write or read-only access
- *
- * This function returns when the move is complete, including waiting on
- * flushes to occur.
- */
-int
-i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
-{
-	int ret;
-
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
-	ret = i915_gem_object_wait(obj,
-				   I915_WAIT_INTERRUPTIBLE |
-				   I915_WAIT_LOCKED |
-				   (write ? I915_WAIT_ALL : 0),
-				   MAX_SCHEDULE_TIMEOUT);
-	if (ret)
-		return ret;
-
-	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
-
-	/* Flush the CPU cache if it's still invalid. */
-	if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
-		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
-		obj->read_domains |= I915_GEM_DOMAIN_CPU;
-	}
-
-	/* It should now be out of any other write domains, and we can update
-	 * the domain values for our changes.
-	 */
-	GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);
-
-	/* If we're writing through the CPU, then the GPU read domains will
-	 * need to be invalidated at next use.
-	 */
-	if (write)
-		__start_cpu_write(obj);
-
-	return 0;
-}
-
 /* Throttle our rendering by waiting until the ring has completed our requests
  * emitted over 20 msec ago.
  *
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index f1b5091ee817..0a536068f7ac 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1007,7 +1007,7 @@ static void reloc_cache_reset(struct reloc_cache *cache)
 			mb();
 
 		kunmap_atomic(vaddr);
-		i915_gem_obj_finish_shmem_access((struct drm_i915_gem_object *)cache->node.mm);
+		i915_gem_object_finish_access((struct drm_i915_gem_object *)cache->node.mm);
 	} else {
 		wmb();
 		io_mapping_unmap_atomic((void __iomem *)vaddr);
@@ -1039,7 +1039,7 @@ static void *reloc_kmap(struct drm_i915_gem_object *obj,
 		unsigned int flushes;
 		int err;
 
-		err = i915_gem_obj_prepare_shmem_write(obj, &flushes);
+		err = i915_gem_object_prepare_write(obj, &flushes);
 		if (err)
 			return ERR_PTR(err);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
index 91196348c68c..056d2ad9a6d6 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.c
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
@@ -84,7 +84,7 @@ static int render_state_setup(struct intel_render_state *so,
 	u32 *d;
 	int ret;
 
-	ret = i915_gem_obj_prepare_shmem_write(so->obj, &needs_clflush);
+	ret = i915_gem_object_prepare_write(so->obj, &needs_clflush);
 	if (ret)
 		return ret;
 
@@ -166,7 +166,7 @@ static int render_state_setup(struct intel_render_state *so,
 
 	ret = i915_gem_object_set_to_gtt_domain(so->obj, false);
 out:
-	i915_gem_obj_finish_shmem_access(so->obj);
+	i915_gem_object_finish_access(so->obj);
 	return ret;
 
 err:
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 5412373e2f98..cb80f7397758 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -44,6 +44,8 @@
 #include <drm/i915_mei_hdcp_interface.h>
 #include <media/cec-notifier.h>
 
+#include "intel_frontbuffer.h"
+
 struct drm_printer;
 
 /**
diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.h b/drivers/gpu/drm/i915/intel_frontbuffer.h
index 63cd9a753a72..95c00bf106ab 100644
--- a/drivers/gpu/drm/i915/intel_frontbuffer.h
+++ b/drivers/gpu/drm/i915/intel_frontbuffer.h
@@ -27,6 +27,14 @@
 struct drm_i915_private;
 struct drm_i915_gem_object;
 
+enum fb_op_origin {
+	ORIGIN_GTT,
+	ORIGIN_CPU,
+	ORIGIN_CS,
+	ORIGIN_FLIP,
+	ORIGIN_DIRTYFB,
+};
+
 void intel_frontbuffer_flip_prepare(struct drm_i915_private *dev_priv,
 				    unsigned frontbuffer_bits);
 void intel_frontbuffer_flip_complete(struct drm_i915_private *dev_priv,
diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c
index 68060e5ac78d..1db0e983f0ed 100644
--- a/drivers/gpu/drm/i915/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/selftests/huge_pages.c
@@ -1020,7 +1020,7 @@ static int cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val)
 	unsigned long n;
 	int err;
 
-	err = i915_gem_obj_prepare_shmem_read(obj, &needs_flush);
+	err = i915_gem_object_prepare_read(obj, &needs_flush);
 	if (err)
 		return err;
 
@@ -1041,7 +1041,7 @@ static int cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val)
 		kunmap_atomic(ptr);
 	}
 
-	i915_gem_obj_finish_shmem_access(obj);
+	i915_gem_object_finish_access(obj);
 
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
index e43630b40fce..a7d3510a2a9a 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
@@ -37,7 +37,7 @@ static int cpu_set(struct drm_i915_gem_object *obj,
 	u32 *cpu;
 	int err;
 
-	err = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
+	err = i915_gem_object_prepare_write(obj, &needs_clflush);
 	if (err)
 		return err;
 
@@ -54,7 +54,7 @@ static int cpu_set(struct drm_i915_gem_object *obj,
 		drm_clflush_virt_range(cpu, sizeof(*cpu));
 
 	kunmap_atomic(map);
-	i915_gem_obj_finish_shmem_access(obj);
+	i915_gem_object_finish_access(obj);
 
 	return 0;
 }
@@ -69,7 +69,7 @@ static int cpu_get(struct drm_i915_gem_object *obj,
 	u32 *cpu;
 	int err;
 
-	err = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
+	err = i915_gem_object_prepare_read(obj, &needs_clflush);
 	if (err)
 		return err;
 
@@ -83,7 +83,7 @@ static int cpu_get(struct drm_i915_gem_object *obj,
 	*v = *cpu;
 
 	kunmap_atomic(map);
-	i915_gem_obj_finish_shmem_access(obj);
+	i915_gem_object_finish_access(obj);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index b6ed055abf5c..14df22b93764 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -351,7 +351,7 @@ static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
 	unsigned int n, m, need_flush;
 	int err;
 
-	err = i915_gem_obj_prepare_shmem_write(obj, &need_flush);
+	err = i915_gem_object_prepare_write(obj, &need_flush);
 	if (err)
 		return err;
 
@@ -366,7 +366,7 @@ static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
 		kunmap_atomic(map);
 	}
 
-	i915_gem_obj_finish_shmem_access(obj);
+	i915_gem_object_finish_access(obj);
 	obj->read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU;
 	obj->write_domain = 0;
 	return 0;
@@ -378,7 +378,7 @@ static noinline int cpu_check(struct drm_i915_gem_object *obj,
 	unsigned int n, m, needs_flush;
 	int err;
 
-	err = i915_gem_obj_prepare_shmem_read(obj, &needs_flush);
+	err = i915_gem_object_prepare_read(obj, &needs_flush);
 	if (err)
 		return err;
 
@@ -416,7 +416,7 @@ static noinline int cpu_check(struct drm_i915_gem_object *obj,
 			break;
 	}
 
-	i915_gem_obj_finish_shmem_access(obj);
+	i915_gem_object_finish_access(obj);
 	return err;
 }
 
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 78+ messages in thread

* [PATCH 36/43] drm/i915: Move more GEM objects under gem/
  2019-03-06 14:24 RFC Breaking up GEM struct_mutex for async-pages Chris Wilson
                   ` (34 preceding siblings ...)
  2019-03-06 14:25 ` [PATCH 35/43] drm/i915: Move GEM domain management " Chris Wilson
@ 2019-03-06 14:25 ` Chris Wilson
  2019-03-06 14:25 ` [PATCH 37/43] drm/i915: Pull scatterlist utils out of i915_gem.h Chris Wilson
                   ` (9 subsequent siblings)
  45 siblings, 0 replies; 78+ messages in thread
From: Chris Wilson @ 2019-03-06 14:25 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld

Continuing the theme of separating out the GEM clutter.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Makefile                 | 25 ++++++-----
 .../gpu/drm/i915/{ => gem}/i915_gem_clflush.c | 27 +++--------
 drivers/gpu/drm/i915/gem/i915_gem_clflush.h   | 20 +++++++++
 .../gpu/drm/i915/{ => gem}/i915_gem_context.c | 40 +++++------------
 .../gpu/drm/i915/{ => gem}/i915_gem_context.h | 30 +++----------
 .../i915/{ => gem}/i915_gem_context_types.h   |  4 +-
 .../gpu/drm/i915/{ => gem}/i915_gem_dmabuf.c  | 28 +++---------
 drivers/gpu/drm/i915/gem/i915_gem_domain.c    |  2 +-
 .../drm/i915/{ => gem}/i915_gem_execbuffer.c  | 39 ++++------------
 .../drm/i915/{ => gem}/i915_gem_internal.c    | 33 +++++---------
 drivers/gpu/drm/i915/gem/i915_gem_object.c    | 10 ++++-
 .../drm/i915/{ => gem}/i915_gem_shrinker.c    | 25 ++---------
 .../gpu/drm/i915/{ => gem}/i915_gem_stolen.c  | 33 ++++----------
 .../gpu/drm/i915/{ => gem}/i915_gem_tiling.c  | 31 +++----------
 .../gpu/drm/i915/{ => gem}/i915_gem_userptr.c | 30 +++----------
 drivers/gpu/drm/i915/{ => gem}/i915_gemfs.c   | 25 ++---------
 drivers/gpu/drm/i915/gem/i915_gemfs.h         | 16 +++++++
 .../{ => gem}/selftests/huge_gem_object.c     | 22 +--------
 .../drm/i915/gem/selftests/huge_gem_object.h  | 27 +++++++++++
 .../drm/i915/{ => gem}/selftests/huge_pages.c | 33 ++++----------
 .../{ => gem}/selftests/i915_gem_coherency.c  | 26 ++---------
 .../{ => gem}/selftests/i915_gem_context.c    | 41 +++++------------
 .../{ => gem}/selftests/i915_gem_dmabuf.c     | 26 ++---------
 .../drm/i915/gem/selftests/i915_gem_mman.c    |  3 +-
 .../{ => gem}/selftests/i915_gem_object.c     | 27 +++--------
 .../i915/{ => gem}/selftests/mock_context.c   | 24 ++--------
 .../gpu/drm/i915/gem/selftests/mock_context.h | 24 ++++++++++
 .../i915/{ => gem}/selftests/mock_dmabuf.c    | 22 +--------
 .../gpu/drm/i915/gem/selftests/mock_dmabuf.h  | 22 +++++++++
 .../{ => gem}/selftests/mock_gem_object.h     |  7 ++-
 .../gem/test_i915_gem_clflush_standalone.c    |  7 +++
 .../gem/test_i915_gem_context_standalone.c    |  7 +++
 .../test_i915_gem_context_types_standalone.c  |  0
 .../drm/i915/gem/test_i915_gemfs_standalone.c |  7 +++
 drivers/gpu/drm/i915/gvt/mmio_context.c       |  1 +
 drivers/gpu/drm/i915/gvt/scheduler.c          |  3 ++
 drivers/gpu/drm/i915/i915_drv.c               |  1 +
 drivers/gpu/drm/i915/i915_drv.h               |  2 +-
 drivers/gpu/drm/i915/i915_gem.c               |  9 ++--
 drivers/gpu/drm/i915/i915_gem_clflush.h       | 36 ---------------
 drivers/gpu/drm/i915/i915_gem_evict.c         |  2 +
 drivers/gpu/drm/i915/i915_gemfs.h             | 34 --------------
 drivers/gpu/drm/i915/i915_globals.c           |  2 +-
 drivers/gpu/drm/i915/i915_gpu_error.c         |  2 +
 drivers/gpu/drm/i915/i915_perf.c              |  3 ++
 drivers/gpu/drm/i915/i915_request.c           |  3 ++
 drivers/gpu/drm/i915/i915_reset.c             |  2 +
 drivers/gpu/drm/i915/intel_context.c          |  3 +-
 drivers/gpu/drm/i915/intel_display.c          |  2 -
 drivers/gpu/drm/i915/intel_engine_cs.c        |  1 +
 drivers/gpu/drm/i915/intel_guc_submission.c   |  1 +
 drivers/gpu/drm/i915/intel_lrc.c              |  3 ++
 drivers/gpu/drm/i915/intel_lrc.h              | 14 +++---
 drivers/gpu/drm/i915/intel_ringbuffer.c       |  3 ++
 .../gpu/drm/i915/selftests/huge_gem_object.h  | 45 -------------------
 drivers/gpu/drm/i915/selftests/i915_gem.c     |  4 +-
 .../gpu/drm/i915/selftests/i915_gem_evict.c   |  3 +-
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c |  3 +-
 drivers/gpu/drm/i915/selftests/i915_request.c |  3 +-
 drivers/gpu/drm/i915/selftests/i915_vma.c     |  3 +-
 .../gpu/drm/i915/selftests/igt_flush_test.c   |  2 +
 drivers/gpu/drm/i915/selftests/igt_spinner.h  |  1 -
 .../gpu/drm/i915/selftests/intel_hangcheck.c  |  5 ++-
 drivers/gpu/drm/i915/selftests/intel_lrc.c    |  2 +-
 .../drm/i915/selftests/intel_workarounds.c    |  4 +-
 drivers/gpu/drm/i915/selftests/mock_context.h | 42 -----------------
 drivers/gpu/drm/i915/selftests/mock_dmabuf.h  | 41 -----------------
 drivers/gpu/drm/i915/selftests/mock_engine.c  |  2 +
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  5 ++-
 69 files changed, 344 insertions(+), 691 deletions(-)
 rename drivers/gpu/drm/i915/{ => gem}/i915_gem_clflush.c (77%)
 create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_clflush.h
 rename drivers/gpu/drm/i915/{ => gem}/i915_gem_context.c (97%)
 rename drivers/gpu/drm/i915/{ => gem}/i915_gem_context.h (80%)
 rename drivers/gpu/drm/i915/{ => gem}/i915_gem_context_types.h (98%)
 rename drivers/gpu/drm/i915/{ => gem}/i915_gem_dmabuf.c (86%)
 rename drivers/gpu/drm/i915/{ => gem}/i915_gem_execbuffer.c (98%)
 rename drivers/gpu/drm/i915/{ => gem}/i915_gem_internal.c (81%)
 rename drivers/gpu/drm/i915/{ => gem}/i915_gem_shrinker.c (93%)
 rename drivers/gpu/drm/i915/{ => gem}/i915_gem_stolen.c (93%)
 rename drivers/gpu/drm/i915/{ => gem}/i915_gem_tiling.c (90%)
 rename drivers/gpu/drm/i915/{ => gem}/i915_gem_userptr.c (94%)
 rename drivers/gpu/drm/i915/{ => gem}/i915_gemfs.c (50%)
 create mode 100644 drivers/gpu/drm/i915/gem/i915_gemfs.h
 rename drivers/gpu/drm/i915/{ => gem}/selftests/huge_gem_object.c (70%)
 create mode 100644 drivers/gpu/drm/i915/gem/selftests/huge_gem_object.h
 rename drivers/gpu/drm/i915/{ => gem}/selftests/huge_pages.c (97%)
 rename drivers/gpu/drm/i915/{ => gem}/selftests/i915_gem_coherency.c (87%)
 rename drivers/gpu/drm/i915/{ => gem}/selftests/i915_gem_context.c (96%)
 rename drivers/gpu/drm/i915/{ => gem}/selftests/i915_gem_dmabuf.c (87%)
 rename drivers/gpu/drm/i915/{ => gem}/selftests/i915_gem_object.c (62%)
 rename drivers/gpu/drm/i915/{ => gem}/selftests/mock_context.c (59%)
 create mode 100644 drivers/gpu/drm/i915/gem/selftests/mock_context.h
 rename drivers/gpu/drm/i915/{ => gem}/selftests/mock_dmabuf.c (73%)
 create mode 100644 drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.h
 rename drivers/gpu/drm/i915/{ => gem}/selftests/mock_gem_object.h (65%)
 create mode 100644 drivers/gpu/drm/i915/gem/test_i915_gem_clflush_standalone.c
 create mode 100644 drivers/gpu/drm/i915/gem/test_i915_gem_context_standalone.c
 rename drivers/gpu/drm/i915/{ => gem}/test_i915_gem_context_types_standalone.c (100%)
 create mode 100644 drivers/gpu/drm/i915/gem/test_i915_gemfs_standalone.c
 delete mode 100644 drivers/gpu/drm/i915/i915_gem_clflush.h
 delete mode 100644 drivers/gpu/drm/i915/i915_gemfs.h
 delete mode 100644 drivers/gpu/drm/i915/selftests/huge_gem_object.h
 delete mode 100644 drivers/gpu/drm/i915/selftests/mock_context.h
 delete mode 100644 drivers/gpu/drm/i915/selftests/mock_dmabuf.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index c4b78634b5ee..c12350df7793 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -59,11 +59,14 @@ i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o
 
 # Test the headers are compilable as standalone units
 i915-$(CONFIG_DRM_I915_WERROR) += \
+	gem/test_i915_gemfs_standalone.o \
+	gem/test_i915_gem_clflush_standalone.o \
+	gem/test_i915_gem_context_standalone.o \
+	gem/test_i915_gem_context_types_standalone.o \
 	gem/test_i915_gem_ioctls_standalone.o \
 	gem/test_i915_gem_object_standalone.o \
 	gem/test_i915_gem_object_types_standalone.o \
 	test_i915_active_types_standalone.o \
-	test_i915_gem_context_types_standalone.o \
 	test_i915_timeline_types_standalone.o \
 	test_intel_context_types_standalone.o \
 	test_intel_engine_types_standalone.o \
@@ -71,30 +74,30 @@ i915-$(CONFIG_DRM_I915_WERROR) += \
 
 # GEM code
 i915-y += \
+	  gem/i915_gem_clflush.o \
+	  gem/i915_gem_context.o \
+	  gem/i915_gem_dmabuf.o \
 	  gem/i915_gem_domain.o \
+	  gem/i915_gem_execbuffer.o \
+	  gem/i915_gem_internal.o \
 	  gem/i915_gem_object.o \
 	  gem/i915_gem_mman.o \
 	  gem/i915_gem_pages.o \
 	  gem/i915_gem_phys.o \
 	  gem/i915_gem_shmem.o \
+	  gem/i915_gem_shrinker.o \
+	  gem/i915_gem_stolen.o \
+	  gem/i915_gem_tiling.o \
+	  gem/i915_gem_userptr.o \
+	  gem/i915_gemfs.o \
 	  i915_active.o \
 	  i915_cmd_parser.o \
 	  i915_gem_batch_pool.o \
-	  i915_gem_clflush.o \
-	  i915_gem_context.o \
-	  i915_gem_dmabuf.o \
 	  i915_gem_evict.o \
-	  i915_gem_execbuffer.o \
 	  i915_gem_fence_reg.o \
 	  i915_gem_gtt.o \
-	  i915_gem_internal.o \
 	  i915_gem.o \
 	  i915_gem_render_state.o \
-	  i915_gem_shrinker.o \
-	  i915_gem_stolen.o \
-	  i915_gem_tiling.o \
-	  i915_gem_userptr.o \
-	  i915_gemfs.o \
 	  i915_globals.o \
 	  i915_query.o \
 	  i915_request.o \
diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
similarity index 77%
rename from drivers/gpu/drm/i915/i915_gem_clflush.c
rename to drivers/gpu/drm/i915/gem/i915_gem_clflush.c
index 8e74c23cbd91..093bfff55a96 100644
--- a/drivers/gpu/drm/i915/i915_gem_clflush.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
@@ -1,31 +1,14 @@
 /*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2016 Intel Corporation
  */
 
-#include "i915_drv.h"
-#include "intel_frontbuffer.h"
 #include "i915_gem_clflush.h"
 
+#include "../i915_drv.h"
+#include "../intel_frontbuffer.h"
+
 static DEFINE_SPINLOCK(clflush_lock);
 
 struct clflush {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.h b/drivers/gpu/drm/i915/gem/i915_gem_clflush.h
new file mode 100644
index 000000000000..e6c382973129
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.h
@@ -0,0 +1,20 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#ifndef __I915_GEM_CLFLUSH_H__
+#define __I915_GEM_CLFLUSH_H__
+
+#include <linux/types.h>
+
+struct drm_i915_private;
+struct drm_i915_gem_object;
+
+bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
+			     unsigned int flags);
+#define I915_CLFLUSH_FORCE BIT(0)
+#define I915_CLFLUSH_SYNC BIT(1)
+
+#endif /* __I915_GEM_CLFLUSH_H__ */
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
similarity index 97%
rename from drivers/gpu/drm/i915/i915_gem_context.c
rename to drivers/gpu/drm/i915/gem/i915_gem_context.c
index f6a7ecef392b..1663144c1cee 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -1,28 +1,7 @@
 /*
- * Copyright © 2011-2012 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- *    Ben Widawsky <ben@bwidawsk.net>
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2011-2012 Intel Corporation
  */
 
 /*
@@ -90,13 +69,14 @@
 
 #include <drm/i915_drm.h>
 
-#include "i915_drv.h"
-#include "i915_globals.h"
-#include "i915_trace.h"
-#include "i915_user_extensions.h"
-#include "intel_lrc_reg.h"
-#include "intel_lrc.h"
-#include "intel_workarounds.h"
+#include "i915_gem_context.h"
+
+#include "../i915_globals.h"
+#include "../i915_trace.h"
+#include "../i915_user_extensions.h"
+#include "../intel_lrc_reg.h"
+#include "../intel_lrc.h"
+#include "../intel_workarounds.h"
 
 #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
 
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h
similarity index 80%
rename from drivers/gpu/drm/i915/i915_gem_context.h
rename to drivers/gpu/drm/i915/gem/i915_gem_context.h
index 1e670372892c..107c713073cb 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h
@@ -1,25 +1,7 @@
 /*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2016 Intel Corporation
  */
 
 #ifndef __I915_GEM_CONTEXT_H__
@@ -27,10 +9,10 @@
 
 #include "i915_gem_context_types.h"
 
-#include "i915_gem.h"
-#include "i915_scheduler.h"
-#include "intel_context.h"
-#include "intel_device_info.h"
+#include "../i915_gem.h"
+#include "../i915_scheduler.h"
+#include "../intel_context.h"
+#include "../intel_device_info.h"
 
 struct drm_device;
 struct drm_file;
diff --git a/drivers/gpu/drm/i915/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
similarity index 98%
rename from drivers/gpu/drm/i915/i915_gem_context_types.h
rename to drivers/gpu/drm/i915/gem/i915_gem_context_types.h
index 8a89f3053f73..47ed1f65cc06 100644
--- a/drivers/gpu/drm/i915/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -17,8 +17,8 @@
 #include <linux/rcupdate.h>
 #include <linux/types.h>
 
-#include "i915_scheduler.h"
-#include "intel_context_types.h"
+#include "../i915_scheduler.h"
+#include "../intel_context_types.h"
 
 struct pid;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
similarity index 86%
rename from drivers/gpu/drm/i915/i915_gem_dmabuf.c
rename to drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
index 33181678990e..47db261a98c0 100644
--- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
@@ -1,34 +1,16 @@
 /*
- * Copyright 2012 Red Hat Inc
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
+ * SPDX-License-Identifier: MIT
  *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- *	Dave Airlie <airlied@redhat.com>
+ * Copyright 2012 Red Hat Inc
  */
 
 #include <linux/dma-buf.h>
+#include <linux/highmem.h>
 #include <linux/reservation.h>
 
+#include "i915_gem_object.h"
 
-#include "i915_drv.h"
+#include "../i915_drv.h"
 
 static struct drm_i915_gem_object *dma_buf_to_obj(struct dma_buf *buf)
 {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index c21ceb08f845..c54e9e73212b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -4,11 +4,11 @@
  * Copyright © 2014-2016 Intel Corporation
  */
 
+#include "i915_gem_clflush.h"
 #include "i915_gem_ioctls.h"
 #include "i915_gem_object.h"
 
 #include "../i915_drv.h"
-#include "../i915_gem_clflush.h"
 #include "../i915_gem_gtt.h"
 #include "../i915_vma.h"
 
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
similarity index 98%
rename from drivers/gpu/drm/i915/i915_gem_execbuffer.c
rename to drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 0a536068f7ac..4fe528dc23da 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -1,29 +1,7 @@
 /*
- * Copyright © 2008,2010 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- *    Eric Anholt <eric@anholt.net>
- *    Chris Wilson <chris@chris-wilson.co.uk>
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2008,2010 Intel Corporation
  */
 
 #include <linux/intel-iommu.h>
@@ -34,13 +12,14 @@
 #include <drm/drm_syncobj.h>
 #include <drm/i915_drm.h>
 
-#include "gem/i915_gem_ioctls.h"
-
-#include "i915_drv.h"
+#include "i915_gem_ioctls.h"
 #include "i915_gem_clflush.h"
-#include "i915_trace.h"
-#include "intel_drv.h"
-#include "intel_frontbuffer.h"
+#include "i915_gem_context.h"
+
+#include "../i915_trace.h"
+#include "../intel_context.h"
+#include "../intel_drv.h"
+#include "../intel_frontbuffer.h"
 
 enum {
 	FORCE_CPU_RELOC = 1,
diff --git a/drivers/gpu/drm/i915/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
similarity index 81%
rename from drivers/gpu/drm/i915/i915_gem_internal.c
rename to drivers/gpu/drm/i915/gem/i915_gem_internal.c
index ab627ed1269c..d40adb3bbe29 100644
--- a/drivers/gpu/drm/i915/i915_gem_internal.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
@@ -1,29 +1,20 @@
 /*
- * Copyright © 2014-2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2014-2016 Intel Corporation
  */
 
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+#include <linux/swiotlb.h>
+
 #include <drm/i915_drm.h>
-#include "i915_drv.h"
+
+#include "i915_gem_object.h"
+
+#include "../i915_drv.h"
+#include "../i915_gem.h"
+#include "../i915_utils.h"
 
 #define QUIET (__GFP_NORETRY | __GFP_NOWARN)
 #define MAYFAIL (__GFP_RETRY_MAYFAIL | __GFP_NOWARN)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 4a6c5ddd1265..be5952aa4c11 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -22,11 +22,12 @@
  *
  */
 
+#include "i915_gem_context.h"
+#include "i915_gem_clflush.h"
 #include "i915_gem_object.h"
 
 #include "../i915_drv.h"
 #include "../i915_globals.h"
-#include "../i915_gem_clflush.h"
 #include "../intel_frontbuffer.h"
 
 static struct i915_global_object {
@@ -443,3 +444,10 @@ int __init i915_global_objects_init(void)
 	i915_global_register(&global.base);
 	return 0;
 }
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/huge_gem_object.c"
+#include "selftests/huge_pages.c"
+#include "selftests/i915_gem_object.c"
+#include "selftests/i915_gem_coherency.c"
+#endif
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
similarity index 93%
rename from drivers/gpu/drm/i915/i915_gem_shrinker.c
rename to drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
index 6da795c7e62e..506360f7de6d 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
@@ -1,25 +1,7 @@
 /*
- * Copyright © 2008-2015 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2008-2015 Intel Corporation
  */
 
 #include <linux/oom.h>
@@ -32,8 +14,7 @@
 #include <linux/vmalloc.h>
 #include <drm/i915_drm.h>
 
-#include "i915_drv.h"
-#include "i915_trace.h"
+#include "../i915_trace.h"
 
 static bool shrinker_lock(struct drm_i915_private *i915,
 			  unsigned int flags,
diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
similarity index 93%
rename from drivers/gpu/drm/i915/i915_gem_stolen.c
rename to drivers/gpu/drm/i915/gem/i915_gem_stolen.c
index 0a8082cfc761..b0961f2c568b 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -1,33 +1,16 @@
 /*
- * Copyright © 2008-2012 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- *    Eric Anholt <eric@anholt.net>
- *    Chris Wilson <chris@chris-wilson.co.uk>
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2008-2012 Intel Corporation
  */
 
+#include <linux/errno.h>
+#include <linux/mutex.h>
+
+#include <drm/drm_mm.h>
 #include <drm/i915_drm.h>
-#include "i915_drv.h"
+
+#include "../i915_drv.h"
 
 /*
  * The BIOS typically reserves some of the system's memory for the exclusive
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
similarity index 90%
rename from drivers/gpu/drm/i915/i915_gem_tiling.c
rename to drivers/gpu/drm/i915/gem/i915_gem_tiling.c
index 8ce3a30fb0a1..59b4c4dfbc41 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
@@ -1,37 +1,18 @@
 /*
- * Copyright © 2008 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- *    Eric Anholt <eric@anholt.net>
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2008 Intel Corporation
  */
 
 #include <linux/string.h>
 #include <linux/bitops.h>
 #include <drm/i915_drm.h>
 
-#include "gem/i915_gem_ioctls.h"
+#include "i915_gem_ioctls.h"
+#include "i915_gem_object.h"
 
-#include "i915_drv.h"
+#include "../i915_drv.h"
+#include "../i915_gem.h"
 
 /**
  * DOC: buffer object tiling
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
similarity index 94%
rename from drivers/gpu/drm/i915/i915_gem_userptr.c
rename to drivers/gpu/drm/i915/gem/i915_gem_userptr.c
index bfe985b36e07..12ef6d8fb2dc 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
@@ -1,25 +1,7 @@
 /*
- * Copyright © 2012-2014 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2012-2014 Intel Corporation
  */
 
 #include <linux/mmu_context.h>
@@ -30,11 +12,11 @@
 
 #include <drm/i915_drm.h>
 
-#include "gem/i915_gem_ioctls.h"
+#include "i915_gem_ioctls.h"
+#include "i915_gem_object.h"
 
-#include "i915_drv.h"
-#include "i915_trace.h"
-#include "intel_drv.h"
+#include "../i915_trace.h"
+#include "../intel_drv.h"
 
 struct i915_mm_struct {
 	struct mm_struct *mm;
diff --git a/drivers/gpu/drm/i915/i915_gemfs.c b/drivers/gpu/drm/i915/gem/i915_gemfs.c
similarity index 50%
rename from drivers/gpu/drm/i915/i915_gemfs.c
rename to drivers/gpu/drm/i915/gem/i915_gemfs.c
index 888b7d3f04c3..fecdbc19def4 100644
--- a/drivers/gpu/drm/i915/i915_gemfs.c
+++ b/drivers/gpu/drm/i915/gem/i915_gemfs.c
@@ -1,34 +1,17 @@
 /*
- * Copyright © 2017 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2017 Intel Corporation
  */
 
 #include <linux/fs.h>
 #include <linux/mount.h>
 #include <linux/pagemap.h>
 
-#include "i915_drv.h"
 #include "i915_gemfs.h"
 
+#include "../i915_drv.h"
+
 int i915_gemfs_init(struct drm_i915_private *i915)
 {
 	struct file_system_type *type;
diff --git a/drivers/gpu/drm/i915/gem/i915_gemfs.h b/drivers/gpu/drm/i915/gem/i915_gemfs.h
new file mode 100644
index 000000000000..2a1e59af3e4a
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gemfs.h
@@ -0,0 +1,16 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2017 Intel Corporation
+ */
+
+#ifndef __I915_GEMFS_H__
+#define __I915_GEMFS_H__
+
+struct drm_i915_private;
+
+int i915_gemfs_init(struct drm_i915_private *i915);
+
+void i915_gemfs_fini(struct drm_i915_private *i915);
+
+#endif
diff --git a/drivers/gpu/drm/i915/selftests/huge_gem_object.c b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c
similarity index 70%
rename from drivers/gpu/drm/i915/selftests/huge_gem_object.c
rename to drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c
index 419fd4d6a8f0..824f3761314c 100644
--- a/drivers/gpu/drm/i915/selftests/huge_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c
@@ -1,25 +1,7 @@
 /*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2016 Intel Corporation
  */
 
 #include "huge_gem_object.h"
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.h b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.h
new file mode 100644
index 000000000000..549c1394bcdc
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.h
@@ -0,0 +1,27 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#ifndef __HUGE_GEM_OBJECT_H
+#define __HUGE_GEM_OBJECT_H
+
+struct drm_i915_gem_object *
+huge_gem_object(struct drm_i915_private *i915,
+		phys_addr_t phys_size,
+		dma_addr_t dma_size);
+
+static inline phys_addr_t
+huge_gem_object_phys_size(struct drm_i915_gem_object *obj)
+{
+	return obj->scratch;
+}
+
+static inline dma_addr_t
+huge_gem_object_dma_size(struct drm_i915_gem_object *obj)
+{
+	return obj->base.size;
+}
+
+#endif /* !__HUGE_GEM_OBJECT_H */
diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
similarity index 97%
rename from drivers/gpu/drm/i915/selftests/huge_pages.c
rename to drivers/gpu/drm/i915/gem/selftests/huge_pages.c
index 1db0e983f0ed..5d1d76957145 100644
--- a/drivers/gpu/drm/i915/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
@@ -1,33 +1,18 @@
 /*
- * Copyright © 2017 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2017 Intel Corporation
  */
 
-#include "../i915_selftest.h"
-
 #include <linux/prime_numbers.h>
 
-#include "mock_drm.h"
-#include "i915_random.h"
+#include "../../i915_selftest.h"
+
+#include "mock_context.h"
+
+#include "../../selftests/mock_drm.h"
+#include "../../selftests/mock_gem_device.h"
+#include "../../selftests/i915_random.h"
 
 static const unsigned int page_sizes[] = {
 	I915_GTT_PAGE_SIZE_2M,
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
similarity index 87%
rename from drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
rename to drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
index a7d3510a2a9a..4558d4828d61 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
@@ -1,31 +1,13 @@
 /*
- * Copyright © 2017 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2017 Intel Corporation
  */
 
 #include <linux/prime_numbers.h>
 
-#include "../i915_selftest.h"
-#include "i915_random.h"
+#include "../../i915_selftest.h"
+#include "../../selftests/i915_random.h"
 
 static int cpu_set(struct drm_i915_gem_object *obj,
 		   unsigned long offset,
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
similarity index 96%
rename from drivers/gpu/drm/i915/selftests/i915_gem_context.c
rename to drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index 14df22b93764..a7c816e2a2bc 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -1,39 +1,22 @@
 /*
- * Copyright © 2017 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2017 Intel Corporation
  */
 
 #include <linux/prime_numbers.h>
 
-#include "../i915_reset.h"
-#include "../i915_selftest.h"
-#include "i915_random.h"
-#include "igt_flush_test.h"
-#include "igt_live_test.h"
-#include "igt_reset.h"
-#include "igt_spinner.h"
+#include "../../i915_reset.h"
+#include "../../i915_selftest.h"
+
+#include "../../selftests/i915_random.h"
+#include "../../selftests/igt_flush_test.h"
+#include "../../selftests/igt_live_test.h"
+#include "../../selftests/igt_reset.h"
+#include "../../selftests/igt_spinner.h"
 
-#include "mock_drm.h"
-#include "mock_gem_device.h"
+#include "../../selftests/mock_drm.h"
+#include "../../selftests/mock_gem_device.h"
 #include "huge_gem_object.h"
 
 #define DW_PER_PAGE (PAGE_SIZE / sizeof(u32))
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
similarity index 87%
rename from drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c
rename to drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
index 37dc22699f1f..67a9d551bb0e 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
@@ -1,30 +1,12 @@
 /*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2016 Intel Corporation
  */
 
-#include "../i915_selftest.h"
+#include "../../i915_selftest.h"
 
-#include "mock_gem_device.h"
+#include "../../selftests/mock_gem_device.h"
 #include "mock_dmabuf.h"
 
 static int igt_dmabuf_export(void *arg)
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index a99a51a93dcb..f0e7b0e517cf 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -7,7 +7,8 @@
 #include <linux/prime_numbers.h>
 
 #include "../../i915_selftest.h"
-#include "../../selftests/huge_gem_object.h"
+
+#include "huge_gem_object.h"
 
 struct tile {
 	unsigned int width;
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c
similarity index 62%
rename from drivers/gpu/drm/i915/selftests/i915_gem_object.c
rename to drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c
index e4b2ee4c54e3..79ecef7bdfcf 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c
@@ -1,32 +1,15 @@
 /*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2016 Intel Corporation
  */
 
-#include "../i915_selftest.h"
+#include "../../i915_selftest.h"
 
-#include "mock_gem_device.h"
 #include "huge_gem_object.h"
 
+#include "../../selftests/mock_gem_device.h"
+
 static int igt_gem_object(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
similarity index 59%
rename from drivers/gpu/drm/i915/selftests/mock_context.c
rename to drivers/gpu/drm/i915/gem/selftests/mock_context.c
index 1d6dc2fe36ab..7f88fd6b3ce8 100644
--- a/drivers/gpu/drm/i915/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
@@ -1,29 +1,11 @@
 /*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2016 Intel Corporation
  */
 
 #include "mock_context.h"
-#include "mock_gtt.h"
+#include "../../selftests/mock_gtt.h"
 
 struct i915_gem_context *
 mock_context(struct drm_i915_private *i915,
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.h b/drivers/gpu/drm/i915/gem/selftests/mock_context.h
new file mode 100644
index 000000000000..0b926653914f
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.h
@@ -0,0 +1,24 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#ifndef __MOCK_CONTEXT_H
+#define __MOCK_CONTEXT_H
+
+void mock_init_contexts(struct drm_i915_private *i915);
+
+struct i915_gem_context *
+mock_context(struct drm_i915_private *i915,
+	     const char *name);
+
+void mock_context_close(struct i915_gem_context *ctx);
+
+struct i915_gem_context *
+live_context(struct drm_i915_private *i915, struct drm_file *file);
+
+struct i915_gem_context *kernel_context(struct drm_i915_private *i915);
+void kernel_context_close(struct i915_gem_context *ctx);
+
+#endif /* !__MOCK_CONTEXT_H */
diff --git a/drivers/gpu/drm/i915/selftests/mock_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c
similarity index 73%
rename from drivers/gpu/drm/i915/selftests/mock_dmabuf.c
rename to drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c
index ca682caf1062..b9e059d4328a 100644
--- a/drivers/gpu/drm/i915/selftests/mock_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c
@@ -1,25 +1,7 @@
 /*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2016 Intel Corporation
  */
 
 #include "mock_dmabuf.h"
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.h b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.h
new file mode 100644
index 000000000000..f0f8bbd82dfc
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.h
@@ -0,0 +1,22 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#ifndef __MOCK_DMABUF_H__
+#define __MOCK_DMABUF_H__
+
+#include <linux/dma-buf.h>
+
+struct mock_dmabuf {
+	int npages;
+	struct page *pages[];
+};
+
+static struct mock_dmabuf *to_mock(struct dma_buf *buf)
+{
+	return buf->priv;
+}
+
+#endif /* !__MOCK_DMABUF_H__ */
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_object.h b/drivers/gpu/drm/i915/gem/selftests/mock_gem_object.h
similarity index 65%
rename from drivers/gpu/drm/i915/selftests/mock_gem_object.h
rename to drivers/gpu/drm/i915/gem/selftests/mock_gem_object.h
index 20acdbee7bd0..370360b4a148 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_gem_object.h
@@ -1,4 +1,9 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
 #ifndef __MOCK_GEM_OBJECT_H__
 #define __MOCK_GEM_OBJECT_H__
 
diff --git a/drivers/gpu/drm/i915/gem/test_i915_gem_clflush_standalone.c b/drivers/gpu/drm/i915/gem/test_i915_gem_clflush_standalone.c
new file mode 100644
index 000000000000..8fdd7eefd2c7
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/test_i915_gem_clflush_standalone.c
@@ -0,0 +1,7 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_gem_clflush.h"
diff --git a/drivers/gpu/drm/i915/gem/test_i915_gem_context_standalone.c b/drivers/gpu/drm/i915/gem/test_i915_gem_context_standalone.c
new file mode 100644
index 000000000000..5245f1b3bb07
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/test_i915_gem_context_standalone.c
@@ -0,0 +1,7 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_gem_context.h"
diff --git a/drivers/gpu/drm/i915/test_i915_gem_context_types_standalone.c b/drivers/gpu/drm/i915/gem/test_i915_gem_context_types_standalone.c
similarity index 100%
rename from drivers/gpu/drm/i915/test_i915_gem_context_types_standalone.c
rename to drivers/gpu/drm/i915/gem/test_i915_gem_context_types_standalone.c
diff --git a/drivers/gpu/drm/i915/gem/test_i915_gemfs_standalone.c b/drivers/gpu/drm/i915/gem/test_i915_gemfs_standalone.c
new file mode 100644
index 000000000000..f51e8c1d7e1c
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/test_i915_gemfs_standalone.c
@@ -0,0 +1,7 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_gemfs.h"
diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c
index f64c76dd11d4..25da98b5c0e8 100644
--- a/drivers/gpu/drm/i915/gvt/mmio_context.c
+++ b/drivers/gpu/drm/i915/gvt/mmio_context.c
@@ -34,6 +34,7 @@
  */
 
 #include "i915_drv.h"
+#include "intel_context.h"
 #include "gvt.h"
 #include "trace.h"
 
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 628b2d3991a5..62dd3a5e8413 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -35,7 +35,10 @@
 
 #include <linux/kthread.h>
 
+#include "gem/i915_gem_context.h"
+
 #include "i915_drv.h"
+#include "intel_context.h"
 #include "gvt.h"
 
 #define RING_CTX_OFF(x) \
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 771b9159e6df..caad78282590 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -47,6 +47,7 @@
 #include <drm/drm_probe_helper.h>
 #include <drm/i915_drm.h>
 
+#include "gem/i915_gem_context.h"
 #include "gem/i915_gem_ioctls.h"
 
 #include "i915_drv.h"
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index a7b31dd9b745..82e32be4a3bd 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -75,7 +75,7 @@
 #include "intel_uc.h"
 
 #include "i915_gem.h"
-#include "i915_gem_context.h"
+#include "gem/i915_gem_context_types.h"
 #include "i915_gem_fence_reg.h"
 #include "i915_gem_gtt.h"
 #include "i915_gpu_error.h"
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8e0c6f7a5e69..b0965fa15be9 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -38,11 +38,12 @@
 #include <linux/dma-buf.h>
 #include <linux/mman.h>
 
+#include "gem/i915_gem_clflush.h"
+#include "gem/i915_gem_context.h"
 #include "gem/i915_gem_ioctls.h"
+#include "gem/i915_gemfs.h"
 
 #include "i915_drv.h"
-#include "i915_gem_clflush.h"
-#include "i915_gemfs.h"
 #include "i915_globals.h"
 #include "i915_reset.h"
 #include "i915_trace.h"
@@ -2689,9 +2690,5 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftests/scatterlist.c"
 #include "selftests/mock_gem_device.c"
-#include "selftests/huge_gem_object.c"
-#include "selftests/huge_pages.c"
-#include "selftests/i915_gem_object.c"
-#include "selftests/i915_gem_coherency.c"
 #include "selftests/i915_gem.c"
 #endif
diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.h b/drivers/gpu/drm/i915/i915_gem_clflush.h
deleted file mode 100644
index f390247561b3..000000000000
--- a/drivers/gpu/drm/i915/i915_gem_clflush.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- */
-
-#ifndef __I915_GEM_CLFLUSH_H__
-#define __I915_GEM_CLFLUSH_H__
-
-struct drm_i915_private;
-struct drm_i915_gem_object;
-
-bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
-			     unsigned int flags);
-#define I915_CLFLUSH_FORCE BIT(0)
-#define I915_CLFLUSH_SYNC BIT(1)
-
-#endif /* __I915_GEM_CLFLUSH_H__ */
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 060f5903544a..0947485775a3 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -28,6 +28,8 @@
 
 #include <drm/i915_drm.h>
 
+#include "gem/i915_gem_context.h"
+
 #include "i915_drv.h"
 #include "intel_drv.h"
 #include "i915_trace.h"
diff --git a/drivers/gpu/drm/i915/i915_gemfs.h b/drivers/gpu/drm/i915/i915_gemfs.h
deleted file mode 100644
index cca8bdc5b93e..000000000000
--- a/drivers/gpu/drm/i915/i915_gemfs.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright © 2017 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- */
-
-#ifndef __I915_GEMFS_H__
-#define __I915_GEMFS_H__
-
-struct drm_i915_private;
-
-int i915_gemfs_init(struct drm_i915_private *i915);
-
-void i915_gemfs_fini(struct drm_i915_private *i915);
-
-#endif
diff --git a/drivers/gpu/drm/i915/i915_globals.c b/drivers/gpu/drm/i915/i915_globals.c
index f3858d42183f..9f2cd721acfb 100644
--- a/drivers/gpu/drm/i915/i915_globals.c
+++ b/drivers/gpu/drm/i915/i915_globals.c
@@ -8,7 +8,7 @@
 #include <linux/workqueue.h>
 
 #include "i915_active.h"
-#include "i915_gem_context.h"
+#include "gem/i915_gem_context.h"
 #include "gem/i915_gem_object.h"
 #include "i915_globals.h"
 #include "i915_request.h"
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 3d8020888604..526d1300a577 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -36,6 +36,8 @@
 
 #include <drm/drm_print.h>
 
+#include "gem/i915_gem_context.h"
+
 #include "i915_gpu_error.h"
 #include "i915_drv.h"
 
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 3c4f5017cf2a..07726165c2f9 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -195,6 +195,8 @@
 #include <linux/sizes.h>
 #include <linux/uuid.h>
 
+#include "gem/i915_gem_context.h"
+
 #include "i915_drv.h"
 #include "i915_oa_hsw.h"
 #include "i915_oa_bdw.h"
@@ -210,6 +212,7 @@
 #include "i915_oa_cflgt3.h"
 #include "i915_oa_cnl.h"
 #include "i915_oa_icl.h"
+#include "intel_context.h"
 #include "intel_lrc_reg.h"
 
 /* HW requires this to be a power of two, between 128k and 16M, though driver
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index d8c943984bd2..5c3d59678458 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -29,10 +29,13 @@
 #include <linux/sched/clock.h>
 #include <linux/sched/signal.h>
 
+#include "gem/i915_gem_context.h"
+
 #include "i915_drv.h"
 #include "i915_active.h"
 #include "i915_globals.h"
 #include "i915_reset.h"
+#include "intel_context.h"
 
 struct execute_cb {
 	struct list_head link;
diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c
index 3fbaa72a9eac..c56e9571cd31 100644
--- a/drivers/gpu/drm/i915/i915_reset.c
+++ b/drivers/gpu/drm/i915/i915_reset.c
@@ -7,6 +7,8 @@
 #include <linux/sched/mm.h>
 #include <linux/stop_machine.h>
 
+#include "gem/i915_gem_context.h"
+
 #include "i915_drv.h"
 #include "i915_gpu_error.h"
 #include "i915_reset.h"
diff --git a/drivers/gpu/drm/i915/intel_context.c b/drivers/gpu/drm/i915/intel_context.c
index 8bf0d38d3a08..8c43e4d6bf7c 100644
--- a/drivers/gpu/drm/i915/intel_context.c
+++ b/drivers/gpu/drm/i915/intel_context.c
@@ -4,8 +4,9 @@
  * Copyright © 2019 Intel Corporation
  */
 
+#include "gem/i915_gem_context.h"
+
 #include "i915_drv.h"
-#include "i915_gem_context.h"
 #include "i915_globals.h"
 #include "intel_context.h"
 #include "intel_ringbuffer.h"
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index d852cb282060..6d8d136e1239 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -45,7 +45,6 @@
 #include <drm/i915_drm.h>
 
 #include "i915_drv.h"
-#include "i915_gem_clflush.h"
 #include "i915_trace.h"
 #include "intel_drv.h"
 #include "intel_dsi.h"
@@ -56,7 +55,6 @@
 #include "intel_frontbuffer.h"
 
 #include "i915_drv.h"
-#include "i915_gem_clflush.h"
 #include "i915_reset.h"
 #include "i915_trace.h"
 
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 652c1b3ba190..26a99f249bb7 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -26,6 +26,7 @@
 
 #include "i915_drv.h"
 #include "i915_reset.h"
+#include "intel_context.h"
 #include "intel_ringbuffer.h"
 #include "intel_lrc.h"
 
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index 4a5727233419..aec2b73cde2c 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -25,6 +25,7 @@
 #include <linux/circ_buf.h>
 #include <trace/events/dma_fence.h>
 
+#include "intel_context.h"
 #include "intel_guc_submission.h"
 #include "intel_lrc_reg.h"
 #include "i915_drv.h"
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 1b3c0fe8f8b1..3fb1dcc39277 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -134,6 +134,9 @@
 #include <linux/interrupt.h>
 
 #include <drm/i915_drm.h>
+
+#include "gem/i915_gem_context.h"
+
 #include "i915_drv.h"
 #include "i915_gem_render_state.h"
 #include "i915_reset.h"
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 77b85648045a..4e065af326ac 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -24,8 +24,13 @@
 #ifndef _INTEL_LRC_H_
 #define _INTEL_LRC_H_
 
-#include "intel_ringbuffer.h"
-#include "i915_gem_context.h"
+struct drm_printer;
+
+struct drm_i915_private;
+struct i915_gem_context;
+struct i915_request;
+struct intel_engine_cs;
+struct intel_sseu;
 
 /* Execlists regs */
 #define RING_ELSP(engine)			_MMIO((engine)->mmio_base + 0x230)
@@ -97,11 +102,6 @@ int logical_xcs_ring_init(struct intel_engine_cs *engine);
  */
 #define LRC_HEADER_PAGES LRC_PPHWSP_PN
 
-struct drm_printer;
-
-struct drm_i915_private;
-struct i915_gem_context;
-
 void intel_lr_context_resume(struct drm_i915_private *dev_priv);
 void intel_execlists_set_default_submission(struct intel_engine_cs *engine);
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 4ecb79eb147b..758d49389e31 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -31,10 +31,13 @@
 
 #include <drm/i915_drm.h>
 
+#include "gem/i915_gem_context.h"
+
 #include "i915_drv.h"
 #include "i915_gem_render_state.h"
 #include "i915_reset.h"
 #include "i915_trace.h"
+#include "intel_context.h"
 #include "intel_drv.h"
 #include "intel_workarounds.h"
 
diff --git a/drivers/gpu/drm/i915/selftests/huge_gem_object.h b/drivers/gpu/drm/i915/selftests/huge_gem_object.h
deleted file mode 100644
index a6133a9e8029..000000000000
--- a/drivers/gpu/drm/i915/selftests/huge_gem_object.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- */
-
-#ifndef __HUGE_GEM_OBJECT_H
-#define __HUGE_GEM_OBJECT_H
-
-struct drm_i915_gem_object *
-huge_gem_object(struct drm_i915_private *i915,
-		phys_addr_t phys_size,
-		dma_addr_t dma_size);
-
-static inline phys_addr_t
-huge_gem_object_phys_size(struct drm_i915_gem_object *obj)
-{
-	return obj->scratch;
-}
-
-static inline dma_addr_t
-huge_gem_object_dma_size(struct drm_i915_gem_object *obj)
-{
-	return obj->base.size;
-}
-
-#endif /* !__HUGE_GEM_OBJECT_H */
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c
index 50bb7bbd26d3..841703a7eb6e 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem.c
@@ -8,8 +8,10 @@
 
 #include "../i915_selftest.h"
 
-#include "mock_context.h"
 #include "igt_flush_test.h"
+#include "mock_drm.h"
+
+#include "../gem/selftests/mock_context.h"
 
 static int switch_to_context(struct drm_i915_private *i915,
 			     struct i915_gem_context *ctx)
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
index b270eab1cad1..4c6900473060 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
@@ -25,10 +25,11 @@
 #include "../i915_selftest.h"
 
 #include "lib_sw_fence.h"
-#include "mock_context.h"
 #include "mock_drm.h"
 #include "mock_gem_device.h"
 
+#include "../gem/selftests/mock_context.h"
+
 static void quirk_add(struct drm_i915_gem_object *obj,
 		      struct list_head *objects)
 {
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
index 57b3d9867070..94b5271d564a 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
@@ -28,10 +28,11 @@
 #include "../i915_selftest.h"
 #include "i915_random.h"
 
-#include "mock_context.h"
 #include "mock_drm.h"
 #include "mock_gem_device.h"
 
+#include "../gem/selftests/mock_context.h"
+
 static void cleanup_freed_objects(struct drm_i915_private *i915)
 {
 	/*
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index 3eb6a6b075ab..72d9d22ce306 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -29,10 +29,11 @@
 #include "igt_live_test.h"
 #include "lib_sw_fence.h"
 
-#include "mock_context.h"
 #include "mock_drm.h"
 #include "mock_gem_device.h"
 
+#include "../gem/selftests/mock_context.h"
+
 static int igt_add_request(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c
index fc594b030f5a..131d0d9a6cd1 100644
--- a/drivers/gpu/drm/i915/selftests/i915_vma.c
+++ b/drivers/gpu/drm/i915/selftests/i915_vma.c
@@ -27,9 +27,10 @@
 #include "../i915_selftest.h"
 
 #include "mock_gem_device.h"
-#include "mock_context.h"
 #include "mock_gtt.h"
 
+#include "../gem/selftests/mock_context.h"
+
 static bool assert_vma(struct i915_vma *vma,
 		       struct drm_i915_gem_object *obj,
 		       struct i915_gem_context *ctx)
diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.c b/drivers/gpu/drm/i915/selftests/igt_flush_test.c
index 94aee4071a66..2eb1c92eca26 100644
--- a/drivers/gpu/drm/i915/selftests/igt_flush_test.c
+++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.c
@@ -9,6 +9,8 @@
 #include "../i915_selftest.h"
 #include "igt_flush_test.h"
 
+#include "../gem/i915_gem_context.h"
+
 int igt_flush_test(struct drm_i915_private *i915, unsigned int flags)
 {
 	cond_resched();
diff --git a/drivers/gpu/drm/i915/selftests/igt_spinner.h b/drivers/gpu/drm/i915/selftests/igt_spinner.h
index 391777c76dc7..7ec49d40dd8c 100644
--- a/drivers/gpu/drm/i915/selftests/igt_spinner.h
+++ b/drivers/gpu/drm/i915/selftests/igt_spinner.h
@@ -12,7 +12,6 @@
 #include "../i915_drv.h"
 #include "../i915_request.h"
 #include "../intel_ringbuffer.h"
-#include "../i915_gem_context.h"
 
 struct igt_spinner {
 	struct drm_i915_private *i915;
diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
index 10658ad05305..63ac130b02e1 100644
--- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
@@ -24,15 +24,18 @@
 
 #include <linux/kthread.h>
 
+#include "../gem/i915_gem_context.h"
+
 #include "../i915_selftest.h"
 #include "i915_random.h"
 #include "igt_flush_test.h"
 #include "igt_reset.h"
 #include "igt_wedge_me.h"
 
-#include "mock_context.h"
 #include "mock_drm.h"
 
+#include "../gem/selftests/mock_context.h"
+
 #define IGT_IDLE_TIMEOUT 50 /* ms; time to wait after flushing between tests */
 
 struct hang {
diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
index a7de7a8fc24a..f5d2371e1423 100644
--- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
+++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
@@ -15,7 +15,7 @@
 #include "i915_random.h"
 #include "lib_sw_fence.h"
 
-#include "mock_context.h"
+#include "../gem/selftests/mock_context.h"
 
 static int live_sanitycheck(void *arg)
 {
diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
index f2a2b51a4662..803a01ad9fdc 100644
--- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
@@ -11,9 +11,11 @@
 #include "igt_reset.h"
 #include "igt_spinner.h"
 #include "igt_wedge_me.h"
-#include "mock_context.h"
 #include "mock_drm.h"
 
+#include "../gem/selftests/mock_context.h"
+#include "../gem/i915_gem_context.h"
+
 static const struct wo_register {
 	enum intel_platform platform;
 	u32 reg;
diff --git a/drivers/gpu/drm/i915/selftests/mock_context.h b/drivers/gpu/drm/i915/selftests/mock_context.h
deleted file mode 100644
index 29b9d60a158b..000000000000
--- a/drivers/gpu/drm/i915/selftests/mock_context.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- */
-
-#ifndef __MOCK_CONTEXT_H
-#define __MOCK_CONTEXT_H
-
-void mock_init_contexts(struct drm_i915_private *i915);
-
-struct i915_gem_context *
-mock_context(struct drm_i915_private *i915,
-	     const char *name);
-
-void mock_context_close(struct i915_gem_context *ctx);
-
-struct i915_gem_context *
-live_context(struct drm_i915_private *i915, struct drm_file *file);
-
-struct i915_gem_context *kernel_context(struct drm_i915_private *i915);
-void kernel_context_close(struct i915_gem_context *ctx);
-
-#endif /* !__MOCK_CONTEXT_H */
diff --git a/drivers/gpu/drm/i915/selftests/mock_dmabuf.h b/drivers/gpu/drm/i915/selftests/mock_dmabuf.h
deleted file mode 100644
index ec80613159b9..000000000000
--- a/drivers/gpu/drm/i915/selftests/mock_dmabuf.h
+++ /dev/null
@@ -1,41 +0,0 @@
-
-/*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- */
-
-#ifndef __MOCK_DMABUF_H__
-#define __MOCK_DMABUF_H__
-
-#include <linux/dma-buf.h>
-
-struct mock_dmabuf {
-	int npages;
-	struct page *pages[];
-};
-
-static struct mock_dmabuf *to_mock(struct dma_buf *buf)
-{
-	return buf->priv;
-}
-
-#endif /* !__MOCK_DMABUF_H__ */
diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c
index f6d120e05ee4..bae88d5f4fbf 100644
--- a/drivers/gpu/drm/i915/selftests/mock_engine.c
+++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
@@ -22,6 +22,8 @@
  *
  */
 
+#include "../gem/i915_gem_context.h"
+
 #include "mock_engine.h"
 #include "mock_request.h"
 
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index 54cfb611c0aa..930f9cb1661f 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -26,13 +26,14 @@
 #include <linux/pm_runtime.h>
 
 #include "mock_engine.h"
-#include "mock_context.h"
 #include "mock_request.h"
 #include "mock_gem_device.h"
-#include "mock_gem_object.h"
 #include "mock_gtt.h"
 #include "mock_uncore.h"
 
+#include "../gem/selftests/mock_context.h"
+#include "../gem/selftests/mock_gem_object.h"
+
 void mock_device_flush(struct drm_i915_private *i915)
 {
 	struct intel_engine_cs *engine;
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 78+ messages in thread

* [PATCH 37/43] drm/i915: Pull scatterlist utils out of i915_gem.h
  2019-03-06 14:24 RFC Breaking up GEM struct_mutex for async-pages Chris Wilson
                   ` (35 preceding siblings ...)
  2019-03-06 14:25 ` [PATCH 36/43] drm/i915: Move more GEM objects under gem/ Chris Wilson
@ 2019-03-06 14:25 ` Chris Wilson
  2019-03-06 14:25 ` [PATCH 38/43] drm/i915: Move GEM object domain management from struct_mutex to local Chris Wilson
                   ` (8 subsequent siblings)
  45 siblings, 0 replies; 78+ messages in thread
From: Chris Wilson @ 2019-03-06 14:25 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld

Out scatterlist utility routines can be pulled out of i915_gem.h for a
bit more decluttering.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Makefile                 |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c    |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_internal.c  |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_pages.c     |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_phys.c      |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c     |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_userptr.c   |   1 +
 .../drm/i915/gem/selftests/huge_gem_object.c  |   2 +
 .../drm/i915/gem/selftests/i915_gem_dmabuf.c  |   2 +
 drivers/gpu/drm/i915/i915_drv.h               | 110 ---------------
 drivers/gpu/drm/i915/i915_gem.c               |  30 +---
 drivers/gpu/drm/i915/i915_gem_fence_reg.c     |   2 +
 drivers/gpu/drm/i915/i915_gem_gtt.c           |   3 +-
 drivers/gpu/drm/i915/i915_gpu_error.c         |   1 +
 drivers/gpu/drm/i915/i915_scatterlist.c       |  39 ++++++
 drivers/gpu/drm/i915/i915_scatterlist.h       | 128 ++++++++++++++++++
 drivers/gpu/drm/i915/selftests/i915_vma.c     |   1 +
 drivers/gpu/drm/i915/selftests/scatterlist.c  |   1 +
 18 files changed, 186 insertions(+), 140 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_scatterlist.c
 create mode 100644 drivers/gpu/drm/i915/i915_scatterlist.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index c12350df7793..84dfdf90e0e7 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -42,6 +42,7 @@ i915-y := i915_drv.o \
 	  i915_params.o \
 	  i915_pci.o \
 	  i915_reset.o \
+	  i915_scatterlist.o \
 	  i915_suspend.o \
 	  i915_sw_fence.o \
 	  i915_syncmap.o \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
index 47db261a98c0..b5d86cfadd46 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
@@ -11,6 +11,7 @@
 #include "i915_gem_object.h"
 
 #include "../i915_drv.h"
+#include "../i915_scatterlist.h"
 
 static struct drm_i915_gem_object *dma_buf_to_obj(struct dma_buf *buf)
 {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
index d40adb3bbe29..d072d0cbce06 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
@@ -14,6 +14,7 @@
 
 #include "../i915_drv.h"
 #include "../i915_gem.h"
+#include "../i915_scatterlist.h"
 #include "../i915_utils.h"
 
 #define QUIET (__GFP_NORETRY | __GFP_NOWARN)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index a594f48db28e..5c1a3cf2c33f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -7,6 +7,7 @@
 #include "i915_gem_object.h"
 
 #include "../i915_drv.h"
+#include "../i915_scatterlist.h"
 
 void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
 				 struct sg_table *pages,
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/i915_gem_phys.c
index 1bf3e0afcba2..22d185301578 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_phys.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_phys.c
@@ -16,6 +16,7 @@
 #include "i915_gem_object.h"
 
 #include "../i915_drv.h"
+#include "../i915_scatterlist.h"
 
 static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
 {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 9a08aba6005e..f3e99c9705b7 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -10,6 +10,7 @@
 #include "i915_gem_object.h"
 
 #include "../i915_drv.h"
+#include "../i915_scatterlist.h"
 
 /*
  * Move pages to appropriate lru and release the pagevec, decrementing the
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
index 12ef6d8fb2dc..0ae793d997a5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
@@ -15,6 +15,7 @@
 #include "i915_gem_ioctls.h"
 #include "i915_gem_object.h"
 
+#include "../i915_scatterlist.h"
 #include "../i915_trace.h"
 #include "../intel_drv.h"
 
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c
index 824f3761314c..c03781f8b435 100644
--- a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c
@@ -4,6 +4,8 @@
  * Copyright © 2016 Intel Corporation
  */
 
+#include "../../i915_scatterlist.h"
+
 #include "huge_gem_object.h"
 
 static void huge_free_pages(struct drm_i915_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
index 67a9d551bb0e..d9f31d3a4f24 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
@@ -9,6 +9,8 @@
 #include "../../selftests/mock_gem_device.h"
 #include "mock_dmabuf.h"
 
+#include "../../i915_drv.h"
+
 static int igt_dmabuf_export(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 82e32be4a3bd..e2ec46b12de7 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2130,111 +2130,6 @@ enum hdmi_force_audio {
 	GENMASK(INTEL_FRONTBUFFER_BITS_PER_PIPE * ((pipe) + 1) - 1, \
 		INTEL_FRONTBUFFER_BITS_PER_PIPE * (pipe))
 
-/*
- * Optimised SGL iterator for GEM objects
- */
-static __always_inline struct sgt_iter {
-	struct scatterlist *sgp;
-	union {
-		unsigned long pfn;
-		dma_addr_t dma;
-	};
-	unsigned int curr;
-	unsigned int max;
-} __sgt_iter(struct scatterlist *sgl, bool dma) {
-	struct sgt_iter s = { .sgp = sgl };
-
-	if (s.sgp) {
-		s.max = s.curr = s.sgp->offset;
-		s.max += s.sgp->length;
-		if (dma)
-			s.dma = sg_dma_address(s.sgp);
-		else
-			s.pfn = page_to_pfn(sg_page(s.sgp));
-	}
-
-	return s;
-}
-
-static inline struct scatterlist *____sg_next(struct scatterlist *sg)
-{
-	++sg;
-	if (unlikely(sg_is_chain(sg)))
-		sg = sg_chain_ptr(sg);
-	return sg;
-}
-
-/**
- * __sg_next - return the next scatterlist entry in a list
- * @sg:		The current sg entry
- *
- * Description:
- *   If the entry is the last, return NULL; otherwise, step to the next
- *   element in the array (@sg@+1). If that's a chain pointer, follow it;
- *   otherwise just return the pointer to the current element.
- **/
-static inline struct scatterlist *__sg_next(struct scatterlist *sg)
-{
-	return sg_is_last(sg) ? NULL : ____sg_next(sg);
-}
-
-/**
- * for_each_sgt_dma - iterate over the DMA addresses of the given sg_table
- * @__dmap:	DMA address (output)
- * @__iter:	'struct sgt_iter' (iterator state, internal)
- * @__sgt:	sg_table to iterate over (input)
- */
-#define for_each_sgt_dma(__dmap, __iter, __sgt)				\
-	for ((__iter) = __sgt_iter((__sgt)->sgl, true);			\
-	     ((__dmap) = (__iter).dma + (__iter).curr);			\
-	     (((__iter).curr += I915_GTT_PAGE_SIZE) >= (__iter).max) ?	\
-	     (__iter) = __sgt_iter(__sg_next((__iter).sgp), true), 0 : 0)
-
-/**
- * for_each_sgt_page - iterate over the pages of the given sg_table
- * @__pp:	page pointer (output)
- * @__iter:	'struct sgt_iter' (iterator state, internal)
- * @__sgt:	sg_table to iterate over (input)
- */
-#define for_each_sgt_page(__pp, __iter, __sgt)				\
-	for ((__iter) = __sgt_iter((__sgt)->sgl, false);		\
-	     ((__pp) = (__iter).pfn == 0 ? NULL :			\
-	      pfn_to_page((__iter).pfn + ((__iter).curr >> PAGE_SHIFT))); \
-	     (((__iter).curr += PAGE_SIZE) >= (__iter).max) ?		\
-	     (__iter) = __sgt_iter(__sg_next((__iter).sgp), false), 0 : 0)
-
-bool i915_sg_trim(struct sg_table *orig_st);
-
-static inline unsigned int i915_sg_page_sizes(struct scatterlist *sg)
-{
-	unsigned int page_sizes;
-
-	page_sizes = 0;
-	while (sg) {
-		GEM_BUG_ON(sg->offset);
-		GEM_BUG_ON(!IS_ALIGNED(sg->length, PAGE_SIZE));
-		page_sizes |= sg->length;
-		sg = __sg_next(sg);
-	}
-
-	return page_sizes;
-}
-
-static inline unsigned int i915_sg_segment_size(void)
-{
-	unsigned int size = swiotlb_max_segment();
-
-	if (size == 0)
-		return SCATTERLIST_MAX_SEGMENT;
-
-	size = rounddown(size, PAGE_SIZE);
-	/* swiotlb_max_segment_size can return 1 byte when it means one page. */
-	if (size < PAGE_SIZE)
-		size = PAGE_SIZE;
-
-	return size;
-}
-
 #define INTEL_INFO(dev_priv)	(&(dev_priv)->__info)
 #define RUNTIME_INFO(dev_priv)	(&(dev_priv)->__runtime)
 #define DRIVER_CAPS(dev_priv)	(&(dev_priv)->caps)
@@ -2783,11 +2678,6 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj);
 
 void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv);
 
-static inline int __sg_page_count(const struct scatterlist *sg)
-{
-	return sg->length >> PAGE_SHIFT;
-}
-
 static inline int __must_check
 i915_mutex_lock_interruptible(struct drm_device *dev)
 {
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b0965fa15be9..aceff0ee38e5 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -46,6 +46,7 @@
 #include "i915_drv.h"
 #include "i915_globals.h"
 #include "i915_reset.h"
+#include "i915_scatterlist.h"
 #include "i915_trace.h"
 #include "i915_vgpu.h"
 
@@ -1155,34 +1156,6 @@ void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
 	}
 }
 
-bool i915_sg_trim(struct sg_table *orig_st)
-{
-	struct sg_table new_st;
-	struct scatterlist *sg, *new_sg;
-	unsigned int i;
-
-	if (orig_st->nents == orig_st->orig_nents)
-		return false;
-
-	if (sg_alloc_table(&new_st, orig_st->nents, GFP_KERNEL | __GFP_NOWARN))
-		return false;
-
-	new_sg = new_st.sgl;
-	for_each_sg(orig_st->sgl, sg, orig_st->nents, i) {
-		sg_set_page(new_sg, sg_page(sg), sg->length, 0);
-		sg_dma_address(new_sg) = sg_dma_address(sg);
-		sg_dma_len(new_sg) = sg_dma_len(sg);
-
-		new_sg = sg_next(new_sg);
-	}
-	GEM_BUG_ON(new_sg); /* Should walk exactly nents and hit the end */
-
-	sg_free_table(orig_st);
-
-	*orig_st = new_st;
-	return true;
-}
-
 static void
 i915_gem_retire_work_handler(struct work_struct *work)
 {
@@ -2688,7 +2661,6 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
-#include "selftests/scatterlist.c"
 #include "selftests/mock_gem_device.c"
 #include "selftests/i915_gem.c"
 #endif
diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
index 65624b8e4d15..89cfaba00062 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
@@ -22,7 +22,9 @@
  */
 
 #include <drm/i915_drm.h>
+
 #include "i915_drv.h"
+#include "i915_scatterlist.h"
 
 /**
  * DOC: fence register handling
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index d717952cc430..fce201078a0e 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -36,9 +36,10 @@
 #include <drm/i915_drm.h>
 
 #include "i915_drv.h"
-#include "i915_vgpu.h"
 #include "i915_reset.h"
+#include "i915_scatterlist.h"
 #include "i915_trace.h"
+#include "i915_vgpu.h"
 #include "intel_drv.h"
 #include "intel_frontbuffer.h"
 
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 526d1300a577..7c7f69114f4d 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -40,6 +40,7 @@
 
 #include "i915_gpu_error.h"
 #include "i915_drv.h"
+#include "i915_scatterlist.h"
 
 static inline const struct intel_engine_cs *
 engine_lookup(const struct drm_i915_private *i915, unsigned int id)
diff --git a/drivers/gpu/drm/i915/i915_scatterlist.c b/drivers/gpu/drm/i915/i915_scatterlist.c
new file mode 100644
index 000000000000..cc6b3846a8c7
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_scatterlist.c
@@ -0,0 +1,39 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#include "i915_scatterlist.h"
+
+bool i915_sg_trim(struct sg_table *orig_st)
+{
+	struct sg_table new_st;
+	struct scatterlist *sg, *new_sg;
+	unsigned int i;
+
+	if (orig_st->nents == orig_st->orig_nents)
+		return false;
+
+	if (sg_alloc_table(&new_st, orig_st->nents, GFP_KERNEL | __GFP_NOWARN))
+		return false;
+
+	new_sg = new_st.sgl;
+	for_each_sg(orig_st->sgl, sg, orig_st->nents, i) {
+		sg_set_page(new_sg, sg_page(sg), sg->length, 0);
+		sg_dma_address(new_sg) = sg_dma_address(sg);
+		sg_dma_len(new_sg) = sg_dma_len(sg);
+
+		new_sg = sg_next(new_sg);
+	}
+	GEM_BUG_ON(new_sg); /* Should walk exactly nents and hit the end */
+
+	sg_free_table(orig_st);
+
+	*orig_st = new_st;
+	return true;
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/scatterlist.c"
+#endif
diff --git a/drivers/gpu/drm/i915/i915_scatterlist.h b/drivers/gpu/drm/i915/i915_scatterlist.h
new file mode 100644
index 000000000000..ca9ddee41e88
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_scatterlist.h
@@ -0,0 +1,128 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#ifndef I915_SCATTERLIST_H
+#define I915_SCATTERLIST_H
+
+#include <linux/pfn.h>
+#include <linux/scatterlist.h>
+#include <linux/swiotlb.h>
+
+#include "i915_gem.h"
+
+/*
+ * Optimised SGL iterator for GEM objects
+ */
+static __always_inline struct sgt_iter {
+	struct scatterlist *sgp;
+	union {
+		unsigned long pfn;
+		dma_addr_t dma;
+	};
+	unsigned int curr;
+	unsigned int max;
+} __sgt_iter(struct scatterlist *sgl, bool dma) {
+	struct sgt_iter s = { .sgp = sgl };
+
+	if (s.sgp) {
+		s.max = s.curr = s.sgp->offset;
+		s.max += s.sgp->length;
+		if (dma)
+			s.dma = sg_dma_address(s.sgp);
+		else
+			s.pfn = page_to_pfn(sg_page(s.sgp));
+	}
+
+	return s;
+}
+
+static inline int __sg_page_count(const struct scatterlist *sg)
+{
+	return sg->length >> PAGE_SHIFT;
+}
+
+static inline struct scatterlist *____sg_next(struct scatterlist *sg)
+{
+	++sg;
+	if (unlikely(sg_is_chain(sg)))
+		sg = sg_chain_ptr(sg);
+	return sg;
+}
+
+/**
+ * __sg_next - return the next scatterlist entry in a list
+ * @sg:		The current sg entry
+ *
+ * Description:
+ *   If the entry is the last, return NULL; otherwise, step to the next
+ *   element in the array (@sg@+1). If that's a chain pointer, follow it;
+ *   otherwise just return the pointer to the current element.
+ **/
+static inline struct scatterlist *__sg_next(struct scatterlist *sg)
+{
+	return sg_is_last(sg) ? NULL : ____sg_next(sg);
+}
+
+/**
+ * for_each_sgt_dma - iterate over the DMA addresses of the given sg_table
+ * @__dmap:	DMA address (output)
+ * @__iter:	'struct sgt_iter' (iterator state, internal)
+ * @__sgt:	sg_table to iterate over (input)
+ */
+#define for_each_sgt_dma(__dmap, __iter, __sgt)				\
+	for ((__iter) = __sgt_iter((__sgt)->sgl, true);			\
+	     ((__dmap) = (__iter).dma + (__iter).curr);			\
+	     (((__iter).curr += I915_GTT_PAGE_SIZE) >= (__iter).max) ?	\
+	     (__iter) = __sgt_iter(__sg_next((__iter).sgp), true), 0 : 0)
+
+/**
+ * for_each_sgt_page - iterate over the pages of the given sg_table
+ * @__pp:	page pointer (output)
+ * @__iter:	'struct sgt_iter' (iterator state, internal)
+ * @__sgt:	sg_table to iterate over (input)
+ */
+#define for_each_sgt_page(__pp, __iter, __sgt)				\
+	for ((__iter) = __sgt_iter((__sgt)->sgl, false);		\
+	     ((__pp) = (__iter).pfn == 0 ? NULL :			\
+	      pfn_to_page((__iter).pfn + ((__iter).curr >> PAGE_SHIFT))); \
+	     (((__iter).curr += PAGE_SIZE) >= (__iter).max) ?		\
+	     (__iter) = __sgt_iter(__sg_next((__iter).sgp), false), 0 : 0)
+
+bool i915_sg_trim(struct sg_table *orig_st);
+
+static inline unsigned int i915_sg_page_sizes(struct scatterlist *sg)
+{
+	unsigned int page_sizes;
+
+	page_sizes = 0;
+	while (sg) {
+		GEM_BUG_ON(sg->offset);
+		GEM_BUG_ON(!IS_ALIGNED(sg->length, PAGE_SIZE));
+		page_sizes |= sg->length;
+		sg = __sg_next(sg);
+	}
+
+	return page_sizes;
+}
+
+static inline unsigned int i915_sg_segment_size(void)
+{
+	unsigned int size = swiotlb_max_segment();
+
+	if (size == 0)
+		return SCATTERLIST_MAX_SEGMENT;
+
+	size = rounddown(size, PAGE_SIZE);
+	/* swiotlb_max_segment_size can return 1 byte when it means one page. */
+	if (size < PAGE_SIZE)
+		size = PAGE_SIZE;
+
+	return size;
+}
+
+bool i915_sg_trim(struct sg_table *orig_st);
+
+#endif
diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c
index 131d0d9a6cd1..b0a4296a0504 100644
--- a/drivers/gpu/drm/i915/selftests/i915_vma.c
+++ b/drivers/gpu/drm/i915/selftests/i915_vma.c
@@ -30,6 +30,7 @@
 #include "mock_gtt.h"
 
 #include "../gem/selftests/mock_context.h"
+#include "../i915_scatterlist.h"
 
 static bool assert_vma(struct i915_vma *vma,
 		       struct drm_i915_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/selftests/scatterlist.c b/drivers/gpu/drm/i915/selftests/scatterlist.c
index cd6d2a16071f..5f0c48a9f051 100644
--- a/drivers/gpu/drm/i915/selftests/scatterlist.c
+++ b/drivers/gpu/drm/i915/selftests/scatterlist.c
@@ -25,6 +25,7 @@
 #include <linux/random.h>
 
 #include "../i915_selftest.h"
+#include "../i915_utils.h"
 
 #define PFN_BIAS (1 << 10)
 
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 78+ messages in thread

* [PATCH 38/43] drm/i915: Move GEM object domain management from struct_mutex to local
  2019-03-06 14:24 RFC Breaking up GEM struct_mutex for async-pages Chris Wilson
                   ` (36 preceding siblings ...)
  2019-03-06 14:25 ` [PATCH 37/43] drm/i915: Pull scatterlist utils out of i915_gem.h Chris Wilson
@ 2019-03-06 14:25 ` Chris Wilson
  2019-03-06 14:25 ` [PATCH 39/43] drm/i915: Move GEM object waiting to its own file Chris Wilson
                   ` (7 subsequent siblings)
  45 siblings, 0 replies; 78+ messages in thread
From: Chris Wilson @ 2019-03-06 14:25 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld

Use the per-object local lock to control the cache domain of the
individual GEM objects, not struct_mutex. This is a huge leap forward
for us in terms of object-level synchronisation; execbuffers are
coordinated using the ww_mutex and pread/pwrite is finally fully
serialised again.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Makefile                 |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_clflush.c   |   4 +-
 drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c    |  10 +-
 drivers/gpu/drm/i915/gem/i915_gem_domain.c    |  70 +++++-----
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 125 +++++++++++------
 drivers/gpu/drm/i915/gem/i915_gem_fence.c     |  99 ++++++++++++++
 drivers/gpu/drm/i915/gem/i915_gem_mman.c      |   2 +
 drivers/gpu/drm/i915/gem/i915_gem_object.c    |   2 +
 drivers/gpu/drm/i915/gem/i915_gem_object.h    |  14 ++
 .../gpu/drm/i915/gem/selftests/huge_pages.c   |  14 +-
 .../i915/gem/selftests/i915_gem_coherency.c   |  12 ++
 .../drm/i915/gem/selftests/i915_gem_context.c |  26 ++++
 .../drm/i915/gem/selftests/i915_gem_mman.c    |   6 +
 .../drm/i915/gem/selftests/i915_gem_phys.c    |   4 +-
 drivers/gpu/drm/i915/gvt/cmd_parser.c         |   2 +
 drivers/gpu/drm/i915/gvt/scheduler.c          |   8 +-
 drivers/gpu/drm/i915/i915_cmd_parser.c        |  23 ++--
 drivers/gpu/drm/i915/i915_gem.c               | 128 ++++++++++--------
 drivers/gpu/drm/i915/i915_gem_gtt.c           |   5 +-
 drivers/gpu/drm/i915/i915_gem_render_state.c  |   2 +
 drivers/gpu/drm/i915/i915_perf.c              |   4 +-
 drivers/gpu/drm/i915/i915_vma.c               |   8 +-
 drivers/gpu/drm/i915/i915_vma.h               |  12 ++
 drivers/gpu/drm/i915/intel_display.c          |   5 +
 drivers/gpu/drm/i915/intel_engine_cs.c        |   4 +-
 drivers/gpu/drm/i915/intel_guc_log.c          |   6 +-
 drivers/gpu/drm/i915/intel_lrc.c              |   4 +
 drivers/gpu/drm/i915/intel_overlay.c          |  25 ++--
 drivers/gpu/drm/i915/intel_ringbuffer.c       |  10 +-
 drivers/gpu/drm/i915/intel_uc_fw.c            |   2 +
 .../gpu/drm/i915/selftests/i915_gem_evict.c   |   4 +-
 drivers/gpu/drm/i915/selftests/i915_request.c |   8 ++
 drivers/gpu/drm/i915/selftests/igt_spinner.c  |   4 +-
 .../gpu/drm/i915/selftests/intel_hangcheck.c  |   6 +-
 drivers/gpu/drm/i915/selftests/intel_lrc.c    |   4 +
 .../drm/i915/selftests/intel_workarounds.c    |   8 ++
 36 files changed, 482 insertions(+), 189 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_fence.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 84dfdf90e0e7..89fb4eaca4fb 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -80,6 +80,7 @@ i915-y += \
 	  gem/i915_gem_dmabuf.o \
 	  gem/i915_gem_domain.o \
 	  gem/i915_gem_execbuffer.o \
+	  gem/i915_gem_fence.o \
 	  gem/i915_gem_internal.o \
 	  gem/i915_gem_object.o \
 	  gem/i915_gem_mman.o \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
index 093bfff55a96..efab47250588 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
@@ -96,6 +96,8 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
 {
 	struct clflush *clflush;
 
+	assert_object_held(obj);
+
 	/*
 	 * Stolen memory is always coherent with the GPU as it is explicitly
 	 * marked as wc by the system, or the system is cache-coherent.
@@ -145,9 +147,7 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
 						true, I915_FENCE_TIMEOUT,
 						I915_FENCE_GFP);
 
-		reservation_object_lock(obj->resv, NULL);
 		reservation_object_add_excl_fence(obj->resv, &clflush->dma);
-		reservation_object_unlock(obj->resv);
 
 		i915_sw_fence_commit(&clflush->wait);
 	} else if (obj->mm.pages) {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
index b5d86cfadd46..1585e54ef26b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
@@ -151,7 +151,6 @@ static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *
 static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction)
 {
 	struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
-	struct drm_device *dev = obj->base.dev;
 	bool write = (direction == DMA_BIDIRECTIONAL || direction == DMA_TO_DEVICE);
 	int err;
 
@@ -159,12 +158,12 @@ static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_dire
 	if (err)
 		return err;
 
-	err = i915_mutex_lock_interruptible(dev);
+	err = i915_gem_object_lock_interruptible(obj);
 	if (err)
 		goto out;
 
 	err = i915_gem_object_set_to_cpu_domain(obj, write);
-	mutex_unlock(&dev->struct_mutex);
+	i915_gem_object_unlock(obj);
 
 out:
 	i915_gem_object_unpin_pages(obj);
@@ -174,19 +173,18 @@ static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_dire
 static int i915_gem_end_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction)
 {
 	struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
-	struct drm_device *dev = obj->base.dev;
 	int err;
 
 	err = i915_gem_object_pin_pages(obj);
 	if (err)
 		return err;
 
-	err = i915_mutex_lock_interruptible(dev);
+	err = i915_gem_object_lock_interruptible(obj);
 	if (err)
 		goto out;
 
 	err = i915_gem_object_set_to_gtt_domain(obj, false);
-	mutex_unlock(&dev->struct_mutex);
+	i915_gem_object_unlock(obj);
 
 out:
 	i915_gem_object_unpin_pages(obj);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index c54e9e73212b..a0346bc483f8 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -31,9 +31,9 @@ void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
 	if (!READ_ONCE(obj->pin_global))
 		return;
 
-	mutex_lock(&obj->base.dev->struct_mutex);
+	i915_gem_object_lock(obj);
 	__i915_gem_object_flush_for_display(obj);
-	mutex_unlock(&obj->base.dev->struct_mutex);
+	i915_gem_object_unlock(obj);
 }
 
 /**
@@ -49,11 +49,10 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
 {
 	int ret;
 
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
+	assert_object_held(obj);
 
 	ret = i915_gem_object_wait(obj,
 				   I915_WAIT_INTERRUPTIBLE |
-				   I915_WAIT_LOCKED |
 				   (write ? I915_WAIT_ALL : 0),
 				   MAX_SCHEDULE_TIMEOUT);
 	if (ret)
@@ -111,11 +110,10 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 {
 	int ret;
 
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
+	assert_object_held(obj);
 
 	ret = i915_gem_object_wait(obj,
 				   I915_WAIT_INTERRUPTIBLE |
-				   I915_WAIT_LOCKED |
 				   (write ? I915_WAIT_ALL : 0),
 				   MAX_SCHEDULE_TIMEOUT);
 	if (ret)
@@ -181,7 +179,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 	struct i915_vma *vma;
 	int ret;
 
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
+	assert_object_held(obj);
 
 	if (obj->cache_level == cache_level)
 		return 0;
@@ -230,7 +228,6 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 		 */
 		ret = i915_gem_object_wait(obj,
 					   I915_WAIT_INTERRUPTIBLE |
-					   I915_WAIT_LOCKED |
 					   I915_WAIT_ALL,
 					   MAX_SCHEDULE_TIMEOUT);
 		if (ret)
@@ -374,12 +371,16 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
 	if (ret)
 		goto out;
 
-	ret = i915_mutex_lock_interruptible(dev);
+	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
 	if (ret)
 		goto out;
 
-	ret = i915_gem_object_set_cache_level(obj, level);
-	mutex_unlock(&dev->struct_mutex);
+	ret = i915_gem_object_lock_interruptible(obj);
+	if (ret == 0) {
+		ret = i915_gem_object_set_cache_level(obj, level);
+		i915_gem_object_unlock(obj);
+	}
+	mutex_unlock(&i915->drm.struct_mutex);
 
 out:
 	i915_gem_object_put(obj);
@@ -401,7 +402,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 	struct i915_vma *vma;
 	int ret;
 
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
+	assert_object_held(obj);
 
 	/* Mark the global pin early so that we account for the
 	 * display coherency whilst setting up the cache domains.
@@ -486,16 +487,18 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
 void
 i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
 {
-	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
+	struct drm_i915_gem_object *obj = vma->obj;
+
+	assert_object_held(obj);
 
-	if (WARN_ON(vma->obj->pin_global == 0))
+	if (WARN_ON(obj->pin_global == 0))
 		return;
 
-	if (--vma->obj->pin_global == 0)
+	if (--obj->pin_global == 0)
 		vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
 
 	/* Bump the LRU to try and avoid premature eviction whilst flipping  */
-	i915_gem_object_bump_inactive_ggtt(vma->obj);
+	i915_gem_object_bump_inactive_ggtt(obj);
 
 	i915_vma_unpin(vma);
 }
@@ -513,11 +516,10 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
 {
 	int ret;
 
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
+	assert_object_held(obj);
 
 	ret = i915_gem_object_wait(obj,
 				   I915_WAIT_INTERRUPTIBLE |
-				   I915_WAIT_LOCKED |
 				   (write ? I915_WAIT_ALL : 0),
 				   MAX_SCHEDULE_TIMEOUT);
 	if (ret)
@@ -619,7 +621,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 	if (err)
 		goto out;
 
-	err = i915_mutex_lock_interruptible(dev);
+	err = i915_gem_object_lock_interruptible(obj);
 	if (err)
 		goto out_unpin;
 
@@ -633,7 +635,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 	/* And bump the LRU for this access */
 	i915_gem_object_bump_inactive_ggtt(obj);
 
-	mutex_unlock(&dev->struct_mutex);
+	i915_gem_object_unlock(obj);
 
 	if (write_domain != 0)
 		intel_fb_obj_invalidate(obj,
@@ -656,22 +658,23 @@ int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
 {
 	int ret;
 
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
 	*needs_clflush = 0;
 	if (!i915_gem_object_has_struct_page(obj))
 		return -ENODEV;
 
+	ret = i915_gem_object_lock_interruptible(obj);
+	if (ret)
+		return ret;
+
 	ret = i915_gem_object_wait(obj,
-				   I915_WAIT_INTERRUPTIBLE |
-				   I915_WAIT_LOCKED,
+				   I915_WAIT_INTERRUPTIBLE,
 				   MAX_SCHEDULE_TIMEOUT);
 	if (ret)
-		return ret;
+		goto err_unlock;
 
 	ret = i915_gem_object_pin_pages(obj);
 	if (ret)
-		return ret;
+		goto err_unlock;
 
 	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
 	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
@@ -699,6 +702,8 @@ int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
 
 err_unpin:
 	i915_gem_object_unpin_pages(obj);
+err_unlock:
+	i915_gem_object_unlock(obj);
 	return ret;
 }
 
@@ -707,23 +712,24 @@ int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
 {
 	int ret;
 
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
 	*needs_clflush = 0;
 	if (!i915_gem_object_has_struct_page(obj))
 		return -ENODEV;
 
+	ret = i915_gem_object_lock_interruptible(obj);
+	if (ret)
+		return ret;
+
 	ret = i915_gem_object_wait(obj,
 				   I915_WAIT_INTERRUPTIBLE |
-				   I915_WAIT_LOCKED |
 				   I915_WAIT_ALL,
 				   MAX_SCHEDULE_TIMEOUT);
 	if (ret)
-		return ret;
+		goto err_unlock;
 
 	ret = i915_gem_object_pin_pages(obj);
 	if (ret)
-		return ret;
+		goto err_unlock;
 
 	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
 	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
@@ -760,5 +766,7 @@ int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
 
 err_unpin:
 	i915_gem_object_unpin_pages(obj);
+err_unlock:
+	i915_gem_object_unlock(obj);
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 4fe528dc23da..55817ee754b5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -1055,7 +1055,9 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
 		if (use_cpu_reloc(cache, obj))
 			return NULL;
 
+		i915_gem_object_lock(obj);
 		err = i915_gem_object_set_to_gtt_domain(obj, true);
+		i915_gem_object_unlock(obj);
 		if (err)
 			return ERR_PTR(err);
 
@@ -1144,6 +1146,26 @@ static void clflush_write32(u32 *addr, u32 value, unsigned int flushes)
 		*addr = value;
 }
 
+static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma)
+{
+	struct drm_i915_gem_object *obj = vma->obj;
+	int err;
+
+	i915_vma_lock(vma);
+
+	if (obj->cache_dirty & ~obj->cache_coherent)
+		i915_gem_clflush_object(obj, 0);
+	obj->write_domain = 0;
+
+	err = i915_request_await_object(rq, vma->obj, true);
+	if (err == 0)
+		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+
+	i915_vma_unlock(vma);
+
+	return err;
+}
+
 static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 			     struct i915_vma *vma,
 			     unsigned int len)
@@ -1155,15 +1177,6 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 	u32 *cmd;
 	int err;
 
-	if (DBG_FORCE_RELOC == FORCE_GPU_RELOC) {
-		obj = vma->obj;
-		if (obj->cache_dirty & ~obj->cache_coherent)
-			i915_gem_clflush_object(obj, 0);
-		obj->write_domain = 0;
-	}
-
-	GEM_BUG_ON(vma->obj->write_domain & I915_GEM_DOMAIN_CPU);
-
 	obj = i915_gem_batch_pool_get(&eb->engine->batch_pool, PAGE_SIZE);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
@@ -1176,7 +1189,9 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 	if (IS_ERR(cmd))
 		return PTR_ERR(cmd);
 
+	i915_gem_object_lock(obj);
 	err = i915_gem_object_set_to_wc_domain(obj, false);
+	i915_gem_object_unlock(obj);
 	if (err)
 		goto err_unmap;
 
@@ -1196,7 +1211,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 		goto err_unpin;
 	}
 
-	err = i915_request_await_object(rq, vma->obj, true);
+	err = reloc_move_to_gpu(rq, vma);
 	if (err)
 		goto err_request;
 
@@ -1204,14 +1219,12 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 					batch->node.start, PAGE_SIZE,
 					cache->gen > 5 ? 0 : I915_DISPATCH_SECURE);
 	if (err)
-		goto err_request;
+		goto skip_request;
 
+	i915_vma_lock(batch);
 	GEM_BUG_ON(!reservation_object_test_signaled_rcu(batch->resv, true));
 	err = i915_vma_move_to_active(batch, rq, 0);
-	if (err)
-		goto skip_request;
-
-	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	i915_vma_unlock(batch);
 	if (err)
 		goto skip_request;
 
@@ -1820,24 +1833,59 @@ static int eb_relocate(struct i915_execbuffer *eb)
 static int eb_move_to_gpu(struct i915_execbuffer *eb)
 {
 	const unsigned int count = eb->buffer_count;
+	struct ww_acquire_ctx acquire;
 	unsigned int i;
-	int err;
+	int err = 0;
+
+	ww_acquire_init(&acquire, &reservation_ww_class);
 
 	for (i = 0; i < count; i++) {
+		struct i915_vma *vma = eb->vma[i];
+
+		err = ww_mutex_lock_interruptible(&vma->resv->lock, &acquire);
+		if (!err)
+			continue;
+
+		GEM_BUG_ON(err == -EALREADY); /* No duplicate vma */
+
+		if (err == -EDEADLK) {
+			GEM_BUG_ON(i == 0);
+			do {
+				int j = i - 1;
+
+				ww_mutex_unlock(&eb->vma[j]->resv->lock);
+
+				swap(eb->flags[i], eb->flags[j]);
+				swap(eb->vma[i],  eb->vma[j]);
+				eb->vma[i]->exec_flags = &eb->flags[i];
+			} while (--i);
+			GEM_BUG_ON(vma != eb->vma[0]);
+			vma->exec_flags = &eb->flags[0];
+
+			err = ww_mutex_lock_slow_interruptible(&vma->resv->lock,
+							       &acquire);
+		}
+		if (err)
+			break;
+	}
+	ww_acquire_done(&acquire);
+
+	while (i--) {
 		unsigned int flags = eb->flags[i];
 		struct i915_vma *vma = eb->vma[i];
 		struct drm_i915_gem_object *obj = vma->obj;
 
+		assert_vma_held(vma);
+
 		if (flags & EXEC_OBJECT_CAPTURE) {
 			struct i915_capture_list *capture;
 
 			capture = kmalloc(sizeof(*capture), GFP_KERNEL);
-			if (unlikely(!capture))
-				return -ENOMEM;
-
-			capture->next = eb->request->capture_list;
-			capture->vma = eb->vma[i];
-			eb->request->capture_list = capture;
+			if (capture) {
+				capture->next = eb->request->capture_list;
+				capture->vma = vma;
+				eb->request->capture_list = capture;
+			}
 		}
 
 		/*
@@ -1857,24 +1905,15 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
 				flags &= ~EXEC_OBJECT_ASYNC;
 		}
 
-		if (flags & EXEC_OBJECT_ASYNC)
-			continue;
-
-		err = i915_request_await_object
-			(eb->request, obj, flags & EXEC_OBJECT_WRITE);
-		if (err)
-			return err;
-	}
+		if (err == 0 && !(flags & EXEC_OBJECT_ASYNC)) {
+			err = i915_request_await_object
+				(eb->request, obj, flags & EXEC_OBJECT_WRITE);
+		}
 
-	for (i = 0; i < count; i++) {
-		unsigned int flags = eb->flags[i];
-		struct i915_vma *vma = eb->vma[i];
+		if (err == 0)
+