All of lore.kernel.org
 help / color / mirror / Atom feed
From: Chris Wilson <chris@chris-wilson.co.uk>
To: intel-gfx@lists.freedesktop.org
Subject: [PATCH] drm/i915/gt: Split intel_ring_submission
Date: Thu, 24 Oct 2019 08:11:39 +0100	[thread overview]
Message-ID: <20191024071139.31917-1-chris@chris-wilson.co.uk> (raw)

Split the legacy submission backend from the common ring buffer.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Makefile                 |   5 +-
 drivers/gpu/drm/i915/display/intel_overlay.c  |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_context.c   |   3 +-
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    |   1 +
 .../gpu/drm/i915/gem/i915_gem_object_blt.c    |   1 +
 .../i915/gem/selftests/i915_gem_coherency.c   |   1 +
 drivers/gpu/drm/i915/gt/intel_context.c       |   1 +
 drivers/gpu/drm/i915/gt/intel_context.h       |   1 +
 drivers/gpu/drm/i915/gt/intel_engine.h        | 114 -------
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     |   1 +
 drivers/gpu/drm/i915/gt/intel_engine_pm.c     |   1 +
 drivers/gpu/drm/i915/gt/intel_engine_types.h  |  27 +-
 drivers/gpu/drm/i915/gt/intel_lrc.c           |   1 +
 drivers/gpu/drm/i915/gt/intel_mocs.c          |   1 +
 drivers/gpu/drm/i915/gt/intel_renderstate.c   |   1 +
 drivers/gpu/drm/i915/gt/intel_ring.c          | 314 ++++++++++++++++++
 drivers/gpu/drm/i915/gt/intel_ring.h          | 131 ++++++++
 drivers/gpu/drm/i915/gt/intel_timeline.c      |   6 +-
 drivers/gpu/drm/i915/gt/intel_workarounds.c   |   1 +
 drivers/gpu/drm/i915/gt/mock_engine.c         |   1 +
 drivers/gpu/drm/i915/gt/selftest_timeline.c   |   1 +
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c |   3 +-
 drivers/gpu/drm/i915/gvt/cmd_parser.c         |   2 +
 drivers/gpu/drm/i915/gvt/mmio_context.c       |   1 +
 drivers/gpu/drm/i915/gvt/scheduler.c          |   1 +
 drivers/gpu/drm/i915/i915_active.c            |   1 +
 drivers/gpu/drm/i915/i915_perf.c              |   1 +
 drivers/gpu/drm/i915/i915_request.c           |   1 +
 28 files changed, 477 insertions(+), 147 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gt/intel_ring.c
 create mode 100644 drivers/gpu/drm/i915/gt/intel_ring.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index a91e0a487a79..5021aa7fa187 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -90,11 +90,12 @@ gt-y += \
 	gt/intel_gt_requests.o \
 	gt/intel_llc.o \
 	gt/intel_lrc.o \
+	gt/intel_mocs.o \
 	gt/intel_rc6.o \
 	gt/intel_renderstate.o \
 	gt/intel_reset.o \
-	gt/intel_ringbuffer.o \
-	gt/intel_mocs.o \
+	gt/intel_ring.o \
+	gt/intel_ring_submission.o \
 	gt/intel_sseu.o \
 	gt/intel_timeline.o \
 	gt/intel_workarounds.o
diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c
index 2360f19f9694..848ce07a8ec2 100644
--- a/drivers/gpu/drm/i915/display/intel_overlay.c
+++ b/drivers/gpu/drm/i915/display/intel_overlay.c
@@ -30,6 +30,7 @@
 #include <drm/i915_drm.h>
 
 #include "gem/i915_gem_pm.h"
+#include "gt/intel_ring.h"
 
 #include "i915_drv.h"
 #include "i915_reg.h"
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index df528e48e566..a03bee30fac1 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -69,9 +69,10 @@
 
 #include <drm/i915_drm.h>
 
-#include "gt/intel_lrc_reg.h"
 #include "gt/intel_engine_heartbeat.h"
 #include "gt/intel_engine_user.h"
+#include "gt/intel_lrc_reg.h"
+#include "gt/intel_ring.h"
 
 #include "i915_gem_context.h"
 #include "i915_globals.h"
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 99d79f94e641..c88948e4094b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -19,6 +19,7 @@
 #include "gt/intel_engine_pool.h"
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_pm.h"
+#include "gt/intel_ring.h"
 
 #include "i915_drv.h"
 #include "i915_gem_clflush.h"
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
index 5bd8de124d74..516e61e99212 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
@@ -8,6 +8,7 @@
 #include "gt/intel_engine_pm.h"
 #include "gt/intel_engine_pool.h"
 #include "gt/intel_gt.h"
+#include "gt/intel_ring.h"
 #include "i915_gem_clflush.h"
 #include "i915_gem_object_blt.h"
 
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
index 549810f70aeb..0877ef4dff63 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
@@ -8,6 +8,7 @@
 
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_pm.h"
+#include "gt/intel_ring.h"
 
 #include "i915_selftest.h"
 #include "selftests/i915_random.h"
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 59c3083c1ec1..ee9d2bcd2c13 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -13,6 +13,7 @@
 #include "intel_context.h"
 #include "intel_engine.h"
 #include "intel_engine_pm.h"
+#include "intel_ring.h"
 
 static struct i915_global_context {
 	struct i915_global base;
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index dd742ac2fbdb..68b3d317d959 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -12,6 +12,7 @@
 #include "i915_active.h"
 #include "intel_context_types.h"
 #include "intel_engine_types.h"
+#include "intel_ring_types.h"
 #include "intel_timeline_types.h"
 
 void intel_context_init(struct intel_context *ce,
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index c7f93d05c8e0..d77b9f9f096c 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -19,7 +19,6 @@
 #include "intel_workarounds.h"
 
 struct drm_printer;
-
 struct intel_gt;
 
 /* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill,
@@ -176,122 +175,9 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
 #define I915_HWS_CSB_WRITE_INDEX	0x1f
 #define CNL_HWS_CSB_WRITE_INDEX		0x2f
 
-struct intel_ring *
-intel_engine_create_ring(struct intel_engine_cs *engine, int size);
-int intel_ring_pin(struct intel_ring *ring);
-void intel_ring_reset(struct intel_ring *ring, u32 tail);
-unsigned int intel_ring_update_space(struct intel_ring *ring);
-void intel_ring_unpin(struct intel_ring *ring);
-void intel_ring_free(struct kref *ref);
-
-static inline struct intel_ring *intel_ring_get(struct intel_ring *ring)
-{
-	kref_get(&ring->ref);
-	return ring;
-}
-
-static inline void intel_ring_put(struct intel_ring *ring)
-{
-	kref_put(&ring->ref, intel_ring_free);
-}
-
 void intel_engine_stop(struct intel_engine_cs *engine);
 void intel_engine_cleanup(struct intel_engine_cs *engine);
 
-int __must_check intel_ring_cacheline_align(struct i915_request *rq);
-
-u32 __must_check *intel_ring_begin(struct i915_request *rq, unsigned int n);
-
-static inline void intel_ring_advance(struct i915_request *rq, u32 *cs)
-{
-	/* Dummy function.
-	 *
-	 * This serves as a placeholder in the code so that the reader
-	 * can compare against the preceding intel_ring_begin() and
-	 * check that the number of dwords emitted matches the space
-	 * reserved for the command packet (i.e. the value passed to
-	 * intel_ring_begin()).
-	 */
-	GEM_BUG_ON((rq->ring->vaddr + rq->ring->emit) != cs);
-}
-
-static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos)
-{
-	return pos & (ring->size - 1);
-}
-
-static inline bool
-intel_ring_offset_valid(const struct intel_ring *ring,
-			unsigned int pos)
-{
-	if (pos & -ring->size) /* must be strictly within the ring */
-		return false;
-
-	if (!IS_ALIGNED(pos, 8)) /* must be qword aligned */
-		return false;
-
-	return true;
-}
-
-static inline u32 intel_ring_offset(const struct i915_request *rq, void *addr)
-{
-	/* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */
-	u32 offset = addr - rq->ring->vaddr;
-	GEM_BUG_ON(offset > rq->ring->size);
-	return intel_ring_wrap(rq->ring, offset);
-}
-
-static inline void
-assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
-{
-	GEM_BUG_ON(!intel_ring_offset_valid(ring, tail));
-
-	/*
-	 * "Ring Buffer Use"
-	 *	Gen2 BSpec "1. Programming Environment" / 1.4.4.6
-	 *	Gen3 BSpec "1c Memory Interface Functions" / 2.3.4.5
-	 *	Gen4+ BSpec "1c Memory Interface and Command Stream" / 5.3.4.5
-	 * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
-	 * same cacheline, the Head Pointer must not be greater than the Tail
-	 * Pointer."
-	 *
-	 * We use ring->head as the last known location of the actual RING_HEAD,
-	 * it may have advanced but in the worst case it is equally the same
-	 * as ring->head and so we should never program RING_TAIL to advance
-	 * into the same cacheline as ring->head.
-	 */
-#define cacheline(a) round_down(a, CACHELINE_BYTES)
-	GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) &&
-		   tail < ring->head);
-#undef cacheline
-}
-
-static inline unsigned int
-intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
-{
-	/* Whilst writes to the tail are strictly order, there is no
-	 * serialisation between readers and the writers. The tail may be
-	 * read by i915_request_retire() just as it is being updated
-	 * by execlists, as although the breadcrumb is complete, the context
-	 * switch hasn't been seen.
-	 */
-	assert_ring_tail_valid(ring, tail);
-	ring->tail = tail;
-	return tail;
-}
-
-static inline unsigned int
-__intel_ring_space(unsigned int head, unsigned int tail, unsigned int size)
-{
-	/*
-	 * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
-	 * same cacheline, the Head Pointer must not be greater than the Tail
-	 * Pointer."
-	 */
-	GEM_BUG_ON(!is_power_of_2(size));
-	return (head - tail - CACHELINE_BYTES) & (size - 1);
-}
-
 int intel_engines_init_mmio(struct intel_gt *gt);
 int intel_engines_setup(struct intel_gt *gt);
 int intel_engines_init(struct intel_gt *gt);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index cd4caf54c59c..2afa2ef90482 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -37,6 +37,7 @@
 #include "intel_context.h"
 #include "intel_lrc.h"
 #include "intel_reset.h"
+#include "intel_ring.h"
 
 /* Haswell does have the CXT_SIZE register however it does not appear to be
  * valid. Now, docs explain in dwords what is in the context object. The full
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index 6fbfa2162e54..3c0f490ff2c7 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -13,6 +13,7 @@
 #include "intel_gt.h"
 #include "intel_gt_pm.h"
 #include "intel_rc6.h"
+#include "intel_ring.h"
 
 static int __engine_unpark(struct intel_wakeref *wf)
 {
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index fbe89bfd3d4f..c5d1047a4bc5 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -59,6 +59,7 @@ struct i915_gem_context;
 struct i915_request;
 struct i915_sched_attr;
 struct intel_gt;
+struct intel_ring;
 struct intel_uncore;
 
 typedef u8 intel_engine_mask_t;
@@ -77,32 +78,6 @@ struct intel_instdone {
 	u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES];
 };
 
-struct intel_ring {
-	struct kref ref;
-	struct i915_vma *vma;
-	void *vaddr;
-
-	/*
-	 * As we have two types of rings, one global to the engine used
-	 * by ringbuffer submission and those that are exclusive to a
-	 * context used by execlists, we have to play safe and allow
-	 * atomic updates to the pin_count. However, the actual pinning
-	 * of the context is either done during initialisation for
-	 * ringbuffer submission or serialised as part of the context
-	 * pinning for execlists, and so we do not need a mutex ourselves
-	 * to serialise intel_ring_pin/intel_ring_unpin.
-	 */
-	atomic_t pin_count;
-
-	u32 head;
-	u32 tail;
-	u32 emit;
-
-	u32 space;
-	u32 size;
-	u32 effective_size;
-};
-
 /*
  * we use a single page to load ctx workarounds so all of these
  * values are referred in terms of dwords
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 8d8fecc69809..9d59debfd168 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -145,6 +145,7 @@
 #include "intel_lrc_reg.h"
 #include "intel_mocs.h"
 #include "intel_reset.h"
+#include "intel_ring.h"
 #include "intel_workarounds.h"
 
 #define RING_EXECLIST_QFULL		(1 << 0x2)
diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
index 06dba7ff294e..6d4c665a997d 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -26,6 +26,7 @@
 #include "intel_gt.h"
 #include "intel_mocs.h"
 #include "intel_lrc.h"
+#include "intel_ring.h"
 
 /* structures required */
 struct drm_i915_mocs_entry {
diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.c b/drivers/gpu/drm/i915/gt/intel_renderstate.c
index 6d05f9c64178..c4edc35e7d89 100644
--- a/drivers/gpu/drm/i915/gt/intel_renderstate.c
+++ b/drivers/gpu/drm/i915/gt/intel_renderstate.c
@@ -27,6 +27,7 @@
 
 #include "i915_drv.h"
 #include "intel_renderstate.h"
+#include "intel_ring.h"
 
 struct intel_renderstate {
 	const struct intel_renderstate_rodata *rodata;
diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c
new file mode 100644
index 000000000000..98876b55f851
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_ring.c
@@ -0,0 +1,314 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "gem/i915_gem_object.h"
+#include "i915_drv.h"
+#include "i915_vma.h"
+#include "intel_engine.h"
+#include "intel_ring.h"
+#include "intel_timeline.h"
+
+unsigned int intel_ring_update_space(struct intel_ring *ring)
+{
+	unsigned int space;
+
+	space = __intel_ring_space(ring->head, ring->emit, ring->size);
+
+	ring->space = space;
+	return space;
+}
+
+int intel_ring_pin(struct intel_ring *ring)
+{
+	struct i915_vma *vma = ring->vma;
+	unsigned int flags;
+	void *addr;
+	int ret;
+
+	if (atomic_fetch_inc(&ring->pin_count))
+		return 0;
+
+	flags = PIN_GLOBAL;
+
+	/* Ring wraparound at offset 0 sometimes hangs. No idea why. */
+	flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
+
+	if (vma->obj->stolen)
+		flags |= PIN_MAPPABLE;
+	else
+		flags |= PIN_HIGH;
+
+	ret = i915_vma_pin(vma, 0, 0, flags);
+	if (unlikely(ret))
+		goto err_unpin;
+
+	if (i915_vma_is_map_and_fenceable(vma))
+		addr = (void __force *)i915_vma_pin_iomap(vma);
+	else
+		addr = i915_gem_object_pin_map(vma->obj,
+					       i915_coherent_map_type(vma->vm->i915));
+	if (IS_ERR(addr)) {
+		ret = PTR_ERR(addr);
+		goto err_ring;
+	}
+
+	i915_vma_make_unshrinkable(vma);
+
+	GEM_BUG_ON(ring->vaddr);
+	ring->vaddr = addr;
+
+	return 0;
+
+err_ring:
+	i915_vma_unpin(vma);
+err_unpin:
+	atomic_dec(&ring->pin_count);
+	return ret;
+}
+
+void intel_ring_reset(struct intel_ring *ring, u32 tail)
+{
+	tail = intel_ring_wrap(ring, tail);
+	ring->tail = tail;
+	ring->head = tail;
+	ring->emit = tail;
+	intel_ring_update_space(ring);
+}
+
+void intel_ring_unpin(struct intel_ring *ring)
+{
+	if (!atomic_dec_and_test(&ring->pin_count))
+		return;
+
+	/* Discard any unused bytes beyond that submitted to hw. */
+	intel_ring_reset(ring, ring->emit);
+
+	GEM_BUG_ON(!ring->vma);
+	i915_vma_unset_ggtt_write(ring->vma);
+	if (i915_vma_is_map_and_fenceable(ring->vma))
+		i915_vma_unpin_iomap(ring->vma);
+	else
+		i915_gem_object_unpin_map(ring->vma->obj);
+
+	GEM_BUG_ON(!ring->vaddr);
+	ring->vaddr = NULL;
+
+	i915_vma_unpin(ring->vma);
+	i915_vma_make_purgeable(ring->vma);
+}
+
+static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size)
+{
+	struct i915_address_space *vm = &ggtt->vm;
+	struct drm_i915_private *i915 = vm->i915;
+	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;
+
+	obj = i915_gem_object_create_stolen(i915, size);
+	if (!obj)
+		obj = i915_gem_object_create_internal(i915, size);
+	if (IS_ERR(obj))
+		return ERR_CAST(obj);
+
+	/*
+	 * Mark ring buffers as read-only from GPU side (so no stray overwrites)
+	 * if supported by the platform's GGTT.
+	 */
+	if (vm->has_read_only)
+		i915_gem_object_set_readonly(obj);
+
+	vma = i915_vma_instance(obj, vm, NULL);
+	i915_gem_object_put(obj);
+
+	return vma;
+}
+
+struct intel_ring *
+intel_engine_create_ring(struct intel_engine_cs *engine, int size)
+{
+	struct drm_i915_private *i915 = engine->i915;
+	struct intel_ring *ring;
+	struct i915_vma *vma;
+
+	GEM_BUG_ON(!is_power_of_2(size));
+	GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES);
+
+	ring = kzalloc(sizeof(*ring), GFP_KERNEL);
+	if (!ring)
+		return ERR_PTR(-ENOMEM);
+
+	kref_init(&ring->ref);
+
+	ring->size = size;
+	/* Workaround an erratum on the i830 which causes a hang if
+	 * the TAIL pointer points to within the last 2 cachelines
+	 * of the buffer.
+	 */
+	ring->effective_size = size;
+	if (IS_I830(i915) || IS_I845G(i915))
+		ring->effective_size -= 2 * CACHELINE_BYTES;
+
+	intel_ring_update_space(ring);
+
+	vma = create_ring_vma(engine->gt->ggtt, size);
+	if (IS_ERR(vma)) {
+		kfree(ring);
+		return ERR_CAST(vma);
+	}
+	ring->vma = vma;
+
+	return ring;
+}
+
+void intel_ring_free(struct kref *ref)
+{
+	struct intel_ring *ring = container_of(ref, typeof(*ring), ref);
+
+	i915_vma_put(ring->vma);
+
+	kfree(ring);
+}
+
+static noinline int
+wait_for_space(struct intel_ring *ring,
+	       struct intel_timeline *tl,
+	       unsigned int bytes)
+{
+	struct i915_request *target;
+	long timeout;
+
+	if (intel_ring_update_space(ring) >= bytes)
+		return 0;
+
+	GEM_BUG_ON(list_empty(&tl->requests));
+	list_for_each_entry(target, &tl->requests, link) {
+		if (target->ring != ring)
+			continue;
+
+		/* Would completion of this request free enough space? */
+		if (bytes <= __intel_ring_space(target->postfix,
+						ring->emit, ring->size))
+			break;
+	}
+
+	if (GEM_WARN_ON(&target->link == &tl->requests))
+		return -ENOSPC;
+
+	timeout = i915_request_wait(target,
+				    I915_WAIT_INTERRUPTIBLE,
+				    MAX_SCHEDULE_TIMEOUT);
+	if (timeout < 0)
+		return timeout;
+
+	i915_request_retire_upto(target);
+
+	intel_ring_update_space(ring);
+	GEM_BUG_ON(ring->space < bytes);
+	return 0;
+}
+
+u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords)
+{
+	struct intel_ring *ring = rq->ring;
+	const unsigned int remain_usable = ring->effective_size - ring->emit;
+	const unsigned int bytes = num_dwords * sizeof(u32);
+	unsigned int need_wrap = 0;
+	unsigned int total_bytes;
+	u32 *cs;
+
+	/* Packets must be qword aligned. */
+	GEM_BUG_ON(num_dwords & 1);
+
+	total_bytes = bytes + rq->reserved_space;
+	GEM_BUG_ON(total_bytes > ring->effective_size);
+
+	if (unlikely(total_bytes > remain_usable)) {
+		const int remain_actual = ring->size - ring->emit;
+
+		if (bytes > remain_usable) {
+			/*
+			 * Not enough space for the basic request. So need to
+			 * flush out the remainder and then wait for
+			 * base + reserved.
+			 */
+			total_bytes += remain_actual;
+			need_wrap = remain_actual | 1;
+		} else  {
+			/*
+			 * The base request will fit but the reserved space
+			 * falls off the end. So we don't need an immediate
+			 * wrap and only need to effectively wait for the
+			 * reserved size from the start of ringbuffer.
+			 */
+			total_bytes = rq->reserved_space + remain_actual;
+		}
+	}
+
+	if (unlikely(total_bytes > ring->space)) {
+		int ret;
+
+		/*
+		 * Space is reserved in the ringbuffer for finalising the
+		 * request, as that cannot be allowed to fail. During request
+		 * finalisation, reserved_space is set to 0 to stop the
+		 * overallocation and the assumption is that then we never need
+		 * to wait (which has the risk of failing with EINTR).
+		 *
+		 * See also i915_request_alloc() and i915_request_add().
+		 */
+		GEM_BUG_ON(!rq->reserved_space);
+
+		ret = wait_for_space(ring, rq->timeline, total_bytes);
+		if (unlikely(ret))
+			return ERR_PTR(ret);
+	}
+
+	if (unlikely(need_wrap)) {
+		need_wrap &= ~1;
+		GEM_BUG_ON(need_wrap > ring->space);
+		GEM_BUG_ON(ring->emit + need_wrap > ring->size);
+		GEM_BUG_ON(!IS_ALIGNED(need_wrap, sizeof(u64)));
+
+		/* Fill the tail with MI_NOOP */
+		memset64(ring->vaddr + ring->emit, 0, need_wrap / sizeof(u64));
+		ring->space -= need_wrap;
+		ring->emit = 0;
+	}
+
+	GEM_BUG_ON(ring->emit > ring->size - bytes);
+	GEM_BUG_ON(ring->space < bytes);
+	cs = ring->vaddr + ring->emit;
+	GEM_DEBUG_EXEC(memset32(cs, POISON_INUSE, bytes / sizeof(*cs)));
+	ring->emit += bytes;
+	ring->space -= bytes;
+
+	return cs;
+}
+
+/* Align the ring tail to a cacheline boundary */
+int intel_ring_cacheline_align(struct i915_request *rq)
+{
+	int num_dwords;
+	void *cs;
+
+	num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32);
+	if (num_dwords == 0)
+		return 0;
+
+	num_dwords = CACHELINE_DWORDS - num_dwords;
+	GEM_BUG_ON(num_dwords & 1);
+
+	cs = intel_ring_begin(rq, num_dwords);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	memset64(cs, (u64)MI_NOOP << 32 | MI_NOOP, num_dwords / 2);
+	intel_ring_advance(rq, cs + num_dwords);
+
+	GEM_BUG_ON(rq->ring->emit & (CACHELINE_BYTES - 1));
+	return 0;
+}
+
diff --git a/drivers/gpu/drm/i915/gt/intel_ring.h b/drivers/gpu/drm/i915/gt/intel_ring.h
new file mode 100644
index 000000000000..ea2839d9e044
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_ring.h
@@ -0,0 +1,131 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_RING_H
+#define INTEL_RING_H
+
+#include "i915_gem.h" /* GEM_BUG_ON */
+#include "i915_request.h"
+#include "intel_ring_types.h"
+
+struct intel_engine_cs;
+
+struct intel_ring *
+intel_engine_create_ring(struct intel_engine_cs *engine, int size);
+
+u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords);
+int intel_ring_cacheline_align(struct i915_request *rq);
+
+unsigned int intel_ring_update_space(struct intel_ring *ring);
+
+int intel_ring_pin(struct intel_ring *ring);
+void intel_ring_unpin(struct intel_ring *ring);
+void intel_ring_reset(struct intel_ring *ring, u32 tail);
+
+void intel_ring_free(struct kref *ref);
+
+static inline struct intel_ring *intel_ring_get(struct intel_ring *ring)
+{
+	kref_get(&ring->ref);
+	return ring;
+}
+
+static inline void intel_ring_put(struct intel_ring *ring)
+{
+	kref_put(&ring->ref, intel_ring_free);
+}
+
+static inline void intel_ring_advance(struct i915_request *rq, u32 *cs)
+{
+	/* Dummy function.
+	 *
+	 * This serves as a placeholder in the code so that the reader
+	 * can compare against the preceding intel_ring_begin() and
+	 * check that the number of dwords emitted matches the space
+	 * reserved for the command packet (i.e. the value passed to
+	 * intel_ring_begin()).
+	 */
+	GEM_BUG_ON((rq->ring->vaddr + rq->ring->emit) != cs);
+}
+
+static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos)
+{
+	return pos & (ring->size - 1);
+}
+
+static inline bool
+intel_ring_offset_valid(const struct intel_ring *ring,
+			unsigned int pos)
+{
+	if (pos & -ring->size) /* must be strictly within the ring */
+		return false;
+
+	if (!IS_ALIGNED(pos, 8)) /* must be qword aligned */
+		return false;
+
+	return true;
+}
+
+static inline u32 intel_ring_offset(const struct i915_request *rq, void *addr)
+{
+	/* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */
+	u32 offset = addr - rq->ring->vaddr;
+	GEM_BUG_ON(offset > rq->ring->size);
+	return intel_ring_wrap(rq->ring, offset);
+}
+
+static inline void
+assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
+{
+	GEM_BUG_ON(!intel_ring_offset_valid(ring, tail));
+
+	/*
+	 * "Ring Buffer Use"
+	 *	Gen2 BSpec "1. Programming Environment" / 1.4.4.6
+	 *	Gen3 BSpec "1c Memory Interface Functions" / 2.3.4.5
+	 *	Gen4+ BSpec "1c Memory Interface and Command Stream" / 5.3.4.5
+	 * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
+	 * same cacheline, the Head Pointer must not be greater than the Tail
+	 * Pointer."
+	 *
+	 * We use ring->head as the last known location of the actual RING_HEAD,
+	 * it may have advanced but in the worst case it is equally the same
+	 * as ring->head and so we should never program RING_TAIL to advance
+	 * into the same cacheline as ring->head.
+	 */
+#define cacheline(a) round_down(a, CACHELINE_BYTES)
+	GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) &&
+		   tail < ring->head);
+#undef cacheline
+}
+
+static inline unsigned int
+intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
+{
+	/* Whilst writes to the tail are strictly order, there is no
+	 * serialisation between readers and the writers. The tail may be
+	 * read by i915_request_retire() just as it is being updated
+	 * by execlists, as although the breadcrumb is complete, the context
+	 * switch hasn't been seen.
+	 */
+	assert_ring_tail_valid(ring, tail);
+	ring->tail = tail;
+	return tail;
+}
+
+static inline unsigned int
+__intel_ring_space(unsigned int head, unsigned int tail, unsigned int size)
+{
+	/*
+	 * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
+	 * same cacheline, the Head Pointer must not be greater than the Tail
+	 * Pointer."
+	 */
+	GEM_BUG_ON(!is_power_of_2(size));
+	return (head - tail - CACHELINE_BYTES) & (size - 1);
+}
+
+#endif /* INTEL_RING_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index 0f959694303c..14ad10acd548 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -4,13 +4,13 @@
  * Copyright © 2016-2018 Intel Corporation
  */
 
-#include "gt/intel_gt_types.h"
-
 #include "i915_drv.h"
 
 #include "i915_active.h"
 #include "i915_syncmap.h"
-#include "gt/intel_timeline.h"
+#include "intel_gt.h"
+#include "intel_ring.h"
+#include "intel_timeline.h"
 
 #define ptr_set_bit(ptr, bit) ((typeof(ptr))((unsigned long)(ptr) | BIT(bit)))
 #define ptr_test_bit(ptr, bit) ((unsigned long)(ptr) & BIT(bit))
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index af8a8183154a..7cb6dab4399d 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -7,6 +7,7 @@
 #include "i915_drv.h"
 #include "intel_context.h"
 #include "intel_gt.h"
+#include "intel_ring.h"
 #include "intel_workarounds.h"
 
 /**
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index 123db2c3f956..83f549d203a0 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -23,6 +23,7 @@
  */
 
 #include "gem/i915_gem_context.h"
+#include "gt/intel_ring.h"
 
 #include "i915_drv.h"
 #include "intel_context.h"
diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c
index dac86f699a4c..f04a59fe5d2c 100644
--- a/drivers/gpu/drm/i915/gt/selftest_timeline.c
+++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c
@@ -9,6 +9,7 @@
 #include "intel_engine_pm.h"
 #include "intel_gt.h"
 #include "intel_gt_requests.h"
+#include "intel_ring.h"
 
 #include "../selftests/i915_random.h"
 #include "../i915_selftest.h"
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 009e54a3764f..1b1691aaed28 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -6,12 +6,13 @@
 #include <linux/circ_buf.h>
 
 #include "gem/i915_gem_context.h"
-
 #include "gt/intel_context.h"
 #include "gt/intel_engine_pm.h"
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_pm.h"
 #include "gt/intel_lrc_reg.h"
+#include "gt/intel_ring.h"
+
 #include "intel_guc_submission.h"
 
 #include "i915_drv.h"
diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
index e753b1e706e2..6a3ac8cde95d 100644
--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
@@ -35,7 +35,9 @@
  */
 
 #include <linux/slab.h>
+
 #include "i915_drv.h"
+#include "gt/intel_ring.h"
 #include "gvt.h"
 #include "i915_pvinfo.h"
 #include "trace.h"
diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c
index 4208e40445b1..aaf15916d29a 100644
--- a/drivers/gpu/drm/i915/gvt/mmio_context.c
+++ b/drivers/gpu/drm/i915/gvt/mmio_context.c
@@ -35,6 +35,7 @@
 
 #include "i915_drv.h"
 #include "gt/intel_context.h"
+#include "gt/intel_ring.h"
 #include "gvt.h"
 #include "trace.h"
 
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index a5b942ee3ceb..377811f8853f 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -38,6 +38,7 @@
 #include "gem/i915_gem_context.h"
 #include "gem/i915_gem_pm.h"
 #include "gt/intel_context.h"
+#include "gt/intel_ring.h"
 
 #include "i915_drv.h"
 #include "gvt.h"
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index 4c190a548ba7..3e3495838a93 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -8,6 +8,7 @@
 
 #include "gt/intel_engine_heartbeat.h"
 #include "gt/intel_engine_pm.h"
+#include "gt/intel_ring.h"
 
 #include "i915_drv.h"
 #include "i915_active.h"
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 3130b0c7ed83..38d3de2dfaa6 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -200,6 +200,7 @@
 #include "gt/intel_engine_user.h"
 #include "gt/intel_gt.h"
 #include "gt/intel_lrc_reg.h"
+#include "gt/intel_ring.h"
 
 #include "i915_drv.h"
 #include "i915_perf.h"
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 4575f368455d..932c5cf190b5 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -31,6 +31,7 @@
 
 #include "gem/i915_gem_context.h"
 #include "gt/intel_context.h"
+#include "gt/intel_ring.h"
 
 #include "i915_active.h"
 #include "i915_drv.h"
-- 
2.24.0.rc0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

WARNING: multiple messages have this Message-ID (diff)
From: Chris Wilson <chris@chris-wilson.co.uk>
To: intel-gfx@lists.freedesktop.org
Subject: [Intel-gfx] [PATCH] drm/i915/gt: Split intel_ring_submission
Date: Thu, 24 Oct 2019 08:11:39 +0100	[thread overview]
Message-ID: <20191024071139.31917-1-chris@chris-wilson.co.uk> (raw)
Message-ID: <20191024071139.VR_8Sh0MBRbQ6rXAiU6wC6OeHjeWg4bttF7uhGUxB3M@z> (raw)

Split the legacy submission backend from the common ring buffer.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Makefile                 |   5 +-
 drivers/gpu/drm/i915/display/intel_overlay.c  |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_context.c   |   3 +-
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    |   1 +
 .../gpu/drm/i915/gem/i915_gem_object_blt.c    |   1 +
 .../i915/gem/selftests/i915_gem_coherency.c   |   1 +
 drivers/gpu/drm/i915/gt/intel_context.c       |   1 +
 drivers/gpu/drm/i915/gt/intel_context.h       |   1 +
 drivers/gpu/drm/i915/gt/intel_engine.h        | 114 -------
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     |   1 +
 drivers/gpu/drm/i915/gt/intel_engine_pm.c     |   1 +
 drivers/gpu/drm/i915/gt/intel_engine_types.h  |  27 +-
 drivers/gpu/drm/i915/gt/intel_lrc.c           |   1 +
 drivers/gpu/drm/i915/gt/intel_mocs.c          |   1 +
 drivers/gpu/drm/i915/gt/intel_renderstate.c   |   1 +
 drivers/gpu/drm/i915/gt/intel_ring.c          | 314 ++++++++++++++++++
 drivers/gpu/drm/i915/gt/intel_ring.h          | 131 ++++++++
 drivers/gpu/drm/i915/gt/intel_timeline.c      |   6 +-
 drivers/gpu/drm/i915/gt/intel_workarounds.c   |   1 +
 drivers/gpu/drm/i915/gt/mock_engine.c         |   1 +
 drivers/gpu/drm/i915/gt/selftest_timeline.c   |   1 +
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c |   3 +-
 drivers/gpu/drm/i915/gvt/cmd_parser.c         |   2 +
 drivers/gpu/drm/i915/gvt/mmio_context.c       |   1 +
 drivers/gpu/drm/i915/gvt/scheduler.c          |   1 +
 drivers/gpu/drm/i915/i915_active.c            |   1 +
 drivers/gpu/drm/i915/i915_perf.c              |   1 +
 drivers/gpu/drm/i915/i915_request.c           |   1 +
 28 files changed, 477 insertions(+), 147 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gt/intel_ring.c
 create mode 100644 drivers/gpu/drm/i915/gt/intel_ring.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index a91e0a487a79..5021aa7fa187 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -90,11 +90,12 @@ gt-y += \
 	gt/intel_gt_requests.o \
 	gt/intel_llc.o \
 	gt/intel_lrc.o \
+	gt/intel_mocs.o \
 	gt/intel_rc6.o \
 	gt/intel_renderstate.o \
 	gt/intel_reset.o \
-	gt/intel_ringbuffer.o \
-	gt/intel_mocs.o \
+	gt/intel_ring.o \
+	gt/intel_ring_submission.o \
 	gt/intel_sseu.o \
 	gt/intel_timeline.o \
 	gt/intel_workarounds.o
diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c
index 2360f19f9694..848ce07a8ec2 100644
--- a/drivers/gpu/drm/i915/display/intel_overlay.c
+++ b/drivers/gpu/drm/i915/display/intel_overlay.c
@@ -30,6 +30,7 @@
 #include <drm/i915_drm.h>
 
 #include "gem/i915_gem_pm.h"
+#include "gt/intel_ring.h"
 
 #include "i915_drv.h"
 #include "i915_reg.h"
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index df528e48e566..a03bee30fac1 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -69,9 +69,10 @@
 
 #include <drm/i915_drm.h>
 
-#include "gt/intel_lrc_reg.h"
 #include "gt/intel_engine_heartbeat.h"
 #include "gt/intel_engine_user.h"
+#include "gt/intel_lrc_reg.h"
+#include "gt/intel_ring.h"
 
 #include "i915_gem_context.h"
 #include "i915_globals.h"
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 99d79f94e641..c88948e4094b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -19,6 +19,7 @@
 #include "gt/intel_engine_pool.h"
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_pm.h"
+#include "gt/intel_ring.h"
 
 #include "i915_drv.h"
 #include "i915_gem_clflush.h"
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
index 5bd8de124d74..516e61e99212 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
@@ -8,6 +8,7 @@
 #include "gt/intel_engine_pm.h"
 #include "gt/intel_engine_pool.h"
 #include "gt/intel_gt.h"
+#include "gt/intel_ring.h"
 #include "i915_gem_clflush.h"
 #include "i915_gem_object_blt.h"
 
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
index 549810f70aeb..0877ef4dff63 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
@@ -8,6 +8,7 @@
 
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_pm.h"
+#include "gt/intel_ring.h"
 
 #include "i915_selftest.h"
 #include "selftests/i915_random.h"
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 59c3083c1ec1..ee9d2bcd2c13 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -13,6 +13,7 @@
 #include "intel_context.h"
 #include "intel_engine.h"
 #include "intel_engine_pm.h"
+#include "intel_ring.h"
 
 static struct i915_global_context {
 	struct i915_global base;
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index dd742ac2fbdb..68b3d317d959 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -12,6 +12,7 @@
 #include "i915_active.h"
 #include "intel_context_types.h"
 #include "intel_engine_types.h"
+#include "intel_ring_types.h"
 #include "intel_timeline_types.h"
 
 void intel_context_init(struct intel_context *ce,
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index c7f93d05c8e0..d77b9f9f096c 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -19,7 +19,6 @@
 #include "intel_workarounds.h"
 
 struct drm_printer;
-
 struct intel_gt;
 
 /* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill,
@@ -176,122 +175,9 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
 #define I915_HWS_CSB_WRITE_INDEX	0x1f
 #define CNL_HWS_CSB_WRITE_INDEX		0x2f
 
-struct intel_ring *
-intel_engine_create_ring(struct intel_engine_cs *engine, int size);
-int intel_ring_pin(struct intel_ring *ring);
-void intel_ring_reset(struct intel_ring *ring, u32 tail);
-unsigned int intel_ring_update_space(struct intel_ring *ring);
-void intel_ring_unpin(struct intel_ring *ring);
-void intel_ring_free(struct kref *ref);
-
-static inline struct intel_ring *intel_ring_get(struct intel_ring *ring)
-{
-	kref_get(&ring->ref);
-	return ring;
-}
-
-static inline void intel_ring_put(struct intel_ring *ring)
-{
-	kref_put(&ring->ref, intel_ring_free);
-}
-
 void intel_engine_stop(struct intel_engine_cs *engine);
 void intel_engine_cleanup(struct intel_engine_cs *engine);
 
-int __must_check intel_ring_cacheline_align(struct i915_request *rq);
-
-u32 __must_check *intel_ring_begin(struct i915_request *rq, unsigned int n);
-
-static inline void intel_ring_advance(struct i915_request *rq, u32 *cs)
-{
-	/* Dummy function.
-	 *
-	 * This serves as a placeholder in the code so that the reader
-	 * can compare against the preceding intel_ring_begin() and
-	 * check that the number of dwords emitted matches the space
-	 * reserved for the command packet (i.e. the value passed to
-	 * intel_ring_begin()).
-	 */
-	GEM_BUG_ON((rq->ring->vaddr + rq->ring->emit) != cs);
-}
-
-static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos)
-{
-	return pos & (ring->size - 1);
-}
-
-static inline bool
-intel_ring_offset_valid(const struct intel_ring *ring,
-			unsigned int pos)
-{
-	if (pos & -ring->size) /* must be strictly within the ring */
-		return false;
-
-	if (!IS_ALIGNED(pos, 8)) /* must be qword aligned */
-		return false;
-
-	return true;
-}
-
-static inline u32 intel_ring_offset(const struct i915_request *rq, void *addr)
-{
-	/* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */
-	u32 offset = addr - rq->ring->vaddr;
-	GEM_BUG_ON(offset > rq->ring->size);
-	return intel_ring_wrap(rq->ring, offset);
-}
-
-static inline void
-assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
-{
-	GEM_BUG_ON(!intel_ring_offset_valid(ring, tail));
-
-	/*
-	 * "Ring Buffer Use"
-	 *	Gen2 BSpec "1. Programming Environment" / 1.4.4.6
-	 *	Gen3 BSpec "1c Memory Interface Functions" / 2.3.4.5
-	 *	Gen4+ BSpec "1c Memory Interface and Command Stream" / 5.3.4.5
-	 * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
-	 * same cacheline, the Head Pointer must not be greater than the Tail
-	 * Pointer."
-	 *
-	 * We use ring->head as the last known location of the actual RING_HEAD,
-	 * it may have advanced but in the worst case it is equally the same
-	 * as ring->head and so we should never program RING_TAIL to advance
-	 * into the same cacheline as ring->head.
-	 */
-#define cacheline(a) round_down(a, CACHELINE_BYTES)
-	GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) &&
-		   tail < ring->head);
-#undef cacheline
-}
-
-static inline unsigned int
-intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
-{
-	/* Whilst writes to the tail are strictly order, there is no
-	 * serialisation between readers and the writers. The tail may be
-	 * read by i915_request_retire() just as it is being updated
-	 * by execlists, as although the breadcrumb is complete, the context
-	 * switch hasn't been seen.
-	 */
-	assert_ring_tail_valid(ring, tail);
-	ring->tail = tail;
-	return tail;
-}
-
-static inline unsigned int
-__intel_ring_space(unsigned int head, unsigned int tail, unsigned int size)
-{
-	/*
-	 * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
-	 * same cacheline, the Head Pointer must not be greater than the Tail
-	 * Pointer."
-	 */
-	GEM_BUG_ON(!is_power_of_2(size));
-	return (head - tail - CACHELINE_BYTES) & (size - 1);
-}
-
 int intel_engines_init_mmio(struct intel_gt *gt);
 int intel_engines_setup(struct intel_gt *gt);
 int intel_engines_init(struct intel_gt *gt);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index cd4caf54c59c..2afa2ef90482 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -37,6 +37,7 @@
 #include "intel_context.h"
 #include "intel_lrc.h"
 #include "intel_reset.h"
+#include "intel_ring.h"
 
 /* Haswell does have the CXT_SIZE register however it does not appear to be
  * valid. Now, docs explain in dwords what is in the context object. The full
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index 6fbfa2162e54..3c0f490ff2c7 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -13,6 +13,7 @@
 #include "intel_gt.h"
 #include "intel_gt_pm.h"
 #include "intel_rc6.h"
+#include "intel_ring.h"
 
 static int __engine_unpark(struct intel_wakeref *wf)
 {
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index fbe89bfd3d4f..c5d1047a4bc5 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -59,6 +59,7 @@ struct i915_gem_context;
 struct i915_request;
 struct i915_sched_attr;
 struct intel_gt;
+struct intel_ring;
 struct intel_uncore;
 
 typedef u8 intel_engine_mask_t;
@@ -77,32 +78,6 @@ struct intel_instdone {
 	u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES];
 };
 
-struct intel_ring {
-	struct kref ref;
-	struct i915_vma *vma;
-	void *vaddr;
-
-	/*
-	 * As we have two types of rings, one global to the engine used
-	 * by ringbuffer submission and those that are exclusive to a
-	 * context used by execlists, we have to play safe and allow
-	 * atomic updates to the pin_count. However, the actual pinning
-	 * of the context is either done during initialisation for
-	 * ringbuffer submission or serialised as part of the context
-	 * pinning for execlists, and so we do not need a mutex ourselves
-	 * to serialise intel_ring_pin/intel_ring_unpin.
-	 */
-	atomic_t pin_count;
-
-	u32 head;
-	u32 tail;
-	u32 emit;
-
-	u32 space;
-	u32 size;
-	u32 effective_size;
-};
-
 /*
  * we use a single page to load ctx workarounds so all of these
  * values are referred in terms of dwords
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 8d8fecc69809..9d59debfd168 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -145,6 +145,7 @@
 #include "intel_lrc_reg.h"
 #include "intel_mocs.h"
 #include "intel_reset.h"
+#include "intel_ring.h"
 #include "intel_workarounds.h"
 
 #define RING_EXECLIST_QFULL		(1 << 0x2)
diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
index 06dba7ff294e..6d4c665a997d 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -26,6 +26,7 @@
 #include "intel_gt.h"
 #include "intel_mocs.h"
 #include "intel_lrc.h"
+#include "intel_ring.h"
 
 /* structures required */
 struct drm_i915_mocs_entry {
diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.c b/drivers/gpu/drm/i915/gt/intel_renderstate.c
index 6d05f9c64178..c4edc35e7d89 100644
--- a/drivers/gpu/drm/i915/gt/intel_renderstate.c
+++ b/drivers/gpu/drm/i915/gt/intel_renderstate.c
@@ -27,6 +27,7 @@
 
 #include "i915_drv.h"
 #include "intel_renderstate.h"
+#include "intel_ring.h"
 
 struct intel_renderstate {
 	const struct intel_renderstate_rodata *rodata;
diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c
new file mode 100644
index 000000000000..98876b55f851
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_ring.c
@@ -0,0 +1,314 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "gem/i915_gem_object.h"
+#include "i915_drv.h"
+#include "i915_vma.h"
+#include "intel_engine.h"
+#include "intel_ring.h"
+#include "intel_timeline.h"
+
+unsigned int intel_ring_update_space(struct intel_ring *ring)
+{
+	unsigned int space;
+
+	space = __intel_ring_space(ring->head, ring->emit, ring->size);
+
+	ring->space = space;
+	return space;
+}
+
+int intel_ring_pin(struct intel_ring *ring)
+{
+	struct i915_vma *vma = ring->vma;
+	unsigned int flags;
+	void *addr;
+	int ret;
+
+	if (atomic_fetch_inc(&ring->pin_count))
+		return 0;
+
+	flags = PIN_GLOBAL;
+
+	/* Ring wraparound at offset 0 sometimes hangs. No idea why. */
+	flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
+
+	if (vma->obj->stolen)
+		flags |= PIN_MAPPABLE;
+	else
+		flags |= PIN_HIGH;
+
+	ret = i915_vma_pin(vma, 0, 0, flags);
+	if (unlikely(ret))
+		goto err_unpin;
+
+	if (i915_vma_is_map_and_fenceable(vma))
+		addr = (void __force *)i915_vma_pin_iomap(vma);
+	else
+		addr = i915_gem_object_pin_map(vma->obj,
+					       i915_coherent_map_type(vma->vm->i915));
+	if (IS_ERR(addr)) {
+		ret = PTR_ERR(addr);
+		goto err_ring;
+	}
+
+	i915_vma_make_unshrinkable(vma);
+
+	GEM_BUG_ON(ring->vaddr);
+	ring->vaddr = addr;
+
+	return 0;
+
+err_ring:
+	i915_vma_unpin(vma);
+err_unpin:
+	atomic_dec(&ring->pin_count);
+	return ret;
+}
+
+void intel_ring_reset(struct intel_ring *ring, u32 tail)
+{
+	tail = intel_ring_wrap(ring, tail);
+	ring->tail = tail;
+	ring->head = tail;
+	ring->emit = tail;
+	intel_ring_update_space(ring);
+}
+
+void intel_ring_unpin(struct intel_ring *ring)
+{
+	if (!atomic_dec_and_test(&ring->pin_count))
+		return;
+
+	/* Discard any unused bytes beyond that submitted to hw. */
+	intel_ring_reset(ring, ring->emit);
+
+	GEM_BUG_ON(!ring->vma);
+	i915_vma_unset_ggtt_write(ring->vma);
+	if (i915_vma_is_map_and_fenceable(ring->vma))
+		i915_vma_unpin_iomap(ring->vma);
+	else
+		i915_gem_object_unpin_map(ring->vma->obj);
+
+	GEM_BUG_ON(!ring->vaddr);
+	ring->vaddr = NULL;
+
+	i915_vma_unpin(ring->vma);
+	i915_vma_make_purgeable(ring->vma);
+}
+
+static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size)
+{
+	struct i915_address_space *vm = &ggtt->vm;
+	struct drm_i915_private *i915 = vm->i915;
+	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;
+
+	obj = i915_gem_object_create_stolen(i915, size);
+	if (!obj)
+		obj = i915_gem_object_create_internal(i915, size);
+	if (IS_ERR(obj))
+		return ERR_CAST(obj);
+
+	/*
+	 * Mark ring buffers as read-only from GPU side (so no stray overwrites)
+	 * if supported by the platform's GGTT.
+	 */
+	if (vm->has_read_only)
+		i915_gem_object_set_readonly(obj);
+
+	vma = i915_vma_instance(obj, vm, NULL);
+	i915_gem_object_put(obj);
+
+	return vma;
+}
+
+struct intel_ring *
+intel_engine_create_ring(struct intel_engine_cs *engine, int size)
+{
+	struct drm_i915_private *i915 = engine->i915;
+	struct intel_ring *ring;
+	struct i915_vma *vma;
+
+	GEM_BUG_ON(!is_power_of_2(size));
+	GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES);
+
+	ring = kzalloc(sizeof(*ring), GFP_KERNEL);
+	if (!ring)
+		return ERR_PTR(-ENOMEM);
+
+	kref_init(&ring->ref);
+
+	ring->size = size;
+	/* Workaround an erratum on the i830 which causes a hang if
+	 * the TAIL pointer points to within the last 2 cachelines
+	 * of the buffer.
+	 */
+	ring->effective_size = size;
+	if (IS_I830(i915) || IS_I845G(i915))
+		ring->effective_size -= 2 * CACHELINE_BYTES;
+
+	intel_ring_update_space(ring);
+
+	vma = create_ring_vma(engine->gt->ggtt, size);
+	if (IS_ERR(vma)) {
+		kfree(ring);
+		return ERR_CAST(vma);
+	}
+	ring->vma = vma;
+
+	return ring;
+}
+
+void intel_ring_free(struct kref *ref)
+{
+	struct intel_ring *ring = container_of(ref, typeof(*ring), ref);
+
+	i915_vma_put(ring->vma);
+
+	kfree(ring);
+}
+
+static noinline int
+wait_for_space(struct intel_ring *ring,
+	       struct intel_timeline *tl,
+	       unsigned int bytes)
+{
+	struct i915_request *target;
+	long timeout;
+
+	if (intel_ring_update_space(ring) >= bytes)
+		return 0;
+
+	GEM_BUG_ON(list_empty(&tl->requests));
+	list_for_each_entry(target, &tl->requests, link) {
+		if (target->ring != ring)
+			continue;
+
+		/* Would completion of this request free enough space? */
+		if (bytes <= __intel_ring_space(target->postfix,
+						ring->emit, ring->size))
+			break;
+	}
+
+	if (GEM_WARN_ON(&target->link == &tl->requests))
+		return -ENOSPC;
+
+	timeout = i915_request_wait(target,
+				    I915_WAIT_INTERRUPTIBLE,
+				    MAX_SCHEDULE_TIMEOUT);
+	if (timeout < 0)
+		return timeout;
+
+	i915_request_retire_upto(target);
+
+	intel_ring_update_space(ring);
+	GEM_BUG_ON(ring->space < bytes);
+	return 0;
+}
+
+u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords)
+{
+	struct intel_ring *ring = rq->ring;
+	const unsigned int remain_usable = ring->effective_size - ring->emit;
+	const unsigned int bytes = num_dwords * sizeof(u32);
+	unsigned int need_wrap = 0;
+	unsigned int total_bytes;
+	u32 *cs;
+
+	/* Packets must be qword aligned. */
+	GEM_BUG_ON(num_dwords & 1);
+
+	total_bytes = bytes + rq->reserved_space;
+	GEM_BUG_ON(total_bytes > ring->effective_size);
+
+	if (unlikely(total_bytes > remain_usable)) {
+		const int remain_actual = ring->size - ring->emit;
+
+		if (bytes > remain_usable) {
+			/*
+			 * Not enough space for the basic request. So need to
+			 * flush out the remainder and then wait for
+			 * base + reserved.
+			 */
+			total_bytes += remain_actual;
+			need_wrap = remain_actual | 1;
+		} else  {
+			/*
+			 * The base request will fit but the reserved space
+			 * falls off the end. So we don't need an immediate
+			 * wrap and only need to effectively wait for the
+			 * reserved size from the start of ringbuffer.
+			 */
+			total_bytes = rq->reserved_space + remain_actual;
+		}
+	}
+
+	if (unlikely(total_bytes > ring->space)) {
+		int ret;
+
+		/*
+		 * Space is reserved in the ringbuffer for finalising the
+		 * request, as that cannot be allowed to fail. During request
+		 * finalisation, reserved_space is set to 0 to stop the
+		 * overallocation and the assumption is that then we never need
+		 * to wait (which has the risk of failing with EINTR).
+		 *
+		 * See also i915_request_alloc() and i915_request_add().
+		 */
+		GEM_BUG_ON(!rq->reserved_space);
+
+		ret = wait_for_space(ring, rq->timeline, total_bytes);
+		if (unlikely(ret))
+			return ERR_PTR(ret);
+	}
+
+	if (unlikely(need_wrap)) {
+		need_wrap &= ~1;
+		GEM_BUG_ON(need_wrap > ring->space);
+		GEM_BUG_ON(ring->emit + need_wrap > ring->size);
+		GEM_BUG_ON(!IS_ALIGNED(need_wrap, sizeof(u64)));
+
+		/* Fill the tail with MI_NOOP */
+		memset64(ring->vaddr + ring->emit, 0, need_wrap / sizeof(u64));
+		ring->space -= need_wrap;
+		ring->emit = 0;
+	}
+
+	GEM_BUG_ON(ring->emit > ring->size - bytes);
+	GEM_BUG_ON(ring->space < bytes);
+	cs = ring->vaddr + ring->emit;
+	GEM_DEBUG_EXEC(memset32(cs, POISON_INUSE, bytes / sizeof(*cs)));
+	ring->emit += bytes;
+	ring->space -= bytes;
+
+	return cs;
+}
+
+/* Align the ring tail to a cacheline boundary */
+int intel_ring_cacheline_align(struct i915_request *rq)
+{
+	int num_dwords;
+	void *cs;
+
+	num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32);
+	if (num_dwords == 0)
+		return 0;
+
+	num_dwords = CACHELINE_DWORDS - num_dwords;
+	GEM_BUG_ON(num_dwords & 1);
+
+	cs = intel_ring_begin(rq, num_dwords);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	memset64(cs, (u64)MI_NOOP << 32 | MI_NOOP, num_dwords / 2);
+	intel_ring_advance(rq, cs + num_dwords);
+
+	GEM_BUG_ON(rq->ring->emit & (CACHELINE_BYTES - 1));
+	return 0;
+}
+
diff --git a/drivers/gpu/drm/i915/gt/intel_ring.h b/drivers/gpu/drm/i915/gt/intel_ring.h
new file mode 100644
index 000000000000..ea2839d9e044
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_ring.h
@@ -0,0 +1,131 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_RING_H
+#define INTEL_RING_H
+
+#include "i915_gem.h" /* GEM_BUG_ON */
+#include "i915_request.h"
+#include "intel_ring_types.h"
+
+struct intel_engine_cs;
+
+struct intel_ring *
+intel_engine_create_ring(struct intel_engine_cs *engine, int size);
+
+u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords);
+int intel_ring_cacheline_align(struct i915_request *rq);
+
+unsigned int intel_ring_update_space(struct intel_ring *ring);
+
+int intel_ring_pin(struct intel_ring *ring);
+void intel_ring_unpin(struct intel_ring *ring);
+void intel_ring_reset(struct intel_ring *ring, u32 tail);
+
+void intel_ring_free(struct kref *ref);
+
+static inline struct intel_ring *intel_ring_get(struct intel_ring *ring)
+{
+	kref_get(&ring->ref);
+	return ring;
+}
+
+static inline void intel_ring_put(struct intel_ring *ring)
+{
+	kref_put(&ring->ref, intel_ring_free);
+}
+
+static inline void intel_ring_advance(struct i915_request *rq, u32 *cs)
+{
+	/* Dummy function.
+	 *
+	 * This serves as a placeholder in the code so that the reader
+	 * can compare against the preceding intel_ring_begin() and
+	 * check that the number of dwords emitted matches the space
+	 * reserved for the command packet (i.e. the value passed to
+	 * intel_ring_begin()).
+	 */
+	GEM_BUG_ON((rq->ring->vaddr + rq->ring->emit) != cs);
+}
+
+static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos)
+{
+	return pos & (ring->size - 1);
+}
+
+static inline bool
+intel_ring_offset_valid(const struct intel_ring *ring,
+			unsigned int pos)
+{
+	if (pos & -ring->size) /* must be strictly within the ring */
+		return false;
+
+	if (!IS_ALIGNED(pos, 8)) /* must be qword aligned */
+		return false;
+
+	return true;
+}
+
+static inline u32 intel_ring_offset(const struct i915_request *rq, void *addr)
+{
+	/* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */
+	u32 offset = addr - rq->ring->vaddr;
+	GEM_BUG_ON(offset > rq->ring->size);
+	return intel_ring_wrap(rq->ring, offset);
+}
+
+static inline void
+assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
+{
+	GEM_BUG_ON(!intel_ring_offset_valid(ring, tail));
+
+	/*
+	 * "Ring Buffer Use"
+	 *	Gen2 BSpec "1. Programming Environment" / 1.4.4.6
+	 *	Gen3 BSpec "1c Memory Interface Functions" / 2.3.4.5
+	 *	Gen4+ BSpec "1c Memory Interface and Command Stream" / 5.3.4.5
+	 * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
+	 * same cacheline, the Head Pointer must not be greater than the Tail
+	 * Pointer."
+	 *
+	 * We use ring->head as the last known location of the actual RING_HEAD,
+	 * it may have advanced but in the worst case it is equally the same
+	 * as ring->head and so we should never program RING_TAIL to advance
+	 * into the same cacheline as ring->head.
+	 */
+#define cacheline(a) round_down(a, CACHELINE_BYTES)
+	GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) &&
+		   tail < ring->head);
+#undef cacheline
+}
+
+static inline unsigned int
+intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
+{
+	/* Whilst writes to the tail are strictly order, there is no
+	 * serialisation between readers and the writers. The tail may be
+	 * read by i915_request_retire() just as it is being updated
+	 * by execlists, as although the breadcrumb is complete, the context
+	 * switch hasn't been seen.
+	 */
+	assert_ring_tail_valid(ring, tail);
+	ring->tail = tail;
+	return tail;
+}
+
+static inline unsigned int
+__intel_ring_space(unsigned int head, unsigned int tail, unsigned int size)
+{
+	/*
+	 * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
+	 * same cacheline, the Head Pointer must not be greater than the Tail
+	 * Pointer."
+	 */
+	GEM_BUG_ON(!is_power_of_2(size));
+	return (head - tail - CACHELINE_BYTES) & (size - 1);
+}
+
+#endif /* INTEL_RING_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index 0f959694303c..14ad10acd548 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -4,13 +4,13 @@
  * Copyright © 2016-2018 Intel Corporation
  */
 
-#include "gt/intel_gt_types.h"
-
 #include "i915_drv.h"
 
 #include "i915_active.h"
 #include "i915_syncmap.h"
-#include "gt/intel_timeline.h"
+#include "intel_gt.h"
+#include "intel_ring.h"
+#include "intel_timeline.h"
 
 #define ptr_set_bit(ptr, bit) ((typeof(ptr))((unsigned long)(ptr) | BIT(bit)))
 #define ptr_test_bit(ptr, bit) ((unsigned long)(ptr) & BIT(bit))
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index af8a8183154a..7cb6dab4399d 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -7,6 +7,7 @@
 #include "i915_drv.h"
 #include "intel_context.h"
 #include "intel_gt.h"
+#include "intel_ring.h"
 #include "intel_workarounds.h"
 
 /**
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index 123db2c3f956..83f549d203a0 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -23,6 +23,7 @@
  */
 
 #include "gem/i915_gem_context.h"
+#include "gt/intel_ring.h"
 
 #include "i915_drv.h"
 #include "intel_context.h"
diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c
index dac86f699a4c..f04a59fe5d2c 100644
--- a/drivers/gpu/drm/i915/gt/selftest_timeline.c
+++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c
@@ -9,6 +9,7 @@
 #include "intel_engine_pm.h"
 #include "intel_gt.h"
 #include "intel_gt_requests.h"
+#include "intel_ring.h"
 
 #include "../selftests/i915_random.h"
 #include "../i915_selftest.h"
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 009e54a3764f..1b1691aaed28 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -6,12 +6,13 @@
 #include <linux/circ_buf.h>
 
 #include "gem/i915_gem_context.h"
-
 #include "gt/intel_context.h"
 #include "gt/intel_engine_pm.h"
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_pm.h"
 #include "gt/intel_lrc_reg.h"
+#include "gt/intel_ring.h"
+
 #include "intel_guc_submission.h"
 
 #include "i915_drv.h"
diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
index e753b1e706e2..6a3ac8cde95d 100644
--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
@@ -35,7 +35,9 @@
  */
 
 #include <linux/slab.h>
+
 #include "i915_drv.h"
+#include "gt/intel_ring.h"
 #include "gvt.h"
 #include "i915_pvinfo.h"
 #include "trace.h"
diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c
index 4208e40445b1..aaf15916d29a 100644
--- a/drivers/gpu/drm/i915/gvt/mmio_context.c
+++ b/drivers/gpu/drm/i915/gvt/mmio_context.c
@@ -35,6 +35,7 @@
 
 #include "i915_drv.h"
 #include "gt/intel_context.h"
+#include "gt/intel_ring.h"
 #include "gvt.h"
 #include "trace.h"
 
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index a5b942ee3ceb..377811f8853f 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -38,6 +38,7 @@
 #include "gem/i915_gem_context.h"
 #include "gem/i915_gem_pm.h"
 #include "gt/intel_context.h"
+#include "gt/intel_ring.h"
 
 #include "i915_drv.h"
 #include "gvt.h"
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index 4c190a548ba7..3e3495838a93 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -8,6 +8,7 @@
 
 #include "gt/intel_engine_heartbeat.h"
 #include "gt/intel_engine_pm.h"
+#include "gt/intel_ring.h"
 
 #include "i915_drv.h"
 #include "i915_active.h"
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 3130b0c7ed83..38d3de2dfaa6 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -200,6 +200,7 @@
 #include "gt/intel_engine_user.h"
 #include "gt/intel_gt.h"
 #include "gt/intel_lrc_reg.h"
+#include "gt/intel_ring.h"
 
 #include "i915_drv.h"
 #include "i915_perf.h"
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 4575f368455d..932c5cf190b5 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -31,6 +31,7 @@
 
 #include "gem/i915_gem_context.h"
 #include "gt/intel_context.h"
+#include "gt/intel_ring.h"
 
 #include "i915_active.h"
 #include "i915_drv.h"
-- 
2.24.0.rc0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

             reply	other threads:[~2019-10-24  7:11 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-10-24  7:11 Chris Wilson [this message]
2019-10-24  7:11 ` [Intel-gfx] [PATCH] drm/i915/gt: Split intel_ring_submission Chris Wilson
2019-10-24  7:15 ` Chris Wilson
2019-10-24  7:15   ` [Intel-gfx] " Chris Wilson
2019-10-24  7:32 ` ✗ Fi.CI.BUILD: failure for " Patchwork
2019-10-24  7:32   ` [Intel-gfx] " Patchwork
2019-10-24  7:33 ` [PATCH] " Mika Kuoppala
2019-10-24  7:33   ` [Intel-gfx] " Mika Kuoppala
2019-10-24  8:01 ` Chris Wilson
2019-10-24  8:01   ` [Intel-gfx] " Chris Wilson
2019-10-24 10:03   ` Chris Wilson
2019-10-24 10:03     ` [Intel-gfx] " Chris Wilson
2019-10-24 10:43 ` ✗ Fi.CI.CHECKPATCH: warning for drm/i915/gt: Split intel_ring_submission (rev3) Patchwork
2019-10-24 10:43   ` [Intel-gfx] " Patchwork
2019-10-24 11:04 ` ✗ Fi.CI.BAT: failure " Patchwork
2019-10-24 11:04   ` [Intel-gfx] " Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191024071139.31917-1-chris@chris-wilson.co.uk \
    --to=chris@chris-wilson.co.uk \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.