* [PATCH 01/37] drm/i915/gtt: Add read only pages to gen8_pte_encode
@ 2018-06-29  7:53 Chris Wilson
  2018-06-29  7:53 ` [PATCH 02/37] drm/i915/gtt: Read-only pages for insert_entries on bdw+ Chris Wilson
                   ` (40 more replies)
  0 siblings, 41 replies; 68+ messages in thread
From: Chris Wilson @ 2018-06-29  7:53 UTC (permalink / raw)
  To: intel-gfx

From: Jon Bloomfield <jon.bloomfield@intel.com>

We can set a bit inside the ppGTT PTE to indicate a page is read-only;
writes from the GPU will be discarded. We can use this to protect pages
and in particular support read-only userptr mappings (necessary for
importing PROT_READ vma).
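
For reference, a minimal sketch of the intended encoding (the complete
function, including the cache-level handling, is in the diff below): the
PTE starts out present and writable, and the new PTE_READ_ONLY flag
simply clears the RW bit:

	/* sketch only: present, writable PTE for the given address */
	gen8_pte_t pte = addr | _PAGE_PRESENT | _PAGE_RW;

	if (flags & PTE_READ_ONLY)
		pte &= ~_PAGE_RW; /* GPU writes to this page are discarded */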

Signed-off-by: Jon Bloomfield <jon.bloomfield@intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Matthew Auld <matthew.william.auld@gmail.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Matthew Auld <matthew.william.auld@gmail.com>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index c6aa761ca085..30914a765400 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -244,10 +244,13 @@ static void clear_pages(struct i915_vma *vma)
 }
 
 static gen8_pte_t gen8_pte_encode(dma_addr_t addr,
-				  enum i915_cache_level level)
+				  enum i915_cache_level level,
+				  u32 flags)
 {
-	gen8_pte_t pte = _PAGE_PRESENT | _PAGE_RW;
-	pte |= addr;
+	gen8_pte_t pte = addr | _PAGE_PRESENT | _PAGE_RW;
+
+	if (unlikely(flags & PTE_READ_ONLY))
+		pte &= ~_PAGE_RW;
 
 	switch (level) {
 	case I915_CACHE_NONE:
@@ -637,7 +640,7 @@ static void gen8_initialize_pt(struct i915_address_space *vm,
 			       struct i915_page_table *pt)
 {
 	fill_px(vm, pt,
-		gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC));
+		gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0));
 }
 
 static void gen6_initialize_pt(struct gen6_hw_ppgtt *ppgtt,
@@ -785,7 +788,7 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
 	unsigned int pte = gen8_pte_index(start);
 	unsigned int pte_end = pte + num_entries;
 	const gen8_pte_t scratch_pte =
-		gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC);
+		gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0);
 	gen8_pte_t *vaddr;
 
 	GEM_BUG_ON(num_entries > pt->used_ptes);
@@ -960,7 +963,7 @@ gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt,
 			      enum i915_cache_level cache_level)
 {
 	struct i915_page_directory *pd;
-	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level);
+	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, 0);
 	gen8_pte_t *vaddr;
 	bool ret;
 
@@ -1028,7 +1031,7 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
 					   struct sgt_dma *iter,
 					   enum i915_cache_level cache_level)
 {
-	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level);
+	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, 0);
 	u64 start = vma->node.start;
 	dma_addr_t rem = iter->sg->length;
 
@@ -1494,7 +1497,7 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
 {
 	struct i915_address_space *vm = &ppgtt->vm;
 	const gen8_pte_t scratch_pte =
-		gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC);
+		gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0);
 	u64 start = 0, length = ppgtt->vm.total;
 
 	if (use_4lvl(vm)) {
@@ -2397,7 +2400,7 @@ static void gen8_ggtt_insert_page(struct i915_address_space *vm,
 	gen8_pte_t __iomem *pte =
 		(gen8_pte_t __iomem *)ggtt->gsm + (offset >> PAGE_SHIFT);
 
-	gen8_set_pte(pte, gen8_pte_encode(addr, level));
+	gen8_set_pte(pte, gen8_pte_encode(addr, level, 0));
 
 	ggtt->invalidate(vm->i915);
 }
@@ -2410,7 +2413,7 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
 	struct sgt_iter sgt_iter;
 	gen8_pte_t __iomem *gtt_entries;
-	const gen8_pte_t pte_encode = gen8_pte_encode(0, level);
+	const gen8_pte_t pte_encode = gen8_pte_encode(0, level, 0);
 	dma_addr_t addr;
 
 	gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm;
@@ -2478,7 +2481,7 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm,
 	unsigned first_entry = start >> PAGE_SHIFT;
 	unsigned num_entries = length >> PAGE_SHIFT;
 	const gen8_pte_t scratch_pte =
-		gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC);
+		gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0);
 	gen8_pte_t __iomem *gtt_base =
 		(gen8_pte_t __iomem *)ggtt->gsm + first_entry;
 	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
-- 
2.18.0


* [PATCH 02/37] drm/i915/gtt: Read-only pages for insert_entries on bdw+
  2018-06-29  7:53 [PATCH 01/37] drm/i915/gtt: Add read only pages to gen8_pte_encode Chris Wilson
@ 2018-06-29  7:53 ` Chris Wilson
  2018-06-29  7:53 ` [PATCH 03/37] drm/i915: Prevent writing into a read-only object via a GGTT mmap Chris Wilson
                   ` (39 subsequent siblings)
  40 siblings, 0 replies; 68+ messages in thread
From: Chris Wilson @ 2018-06-29  7:53 UTC (permalink / raw)
  To: intel-gfx

From: Jon Bloomfield <jon.bloomfield@intel.com>

Hook up the flags to allow read-only ppGTT mappings for gen8+

v2: Include a selftest to check that writes to a readonly PTE are
dropped
v3: Don't duplicate cpu_check() as we can just reuse it, and even worse
don't wholesale copy the theory-of-operation comment from igt_ctx_exec
without changing it to explain the intention behind the new test!
v4: Joonas really likes magic mystery values
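
For orientation, how the flag now flows end to end, condensed from the
bind path touched below (not a complete function, just the relevant
lines):

	/* ppgtt_bind_vma(): read-only objects request a read-only PTE */
	pte_flags = 0;
	if (vma->obj->gt_ro)
		pte_flags |= PTE_READ_ONLY;

	vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
	/*
	 * ...and gen8_ppgtt_insert_*() now forward the flags into
	 * gen8_pte_encode(), where PTE_READ_ONLY clears _PAGE_RW.
	 */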

Signed-off-by: Jon Bloomfield <jon.bloomfield@intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Matthew Auld <matthew.william.auld@gmail.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Matthew Auld <matthew.william.auld@gmail.com>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c           |  45 ++++---
 drivers/gpu/drm/i915/i915_gem_gtt.h           |   7 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c       |  11 +-
 .../gpu/drm/i915/selftests/i915_gem_context.c | 112 +++++++++++++++++-
 4 files changed, 153 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 30914a765400..3773e661dd9d 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -204,7 +204,7 @@ static int ppgtt_bind_vma(struct i915_vma *vma,
 			return err;
 	}
 
-	/* Currently applicable only to VLV */
+	/* Applicable to VLV, and gen8+ */
 	pte_flags = 0;
 	if (vma->obj->gt_ro)
 		pte_flags |= PTE_READ_ONLY;
@@ -960,10 +960,11 @@ gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt,
 			      struct i915_page_directory_pointer *pdp,
 			      struct sgt_dma *iter,
 			      struct gen8_insert_pte *idx,
-			      enum i915_cache_level cache_level)
+			      enum i915_cache_level cache_level,
+			      u32 flags)
 {
 	struct i915_page_directory *pd;
-	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, 0);
+	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
 	gen8_pte_t *vaddr;
 	bool ret;
 
@@ -1014,14 +1015,14 @@ gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt,
 static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm,
 				   struct i915_vma *vma,
 				   enum i915_cache_level cache_level,
-				   u32 unused)
+				   u32 flags)
 {
 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
 	struct sgt_dma iter = sgt_dma(vma);
 	struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start);
 
 	gen8_ppgtt_insert_pte_entries(ppgtt, &ppgtt->pdp, &iter, &idx,
-				      cache_level);
+				      cache_level, flags);
 
 	vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
 }
@@ -1029,9 +1030,10 @@ static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm,
 static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
 					   struct i915_page_directory_pointer **pdps,
 					   struct sgt_dma *iter,
-					   enum i915_cache_level cache_level)
+					   enum i915_cache_level cache_level,
+					   u32 flags)
 {
-	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, 0);
+	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
 	u64 start = vma->node.start;
 	dma_addr_t rem = iter->sg->length;
 
@@ -1147,19 +1149,21 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
 static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm,
 				   struct i915_vma *vma,
 				   enum i915_cache_level cache_level,
-				   u32 unused)
+				   u32 flags)
 {
 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
 	struct sgt_dma iter = sgt_dma(vma);
 	struct i915_page_directory_pointer **pdps = ppgtt->pml4.pdps;
 
 	if (vma->page_sizes.sg > I915_GTT_PAGE_SIZE) {
-		gen8_ppgtt_insert_huge_entries(vma, pdps, &iter, cache_level);
+		gen8_ppgtt_insert_huge_entries(vma, pdps, &iter, cache_level,
+					       flags);
 	} else {
 		struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start);
 
 		while (gen8_ppgtt_insert_pte_entries(ppgtt, pdps[idx.pml4e++],
-						     &iter, &idx, cache_level))
+						     &iter, &idx, cache_level,
+						     flags))
 			GEM_BUG_ON(idx.pml4e >= GEN8_PML4ES_PER_PML4);
 
 		vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
@@ -1572,6 +1576,9 @@ static struct i915_hw_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
 		1ULL << 48 :
 		1ULL << 32;
 
+	/* From bdw, there is support for read-only pages in the PPGTT */
+	ppgtt->vm.has_read_only = true;
+
 	/* There are only few exceptions for gen >=6. chv and bxt.
 	 * And we are not sure about the latter so play safe for now.
 	 */
@@ -2408,7 +2415,7 @@ static void gen8_ggtt_insert_page(struct i915_address_space *vm,
 static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
 				     struct i915_vma *vma,
 				     enum i915_cache_level level,
-				     u32 unused)
+				     u32 flags)
 {
 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
 	struct sgt_iter sgt_iter;
@@ -2416,6 +2423,9 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
 	const gen8_pte_t pte_encode = gen8_pte_encode(0, level, 0);
 	dma_addr_t addr;
 
+	/* The GTT does not support read-only mappings */
+	GEM_BUG_ON(flags & PTE_READ_ONLY);
+
 	gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm;
 	gtt_entries += vma->node.start >> PAGE_SHIFT;
 	for_each_sgt_dma(addr, sgt_iter, vma->pages)
@@ -2542,13 +2552,14 @@ struct insert_entries {
 	struct i915_address_space *vm;
 	struct i915_vma *vma;
 	enum i915_cache_level level;
+	u32 flags;
 };
 
 static int bxt_vtd_ggtt_insert_entries__cb(void *_arg)
 {
 	struct insert_entries *arg = _arg;
 
-	gen8_ggtt_insert_entries(arg->vm, arg->vma, arg->level, 0);
+	gen8_ggtt_insert_entries(arg->vm, arg->vma, arg->level, arg->flags);
 	bxt_vtd_ggtt_wa(arg->vm);
 
 	return 0;
@@ -2557,9 +2568,9 @@ static int bxt_vtd_ggtt_insert_entries__cb(void *_arg)
 static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm,
 					     struct i915_vma *vma,
 					     enum i915_cache_level level,
-					     u32 unused)
+					     u32 flags)
 {
-	struct insert_entries arg = { vm, vma, level };
+	struct insert_entries arg = { vm, vma, level, flags };
 
 	stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL);
 }
@@ -2650,7 +2661,7 @@ static int ggtt_bind_vma(struct i915_vma *vma,
 	struct drm_i915_gem_object *obj = vma->obj;
 	u32 pte_flags;
 
-	/* Currently applicable only to VLV */
+	/* Applicable to VLV (gen8+ do not support RO in the GGTT) */
 	pte_flags = 0;
 	if (obj->gt_ro)
 		pte_flags |= PTE_READ_ONLY;
@@ -3530,6 +3541,10 @@ int i915_ggtt_init_hw(struct drm_i915_private *dev_priv)
 	 */
 	mutex_lock(&dev_priv->drm.struct_mutex);
 	i915_address_space_init(&ggtt->vm, dev_priv, "[global]");
+
+	/* Only VLV supports read-only GGTT mappings */
+	ggtt->vm.has_read_only = IS_VALLEYVIEW(dev_priv);
+
 	if (!HAS_LLC(dev_priv) && !USES_PPGTT(dev_priv))
 		ggtt->vm.mm.color_adjust = i915_gtt_color_adjust;
 	mutex_unlock(&dev_priv->drm.struct_mutex);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 9a4824cae68d..f1f6d7076f12 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -325,7 +325,12 @@ struct i915_address_space {
 	struct list_head unbound_list;
 
 	struct pagevec free_pages;
-	bool pt_kmap_wc;
+
+	/* Some systems require uncached updates of the page directories */
+	bool pt_kmap_wc:1;
+
+	/* Some systems support read-only mappings for GGTT and/or PPGTT */
+	bool has_read_only:1;
 
 	/* FIXME: Need a more generic return type */
 	gen6_pte_t (*pte_encode)(dma_addr_t addr,
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 700f94c371b3..7e5f4ba866cc 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1089,6 +1089,7 @@ void intel_ring_unpin(struct intel_ring *ring)
 static struct i915_vma *
 intel_ring_create_vma(struct drm_i915_private *dev_priv, int size)
 {
+	struct i915_address_space *vm = &dev_priv->ggtt.vm;
 	struct drm_i915_gem_object *obj;
 	struct i915_vma *vma;
 
@@ -1098,10 +1099,14 @@ intel_ring_create_vma(struct drm_i915_private *dev_priv, int size)
 	if (IS_ERR(obj))
 		return ERR_CAST(obj);
 
-	/* mark ring buffers as read-only from GPU side by default */
-	obj->gt_ro = 1;
+	/*
+	 * Mark ring buffers as read-only from GPU side (so no stray overwrites)
+	 * if supported by the platform's GGTT.
+	 */
+	if (vm->has_read_only)
+		obj->gt_ro = 1;
 
-	vma = i915_vma_instance(obj, &dev_priv->ggtt.vm, NULL);
+	vma = i915_vma_instance(obj, vm, NULL);
 	if (IS_ERR(vma))
 		goto err;
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index c0c26fae45c7..74c70752105f 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -23,6 +23,7 @@
  */
 
 #include "../i915_selftest.h"
+#include "i915_random.h"
 #include "igt_flush_test.h"
 
 #include "mock_drm.h"
@@ -248,9 +249,9 @@ static int cpu_check(struct drm_i915_gem_object *obj, unsigned int max)
 		}
 
 		for (; m < DW_PER_PAGE; m++) {
-			if (map[m] != 0xdeadbeef) {
+			if (map[m] != STACK_MAGIC) {
 				pr_err("Invalid value at page %d, offset %d: found %x expected %x\n",
-				       n, m, map[m], 0xdeadbeef);
+				       n, m, map[m], STACK_MAGIC);
 				err = -EINVAL;
 				goto out_unmap;
 			}
@@ -306,7 +307,7 @@ create_test_object(struct i915_gem_context *ctx,
 	if (err)
 		return ERR_PTR(err);
 
-	err = cpu_fill(obj, 0xdeadbeef);
+	err = cpu_fill(obj, STACK_MAGIC);
 	if (err) {
 		pr_err("Failed to fill object with cpu, err=%d\n",
 		       err);
@@ -421,6 +422,110 @@ static int igt_ctx_exec(void *arg)
 	return err;
 }
 
+static int igt_ctx_readonly(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct drm_i915_gem_object *obj = NULL;
+	struct drm_file *file;
+	I915_RND_STATE(prng);
+	IGT_TIMEOUT(end_time);
+	LIST_HEAD(objects);
+	struct i915_gem_context *ctx;
+	struct i915_hw_ppgtt *ppgtt;
+	unsigned long ndwords, dw;
+	int err = -ENODEV;
+
+	/*
+	 * Create a few read-only objects (with the occasional writable object)
+	 * and try to write into these object checking that the GPU discards
+	 * any write to a read-only object.
+	 */
+
+	file = mock_file(i915);
+	if (IS_ERR(file))
+		return PTR_ERR(file);
+
+	mutex_lock(&i915->drm.struct_mutex);
+
+	ctx = i915_gem_create_context(i915, file->driver_priv);
+	if (IS_ERR(ctx)) {
+		err = PTR_ERR(ctx);
+		goto out_unlock;
+	}
+
+	ppgtt = ctx->ppgtt ?: i915->mm.aliasing_ppgtt;
+	if (!ppgtt || !ppgtt->vm.has_read_only) {
+		err = 0;
+		goto out_unlock;
+	}
+
+	ndwords = 0;
+	dw = 0;
+	while (!time_after(jiffies, end_time)) {
+		struct intel_engine_cs *engine;
+		unsigned int id;
+
+		for_each_engine(engine, i915, id) {
+			if (!intel_engine_can_store_dword(engine))
+				continue;
+
+			if (!obj) {
+				obj = create_test_object(ctx, file, &objects);
+				if (IS_ERR(obj)) {
+					err = PTR_ERR(obj);
+					goto out_unlock;
+				}
+
+				obj->gt_ro = prandom_u32_state(&prng);
+			}
+
+			intel_runtime_pm_get(i915);
+			err = gpu_fill(obj, ctx, engine, dw);
+			intel_runtime_pm_put(i915);
+			if (err) {
+				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
+				       ndwords, dw, max_dwords(obj),
+				       engine->name, ctx->hw_id,
+				       yesno(!!ctx->ppgtt), err);
+				goto out_unlock;
+			}
+
+			if (++dw == max_dwords(obj)) {
+				obj = NULL;
+				dw = 0;
+			}
+			ndwords++;
+		}
+	}
+	pr_info("Submitted %lu dwords (across %u engines)\n",
+	       	ndwords, INTEL_INFO(i915)->num_rings);
+
+	dw = 0;
+	list_for_each_entry(obj, &objects, st_link) {
+		unsigned int rem =
+			min_t(unsigned int, ndwords - dw, max_dwords(obj));
+		unsigned int num_writes;
+
+		num_writes = rem;
+		if (obj->gt_ro)
+			num_writes = 0;
+
+		err = cpu_check(obj, num_writes);
+		if (err)
+			break;
+
+		dw += rem;
+	}
+
+out_unlock:
+	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+		err = -EIO;
+	mutex_unlock(&i915->drm.struct_mutex);
+
+	mock_file_free(i915, file);
+	return err;
+}
+
 static __maybe_unused const char *
 __engine_name(struct drm_i915_private *i915, unsigned int engines)
 {
@@ -595,6 +700,7 @@ int i915_gem_context_live_selftests(struct drm_i915_private *dev_priv)
 	static const struct i915_subtest tests[] = {
 		SUBTEST(igt_switch_to_kernel_context),
 		SUBTEST(igt_ctx_exec),
+		SUBTEST(igt_ctx_readonly),
 	};
 	bool fake_alias = false;
 	int err;
-- 
2.18.0


* [PATCH 03/37] drm/i915: Prevent writing into a read-only object via a GGTT mmap
  2018-06-29  7:53 [PATCH 01/37] drm/i915/gtt: Add read only pages to gen8_pte_encode Chris Wilson
  2018-06-29  7:53 ` [PATCH 02/37] drm/i915/gtt: Read-only pages for insert_entries on bdw+ Chris Wilson
@ 2018-06-29  7:53 ` Chris Wilson
  2018-06-29  7:53 ` [PATCH 04/37] drm/i915: Reject attempted pwrites into a read-only object Chris Wilson
                   ` (38 subsequent siblings)
  40 siblings, 0 replies; 68+ messages in thread
From: Chris Wilson @ 2018-06-29  7:53 UTC (permalink / raw)
  To: intel-gfx; +Cc: David Herrmann

If the user has created a read-only object, they should not be allowed
to circumvent the write protection by using a GGTT mmapping. Deny it.

Also most machines do not support read-only GGTT PTEs, so again we have
to reject attempted writes. Fortunately, this is known a priori, so we
can at least reject in the call to create the mmap (with a sanity check
in the fault handler).

v2: Check the vma->vm_flags during mmap() to allow readonly access.
v3: Remove VM_MAYWRITE to curtail mprotect()
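
To illustrate the intended userspace-visible behaviour (hypothetical
snippet; assumes fd is the opened i915 device and offset is the fake
mmap offset of a read-only object, e.g. as returned by
DRM_IOCTL_I915_GEM_MMAP_GTT):

	/* A writable GGTT mmap of a read-only object is refused outright */
	ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, offset);
	/* expected: MAP_FAILED, errno == EINVAL */

	/* A read-only mmap succeeds, but with VM_MAYWRITE cleared... */
	ptr = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, offset);

	/* ...so it cannot later be upgraded to writable */
	ret = mprotect(ptr, size, PROT_READ | PROT_WRITE);
	/* expected: -1, errno == EACCES */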

Testcase: igt/gem_userptr_blits/readonly_mmap*
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Jon Bloomfield <jon.bloomfield@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Matthew Auld <matthew.william.auld@gmail.com>
Cc: David Herrmann <dh.herrmann@gmail.com>
Reviewed-by: Matthew Auld <matthew.william.auld@gmail.com> #v1
Reviewed-by: Jon Bloomfield <jon.bloomfield@intel.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/drm_gem.c                         |  9 +++++++++
 drivers/gpu/drm/i915/i915_gem.c                   |  4 ++++
 drivers/gpu/drm/i915/i915_gem_gtt.c               | 12 +++++++-----
 drivers/gpu/drm/i915/i915_gem_object.h            | 13 ++++++++++++-
 drivers/gpu/drm/i915/intel_ringbuffer.c           |  2 +-
 drivers/gpu/drm/i915/selftests/i915_gem_context.c |  5 +++--
 include/drm/drm_vma_manager.h                     |  1 +
 7 files changed, 37 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index 4a16d7b26c89..bf90625df3c5 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -1036,6 +1036,15 @@ int drm_gem_mmap(struct file *filp, struct vm_area_struct *vma)
 		return -EACCES;
 	}
 
+	if (node->readonly) {
+		if (vma->vm_flags & VM_WRITE) {
+			drm_gem_object_put_unlocked(obj);
+			return -EINVAL;
+		}
+
+		vma->vm_flags &= ~VM_MAYWRITE;
+	}
+
 	ret = drm_gem_mmap_obj(obj, drm_vma_node_size(node) << PAGE_SHIFT,
 			       vma);
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8954db6ab083..bdf5f90ce5c0 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2009,6 +2009,10 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
 	unsigned int flags;
 	int ret;
 
+	/* Sanity check that we allow writing into this object */
+	if (i915_gem_object_is_readonly(obj) && write)
+		return VM_FAULT_SIGBUS;
+
 	/* We don't use vmf->pgoff since that has the fake offset */
 	page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 3773e661dd9d..f46d873a7530 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -206,7 +206,7 @@ static int ppgtt_bind_vma(struct i915_vma *vma,
 
 	/* Applicable to VLV, and gen8+ */
 	pte_flags = 0;
-	if (vma->obj->gt_ro)
+	if (i915_gem_object_is_readonly(vma->obj))
 		pte_flags |= PTE_READ_ONLY;
 
 	vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
@@ -2423,8 +2423,10 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
 	const gen8_pte_t pte_encode = gen8_pte_encode(0, level, 0);
 	dma_addr_t addr;
 
-	/* The GTT does not support read-only mappings */
-	GEM_BUG_ON(flags & PTE_READ_ONLY);
+	/*
+	 * Note that we ignore PTE_READ_ONLY here. The caller must be careful
+	 * not to allow the user to override access to a read only page.
+	 */
 
 	gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm;
 	gtt_entries += vma->node.start >> PAGE_SHIFT;
@@ -2663,7 +2665,7 @@ static int ggtt_bind_vma(struct i915_vma *vma,
 
 	/* Applicable to VLV (gen8+ do not support RO in the GGTT) */
 	pte_flags = 0;
-	if (obj->gt_ro)
+	if (i915_gem_object_is_readonly(obj))
 		pte_flags |= PTE_READ_ONLY;
 
 	intel_runtime_pm_get(i915);
@@ -2701,7 +2703,7 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
 
 	/* Currently applicable only to VLV */
 	pte_flags = 0;
-	if (vma->obj->gt_ro)
+	if (i915_gem_object_is_readonly(vma->obj))
 		pte_flags |= PTE_READ_ONLY;
 
 	if (flags & I915_VMA_LOCAL_BIND) {
diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
index c3c6f2e588fb..56e9f00d2c4c 100644
--- a/drivers/gpu/drm/i915/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/i915_gem_object.h
@@ -141,7 +141,6 @@ struct drm_i915_gem_object {
 	 * Is the object to be mapped as read-only to the GPU
 	 * Only honoured if hardware has relevant pte bit
 	 */
-	unsigned long gt_ro:1;
 	unsigned int cache_level:3;
 	unsigned int cache_coherent:2;
 #define I915_BO_CACHE_COHERENT_FOR_READ BIT(0)
@@ -358,6 +357,18 @@ static inline void i915_gem_object_unlock(struct drm_i915_gem_object *obj)
 	reservation_object_unlock(obj->resv);
 }
 
+static inline void
+i915_gem_object_set_readonly(struct drm_i915_gem_object *obj)
+{
+	obj->base.vma_node.readonly = true;
+}
+
+static inline bool
+i915_gem_object_is_readonly(const struct drm_i915_gem_object *obj)
+{
+	return obj->base.vma_node.readonly;
+}
+
 static inline bool
 i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj)
 {
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 7e5f4ba866cc..772fb59e6cb9 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1104,7 +1104,7 @@ intel_ring_create_vma(struct drm_i915_private *dev_priv, int size)
 	 * if supported by the platform's GGTT.
 	 */
 	if (vm->has_read_only)
-		obj->gt_ro = 1;
+		i915_gem_object_set_readonly(obj);
 
 	vma = i915_vma_instance(obj, vm, NULL);
 	if (IS_ERR(vma))
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index 74c70752105f..3169b72180d0 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -476,7 +476,8 @@ static int igt_ctx_readonly(void *arg)
 					goto out_unlock;
 				}
 
-				obj->gt_ro = prandom_u32_state(&prng);
+				if (prandom_u32_state(&prng) & 1)
+					i915_gem_object_set_readonly(obj);
 			}
 
 			intel_runtime_pm_get(i915);
@@ -507,7 +508,7 @@ static int igt_ctx_readonly(void *arg)
 		unsigned int num_writes;
 
 		num_writes = rem;
-		if (obj->gt_ro)
+		if (i915_gem_object_is_readonly(obj))
 			num_writes = 0;
 
 		err = cpu_check(obj, num_writes);
diff --git a/include/drm/drm_vma_manager.h b/include/drm/drm_vma_manager.h
index 8758df94e9a0..c7987daeaed0 100644
--- a/include/drm/drm_vma_manager.h
+++ b/include/drm/drm_vma_manager.h
@@ -41,6 +41,7 @@ struct drm_vma_offset_node {
 	rwlock_t vm_lock;
 	struct drm_mm_node vm_node;
 	struct rb_root vm_files;
+	bool readonly:1;
 };
 
 struct drm_vma_offset_manager {
-- 
2.18.0


* [PATCH 04/37] drm/i915: Reject attempted pwrites into a read-only object
  2018-06-29  7:53 [PATCH 01/37] drm/i915/gtt: Add read only pages to gen8_pte_encode Chris Wilson
  2018-06-29  7:53 ` [PATCH 02/37] drm/i915/gtt: Read-only pages for insert_entries on bdw+ Chris Wilson
  2018-06-29  7:53 ` [PATCH 03/37] drm/i915: Prevent writing into a read-only object via a GGTT mmap Chris Wilson
@ 2018-06-29  7:53 ` Chris Wilson
  2018-06-29  7:53 ` [PATCH 05/37] drm/i915/userptr: Enable read-only support on gen8+ Chris Wilson
                   ` (37 subsequent siblings)
  40 siblings, 0 replies; 68+ messages in thread
From: Chris Wilson @ 2018-06-29  7:53 UTC (permalink / raw)
  To: intel-gfx

If the user created a read-only object, they should not be allowed to
circumvent the write protection using the pwrite ioctl.
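
For illustration, the expected behaviour from userspace (hypothetical
snippet; handle names a read-only object, such as a read-only userptr):

	struct drm_i915_gem_pwrite arg = {
		.handle   = handle,
		.offset   = 0,
		.size     = sizeof(data),
		.data_ptr = (uintptr_t)data,
	};

	ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &arg);
	/* expected with this patch: -1, errno == EINVAL */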

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Jon Bloomfield <jon.bloomfield@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Matthew Auld <matthew.william.auld@gmail.com>
Reviewed-by: Jon Bloomfield <jon.bloomfield@intel.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Matthew Auld <matthew.william.auld@gmail.com>
---
 drivers/gpu/drm/i915/i915_gem.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index bdf5f90ce5c0..def8d7a78530 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1623,6 +1623,12 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 		goto err;
 	}
 
+	/* Writes not allowed into this read-only object */
+	if (i915_gem_object_is_readonly(obj)) {
+		ret = -EINVAL;
+		goto err;
+	}
+
 	trace_i915_gem_object_pwrite(obj, args->offset, args->size);
 
 	ret = -ENODEV;
-- 
2.18.0


* [PATCH 05/37] drm/i915/userptr: Enable read-only support on gen8+
  2018-06-29  7:53 [PATCH 01/37] drm/i915/gtt: Add read only pages to gen8_pte_encode Chris Wilson
                   ` (2 preceding siblings ...)
  2018-06-29  7:53 ` [PATCH 04/37] drm/i915: Reject attempted pwrites into a read-only object Chris Wilson
@ 2018-06-29  7:53 ` Chris Wilson
  2018-06-29  7:53 ` [PATCH 06/37] drm/i915: Move rate-limiting request retire to after submission Chris Wilson
                   ` (36 subsequent siblings)
  40 siblings, 0 replies; 68+ messages in thread
From: Chris Wilson @ 2018-06-29  7:53 UTC (permalink / raw)
  To: intel-gfx

On gen8 and onwards, we can mark GPU accesses through the ppGTT as being
read-only, that is, cause any GPU write to that page to be discarded
(without triggering a fault). This is all that we need to finally support
the read-only flag for userptr!
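
For illustration, how userspace is expected to use the flag (hypothetical
snippet; ptr is e.g. a PROT_READ mapping that previously could not be
imported at all):

	struct drm_i915_gem_userptr arg = {
		.user_ptr  = (uintptr_t)ptr,
		.user_size = size,
		.flags     = I915_USERPTR_READ_ONLY,
	};

	ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_USERPTR, &arg);
	/*
	 * Previously this flag was always rejected with -ENODEV; on gen8+
	 * with ppGTT enabled it now succeeds and arg.handle names an
	 * object whose GPU mappings are read-only.
	 */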

Testcase: igt/gem_userptr_blits/readonly*
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Jon Bloomfield <jon.bloomfield@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Matthew Auld <matthew.william.auld@gmail.com>
Reviewed-by: Jon Bloomfield <jon.bloomfield@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_object.h  |  1 -
 drivers/gpu/drm/i915/i915_gem_userptr.c | 15 +++++++++------
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
index 56e9f00d2c4c..83e5e01fa9ea 100644
--- a/drivers/gpu/drm/i915/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/i915_gem_object.h
@@ -267,7 +267,6 @@ struct drm_i915_gem_object {
 	union {
 		struct i915_gem_userptr {
 			uintptr_t ptr;
-			unsigned read_only :1;
 
 			struct i915_mm_struct *mm;
 			struct i915_mmu_object *mmu_object;
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index 854bd51b9478..045db5ef17ac 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -507,7 +507,7 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
 		struct mm_struct *mm = obj->userptr.mm->mm;
 		unsigned int flags = 0;
 
-		if (!obj->userptr.read_only)
+		if (!i915_gem_object_is_readonly(obj))
 			flags |= FOLL_WRITE;
 
 		ret = -EFAULT;
@@ -643,7 +643,7 @@ static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
 		if (pvec) /* defer to worker if malloc fails */
 			pinned = __get_user_pages_fast(obj->userptr.ptr,
 						       num_pages,
-						       !obj->userptr.read_only,
+						       !i915_gem_object_is_readonly(obj),
 						       pvec);
 	}
 
@@ -789,10 +789,12 @@ i915_gem_userptr_ioctl(struct drm_device *dev,
 		return -EFAULT;
 
 	if (args->flags & I915_USERPTR_READ_ONLY) {
-		/* On almost all of the current hw, we cannot tell the GPU that a
-		 * page is readonly, so this is just a placeholder in the uAPI.
+		/*
+		 * On almost all of the older hw, we cannot tell the GPU that
+		 * a page is readonly.
 		 */
-		return -ENODEV;
+		if (INTEL_GEN(dev_priv) < 8 || !USES_PPGTT(dev_priv))
+			return -ENODEV;
 	}
 
 	obj = i915_gem_object_alloc(dev_priv);
@@ -806,7 +808,8 @@ i915_gem_userptr_ioctl(struct drm_device *dev,
 	i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
 
 	obj->userptr.ptr = args->user_ptr;
-	obj->userptr.read_only = !!(args->flags & I915_USERPTR_READ_ONLY);
+	if (args->flags & I915_USERPTR_READ_ONLY)
+		i915_gem_object_set_readonly(obj);
 
 	/* And keep a pointer to the current->mm for resolving the user pages
 	 * at binding. This means that we need to hook into the mmu_notifier
-- 
2.18.0


* [PATCH 06/37] drm/i915: Move rate-limiting request retire to after submission
  2018-06-29  7:53 [PATCH 01/37] drm/i915/gtt: Add read only pages to gen8_pte_encode Chris Wilson
                   ` (3 preceding siblings ...)
  2018-06-29  7:53 ` [PATCH 05/37] drm/i915/userptr: Enable read-only support on gen8+ Chris Wilson
@ 2018-06-29  7:53 ` Chris Wilson
  2018-06-29 10:00   ` Tvrtko Ursulin
  2018-06-29  7:53 ` [PATCH 07/37] drm/i915: Move engine request retirement to intel_engine_cs Chris Wilson
                   ` (35 subsequent siblings)
  40 siblings, 1 reply; 68+ messages in thread
From: Chris Wilson @ 2018-06-29  7:53 UTC (permalink / raw)
  To: intel-gfx

Our long-standing defense against a single client flooding the
system with requests (causing mempressure and stalls across the whole
system) is to retire the oldest request on every allocation. (By retiring
the oldest, we try to keep returning requests back to the system in a
steady flow.) This adds an extra step to the submission path that we
can avoid simply by performing it after the submission itself.

We already do try to clean up a stale request list after submission, so
always retiring all completed requests fits in as a natural extension.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_request.c | 26 ++++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index a2f7e9358450..4eea7a27291f 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -694,12 +694,6 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	if (ret)
 		goto err_unreserve;
 
-	/* Move our oldest request to the slab-cache (if not in use!) */
-	rq = list_first_entry(&ce->ring->request_list, typeof(*rq), ring_link);
-	if (!list_is_last(&rq->ring_link, &ce->ring->request_list) &&
-	    i915_request_completed(rq))
-		i915_request_retire(rq);
-
 	/*
 	 * Beware: Dragons be flying overhead.
 	 *
@@ -1110,6 +1104,8 @@ void i915_request_add(struct i915_request *request)
 	local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
 
 	/*
+	 * Move our oldest requests to the slab-cache (if not in use!)
+	 *
 	 * In typical scenarios, we do not expect the previous request on
 	 * the timeline to be still tracked by timeline->last_request if it
 	 * has been completed. If the completed request is still here, that
@@ -1126,8 +1122,22 @@ void i915_request_add(struct i915_request *request)
 	 * work on behalf of others -- but instead we should benefit from
 	 * improved resource management. (Well, that's the theory at least.)
 	 */
-	if (prev && i915_request_completed(prev))
-		i915_request_retire_upto(prev);
+	do {
+		prev = list_first_entry(&ring->request_list,
+					typeof(*prev), ring_link);
+
+		/*
+		 * Keep the current request, the caller may not be
+		 * expecting it to be retired (and freed!) immediately,
+		 * and preserving one request from the client allows us to
+		 * carry forward frequently reused state onto the next
+		 * submission.
+		 */
+		if (prev == request || !i915_request_completed(prev))
+			break;
+
+		i915_request_retire(prev);
+	} while (1);
 }
 
 static unsigned long local_clock_us(unsigned int *cpu)
-- 
2.18.0


* [PATCH 07/37] drm/i915: Move engine request retirement to intel_engine_cs
  2018-06-29  7:53 [PATCH 01/37] drm/i915/gtt: Add read only pages to gen8_pte_encode Chris Wilson
                   ` (4 preceding siblings ...)
  2018-06-29  7:53 ` [PATCH 06/37] drm/i915: Move rate-limiting request retire to after submission Chris Wilson
@ 2018-06-29  7:53 ` Chris Wilson
  2018-06-29  7:53 ` [PATCH 08/37] drm/i915: Hold request reference for submission until retirement Chris Wilson
                   ` (34 subsequent siblings)
  40 siblings, 0 replies; 68+ messages in thread
From: Chris Wilson @ 2018-06-29  7:53 UTC (permalink / raw)
  To: intel-gfx

In the next patch, we will move ownership of the fence reference to the
submission backend and will want to drop its final reference when
retiring it from the submission backend. We will also need a catch-up
pass when parking the engine to clean up any residual entries in the
engine timeline. In short, move the engine retirement from
i915_request.c to intel_engine_cs.c for future use.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_request.c     | 47 +--------------------
 drivers/gpu/drm/i915/intel_engine_cs.c  | 54 +++++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_ringbuffer.h |  2 +
 3 files changed, 57 insertions(+), 46 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 4eea7a27291f..9ff6d14a0fef 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -344,50 +344,6 @@ static void free_capture_list(struct i915_request *request)
 	}
 }
 
-static void __retire_engine_request(struct intel_engine_cs *engine,
-				    struct i915_request *rq)
-{
-	GEM_TRACE("%s(%s) fence %llx:%d, global=%d, current %d\n",
-		  __func__, engine->name,
-		  rq->fence.context, rq->fence.seqno,
-		  rq->global_seqno,
-		  intel_engine_get_seqno(engine));
-
-	GEM_BUG_ON(!i915_request_completed(rq));
-
-	local_irq_disable();
-
-	spin_lock(&engine->timeline.lock);
-	GEM_BUG_ON(!list_is_first(&rq->link, &engine->timeline.requests));
-	list_del_init(&rq->link);
-	spin_unlock(&engine->timeline.lock);
-
-	spin_lock(&rq->lock);
-	if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
-		dma_fence_signal_locked(&rq->fence);
-	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags))
-		intel_engine_cancel_signaling(rq);
-	if (rq->waitboost) {
-		GEM_BUG_ON(!atomic_read(&rq->i915->gt_pm.rps.num_waiters));
-		atomic_dec(&rq->i915->gt_pm.rps.num_waiters);
-	}
-	spin_unlock(&rq->lock);
-
-	local_irq_enable();
-
-	/*
-	 * The backing object for the context is done after switching to the
-	 * *next* context. Therefore we cannot retire the previous context until
-	 * the next context has already started running. However, since we
-	 * cannot take the required locks at i915_request_submit() we
-	 * defer the unpinning of the active context to now, retirement of
-	 * the subsequent request.
-	 */
-	if (engine->last_retired_context)
-		intel_context_unpin(engine->last_retired_context);
-	engine->last_retired_context = rq->hw_context;
-}
-
 static void __retire_engine_upto(struct intel_engine_cs *engine,
 				 struct i915_request *rq)
 {
@@ -400,8 +356,7 @@ static void __retire_engine_upto(struct intel_engine_cs *engine,
 		tmp = list_first_entry(&engine->timeline.requests,
 				       typeof(*tmp), link);
 
-		GEM_BUG_ON(tmp->engine != engine);
-		__retire_engine_request(engine, tmp);
+		intel_engine_retire_request(engine, tmp);
 	} while (tmp != rq);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 01862884a436..63f60ab908b4 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1089,6 +1089,60 @@ void intel_engines_sanitize(struct drm_i915_private *i915)
 	}
 }
 
+/**
+ * intel_engine_retire_request: drop the request reference from the engine
+ * @engine: the engine
+ * @rq: the request
+ *
+ * This request has been completed and is part of the chain being retired by
+ * the caller, so drop any reference to it from the engine.
+ */
+void intel_engine_retire_request(struct intel_engine_cs *engine,
+				 struct i915_request *rq)
+{
+	GEM_TRACE("%s(%s) fence %llx:%d, global=%d, current %d\n",
+		  __func__, engine->name,
+		  rq->fence.context, rq->fence.seqno,
+		  rq->global_seqno,
+		  intel_engine_get_seqno(engine));
+
+	lockdep_assert_held(&engine->i915->drm.struct_mutex);
+	GEM_BUG_ON(rq->engine != engine);
+	GEM_BUG_ON(!i915_request_completed(rq));
+
+	local_irq_disable();
+
+	spin_lock(&engine->timeline.lock);
+	GEM_BUG_ON(!list_is_first(&rq->link, &engine->timeline.requests));
+	list_del_init(&rq->link);
+	spin_unlock(&engine->timeline.lock);
+
+	spin_lock(&rq->lock);
+	if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
+		dma_fence_signal_locked(&rq->fence);
+	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags))
+		intel_engine_cancel_signaling(rq);
+	if (rq->waitboost) {
+		GEM_BUG_ON(!atomic_read(&rq->i915->gt_pm.rps.num_waiters));
+		atomic_dec(&rq->i915->gt_pm.rps.num_waiters);
+	}
+	spin_unlock(&rq->lock);
+
+	local_irq_enable();
+
+	/*
+	 * The backing object for the context is done after switching to the
+	 * *next* context. Therefore we cannot retire the previous context until
+	 * the next context has already started running. However, since we
+	 * cannot take the required locks at i915_request_submit() we
+	 * defer the unpinning of the active context to now, retirement of
+	 * the subsequent request.
+	 */
+	if (engine->last_retired_context)
+		intel_context_unpin(engine->last_retired_context);
+	engine->last_retired_context = rq->hw_context;
+}
+
 /**
  * intel_engines_park: called when the GT is transitioning from busy->idle
  * @i915: the i915 device
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index ce6cc2a6cf7a..54678bd06fd9 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -900,6 +900,8 @@ int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine);
 int intel_init_blt_ring_buffer(struct intel_engine_cs *engine);
 int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine);
 
+void intel_engine_retire_request(struct intel_engine_cs *engine,
+				 struct i915_request *rq);
 int intel_engine_stop_cs(struct intel_engine_cs *engine);
 
 u64 intel_engine_get_active_head(const struct intel_engine_cs *engine);
-- 
2.18.0


* [PATCH 08/37] drm/i915: Hold request reference for submission until retirement
  2018-06-29  7:53 [PATCH 01/37] drm/i915/gtt: Add read only pages to gen8_pte_encode Chris Wilson
                   ` (5 preceding siblings ...)
  2018-06-29  7:53 ` [PATCH 07/37] drm/i915: Move engine request retirement to intel_engine_cs Chris Wilson
@ 2018-06-29  7:53 ` Chris Wilson
  2018-06-29  7:53 ` [PATCH 09/37] drm/i915/execlists: Switch to rb_root_cached Chris Wilson
                   ` (33 subsequent siblings)
  40 siblings, 0 replies; 68+ messages in thread
From: Chris Wilson @ 2018-06-29  7:53 UTC (permalink / raw)
  To: intel-gfx

Currently the async submission backends (guc and execlists) hold an extra
reference to the requests being processed, as they are not serialised with
request retirement. If, instead, we prevent the request from being dropped
from the engine timeline until after submission has finished processing
it, we can use a single reference held for the entire submission process
(currently, it is held only for the submission fence).

By doing so we remove a few atomics from inside the irqoff path, on the
order of 200ns as measured by gem_syslatency, bringing the impact of
direct submission into line with the previous tasklet implementation.
The tradeoff is that as we may postpone the retirement, we have to check
for any residual requests after detecting that the engines are idle.

v2: switch-to-kernel-context needs to be cognisant of the delayed
release on the engine->timeline again.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c             | 39 +++++++++++++---
 drivers/gpu/drm/i915/i915_gem_context.c     |  2 +-
 drivers/gpu/drm/i915/i915_request.c         | 20 ++++----
 drivers/gpu/drm/i915/intel_engine_cs.c      | 51 +++++++++++++++------
 drivers/gpu/drm/i915/intel_guc_submission.c |  4 +-
 drivers/gpu/drm/i915/intel_lrc.c            | 10 +---
 drivers/gpu/drm/i915/intel_ringbuffer.h     |  4 +-
 7 files changed, 86 insertions(+), 44 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index def8d7a78530..c13d5b78a02e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3789,15 +3789,42 @@ static int wait_for_timeline(struct i915_timeline *tl, unsigned int flags)
 
 static int wait_for_engines(struct drm_i915_private *i915)
 {
-	if (wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) {
-		dev_err(i915->drm.dev,
-			"Failed to idle engines, declaring wedged!\n");
-		GEM_TRACE_DUMP();
-		i915_gem_set_wedged(i915);
-		return -EIO;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	for_each_engine(engine, i915, id) {
+		struct i915_request *rq, *rn;
+
+		if (wait_for(intel_engine_is_idle(engine),
+			     I915_IDLE_ENGINES_TIMEOUT)) {
+			dev_err(i915->drm.dev,
+				"Failed to idle %s engine, declaring wedged!\n",
+				engine->name);
+			goto set_wedged;
+		}
+
+		/*
+		 * Now that we know the engine is definitely idle; explicitly
+		 * retire all residual requests as they may have been skipped
+		 * by earlier calls to i915_retire_requests().
+		 */
+		list_for_each_entry_safe(rq, rn,
+					 &engine->timeline.requests, link) {
+			if (!intel_engine_retire_request(engine, rq)) {
+				dev_err(i915->drm.dev,
+					"Failed to retire %s engine, declaring wedged!\n",
+					engine->name);
+				goto set_wedged;
+			}
+		}
 	}
 
 	return 0;
+
+set_wedged:
+	GEM_TRACE_DUMP();
+	i915_gem_set_wedged(i915);
+	return -EIO;
 }
 
 int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags)
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index ccf463ab6562..8995c1a57c4f 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -650,7 +650,7 @@ static bool engine_has_kernel_context_barrier(struct intel_engine_cs *engine)
 		return true;
 
 	/* The engine is idle; check that it is idling in the kernel context. */
-	return engine->last_retired_context == ce;
+	return intel_engine_has_kernel_context(engine);
 }
 
 int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915)
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 9ff6d14a0fef..14bf0be6f994 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -347,17 +347,15 @@ static void free_capture_list(struct i915_request *request)
 static void __retire_engine_upto(struct intel_engine_cs *engine,
 				 struct i915_request *rq)
 {
+	struct list_head * const requests = &engine->timeline.requests;
 	struct i915_request *tmp;
 
 	if (list_empty(&rq->link))
 		return;
 
-	do {
-		tmp = list_first_entry(&engine->timeline.requests,
-				       typeof(*tmp), link);
-
-		intel_engine_retire_request(engine, tmp);
-	} while (tmp != rq);
+	do
+		tmp = list_first_entry(requests, typeof(*tmp), link);
+	while (intel_engine_retire_request(engine, tmp) && tmp != rq);
 }
 
 static void i915_request_retire(struct i915_request *request)
@@ -376,6 +374,8 @@ static void i915_request_retire(struct i915_request *request)
 
 	trace_i915_request_retire(request);
 
+	__retire_engine_upto(request->engine, request);
+
 	advance_ring(request);
 	free_capture_list(request);
 
@@ -414,8 +414,6 @@ static void i915_request_retire(struct i915_request *request)
 	atomic_dec_if_positive(&request->gem_context->ban_score);
 	intel_context_unpin(request->hw_context);
 
-	__retire_engine_upto(request->engine, request);
-
 	unreserve_gt(request->i915);
 
 	i915_sched_node_fini(request->i915, &request->sched);
@@ -722,8 +720,10 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 		       rq->timeline->fence_context,
 		       timeline_get_seqno(rq->timeline));
 
-	/* We bump the ref for the fence chain */
-	i915_sw_fence_init(&i915_request_get(rq)->submit, submit_notify);
+	/* We bump the ref for the fence chain and for the submit backend. */
+	refcount_set(&rq->fence.refcount.refcount, 3);
+
+	i915_sw_fence_init(&rq->submit, submit_notify);
 	init_waitqueue_head(&rq->execute);
 
 	i915_sched_node_init(&rq->sched);
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 63f60ab908b4..457003311b74 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1038,11 +1038,11 @@ bool intel_engines_are_idle(struct drm_i915_private *dev_priv)
  * executed if the engine is already idle, is the kernel context
  * (#i915.kernel_context).
  */
-bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine)
+bool intel_engine_has_kernel_context(struct intel_engine_cs *engine)
 {
 	const struct intel_context *kernel_context =
 		to_intel_context(engine->i915->kernel_context, engine);
-	struct i915_request *rq;
+	const struct intel_context *last;
 
 	lockdep_assert_held(&engine->i915->drm.struct_mutex);
 
@@ -1051,11 +1051,15 @@ bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine)
 	 * the last request that remains in the timeline. When idle, it is
 	 * the last executed context as tracked by retirement.
 	 */
-	rq = __i915_gem_active_peek(&engine->timeline.last_request);
-	if (rq)
-		return rq->hw_context == kernel_context;
-	else
-		return engine->last_retired_context == kernel_context;
+	last = engine->last_retired_context;
+
+	spin_lock_irq(&engine->timeline.lock);
+	if (!list_empty(&engine->timeline.requests))
+		last = list_last_entry(&engine->timeline.requests,
+				       struct i915_request, link)->hw_context;
+	spin_unlock_irq(&engine->timeline.lock);
+
+	return last == kernel_context;
 }
 
 void intel_engines_reset_default_submission(struct drm_i915_private *i915)
@@ -1096,20 +1100,25 @@ void intel_engines_sanitize(struct drm_i915_private *i915)
  *
  * This request has been completed and is part of the chain being retired by
  * the caller, so drop any reference to it from the engine.
+ *
+ * Returns: true if the reference was dropped, false if it was still busy.
  */
-void intel_engine_retire_request(struct intel_engine_cs *engine,
+bool intel_engine_retire_request(struct intel_engine_cs *engine,
 				 struct i915_request *rq)
 {
-	GEM_TRACE("%s(%s) fence %llx:%d, global=%d, current %d\n",
-		  __func__, engine->name,
-		  rq->fence.context, rq->fence.seqno,
-		  rq->global_seqno,
-		  intel_engine_get_seqno(engine));
+	GEM_TRACE("%s: fence %llx:%d, global=%d, current %d, active?=%s\n",
+		  engine->name, rq->fence.context, rq->fence.seqno,
+		  rq->global_seqno, intel_engine_get_seqno(engine),
+		  yesno(port_request(engine->execlists.port) == rq));
 
 	lockdep_assert_held(&engine->i915->drm.struct_mutex);
 	GEM_BUG_ON(rq->engine != engine);
 	GEM_BUG_ON(!i915_request_completed(rq));
 
+	/* Don't drop the final ref until after the backend has finished */
+	if (port_request(engine->execlists.port) == rq)
+		return false;
+
 	local_irq_disable();
 
 	spin_lock(&engine->timeline.lock);
@@ -1141,6 +1150,19 @@ void intel_engine_retire_request(struct intel_engine_cs *engine,
 	if (engine->last_retired_context)
 		intel_context_unpin(engine->last_retired_context);
 	engine->last_retired_context = rq->hw_context;
+
+	i915_request_put(rq);
+	return true;
+}
+
+static void engine_retire_requests(struct intel_engine_cs *engine)
+{
+	struct i915_request *rq, *next;
+
+	list_for_each_entry_safe(rq, next, &engine->timeline.requests, link) {
+		if (WARN_ON(!intel_engine_retire_request(engine, rq)))
+			break;
+	}
 }
 
 /**
@@ -1173,6 +1195,7 @@ void intel_engines_park(struct drm_i915_private *i915)
 				"%s is not idle before parking\n",
 				engine->name);
 			intel_engine_dump(engine, &p, NULL);
+			engine->cancel_requests(engine);
 		}
 
 		/* Must be reset upon idling, or we may miss the busy wakeup. */
@@ -1181,6 +1204,8 @@ void intel_engines_park(struct drm_i915_private *i915)
 		if (engine->park)
 			engine->park(engine);
 
+		engine_retire_requests(engine);
+
 		if (engine->pinned_default_state) {
 			i915_gem_object_unpin_map(engine->default_state);
 			engine->pinned_default_state = NULL;
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index f3945258fe1b..05449f636d94 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -669,8 +669,7 @@ static void guc_submit(struct intel_engine_cs *engine)
 static void port_assign(struct execlist_port *port, struct i915_request *rq)
 {
 	GEM_BUG_ON(port_isset(port));
-
-	port_set(port, i915_request_get(rq));
+	port_set(port, rq);
 }
 
 static inline int rq_prio(const struct i915_request *rq)
@@ -793,7 +792,6 @@ static void guc_submission_tasklet(unsigned long data)
 	rq = port_request(port);
 	while (rq && i915_request_completed(rq)) {
 		trace_i915_request_out(rq);
-		i915_request_put(rq);
 
 		port = execlists_port_complete(execlists, port);
 		if (port_isset(port)) {
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index ab89dabc2965..a6bc50d7195e 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -518,11 +518,7 @@ static bool can_merge_ctx(const struct intel_context *prev,
 static void port_assign(struct execlist_port *port, struct i915_request *rq)
 {
 	GEM_BUG_ON(rq == port_request(port));
-
-	if (port_isset(port))
-		i915_request_put(port_request(port));
-
-	port_set(port, port_pack(i915_request_get(rq), port_count(port)));
+	port_set(port, port_pack(rq, port_count(port)));
 }
 
 static void inject_preempt_context(struct intel_engine_cs *engine)
@@ -793,8 +789,6 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists)
 					       INTEL_CONTEXT_SCHEDULE_OUT :
 					       INTEL_CONTEXT_SCHEDULE_PREEMPTED);
 
-		i915_request_put(rq);
-
 		memset(port, 0, sizeof(*port));
 		port++;
 	}
@@ -1088,8 +1082,6 @@ static void process_csb(struct intel_engine_cs *engine)
 
 			execlists_context_schedule_out(rq,
 						       INTEL_CONTEXT_SCHEDULE_OUT);
-			i915_request_put(rq);
-
 			GEM_TRACE("%s completed ctx=%d\n",
 				  engine->name, port->context_id);
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 54678bd06fd9..a1aff360d0ce 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -900,7 +900,7 @@ int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine);
 int intel_init_blt_ring_buffer(struct intel_engine_cs *engine);
 int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine);
 
-void intel_engine_retire_request(struct intel_engine_cs *engine,
+bool intel_engine_retire_request(struct intel_engine_cs *engine,
 				 struct i915_request *rq);
 int intel_engine_stop_cs(struct intel_engine_cs *engine);
 
@@ -1077,7 +1077,7 @@ void intel_engines_sanitize(struct drm_i915_private *i915);
 bool intel_engine_is_idle(struct intel_engine_cs *engine);
 bool intel_engines_are_idle(struct drm_i915_private *dev_priv);
 
-bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine);
+bool intel_engine_has_kernel_context(struct intel_engine_cs *engine);
 void intel_engine_lost_context(struct intel_engine_cs *engine);
 
 void intel_engines_park(struct drm_i915_private *i915);
-- 
2.18.0


* [PATCH 09/37] drm/i915/execlists: Switch to rb_root_cached
  2018-06-29  7:53 [PATCH 01/37] drm/i915/gtt: Add read only pages to gen8_pte_encode Chris Wilson
                   ` (6 preceding siblings ...)
  2018-06-29  7:53 ` [PATCH 08/37] drm/i915: Hold request reference for submission until retirement Chris Wilson
@ 2018-06-29  7:53 ` Chris Wilson
  2018-07-11 13:20   ` Tvrtko Ursulin
  2018-06-29  7:53 ` [PATCH 10/37] drm/i915: Reserve some priority bits for internal use Chris Wilson
                   ` (32 subsequent siblings)
  40 siblings, 1 reply; 68+ messages in thread
From: Chris Wilson @ 2018-06-29  7:53 UTC (permalink / raw)
  To: intel-gfx

The kernel recently gained an augmented rbtree that caches the leftmost
element of the tree, a frequent optimisation to avoid calls to rb_first()
and one that is also open-coded by the execlists->queue. Switch from our
open-coded cache to the library.
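
For reference, a minimal sketch of the cached-rbtree API being adopted
(generic usage from <linux/rbtree.h>, not i915 code):

	struct rb_root_cached root = RB_ROOT_CACHED;

	/* insertion: the caller says whether the new node is the leftmost */
	rb_insert_color_cached(&node->rb, &root, leftmost);

	/* O(1) replacement for rb_first() */
	struct rb_node *first = rb_first_cached(&root);

	/* erase keeps the cached leftmost pointer up to date */
	rb_erase_cached(&node->rb, &root);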

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_engine_cs.c      |  7 ++---
 drivers/gpu/drm/i915/intel_guc_submission.c | 12 +++-----
 drivers/gpu/drm/i915/intel_lrc.c            | 32 +++++++--------------
 drivers/gpu/drm/i915/intel_ringbuffer.h     |  7 +----
 4 files changed, 19 insertions(+), 39 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 457003311b74..14d5b673fc27 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -464,8 +464,7 @@ static void intel_engine_init_execlist(struct intel_engine_cs *engine)
 	GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);
 
 	execlists->queue_priority = INT_MIN;
-	execlists->queue = RB_ROOT;
-	execlists->first = NULL;
+	execlists->queue = RB_ROOT_CACHED;
 }
 
 /**
@@ -1000,7 +999,7 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine)
 	}
 
 	/* ELSP is empty, but there are ready requests? E.g. after reset */
-	if (READ_ONCE(engine->execlists.first))
+	if (!RB_EMPTY_ROOT(&engine->execlists.queue.rb_root))
 		return false;
 
 	/* Ring stopped? */
@@ -1615,7 +1614,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 	last = NULL;
 	count = 0;
 	drm_printf(m, "\t\tQueue priority: %d\n", execlists->queue_priority);
-	for (rb = execlists->first; rb; rb = rb_next(rb)) {
+	for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
 		struct i915_priolist *p =
 			rb_entry(rb, typeof(*p), node);
 
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index 05449f636d94..9a2c6856a71e 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -694,9 +694,6 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
 
 	lockdep_assert_held(&engine->timeline.lock);
 
-	rb = execlists->first;
-	GEM_BUG_ON(rb_first(&execlists->queue) != rb);
-
 	if (port_isset(port)) {
 		if (intel_engine_has_preemption(engine)) {
 			struct guc_preempt_work *preempt_work =
@@ -718,7 +715,7 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
 	}
 	GEM_BUG_ON(port_isset(port));
 
-	while (rb) {
+	while ((rb = rb_first_cached(&execlists->queue))) {
 		struct i915_priolist *p = to_priolist(rb);
 		struct i915_request *rq, *rn;
 
@@ -743,15 +740,13 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
 			submit = true;
 		}
 
-		rb = rb_next(rb);
-		rb_erase(&p->node, &execlists->queue);
+		rb_erase_cached(&p->node, &execlists->queue);
 		INIT_LIST_HEAD(&p->requests);
 		if (p->priority != I915_PRIORITY_NORMAL)
 			kmem_cache_free(engine->i915->priorities, p);
 	}
 done:
 	execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN;
-	execlists->first = rb;
 	if (submit)
 		port_assign(port, last);
 	if (last)
@@ -760,7 +755,8 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
 	/* We must always keep the beast fed if we have work piled up */
 	GEM_BUG_ON(port_isset(execlists->port) &&
 		   !execlists_is_active(execlists, EXECLISTS_ACTIVE_USER));
-	GEM_BUG_ON(execlists->first && !port_isset(execlists->port));
+	GEM_BUG_ON(rb_first_cached(&execlists->queue) &&
+		   !port_isset(execlists->port));
 
 	return submit;
 }
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index a6bc50d7195e..422753c8641f 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -273,7 +273,7 @@ lookup_priolist(struct intel_engine_cs *engine, int prio)
 find_priolist:
 	/* most positive priority is scheduled first, equal priorities fifo */
 	rb = NULL;
-	parent = &execlists->queue.rb_node;
+	parent = &execlists->queue.rb_root.rb_node;
 	while (*parent) {
 		rb = *parent;
 		p = to_priolist(rb);
@@ -311,10 +311,7 @@ lookup_priolist(struct intel_engine_cs *engine, int prio)
 	p->priority = prio;
 	INIT_LIST_HEAD(&p->requests);
 	rb_link_node(&p->node, rb, parent);
-	rb_insert_color(&p->node, &execlists->queue);
-
-	if (first)
-		execlists->first = &p->node;
+	rb_insert_color_cached(&p->node, &execlists->queue, first);
 
 	return p;
 }
@@ -598,9 +595,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 	 * and context switches) submission.
 	 */
 
-	rb = execlists->first;
-	GEM_BUG_ON(rb_first(&execlists->queue) != rb);
-
 	if (last) {
 		/*
 		 * Don't resubmit or switch until all outstanding
@@ -662,7 +656,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		last->tail = last->wa_tail;
 	}
 
-	while (rb) {
+	while ((rb = rb_first_cached(&execlists->queue))) {
 		struct i915_priolist *p = to_priolist(rb);
 		struct i915_request *rq, *rn;
 
@@ -721,8 +715,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 			submit = true;
 		}
 
-		rb = rb_next(rb);
-		rb_erase(&p->node, &execlists->queue);
+		rb_erase_cached(&p->node, &execlists->queue);
 		INIT_LIST_HEAD(&p->requests);
 		if (p->priority != I915_PRIORITY_NORMAL)
 			kmem_cache_free(engine->i915->priorities, p);
@@ -748,14 +741,14 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 	execlists->queue_priority =
 		port != execlists->port ? rq_prio(last) : INT_MIN;
 
-	execlists->first = rb;
 	if (submit) {
 		port_assign(port, last);
 		execlists_submit_ports(engine);
 	}
 
 	/* We must always keep the beast fed if we have work piled up */
-	GEM_BUG_ON(execlists->first && !port_isset(execlists->port));
+	GEM_BUG_ON(rb_first_cached(&execlists->queue) &&
+		   !port_isset(execlists->port));
 
 	/* Re-evaluate the executing context setup after each preemptive kick */
 	if (last)
@@ -915,8 +908,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 	}
 
 	/* Flush the queued requests to the timeline list (for retiring). */
-	rb = execlists->first;
-	while (rb) {
+	while ((rb = rb_first_cached(&execlists->queue))) {
 		struct i915_priolist *p = to_priolist(rb);
 
 		list_for_each_entry_safe(rq, rn, &p->requests, sched.link) {
@@ -926,8 +918,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 			__i915_request_submit(rq);
 		}
 
-		rb = rb_next(rb);
-		rb_erase(&p->node, &execlists->queue);
+		rb_erase_cached(&p->node, &execlists->queue);
 		INIT_LIST_HEAD(&p->requests);
 		if (p->priority != I915_PRIORITY_NORMAL)
 			kmem_cache_free(engine->i915->priorities, p);
@@ -936,8 +927,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 	/* Remaining _unready_ requests will be nop'ed when submitted */
 
 	execlists->queue_priority = INT_MIN;
-	execlists->queue = RB_ROOT;
-	execlists->first = NULL;
+	execlists->queue = RB_ROOT_CACHED;
 	GEM_BUG_ON(port_isset(execlists->port));
 
 	spin_unlock_irqrestore(&engine->timeline.lock, flags);
@@ -1183,7 +1173,7 @@ static void execlists_submit_request(struct i915_request *request)
 
 	queue_request(engine, &request->sched, rq_prio(request));
 
-	GEM_BUG_ON(!engine->execlists.first);
+	GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
 	GEM_BUG_ON(list_empty(&request->sched.link));
 
 	submit_queue(engine, rq_prio(request));
@@ -2036,7 +2026,7 @@ static void execlists_reset_finish(struct intel_engine_cs *engine)
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 
 	/* After a GPU reset, we may have requests to replay */
-	if (execlists->first)
+	if (!RB_EMPTY_ROOT(&execlists->queue.rb_root))
 		tasklet_schedule(&execlists->tasklet);
 
 	/*
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index a1aff360d0ce..923875500828 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -292,12 +292,7 @@ struct intel_engine_execlists {
 	/**
 	 * @queue: queue of requests, in priority lists
 	 */
-	struct rb_root queue;
-
-	/**
-	 * @first: leftmost level in priority @queue
-	 */
-	struct rb_node *first;
+	struct rb_root_cached queue;
 
 	/**
 	 * @csb_read: control register for Context Switch buffer
-- 
2.18.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


* [PATCH 10/37] drm/i915: Reserve some priority bits for internal use
  2018-06-29  7:53 [PATCH 01/37] drm/i915/gtt: Add read only pages to gen8_pte_encode Chris Wilson
                   ` (7 preceding siblings ...)
  2018-06-29  7:53 ` [PATCH 09/37] drm/i915/execlists: Switch to rb_root_cached Chris Wilson
@ 2018-06-29  7:53 ` Chris Wilson
  2018-06-29  7:53 ` [PATCH 11/37] drm/i915: Combine multiple internal plists into the same i915_priolist bucket Chris Wilson
                   ` (31 subsequent siblings)
  40 siblings, 0 replies; 68+ messages in thread
From: Chris Wilson @ 2018-06-29  7:53 UTC (permalink / raw)
  To: intel-gfx

In the next few patches, we will want to give a small priority boost to
some requests/queues, but not so much that we perturb the user-controlled
order. As such, we shift the user priority bits higher, leaving ourselves
a few low priority bits for our internal bumping.
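
Schematically, with a few bits held back for the kernel (the shift is
still 0 in this patch and grows in later ones), a priority value splits
as in this minimal sketch; the macro names here are illustrative:

#define EXAMPLE_USER_SHIFT	2			/* two internal sublevel bits */
#define EXAMPLE_USER_PRIO(x)	((x) << EXAMPLE_USER_SHIFT)	/* user value in the high bits */
#define EXAMPLE_COUNT		(1 << EXAMPLE_USER_SHIFT)	/* sublevels per user level */
#define EXAMPLE_MASK		(-EXAMPLE_COUNT)		/* keeps only the user bits */

/*
 * e.g. user priority 3 with internal bump 1:
 *	prio = EXAMPLE_USER_PRIO(3) | 1;	-> 13
 *	prio >> EXAMPLE_USER_SHIFT		-> 3 (reported back to userspace)
 *	prio & ~EXAMPLE_MASK			-> 1 (the internal bump)
 */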

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h            | 2 +-
 drivers/gpu/drm/i915/i915_gem_context.c    | 9 +++++----
 drivers/gpu/drm/i915/i915_scheduler.h      | 6 ++++++
 drivers/gpu/drm/i915/selftests/intel_lrc.c | 8 +++++---
 4 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 2b684f431c60..af783a32abd8 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3186,7 +3186,7 @@ int i915_gem_object_wait(struct drm_i915_gem_object *obj,
 int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
 				  unsigned int flags,
 				  const struct i915_sched_attr *attr);
-#define I915_PRIORITY_DISPLAY I915_PRIORITY_MAX
+#define I915_PRIORITY_DISPLAY I915_USER_PRIORITY(I915_PRIORITY_MAX)
 
 int __must_check
 i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write);
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 8995c1a57c4f..ca9205e19f0b 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -285,7 +285,7 @@ __create_hw_context(struct drm_i915_private *dev_priv,
 	kref_init(&ctx->ref);
 	list_add_tail(&ctx->link, &dev_priv->contexts.list);
 	ctx->i915 = dev_priv;
-	ctx->sched.priority = I915_PRIORITY_NORMAL;
+	ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_NORMAL);
 
 	for (n = 0; n < ARRAY_SIZE(ctx->__engine); n++) {
 		struct intel_context *ce = &ctx->__engine[n];
@@ -441,7 +441,7 @@ i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio)
 		return ctx;
 
 	i915_gem_context_clear_bannable(ctx);
-	ctx->sched.priority = prio;
+	ctx->sched.priority = I915_USER_PRIORITY(prio);
 	ctx->ring_size = PAGE_SIZE;
 
 	GEM_BUG_ON(!i915_gem_context_is_kernel(ctx));
@@ -816,7 +816,7 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 		args->value = i915_gem_context_is_bannable(ctx);
 		break;
 	case I915_CONTEXT_PARAM_PRIORITY:
-		args->value = ctx->sched.priority;
+		args->value = ctx->sched.priority >> I915_USER_PRIORITY_SHIFT;
 		break;
 	default:
 		ret = -EINVAL;
@@ -889,7 +889,8 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 				 !capable(CAP_SYS_NICE))
 				ret = -EPERM;
 			else
-				ctx->sched.priority = priority;
+				ctx->sched.priority =
+					I915_USER_PRIORITY(priority);
 		}
 		break;
 
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 70a42220358d..7edfad0abfd7 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -19,6 +19,12 @@ enum {
 	I915_PRIORITY_INVALID = INT_MIN
 };
 
+#define I915_USER_PRIORITY_SHIFT 0
+#define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT)
+
+#define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT)
+#define I915_PRIORITY_MASK (-I915_PRIORITY_COUNT)
+
 struct i915_sched_attr {
 	/**
 	 * @priority: execution and service priority
diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
index ea27c7cfbf96..90841415cf7a 100644
--- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
+++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
@@ -281,12 +281,14 @@ static int live_preempt(void *arg)
 	ctx_hi = kernel_context(i915);
 	if (!ctx_hi)
 		goto err_spin_lo;
-	ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
+	ctx_hi->sched.priority =
+		I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
 
 	ctx_lo = kernel_context(i915);
 	if (!ctx_lo)
 		goto err_ctx_hi;
-	ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
+	ctx_lo->sched.priority =
+		I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
 
 	for_each_engine(engine, i915, id) {
 		struct i915_request *rq;
@@ -405,7 +407,7 @@ static int live_late_preempt(void *arg)
 			goto err_wedged;
 		}
 
-		attr.priority = I915_PRIORITY_MAX;
+		attr.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
 		engine->schedule(rq, &attr);
 
 		if (!wait_for_spinner(&spin_hi, rq)) {
-- 
2.18.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


* [PATCH 11/37] drm/i915: Combine multiple internal plists into the same i915_priolist bucket
  2018-06-29  7:53 [PATCH 01/37] drm/i915/gtt: Add read only pages to gen8_pte_encode Chris Wilson
                   ` (8 preceding siblings ...)
  2018-06-29  7:53 ` [PATCH 10/37] drm/i915: Reserve some priority bits for internal use Chris Wilson
@ 2018-06-29  7:53 ` Chris Wilson
  2018-06-29  7:53 ` [PATCH 12/37] drm/i915: Priority boost for new clients Chris Wilson
                   ` (30 subsequent siblings)
  40 siblings, 0 replies; 68+ messages in thread
From: Chris Wilson @ 2018-06-29  7:53 UTC (permalink / raw)
  To: intel-gfx

As we are about to allow ourselves to slightly bump the user priority
into a few different sublevels, pack those internal priority lists into
the same i915_priolist to keep the rbtree compact and avoid having to
allocate a new node for the default user priority even after the
internal bumping. The downside of having a requests[] array rather than
a node per active list is that we then have to walk over the empty
higher priority lists. To compensate, we track the active buckets and
use a small bitmap to skip over any inactive ones.
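
The bookkeeping amounts to something like the following sketch (kernel
style, using <linux/list.h> and <linux/bitops.h> helpers; the names are
illustrative, the real code lives in the priolist_for_each_request*
macros below):

struct bucket_list {
	struct list_head requests[4];	/* slot 0 is the highest sublevel */
	unsigned long used;		/* bit i set while requests[i] is non-empty */
};

/* Queue onto the right sublevel and mark that bucket as occupied. */
static void bucket_add(struct bucket_list *b, struct list_head *entry, int idx)
{
	list_add_tail(entry, &b->requests[idx]);
	b->used |= BIT(idx);
}

/* Consume in priority order: ffs() jumps straight to the lowest set
 * bit, i.e. the highest occupied sublevel, skipping the empty ones. */
static void bucket_drain(struct bucket_list *b)
{
	int i;

	while ((i = ffs(b->used))) {
		struct list_head *head = &b->requests[i - 1];

		while (!list_empty(head))
			list_del_init(head->next);	/* submit the request here */

		b->used &= ~BIT(i - 1);
	}
}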

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_engine_cs.c      |  6 +-
 drivers/gpu/drm/i915/intel_guc_submission.c | 12 ++-
 drivers/gpu/drm/i915/intel_lrc.c            | 87 ++++++++++++++-------
 drivers/gpu/drm/i915/intel_ringbuffer.h     | 13 ++-
 4 files changed, 80 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 14d5b673fc27..d3d1b6959ae7 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1615,10 +1615,10 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 	count = 0;
 	drm_printf(m, "\t\tQueue priority: %d\n", execlists->queue_priority);
 	for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
-		struct i915_priolist *p =
-			rb_entry(rb, typeof(*p), node);
+		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
+		int i;
 
-		list_for_each_entry(rq, &p->requests, sched.link) {
+		priolist_for_each_request(rq, p, i) {
 			if (count++ < MAX_REQUESTS_TO_SHOW - 1)
 				print_request(m, rq, "\t\tQ ");
 			else
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index 9a2c6856a71e..660c41ec71ab 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -718,30 +718,28 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
 	while ((rb = rb_first_cached(&execlists->queue))) {
 		struct i915_priolist *p = to_priolist(rb);
 		struct i915_request *rq, *rn;
+		int i;
 
-		list_for_each_entry_safe(rq, rn, &p->requests, sched.link) {
+		priolist_for_each_request_consume(rq, rn, p, i) {
 			if (last && rq->hw_context != last->hw_context) {
-				if (port == last_port) {
-					__list_del_many(&p->requests,
-							&rq->sched.link);
+				if (port == last_port)
 					goto done;
-				}
 
 				if (submit)
 					port_assign(port, last);
 				port++;
 			}
 
-			INIT_LIST_HEAD(&rq->sched.link);
+			list_del_init(&rq->sched.link);
 
 			__i915_request_submit(rq);
 			trace_i915_request_in(rq, port_index(port, execlists));
+
 			last = rq;
 			submit = true;
 		}
 
 		rb_erase_cached(&p->node, &execlists->queue);
-		INIT_LIST_HEAD(&p->requests);
 		if (p->priority != I915_PRIORITY_NORMAL)
 			kmem_cache_free(engine->i915->priorities, p);
 	}
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 422753c8641f..0121e1fd3160 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -259,14 +259,49 @@ intel_lr_context_descriptor_update(struct i915_gem_context *ctx,
 	ce->lrc_desc = desc;
 }
 
-static struct i915_priolist *
+static void assert_priolists(struct intel_engine_execlists * const execlists,
+			     int queue_priority)
+{
+	struct rb_node *rb;
+	int last_prio, i;
+
+	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+		return;
+
+	GEM_BUG_ON(rb_first_cached(&execlists->queue) !=
+		   rb_first(&execlists->queue.rb_root));
+
+	last_prio = (queue_priority >> I915_USER_PRIORITY_SHIFT) + 1;
+	for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
+		struct i915_priolist *p = to_priolist(rb);
+
+		GEM_BUG_ON(p->priority >= last_prio);
+		last_prio = p->priority;
+
+		GEM_BUG_ON(!p->used);
+		for (i = 0; i < ARRAY_SIZE(p->requests); i++) {
+			if (list_empty(&p->requests[i]))
+				continue;
+
+			GEM_BUG_ON(!(p->used & BIT(i)));
+		}
+	}
+}
+
+static struct list_head *
 lookup_priolist(struct intel_engine_cs *engine, int prio)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct i915_priolist *p;
 	struct rb_node **parent, *rb;
 	bool first = true;
+	int idx, i;
+
+	assert_priolists(execlists, INT_MAX);
 
+	/* buckets sorted from highest [in slot 0] to lowest priority */
+	idx = I915_PRIORITY_COUNT - (prio & ~I915_PRIORITY_MASK) - 1;
+	prio >>= I915_USER_PRIORITY_SHIFT;
 	if (unlikely(execlists->no_priolist))
 		prio = I915_PRIORITY_NORMAL;
 
@@ -283,7 +318,7 @@ lookup_priolist(struct intel_engine_cs *engine, int prio)
 			parent = &rb->rb_right;
 			first = false;
 		} else {
-			return p;
+			goto out;
 		}
 	}
 
@@ -309,11 +344,15 @@ lookup_priolist(struct intel_engine_cs *engine, int prio)
 	}
 
 	p->priority = prio;
-	INIT_LIST_HEAD(&p->requests);
+	for (i = 0; i < ARRAY_SIZE(p->requests); i++)
+		INIT_LIST_HEAD(&p->requests[i]);
 	rb_link_node(&p->node, rb, parent);
 	rb_insert_color_cached(&p->node, &execlists->queue, first);
+	p->used = 0;
 
-	return p;
+out:
+	p->used |= BIT(idx);
+	return &p->requests[idx];
 }
 
 static void unwind_wa_tail(struct i915_request *rq)
@@ -325,7 +364,7 @@ static void unwind_wa_tail(struct i915_request *rq)
 static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
 {
 	struct i915_request *rq, *rn;
-	struct i915_priolist *uninitialized_var(p);
+	struct list_head *uninitialized_var(pl);
 	int last_prio = I915_PRIORITY_INVALID;
 
 	lockdep_assert_held(&engine->timeline.lock);
@@ -342,11 +381,10 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
 		GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
 		if (rq_prio(rq) != last_prio) {
 			last_prio = rq_prio(rq);
-			p = lookup_priolist(engine, last_prio);
+			pl = lookup_priolist(engine, last_prio);
 		}
 
-		GEM_BUG_ON(p->priority != rq_prio(rq));
-		list_add(&rq->sched.link, &p->requests);
+		list_add(&rq->sched.link, pl);
 	}
 }
 
@@ -659,8 +697,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 	while ((rb = rb_first_cached(&execlists->queue))) {
 		struct i915_priolist *p = to_priolist(rb);
 		struct i915_request *rq, *rn;
+		int i;
 
-		list_for_each_entry_safe(rq, rn, &p->requests, sched.link) {
+		priolist_for_each_request_consume(rq, rn, p, i) {
 			/*
 			 * Can we combine this request with the current port?
 			 * It has to be the same context/ringbuffer and not
@@ -679,11 +718,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 				 * combine this request with the last, then we
 				 * are done.
 				 */
-				if (port == last_port) {
-					__list_del_many(&p->requests,
-							&rq->sched.link);
+				if (port == last_port)
 					goto done;
-				}
 
 				/*
 				 * If GVT overrides us we only ever submit
@@ -693,11 +729,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 				 * request) to the second port.
 				 */
 				if (ctx_single_port_submission(last->hw_context) ||
-				    ctx_single_port_submission(rq->hw_context)) {
-					__list_del_many(&p->requests,
-							&rq->sched.link);
+				    ctx_single_port_submission(rq->hw_context))
 					goto done;
-				}
 
 				GEM_BUG_ON(last->hw_context == rq->hw_context);
 
@@ -708,15 +741,16 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 				GEM_BUG_ON(port_isset(port));
 			}
 
-			INIT_LIST_HEAD(&rq->sched.link);
+			list_del_init(&rq->sched.link);
+
 			__i915_request_submit(rq);
 			trace_i915_request_in(rq, port_index(port, execlists));
+
 			last = rq;
 			submit = true;
 		}
 
 		rb_erase_cached(&p->node, &execlists->queue);
-		INIT_LIST_HEAD(&p->requests);
 		if (p->priority != I915_PRIORITY_NORMAL)
 			kmem_cache_free(engine->i915->priorities, p);
 	}
@@ -740,6 +774,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 	 */
 	execlists->queue_priority =
 		port != execlists->port ? rq_prio(last) : INT_MIN;
+	assert_priolists(execlists, execlists->queue_priority);
 
 	if (submit) {
 		port_assign(port, last);
@@ -910,16 +945,16 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 	/* Flush the queued requests to the timeline list (for retiring). */
 	while ((rb = rb_first_cached(&execlists->queue))) {
 		struct i915_priolist *p = to_priolist(rb);
+		int i;
 
-		list_for_each_entry_safe(rq, rn, &p->requests, sched.link) {
-			INIT_LIST_HEAD(&rq->sched.link);
+		priolist_for_each_request_consume(rq, rn, p, i) {
+			list_del_init(&rq->sched.link);
 
 			dma_fence_set_error(&rq->fence, -EIO);
 			__i915_request_submit(rq);
 		}
 
 		rb_erase_cached(&p->node, &execlists->queue);
-		INIT_LIST_HEAD(&p->requests);
 		if (p->priority != I915_PRIORITY_NORMAL)
 			kmem_cache_free(engine->i915->priorities, p);
 	}
@@ -1133,8 +1168,7 @@ static void queue_request(struct intel_engine_cs *engine,
 			  struct i915_sched_node *node,
 			  int prio)
 {
-	list_add_tail(&node->link,
-		      &lookup_priolist(engine, prio)->requests);
+	list_add_tail(&node->link, lookup_priolist(engine, prio));
 }
 
 static void __update_queue(struct intel_engine_cs *engine, int prio)
@@ -1204,7 +1238,7 @@ sched_lock_engine(struct i915_sched_node *node, struct intel_engine_cs *locked)
 static void execlists_schedule(struct i915_request *request,
 			       const struct i915_sched_attr *attr)
 {
-	struct i915_priolist *uninitialized_var(pl);
+	struct list_head *uninitialized_var(pl);
 	struct intel_engine_cs *engine, *last;
 	struct i915_dependency *dep, *p;
 	struct i915_dependency stack;
@@ -1299,8 +1333,7 @@ static void execlists_schedule(struct i915_request *request,
 				pl = lookup_priolist(engine, prio);
 				last = engine;
 			}
-			GEM_BUG_ON(pl->priority != prio);
-			list_move_tail(&node->link, &pl->requests);
+			list_move_tail(&node->link, pl);
 		}
 
 		if (prio > engine->execlists.queue_priority &&
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 923875500828..b9ea5cee249f 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -188,11 +188,22 @@ enum intel_engine_id {
 };
 
 struct i915_priolist {
+	struct list_head requests[I915_PRIORITY_COUNT];
 	struct rb_node node;
-	struct list_head requests;
+	unsigned long used;
 	int priority;
 };
 
+#define priolist_for_each_request(it, plist, idx) \
+	for (idx = 0; idx < ARRAY_SIZE((plist)->requests); idx++) \
+		list_for_each_entry(it, &(plist)->requests[idx], sched.link)
+
+#define priolist_for_each_request_consume(it, n, plist, idx) \
+	for (; (idx = ffs((plist)->used)); (plist)->used &= ~BIT(idx - 1)) \
+		list_for_each_entry_safe(it, n, \
+					 &(plist)->requests[idx - 1], \
+					 sched.link)
+
 /**
  * struct intel_engine_execlists - execlist submission queue and port state
  *
-- 
2.18.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


* [PATCH 12/37] drm/i915: Priority boost for new clients
  2018-06-29  7:53 [PATCH 01/37] drm/i915/gtt: Add read only pages to gen8_pte_encode Chris Wilson
                   ` (9 preceding siblings ...)
  2018-06-29  7:53 ` [PATCH 11/37] drm/i915: Combine multiple internal plists into the same i915_priolist bucket Chris Wilson
@ 2018-06-29  7:53 ` Chris Wilson
  2018-06-29 10:04   ` Tvrtko Ursulin
  2018-06-29  7:53 ` [PATCH 13/37] drm/i915: Priority boost switching to an idle ring Chris Wilson
                   ` (29 subsequent siblings)
  40 siblings, 1 reply; 68+ messages in thread
From: Chris Wilson @ 2018-06-29  7:53 UTC (permalink / raw)
  To: intel-gfx

Taken from an idea used for FQ_CODEL, we give new request flows a
priority boost. These flows are likely to correspond to interactive
tasks and so be more latency-sensitive than the long queues. As soon as
the client has more than one request in the queue, further requests are
not boosted and it settles down into ordinary steady-state behaviour.
Such small kicks dramatically help combat the starvation issue by
allowing each client the opportunity to run even when the system is
under heavy throughput load (within the constraints of the user-selected
priority).
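
The heuristic itself is tiny; roughly the sketch below, where the helper
and its arguments are hypothetical (the real change is in
i915_request_add() in the diff):

/* Boost only the first request of an otherwise idle per-context flow. */
static int effective_priority(int user_prio, bool client_has_pending)
{
	int prio = user_prio << 1;	/* user priority occupies the upper bits */

	if (!client_has_pending)	/* a new flow: nothing queued or all done */
		prio |= 1;		/* one sub-user-level kick, a la FQ_CODEL */

	return prio;
}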

Testcase: igt/benchmarks/rrul
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_request.c   | 16 ++++++++++++++--
 drivers/gpu/drm/i915/i915_scheduler.h |  4 +++-
 2 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 14bf0be6f994..2d7a785dd88c 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1052,8 +1052,20 @@ void i915_request_add(struct i915_request *request)
 	 */
 	local_bh_disable();
 	rcu_read_lock(); /* RCU serialisation for set-wedged protection */
-	if (engine->schedule)
-		engine->schedule(request, &request->gem_context->sched);
+	if (engine->schedule) {
+		struct i915_sched_attr attr = request->gem_context->sched;
+
+		/*
+		 * Boost priorities to new clients (new request flows).
+		 *
+		 * Allow interactive/synchronous clients to jump ahead of
+		 * the bulk clients. (FQ_CODEL)
+		 */
+		if (!prev || i915_request_completed(prev))
+			attr.priority |= I915_PRIORITY_NEWCLIENT;
+
+		engine->schedule(request, &attr);
+	}
 	rcu_read_unlock();
 	i915_sw_fence_commit(&request->submit);
 	local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 7edfad0abfd7..e9fb6c1d5e42 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -19,12 +19,14 @@ enum {
 	I915_PRIORITY_INVALID = INT_MIN
 };
 
-#define I915_USER_PRIORITY_SHIFT 0
+#define I915_USER_PRIORITY_SHIFT 1
 #define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT)
 
 #define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT)
 #define I915_PRIORITY_MASK (-I915_PRIORITY_COUNT)
 
+#define I915_PRIORITY_NEWCLIENT BIT(0)
+
 struct i915_sched_attr {
 	/**
 	 * @priority: execution and service priority
-- 
2.18.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


* [PATCH 13/37] drm/i915: Priority boost switching to an idle ring
  2018-06-29  7:53 [PATCH 01/37] drm/i915/gtt: Add read only pages to gen8_pte_encode Chris Wilson
                   ` (10 preceding siblings ...)
  2018-06-29  7:53 ` [PATCH 12/37] drm/i915: Priority boost for new clients Chris Wilson
@ 2018-06-29  7:53 ` Chris Wilson
  2018-06-29 10:08   ` Tvrtko Ursulin
  2018-06-29  7:53 ` [PATCH 14/37] drm/i915: Refactor export_fence() after i915_vma_move_to_active() Chris Wilson
                   ` (28 subsequent siblings)
  40 siblings, 1 reply; 68+ messages in thread
From: Chris Wilson @ 2018-06-29  7:53 UTC (permalink / raw)
  To: intel-gfx

In order to maximise concurrency between engines, if we queue a request
to a currently idle ring, reorder its dependencies to execute that
request as early as possible and ideally improve the occupancy of
multiple engines simultaneously.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_request.c     | 6 +++++-
 drivers/gpu/drm/i915/i915_scheduler.h   | 5 +++--
 drivers/gpu/drm/i915/intel_ringbuffer.h | 6 ++++++
 3 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 2d7a785dd88c..d618e7127e88 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1053,7 +1053,8 @@ void i915_request_add(struct i915_request *request)
 	local_bh_disable();
 	rcu_read_lock(); /* RCU serialisation for set-wedged protection */
 	if (engine->schedule) {
-		struct i915_sched_attr attr = request->gem_context->sched;
+		struct i915_gem_context *ctx = request->gem_context;
+		struct i915_sched_attr attr = ctx->sched;
 
 		/*
 		 * Boost priorities to new clients (new request flows).
@@ -1064,6 +1065,9 @@ void i915_request_add(struct i915_request *request)
 		if (!prev || i915_request_completed(prev))
 			attr.priority |= I915_PRIORITY_NEWCLIENT;
 
+		if (intel_engine_queue_is_empty(engine))
+			attr.priority |= I915_PRIORITY_STALL;
+
 		engine->schedule(request, &attr);
 	}
 	rcu_read_unlock();
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index e9fb6c1d5e42..be132ceb83d9 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -19,13 +19,14 @@ enum {
 	I915_PRIORITY_INVALID = INT_MIN
 };
 
-#define I915_USER_PRIORITY_SHIFT 1
+#define I915_USER_PRIORITY_SHIFT 2
 #define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT)
 
 #define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT)
 #define I915_PRIORITY_MASK (-I915_PRIORITY_COUNT)
 
-#define I915_PRIORITY_NEWCLIENT BIT(0)
+#define I915_PRIORITY_NEWCLIENT BIT(1)
+#define I915_PRIORITY_STALL BIT(0)
 
 struct i915_sched_attr {
 	/**
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index b9ea5cee249f..b26587d0c70d 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -1080,6 +1080,12 @@ gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset)
 
 void intel_engines_sanitize(struct drm_i915_private *i915);
 
+static inline bool
+intel_engine_queue_is_empty(const struct intel_engine_cs *engine)
+{
+	return RB_EMPTY_ROOT(&engine->execlists.queue.rb_root);
+}
+
 bool intel_engine_is_idle(struct intel_engine_cs *engine);
 bool intel_engines_are_idle(struct drm_i915_private *dev_priv);
 
-- 
2.18.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


* [PATCH 14/37] drm/i915: Refactor export_fence() after i915_vma_move_to_active()
  2018-06-29  7:53 [PATCH 01/37] drm/i915/gtt: Add read only pages to gen8_pte_encode Chris Wilson
                   ` (11 preceding siblings ...)
  2018-06-29  7:53 ` [PATCH 13/37] drm/i915: Priority boost switching to an idle ring Chris Wilson
@ 2018-06-29  7:53 ` Chris Wilson
  2018-06-29 12:00   ` Tvrtko Ursulin
  2018-06-29  7:53 ` [PATCH 15/37] drm/i915: Export i915_request_skip() Chris Wilson
                   ` (27 subsequent siblings)
  40 siblings, 1 reply; 68+ messages in thread
From: Chris Wilson @ 2018-06-29  7:53 UTC (permalink / raw)
  To: intel-gfx

Currently all callers are responsible for adding the vma to the active
timeline and then exporting its fence. Combine the two operations into
i915_vma_move_to_active() to move all the extra handling from the
callers to the single site.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c    | 47 +++++++++----------
 drivers/gpu/drm/i915/selftests/huge_pages.c   |  4 --
 .../drm/i915/selftests/i915_gem_coherency.c   |  4 --
 .../gpu/drm/i915/selftests/i915_gem_context.c |  4 --
 4 files changed, 21 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index c2dd9b4cdace..91f20445147f 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1166,15 +1166,9 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 
 	GEM_BUG_ON(!reservation_object_test_signaled_rcu(batch->resv, true));
 	i915_vma_move_to_active(batch, rq, 0);
-	reservation_object_lock(batch->resv, NULL);
-	reservation_object_add_excl_fence(batch->resv, &rq->fence);
-	reservation_object_unlock(batch->resv);
 	i915_vma_unpin(batch);
 
 	i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
-	reservation_object_lock(vma->resv, NULL);
-	reservation_object_add_excl_fence(vma->resv, &rq->fence);
-	reservation_object_unlock(vma->resv);
 
 	rq->batch = batch;
 
@@ -1771,25 +1765,6 @@ static int eb_relocate(struct i915_execbuffer *eb)
 	return eb_relocate_slow(eb);
 }
 
-static void eb_export_fence(struct i915_vma *vma,
-			    struct i915_request *rq,
-			    unsigned int flags)
-{
-	struct reservation_object *resv = vma->resv;
-
-	/*
-	 * Ignore errors from failing to allocate the new fence, we can't
-	 * handle an error right now. Worst case should be missed
-	 * synchronisation leading to rendering corruption.
-	 */
-	reservation_object_lock(resv, NULL);
-	if (flags & EXEC_OBJECT_WRITE)
-		reservation_object_add_excl_fence(resv, &rq->fence);
-	else if (reservation_object_reserve_shared(resv) == 0)
-		reservation_object_add_shared_fence(resv, &rq->fence);
-	reservation_object_unlock(resv);
-}
-
 static int eb_move_to_gpu(struct i915_execbuffer *eb)
 {
 	const unsigned int count = eb->buffer_count;
@@ -1844,7 +1819,6 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
 		struct i915_vma *vma = eb->vma[i];
 
 		i915_vma_move_to_active(vma, eb->request, flags);
-		eb_export_fence(vma, eb->request, flags);
 
 		__eb_unreserve_vma(vma, flags);
 		vma->exec_flags = NULL;
@@ -1884,6 +1858,25 @@ static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
 	return true;
 }
 
+static void export_fence(struct i915_vma *vma,
+			 struct i915_request *rq,
+			 unsigned int flags)
+{
+	struct reservation_object *resv = vma->resv;
+
+	/*
+	 * Ignore errors from failing to allocate the new fence, we can't
+	 * handle an error right now. Worst case should be missed
+	 * synchronisation leading to rendering corruption.
+	 */
+	reservation_object_lock(resv, NULL);
+	if (flags & EXEC_OBJECT_WRITE)
+		reservation_object_add_excl_fence(resv, &rq->fence);
+	else if (reservation_object_reserve_shared(resv) == 0)
+		reservation_object_add_shared_fence(resv, &rq->fence);
+	reservation_object_unlock(resv);
+}
+
 void i915_vma_move_to_active(struct i915_vma *vma,
 			     struct i915_request *rq,
 			     unsigned int flags)
@@ -1921,6 +1914,8 @@ void i915_vma_move_to_active(struct i915_vma *vma,
 
 	if (flags & EXEC_OBJECT_NEEDS_FENCE)
 		i915_gem_active_set(&vma->last_fence, rq);
+
+	export_fence(vma, rq, flags);
 }
 
 static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c
index b5e87fcdcdae..358fc81f6c99 100644
--- a/drivers/gpu/drm/i915/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/selftests/huge_pages.c
@@ -998,10 +998,6 @@ static int gpu_write(struct i915_vma *vma,
 
 	i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
 
-	reservation_object_lock(vma->resv, NULL);
-	reservation_object_add_excl_fence(vma->resv, &rq->fence);
-	reservation_object_unlock(vma->resv);
-
 err_request:
 	i915_request_add(rq);
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
index a4900091ae3d..11427aae0853 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
@@ -225,10 +225,6 @@ static int gpu_set(struct drm_i915_gem_object *obj,
 	i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
 	i915_vma_unpin(vma);
 
-	reservation_object_lock(obj->resv, NULL);
-	reservation_object_add_excl_fence(obj->resv, &rq->fence);
-	reservation_object_unlock(obj->resv);
-
 	i915_request_add(rq);
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index 3169b72180d0..a78f8bdb8222 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -179,10 +179,6 @@ static int gpu_fill(struct drm_i915_gem_object *obj,
 	i915_vma_move_to_active(vma, rq, 0);
 	i915_vma_unpin(vma);
 
-	reservation_object_lock(obj->resv, NULL);
-	reservation_object_add_excl_fence(obj->resv, &rq->fence);
-	reservation_object_unlock(obj->resv);
-
 	i915_request_add(rq);
 
 	return 0;
-- 
2.18.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


* [PATCH 15/37] drm/i915: Export i915_request_skip()
  2018-06-29  7:53 [PATCH 01/37] drm/i915/gtt: Add read only pages to gen8_pte_encode Chris Wilson
                   ` (12 preceding siblings ...)
  2018-06-29  7:53 ` [PATCH 14/37] drm/i915: Refactor export_fence() after i915_vma_move_to_active() Chris Wilson
@ 2018-06-29  7:53 ` Chris Wilson
  2018-06-29 12:10   ` Tvrtko Ursulin
  2018-06-29  7:53 ` [PATCH 16/37] drm/i915: Start returning an error from i915_vma_move_to_active() Chris Wilson
                   ` (26 subsequent siblings)
  40 siblings, 1 reply; 68+ messages in thread
From: Chris Wilson @ 2018-06-29  7:53 UTC (permalink / raw)
  To: intel-gfx

In the next patch, we will want to start skipping requests on failing to
complete their payloads. So export the utility function currently used
to make requests inoperable following a failed GPU reset.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c     | 25 +++----------------------
 drivers/gpu/drm/i915/i915_request.c | 21 +++++++++++++++++++++
 drivers/gpu/drm/i915/i915_request.h |  2 ++
 3 files changed, 26 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index c13d5b78a02e..8ae293c75ae9 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3084,25 +3084,6 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv)
 	return err;
 }
 
-static void skip_request(struct i915_request *request)
-{
-	void *vaddr = request->ring->vaddr;
-	u32 head;
-
-	/* As this request likely depends on state from the lost
-	 * context, clear out all the user operations leaving the
-	 * breadcrumb at the end (so we get the fence notifications).
-	 */
-	head = request->head;
-	if (request->postfix < head) {
-		memset(vaddr + head, 0, request->ring->size - head);
-		head = 0;
-	}
-	memset(vaddr + head, 0, request->postfix - head);
-
-	dma_fence_set_error(&request->fence, -EIO);
-}
-
 static void engine_skip_context(struct i915_request *request)
 {
 	struct intel_engine_cs *engine = request->engine;
@@ -3117,10 +3098,10 @@ static void engine_skip_context(struct i915_request *request)
 
 	list_for_each_entry_continue(request, &engine->timeline.requests, link)
 		if (request->gem_context == hung_ctx)
-			skip_request(request);
+			i915_request_skip(request, -EIO);
 
 	list_for_each_entry(request, &timeline->requests, link)
-		skip_request(request);
+		i915_request_skip(request, -EIO);
 
 	spin_unlock(&timeline->lock);
 	spin_unlock_irqrestore(&engine->timeline.lock, flags);
@@ -3163,7 +3144,7 @@ i915_gem_reset_request(struct intel_engine_cs *engine,
 
 	if (stalled) {
 		i915_gem_context_mark_guilty(request->gem_context);
-		skip_request(request);
+		i915_request_skip(request, -EIO);
 
 		/* If this context is now banned, skip all pending requests. */
 		if (i915_gem_context_is_banned(request->gem_context))
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index d618e7127e88..51f28786b0e4 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -962,6 +962,27 @@ i915_request_await_object(struct i915_request *to,
 	return ret;
 }
 
+void i915_request_skip(struct i915_request *rq, int error)
+{
+	void *vaddr = rq->ring->vaddr;
+	u32 head;
+
+	GEM_BUG_ON(!IS_ERR_VALUE((long)error));
+	dma_fence_set_error(&rq->fence, error);
+
+	/*
+	 * As this request likely depends on state from the lost
+	 * context, clear out all the user operations leaving the
+	 * breadcrumb at the end (so we get the fence notifications).
+	 */
+	head = rq->infix;
+	if (rq->postfix < head) {
+		memset(vaddr + head, 0, rq->ring->size - head);
+		head = 0;
+	}
+	memset(vaddr + head, 0, rq->postfix - head);
+}
+
 /*
  * NB: This function is not allowed to fail. Doing so would mean the the
  * request is not being tracked for completion but the work itself is
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 7ee220ded9c9..a355a081485f 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -258,6 +258,8 @@ void i915_request_add(struct i915_request *rq);
 void __i915_request_submit(struct i915_request *request);
 void i915_request_submit(struct i915_request *request);
 
+void i915_request_skip(struct i915_request *request, int error);
+
 void __i915_request_unsubmit(struct i915_request *request);
 void i915_request_unsubmit(struct i915_request *request);
 
-- 
2.18.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


* [PATCH 16/37] drm/i915: Start returning an error from i915_vma_move_to_active()
  2018-06-29  7:53 [PATCH 01/37] drm/i915/gtt: Add read only pages to gen8_pte_encode Chris Wilson
                   ` (13 preceding siblings ...)
  2018-06-29  7:53 ` [PATCH 15/37] drm/i915: Export i915_request_skip() Chris Wilson
@ 2018-06-29  7:53 ` Chris Wilson
  2018-06-29 12:17   ` Tvrtko Ursulin
  2018-06-29  7:53 ` [PATCH 17/37] drm/i915: Track vma activity per fence.context, not per engine Chris Wilson
                   ` (25 subsequent siblings)
  40 siblings, 1 reply; 68+ messages in thread
From: Chris Wilson @ 2018-06-29  7:53 UTC (permalink / raw)
  To: intel-gfx

Handling such a late error in request construction is tricky, but to
accommodate future patches which may allocate here, we could potentially
fail. To handle an error after we have already adjusted global state to
track the new request, we must still finish and submit the request. But
we don't want to use the request, as not everything is being tracked by
it, so we opt to cancel the commands inside the request.
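
The resulting caller pattern looks roughly like this abridged sketch,
where setup_payload() is a hypothetical stand-in for whichever step may
now fail:

static int emit_payload(struct i915_request *rq,
			struct i915_vma *vma,
			unsigned int flags)
{
	int err;

	err = setup_payload(rq);	/* hypothetical, e.g. emitting commands */
	if (err)
		goto skip_request;

	err = i915_vma_move_to_active(vma, rq, flags);
	if (err)
		goto skip_request;

	i915_request_add(rq);
	return 0;

skip_request:
	i915_request_skip(rq, err);	/* wipe the commands, keep the breadcrumb */
	i915_request_add(rq);		/* the request must still be submitted */
	return err;
}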

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gvt/scheduler.c          |  6 ++++-
 drivers/gpu/drm/i915/i915_drv.h               |  6 ++---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c    | 25 +++++++++++++------
 drivers/gpu/drm/i915/i915_gem_render_state.c  |  2 +-
 drivers/gpu/drm/i915/selftests/huge_pages.c   |  9 +++++--
 .../drm/i915/selftests/i915_gem_coherency.c   |  4 +--
 .../gpu/drm/i915/selftests/i915_gem_context.c | 12 +++++++--
 .../gpu/drm/i915/selftests/i915_gem_object.c  |  7 +++---
 drivers/gpu/drm/i915/selftests/i915_request.c |  8 ++++--
 .../gpu/drm/i915/selftests/intel_hangcheck.c  | 11 ++++++--
 drivers/gpu/drm/i915/selftests/intel_lrc.c    | 11 ++++++--
 .../drm/i915/selftests/intel_workarounds.c    |  5 +++-
 12 files changed, 78 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 928818f218f7..b0e566956b8d 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -476,7 +476,11 @@ static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)
 			i915_gem_obj_finish_shmem_access(bb->obj);
 			bb->accessing = false;
 
-			i915_vma_move_to_active(bb->vma, workload->req, 0);
+			ret = i915_vma_move_to_active(bb->vma,
+						      workload->req,
+						      0);
+			if (ret)
+				goto err;
 		}
 	}
 	return 0;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index af783a32abd8..33e5ac8bf0b8 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3104,9 +3104,9 @@ i915_gem_obj_finish_shmem_access(struct drm_i915_gem_object *obj)
 }
 
 int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
-void i915_vma_move_to_active(struct i915_vma *vma,
-			     struct i915_request *rq,
-			     unsigned int flags);
+int __must_check i915_vma_move_to_active(struct i915_vma *vma,
+					 struct i915_request *rq,
+					 unsigned int flags);
 int i915_gem_dumb_create(struct drm_file *file_priv,
 			 struct drm_device *dev,
 			 struct drm_mode_create_dumb *args);
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 91f20445147f..97136e4ce91d 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1165,12 +1165,16 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 		goto err_request;
 
 	GEM_BUG_ON(!reservation_object_test_signaled_rcu(batch->resv, true));
-	i915_vma_move_to_active(batch, rq, 0);
-	i915_vma_unpin(batch);
+	err = i915_vma_move_to_active(batch, rq, 0);
+	if (err)
+		goto skip_request;
 
-	i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	if (err)
+		goto skip_request;
 
 	rq->batch = batch;
+	i915_vma_unpin(batch);
 
 	cache->rq = rq;
 	cache->rq_cmd = cmd;
@@ -1179,6 +1183,8 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 	/* Return with batch mapping (cmd) still pinned */
 	return 0;
 
+skip_request:
+	i915_request_skip(rq, err);
 err_request:
 	i915_request_add(rq);
 err_unpin:
@@ -1818,7 +1824,11 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
 		unsigned int flags = eb->flags[i];
 		struct i915_vma *vma = eb->vma[i];
 
-		i915_vma_move_to_active(vma, eb->request, flags);
+		err = i915_vma_move_to_active(vma, eb->request, flags);
+		if (unlikely(err)) {
+			i915_request_skip(eb->request, err);
+			return err;
+		}
 
 		__eb_unreserve_vma(vma, flags);
 		vma->exec_flags = NULL;
@@ -1877,9 +1887,9 @@ static void export_fence(struct i915_vma *vma,
 	reservation_object_unlock(resv);
 }
 
-void i915_vma_move_to_active(struct i915_vma *vma,
-			     struct i915_request *rq,
-			     unsigned int flags)
+int i915_vma_move_to_active(struct i915_vma *vma,
+			    struct i915_request *rq,
+			    unsigned int flags)
 {
 	struct drm_i915_gem_object *obj = vma->obj;
 	const unsigned int idx = rq->engine->id;
@@ -1916,6 +1926,7 @@ void i915_vma_move_to_active(struct i915_vma *vma,
 		i915_gem_active_set(&vma->last_fence, rq);
 
 	export_fence(vma, rq, flags);
+	return 0;
 }
 
 static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
index 3210cedfa46c..90baf9086d0a 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.c
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
@@ -222,7 +222,7 @@ int i915_gem_render_state_emit(struct i915_request *rq)
 			goto err_unpin;
 	}
 
-	i915_vma_move_to_active(so.vma, rq, 0);
+	err = i915_vma_move_to_active(so.vma, rq, 0);
 err_unpin:
 	i915_vma_unpin(so.vma);
 err_vma:
diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c
index 358fc81f6c99..d33e20940e0a 100644
--- a/drivers/gpu/drm/i915/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/selftests/huge_pages.c
@@ -985,7 +985,10 @@ static int gpu_write(struct i915_vma *vma,
 		goto err_request;
 	}
 
-	i915_vma_move_to_active(batch, rq, 0);
+	err = i915_vma_move_to_active(batch, rq, 0);
+	if (err)
+		goto err_request;
+
 	i915_gem_object_set_active_reference(batch->obj);
 	i915_vma_unpin(batch);
 	i915_vma_close(batch);
@@ -996,7 +999,9 @@ static int gpu_write(struct i915_vma *vma,
 	if (err)
 		goto err_request;
 
-	i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	if (err)
+		i915_request_skip(rq, err);
 
 err_request:
 	i915_request_add(rq);
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
index 11427aae0853..328585459c67 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
@@ -222,12 +222,12 @@ static int gpu_set(struct drm_i915_gem_object *obj,
 	}
 	intel_ring_advance(rq, cs);
 
-	i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
 	i915_vma_unpin(vma);
 
 	i915_request_add(rq);
 
-	return 0;
+	return err;
 }
 
 static bool always_valid(struct drm_i915_private *i915)
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index a78f8bdb8222..5bbffe9fcdda 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -171,18 +171,26 @@ static int gpu_fill(struct drm_i915_gem_object *obj,
 	if (err)
 		goto err_request;
 
-	i915_vma_move_to_active(batch, rq, 0);
+	err = i915_vma_move_to_active(batch, rq, 0);
+	if (err)
+		goto skip_request;
+
+	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	if (err)
+		goto skip_request;
+
 	i915_gem_object_set_active_reference(batch->obj);
 	i915_vma_unpin(batch);
 	i915_vma_close(batch);
 
-	i915_vma_move_to_active(vma, rq, 0);
 	i915_vma_unpin(vma);
 
 	i915_request_add(rq);
 
 	return 0;
 
+skip_request:
+	i915_request_skip(rq, err);
 err_request:
 	i915_request_add(rq);
 err_batch:
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
index 549707b9d738..eb863b45b7c7 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
@@ -454,12 +454,13 @@ static int make_obj_busy(struct drm_i915_gem_object *obj)
 		return PTR_ERR(rq);
 	}
 
-	i915_vma_move_to_active(vma, rq, 0);
+	err = i915_vma_move_to_active(vma, rq, 0);
 	i915_request_add(rq);
 
-	i915_gem_object_set_active_reference(obj);
+	__i915_gem_object_release_unless_active(obj);
 	i915_vma_unpin(vma);
-	return 0;
+
+	return err;
 }
 
 static bool assert_mmap_offset(struct drm_i915_private *i915,
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index 521ae4a90ddf..c27f77a24ce0 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -678,7 +678,9 @@ static int live_all_engines(void *arg)
 			i915_gem_object_set_active_reference(batch->obj);
 		}
 
-		i915_vma_move_to_active(batch, request[id], 0);
+		err = i915_vma_move_to_active(batch, request[id], 0);
+		GEM_BUG_ON(err);
+
 		i915_request_get(request[id]);
 		i915_request_add(request[id]);
 	}
@@ -788,7 +790,9 @@ static int live_sequential_engines(void *arg)
 		GEM_BUG_ON(err);
 		request[id]->batch = batch;
 
-		i915_vma_move_to_active(batch, request[id], 0);
+		err = i915_vma_move_to_active(batch, request[id], 0);
+		GEM_BUG_ON(err);
+
 		i915_gem_object_set_active_reference(batch->obj);
 		i915_vma_get(batch);
 
diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
index fe7d3190ebfe..e11df2743704 100644
--- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
@@ -130,13 +130,19 @@ static int emit_recurse_batch(struct hang *h,
 	if (err)
 		goto unpin_vma;
 
-	i915_vma_move_to_active(vma, rq, 0);
+	err = i915_vma_move_to_active(vma, rq, 0);
+	if (err)
+		goto unpin_hws;
+
 	if (!i915_gem_object_has_active_reference(vma->obj)) {
 		i915_gem_object_get(vma->obj);
 		i915_gem_object_set_active_reference(vma->obj);
 	}
 
-	i915_vma_move_to_active(hws, rq, 0);
+	err = i915_vma_move_to_active(hws, rq, 0);
+	if (err)
+		goto unpin_hws;
+
 	if (!i915_gem_object_has_active_reference(hws->obj)) {
 		i915_gem_object_get(hws->obj);
 		i915_gem_object_set_active_reference(hws->obj);
@@ -205,6 +211,7 @@ static int emit_recurse_batch(struct hang *h,
 
 	err = rq->engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, flags);
 
+unpin_hws:
 	i915_vma_unpin(hws);
 unpin_vma:
 	i915_vma_unpin(vma);
diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
index 90841415cf7a..146908b90787 100644
--- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
+++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
@@ -104,13 +104,19 @@ static int emit_recurse_batch(struct spinner *spin,
 	if (err)
 		goto unpin_vma;
 
-	i915_vma_move_to_active(vma, rq, 0);
+	err = i915_vma_move_to_active(vma, rq, 0);
+	if (err)
+		goto unpin_hws;
+
 	if (!i915_gem_object_has_active_reference(vma->obj)) {
 		i915_gem_object_get(vma->obj);
 		i915_gem_object_set_active_reference(vma->obj);
 	}
 
-	i915_vma_move_to_active(hws, rq, 0);
+	err = i915_vma_move_to_active(hws, rq, 0);
+	if (err)
+		goto unpin_hws;
+
 	if (!i915_gem_object_has_active_reference(hws->obj)) {
 		i915_gem_object_get(hws->obj);
 		i915_gem_object_set_active_reference(hws->obj);
@@ -134,6 +140,7 @@ static int emit_recurse_batch(struct spinner *spin,
 
 	err = rq->engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, 0);
 
+unpin_hws:
 	i915_vma_unpin(hws);
 unpin_vma:
 	i915_vma_unpin(vma);
diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
index e1ea2d2bedd2..c100153cb494 100644
--- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
@@ -49,6 +49,10 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
 		goto err_pin;
 	}
 
+	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	if (err)
+		goto err_req;
+
 	srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
 	if (INTEL_GEN(ctx->i915) >= 8)
 		srm++;
@@ -67,7 +71,6 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
 	}
 	intel_ring_advance(rq, cs);
 
-	i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
 	reservation_object_lock(vma->resv, NULL);
 	reservation_object_add_excl_fence(vma->resv, &rq->fence);
 	reservation_object_unlock(vma->resv);
-- 
2.18.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


* [PATCH 17/37] drm/i915: Track vma activity per fence.context, not per engine
  2018-06-29  7:53 [PATCH 01/37] drm/i915/gtt: Add read only pages to gen8_pte_encode Chris Wilson
                   ` (14 preceding siblings ...)
  2018-06-29  7:53 ` [PATCH 16/37] drm/i915: Start returning an error from i915_vma_move_to_active() Chris Wilson
@ 2018-06-29  7:53 ` Chris Wilson
  2018-06-29 14:54   ` Tvrtko Ursulin
  2018-06-29 22:03   ` [PATCH v2] " Chris Wilson
  2018-06-29  7:53 ` [PATCH 18/37] drm/i915: Track the last-active inside the i915_vma Chris Wilson
                   ` (24 subsequent siblings)
  40 siblings, 2 replies; 68+ messages in thread
From: Chris Wilson @ 2018-06-29  7:53 UTC (permalink / raw)
  To: intel-gfx

In the next patch, we will want to be able to use more flexible request
timelines that can hop between engines. From the vma's point of view, we
can then no longer rely on a request being bound to a single engine, and
so cannot ensure that different requests are ordered through a per-engine
timeline; instead we must track activity across all timelines. (We track
activity on the vma itself to prevent unbinding it from the HW before the
HW has finished accessing it.)

For now, let's just ignore the potential issue of using 64b indices with
radixtrees on 32b machines; it's unlikely to be a problem in practice...

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h            |   3 -
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  61 -------
 drivers/gpu/drm/i915/i915_gem_gtt.c        |   4 +-
 drivers/gpu/drm/i915/i915_gpu_error.c      |  14 +-
 drivers/gpu/drm/i915/i915_gpu_error.h      |   2 +-
 drivers/gpu/drm/i915/i915_request.h        |   1 +
 drivers/gpu/drm/i915/i915_vma.c            | 180 ++++++++++++++++++---
 drivers/gpu/drm/i915/i915_vma.h            |  42 ++---
 8 files changed, 173 insertions(+), 134 deletions(-)
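
As a minimal sketch of the idea (illustrative only, not part of the patch;
mark_vma_active() is an invented name, the real flow lives in
i915_vma_move_to_active() below): activity is now keyed by the request's
timeline, rq->fence.context, rather than by rq->engine->id:

	static int mark_vma_active(struct i915_vma *vma, struct i915_request *rq)
	{
		struct i915_gem_active *active;

		/* one tracker per timeline, looked up in vma->active_rt */
		active = lookup_active(vma, rq->fence.context);
		if (IS_ERR(active))
			return PTR_ERR(active);

		/* replaces i915_gem_active_set(&vma->last_read[engine_id], rq) */
		i915_gem_active_set(active, rq);
		return 0;
	}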

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 33e5ac8bf0b8..a786a3b6686f 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3104,9 +3104,6 @@ i915_gem_obj_finish_shmem_access(struct drm_i915_gem_object *obj)
 }
 
 int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
-int __must_check i915_vma_move_to_active(struct i915_vma *vma,
-					 struct i915_request *rq,
-					 unsigned int flags);
 int i915_gem_dumb_create(struct drm_file *file_priv,
 			 struct drm_device *dev,
 			 struct drm_mode_create_dumb *args);
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 97136e4ce91d..3f0c612d42e7 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1868,67 +1868,6 @@ static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
 	return true;
 }
 
-static void export_fence(struct i915_vma *vma,
-			 struct i915_request *rq,
-			 unsigned int flags)
-{
-	struct reservation_object *resv = vma->resv;
-
-	/*
-	 * Ignore errors from failing to allocate the new fence, we can't
-	 * handle an error right now. Worst case should be missed
-	 * synchronisation leading to rendering corruption.
-	 */
-	reservation_object_lock(resv, NULL);
-	if (flags & EXEC_OBJECT_WRITE)
-		reservation_object_add_excl_fence(resv, &rq->fence);
-	else if (reservation_object_reserve_shared(resv) == 0)
-		reservation_object_add_shared_fence(resv, &rq->fence);
-	reservation_object_unlock(resv);
-}
-
-int i915_vma_move_to_active(struct i915_vma *vma,
-			    struct i915_request *rq,
-			    unsigned int flags)
-{
-	struct drm_i915_gem_object *obj = vma->obj;
-	const unsigned int idx = rq->engine->id;
-
-	lockdep_assert_held(&rq->i915->drm.struct_mutex);
-	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
-
-	/*
-	 * Add a reference if we're newly entering the active list.
-	 * The order in which we add operations to the retirement queue is
-	 * vital here: mark_active adds to the start of the callback list,
-	 * such that subsequent callbacks are called first. Therefore we
-	 * add the active reference first and queue for it to be dropped
-	 * *last*.
-	 */
-	if (!i915_vma_is_active(vma))
-		obj->active_count++;
-	i915_vma_set_active(vma, idx);
-	i915_gem_active_set(&vma->last_read[idx], rq);
-	list_move_tail(&vma->vm_link, &vma->vm->active_list);
-
-	obj->write_domain = 0;
-	if (flags & EXEC_OBJECT_WRITE) {
-		obj->write_domain = I915_GEM_DOMAIN_RENDER;
-
-		if (intel_fb_obj_invalidate(obj, ORIGIN_CS))
-			i915_gem_active_set(&obj->frontbuffer_write, rq);
-
-		obj->read_domains = 0;
-	}
-	obj->read_domains |= I915_GEM_GPU_DOMAINS;
-
-	if (flags & EXEC_OBJECT_NEEDS_FENCE)
-		i915_gem_active_set(&vma->last_fence, rq);
-
-	export_fence(vma, rq, flags);
-	return 0;
-}
-
 static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
 {
 	u32 *cs;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index f46d873a7530..acc779770e47 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2006,7 +2006,6 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
 	struct drm_i915_private *i915 = ppgtt->base.vm.i915;
 	struct i915_ggtt *ggtt = &i915->ggtt;
 	struct i915_vma *vma;
-	int i;
 
 	GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
 	GEM_BUG_ON(size > ggtt->vm.total);
@@ -2015,8 +2014,7 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
 	if (!vma)
 		return ERR_PTR(-ENOMEM);
 
-	for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
-		init_request_active(&vma->last_read[i], NULL);
+	INIT_RADIX_TREE(&vma->active_rt, GFP_KERNEL);
 	init_request_active(&vma->last_fence, NULL);
 
 	vma->vm = &ggtt->vm;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index df524c9cad40..8c81cf3aa182 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -335,21 +335,16 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m,
 				struct drm_i915_error_buffer *err,
 				int count)
 {
-	int i;
-
 	err_printf(m, "%s [%d]:\n", name, count);
 
 	while (count--) {
-		err_printf(m, "    %08x_%08x %8u %02x %02x [ ",
+		err_printf(m, "    %08x_%08x %8u %02x %02x %02x",
 			   upper_32_bits(err->gtt_offset),
 			   lower_32_bits(err->gtt_offset),
 			   err->size,
 			   err->read_domains,
-			   err->write_domain);
-		for (i = 0; i < I915_NUM_ENGINES; i++)
-			err_printf(m, "%02x ", err->rseqno[i]);
-
-		err_printf(m, "] %02x", err->wseqno);
+			   err->write_domain,
+			   err->wseqno);
 		err_puts(m, tiling_flag(err->tiling));
 		err_puts(m, dirty_flag(err->dirty));
 		err_puts(m, purgeable_flag(err->purgeable));
@@ -1021,13 +1016,10 @@ static void capture_bo(struct drm_i915_error_buffer *err,
 		       struct i915_vma *vma)
 {
 	struct drm_i915_gem_object *obj = vma->obj;
-	int i;
 
 	err->size = obj->base.size;
 	err->name = obj->base.name;
 
-	for (i = 0; i < I915_NUM_ENGINES; i++)
-		err->rseqno[i] = __active_get_seqno(&vma->last_read[i]);
 	err->wseqno = __active_get_seqno(&obj->frontbuffer_write);
 	err->engine = __active_get_engine_id(&obj->frontbuffer_write);
 
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index 58910f1dc67c..f893a4e8b783 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -177,7 +177,7 @@ struct i915_gpu_state {
 	struct drm_i915_error_buffer {
 		u32 size;
 		u32 name;
-		u32 rseqno[I915_NUM_ENGINES], wseqno;
+		u32 wseqno;
 		u64 gtt_offset;
 		u32 read_domains;
 		u32 write_domain;
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index a355a081485f..e1c9365dfefb 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -380,6 +380,7 @@ static inline void
 init_request_active(struct i915_gem_active *active,
 		    i915_gem_retire_fn retire)
 {
+	RCU_INIT_POINTER(active->request, NULL);
 	INIT_LIST_HEAD(&active->link);
 	active->retire = retire ?: i915_gem_retire_noop;
 }
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index d0e606e9b27a..23852417dcbd 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -63,18 +63,18 @@ static void vma_print_allocator(struct i915_vma *vma, const char *reason)
 
 #endif
 
+struct i915_vma_active {
+	struct i915_gem_active base;
+	struct i915_vma *vma;
+};
+
 static void
-i915_vma_retire(struct i915_gem_active *active, struct i915_request *rq)
+__i915_vma_retire(struct i915_vma *vma, struct i915_request *rq)
 {
-	const unsigned int idx = rq->engine->id;
-	struct i915_vma *vma =
-		container_of(active, struct i915_vma, last_read[idx]);
 	struct drm_i915_gem_object *obj = vma->obj;
 
-	GEM_BUG_ON(!i915_vma_has_active_engine(vma, idx));
-
-	i915_vma_clear_active(vma, idx);
-	if (i915_vma_is_active(vma))
+	GEM_BUG_ON(!i915_vma_is_active(vma));
+	if (--vma->active_count)
 		return;
 
 	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
@@ -108,6 +108,19 @@ i915_vma_retire(struct i915_gem_active *active, struct i915_request *rq)
 	}
 }
 
+static void
+i915_vma_retire(struct i915_gem_active *base, struct i915_request *rq)
+{
+	struct i915_vma_active *active =
+		container_of(base, typeof(*active), base);
+	struct i915_vma *vma = active->vma;
+
+	GEM_BUG_ON(base != radix_tree_lookup(&vma->active_rt,
+					     rq->fence.context));
+
+	__i915_vma_retire(vma, rq);
+}
+
 static struct i915_vma *
 vma_create(struct drm_i915_gem_object *obj,
 	   struct i915_address_space *vm,
@@ -115,7 +128,6 @@ vma_create(struct drm_i915_gem_object *obj,
 {
 	struct i915_vma *vma;
 	struct rb_node *rb, **p;
-	int i;
 
 	/* The aliasing_ppgtt should never be used directly! */
 	GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->vm);
@@ -124,8 +136,8 @@ vma_create(struct drm_i915_gem_object *obj,
 	if (vma == NULL)
 		return ERR_PTR(-ENOMEM);
 
-	for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
-		init_request_active(&vma->last_read[i], i915_vma_retire);
+	INIT_RADIX_TREE(&vma->active_rt, GFP_KERNEL);
+
 	init_request_active(&vma->last_fence, NULL);
 	vma->vm = vm;
 	vma->ops = &vm->vma_ops;
@@ -778,13 +790,12 @@ void i915_vma_reopen(struct i915_vma *vma)
 static void __i915_vma_destroy(struct i915_vma *vma)
 {
 	struct drm_i915_private *i915 = vma->vm->i915;
-	int i;
+	struct radix_tree_iter iter;
+	void __rcu **slot;
 
 	GEM_BUG_ON(vma->node.allocated);
 	GEM_BUG_ON(vma->fence);
 
-	for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
-		GEM_BUG_ON(i915_gem_active_isset(&vma->last_read[i]));
 	GEM_BUG_ON(i915_gem_active_isset(&vma->last_fence));
 
 	list_del(&vma->obj_link);
@@ -795,6 +806,17 @@ static void __i915_vma_destroy(struct i915_vma *vma)
 	if (!i915_vma_is_ggtt(vma))
 		i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm));
 
+	rcu_read_lock();
+	radix_tree_for_each_slot(slot, &vma->active_rt, &iter, 0) {
+		struct i915_vma_active *active = rcu_dereference_raw(*slot);
+
+		GEM_BUG_ON(i915_gem_active_isset(&active->base));
+		kfree(active);
+
+		radix_tree_delete(&vma->active_rt, iter.index);
+	}
+	rcu_read_unlock();
+
 	kmem_cache_free(i915->vmas, vma);
 }
 
@@ -859,9 +881,111 @@ void i915_vma_revoke_mmap(struct i915_vma *vma)
 		list_del(&vma->obj->userfault_link);
 }
 
+static void export_fence(struct i915_vma *vma,
+			 struct i915_request *rq,
+			 unsigned int flags)
+{
+	struct reservation_object *resv = vma->resv;
+
+	/*
+	 * Ignore errors from failing to allocate the new fence, we can't
+	 * handle an error right now. Worst case should be missed
+	 * synchronisation leading to rendering corruption.
+	 */
+	reservation_object_lock(resv, NULL);
+	if (flags & EXEC_OBJECT_WRITE)
+		reservation_object_add_excl_fence(resv, &rq->fence);
+	else if (reservation_object_reserve_shared(resv) == 0)
+		reservation_object_add_shared_fence(resv, &rq->fence);
+	reservation_object_unlock(resv);
+}
+
+static struct i915_gem_active *lookup_active(struct i915_vma *vma, u64 idx)
+{
+	struct i915_vma_active *active;
+	int err;
+
+	/*
+	 * XXX Note that the radix_tree uses unsigned longs for it indices,
+	 * a problem for us on i386 with 32bit longs. However, the likelihood
+	 * of 2 timelines being used on the same VMA aliasing is minimal,
+	 * and further reduced by that both timelines must be active
+	 * simultaneously to confuse us.
+	 */
+	active = radix_tree_lookup(&vma->active_rt, idx);
+	if (likely(active)) {
+		GEM_BUG_ON(i915_gem_active_isset(&active->base) &&
+			   idx != i915_gem_active_peek(&active->base,
+						       &vma->vm->i915->drm.struct_mutex)->fence.context);
+		return &active->base;
+	}
+
+	active = kmalloc(sizeof(*active), GFP_KERNEL);
+	if (unlikely(!active))
+		return ERR_PTR(-ENOMEM);
+
+	init_request_active(&active->base, i915_vma_retire);
+	active->vma = vma;
+
+	err = radix_tree_insert(&vma->active_rt, idx, active);
+	if (unlikely(err)) {
+		kfree(active);
+		return ERR_PTR(err);
+	}
+
+	return &active->base;
+}
+
+int i915_vma_move_to_active(struct i915_vma *vma,
+			    struct i915_request *rq,
+			    unsigned int flags)
+{
+	struct drm_i915_gem_object *obj = vma->obj;
+	struct i915_gem_active *active;
+
+	lockdep_assert_held(&rq->i915->drm.struct_mutex);
+	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
+
+	active = lookup_active(vma, rq->fence.context);
+	if (IS_ERR(active))
+		return PTR_ERR(active);
+
+	/*
+	 * Add a reference if we're newly entering the active list.
+	 * The order in which we add operations to the retirement queue is
+	 * vital here: mark_active adds to the start of the callback list,
+	 * such that subsequent callbacks are called first. Therefore we
+	 * add the active reference first and queue for it to be dropped
+	 * *last*.
+	 */
+	if (!i915_gem_active_isset(active) && !vma->active_count++) {
+		list_move_tail(&vma->vm_link, &vma->vm->active_list);
+		obj->active_count++;
+	}
+	i915_gem_active_set(active, rq);
+	GEM_BUG_ON(!i915_vma_is_active(vma));
+	GEM_BUG_ON(!obj->active_count);
+
+	obj->write_domain = 0;
+	if (flags & EXEC_OBJECT_WRITE) {
+		obj->write_domain = I915_GEM_DOMAIN_RENDER;
+
+		if (intel_fb_obj_invalidate(obj, ORIGIN_CS))
+			i915_gem_active_set(&obj->frontbuffer_write, rq);
+
+		obj->read_domains = 0;
+	}
+	obj->read_domains |= I915_GEM_GPU_DOMAINS;
+
+	if (flags & EXEC_OBJECT_NEEDS_FENCE)
+		i915_gem_active_set(&vma->last_fence, rq);
+
+	export_fence(vma, rq, flags);
+	return 0;
+}
+
 int i915_vma_unbind(struct i915_vma *vma)
 {
-	unsigned long active;
 	int ret;
 
 	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
@@ -871,9 +995,9 @@ int i915_vma_unbind(struct i915_vma *vma)
 	 * have side-effects such as unpinning or even unbinding this vma.
 	 */
 	might_sleep();
-	active = i915_vma_get_active(vma);
-	if (active) {
-		int idx;
+	if (i915_vma_is_active(vma)) {
+		struct radix_tree_iter iter;
+		void __rcu **slot;
 
 		/*
 		 * When a closed VMA is retired, it is unbound - eek.
@@ -890,18 +1014,24 @@ int i915_vma_unbind(struct i915_vma *vma)
 		 */
 		__i915_vma_pin(vma);
 
-		for_each_active(active, idx) {
-			ret = i915_gem_active_retire(&vma->last_read[idx],
+		rcu_read_lock();
+		radix_tree_for_each_slot(slot, &vma->active_rt, &iter, 0) {
+			struct i915_vma_active *active =
+				rcu_dereference_raw(*slot);
+			rcu_read_unlock();
+
+			ret = i915_gem_active_retire(&active->base,
 						     &vma->vm->i915->drm.struct_mutex);
 			if (ret)
-				break;
-		}
+				goto unpin;
 
-		if (!ret) {
-			ret = i915_gem_active_retire(&vma->last_fence,
-						     &vma->vm->i915->drm.struct_mutex);
+			rcu_read_lock();
 		}
+		rcu_read_unlock();
 
+		ret = i915_gem_active_retire(&vma->last_fence,
+					     &vma->vm->i915->drm.struct_mutex);
+unpin:
 		__i915_vma_unpin(vma);
 		if (ret)
 			return ret;
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 66a228931517..94fdf4917e95 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -26,6 +26,7 @@
 #define __I915_VMA_H__
 
 #include <linux/io-mapping.h>
+#include <linux/radix-tree.h>
 
 #include <drm/drm_mm.h>
 
@@ -94,8 +95,8 @@ struct i915_vma {
 #define I915_VMA_USERFAULT	BIT(I915_VMA_USERFAULT_BIT)
 #define I915_VMA_GGTT_WRITE	BIT(12)
 
-	unsigned int active;
-	struct i915_gem_active last_read[I915_NUM_ENGINES];
+	unsigned int active_count;
+	struct radix_tree_root active_rt;
 	struct i915_gem_active last_fence;
 
 	/**
@@ -138,6 +139,15 @@ i915_vma_instance(struct drm_i915_gem_object *obj,
 
 void i915_vma_unpin_and_release(struct i915_vma **p_vma);
 
+static inline bool i915_vma_is_active(struct i915_vma *vma)
+{
+	return vma->active_count;
+}
+
+int __must_check i915_vma_move_to_active(struct i915_vma *vma,
+					 struct i915_request *rq,
+					 unsigned int flags);
+
 static inline bool i915_vma_is_ggtt(const struct i915_vma *vma)
 {
 	return vma->flags & I915_VMA_GGTT;
@@ -187,34 +197,6 @@ static inline bool i915_vma_has_userfault(const struct i915_vma *vma)
 	return test_bit(I915_VMA_USERFAULT_BIT, &vma->flags);
 }
 
-static inline unsigned int i915_vma_get_active(const struct i915_vma *vma)
-{
-	return vma->active;
-}
-
-static inline bool i915_vma_is_active(const struct i915_vma *vma)
-{
-	return i915_vma_get_active(vma);
-}
-
-static inline void i915_vma_set_active(struct i915_vma *vma,
-				       unsigned int engine)
-{
-	vma->active |= BIT(engine);
-}
-
-static inline void i915_vma_clear_active(struct i915_vma *vma,
-					 unsigned int engine)
-{
-	vma->active &= ~BIT(engine);
-}
-
-static inline bool i915_vma_has_active_engine(const struct i915_vma *vma,
-					      unsigned int engine)
-{
-	return vma->active & BIT(engine);
-}
-
 static inline u32 i915_ggtt_offset(const struct i915_vma *vma)
 {
 	GEM_BUG_ON(!i915_vma_is_ggtt(vma));
-- 
2.18.0

* [PATCH 18/37] drm/i915: Track the last-active inside the i915_vma
  2018-06-29  7:53 [PATCH 01/37] drm/i915/gtt: Add read only pages to gen8_pte_encode Chris Wilson
                   ` (15 preceding siblings ...)
  2018-06-29  7:53 ` [PATCH 17/37] drm/i915: Track vma activity per fence.context, not per engine Chris Wilson
@ 2018-06-29  7:53 ` Chris Wilson
  2018-06-29 22:01   ` [PATCH v2] " Chris Wilson
  2018-06-29  7:53 ` [PATCH 19/37] drm/i915: Stop tracking MRU activity on VMA Chris Wilson
                   ` (23 subsequent siblings)
  40 siblings, 1 reply; 68+ messages in thread
From: Chris Wilson @ 2018-06-29  7:53 UTC (permalink / raw)
  To: intel-gfx

Using a VMA on more than one timeline concurrently (i.e. using it
concurrently on multiple engines) is the exception rather than the rule.
As we expect to need only one active tracker in the common case, store
the most recently used tracker inside the i915_vma itself and only fall
back to the radixtree if we need two or more concurrent active trackers.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_vma.c | 46 ++++++++++++++++++++++++++-------
 drivers/gpu/drm/i915/i915_vma.h |  1 +
 2 files changed, 38 insertions(+), 9 deletions(-)
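
A rough sketch of the intended fast path (simplified, with the spill of
the previous tracker into the radixtree elided, see lookup_active() in
the patch below; active_for_timeline() is just an illustrative name):

	static struct i915_gem_active *
	active_for_timeline(struct i915_vma *vma, struct i915_request *rq)
	{
		struct i915_request *old;

		/* common case: same timeline as last time, reuse the
		 * tracker embedded in the vma and skip the radixtree
		 */
		old = i915_gem_active_raw(&vma->last_active,
					  &vma->vm->i915->drm.struct_mutex);
		if (!old || old->fence.context == rq->fence.context)
			return &vma->last_active;

		/* a second timeline is active: fall back to a
		 * per-timeline tracker kept in the radixtree
		 */
		return lookup_active(vma, rq->fence.context);
	}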

diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 23852417dcbd..42b9e0659945 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -121,6 +121,12 @@ i915_vma_retire(struct i915_gem_active *base, struct i915_request *rq)
 	__i915_vma_retire(vma, rq);
 }
 
+static void
+i915_vma_last_retire(struct i915_gem_active *base, struct i915_request *rq)
+{
+	__i915_vma_retire(container_of(base, struct i915_vma, last_active), rq);
+}
+
 static struct i915_vma *
 vma_create(struct drm_i915_gem_object *obj,
 	   struct i915_address_space *vm,
@@ -138,6 +144,7 @@ vma_create(struct drm_i915_gem_object *obj,
 
 	INIT_RADIX_TREE(&vma->active_rt, GFP_KERNEL);
 
+	init_request_active(&vma->last_active, i915_vma_last_retire);
 	init_request_active(&vma->last_fence, NULL);
 	vma->vm = vm;
 	vma->ops = &vm->vma_ops;
@@ -903,8 +910,14 @@ static void export_fence(struct i915_vma *vma,
 static struct i915_gem_active *lookup_active(struct i915_vma *vma, u64 idx)
 {
 	struct i915_vma_active *active;
+	struct i915_request *old;
 	int err;
 
+	old = i915_gem_active_raw(&vma->last_active,
+				  &vma->vm->i915->drm.struct_mutex);
+	if (!old || old->fence.context == idx)
+		goto out;
+
 	/*
 	 * XXX Note that the radix_tree uses unsigned longs for it indices,
 	 * a problem for us on i386 with 32bit longs. However, the likelihood
@@ -912,13 +925,9 @@ static struct i915_gem_active *lookup_active(struct i915_vma *vma, u64 idx)
 	 * and further reduced by that both timelines must be active
 	 * simultaneously to confuse us.
 	 */
-	active = radix_tree_lookup(&vma->active_rt, idx);
-	if (likely(active)) {
-		GEM_BUG_ON(i915_gem_active_isset(&active->base) &&
-			   idx != i915_gem_active_peek(&active->base,
-						       &vma->vm->i915->drm.struct_mutex)->fence.context);
-		return &active->base;
-	}
+	active = radix_tree_lookup(&vma->active_rt, old->fence.context);
+	if (likely(active))
+		goto replace;
 
 	active = kmalloc(sizeof(*active), GFP_KERNEL);
 	if (unlikely(!active))
@@ -927,13 +936,27 @@ static struct i915_gem_active *lookup_active(struct i915_vma *vma, u64 idx)
 	init_request_active(&active->base, i915_vma_retire);
 	active->vma = vma;
 
-	err = radix_tree_insert(&vma->active_rt, idx, active);
+	err = radix_tree_insert(&vma->active_rt, old->fence.context, active);
 	if (unlikely(err)) {
 		kfree(active);
 		return ERR_PTR(err);
 	}
 
-	return &active->base;
+replace:
+	if (i915_gem_active_isset(&active->base)) {
+		GEM_BUG_ON(old->fence.context !=
+			   i915_gem_active_raw(&active->base,
+					       &vma->vm->i915->drm.struct_mutex)->fence.context);
+		__list_del_entry(&active->base.link);
+		vma->active_count--;
+		GEM_BUG_ON(!vma->active_count);
+	}
+	GEM_BUG_ON(list_empty(&vma->last_active.link));
+	list_replace_init(&vma->last_active.link, &active->base.link);
+	active->base.request = fetch_and_zero(&vma->last_active.request);
+
+out:
+	return &vma->last_active;
 }
 
 int i915_vma_move_to_active(struct i915_vma *vma,
@@ -1014,6 +1037,11 @@ int i915_vma_unbind(struct i915_vma *vma)
 		 */
 		__i915_vma_pin(vma);
 
+		ret = i915_gem_active_retire(&vma->last_active,
+					     &vma->vm->i915->drm.struct_mutex);
+		if (ret)
+			goto unpin;
+
 		rcu_read_lock();
 		radix_tree_for_each_slot(slot, &vma->active_rt, &iter, 0) {
 			struct i915_vma_active *active =
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 94fdf4917e95..1d3080603a18 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -97,6 +97,7 @@ struct i915_vma {
 
 	unsigned int active_count;
 	struct radix_tree_root active_rt;
+	struct i915_gem_active last_active;
 	struct i915_gem_active last_fence;
 
 	/**
-- 
2.18.0

* [PATCH 19/37] drm/i915: Stop tracking MRU activity on VMA
  2018-06-29  7:53 [PATCH 01/37] drm/i915/gtt: Add read only pages to gen8_pte_encode Chris Wilson
                   ` (16 preceding siblings ...)
  2018-06-29  7:53 ` [PATCH 18/37] drm/i915: Track the last-active inside the i915_vma Chris Wilson
@ 2018-06-29  7:53 ` Chris Wilson
  2018-06-29  7:53 ` [PATCH 20/37] drm/i915: Introduce the i915_user_extension_method Chris Wilson
                   ` (22 subsequent siblings)
  40 siblings, 0 replies; 68+ messages in thread
From: Chris Wilson @ 2018-06-29  7:53 UTC (permalink / raw)
  To: intel-gfx

Our goal is to remove struct_mutex and replace it with fine-grained
locking. One of the thorny issues is our eviction logic for reclaiming
space for an execbuffer (or a GTT mmapping, among a few other examples).
While eviction itself is easy to move under a per-VM mutex, performing
the activity tracking is less agreeable. One solution is not to do any
MRU tracking at all and instead perform a simple, coarse active/inactive
evaluation during eviction, with a loose temporal ordering of last
insertion/evaluation. That keeps all the locking constrained to when we
are manipulating the VM itself, neatly avoiding the tricky handling of
possible recursive locking during execbuf and elsewhere.

Note that discarding the MRU is unlikely to impact our efficiency at
reclaiming VM space (where we think an LRU model is best) as our
current strategy is to use random idle replacement first before doing
a search, and over time the use of softpinned 48b per-ppGTT is growing
(thereby eliminating any need to perform eviction searches, in theory
at least).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c               | 10 +---
 drivers/gpu/drm/i915/i915_gem_evict.c         | 56 ++++++++++---------
 drivers/gpu/drm/i915/i915_gem_gtt.c           | 15 ++---
 drivers/gpu/drm/i915/i915_gem_gtt.h           | 26 +--------
 drivers/gpu/drm/i915/i915_gem_shrinker.c      |  8 ++-
 drivers/gpu/drm/i915/i915_gem_stolen.c        |  2 +-
 drivers/gpu/drm/i915/i915_gpu_error.c         | 37 ++++++------
 drivers/gpu/drm/i915/i915_vma.c               |  9 +--
 .../gpu/drm/i915/selftests/i915_gem_evict.c   |  4 +-
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c |  2 +-
 drivers/gpu/drm/i915/selftests/mock_gtt.c     |  3 +-
 11 files changed, 69 insertions(+), 103 deletions(-)
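
The shape of the eviction scan after this change, roughly (a simplified
sketch of i915_gem_evict_something() below, dropping the PIN_NONBLOCK
and repeated-rotation handling):

	list_for_each_entry_safe(vma, next, &vm->bound_list, vm_link) {
		if (i915_vma_is_active(vma)) {
			/* still busy: rotate to the tail and keep scanning */
			list_move_tail(&vma->vm_link, &vm->bound_list);
			continue;
		}

		/* idle vma on the single bound_list: eviction candidate */
		if (mark_free(&scan, vma, flags, &eviction_list))
			goto found;
	}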

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8ae293c75ae9..6d9824323fab 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -251,10 +251,7 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 
 	pinned = ggtt->vm.reserved;
 	mutex_lock(&dev->struct_mutex);
-	list_for_each_entry(vma, &ggtt->vm.active_list, vm_link)
-		if (i915_vma_is_pinned(vma))
-			pinned += vma->node.size;
-	list_for_each_entry(vma, &ggtt->vm.inactive_list, vm_link)
+	list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link)
 		if (i915_vma_is_pinned(vma))
 			pinned += vma->node.size;
 	mutex_unlock(&dev->struct_mutex);
@@ -1686,13 +1683,10 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
 	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
 
 	for_each_ggtt_vma(vma, obj) {
-		if (i915_vma_is_active(vma))
-			continue;
-
 		if (!drm_mm_node_allocated(&vma->node))
 			continue;
 
-		list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
+		list_move_tail(&vma->vm_link, &vma->vm->bound_list);
 	}
 
 	i915 = to_i915(obj->base.dev);
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 54814a196ee4..8bc6fc66cea2 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -126,14 +126,10 @@ i915_gem_evict_something(struct i915_address_space *vm,
 	struct drm_i915_private *dev_priv = vm->i915;
 	struct drm_mm_scan scan;
 	struct list_head eviction_list;
-	struct list_head *phases[] = {
-		&vm->inactive_list,
-		&vm->active_list,
-		NULL,
-	}, **phase;
 	struct i915_vma *vma, *next;
 	struct drm_mm_node *node;
 	enum drm_mm_insert_mode mode;
+	struct i915_vma *active;
 	int ret;
 
 	lockdep_assert_held(&vm->i915->drm.struct_mutex);
@@ -169,17 +165,31 @@ i915_gem_evict_something(struct i915_address_space *vm,
 	 */
 	if (!(flags & PIN_NONBLOCK))
 		i915_retire_requests(dev_priv);
-	else
-		phases[1] = NULL;
 
 search_again:
+	active = NULL;
 	INIT_LIST_HEAD(&eviction_list);
-	phase = phases;
-	do {
-		list_for_each_entry(vma, *phase, vm_link)
-			if (mark_free(&scan, vma, flags, &eviction_list))
-				goto found;
-	} while (*++phase);
+	list_for_each_entry_safe(vma, next, &vm->bound_list, vm_link) {
+		if (i915_vma_is_active(vma)) {
+			if (vma == active) {
+				if (flags & PIN_NONBLOCK)
+					break;
+
+				active = ERR_PTR(-EAGAIN);
+			}
+
+			if (active != ERR_PTR(-EAGAIN)) {
+				if (!active)
+					active = vma;
+
+				list_move_tail(&vma->vm_link, &vm->bound_list);
+				continue;
+			}
+		}
+
+		if (mark_free(&scan, vma, flags, &eviction_list))
+			goto found;
+	}
 
 	/* Nothing found, clean up and bail out! */
 	list_for_each_entry_safe(vma, next, &eviction_list, evict_link) {
@@ -388,11 +398,6 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
  */
 int i915_gem_evict_vm(struct i915_address_space *vm)
 {
-	struct list_head *phases[] = {
-		&vm->inactive_list,
-		&vm->active_list,
-		NULL
-	}, **phase;
 	struct list_head eviction_list;
 	struct i915_vma *vma, *next;
 	int ret;
@@ -412,16 +417,13 @@ int i915_gem_evict_vm(struct i915_address_space *vm)
 	}
 
 	INIT_LIST_HEAD(&eviction_list);
-	phase = phases;
-	do {
-		list_for_each_entry(vma, *phase, vm_link) {
-			if (i915_vma_is_pinned(vma))
-				continue;
+	list_for_each_entry(vma, &vm->bound_list, vm_link) {
+		if (i915_vma_is_pinned(vma))
+			continue;
 
-			__i915_vma_pin(vma);
-			list_add(&vma->evict_link, &eviction_list);
-		}
-	} while (*++phase);
+		__i915_vma_pin(vma);
+		list_add(&vma->evict_link, &eviction_list);
+	}
 
 	ret = 0;
 	list_for_each_entry_safe(vma, next, &eviction_list, evict_link) {
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index acc779770e47..4f8fd483a565 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2120,9 +2120,8 @@ static void i915_address_space_init(struct i915_address_space *vm,
 	drm_mm_init(&vm->mm, 0, vm->total);
 	vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;
 
-	INIT_LIST_HEAD(&vm->active_list);
-	INIT_LIST_HEAD(&vm->inactive_list);
 	INIT_LIST_HEAD(&vm->unbound_list);
+	INIT_LIST_HEAD(&vm->bound_list);
 
 	list_add_tail(&vm->global_link, &dev_priv->vm_list);
 	pagevec_init(&vm->free_pages);
@@ -2234,8 +2233,7 @@ void i915_ppgtt_close(struct i915_address_space *vm)
 static void ppgtt_destroy_vma(struct i915_address_space *vm)
 {
 	struct list_head *phases[] = {
-		&vm->active_list,
-		&vm->inactive_list,
+		&vm->bound_list,
 		&vm->unbound_list,
 		NULL,
 	}, **phase;
@@ -2258,8 +2256,7 @@ void i915_ppgtt_release(struct kref *kref)
 
 	ppgtt_destroy_vma(&ppgtt->vm);
 
-	GEM_BUG_ON(!list_empty(&ppgtt->vm.active_list));
-	GEM_BUG_ON(!list_empty(&ppgtt->vm.inactive_list));
+	GEM_BUG_ON(!list_empty(&ppgtt->vm.bound_list));
 	GEM_BUG_ON(!list_empty(&ppgtt->vm.unbound_list));
 
 	ppgtt->vm.cleanup(&ppgtt->vm);
@@ -2918,8 +2915,7 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv)
 	mutex_lock(&dev_priv->drm.struct_mutex);
 	i915_gem_fini_aliasing_ppgtt(dev_priv);
 
-	GEM_BUG_ON(!list_empty(&ggtt->vm.active_list));
-	list_for_each_entry_safe(vma, vn, &ggtt->vm.inactive_list, vm_link)
+	list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link)
 		WARN_ON(i915_vma_unbind(vma));
 
 	if (drm_mm_node_allocated(&ggtt->error_capture))
@@ -3613,8 +3609,7 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv)
 	ggtt->vm.closed = true; /* skip rewriting PTE on VMA unbind */
 
 	/* clflush objects bound into the GGTT and rebind them. */
-	GEM_BUG_ON(!list_empty(&ggtt->vm.active_list));
-	list_for_each_entry_safe(vma, vn, &ggtt->vm.inactive_list, vm_link) {
+	list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) {
 		struct drm_i915_gem_object *obj = vma->obj;
 
 		if (!(vma->flags & I915_VMA_GLOBAL_BIND))
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index f1f6d7076f12..342cfbcbf2d4 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -295,32 +295,12 @@ struct i915_address_space {
 	struct i915_page_directory_pointer *scratch_pdp; /* GEN8+ & 48b PPGTT */
 
 	/**
-	 * List of objects currently involved in rendering.
-	 *
-	 * Includes buffers having the contents of their GPU caches
-	 * flushed, not necessarily primitives. last_read_req
-	 * represents when the rendering involved will be completed.
-	 *
-	 * A reference is held on the buffer while on this list.
+	 * List of vma currently bound.
 	 */
-	struct list_head active_list;
+	struct list_head bound_list;
 
 	/**
-	 * LRU list of objects which are not in the ringbuffer and
-	 * are ready to unbind, but are still in the GTT.
-	 *
-	 * last_read_req is NULL while an object is in this list.
-	 *
-	 * A reference is not held on the buffer while on this list,
-	 * as merely being GTT-bound shouldn't prevent its being
-	 * freed, and we'll pull it off the list in the free path.
-	 */
-	struct list_head inactive_list;
-
-	/**
-	 * List of vma that have been unbound.
-	 *
-	 * A reference is not held on the buffer while on this list.
+	 * List of vma that are not unbound.
 	 */
 	struct list_head unbound_list;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index 55e84e71f526..ba8885f3f428 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -480,9 +480,13 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
 
 	/* We also want to clear any cached iomaps as they wrap vmap */
 	list_for_each_entry_safe(vma, next,
-				 &i915->ggtt.vm.inactive_list, vm_link) {
+				 &i915->ggtt.vm.bound_list, vm_link) {
 		unsigned long count = vma->node.size >> PAGE_SHIFT;
-		if (vma->iomap && i915_vma_unbind(vma) == 0)
+
+		if (!vma->iomap || i915_vma_is_active(vma))
+			continue;
+
+		if (i915_vma_unbind(vma) == 0)
 			freed_pages += count;
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
index 79a347295e00..d2409477a80a 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
@@ -666,7 +666,7 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv
 	vma->pages = obj->mm.pages;
 	vma->flags |= I915_VMA_GLOBAL_BIND;
 	__i915_vma_set_map_and_fenceable(vma);
-	list_move_tail(&vma->vm_link, &ggtt->vm.inactive_list);
+	list_move_tail(&vma->vm_link, &ggtt->vm.bound_list);
 
 	spin_lock(&dev_priv->mm.obj_lock);
 	list_move_tail(&obj->mm.link, &dev_priv->mm.bound_list);
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 8c81cf3aa182..a3692c36814b 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1036,7 +1036,7 @@ static void capture_bo(struct drm_i915_error_buffer *err,
 
 static u32 capture_error_bo(struct drm_i915_error_buffer *err,
 			    int count, struct list_head *head,
-			    bool pinned_only)
+			    bool active_only, bool pinned_only)
 {
 	struct i915_vma *vma;
 	int i = 0;
@@ -1045,6 +1045,9 @@ static u32 capture_error_bo(struct drm_i915_error_buffer *err,
 		if (!vma->obj)
 			continue;
 
+		if (active_only && !i915_vma_is_active(vma))
+			continue;
+
 		if (pinned_only && !i915_vma_is_pinned(vma))
 			continue;
 
@@ -1516,14 +1519,16 @@ static void gem_capture_vm(struct i915_gpu_state *error,
 	int count;
 
 	count = 0;
-	list_for_each_entry(vma, &vm->active_list, vm_link)
-		count++;
+	list_for_each_entry(vma, &vm->bound_list, vm_link)
+		if (i915_vma_is_active(vma))
+			count++;
 
 	active_bo = NULL;
 	if (count)
 		active_bo = kcalloc(count, sizeof(*active_bo), GFP_ATOMIC);
 	if (active_bo)
-		count = capture_error_bo(active_bo, count, &vm->active_list, false);
+		count = capture_error_bo(active_bo, count, &vm->bound_list,
+					 true, false);
 	else
 		count = 0;
 
@@ -1561,28 +1566,20 @@ static void capture_pinned_buffers(struct i915_gpu_state *error)
 	struct i915_address_space *vm = &error->i915->ggtt.vm;
 	struct drm_i915_error_buffer *bo;
 	struct i915_vma *vma;
-	int count_inactive, count_active;
-
-	count_inactive = 0;
-	list_for_each_entry(vma, &vm->inactive_list, vm_link)
-		count_inactive++;
+	int count;
 
-	count_active = 0;
-	list_for_each_entry(vma, &vm->active_list, vm_link)
-		count_active++;
+	count = 0;
+	list_for_each_entry(vma, &vm->bound_list, vm_link)
+		count++;
 
 	bo = NULL;
-	if (count_inactive + count_active)
-		bo = kcalloc(count_inactive + count_active,
-			     sizeof(*bo), GFP_ATOMIC);
+	if (count)
+		bo = kcalloc(count, sizeof(*bo), GFP_ATOMIC);
 	if (!bo)
 		return;
 
-	count_inactive = capture_error_bo(bo, count_inactive,
-					  &vm->active_list, true);
-	count_active = capture_error_bo(bo + count_inactive, count_active,
-					&vm->inactive_list, true);
-	error->pinned_bo_count = count_inactive + count_active;
+	error->pinned_bo_count =
+		capture_error_bo(bo, count, &vm->bound_list, false, true);
 	error->pinned_bo = bo;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 42b9e0659945..60a6617ebc55 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -77,9 +77,6 @@ __i915_vma_retire(struct i915_vma *vma, struct i915_request *rq)
 	if (--vma->active_count)
 		return;
 
-	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
-	list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
-
 	GEM_BUG_ON(!i915_gem_object_is_active(obj));
 	if (--obj->active_count)
 		return;
@@ -659,7 +656,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
 	GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, cache_level));
 
-	list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
+	list_move_tail(&vma->vm_link, &vma->vm->bound_list);
 
 	if (vma->obj) {
 		struct drm_i915_gem_object *obj = vma->obj;
@@ -981,10 +978,8 @@ int i915_vma_move_to_active(struct i915_vma *vma,
 	 * add the active reference first and queue for it to be dropped
 	 * *last*.
 	 */
-	if (!i915_gem_active_isset(active) && !vma->active_count++) {
-		list_move_tail(&vma->vm_link, &vma->vm->active_list);
+	if (!i915_gem_active_isset(active) && !vma->active_count++)
 		obj->active_count++;
-	}
 	i915_gem_active_set(active, rq);
 	GEM_BUG_ON(!i915_vma_is_active(vma));
 	GEM_BUG_ON(!obj->active_count);
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
index 8059268800fa..dd51822f1f7f 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
@@ -57,7 +57,7 @@ static int populate_ggtt(struct drm_i915_private *i915)
 		return -EINVAL;
 	}
 
-	if (list_empty(&i915->ggtt.vm.inactive_list)) {
+	if (list_empty(&i915->ggtt.vm.bound_list)) {
 		pr_err("No objects on the GGTT inactive list!\n");
 		return -EINVAL;
 	}
@@ -69,7 +69,7 @@ static void unpin_ggtt(struct drm_i915_private *i915)
 {
 	struct i915_vma *vma;
 
-	list_for_each_entry(vma, &i915->ggtt.vm.inactive_list, vm_link)
+	list_for_each_entry(vma, &i915->ggtt.vm.bound_list, vm_link)
 		i915_vma_unpin(vma);
 }
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
index a28ee0cc6a63..23078923c5c5 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
@@ -1206,7 +1206,7 @@ static void track_vma_bind(struct i915_vma *vma)
 	__i915_gem_object_pin_pages(obj);
 
 	vma->pages = obj->mm.pages;
-	list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
+	list_move_tail(&vma->vm_link, &vma->vm->bound_list);
 }
 
 static int exercise_mock(struct drm_i915_private *i915,
diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c b/drivers/gpu/drm/i915/selftests/mock_gtt.c
index 6a7f4da7b523..d5c559580bda 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c
@@ -70,8 +70,7 @@ mock_ppgtt(struct drm_i915_private *i915,
 	ppgtt->vm.total = round_down(U64_MAX, PAGE_SIZE);
 	ppgtt->vm.file = ERR_PTR(-ENODEV);
 
-	INIT_LIST_HEAD(&ppgtt->vm.active_list);
-	INIT_LIST_HEAD(&ppgtt->vm.inactive_list);
+	INIT_LIST_HEAD(&ppgtt->vm.bound_list);
 	INIT_LIST_HEAD(&ppgtt->vm.unbound_list);
 
 	INIT_LIST_HEAD(&ppgtt->vm.global_link);
-- 
2.18.0

* [PATCH 20/37] drm/i915: Introduce the i915_user_extension_method
  2018-06-29  7:53 [PATCH 01/37] drm/i915/gtt: Add read only pages to gen8_pte_encode Chris Wilson
                   ` (17 preceding siblings ...)
  2018-06-29  7:53 ` [PATCH 19/37] drm/i915: Stop tracking MRU activity on VMA Chris Wilson
@ 2018-06-29  7:53 ` Chris Wilson
  2018-06-29  7:53 ` [PATCH 21/37] drm/i915: Extend CREATE_CONTEXT to allow inheritance ala clone() Chris Wilson
                   ` (21 subsequent siblings)
  40 siblings, 0 replies; 68+ messages in thread
From: Chris Wilson @ 2018-06-29  7:53 UTC (permalink / raw)
  To: intel-gfx

An idea for extending the uABI, inspired by Vulkan's extension chains.
Rather than expanding the data struct for each ioctl every time we need
to add a new feature, define an extension chain instead. As we add
optional interfaces to control the ioctl, we define a new extension
struct that can be linked into the ioctl data only when required by the
user. The key advantage is being able to ignore large control structs
for optional interfaces/extensions, while still being able to process
them in a consistent manner.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Makefile               |  1 +
 drivers/gpu/drm/i915/i915_user_extensions.c | 36 +++++++++++++++++++++
 drivers/gpu/drm/i915/i915_user_extensions.h | 20 ++++++++++++
 include/uapi/drm/i915_drm.h                 | 20 ++++++++++++
 4 files changed, 77 insertions(+)
 create mode 100644 drivers/gpu/drm/i915/i915_user_extensions.c
 create mode 100644 drivers/gpu/drm/i915/i915_user_extensions.h
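
From the userspace side, chaining two extensions onto an ioctl argument
would look roughly like the sketch below (the extension names are
invented for illustration; only struct i915_user_extension itself and
the kernel-side walker i915_user_extensions() are added by this patch):

	struct i915_user_extension ext_b = {
		.next_extension = 0,			/* end of the chain */
		.name = SOME_EXTENSION_B,		/* hypothetical id */
	};
	struct i915_user_extension ext_a = {
		.next_extension = (__u64)(uintptr_t)&ext_b,
		.name = SOME_EXTENSION_A,		/* hypothetical id */
	};

	/* a future ioctl struct would carry a __u64 extensions pointer */
	args.extensions = (__u64)(uintptr_t)&ext_a;

The kernel then walks the chain with i915_user_extensions(), dispatching
each extension by name to the per-ioctl handler table.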

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 4c6adae23e18..9bba43da4215 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -44,6 +44,7 @@ i915-y := i915_drv.o \
 	  i915_syncmap.o \
 	  i915_sw_fence.o \
 	  i915_sysfs.o \
+	  i915_user_extensions.o \
 	  intel_csr.o \
 	  intel_device_info.o \
 	  intel_pm.o \
diff --git a/drivers/gpu/drm/i915/i915_user_extensions.c b/drivers/gpu/drm/i915/i915_user_extensions.c
new file mode 100644
index 000000000000..ba64067151c1
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_user_extensions.c
@@ -0,0 +1,36 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2018 Intel Corporation
+ */
+
+#include <linux/uaccess.h>
+#include <uapi/drm/i915_drm.h>
+
+#include "i915_user_extensions.h"
+
+int i915_user_extensions(struct i915_user_extension __user *ext,
+			 const i915_user_extension_fn *tbl,
+			 unsigned long count,
+			 void *data)
+{
+	int err;
+	u64 x;
+
+	if (!ext)
+		return 0;
+
+	if (get_user(x, &ext->name))
+		return -EFAULT;
+
+	err = -EINVAL;
+	if (x < count && tbl[x])
+		err = tbl[x](ext, data);
+	if (err)
+		return err;
+
+	if (get_user(x, &ext->next_extension))
+		return -EFAULT;
+
+	return i915_user_extensions(u64_to_user_ptr(x), tbl, count, data);
+}
diff --git a/drivers/gpu/drm/i915/i915_user_extensions.h b/drivers/gpu/drm/i915/i915_user_extensions.h
new file mode 100644
index 000000000000..313a510b068a
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_user_extensions.h
@@ -0,0 +1,20 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2018 Intel Corporation
+ */
+
+#ifndef I915_USER_EXTENSIONS_H
+#define I915_USER_EXTENSIONS_H
+
+struct i915_user_extension;
+
+typedef int (*i915_user_extension_fn)(struct i915_user_extension __user *ext,
+				      void *data);
+
+int i915_user_extensions(struct i915_user_extension __user *ext,
+			 const i915_user_extension_fn *tbl,
+			 unsigned long count,
+			 void *data);
+
+#endif /* I915_USER_EXTENSIONS_H */
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 7f5634ce8e88..e7a1fcd3a261 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -62,6 +62,26 @@ extern "C" {
 #define I915_ERROR_UEVENT		"ERROR"
 #define I915_RESET_UEVENT		"RESET"
 
+/*
+ * i915_user_extension: Base class for defining a chain of extensions
+ *
+ * Many interfaces need to grow over time. In most cases we can simply
+ * extend the struct and have userspace pass in more data. Another option,
+ * as demonstrated by Vulkan's approach to providing extensions for forward
+ * and backward compatibilty, is to use a list of optional structs to
+ * provide those extra details.
+ *
+ * The key advantage to using an extension chain is that it allows us to
+ * redefine the interface more easily than an ever growing struct of
+ * increasing complexity, and for large parts of that interface to be
+ * entirely optional. The downside is more pointer chasing; chasing across
+ * the __user boundary with pointers encapsulated inside u64.
+ */
+struct i915_user_extension {
+	__u64 next_extension;
+	__u64 name;
+};
+
 /*
  * MOCS indexes used for GPU surfaces, defining the cacheability of the
  * surface data and the coherency for this data wrt. CPU vs. GPU accesses.
-- 
2.18.0

* [PATCH 21/37] drm/i915: Extend CREATE_CONTEXT to allow inheritance ala clone()
  2018-06-29  7:53 [PATCH 01/37] drm/i915/gtt: Add read only pages to gen8_pte_encode Chris Wilson
                   ` (18 preceding siblings ...)
  2018-06-29  7:53 ` [PATCH 20/37] drm/i915: Introduce the i915_user_extension_method Chris Wilson
@ 2018-06-29  7:53 ` Chris Wilson
  2018-06-29  7:53 ` [PATCH 22/37] drm/i915: Allow contexts to share a single timeline across all engines Chris Wilson
                   ` (20 subsequent siblings)
  40 siblings, 0 replies; 68+ messages in thread
From: Chris Wilson @ 2018-06-29  7:53 UTC (permalink / raw)
  To: intel-gfx

A context encompasses the driver's view of process-related state, and
encapsulates the logical GPU state where available. Each context is
currently equivalent to a process in CPU terms. Like with processes,
sometimes the user wants a lighter encapsulation that shares some state
with the parent process; for example, two threads have unique register
state but share the virtual memory mappings. We can support exactly the
same principle using contexts where we may share the GTT but keep the
logical GPU state distinct. This allows quicker switching between those
contexts, and for userspace to allocate a single offset in the GTT and
use it across multiple contexts. Like with clone(), in the future we may
wish to allow userspace to select more features to copy across from the
parent, but for now we only allow sharing of the GTT.

Note that if full per-process GTT is not supported on the hardware, the
GTT is already implicitly shared between contexts, and this request
to create contexts with a shared GTT fails. With full ppGTT, every fd
(i.e. every process) is allocated a unique GTT, so this request cannot
be used to share a GTT between processes/fds; it can only share a GTT
belonging to this fd.

Testcase: igt/gem_ctx_shared
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Michał Winiarski <michal.winiarski@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_context.c       |  62 ++++-
 drivers/gpu/drm/i915/i915_gem_gtt.c           |  19 +-
 drivers/gpu/drm/i915/i915_gem_gtt.h           |  14 +-
 drivers/gpu/drm/i915/selftests/huge_pages.c   |   1 -
 .../gpu/drm/i915/selftests/i915_gem_context.c | 240 +++++++++++++-----
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c |   1 -
 drivers/gpu/drm/i915/selftests/mock_context.c |   2 +-
 drivers/gpu/drm/i915/selftests/mock_gtt.c     |   2 +
 include/uapi/drm/i915_drm.h                   |  11 +-
 9 files changed, 270 insertions(+), 82 deletions(-)
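
Expected userspace usage, as a rough sketch (error handling omitted;
this assumes the existing CONTEXT_CREATE ioctl is reused to carry the
v2 struct):

	struct drm_i915_gem_context_create_v2 args = {
		.flags = I915_GEM_CONTEXT_SHARE_GTT,
		.share_ctx = parent_ctx_id,	/* context whose ppGTT to share */
	};

	/* assumption: same ioctl number, now interpreting the v2 layout */
	ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &args);

	/* args.ctx_id now names a context sharing the GTT of parent_ctx_id,
	 * but with its own logical (register) state
	 */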

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index ca9205e19f0b..f377856a8edf 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -110,6 +110,8 @@ static void lut_close(struct i915_gem_context *ctx)
 		struct i915_vma *vma = rcu_dereference_raw(*slot);
 
 		radix_tree_iter_delete(&ctx->handles_vma, &iter, slot);
+
+		vma->open_count--;
 		__i915_gem_object_release_unless_active(vma->obj);
 	}
 	rcu_read_unlock();
@@ -197,7 +199,7 @@ static void context_close(struct i915_gem_context *ctx)
 	 */
 	lut_close(ctx);
 	if (ctx->ppgtt)
-		i915_ppgtt_close(&ctx->ppgtt->vm);
+		i915_ppgtt_close(ctx->ppgtt);
 
 	ctx->file_priv = ERR_PTR(-EBADF);
 	i915_gem_context_put(ctx);
@@ -356,9 +358,12 @@ static void __destroy_hw_context(struct i915_gem_context *ctx,
 	context_close(ctx);
 }
 
+#define CREATE_VM BIT(0)
+
 static struct i915_gem_context *
 i915_gem_create_context(struct drm_i915_private *dev_priv,
-			struct drm_i915_file_private *file_priv)
+			struct drm_i915_file_private *file_priv,
+			unsigned int flags)
 {
 	struct i915_gem_context *ctx;
 
@@ -371,7 +376,7 @@ i915_gem_create_context(struct drm_i915_private *dev_priv,
 	if (IS_ERR(ctx))
 		return ctx;
 
-	if (USES_FULL_PPGTT(dev_priv)) {
+	if (flags & CREATE_VM && USES_FULL_PPGTT(dev_priv)) {
 		struct i915_hw_ppgtt *ppgtt;
 
 		ppgtt = i915_ppgtt_create(dev_priv, file_priv, ctx->name);
@@ -436,7 +441,7 @@ i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio)
 {
 	struct i915_gem_context *ctx;
 
-	ctx = i915_gem_create_context(i915, NULL);
+	ctx = i915_gem_create_context(i915, NULL, CREATE_VM);
 	if (IS_ERR(ctx))
 		return ctx;
 
@@ -557,7 +562,7 @@ int i915_gem_context_open(struct drm_i915_private *i915,
 	idr_init(&file_priv->context_idr);
 
 	mutex_lock(&i915->drm.struct_mutex);
-	ctx = i915_gem_create_context(i915, file_priv);
+	ctx = i915_gem_create_context(i915, file_priv, CREATE_VM);
 	mutex_unlock(&i915->drm.struct_mutex);
 	if (IS_ERR(ctx)) {
 		idr_destroy(&file_priv->context_idr);
@@ -715,10 +720,12 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
 				  struct drm_file *file)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct drm_i915_gem_context_create *args = data;
+	struct drm_i915_gem_context_create_v2 *args = data;
 	struct drm_i915_file_private *file_priv = file->driver_priv;
+	struct i915_gem_context *share = NULL;
 	struct i915_gem_context *ctx;
-	int ret;
+	unsigned int flags = CREATE_VM;
+	int err;
 
 	if (!dev_priv->engine[RCS]->context_size)
 		return -ENODEV;
@@ -726,6 +733,9 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
 	if (args->pad != 0)
 		return -EINVAL;
 
+	if (args->flags & ~I915_GEM_CONTEXT_SHARE_GTT)
+		return -EINVAL;
+
 	if (client_is_banned(file_priv)) {
 		DRM_DEBUG("client %s[%d] banned from creating ctx\n",
 			  current->comm,
@@ -734,21 +744,45 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
 		return -EIO;
 	}
 
-	ret = i915_mutex_lock_interruptible(dev);
-	if (ret)
-		return ret;
+	if (args->flags & I915_GEM_CONTEXT_SHARE_GTT) {
+		share = i915_gem_context_lookup(file_priv, args->share_ctx);
+		if (!share)
+			return -ENOENT;
+
+		if (!share->ppgtt) {
+			err = -ENODEV;
+			goto out;
+		}
+
+		flags &= ~CREATE_VM;
+	}
 
-	ctx = i915_gem_create_context(dev_priv, file_priv);
+	err = i915_mutex_lock_interruptible(dev);
+	if (err)
+		goto out;
+
+	ctx = i915_gem_create_context(dev_priv, file_priv, flags);
 	mutex_unlock(&dev->struct_mutex);
-	if (IS_ERR(ctx))
-		return PTR_ERR(ctx);
+	if (IS_ERR(ctx)) {
+		err = PTR_ERR(ctx);
+		goto out;
+	}
+
+	if (!(flags & CREATE_VM)) {
+		ctx->desc_template = share->desc_template;
+		ctx->ppgtt = i915_ppgtt_get(share->ppgtt);
+		i915_ppgtt_open(ctx->ppgtt);
+	}
 
 	GEM_BUG_ON(i915_gem_context_is_kernel(ctx));
 
 	args->ctx_id = ctx->user_handle;
 	DRM_DEBUG("HW context %d created\n", args->ctx_id);
 
-	return 0;
+out:
+	if (share)
+		i915_gem_context_put(share);
+	return err;
 }
 
 int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 4f8fd483a565..fb452441252d 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2216,6 +2216,8 @@ i915_ppgtt_create(struct drm_i915_private *i915,
 		return ppgtt;
 
 	kref_init(&ppgtt->ref);
+	ppgtt->open_count = 1;
+
 	i915_address_space_init(&ppgtt->vm, i915, name);
 	ppgtt->vm.file = fpriv;
 
@@ -2224,10 +2226,21 @@ i915_ppgtt_create(struct drm_i915_private *i915,
 	return ppgtt;
 }
 
-void i915_ppgtt_close(struct i915_address_space *vm)
+void i915_ppgtt_open(struct i915_hw_ppgtt *ppgtt)
 {
-	GEM_BUG_ON(vm->closed);
-	vm->closed = true;
+	GEM_BUG_ON(ppgtt->vm.closed);
+
+	ppgtt->open_count++;
+}
+
+void i915_ppgtt_close(struct i915_hw_ppgtt *ppgtt)
+{
+	GEM_BUG_ON(!ppgtt->open_count);
+	if (--ppgtt->open_count)
+		return;
+
+	GEM_BUG_ON(ppgtt->vm.closed);
+	ppgtt->vm.closed = true;
 }
 
 static void ppgtt_destroy_vma(struct i915_address_space *vm)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 342cfbcbf2d4..0366b36773b1 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -383,6 +383,8 @@ struct i915_hw_ppgtt {
 	struct kref ref;
 
 	unsigned long pd_dirty_rings;
+	unsigned int open_count;
+
 	union {
 		struct i915_pml4 pml4;		/* GEN8+ & 48b PPGTT */
 		struct i915_page_directory_pointer pdp;	/* GEN8+ */
@@ -602,12 +604,16 @@ void i915_ppgtt_release(struct kref *kref);
 struct i915_hw_ppgtt *i915_ppgtt_create(struct drm_i915_private *dev_priv,
 					struct drm_i915_file_private *fpriv,
 					const char *name);
-void i915_ppgtt_close(struct i915_address_space *vm);
-static inline void i915_ppgtt_get(struct i915_hw_ppgtt *ppgtt)
+
+void i915_ppgtt_open(struct i915_hw_ppgtt *ppgtt);
+void i915_ppgtt_close(struct i915_hw_ppgtt *ppgtt);
+
+static inline struct i915_hw_ppgtt *i915_ppgtt_get(struct i915_hw_ppgtt *ppgtt)
 {
-	if (ppgtt)
-		kref_get(&ppgtt->ref);
+	kref_get(&ppgtt->ref);
+	return ppgtt;
 }
+
 static inline void i915_ppgtt_put(struct i915_hw_ppgtt *ppgtt)
 {
 	if (ppgtt)
diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c
index d33e20940e0a..862295b2c4a8 100644
--- a/drivers/gpu/drm/i915/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/selftests/huge_pages.c
@@ -1717,7 +1717,6 @@ int i915_gem_huge_page_mock_selftests(void)
 	err = i915_subtests(tests, ppgtt);
 
 out_close:
-	i915_ppgtt_close(&ppgtt->vm);
 	i915_ppgtt_put(ppgtt);
 
 out_unlock:
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index 5bbffe9fcdda..f88af80902fa 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -130,10 +130,6 @@ static int gpu_fill(struct drm_i915_gem_object *obj,
 	if (IS_ERR(vma))
 		return PTR_ERR(vma);
 
-	err = i915_gem_object_set_to_gtt_domain(obj, false);
-	if (err)
-		return err;
-
 	err = i915_vma_pin(vma, 0, 0, PIN_HIGH | PIN_USER);
 	if (err)
 		return err;
@@ -227,7 +223,8 @@ static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
 	return 0;
 }
 
-static int cpu_check(struct drm_i915_gem_object *obj, unsigned int max)
+static noinline int cpu_check(struct drm_i915_gem_object *obj,
+			      unsigned int idx, unsigned int max)
 {
 	unsigned int n, m, needs_flush;
 	int err;
@@ -245,8 +242,8 @@ static int cpu_check(struct drm_i915_gem_object *obj, unsigned int max)
 
 		for (m = 0; m < max; m++) {
 			if (map[m] != m) {
-				pr_err("Invalid value at page %d, offset %d: found %x expected %x\n",
-				       n, m, map[m], m);
+				pr_err("%pS: Invalid value at object %d page %d, offset %d: found %x expected %x\n",
+				       __builtin_return_address(0), idx, n, m, map[m], m);
 				err = -EINVAL;
 				goto out_unmap;
 			}
@@ -254,8 +251,9 @@ static int cpu_check(struct drm_i915_gem_object *obj, unsigned int max)
 
 		for (; m < DW_PER_PAGE; m++) {
 			if (map[m] != STACK_MAGIC) {
-				pr_err("Invalid value at page %d, offset %d: found %x expected %x\n",
-				       n, m, map[m], STACK_MAGIC);
+				pr_err("%pS: Invalid value at object %d page %d, offset %d: found %x expected %x\n",
+				       __builtin_return_address(0), idx, n, m,
+				       map[m], STACK_MAGIC);
 				err = -EINVAL;
 				goto out_unmap;
 			}
@@ -318,6 +316,10 @@ create_test_object(struct i915_gem_context *ctx,
 		return ERR_PTR(err);
 	}
 
+	err = i915_gem_object_set_to_gtt_domain(obj, false);
+	if (err)
+		return ERR_PTR(err);
+
 	list_add_tail(&obj->st_link, objects);
 	return obj;
 }
@@ -333,12 +335,8 @@ static unsigned long max_dwords(struct drm_i915_gem_object *obj)
 static int igt_ctx_exec(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
-	struct drm_i915_gem_object *obj = NULL;
-	struct drm_file *file;
-	IGT_TIMEOUT(end_time);
-	LIST_HEAD(objects);
-	unsigned long ncontexts, ndwords, dw;
-	bool first_shared_gtt = true;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
 	int err = -ENODEV;
 
 	/* Create a few different contexts (with different mm) and write
@@ -346,37 +344,160 @@ static int igt_ctx_exec(void *arg)
 	 * up in the expected pages of our obj.
 	 */
 
-	file = mock_file(i915);
-	if (IS_ERR(file))
-		return PTR_ERR(file);
+	for_each_engine(engine, i915, id) {
+		struct drm_i915_gem_object *obj = NULL;
+		struct drm_file *file;
+		IGT_TIMEOUT(end_time);
+		LIST_HEAD(objects);
+		unsigned long ncontexts, ndwords, dw;
+		bool first_shared_gtt = true;
+
+		if (!intel_engine_can_store_dword(engine))
+			continue;
 
-	mutex_lock(&i915->drm.struct_mutex);
+		file = mock_file(i915);
+		if (IS_ERR(file))
+			return PTR_ERR(file);
+
+		mutex_lock(&i915->drm.struct_mutex);
+
+		ncontexts = 0;
+		ndwords = 0;
+		dw = 0;
+		while (!time_after(jiffies, end_time)) {
+			struct i915_gem_context *ctx;
+
+			if (first_shared_gtt) {
+				ctx = __create_hw_context(i915, file->driver_priv);
+				first_shared_gtt = false;
+			} else {
+				ctx = i915_gem_create_context(i915,
+							      file->driver_priv,
+							      CREATE_VM);
+			}
+			if (IS_ERR(ctx)) {
+				err = PTR_ERR(ctx);
+				goto out_unlock;
+			}
 
-	ncontexts = 0;
-	ndwords = 0;
-	dw = 0;
-	while (!time_after(jiffies, end_time)) {
-		struct intel_engine_cs *engine;
-		struct i915_gem_context *ctx;
-		unsigned int id;
+			if (!obj) {
+				obj = create_test_object(ctx, file, &objects);
+				if (IS_ERR(obj)) {
+					err = PTR_ERR(obj);
+					goto out_unlock;
+				}
+			}
 
-		if (first_shared_gtt) {
-			ctx = __create_hw_context(i915, file->driver_priv);
-			first_shared_gtt = false;
-		} else {
-			ctx = i915_gem_create_context(i915, file->driver_priv);
+			intel_runtime_pm_get(i915);
+			err = gpu_fill(obj, ctx, engine, dw);
+			intel_runtime_pm_put(i915);
+			if (err) {
+				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
+				       ndwords, dw, max_dwords(obj),
+				       engine->name, ctx->hw_id,
+				       yesno(!!ctx->ppgtt), err);
+				goto out_unlock;
+			}
+
+			if (++dw == max_dwords(obj)) {
+				obj = NULL;
+				dw = 0;
+			}
+
+			ndwords++;
+			ncontexts++;
+		}
+
+		pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
+			ncontexts, engine->name, ndwords);
+
+		ncontexts = dw = 0;
+		list_for_each_entry(obj, &objects, st_link) {
+			unsigned int rem =
+				min_t(unsigned int, ndwords - dw, max_dwords(obj));
+
+			err = cpu_check(obj, ncontexts++, rem);
+			if (err)
+				break;
+
+			dw += rem;
+		}
+
+out_unlock:
+		i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED);
+		mutex_unlock(&i915->drm.struct_mutex);
+
+		mock_file_free(i915, file);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static int igt_shared_ctx_exec(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	int err = -ENODEV;
+
+	/*
+	 * Create a few different contexts with the same mm and write
+	 * through each ctx using the GPU making sure those writes end
+	 * up in the expected pages of our obj.
+	 */
+
+	for_each_engine(engine, i915, id) {
+		IGT_TIMEOUT(end_time);
+		LIST_HEAD(objects);
+		unsigned long ncontexts, ndwords, dw;
+		struct drm_i915_gem_object *obj = NULL;
+		struct drm_file *file;
+		struct i915_gem_context *parent;
+
+		if (!intel_engine_can_store_dword(engine))
+			continue;
+
+		file = mock_file(i915);
+		if (IS_ERR(file))
+			return PTR_ERR(file);
+
+		mutex_lock(&i915->drm.struct_mutex);
+
+		parent = i915_gem_create_context(i915, file->driver_priv,
+						 CREATE_VM);
+		if (IS_ERR(parent)) {
+			err = PTR_ERR(parent);
+			goto out_unlock;
 		}
-		if (IS_ERR(ctx)) {
-			err = PTR_ERR(ctx);
+
+		if (!parent->ppgtt) {
+			err = 0;
 			goto out_unlock;
 		}
 
-		for_each_engine(engine, i915, id) {
-			if (!intel_engine_can_store_dword(engine))
-				continue;
+
+		ncontexts = 0;
+		ndwords = 0;
+		dw = 0;
+		while (!time_after(jiffies, end_time)) {
+			struct i915_gem_context *ctx;
+
+			ctx = i915_gem_create_context(i915,
+						      file->driver_priv,
+						      0);
+			if (IS_ERR(ctx)) {
+				err = PTR_ERR(ctx);
+				goto out_unlock;
+			}
+
+			i915_ppgtt_open(parent->ppgtt);
+			ctx->ppgtt = i915_ppgtt_get(parent->ppgtt);
+			ctx->desc_template = parent->desc_template;
 
 			if (!obj) {
-				obj = create_test_object(ctx, file, &objects);
+				obj = create_test_object(parent, file, &objects);
 				if (IS_ERR(obj)) {
 					err = PTR_ERR(obj);
 					goto out_unlock;
@@ -398,32 +519,36 @@ static int igt_ctx_exec(void *arg)
 				obj = NULL;
 				dw = 0;
 			}
+
 			ndwords++;
+			ncontexts++;
 		}
-		ncontexts++;
-	}
-	pr_info("Submitted %lu contexts (across %u engines), filling %lu dwords\n",
-		ncontexts, INTEL_INFO(i915)->num_rings, ndwords);
+		pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
+			ncontexts, engine->name, ndwords);
 
-	dw = 0;
-	list_for_each_entry(obj, &objects, st_link) {
-		unsigned int rem =
-			min_t(unsigned int, ndwords - dw, max_dwords(obj));
+		ncontexts = dw = 0;
+		list_for_each_entry(obj, &objects, st_link) {
+			unsigned int rem =
+				min_t(unsigned int, ndwords - dw, max_dwords(obj));
 
-		err = cpu_check(obj, rem);
-		if (err)
-			break;
+			err = cpu_check(obj, ncontexts++, rem);
+			if (err)
+				break;
 
-		dw += rem;
-	}
+			dw += rem;
+		}
 
 out_unlock:
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
-		err = -EIO;
-	mutex_unlock(&i915->drm.struct_mutex);
+		if (igt_flush_test(i915, I915_WAIT_LOCKED))
+			err = -EIO;
+		mutex_unlock(&i915->drm.struct_mutex);
 
-	mock_file_free(i915, file);
-	return err;
+		mock_file_free(i915, file);
+		if (err)
+			return err;
+	}
+
+	return 0;
 }
 
 static int igt_ctx_readonly(void *arg)
@@ -451,7 +576,7 @@ static int igt_ctx_readonly(void *arg)
 
 	mutex_lock(&i915->drm.struct_mutex);
 
-	ctx = i915_gem_create_context(i915, file->driver_priv);
+	ctx = i915_gem_create_context(i915, file->driver_priv, 0);
 	if (IS_ERR(ctx)) {
 		err = PTR_ERR(ctx);
 		goto out_unlock;
@@ -515,7 +640,7 @@ static int igt_ctx_readonly(void *arg)
 		if (i915_gem_object_is_readonly(obj))
 			num_writes = 0;
 
-		err = cpu_check(obj, num_writes);
+		err = cpu_check(obj, 0, num_writes);
 		if (err)
 			break;
 
@@ -706,6 +831,7 @@ int i915_gem_context_live_selftests(struct drm_i915_private *dev_priv)
 		SUBTEST(igt_switch_to_kernel_context),
 		SUBTEST(igt_ctx_exec),
 		SUBTEST(igt_ctx_readonly),
+		SUBTEST(igt_shared_ctx_exec),
 	};
 	bool fake_alias = false;
 	int err;
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
index 23078923c5c5..ad879b6a2155 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
@@ -990,7 +990,6 @@ static int exercise_ppgtt(struct drm_i915_private *dev_priv,
 
 	err = func(dev_priv, &ppgtt->vm, 0, ppgtt->vm.total, end_time);
 
-	i915_ppgtt_close(&ppgtt->vm);
 	i915_ppgtt_put(ppgtt);
 out_unlock:
 	mutex_unlock(&dev_priv->drm.struct_mutex);
diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c
index 8904f1ce64e3..221efd2155d5 100644
--- a/drivers/gpu/drm/i915/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/selftests/mock_context.c
@@ -97,7 +97,7 @@ live_context(struct drm_i915_private *i915, struct drm_file *file)
 {
 	lockdep_assert_held(&i915->drm.struct_mutex);
 
-	return i915_gem_create_context(i915, file->driver_priv);
+	return i915_gem_create_context(i915, file->driver_priv, CREATE_VM);
 }
 
 struct i915_gem_context *
diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c b/drivers/gpu/drm/i915/selftests/mock_gtt.c
index d5c559580bda..57ef5f65bf3b 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c
@@ -66,6 +66,8 @@ mock_ppgtt(struct drm_i915_private *i915,
 		return NULL;
 
 	kref_init(&ppgtt->ref);
+	ppgtt->open_count = 1;
+
 	ppgtt->vm.i915 = i915;
 	ppgtt->vm.total = round_down(U64_MAX, PAGE_SIZE);
 	ppgtt->vm.file = ERR_PTR(-ENODEV);
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index e7a1fcd3a261..cb8a1354633d 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -386,7 +386,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_SET_SPRITE_COLORKEY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_SET_SPRITE_COLORKEY, struct drm_intel_sprite_colorkey)
 #define DRM_IOCTL_I915_GET_SPRITE_COLORKEY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GET_SPRITE_COLORKEY, struct drm_intel_sprite_colorkey)
 #define DRM_IOCTL_I915_GEM_WAIT		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_WAIT, struct drm_i915_gem_wait)
-#define DRM_IOCTL_I915_GEM_CONTEXT_CREATE	DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct drm_i915_gem_context_create)
+#define DRM_IOCTL_I915_GEM_CONTEXT_CREATE	DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct drm_i915_gem_context_create_v2)
 #define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy)
 #define DRM_IOCTL_I915_REG_READ			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read)
 #define DRM_IOCTL_I915_GET_RESET_STATS		DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GET_RESET_STATS, struct drm_i915_reset_stats)
@@ -1407,6 +1407,15 @@ struct drm_i915_gem_context_create {
 	__u32 pad;
 };
 
+struct drm_i915_gem_context_create_v2 {
+	/*  output: id of new context*/
+	__u32 ctx_id;
+	__u32 flags;
+#define I915_GEM_CONTEXT_SHARE_GTT 0x1
+	__u32 share_ctx;
+	__u32 pad;
+};
+
 struct drm_i915_gem_context_destroy {
 	__u32 ctx_id;
 	__u32 pad;
-- 
2.18.0


* [PATCH 22/37] drm/i915: Allow contexts to share a single timeline across all engines
  2018-06-29  7:53 [PATCH 01/37] drm/i915/gtt: Add read only pages to gen8_pte_encode Chris Wilson
                   ` (19 preceding siblings ...)
  2018-06-29  7:53 ` [PATCH 21/37] drm/i915: Extend CREATE_CONTEXT to allow inheritance ala clone() Chris Wilson
@ 2018-06-29  7:53 ` Chris Wilson
  2018-06-29  7:53 ` [PATCH 23/37] drm/i915: Fix I915_EXEC_RING_MASK Chris Wilson
                   ` (19 subsequent siblings)
  40 siblings, 0 replies; 68+ messages in thread
From: Chris Wilson @ 2018-06-29  7:53 UTC (permalink / raw)
  To: intel-gfx

Previously, our view has been always to run the engines independently
within a context. (Multiple engines happened before we had contexts and
timelines, so they always operated independently and that behaviour
persisted into contexts.) However, at the user level the context often
represents a single timeline (e.g. GL contexts) and userspace must
ensure that the individual engines are serialised to present that
ordering to the client (or forgets about this detail entirely and hopes no
one notices - a fair ploy if the client can only directly control one
engine itself ;)

In the next patch, we will want to construct a set of engines that
operate as one, that have a single timeline interwoven between them, to
present a single virtual engine to the user. (They submit to the virtual
engine, then we decide which engine to execute on based on load.)

To that end, we want to be able to create contexts which have a single
timeline (fence context) shared between all engines, rather than multiple
timelines.
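
As a rough sketch of how userspace might then opt in (illustrative only,
error handling elided; it reuses the extended CONTEXT_CREATE uAPI added
earlier in this series, driven through libdrm's drmIoctl):

	struct drm_i915_gem_context_create_v2 arg = {
		.flags = I915_GEM_CONTEXT_SINGLE_TIMELINE,
	};

	/* rejected with -EINVAL on hardware without execlists */
	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &arg) == 0)
		ctx_id = arg.ctx_id; /* all engines share one fence context */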

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_context.c | 26 ++++++++++++++++-
 drivers/gpu/drm/i915/i915_gem_context.h |  3 ++
 drivers/gpu/drm/i915/i915_request.c     | 10 +++++--
 drivers/gpu/drm/i915/i915_request.h     |  5 +++-
 drivers/gpu/drm/i915/i915_sw_fence.c    | 39 +++++++++++++++++++++----
 drivers/gpu/drm/i915/i915_sw_fence.h    | 13 +++++++--
 drivers/gpu/drm/i915/intel_lrc.c        |  5 +++-
 include/uapi/drm/i915_drm.h             |  3 +-
 8 files changed, 91 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index f377856a8edf..fa3212349256 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -133,6 +133,9 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
 			ce->ops->destroy(ce);
 	}
 
+	if (ctx->timeline)
+		i915_timeline_put(ctx->timeline);
+
 	kfree(ctx->name);
 	put_pid(ctx->pid);
 
@@ -359,6 +362,7 @@ static void __destroy_hw_context(struct i915_gem_context *ctx,
 }
 
 #define CREATE_VM BIT(0)
+#define CREATE_TIMELINE BIT(1)
 
 static struct i915_gem_context *
 i915_gem_create_context(struct drm_i915_private *dev_priv,
@@ -369,6 +373,9 @@ i915_gem_create_context(struct drm_i915_private *dev_priv,
 
 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
 
+	if (flags & CREATE_TIMELINE && !HAS_EXECLISTS(dev_priv))
+		return ERR_PTR(-EINVAL);
+
 	/* Reap the most stale context */
 	contexts_free_first(dev_priv);
 
@@ -391,6 +398,18 @@ i915_gem_create_context(struct drm_i915_private *dev_priv,
 		ctx->desc_template = default_desc_template(dev_priv, ppgtt);
 	}
 
+	if (flags & CREATE_TIMELINE) {
+		struct i915_timeline *timeline;
+
+		timeline = i915_timeline_create(dev_priv, ctx->name);
+		if (IS_ERR(timeline)) {
+			__destroy_hw_context(ctx, file_priv);
+			return ERR_CAST(timeline);
+		}
+
+		ctx->timeline = timeline;
+	}
+
 	trace_i915_context_create(ctx);
 
 	return ctx;
@@ -733,7 +752,9 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
 	if (args->pad != 0)
 		return -EINVAL;
 
-	if (args->flags & ~I915_GEM_CONTEXT_SHARE_GTT)
+	if (args->flags &
+	    ~(I915_GEM_CONTEXT_SHARE_GTT |
+	      I915_GEM_CONTEXT_SINGLE_TIMELINE))
 		return -EINVAL;
 
 	if (client_is_banned(file_priv)) {
@@ -757,6 +778,9 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
 		flags &= ~CREATE_VM;
 	}
 
+	if (args->flags & I915_GEM_CONTEXT_SINGLE_TIMELINE)
+		flags |= CREATE_TIMELINE;
+
 	err = i915_mutex_lock_interruptible(dev);
 	if (err)
 		goto out;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index b116e4942c10..5e883d688afe 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -41,6 +41,7 @@ struct drm_i915_private;
 struct drm_i915_file_private;
 struct i915_hw_ppgtt;
 struct i915_request;
+struct i915_timeline;
 struct i915_vma;
 struct intel_ring;
 
@@ -66,6 +67,8 @@ struct i915_gem_context {
 	/** file_priv: owning file descriptor */
 	struct drm_i915_file_private *file_priv;
 
+	struct i915_timeline *timeline;
+
 	/**
 	 * @ppgtt: unique address space (GTT)
 	 *
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 51f28786b0e4..6305278b8040 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1037,8 +1037,14 @@ void i915_request_add(struct i915_request *request)
 	prev = i915_gem_active_raw(&timeline->last_request,
 				   &request->i915->drm.struct_mutex);
 	if (prev && !i915_request_completed(prev)) {
-		i915_sw_fence_await_sw_fence(&request->submit, &prev->submit,
-					     &request->submitq);
+		if (prev->engine == engine)
+			i915_sw_fence_await_sw_fence(&request->submit,
+						     &prev->submit,
+						     &request->submitq);
+		else
+			__i915_sw_fence_await_dma_fence(&request->submit,
+							&prev->fence,
+							&request->dmaq);
 		if (engine->schedule)
 			__i915_sched_node_add_dependency(&request->sched,
 							 &prev->sched,
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index e1c9365dfefb..74b8d416b9e9 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -108,7 +108,10 @@ struct i915_request {
 	 * It is used by the driver to then queue the request for execution.
 	 */
 	struct i915_sw_fence submit;
-	wait_queue_entry_t submitq;
+	union {
+		wait_queue_entry_t submitq;
+		struct i915_sw_dma_fence_cb dmaq;
+	};
 	wait_queue_head_t execute;
 
 	/*
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c
index 1de5173e53a2..376f933bfb4c 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.c
+++ b/drivers/gpu/drm/i915/i915_sw_fence.c
@@ -362,11 +362,6 @@ int i915_sw_fence_await_sw_fence_gfp(struct i915_sw_fence *fence,
 	return __i915_sw_fence_await_sw_fence(fence, signaler, NULL, gfp);
 }
 
-struct i915_sw_dma_fence_cb {
-	struct dma_fence_cb base;
-	struct i915_sw_fence *fence;
-};
-
 struct i915_sw_dma_fence_cb_timer {
 	struct i915_sw_dma_fence_cb base;
 	struct dma_fence *dma;
@@ -482,6 +477,40 @@ int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
 	return ret;
 }
 
+static void __dma_i915_sw_fence_wake(struct dma_fence *dma,
+				     struct dma_fence_cb *data)
+{
+	struct i915_sw_dma_fence_cb *cb = container_of(data, typeof(*cb), base);
+
+	i915_sw_fence_complete(cb->fence);
+}
+
+int __i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
+				    struct dma_fence *dma,
+				    struct i915_sw_dma_fence_cb *cb)
+{
+	int ret;
+
+	debug_fence_assert(fence);
+
+	if (dma_fence_is_signaled(dma))
+		return 0;
+
+	cb->fence = fence;
+	i915_sw_fence_await(fence);
+
+	ret = dma_fence_add_callback(dma, &cb->base, __dma_i915_sw_fence_wake);
+	if (ret == 0) {
+		ret = 1;
+	} else {
+		i915_sw_fence_complete(fence);
+		if (ret == -ENOENT) /* fence already signaled */
+			ret = 0;
+	}
+
+	return ret;
+}
+
 int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,
 				    struct reservation_object *resv,
 				    const struct dma_fence_ops *exclude,
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.h b/drivers/gpu/drm/i915/i915_sw_fence.h
index fe2ef4dadfc6..914a734d49bc 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.h
+++ b/drivers/gpu/drm/i915/i915_sw_fence.h
@@ -10,14 +10,13 @@
 #ifndef _I915_SW_FENCE_H_
 #define _I915_SW_FENCE_H_
 
+#include <linux/dma-fence.h>
 #include <linux/gfp.h>
 #include <linux/kref.h>
 #include <linux/notifier.h> /* for NOTIFY_DONE */
 #include <linux/wait.h>
 
 struct completion;
-struct dma_fence;
-struct dma_fence_ops;
 struct reservation_object;
 
 struct i915_sw_fence {
@@ -69,10 +68,20 @@ int i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
 int i915_sw_fence_await_sw_fence_gfp(struct i915_sw_fence *fence,
 				     struct i915_sw_fence *after,
 				     gfp_t gfp);
+
+struct i915_sw_dma_fence_cb {
+	struct dma_fence_cb base;
+	struct i915_sw_fence *fence;
+};
+
+int __i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
+				    struct dma_fence *dma,
+				    struct i915_sw_dma_fence_cb *cb);
 int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
 				  struct dma_fence *dma,
 				  unsigned long timeout,
 				  gfp_t gfp);
+
 int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,
 				    struct reservation_object *resv,
 				    const struct dma_fence_ops *exclude,
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 0121e1fd3160..8a4a1c0b986e 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -2836,7 +2836,10 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
 		goto error_deref_obj;
 	}
 
-	timeline = i915_timeline_create(ctx->i915, ctx->name);
+	if (ctx->timeline)
+		timeline = i915_timeline_get(ctx->timeline);
+	else
+		timeline = i915_timeline_create(ctx->i915, ctx->name);
 	if (IS_ERR(timeline)) {
 		ret = PTR_ERR(timeline);
 		goto error_deref_obj;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index cb8a1354633d..f50065413bfa 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1411,7 +1411,8 @@ struct drm_i915_gem_context_create_v2 {
 	/*  output: id of new context*/
 	__u32 ctx_id;
 	__u32 flags;
-#define I915_GEM_CONTEXT_SHARE_GTT 0x1
+#define I915_GEM_CONTEXT_SHARE_GTT		0x1
+#define I915_GEM_CONTEXT_SINGLE_TIMELINE	0x2
 	__u32 share_ctx;
 	__u32 pad;
 };
-- 
2.18.0


* [PATCH 23/37] drm/i915: Fix I915_EXEC_RING_MASK
  2018-06-29  7:53 [PATCH 01/37] drm/i915/gtt: Add read only pages to gen8_pte_encode Chris Wilson
                   ` (20 preceding siblings ...)
  2018-06-29  7:53 ` [PATCH 22/37] drm/i915: Allow contexts to share a single timeline across all engines Chris Wilson
@ 2018-06-29  7:53 ` Chris Wilson
  2018-06-29  7:53 ` [PATCH 24/37] drm/i915: Re-arrange execbuf so context is known before engine Chris Wilson
                   ` (18 subsequent siblings)
  40 siblings, 0 replies; 68+ messages in thread
From: Chris Wilson @ 2018-06-29  7:53 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson, Tvrtko Ursulin, stable

This was supposed to be a mask of all known rings, but it is being used
by execbuffer to filter out invalid rings, and so is instead mapping high
unused values onto valid rings. Instead of a mask of all known rings,
we need it to be the mask of all possible rings.
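
For illustration (the exact value is arbitrary): an execbuf flags value
whose ring selector bits are 0x9 used to be silently aliased onto a real
engine, whereas with the wider mask it is preserved and can be rejected:

	(0x9 & (7 << 0)) == 0x1	/* old: silently treated as I915_EXEC_RENDER */
	(0x9 & 0x3f)     == 0x9	/* new: falls through and is rejected as unknown */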

Fixes: 549f7365820a ("drm/i915: Enable SandyBridge blitter ring")
Fixes: de1add360522 ("drm/i915: Decouple execbuf uAPI from internal implementation")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: <stable@vger.kernel.org> # v4.6+
---
 include/uapi/drm/i915_drm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index f50065413bfa..b90d31ba913b 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -962,7 +962,7 @@ struct drm_i915_gem_execbuffer2 {
 	 * struct drm_i915_gem_exec_fence *fences.
 	 */
 	__u64 cliprects_ptr;
-#define I915_EXEC_RING_MASK              (7<<0)
+#define I915_EXEC_RING_MASK              (0x3f)
 #define I915_EXEC_DEFAULT                (0<<0)
 #define I915_EXEC_RENDER                 (1<<0)
 #define I915_EXEC_BSD                    (2<<0)
-- 
2.18.0


* [PATCH 24/37] drm/i915: Re-arrange execbuf so context is known before engine
  2018-06-29  7:53 [PATCH 01/37] drm/i915/gtt: Add read only pages to gen8_pte_encode Chris Wilson
                   ` (21 preceding siblings ...)
  2018-06-29  7:53 ` [PATCH 23/37] drm/i915: Fix I915_EXEC_RING_MASK Chris Wilson
@ 2018-06-29  7:53 ` Chris Wilson
  2018-06-29  7:53 ` [PATCH 25/37] drm/i915: Allow a context to define its set of engines Chris Wilson
                   ` (17 subsequent siblings)
  40 siblings, 0 replies; 68+ messages in thread
From: Chris Wilson @ 2018-06-29  7:53 UTC (permalink / raw)
  To: intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Needed for a following patch.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 41 ++++++++++++----------
 1 file changed, 23 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 3f0c612d42e7..29279f5cf7b5 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -2217,24 +2217,6 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	if (args->flags & I915_EXEC_IS_PINNED)
 		eb.batch_flags |= I915_DISPATCH_PINNED;
 
-	eb.engine = eb_select_engine(eb.i915, file, args);
-	if (!eb.engine)
-		return -EINVAL;
-
-	if (args->flags & I915_EXEC_RESOURCE_STREAMER) {
-		if (!HAS_RESOURCE_STREAMER(eb.i915)) {
-			DRM_DEBUG("RS is only allowed for Haswell, Gen8 and above\n");
-			return -EINVAL;
-		}
-		if (eb.engine->id != RCS) {
-			DRM_DEBUG("RS is not available on %s\n",
-				 eb.engine->name);
-			return -EINVAL;
-		}
-
-		eb.batch_flags |= I915_DISPATCH_RS;
-	}
-
 	if (args->flags & I915_EXEC_FENCE_IN) {
 		in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
 		if (!in_fence)
@@ -2259,6 +2241,28 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	if (unlikely(err))
 		goto err_destroy;
 
+	eb.engine = eb_select_engine(eb.i915, file, args);
+	if (!eb.engine) {
+		err = -EINVAL;
+		goto err_engine;
+	}
+
+	if (args->flags & I915_EXEC_RESOURCE_STREAMER) {
+		if (!HAS_RESOURCE_STREAMER(eb.i915)) {
+			DRM_DEBUG("RS is only allowed for Haswell, Gen8 and above\n");
+			err = -EINVAL;
+			goto err_engine;
+		}
+		if (eb.engine->id != RCS) {
+			DRM_DEBUG("RS is not available on %s\n",
+				  eb.engine->name);
+			err = -EINVAL;
+			goto err_engine;
+		}
+
+		eb.batch_flags |= I915_DISPATCH_RS;
+	}
+
 	/*
 	 * Take a local wakeref for preparing to dispatch the execbuf as
 	 * we expect to access the hardware fairly frequently in the
@@ -2419,6 +2423,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	mutex_unlock(&dev->struct_mutex);
 err_rpm:
 	intel_runtime_pm_put(eb.i915);
+err_engine:
 	i915_gem_context_put(eb.ctx);
 err_destroy:
 	eb_destroy(&eb);
-- 
2.18.0


* [PATCH 25/37] drm/i915: Allow a context to define its set of engines
  2018-06-29  7:53 [PATCH 01/37] drm/i915/gtt: Add read only pages to gen8_pte_encode Chris Wilson
                   ` (22 preceding siblings ...)
  2018-06-29  7:53 ` [PATCH 24/37] drm/i915: Re-arrange execbuf so context is known before engine Chris Wilson
@ 2018-06-29  7:53 ` Chris Wilson
  2018-06-29  7:53 ` [PATCH 26/37] drm/i915/execlists: Flush the tasklet before unpinning Chris Wilson
                   ` (16 subsequent siblings)
  40 siblings, 0 replies; 68+ messages in thread
From: Chris Wilson @ 2018-06-29  7:53 UTC (permalink / raw)
  To: intel-gfx

Over the last few years, we have debated how to extend the user API to
support an increase in the number of engines, which may be sparse and
even heterogeneous within a class (not all video decoders are created
equal). We settled on using (class, instance) tuples to identify a
specific engine, with an API for the user to construct a map of engines
to capabilities. Into this picture, we then add a challenge of virtual
engines; one user engine that maps behind the scenes to any number of
physical engines. To keep it general, we want the user to have full
control over that mapping. To that end, we allow the user to constrain a
context to define the set of engines that it can access, with the order
fully controlled by the user via (class, instance). With such precise control
in context setup, we can continue to use the existing execbuf uABI of
specifying a single index; only now it doesn't automagically map onto
the engines, it uses the user defined engine map from the context.

The I915_EXEC_DEFAULT slot is left empty, and is invalid for use by
execbuf. Its use will be revealed in the next patch.
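
A rough userspace sketch of defining an engine map (illustrative only;
fd and ctx_id come from the usual setup, error handling is elided, and
the wrapper struct is just a convenient way to append entries after the
flexible class_instance[] array):

	struct {
		struct i915_context_param_engines base;
		struct {
			__u32 class;
			__u32 instance;
		} class_instance[2];
	} set = {
		.class_instance = {
			{ I915_ENGINE_CLASS_RENDER, 0 },
			{ I915_ENGINE_CLASS_VIDEO, 0 },
		},
	};
	struct drm_i915_gem_context_param arg = {
		.ctx_id = ctx_id,
		.param = I915_CONTEXT_PARAM_ENGINES,
		.size = sizeof(set),
		.value = (__u64)(uintptr_t)&set,
	};

	/* Slot 0 (I915_EXEC_DEFAULT) is left empty; execbuf ring index 1
	 * is now rcs0 and index 2 is vcs0, regardless of the legacy map. */
	drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &arg);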

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_context.c    | 88 ++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gem_context.h    |  4 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 22 ++++--
 include/uapi/drm/i915_drm.h                | 27 +++++++
 4 files changed, 135 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index fa3212349256..63fe7ade3623 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -90,6 +90,7 @@
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
 #include "i915_trace.h"
+#include "i915_user_extensions.h"
 #include "intel_workarounds.h"
 
 #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
@@ -133,6 +134,8 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
 			ce->ops->destroy(ce);
 	}
 
+	kfree(ctx->engines);
+
 	if (ctx->timeline)
 		i915_timeline_put(ctx->timeline);
 
@@ -885,6 +888,87 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 	return ret;
 }
 
+struct set_engines {
+	struct i915_gem_context *ctx;
+	struct intel_engine_cs **engines;
+	unsigned int nengine;
+};
+
+static const i915_user_extension_fn set_engines__extensions[] = {
+};
+
+static int set_engines(struct i915_gem_context *ctx,
+		       struct drm_i915_gem_context_param *args)
+{
+	struct i915_context_param_engines __user *user;
+	struct set_engines set = {
+		.ctx = ctx,
+		.engines = NULL,
+		.nengine = -1,
+	};
+	unsigned int n;
+	u64 size, extensions;
+	int err;
+
+	user = u64_to_user_ptr(args->value);
+	size = args->size;
+	if (!size)
+		goto out;
+
+	if (size < sizeof(struct i915_context_param_engines))
+		return -EINVAL;
+
+	size -= sizeof(struct i915_context_param_engines);
+	if (size % sizeof(*user->class_instance))
+		return -EINVAL;
+
+	set.nengine = size / sizeof(*user->class_instance);
+	if (set.nengine == 0 || set.nengine >= I915_EXEC_RING_MASK)
+		return -EINVAL;
+
+	set.engines = kmalloc_array(set.nengine + 1,
+				    sizeof(*set.engines),
+				    GFP_KERNEL);
+	if (!set.engines)
+		return -ENOMEM;
+
+	set.engines[0] = NULL;
+	for (n = 0; n < set.nengine; n++) {
+		u32 class, instance;
+
+		if (get_user(class, &user->class_instance[n].class) ||
+		    get_user(instance, &user->class_instance[n].instance)) {
+			kfree(set.engines);
+			return -EFAULT;
+		}
+
+		set.engines[n + 1] =
+			intel_engine_lookup_user(ctx->i915, class, instance);
+		if (!set.engines[n + 1]) {
+			kfree(set.engines);
+			return -ENOENT;
+		}
+	}
+
+	err = -EFAULT;
+	if (!get_user(extensions, &user->extensions))
+		err = i915_user_extensions(u64_to_user_ptr(extensions),
+					   set_engines__extensions,
+					   ARRAY_SIZE(set_engines__extensions),
+					   &set);
+	if (err) {
+		kfree(set.engines);
+		return err;
+	}
+
+out:
+	kfree(ctx->engines);
+	ctx->engines = set.engines;
+	ctx->nengine = set.nengine + 1;
+
+	return 0;
+}
+
 int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 				    struct drm_file *file)
 {
@@ -952,6 +1036,10 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 		}
 		break;
 
+	case I915_CONTEXT_PARAM_ENGINES:
+		ret = set_engines(ctx, args);
+		break;
+
 	default:
 		ret = -EINVAL;
 		break;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index 5e883d688afe..8c55f8582469 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -67,6 +67,8 @@ struct i915_gem_context {
 	/** file_priv: owning file descriptor */
 	struct drm_i915_file_private *file_priv;
 
+	struct intel_engine_cs **engines;
+
 	struct i915_timeline *timeline;
 
 	/**
@@ -130,6 +132,8 @@ struct i915_gem_context {
 #define CONTEXT_BANNED			4
 #define CONTEXT_FORCE_SINGLE_SUBMISSION	5
 
+	unsigned int nengine;
+
 	/**
 	 * @hw_id: - unique identifier for the context
 	 *
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 29279f5cf7b5..b09395d44482 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1994,13 +1994,23 @@ static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = {
 };
 
 static struct intel_engine_cs *
-eb_select_engine(struct drm_i915_private *dev_priv,
+eb_select_engine(struct i915_execbuffer *eb,
 		 struct drm_file *file,
 		 struct drm_i915_gem_execbuffer2 *args)
 {
 	unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK;
 	struct intel_engine_cs *engine;
 
+	if (eb->ctx->engines) {
+		if (user_ring_id >= eb->ctx->nengine) {
+			DRM_DEBUG("execbuf with unknown ring: %u\n",
+				  user_ring_id);
+			return NULL;
+		}
+
+		return eb->ctx->engines[user_ring_id];
+	}
+
 	if (user_ring_id > I915_USER_RINGS) {
 		DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id);
 		return NULL;
@@ -2013,11 +2023,11 @@ eb_select_engine(struct drm_i915_private *dev_priv,
 		return NULL;
 	}
 
-	if (user_ring_id == I915_EXEC_BSD && HAS_BSD2(dev_priv)) {
+	if (user_ring_id == I915_EXEC_BSD && HAS_BSD2(eb->i915)) {
 		unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;
 
 		if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
-			bsd_idx = gen8_dispatch_bsd_engine(dev_priv, file);
+			bsd_idx = gen8_dispatch_bsd_engine(eb->i915, file);
 		} else if (bsd_idx >= I915_EXEC_BSD_RING1 &&
 			   bsd_idx <= I915_EXEC_BSD_RING2) {
 			bsd_idx >>= I915_EXEC_BSD_SHIFT;
@@ -2028,9 +2038,9 @@ eb_select_engine(struct drm_i915_private *dev_priv,
 			return NULL;
 		}
 
-		engine = dev_priv->engine[_VCS(bsd_idx)];
+		engine = eb->i915->engine[_VCS(bsd_idx)];
 	} else {
-		engine = dev_priv->engine[user_ring_map[user_ring_id]];
+		engine = eb->i915->engine[user_ring_map[user_ring_id]];
 	}
 
 	if (!engine) {
@@ -2241,7 +2251,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	if (unlikely(err))
 		goto err_destroy;
 
-	eb.engine = eb_select_engine(eb.i915, file, args);
+	eb.engine = eb_select_engine(&eb, file, args);
 	if (!eb.engine) {
 		err = -EINVAL;
 		goto err_engine;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index b90d31ba913b..259eb665fc1a 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1486,9 +1486,36 @@ struct drm_i915_gem_context_param {
 #define   I915_CONTEXT_MAX_USER_PRIORITY	1023 /* inclusive */
 #define   I915_CONTEXT_DEFAULT_PRIORITY		0
 #define   I915_CONTEXT_MIN_USER_PRIORITY	-1023 /* inclusive */
+
+/*
+ * I915_CONTEXT_PARAM_ENGINES:
+ *
+ * Bind this context to operate on this subset of available engines. Henceforth,
+ * the I915_EXEC_RING selector for DRM_IOCTL_I915_GEM_EXECBUFFER2 operates as
+ * an index into this array of engines; I915_EXEC_DEFAULT selecting engine[0]
+ * and upwards. The array created is offset by 1, such that by default
+ * I915_EXEC_DEFAULT is left empty, to be filled in as directed. Slots 1...N
+ * are then filled in using the specified (class, instance).
+ *
+ * Setting the number of engines bound to the context will revert back to
+ * default settings.
+ *
+ * See struct i915_context_param_engines.
+ */
+#define I915_CONTEXT_PARAM_ENGINES	0x7
+
 	__u64 value;
 };
 
+struct i915_context_param_engines {
+	__u64 extensions;
+
+	struct {
+		__u32 class; /* see enum drm_i915_gem_engine_class */
+		__u32 instance;
+	} class_instance[0];
+};
+
 enum drm_i915_oa_format {
 	I915_OA_FORMAT_A13 = 1,	    /* HSW only */
 	I915_OA_FORMAT_A29,	    /* HSW only */
-- 
2.18.0


* [PATCH 26/37] drm/i915/execlists: Flush the tasklet before unpinning
  2018-06-29  7:53 [PATCH 01/37] drm/i915/gtt: Add read only pages to gen8_pte_encode Chris Wilson
                   ` (23 preceding siblings ...)
  2018-06-29  7:53 ` [PATCH 25/37] drm/i915: Allow a context to define its set of engines Chris Wilson
@ 2018-06-29  7:53 ` Chris Wilson
  2018-06-29  7:53 ` [PATCH 27/37] drm/i915/execlists: Refactor out can_merge_rq() Chris Wilson
                   ` (15 subsequent siblings)
  40 siblings, 0 replies; 68+ messages in thread
From: Chris Wilson @ 2018-06-29  7:53 UTC (permalink / raw)
  To: intel-gfx

Inside the execlists submission tasklet, we often make the mistake of
assuming that everything beneath the request is available for use.
However, the submission and the request live on two separate timelines,
and the request contents may be freed by an early retirement before we
have had a chance to run the submission tasklet (think ksoftirqd). To
safeguard ourselves against any mistakes, flush the tasklet before we
unpin the context if execlists still has a reference to this context.

References: 60367132a214 ("drm/i915: Avoid use-after-free of ctx in request tracepoints")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_context.h |  1 +
 drivers/gpu/drm/i915/intel_lrc.c        | 25 ++++++++++++++++++++++++-
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index 8c55f8582469..bc440a7fe894 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -160,6 +160,7 @@ struct i915_gem_context {
 	/** engine: per-engine logical HW state */
 	struct intel_context {
 		struct i915_gem_context *gem_context;
+		struct intel_engine_cs *active;
 		struct i915_vma *state;
 		struct intel_ring *ring;
 		u32 *lrc_reg_state;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 8a4a1c0b986e..d70973c51a33 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -504,8 +504,11 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
 		rq = port_unpack(&port[n], &count);
 		if (rq) {
 			GEM_BUG_ON(count > !n);
-			if (!count++)
+			if (!count++) {
+				GEM_BUG_ON(rq->hw_context->active);
 				execlists_context_schedule_in(rq);
+				rq->hw_context->active = engine;
+			}
 			port_set(&port[n], port_pack(rq, count));
 			desc = execlists_update_context(rq);
 			GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
@@ -812,6 +815,8 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists)
 			  intel_engine_get_seqno(rq->engine));
 
 		GEM_BUG_ON(!execlists->active);
+
+		rq->hw_context->active = NULL;
 		execlists_context_schedule_out(rq,
 					       i915_request_completed(rq) ?
 					       INTEL_CONTEXT_SCHEDULE_OUT :
@@ -1105,6 +1110,7 @@ static void process_csb(struct intel_engine_cs *engine)
 			 */
 			GEM_BUG_ON(!i915_request_completed(rq));
 
+			rq->hw_context->active = NULL;
 			execlists_context_schedule_out(rq,
 						       INTEL_CONTEXT_SCHEDULE_OUT);
 			GEM_TRACE("%s completed ctx=%d\n",
@@ -1362,6 +1368,23 @@ static void execlists_context_destroy(struct intel_context *ce)
 
 static void execlists_context_unpin(struct intel_context *ce)
 {
+	struct intel_engine_cs *engine;
+
+	/*
+	 * The tasklet may still be using a pointer to our state, via an
+	 * old request. However, since we know we only unpin the context
+	 * on retirement of the following request, we know that the last
+	 * request referencing us will have had a completion CS interrupt.
+	 * If we see that it is still active, it means that the tasklet hasn't
+	 * had the chance to run yet; let it run before we teardown the
+	 * reference it may use.
+	 */
+	engine = READ_ONCE(ce->active);
+	if (unlikely(engine)) {
+		tasklet_kill(&engine->execlists.tasklet);
+		GEM_BUG_ON(READ_ONCE(ce->active));
+	}
+
 	intel_ring_unpin(ce->ring);
 
 	ce->state->obj->pin_global--;
-- 
2.18.0


* [PATCH 27/37] drm/i915/execlists: Refactor out can_merge_rq()
  2018-06-29  7:53 [PATCH 01/37] drm/i915/gtt: Add read only pages to gen8_pte_encode Chris Wilson
                   ` (24 preceding siblings ...)
  2018-06-29  7:53 ` [PATCH 26/37] drm/i915/execlists: Flush the tasklet before unpinning Chris Wilson
@ 2018-06-29  7:53 ` Chris Wilson
  2018-06-29  7:53 ` [PATCH 28/37] drm/i915: Replace nested subclassing with explicit subclasses Chris Wilson
                   ` (14 subsequent siblings)
  40 siblings, 0 replies; 68+ messages in thread
From: Chris Wilson @ 2018-06-29  7:53 UTC (permalink / raw)
  To: intel-gfx

In the next patch, we add another user that wants to check whether
requests can be merged into a single HW execution, and in the future we
want to add more conditions under which requests from the same context
cannot be merged. In preparation, extract out can_merge_rq().

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_lrc.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index d70973c51a33..76e1cbb18827 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -553,6 +553,15 @@ static bool can_merge_ctx(const struct intel_context *prev,
 	return true;
 }
 
+static bool can_merge_rq(const struct i915_request *prev,
+			 const struct i915_request *next)
+{
+	if (!can_merge_ctx(prev->hw_context, next->hw_context))
+		return false;
+
+	return true;
+}
+
 static void port_assign(struct execlist_port *port, struct i915_request *rq)
 {
 	GEM_BUG_ON(rq == port_request(port));
@@ -714,8 +723,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 			 * second request, and so we never need to tell the
 			 * hardware about the first.
 			 */
-			if (last &&
-			    !can_merge_ctx(rq->hw_context, last->hw_context)) {
+			if (last && !can_merge_rq(rq, last)) {
 				/*
 				 * If we are on the second port and cannot
 				 * combine this request with the last, then we
-- 
2.18.0


* [PATCH 28/37] drm/i915: Replace nested subclassing with explicit subclasses
  2018-06-29  7:53 [PATCH 01/37] drm/i915/gtt: Add read only pages to gen8_pte_encode Chris Wilson
                   ` (25 preceding siblings ...)
  2018-06-29  7:53 ` [PATCH 27/37] drm/i915/execlists: Refactor out can_merge_rq() Chris Wilson
@ 2018-06-29  7:53 ` Chris Wilson
  2018-06-29  7:53 ` [PATCH 29/37] RFC drm/i915: Load balancing across a virtual engine Chris Wilson
                   ` (13 subsequent siblings)
  40 siblings, 0 replies; 68+ messages in thread
From: Chris Wilson @ 2018-06-29  7:53 UTC (permalink / raw)
  To: intel-gfx

In the next patch, we will want a third distinct class of timeline that
may overlap with the current pair of client and engine timeline classes.
Rather than use the ad hoc markup of SINGLE_DEPTH_NESTING, initialise
the different timeline classes with an explicit subclass.
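
The nesting in question then reads as below (a sketch drawn from the
hunks that follow): the engine timeline is annotated once at init time,
so the nested acquisition no longer needs SINGLE_DEPTH_NESTING at each
call site.

	/* init: the engine timeline gets its own lockdep subclass;
	 * client timelines keep the default TIMELINE_CLIENT (0). */
	lockdep_set_subclass(&engine->timeline.lock, TIMELINE_ENGINE);

	/* use: engine timeline outer, client timeline nested inside */
	spin_lock_irqsave(&engine->timeline.lock, flags);
	spin_lock(&timeline->lock);
	...
	spin_unlock(&timeline->lock);
	spin_unlock_irqrestore(&engine->timeline.lock, flags);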

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c              | 2 +-
 drivers/gpu/drm/i915/i915_request.c          | 2 +-
 drivers/gpu/drm/i915/i915_timeline.h         | 2 ++
 drivers/gpu/drm/i915/intel_engine_cs.c       | 1 +
 drivers/gpu/drm/i915/selftests/mock_engine.c | 2 ++
 5 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 6d9824323fab..3e143672b42f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3088,7 +3088,7 @@ static void engine_skip_context(struct i915_request *request)
 	GEM_BUG_ON(timeline == &engine->timeline);
 
 	spin_lock_irqsave(&engine->timeline.lock, flags);
-	spin_lock_nested(&timeline->lock, SINGLE_DEPTH_NESTING);
+	spin_lock(&timeline->lock);
 
 	list_for_each_entry_continue(request, &engine->timeline.requests, link)
 		if (request->gem_context == hung_ctx)
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 6305278b8040..ec6f9fa35310 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -456,7 +456,7 @@ static void move_to_timeline(struct i915_request *request,
 	GEM_BUG_ON(request->timeline == &request->engine->timeline);
 	lockdep_assert_held(&request->engine->timeline.lock);
 
-	spin_lock_nested(&request->timeline->lock, SINGLE_DEPTH_NESTING);
+	spin_lock(&request->timeline->lock);
 	list_move_tail(&request->link, &timeline->requests);
 	spin_unlock(&request->timeline->lock);
 }
diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h
index dc2a4632faa7..a2c2c3ab5fb0 100644
--- a/drivers/gpu/drm/i915/i915_timeline.h
+++ b/drivers/gpu/drm/i915/i915_timeline.h
@@ -37,6 +37,8 @@ struct i915_timeline {
 	u32 seqno;
 
 	spinlock_t lock;
+#define TIMELINE_CLIENT 0 /* default subclass */
+#define TIMELINE_ENGINE 1
 
 	/**
 	 * List of breadcrumbs associated with GPU requests currently
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index d3d1b6959ae7..a0f82e0272dd 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -479,6 +479,7 @@ static void intel_engine_init_execlist(struct intel_engine_cs *engine)
 void intel_engine_setup_common(struct intel_engine_cs *engine)
 {
 	i915_timeline_init(engine->i915, &engine->timeline, engine->name);
+	lockdep_set_subclass(&engine->timeline.lock, TIMELINE_ENGINE);
 
 	intel_engine_init_execlist(engine);
 	intel_engine_init_hangcheck(engine);
diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c
index c2a0451336cf..22a73da45ad5 100644
--- a/drivers/gpu/drm/i915/selftests/mock_engine.c
+++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
@@ -200,6 +200,8 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
 	engine->base.submit_request = mock_submit_request;
 
 	i915_timeline_init(i915, &engine->base.timeline, engine->base.name);
+	lockdep_set_subclass(&engine->base.timeline.lock, TIMELINE_ENGINE);
+
 	intel_engine_init_breadcrumbs(&engine->base);
 	engine->base.breadcrumbs.mock = true; /* prevent touching HW for irqs */
 
-- 
2.18.0


* [PATCH 29/37] RFC drm/i915: Load balancing across a virtual engine
  2018-06-29  7:53 [PATCH 01/37] drm/i915/gtt: Add read only pages to gen8_pte_encode Chris Wilson
                   ` (26 preceding siblings ...)
  2018-06-29  7:53 ` [PATCH 28/37] drm/i915: Replace nested subclassing with explicit subclasses Chris Wilson
@ 2018-06-29  7:53 ` Chris Wilson
  2018-06-29  7:53 ` [PATCH 30/37] drm/i915: Introduce i915_address_space.mutex Chris Wilson
                   ` (12 subsequent siblings)
  40 siblings, 0 replies; 68+ messages in thread
From: Chris Wilson @ 2018-06-29  7:53 UTC (permalink / raw)
  To: intel-gfx

Having allowed the user to define the set of engines that they want to
use, we go one step further and allow them to bind those engines into a
single virtual instance. Submitting a batch to the virtual engine will
then forward it to any one of the set in a manner that best distributes
load.  The virtual engine has a single timeline across all
engines (it operates as a single queue), so it is not able to concurrently
run batches across multiple engines by itself; that is left up to the user
to submit multiple concurrent batches to multiple queues. Multiple users
will be load balanced across the system.

The mechanism used for load balancing in this patch is a late greedy
balancer. When a request is ready for execution, it is added to each
engine's queue, and when an engine is ready for its next request it
claims it from the virtual engine. The first engine to do so wins, i.e.
the request is executed at the earliest opportunity (idle moment) in the
system.

As not all HW is created equal, the user is still able to skip the
virtual engine and execute the batch on a specific engine, all within the
same queue. It will then be executed in order on the correct engine,
with execution on other virtual engines being moved away due to the load
detection.

A couple of areas for potential improvement left!

- The virtual engine always takes priority over equal-priority tasks.
Mostly broken up by applying FQ_CODEL rules for prioritising new clients,
and hopefully the virtual and real engines are not then congested (i.e.
all work is via virtual engines, or all work is to the real engine).

- We require the breadcrumb irq around every virtual engine request. For
normal engines, we eliminate the need for the slow round trip via
interrupt by using the submit fence and queueing in order. For virtual
engines, we have to allow any job to transfer to a new ring, and cannot
coalesce the submissions, so require the completion fence instead,
forcing the persistent use of interrupts.

Other areas of improvement are more general, such as reducing lock
contention, reducing dispatch overhead, looking at direct submission
rather than bouncing around tasklets etc.
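
For reference, a rough sketch of how userspace might construct such a
virtual engine, chaining the load-balance extension onto the engine map
from the previous patches (the i915_user_extension header layout and
field ordering are assumptions here; only the names visible in this
patch are taken as given, and the context must have been created with
I915_GEM_CONTEXT_SINGLE_TIMELINE):

	struct i915_context_engines_load_balance balance = {
		.base = { .name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE },
		.engines_mask = ~0ull, /* balance across every listed engine */
	};
	struct {
		struct i915_context_param_engines base;
		struct {
			__u32 class;
			__u32 instance;
		} class_instance[2];
	} set = {
		.base = { .extensions = (__u64)(uintptr_t)&balance },
		.class_instance = {
			{ I915_ENGINE_CLASS_VIDEO, 0 },
			{ I915_ENGINE_CLASS_VIDEO, 1 },
		},
	};

	/* Slot 0 (I915_EXEC_DEFAULT) becomes the virtual engine: execbuf
	 * with ring index 0 is balanced between vcs0 and vcs1, while
	 * indices 1 and 2 still pin work to a specific engine. */

followed by the same CONTEXT_SETPARAM call as in the previous patch.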

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.h            |   5 +
 drivers/gpu/drm/i915/i915_gem_context.c    |  82 +++-
 drivers/gpu/drm/i915/i915_gem_context.h    |   1 +
 drivers/gpu/drm/i915/i915_request.c        |   2 +-
 drivers/gpu/drm/i915/i915_timeline.h       |   1 +
 drivers/gpu/drm/i915/intel_engine_cs.c     |   1 +
 drivers/gpu/drm/i915/intel_lrc.c           | 412 ++++++++++++++++++++-
 drivers/gpu/drm/i915/intel_lrc.h           |   6 +
 drivers/gpu/drm/i915/intel_ringbuffer.h    |   8 +
 drivers/gpu/drm/i915/selftests/intel_lrc.c | 177 +++++++++
 include/uapi/drm/i915_drm.h                |  27 ++
 11 files changed, 711 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h
index e46592956872..b3c81ac659ff 100644
--- a/drivers/gpu/drm/i915/i915_gem.h
+++ b/drivers/gpu/drm/i915/i915_gem.h
@@ -93,4 +93,9 @@ static inline bool __tasklet_is_enabled(const struct tasklet_struct *t)
 	return !atomic_read(&t->count);
 }
 
+static inline bool __tasklet_is_scheduled(struct tasklet_struct *t)
+{
+	return test_bit(TASKLET_STATE_SCHED, &t->state);
+}
+
 #endif /* __I915_GEM_H__ */
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 63fe7ade3623..a5492739284e 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -91,6 +91,7 @@
 #include "i915_drv.h"
 #include "i915_trace.h"
 #include "i915_user_extensions.h"
+#include "intel_lrc.h"
 #include "intel_workarounds.h"
 
 #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
@@ -134,7 +135,10 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
 			ce->ops->destroy(ce);
 	}
 
-	kfree(ctx->engines);
+	if (ctx->engines) {
+		intel_virtual_engine_put(ctx->engines[0]);
+		kfree(ctx->engines);
+	}
 
 	if (ctx->timeline)
 		i915_timeline_put(ctx->timeline);
@@ -299,6 +303,7 @@ __create_hw_context(struct drm_i915_private *dev_priv,
 		struct intel_context *ce = &ctx->__engine[n];
 
 		ce->gem_context = ctx;
+		ce->owner = dev_priv->engine[n];
 	}
 
 	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
@@ -616,7 +621,8 @@ last_request_on_engine(struct i915_timeline *timeline,
 
 	rq = i915_gem_active_raw(&timeline->last_request,
 				 &engine->i915->drm.struct_mutex);
-	if (rq && rq->engine == engine) {
+	if (rq &&
+	    (rq->engine == engine || intel_engine_is_virtual(rq->engine))) {
 		GEM_TRACE("last request for %s on engine %s: %llx:%d\n",
 			  timeline->name, engine->name,
 			  rq->fence.context, rq->fence.seqno);
@@ -888,13 +894,79 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 	return ret;
 }
 
+static int check_user_mbz64(u64 __user *user)
+{
+	u64 mbz;
+
+	if (get_user(mbz, user))
+		return -EFAULT;
+
+	return mbz ? -EINVAL : 0;
+}
+
 struct set_engines {
 	struct i915_gem_context *ctx;
 	struct intel_engine_cs **engines;
 	unsigned int nengine;
 };
 
+static int set_engines__load_balance(struct i915_user_extension __user *base,
+				     void *data)
+
+{
+	struct i915_context_engines_load_balance __user *ext =
+		container_of(base, typeof(*ext) __user, base);
+	const struct set_engines *set = data;
+	struct intel_engine_cs *ve;
+	unsigned int n;
+	u64 mask;
+	int err;
+
+	if (set->engines[0])
+		return -EEXIST;
+
+	if (!HAS_EXECLISTS(set->ctx->i915))
+		return -ENODEV;
+
+	if (!set->ctx->timeline)
+		return -EINVAL;
+
+	err = check_user_mbz64(&ext->flags);
+	if (err)
+		return err;
+
+	for (n = 0; n < ARRAY_SIZE(ext->mbz); n++) {
+		err = check_user_mbz64(&ext->mbz[n]);
+		if (err)
+			return err;
+	}
+
+	if (get_user(mask, &ext->engines_mask))
+		return -EFAULT;
+
+	if (mask == ~0ull) {
+		ve = intel_execlists_create_virtual(set->ctx,
+						    set->engines + 1,
+						    set->nengine);
+	} else {
+		struct intel_engine_cs *stack[64];
+		int bit;
+
+		n = 0;
+		for_each_set_bit(bit, (unsigned long *)&mask, set->nengine)
+			stack[n++] = set->engines[bit + 1];
+
+		ve = intel_execlists_create_virtual(set->ctx, stack, n);
+	}
+	if (IS_ERR(ve))
+		return PTR_ERR(ve);
+
+	set->engines[0] = ve;
+	return 0;
+}
+
 static const i915_user_extension_fn set_engines__extensions[] = {
+	[I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE] = set_engines__load_balance,
 };
 
 static int set_engines(struct i915_gem_context *ctx,
@@ -957,12 +1029,16 @@ static int set_engines(struct i915_gem_context *ctx,
 					   ARRAY_SIZE(set_engines__extensions),
 					   &set);
 	if (err) {
+		intel_virtual_engine_put(set.engines[0]);
 		kfree(set.engines);
 		return err;
 	}
 
 out:
-	kfree(ctx->engines);
+	if (ctx->engines) {
+		intel_virtual_engine_put(ctx->engines[0]);
+		kfree(ctx->engines);
+	}
 	ctx->engines = set.engines;
 	ctx->nengine = set.nengine + 1;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index bc440a7fe894..95c0abfc6d9c 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -161,6 +161,7 @@ struct i915_gem_context {
 	struct intel_context {
 		struct i915_gem_context *gem_context;
 		struct intel_engine_cs *active;
+		struct intel_engine_cs *owner;
 		struct i915_vma *state;
 		struct intel_ring *ring;
 		u32 *lrc_reg_state;
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index ec6f9fa35310..beb2a4429a40 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1037,7 +1037,7 @@ void i915_request_add(struct i915_request *request)
 	prev = i915_gem_active_raw(&timeline->last_request,
 				   &request->i915->drm.struct_mutex);
 	if (prev && !i915_request_completed(prev)) {
-		if (prev->engine == engine)
+		if (prev->engine == engine && !intel_engine_is_virtual(engine))
 			i915_sw_fence_await_sw_fence(&request->submit,
 						     &prev->submit,
 						     &request->submitq);
diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h
index a2c2c3ab5fb0..b5321dc2d5a5 100644
--- a/drivers/gpu/drm/i915/i915_timeline.h
+++ b/drivers/gpu/drm/i915/i915_timeline.h
@@ -39,6 +39,7 @@ struct i915_timeline {
 	spinlock_t lock;
 #define TIMELINE_CLIENT 0 /* default subclass */
 #define TIMELINE_ENGINE 1
+#define TIMELINE_VIRTUAL 2
 
 	/**
 	 * List of breadcrumbs associated with GPU requests currently
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index a0f82e0272dd..11aa8db47a4d 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1112,6 +1112,7 @@ bool intel_engine_retire_request(struct intel_engine_cs *engine,
 		  yesno(port_request(engine->execlists.port) == rq));
 
 	lockdep_assert_held(&engine->i915->drm.struct_mutex);
+	GEM_BUG_ON(intel_engine_is_virtual(engine));
 	GEM_BUG_ON(rq->engine != engine);
 	GEM_BUG_ON(!i915_request_completed(rq));
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 76e1cbb18827..0ce6b08ac64a 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -164,6 +164,29 @@
 #define WA_TAIL_DWORDS 2
 #define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS)
 
+struct virtual_engine {
+	struct intel_engine_cs base;
+
+	struct intel_context context;
+	struct kref kref;
+
+	struct intel_engine_cs *bound;
+
+	struct i915_request *request;
+	struct ve_node {
+		struct rb_node node;
+		int prio;
+	} nodes[I915_NUM_ENGINES];
+
+	unsigned int count;
+	struct intel_engine_cs *siblings[0];
+};
+
+static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
+{
+	return container_of(engine, struct virtual_engine, base);
+}
+
 static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
 					    struct intel_engine_cs *engine,
 					    struct intel_context *ce);
@@ -372,19 +395,26 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
 	list_for_each_entry_safe_reverse(rq, rn,
 					 &engine->timeline.requests,
 					 link) {
+		struct intel_engine_cs *owner;
+
 		if (i915_request_completed(rq))
 			return;
 
 		__i915_request_unsubmit(rq);
 		unwind_wa_tail(rq);
 
-		GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
-		if (rq_prio(rq) != last_prio) {
-			last_prio = rq_prio(rq);
-			pl = lookup_priolist(engine, last_prio);
-		}
+		owner = rq->hw_context->owner;
+		if (likely(owner == engine)) {
+			GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
+			if (rq_prio(rq) != last_prio) {
+				last_prio = rq_prio(rq);
+				pl = lookup_priolist(engine, last_prio);
+			}
 
-		list_add(&rq->sched.link, pl);
+			list_add(&rq->sched.link, pl);
+		} else {
+			owner->submit_request(rq);
+		}
 	}
 }
 
@@ -613,6 +643,50 @@ static void complete_preempt_context(struct intel_engine_execlists *execlists)
 	execlists_clear_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
 }
 
+static void virtual_update_register_offsets(u32 *regs,
+					    struct intel_engine_cs *engine)
+{
+	u32 base = engine->mmio_base;
+
+	regs[CTX_CONTEXT_CONTROL] =
+		i915_mmio_reg_offset(RING_CONTEXT_CONTROL(engine));
+	regs[CTX_RING_HEAD] = i915_mmio_reg_offset(RING_HEAD(base));
+	regs[CTX_RING_TAIL] = i915_mmio_reg_offset(RING_TAIL(base));
+	regs[CTX_RING_BUFFER_START] = i915_mmio_reg_offset(RING_START(base));
+	regs[CTX_RING_BUFFER_CONTROL] = i915_mmio_reg_offset(RING_CTL(base));
+
+	regs[CTX_BB_HEAD_U] = i915_mmio_reg_offset(RING_BBADDR_UDW(base));
+	regs[CTX_BB_HEAD_L] = i915_mmio_reg_offset(RING_BBADDR(base));
+	regs[CTX_BB_STATE] = i915_mmio_reg_offset(RING_BBSTATE(base));
+	regs[CTX_SECOND_BB_HEAD_U] =
+		i915_mmio_reg_offset(RING_SBBADDR_UDW(base));
+	regs[CTX_SECOND_BB_HEAD_L] = i915_mmio_reg_offset(RING_SBBADDR(base));
+	regs[CTX_SECOND_BB_STATE] = i915_mmio_reg_offset(RING_SBBSTATE(base));
+
+	regs[CTX_CTX_TIMESTAMP] =
+		i915_mmio_reg_offset(RING_CTX_TIMESTAMP(base));
+	regs[CTX_PDP3_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, 3));
+	regs[CTX_PDP3_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, 3));
+	regs[CTX_PDP2_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, 2));
+	regs[CTX_PDP2_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, 2));
+	regs[CTX_PDP1_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, 1));
+	regs[CTX_PDP1_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, 1));
+	regs[CTX_PDP0_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, 0));
+	regs[CTX_PDP0_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, 0));
+
+	if (engine->class == RENDER_CLASS) {
+		regs[CTX_RCS_INDIRECT_CTX] =
+			i915_mmio_reg_offset(RING_INDIRECT_CTX(base));
+		regs[CTX_RCS_INDIRECT_CTX_OFFSET] =
+			i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(base));
+		regs[CTX_BB_PER_CTX_PTR] =
+			i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(base));
+
+		regs[CTX_R_PWR_CLK_STATE] =
+			i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE);
+	}
+}
+
 static void execlists_dequeue(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -622,6 +696,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 	struct i915_request *last = port_request(port);
 	struct rb_node *rb;
 	bool submit = false;
+	int prio;
 
 	/*
 	 * Hardware submission is through 2 ports. Conceptually each port
@@ -645,6 +720,31 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 	 * and context switches) submission.
 	 */
 
+restart_virtual_engine:
+	prio = execlists->queue_priority;
+	for (rb = rb_first_cached(&execlists->virtual); rb; ) {
+		struct virtual_engine *ve =
+			rb_entry(rb, typeof(*ve), nodes[engine->id].node);
+		struct i915_request *rq = READ_ONCE(ve->request);
+		struct intel_engine_cs *active;
+
+		if (!rq) {
+			rb_erase_cached(rb, &execlists->virtual);
+			RB_CLEAR_NODE(rb);
+			rb = rb_first_cached(&execlists->virtual);
+			continue;
+		}
+
+		active = READ_ONCE(ve->context.active);
+		if (active && active != engine) {
+			rb = rb_next(rb);
+			continue;
+		}
+
+		prio = max(prio, rq_prio(rq));
+		break;
+	}
+
 	if (last) {
 		/*
 		 * Don't resubmit or switch until all outstanding
@@ -666,7 +766,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))
 			return;
 
-		if (need_preempt(engine, last, execlists->queue_priority)) {
+		if (need_preempt(engine, last, prio)) {
 			inject_preempt_context(engine);
 			return;
 		}
@@ -706,6 +806,65 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		last->tail = last->wa_tail;
 	}
 
+	if (rb) { /* XXX virtual is always taking precedence */
+		struct virtual_engine *ve =
+			rb_entry(rb, typeof(*ve), nodes[engine->id].node);
+		struct i915_request *rq;
+
+		spin_lock(&ve->base.timeline.lock);
+
+		rq = ve->request;
+		if (unlikely(!rq)) { /* lost the race to a sibling */
+			spin_unlock(&ve->base.timeline.lock);
+			goto restart_virtual_engine;
+		}
+
+		if (rq_prio(rq) >= prio) {
+			if (last && !can_merge_rq(rq, last)) {
+				spin_unlock(&ve->base.timeline.lock);
+				return;
+			}
+
+			GEM_BUG_ON(rq->engine != &ve->base);
+			GEM_BUG_ON(rq->hw_context != &ve->context);
+			rq->engine = engine;
+
+			if (engine != ve->bound) {
+				u32 *regs = ve->context.lrc_reg_state;
+				unsigned int n;
+
+				virtual_update_register_offsets(regs, engine);
+				ve->bound = engine;
+
+				/*
+				 * Move the bound engine to the top of the list
+				 * for future execution. We then kick this
+				 * tasklet first before checking others, so that
+				 * we preferentially reuse this set of bound
+				 * registers.
+				 */
+				for (n = 1; n < ve->count; n++) {
+					if (ve->siblings[n] == engine) {
+						swap(ve->siblings[n],
+						     ve->siblings[0]);
+						break;
+					}
+				}
+			}
+
+			__i915_request_submit(rq);
+			trace_i915_request_in(rq, port_index(port, execlists));
+			submit = true;
+			last = rq;
+
+			ve->request = NULL;
+			rb_erase_cached(rb, &execlists->virtual);
+			RB_CLEAR_NODE(rb);
+		}
+
+		spin_unlock(&ve->base.timeline.lock);
+	}
+
 	while ((rb = rb_first_cached(&execlists->queue))) {
 		struct i915_priolist *p = to_priolist(rb);
 		struct i915_request *rq, *rn;
@@ -2943,6 +3102,245 @@ void intel_lr_context_resume(struct drm_i915_private *dev_priv)
 	}
 }
 
+static void virtual_engine_free(struct kref *kref)
+{
+	struct virtual_engine *ve = container_of(kref, typeof(*ve), kref);
+	unsigned int n;
+
+	GEM_BUG_ON(ve->request);
+	GEM_BUG_ON(ve->context.active);
+
+	for (n = 0; n < ve->count; n++) {
+		struct intel_engine_cs *sibling = ve->siblings[n];
+		struct rb_node *node = &ve->nodes[sibling->id].node;
+
+		if (RB_EMPTY_NODE(node))
+			continue;
+
+		spin_lock_irq(&sibling->timeline.lock);
+
+		if (!RB_EMPTY_NODE(node))
+			rb_erase_cached(node, &sibling->execlists.virtual);
+
+		spin_unlock_irq(&sibling->timeline.lock);
+
+		tasklet_kill(&ve->siblings[n]->execlists.tasklet);
+	}
+	GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
+
+	if (ve->context.state)
+		execlists_context_destroy(&ve->context);
+
+	intel_engine_cleanup_scratch(&ve->base);
+	i915_timeline_fini(&ve->base.timeline);
+	kfree(ve);
+}
+
+static void virtual_context_unpin(struct intel_context *ce)
+{
+	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
+
+	execlists_context_unpin(ce);
+
+	kref_put(&ve->kref, virtual_engine_free);
+}
+
+static const struct intel_context_ops virtual_context_ops = {
+	.unpin = virtual_context_unpin,
+};
+
+static struct intel_context *
+virtual_context_pin(struct intel_engine_cs *engine,
+		    struct i915_gem_context *ctx)
+{
+	struct virtual_engine *ve = to_virtual_engine(engine);
+	struct intel_context *ce = &ve->context;
+
+	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
+
+	if (likely(ce->pin_count++))
+		return ce;
+	GEM_BUG_ON(!ce->pin_count); /* no overflow please! */
+
+	kref_get(&ve->kref);
+	ce->ops = &virtual_context_ops;
+
+	if (!ve->bound)
+		ve->bound = ve->siblings[0];
+
+	return __execlists_context_pin(ve->bound, ctx, ce);
+}
+
+static void virtual_submission_tasklet(unsigned long data)
+{
+	struct virtual_engine * const ve = (struct virtual_engine *)data;
+	unsigned int n;
+	int prio;
+
+	local_irq_disable();
+
+	prio = I915_PRIORITY_INVALID;
+	spin_lock(&ve->base.timeline.lock);
+	if (ve->request)
+		prio = rq_prio(ve->request);
+	spin_unlock(&ve->base.timeline.lock);
+	if (prio == I915_PRIORITY_INVALID)
+		goto out;
+
+	for (n = 0; READ_ONCE(ve->request) && n < ve->count; n++) {
+		struct intel_engine_cs *sibling = ve->siblings[n];
+		struct rb_node * const node = &ve->nodes[sibling->id].node;
+		struct rb_node **parent, *rb;
+		bool first;
+
+		spin_lock(&sibling->timeline.lock);
+
+		if (!RB_EMPTY_NODE(node)) {
+			if (prio == ve->nodes[sibling->id].prio) {
+				first = rb_first_cached(&sibling->execlists.virtual) == node;
+				goto submit_engine;
+			}
+
+			rb_erase_cached(node, &sibling->execlists.virtual);
+		}
+
+		rb = NULL;
+		first = true;
+		parent = &sibling->execlists.virtual.rb_root.rb_node;
+		while (*parent) {
+			struct ve_node *other;
+
+			rb = *parent;
+			other = rb_entry(rb, typeof(*other), node);
+			if (prio > other->prio) {
+				parent = &rb->rb_left;
+			} else {
+				parent = &rb->rb_right;
+				first = false;
+			}
+		}
+
+		rb_link_node(node, rb, parent);
+		rb_insert_color_cached(node,
+				       &sibling->execlists.virtual,
+				       first);
+
+		ve->nodes[sibling->id].prio = prio;
+
+submit_engine:
+		GEM_BUG_ON(RB_EMPTY_NODE(node));
+		if (first && prio > sibling->execlists.queue_priority)
+			tasklet_hi_schedule(&sibling->execlists.tasklet);
+
+		spin_unlock(&sibling->timeline.lock);
+	}
+
+out:
+	local_irq_enable();
+}
+
+static void virtual_submit_request(struct i915_request *request)
+{
+	struct virtual_engine *ve = to_virtual_engine(request->engine);
+
+	GEM_BUG_ON(ve->request);
+	ve->request = request;
+
+	tasklet_schedule(&ve->base.execlists.tasklet);
+}
+
+struct intel_engine_cs *
+intel_execlists_create_virtual(struct i915_gem_context *ctx,
+			       struct intel_engine_cs **siblings,
+			       unsigned int count)
+{
+	struct virtual_engine *ve;
+	unsigned int n;
+	int err;
+
+	if (!count)
+		return ERR_PTR(-EINVAL);
+
+	ve = kzalloc(sizeof(*ve) + count * sizeof(*ve->siblings), GFP_KERNEL);
+	if (!ve)
+		return ERR_PTR(-ENOMEM);
+
+	kref_init(&ve->kref);
+	ve->base.i915 = ctx->i915;
+	ve->base.id = -1;
+	ve->base.class = OTHER_CLASS;
+	ve->base.flags = I915_ENGINE_IS_VIRTUAL;
+
+	snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
+	i915_timeline_init(ctx->i915, &ve->base.timeline, ve->base.name);
+	lockdep_set_subclass(&ve->base.timeline.lock, TIMELINE_VIRTUAL);
+
+	err = intel_engine_create_scratch(&ve->base, 4096);
+	if (err)
+		goto err_put;
+
+	ve->context.gem_context = ctx;
+	ve->context.owner = &ve->base;
+
+	ve->base.context_pin = virtual_context_pin;
+	ve->base.request_alloc = execlists_request_alloc;
+
+	ve->base.schedule = execlists_schedule;
+	ve->base.submit_request = virtual_submit_request;
+
+	tasklet_init(&ve->base.execlists.tasklet,
+		     virtual_submission_tasklet,
+		     (unsigned long)ve);
+
+	ve->count = count;
+	for (n = 0; n < count; n++) {
+		struct intel_engine_cs *sibling = siblings[n];
+
+		ve->siblings[n] = sibling;
+
+		if (RB_EMPTY_NODE(&ve->nodes[sibling->id].node)) {
+			err = -EINVAL;
+			ve->count = n;
+			goto err_put;
+		}
+
+		RB_CLEAR_NODE(&ve->nodes[sibling->id].node);
+
+		if (ve->base.class != OTHER_CLASS) {
+			if (ve->base.class != sibling->class) {
+				err = -EINVAL;
+				ve->count = n;
+				goto err_put;
+			}
+			continue;
+		}
+
+		ve->base.class = sibling->class;
+		snprintf(ve->base.name, sizeof(ve->base.name),
+			 "v%dx%d", ve->base.class, count);
+		ve->base.context_size = sibling->context_size;
+
+		ve->base.emit_bb_start = sibling->emit_bb_start;
+		ve->base.emit_flush = sibling->emit_flush;
+		ve->base.emit_breadcrumb = sibling->emit_breadcrumb;
+		ve->base.emit_breadcrumb_sz = sibling->emit_breadcrumb_sz;
+	}
+
+	return &ve->base;
+
+err_put:
+	virtual_engine_free(&ve->kref);
+	return ERR_PTR(err);
+}
+
+void intel_virtual_engine_put(struct intel_engine_cs *engine)
+{
+	if (!engine)
+		return;
+
+	kref_put(&to_virtual_engine(engine)->kref, virtual_engine_free);
+}
+
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftests/intel_lrc.c"
 #endif
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 1593194e930c..ec90d9406cf4 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -104,4 +104,10 @@ struct i915_gem_context;
 
 void intel_lr_context_resume(struct drm_i915_private *dev_priv);
 
+struct intel_engine_cs *
+intel_execlists_create_virtual(struct i915_gem_context *ctx,
+			       struct intel_engine_cs **siblings,
+			       unsigned int count);
+void intel_virtual_engine_put(struct intel_engine_cs *engine);
+
 #endif /* _INTEL_LRC_H_ */
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index b26587d0c70d..05c19aa8af69 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -304,6 +304,7 @@ struct intel_engine_execlists {
 	 * @queue: queue of requests, in priority lists
 	 */
 	struct rb_root_cached queue;
+	struct rb_root_cached virtual;
 
 	/**
 	 * @csb_read: control register for Context Switch buffer
@@ -591,6 +592,7 @@ struct intel_engine_cs {
 #define I915_ENGINE_NEEDS_CMD_PARSER BIT(0)
 #define I915_ENGINE_SUPPORTS_STATS   BIT(1)
 #define I915_ENGINE_HAS_PREEMPTION   BIT(2)
+#define I915_ENGINE_IS_VIRTUAL       BIT(3)
 	unsigned int flags;
 
 	/*
@@ -673,6 +675,12 @@ static inline bool __execlists_need_preempt(int prio, int last)
 	return prio > max(0, last);
 }
 
+static inline bool
+intel_engine_is_virtual(const struct intel_engine_cs *engine)
+{
+	return engine->flags & I915_ENGINE_IS_VIRTUAL;
+}
+
 static inline void
 execlists_set_active(struct intel_engine_execlists *execlists,
 		     unsigned int bit)
diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
index 146908b90787..97f37ee0ae8f 100644
--- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
+++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
@@ -4,6 +4,8 @@
  * Copyright © 2018 Intel Corporation
  */
 
+#include <linux/prime_numbers.h>
+
 #include "../i915_selftest.h"
 #include "igt_flush_test.h"
 
@@ -453,12 +455,187 @@ static int live_late_preempt(void *arg)
 	goto err_ctx_lo;
 }
 
+struct live_test {
+	struct drm_i915_private *i915;
+	const char *func;
+	const char *name;
+
+	unsigned int reset_count;
+	bool wedge;
+};
+
+static int begin_live_test(struct live_test *t,
+			   struct drm_i915_private *i915,
+			   const char *func,
+			   const char *name)
+{
+	t->i915 = i915;
+	t->func = func;
+	t->name = name;
+
+	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+		return -EIO;
+
+	i915->gpu_error.missed_irq_rings = 0;
+	t->reset_count = i915_reset_count(&i915->gpu_error);
+
+	return 0;
+}
+
+static int end_live_test(struct live_test *t)
+{
+	struct drm_i915_private *i915 = t->i915;
+
+	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+		return -EIO;
+
+	if (t->reset_count != i915_reset_count(&i915->gpu_error)) {
+		pr_err("%s(%s): GPU was reset %d times!\n",
+		       t->func, t->name,
+		       i915_reset_count(&i915->gpu_error) - t->reset_count);
+		return -EIO;
+	}
+
+	if (i915->gpu_error.missed_irq_rings) {
+		pr_err("%s(%s): Missed interrupts on engines %lx\n",
+		       t->func, t->name, i915->gpu_error.missed_irq_rings);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static int nop_virtual_engine(struct drm_i915_private *i915,
+			      struct intel_engine_cs **siblings,
+			      unsigned int nsibling,
+			      unsigned int nctx)
+{
+	IGT_TIMEOUT(end_time);
+	struct i915_request *request[16];
+	struct i915_gem_context *ctx[16];
+	struct intel_engine_cs *ve[16];
+	unsigned long n, prime, nc;
+	ktime_t times[2] = {};
+	struct live_test t;
+	int err;
+
+	GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ctx));
+
+	for (n = 0; n < nctx; n++) {
+		ctx[n] = kernel_context(i915);
+		if (!ctx[n])
+			return -ENOMEM;
+
+		ve[n] = intel_execlists_create_virtual(ctx[n],
+						       siblings, nsibling);
+		if (IS_ERR(ve[n]))
+			return PTR_ERR(ve[n]);
+	}
+
+	err = begin_live_test(&t, i915, __func__, ve[0]->name);
+	if (err)
+		goto out;
+
+	for_each_prime_number_from(prime, 1, 8192) {
+		times[1] = ktime_get_raw();
+
+		for (nc = 0; nc < nctx; nc++) {
+			for (n = 0; n < prime; n++) {
+				request[nc] =
+					i915_request_alloc(ve[nc], ctx[nc]);
+				if (IS_ERR(request[nc])) {
+					err = PTR_ERR(request[nc]);
+					goto out;
+				}
+
+				i915_request_add(request[nc]);
+			}
+		}
+
+		for (nc = 0; nc < nctx; nc++)
+			i915_request_wait(request[nc],
+					  I915_WAIT_LOCKED,
+					  MAX_SCHEDULE_TIMEOUT);
+
+		times[1] = ktime_sub(ktime_get_raw(), times[1]);
+		if (prime == 1)
+			times[0] = times[1];
+
+		if (__igt_timeout(end_time, NULL))
+			break;
+	}
+
+	err = end_live_test(&t);
+	if (err)
+		goto out;
+
+	pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n",
+		nctx, ve[0]->name, ktime_to_ns(times[0]),
+		prime, div64_u64(ktime_to_ns(times[1]), prime));
+
+out:
+	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+		err = -EIO;
+
+	for (nc = 0; nc < nctx; nc++) {
+		intel_virtual_engine_put(ve[nc]);
+		kernel_context_close(ctx[nc]);
+	}
+	return err;
+}
+
+static int live_virtual_engine(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	unsigned int class, inst;
+	int err = -ENODEV;
+
+	mutex_lock(&i915->drm.struct_mutex);
+
+	for_each_engine(engine, i915, id) {
+		err = nop_virtual_engine(i915, &engine, 1, 1);
+		if (err) {
+			pr_err("Failed to wrap engine %s: err=%d\n",
+			       engine->name, err);
+			goto out_unlock;
+		}
+	}
+
+	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
+		int nsibling, n;
+
+		nsibling = 0;
+		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
+			if (!i915->engine_class[class][inst])
+				break;
+
+			siblings[nsibling++] = i915->engine_class[class][inst];
+		}
+		if (nsibling < 2)
+			continue;
+
+		for (n = 1; n <= nsibling + 1; n++) {
+			err = nop_virtual_engine(i915, siblings, nsibling, n);
+			if (err)
+				goto out_unlock;
+		}
+	}
+
+out_unlock:
+	mutex_unlock(&i915->drm.struct_mutex);
+	return err;
+}
+
 int intel_execlists_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
 		SUBTEST(live_sanitycheck),
 		SUBTEST(live_preempt),
 		SUBTEST(live_late_preempt),
+		SUBTEST(live_virtual_engine),
 	};
 
 	if (!HAS_EXECLISTS(i915))
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 259eb665fc1a..faff47ecf92b 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1501,14 +1501,41 @@ struct drm_i915_gem_context_param {
  * default settings.
  *
  * See struct i915_context_param_engines.
+ *
+ * Extensions:
+ *   i915_context_engines_load_balance (I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE)
  */
 #define I915_CONTEXT_PARAM_ENGINES	0x7
 
 	__u64 value;
 };
 
+/*
+ * i915_context_engines_load_balance:
+ *
+ * Enable load balancing across this set of engines.
+ *
+ * Into the I915_EXEC_DEFAULT slot, a virtual engine is created that,
+ * when used, will proxy the execbuffer request onto one of the set of
+ * engines in such a way as to distribute the load evenly across the set.
+ *
+ * The set of engines must be compatible (e.g. the same HW class) as they
+ * will share the same logical GPU context and ring.
+ *
+ * The context must be defined to use a single timeline for all engines.
+ */
+struct i915_context_engines_load_balance {
+	struct i915_user_extension base;
+
+	__u64 flags; /* all undefined flags must be zero */
+	__u64 engines_mask;
+
+	__u64 mbz[4]; /* reserved for future use; must be zero */
+};
+
 struct i915_context_param_engines {
 	__u64 extensions;
+#define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0
 
 	struct {
 		__u32 class; /* see enum drm_i915_gem_engine_class */
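
For illustration only, a rough sketch of how userspace might chain the
load-balancing extension onto I915_CONTEXT_PARAM_ENGINES. This is not part
of the patch: the i915_user_extension layout (next_extension, name), the
setparam plumbing, the engines_mask value and whether a zero-length
class/instance map is accepted are all assumptions based on the rest of
this series.

#include <stdint.h>
#include <xf86drm.h>
#include <drm/i915_drm.h>

/* fd: open render node; ctx_id: an existing GEM context (both hypothetical) */
static int set_load_balancing(int fd, uint32_t ctx_id, uint64_t engines_mask)
{
	struct i915_context_engines_load_balance balance = {
		.base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE,
		.base.next_extension = 0,	/* end of the extension chain */
		.flags = 0,			/* undefined flags must be zero */
		.engines_mask = engines_mask,	/* e.g. two same-class engines */
	};
	struct i915_context_param_engines engines = {
		.extensions = (uintptr_t)&balance,
	};
	struct drm_i915_gem_context_param arg = {
		.ctx_id = ctx_id,
		.param = I915_CONTEXT_PARAM_ENGINES,
		.size = sizeof(engines),
		.value = (uintptr_t)&engines,
	};

	/* The context is also expected to use a single timeline (see above). */
	return drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &arg);
}
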
-- 
2.18.0


* [PATCH 30/37] drm/i915: Introduce i915_address_space.mutex
From: Chris Wilson @ 2018-06-29  7:53 UTC (permalink / raw)
  To: intel-gfx

Add a mutex to struct i915_address_space to be used while operating on
the vmas and their lists for a particular vm. As these operations may be
called from the shrinker, we taint the mutex with fs_reclaim so that,
right from the start, lockdep warns us if we are caught holding the
mutex across an allocation. (With such small steps we will eventually
rid ourselves of struct_mutex recursion!)

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
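A hedged illustration of what the fs_reclaim taint buys us (hypothetical
caller, not code from this series): because the mutex is primed under
fs_reclaim at init time, lockdep flags the lock/allocation pattern below
immediately, without waiting for direct reclaim to actually recurse into
the shrinker.

#include <linux/slab.h>

static int alloc_under_vm_mutex(struct i915_address_space *vm)
{
	void *ptr;

	mutex_lock(&vm->mutex);
	/*
	 * A GFP_KERNEL allocation may enter fs_reclaim and hence the
	 * shrinker, which is expected to take vm->mutex itself. Lockdep
	 * now reports that inversion here even if reclaim never runs.
	 */
	ptr = kmalloc(4096, GFP_KERNEL);
	mutex_unlock(&vm->mutex);

	if (!ptr)
		return -ENOMEM;

	kfree(ptr);
	return 0;
}
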
 drivers/gpu/drm/i915/i915_drv.h          |  2 +-
 drivers/gpu/drm/i915/i915_gem_gtt.c      | 10 ++++++++++
 drivers/gpu/drm/i915/i915_gem_gtt.h      |  2 ++
 drivers/gpu/drm/i915/i915_gem_shrinker.c | 12 ++++++++++++
 4 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index a786a3b6686f..58aee50bd330 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3321,7 +3321,7 @@ unsigned long i915_gem_shrink(struct drm_i915_private *i915,
 unsigned long i915_gem_shrink_all(struct drm_i915_private *i915);
 void i915_gem_shrinker_register(struct drm_i915_private *i915);
 void i915_gem_shrinker_unregister(struct drm_i915_private *i915);
-
+void i915_gem_shrinker_taints_mutex(struct mutex *mutex);
 
 /* i915_gem_tiling.c */
 static inline bool i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index fb452441252d..524ea501198e 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2117,6 +2117,7 @@ static void i915_address_space_init(struct i915_address_space *vm,
 				    struct drm_i915_private *dev_priv,
 				    const char *name)
 {
+	mutex_init(&vm->mutex);
 	drm_mm_init(&vm->mm, 0, vm->total);
 	vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;
 
@@ -2125,6 +2126,13 @@ static void i915_address_space_init(struct i915_address_space *vm,
 
 	list_add_tail(&vm->global_link, &dev_priv->vm_list);
 	pagevec_init(&vm->free_pages);
+
+	/*
+	 * The vm->mutex must be reclaim safe (for use in the shrinker).
+	 * Do a dummy acquire now under fs_reclaim so that any allocation
+	 * attempt holding the lock is immediately reported by lockdep.
+	 */
+	i915_gem_shrinker_taints_mutex(&vm->mutex);
 }
 
 static void i915_address_space_fini(struct i915_address_space *vm)
@@ -2134,6 +2142,8 @@ static void i915_address_space_fini(struct i915_address_space *vm)
 
 	drm_mm_takedown(&vm->mm);
 	list_del(&vm->global_link);
+
+	mutex_destroy(&vm->mutex);
 }
 
 static void gtt_write_workarounds(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 0366b36773b1..9281c582f1d5 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -289,6 +289,8 @@ struct i915_address_space {
 
 	bool closed;
 
+	struct mutex mutex; /* protects vma and our lists */
+
 	struct i915_page_dma scratch_page;
 	struct i915_page_table *scratch_pt;
 	struct i915_page_directory *scratch_pd;
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index ba8885f3f428..52c061b728ca 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -23,6 +23,7 @@
  */
 
 #include <linux/oom.h>
+#include <linux/sched/mm.h>
 #include <linux/shmem_fs.h>
 #include <linux/slab.h>
 #include <linux/swap.h>
@@ -530,3 +531,14 @@ void i915_gem_shrinker_unregister(struct drm_i915_private *i915)
 	WARN_ON(unregister_oom_notifier(&i915->mm.oom_notifier));
 	unregister_shrinker(&i915->mm.shrinker);
 }
+
+void i915_gem_shrinker_taints_mutex(struct mutex *mutex)
+{
+	if (!IS_ENABLED(CONFIG_LOCKDEP))
+		return;
+
+	fs_reclaim_acquire(GFP_KERNEL);
+	mutex_lock(mutex);
+	mutex_unlock(mutex);
+	fs_reclaim_release(GFP_KERNEL);
+}
-- 
2.18.0


* [PATCH 31/37] drm/i915: Move fence register tracking to GGTT
From: Chris Wilson @ 2018-06-29  7:53 UTC (permalink / raw)
  To: intel-gfx

As the fence registers define special regions of the mappable aperture
inside the Global GTT, and we track those regions using GGTT VMAs, it
makes sense to pull that bookkeeping under i915_ggtt. The advantage is
that we can then start using a local GGTT lock to handle the fence
registers (in conjunction with the GGTT VMAs) rather than struct_mutex.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
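For reference, a small sketch (not part of the patch) of how the fence
bookkeeping is reached after this change: everything hangs off the GGTT
rather than the device private, mirroring fence_find() in the hunks below.

static struct drm_i915_fence_reg *first_unpinned_fence(struct i915_ggtt *ggtt)
{
	struct drm_i915_fence_reg *fence;

	/* LRU order: ggtt->fence_list is kept sorted by recent use. */
	list_for_each_entry(fence, &ggtt->fence_list, link) {
		if (!fence->pin_count)
			return fence;
	}

	return NULL;	/* all fences currently pinned */
}
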
 drivers/gpu/drm/i915/gvt/gvt.h            |  2 +-
 drivers/gpu/drm/i915/i915_debugfs.c       | 16 ++---
 drivers/gpu/drm/i915/i915_drv.c           |  4 +-
 drivers/gpu/drm/i915/i915_drv.h           |  7 ---
 drivers/gpu/drm/i915/i915_gem.c           | 33 +++++-----
 drivers/gpu/drm/i915/i915_gem_fence_reg.c | 76 ++++++++++++-----------
 drivers/gpu/drm/i915/i915_gem_fence_reg.h |  9 ++-
 drivers/gpu/drm/i915/i915_gem_gtt.c       |  8 ++-
 drivers/gpu/drm/i915/i915_gem_gtt.h       |  7 ++-
 drivers/gpu/drm/i915/i915_gpu_error.c     |  7 ++-
 10 files changed, 89 insertions(+), 80 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
index de2a3a2580be..11609a4003ff 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.h
+++ b/drivers/gpu/drm/i915/gvt/gvt.h
@@ -391,7 +391,7 @@ int intel_gvt_load_firmware(struct intel_gvt *gvt);
 #define gvt_hidden_gmadr_end(gvt) (gvt_hidden_gmadr_base(gvt) \
 				   + gvt_hidden_sz(gvt) - 1)
 
-#define gvt_fence_sz(gvt) (gvt->dev_priv->num_fence_regs)
+#define gvt_fence_sz(gvt) ((gvt)->dev_priv->ggtt.num_fence_regs)
 
 /* Aperture/GM space definitions for vGPU */
 #define vgpu_aperture_offset(vgpu)	((vgpu)->gm.low_gm_node.start)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index c400f42a54ec..1511d6db626d 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -914,20 +914,20 @@ static int i915_interrupt_info(struct seq_file *m, void *data)
 
 static int i915_gem_fence_regs_info(struct seq_file *m, void *data)
 {
-	struct drm_i915_private *dev_priv = node_to_i915(m->private);
-	struct drm_device *dev = &dev_priv->drm;
+	struct drm_i915_private *i915 = node_to_i915(m->private);
+	const struct i915_ggtt *ggtt = &i915->ggtt;
 	int i, ret;
 
-	ret = mutex_lock_interruptible(&dev->struct_mutex);
+	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
 	if (ret)
 		return ret;
 
-	seq_printf(m, "Total fences = %d\n", dev_priv->num_fence_regs);
-	for (i = 0; i < dev_priv->num_fence_regs; i++) {
-		struct i915_vma *vma = dev_priv->fence_regs[i].vma;
+	seq_printf(m, "Total fences = %d\n", ggtt->num_fence_regs);
+	for (i = 0; i < ggtt->num_fence_regs; i++) {
+		struct i915_vma *vma = ggtt->fence_regs[i].vma;
 
 		seq_printf(m, "Fence %d, pin count = %d, object = ",
-			   i, dev_priv->fence_regs[i].pin_count);
+			   i, ggtt->fence_regs[i].pin_count);
 		if (!vma)
 			seq_puts(m, "unused");
 		else
@@ -935,7 +935,7 @@ static int i915_gem_fence_regs_info(struct seq_file *m, void *data)
 		seq_putc(m, '\n');
 	}
 
-	mutex_unlock(&dev->struct_mutex);
+	mutex_unlock(&i915->drm.struct_mutex);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 0db3c83cce29..5c030c96b6ce 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -318,7 +318,7 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data,
 		value = pdev->revision;
 		break;
 	case I915_PARAM_NUM_FENCES_AVAIL:
-		value = dev_priv->num_fence_regs;
+		value = dev_priv->ggtt.num_fence_regs;
 		break;
 	case I915_PARAM_HAS_OVERLAY:
 		value = dev_priv->overlay ? 1 : 0;
@@ -1151,8 +1151,6 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv)
 
 	intel_opregion_setup(dev_priv);
 
-	i915_gem_load_init_fences(dev_priv);
-
 	/* On the 945G/GM, the chipset reports the MSI capability on the
 	 * integrated graphics even though the support isn't actually there
 	 * according to the published specs.  It doesn't appear to function
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 58aee50bd330..804a77d6f346 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -971,9 +971,6 @@ struct i915_gem_mm {
 	struct notifier_block vmap_notifier;
 	struct shrinker shrinker;
 
-	/** LRU list of objects with fence regs on them. */
-	struct list_head fence_list;
-
 	/**
 	 * Workqueue to fault in userptr pages, flushed by the execbuf
 	 * when required but otherwise left to userspace to try again
@@ -1692,9 +1689,6 @@ struct drm_i915_private {
 	/* protects panel power sequencer state */
 	struct mutex pps_mutex;
 
-	struct drm_i915_fence_reg fence_regs[I915_MAX_NUM_FENCES]; /* assume 965 */
-	int num_fence_regs; /* 8 on pre-965, 16 otherwise */
-
 	unsigned int fsb_freq, mem_freq, is_ddr3;
 	unsigned int skl_preferred_vco_freq;
 	unsigned int max_cdclk_freq;
@@ -2903,7 +2897,6 @@ int i915_gem_wait_ioctl(struct drm_device *dev, void *data,
 void i915_gem_sanitize(struct drm_i915_private *i915);
 int i915_gem_init_early(struct drm_i915_private *dev_priv);
 void i915_gem_cleanup_early(struct drm_i915_private *dev_priv);
-void i915_gem_load_init_fences(struct drm_i915_private *dev_priv);
 int i915_gem_freeze(struct drm_i915_private *dev_priv);
 int i915_gem_freeze_late(struct drm_i915_private *dev_priv);
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 3e143672b42f..c3812a917c0b 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2207,8 +2207,9 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj)
 	intel_runtime_pm_put(i915);
 }
 
-void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
+void i915_gem_runtime_suspend(struct drm_i915_private *i915)
 {
+	struct i915_ggtt *ggtt = &i915->ggtt;
 	struct drm_i915_gem_object *obj, *on;
 	int i;
 
@@ -2220,15 +2221,15 @@ void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
 	 */
 
 	list_for_each_entry_safe(obj, on,
-				 &dev_priv->mm.userfault_list, userfault_link)
+				 &i915->mm.userfault_list, userfault_link)
 		__i915_gem_object_release_mmap(obj);
 
 	/* The fence will be lost when the device powers down. If any were
 	 * in use by hardware (i.e. they are pinned), we should not be powering
 	 * down! All other fences will be reacquired by the user upon waking.
 	 */
-	for (i = 0; i < dev_priv->num_fence_regs; i++) {
-		struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
+	for (i = 0; i < ggtt->num_fence_regs; i++) {
+		struct drm_i915_fence_reg *reg = &ggtt->fence_regs[i];
 
 		/* Ideally we want to assert that the fence register is not
 		 * live at this point (i.e. that no piece of code will be
@@ -5636,32 +5637,33 @@ i915_gem_cleanup_engines(struct drm_i915_private *dev_priv)
 		dev_priv->gt.cleanup_engine(engine);
 }
 
-void
-i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
+void i915_ggtt_init_fences(struct i915_ggtt *ggtt)
 {
+	struct drm_i915_private *dev_priv = ggtt->vm.i915;
 	int i;
 
 	if (INTEL_GEN(dev_priv) >= 7 && !IS_VALLEYVIEW(dev_priv) &&
 	    !IS_CHERRYVIEW(dev_priv))
-		dev_priv->num_fence_regs = 32;
+		ggtt->num_fence_regs = 32;
 	else if (INTEL_GEN(dev_priv) >= 4 ||
 		 IS_I945G(dev_priv) || IS_I945GM(dev_priv) ||
 		 IS_G33(dev_priv) || IS_PINEVIEW(dev_priv))
-		dev_priv->num_fence_regs = 16;
+		ggtt->num_fence_regs = 16;
 	else
-		dev_priv->num_fence_regs = 8;
+		ggtt->num_fence_regs = 8;
 
 	if (intel_vgpu_active(dev_priv))
-		dev_priv->num_fence_regs =
-				I915_READ(vgtif_reg(avail_rs.fence_num));
+		ggtt->num_fence_regs = I915_READ(vgtif_reg(avail_rs.fence_num));
+
+	INIT_LIST_HEAD(&ggtt->fence_list);
 
 	/* Initialize fence registers to zero */
-	for (i = 0; i < dev_priv->num_fence_regs; i++) {
-		struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i];
+	for (i = 0; i < ggtt->num_fence_regs; i++) {
+		struct drm_i915_fence_reg *fence = &ggtt->fence_regs[i];
 
-		fence->i915 = dev_priv;
+		fence->ggtt = ggtt;
 		fence->id = i;
-		list_add_tail(&fence->link, &dev_priv->mm.fence_list);
+		list_add_tail(&fence->link, &ggtt->fence_list);
 	}
 	i915_gem_restore_fences(dev_priv);
 
@@ -5678,7 +5680,6 @@ static void i915_gem_init__mm(struct drm_i915_private *i915)
 
 	INIT_LIST_HEAD(&i915->mm.unbound_list);
 	INIT_LIST_HEAD(&i915->mm.bound_list);
-	INIT_LIST_HEAD(&i915->mm.fence_list);
 	INIT_LIST_HEAD(&i915->mm.userfault_list);
 
 	INIT_WORK(&i915->mm.free_work, __i915_gem_free_work);
diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
index d548ac05ccd7..60fa5a8276cb 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
@@ -64,7 +64,7 @@ static void i965_write_fence_reg(struct drm_i915_fence_reg *fence,
 	int fence_pitch_shift;
 	u64 val;
 
-	if (INTEL_GEN(fence->i915) >= 6) {
+	if (INTEL_GEN(fence->ggtt->vm.i915) >= 6) {
 		fence_reg_lo = FENCE_REG_GEN6_LO(fence->id);
 		fence_reg_hi = FENCE_REG_GEN6_HI(fence->id);
 		fence_pitch_shift = GEN6_FENCE_PITCH_SHIFT;
@@ -93,7 +93,7 @@ static void i965_write_fence_reg(struct drm_i915_fence_reg *fence,
 	}
 
 	if (!pipelined) {
-		struct drm_i915_private *dev_priv = fence->i915;
+		struct drm_i915_private *dev_priv = fence->ggtt->vm.i915;
 
 		/* To w/a incoherency with non-atomic 64-bit register updates,
 		 * we split the 64-bit update into two 32-bit writes. In order
@@ -129,7 +129,7 @@ static void i915_write_fence_reg(struct drm_i915_fence_reg *fence,
 		GEM_BUG_ON(!is_power_of_2(vma->fence_size));
 		GEM_BUG_ON(!IS_ALIGNED(vma->node.start, vma->fence_size));
 
-		if (is_y_tiled && HAS_128_BYTE_Y_TILING(fence->i915))
+		if (is_y_tiled && HAS_128_BYTE_Y_TILING(fence->ggtt->vm.i915))
 			stride /= 128;
 		else
 			stride /= 512;
@@ -145,7 +145,7 @@ static void i915_write_fence_reg(struct drm_i915_fence_reg *fence,
 	}
 
 	if (!pipelined) {
-		struct drm_i915_private *dev_priv = fence->i915;
+		struct drm_i915_private *dev_priv = fence->ggtt->vm.i915;
 		i915_reg_t reg = FENCE_REG(fence->id);
 
 		I915_WRITE(reg, val);
@@ -177,7 +177,7 @@ static void i830_write_fence_reg(struct drm_i915_fence_reg *fence,
 	}
 
 	if (!pipelined) {
-		struct drm_i915_private *dev_priv = fence->i915;
+		struct drm_i915_private *dev_priv = fence->ggtt->vm.i915;
 		i915_reg_t reg = FENCE_REG(fence->id);
 
 		I915_WRITE(reg, val);
@@ -193,9 +193,9 @@ static void fence_write(struct drm_i915_fence_reg *fence,
 	 * and explicitly managed for internal users.
 	 */
 
-	if (IS_GEN2(fence->i915))
+	if (IS_GEN2(fence->ggtt->vm.i915))
 		i830_write_fence_reg(fence, vma);
-	else if (IS_GEN3(fence->i915))
+	else if (IS_GEN3(fence->ggtt->vm.i915))
 		i915_write_fence_reg(fence, vma);
 	else
 		i965_write_fence_reg(fence, vma);
@@ -210,6 +210,7 @@ static void fence_write(struct drm_i915_fence_reg *fence,
 static int fence_update(struct drm_i915_fence_reg *fence,
 			struct i915_vma *vma)
 {
+	struct i915_ggtt *ggtt = fence->ggtt;
 	int ret;
 
 	if (vma) {
@@ -250,16 +251,16 @@ static int fence_update(struct drm_i915_fence_reg *fence,
 		fence->vma->fence = NULL;
 		fence->vma = NULL;
 
-		list_move(&fence->link, &fence->i915->mm.fence_list);
+		list_move(&fence->link, &ggtt->fence_list);
 	}
 
 	/* We only need to update the register itself if the device is awake.
 	 * If the device is currently powered down, we will defer the write
 	 * to the runtime resume, see i915_gem_restore_fences().
 	 */
-	if (intel_runtime_pm_get_if_in_use(fence->i915)) {
+	if (intel_runtime_pm_get_if_in_use(ggtt->vm.i915)) {
 		fence_write(fence, vma);
-		intel_runtime_pm_put(fence->i915);
+		intel_runtime_pm_put(ggtt->vm.i915);
 	}
 
 	if (vma) {
@@ -268,7 +269,7 @@ static int fence_update(struct drm_i915_fence_reg *fence,
 			fence->vma = vma;
 		}
 
-		list_move_tail(&fence->link, &fence->i915->mm.fence_list);
+		list_move_tail(&fence->link, &ggtt->fence_list);
 	}
 
 	return 0;
@@ -298,11 +299,11 @@ int i915_vma_put_fence(struct i915_vma *vma)
 	return fence_update(fence, NULL);
 }
 
-static struct drm_i915_fence_reg *fence_find(struct drm_i915_private *dev_priv)
+static struct drm_i915_fence_reg *fence_find(struct i915_ggtt *ggtt)
 {
 	struct drm_i915_fence_reg *fence;
 
-	list_for_each_entry(fence, &dev_priv->mm.fence_list, link) {
+	list_for_each_entry(fence, &ggtt->fence_list, link) {
 		GEM_BUG_ON(fence->vma && fence->vma->fence != fence);
 
 		if (fence->pin_count)
@@ -312,7 +313,7 @@ static struct drm_i915_fence_reg *fence_find(struct drm_i915_private *dev_priv)
 	}
 
 	/* Wait for completion of pending flips which consume fences */
-	if (intel_has_pending_fb_unpin(dev_priv))
+	if (intel_has_pending_fb_unpin(ggtt->vm.i915))
 		return ERR_PTR(-EAGAIN);
 
 	return ERR_PTR(-EDEADLK);
@@ -339,14 +340,15 @@ static struct drm_i915_fence_reg *fence_find(struct drm_i915_private *dev_priv)
 int
 i915_vma_pin_fence(struct i915_vma *vma)
 {
-	struct drm_i915_fence_reg *fence;
+	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vma->vm);
 	struct i915_vma *set = i915_gem_object_is_tiled(vma->obj) ? vma : NULL;
+	struct drm_i915_fence_reg *fence;
 	int err;
 
 	/* Note that we revoke fences on runtime suspend. Therefore the user
 	 * must keep the device awake whilst using the fence.
 	 */
-	assert_rpm_wakelock_held(vma->vm->i915);
+	assert_rpm_wakelock_held(ggtt->vm.i915);
 
 	/* Just update our place in the LRU if our fence is getting reused. */
 	if (vma->fence) {
@@ -354,12 +356,11 @@ i915_vma_pin_fence(struct i915_vma *vma)
 		GEM_BUG_ON(fence->vma != vma);
 		fence->pin_count++;
 		if (!fence->dirty) {
-			list_move_tail(&fence->link,
-				       &fence->i915->mm.fence_list);
+			list_move_tail(&fence->link, &ggtt->fence_list);
 			return 0;
 		}
 	} else if (set) {
-		fence = fence_find(vma->vm->i915);
+		fence = fence_find(ggtt);
 		if (IS_ERR(fence))
 			return PTR_ERR(fence);
 
@@ -385,28 +386,29 @@ i915_vma_pin_fence(struct i915_vma *vma)
 
 /**
  * i915_reserve_fence - Reserve a fence for vGPU
- * @dev_priv: i915 device private
+ * @i915: i915 device private
  *
  * This function walks the fence regs looking for a free one and remove
  * it from the fence_list. It is used to reserve fence for vGPU to use.
  */
 struct drm_i915_fence_reg *
-i915_reserve_fence(struct drm_i915_private *dev_priv)
+i915_reserve_fence(struct drm_i915_private *i915)
 {
+	struct i915_ggtt *ggtt = &i915->ggtt;
 	struct drm_i915_fence_reg *fence;
 	int count;
 	int ret;
 
-	lockdep_assert_held(&dev_priv->drm.struct_mutex);
+	lockdep_assert_held(&i915->drm.struct_mutex);
 
 	/* Keep at least one fence available for the display engine. */
 	count = 0;
-	list_for_each_entry(fence, &dev_priv->mm.fence_list, link)
+	list_for_each_entry(fence, &ggtt->fence_list, link)
 		count += !fence->pin_count;
 	if (count <= 1)
 		return ERR_PTR(-ENOSPC);
 
-	fence = fence_find(dev_priv);
+	fence = fence_find(ggtt);
 	if (IS_ERR(fence))
 		return fence;
 
@@ -429,14 +431,14 @@ i915_reserve_fence(struct drm_i915_private *dev_priv)
  */
 void i915_unreserve_fence(struct drm_i915_fence_reg *fence)
 {
-	lockdep_assert_held(&fence->i915->drm.struct_mutex);
+	lockdep_assert_held(&fence->ggtt->vm.i915->drm.struct_mutex);
 
-	list_add(&fence->link, &fence->i915->mm.fence_list);
+	list_add(&fence->link, &fence->ggtt->fence_list);
 }
 
 /**
  * i915_gem_revoke_fences - revoke fence state
- * @dev_priv: i915 device private
+ * @i915: i915 device private
  *
  * Removes all GTT mmappings via the fence registers. This forces any user
  * of the fence to reacquire that fence before continuing with their access.
@@ -444,14 +446,15 @@ void i915_unreserve_fence(struct drm_i915_fence_reg *fence)
  * revoke concurrent userspace access via GTT mmaps until the hardware has been
  * reset and the fence registers have been restored.
  */
-void i915_gem_revoke_fences(struct drm_i915_private *dev_priv)
+void i915_gem_revoke_fences(struct drm_i915_private *i915)
 {
+	struct i915_ggtt *ggtt = &i915->ggtt;
 	int i;
 
-	lockdep_assert_held(&dev_priv->drm.struct_mutex);
+	lockdep_assert_held(&i915->drm.struct_mutex);
 
-	for (i = 0; i < dev_priv->num_fence_regs; i++) {
-		struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i];
+	for (i = 0; i < ggtt->num_fence_regs; i++) {
+		struct drm_i915_fence_reg *fence = &ggtt->fence_regs[i];
 
 		GEM_BUG_ON(fence->vma && fence->vma->fence != fence);
 
@@ -462,18 +465,19 @@ void i915_gem_revoke_fences(struct drm_i915_private *dev_priv)
 
 /**
  * i915_gem_restore_fences - restore fence state
- * @dev_priv: i915 device private
+ * @i915: i915 device private
  *
  * Restore the hw fence state to match the software tracking again, to be called
  * after a gpu reset and on resume. Note that on runtime suspend we only cancel
  * the fences, to be reacquired by the user later.
  */
-void i915_gem_restore_fences(struct drm_i915_private *dev_priv)
+void i915_gem_restore_fences(struct drm_i915_private *i915)
 {
+	struct i915_ggtt *ggtt = &i915->ggtt;
 	int i;
 
-	for (i = 0; i < dev_priv->num_fence_regs; i++) {
-		struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
+	for (i = 0; i < ggtt->num_fence_regs; i++) {
+		struct drm_i915_fence_reg *reg = &ggtt->fence_regs[i];
 		struct i915_vma *vma = reg->vma;
 
 		GEM_BUG_ON(vma && vma->fence != reg);
@@ -486,7 +490,7 @@ void i915_gem_restore_fences(struct drm_i915_private *dev_priv)
 			GEM_BUG_ON(!reg->dirty);
 			GEM_BUG_ON(i915_vma_has_userfault(vma));
 
-			list_move(&reg->link, &dev_priv->mm.fence_list);
+			list_move(&reg->link, &ggtt->fence_list);
 			vma->fence = NULL;
 			vma = NULL;
 		}
diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.h b/drivers/gpu/drm/i915/i915_gem_fence_reg.h
index 99a31ded4dfd..c8f1d0cdfa90 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence_reg.h
+++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.h
@@ -28,16 +28,20 @@
 #include <linux/list.h>
 
 struct drm_i915_private;
+struct i915_ggtt;
 struct i915_vma;
 
 #define I965_FENCE_PAGE 4096UL
 
 struct drm_i915_fence_reg {
 	struct list_head link;
-	struct drm_i915_private *i915;
+
+	struct i915_ggtt *ggtt;
 	struct i915_vma *vma;
+
 	int pin_count;
 	int id;
+
 	/**
 	 * Whether the tiling parameters for the currently
 	 * associated fence register have changed. Note that
@@ -49,5 +53,6 @@ struct drm_i915_fence_reg {
 	bool dirty;
 };
 
-#endif
+void i915_ggtt_init_fences(struct i915_ggtt *ggtt);
 
+#endif
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 524ea501198e..c4b8f9c3a8cc 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -3568,9 +3568,11 @@ int i915_ggtt_init_hw(struct drm_i915_private *dev_priv)
 		ggtt->vm.mm.color_adjust = i915_gtt_color_adjust;
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 
-	if (!io_mapping_init_wc(&dev_priv->ggtt.iomap,
-				dev_priv->ggtt.gmadr.start,
-				dev_priv->ggtt.mappable_end)) {
+	i915_ggtt_init_fences(ggtt);
+
+	if (!io_mapping_init_wc(&ggtt->iomap,
+				ggtt->gmadr.start,
+				ggtt->mappable_end)) {
 		ret = -EIO;
 		goto out_gtt_cleanup;
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 9281c582f1d5..aae0e62141fa 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -38,6 +38,7 @@
 #include <linux/mm.h>
 #include <linux/pagevec.h>
 
+#include "i915_gem_fence_reg.h"
 #include "i915_request.h"
 #include "i915_selftest.h"
 #include "i915_timeline.h"
@@ -57,7 +58,6 @@
 #define I915_MAX_NUM_FENCE_BITS 6
 
 struct drm_i915_file_private;
-struct drm_i915_fence_reg;
 struct i915_vma;
 
 typedef u32 gen6_pte_t;
@@ -377,6 +377,11 @@ struct i915_ggtt {
 
 	int mtrr;
 
+	/** LRU list of objects with fence regs on them. */
+	struct list_head fence_list;
+	struct drm_i915_fence_reg fence_regs[I915_MAX_NUM_FENCES];
+	int num_fence_regs;
+
 	struct drm_mm_node error_capture;
 };
 
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index a3692c36814b..a4ed9a0cc0b5 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1096,16 +1096,17 @@ static uint32_t i915_error_generate_code(struct drm_i915_private *dev_priv,
 static void gem_record_fences(struct i915_gpu_state *error)
 {
 	struct drm_i915_private *dev_priv = error->i915;
+	const struct i915_ggtt *ggtt = &error->i915->ggtt;
 	int i;
 
 	if (INTEL_GEN(dev_priv) >= 6) {
-		for (i = 0; i < dev_priv->num_fence_regs; i++)
+		for (i = 0; i < ggtt->num_fence_regs; i++)
 			error->fence[i] = I915_READ64(FENCE_REG_GEN6_LO(i));
 	} else if (INTEL_GEN(dev_priv) >= 4) {
-		for (i = 0; i < dev_priv->num_fence_regs; i++)
+		for (i = 0; i < ggtt->num_fence_regs; i++)
 			error->fence[i] = I915_READ64(FENCE_REG_965_LO(i));
 	} else {
-		for (i = 0; i < dev_priv->num_fence_regs; i++)
+		for (i = 0; i < ggtt->num_fence_regs; i++)
 			error->fence[i] = I915_READ(FENCE_REG(i));
 	}
 	error->nfence = i;
-- 
2.18.0


* [PATCH 32/37] drm/i915: Convert fences to use a GGTT lock rather than struct_mutex
From: Chris Wilson @ 2018-06-29  7:53 UTC (permalink / raw)
  To: intel-gfx

Introduce a new mutex to guard all of the vma operations within a vm (as
opposed to the BKL struct_mutex) and start by using it to guard the
fence operations for a GGTT VMA.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
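A hedged sketch of the resulting calling convention (hypothetical caller,
not taken from this patch): short-term users go through the self-locking
wrappers, while paths that already hold vm->mutex, such as the execbuffer
hunk below, use the double-underscore variants directly.

static int use_fenced_mapping(struct i915_vma *vma)
{
	int err;

	/* The wrapper takes and releases vma->vm->mutex around the pin. */
	err = i915_vma_pin_fence(vma);
	if (err)
		return err;

	/*
	 * ... access the object through the fenced GGTT mapping; the caller
	 * is still expected to hold a runtime-pm wakeref for the duration,
	 * as asserted in __i915_vma_pin_fence() ...
	 */

	/* Re-takes vma->vm->mutex, but only if a fence was actually assigned. */
	i915_vma_unpin_fence(vma);
	return 0;
}
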
 drivers/gpu/drm/i915/i915_debugfs.c        |  2 +-
 drivers/gpu/drm/i915/i915_gem.c            | 13 ++++-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  5 +-
 drivers/gpu/drm/i915/i915_gem_fence_reg.c  | 68 +++++++++++++++++-----
 drivers/gpu/drm/i915/i915_vma.c            | 12 ++--
 drivers/gpu/drm/i915/i915_vma.h            | 23 +++++++-
 6 files changed, 95 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 1511d6db626d..d2c8e9edd0d5 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -80,7 +80,7 @@ static char get_tiling_flag(struct drm_i915_gem_object *obj)
 
 static char get_global_flag(struct drm_i915_gem_object *obj)
 {
-	return obj->userfault_count ? 'g' : ' ';
+	return READ_ONCE(obj->userfault_count) ? 'g' : ' ';
 }
 
 static char get_pin_mapped_flag(struct drm_i915_gem_object *obj)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index c3812a917c0b..b9d6d862c830 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2091,12 +2091,14 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
 		goto err_fence;
 
 	/* Mark as being mmapped into userspace for later revocation */
+	mutex_lock(&vma->vm->mutex);
 	assert_rpm_wakelock_held(dev_priv);
 	if (!i915_vma_set_userfault(vma) && !obj->userfault_count++)
 		list_add(&obj->userfault_link, &dev_priv->mm.userfault_list);
 	GEM_BUG_ON(!obj->userfault_count);
 
 	i915_vma_set_ggtt_write(vma);
+	mutex_unlock(&vma->vm->mutex);
 
 err_fence:
 	i915_vma_unpin_fence(vma);
@@ -2186,8 +2188,8 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj)
 	 * requirement that operations to the GGTT be made holding the RPM
 	 * wakeref.
 	 */
-	lockdep_assert_held(&i915->drm.struct_mutex);
 	intel_runtime_pm_get(i915);
+	mutex_lock(&i915->ggtt.vm.mutex);
 
 	if (!obj->userfault_count)
 		goto out;
@@ -2204,6 +2206,7 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj)
 	wmb();
 
 out:
+	mutex_unlock(&i915->ggtt.vm.mutex);
 	intel_runtime_pm_put(i915);
 }
 
@@ -2216,10 +2219,12 @@ void i915_gem_runtime_suspend(struct drm_i915_private *i915)
 	/*
 	 * Only called during RPM suspend. All users of the userfault_list
 	 * must be holding an RPM wakeref to ensure that this can not
-	 * run concurrently with themselves (and use the struct_mutex for
+	 * run concurrently with themselves (and use the ggtt->mutex for
 	 * protection between themselves).
 	 */
 
+	mutex_lock(&i915->ggtt.vm.mutex);
+
 	list_for_each_entry_safe(obj, on,
 				 &i915->mm.userfault_list, userfault_link)
 		__i915_gem_object_release_mmap(obj);
@@ -2248,6 +2253,8 @@ void i915_gem_runtime_suspend(struct drm_i915_private *i915)
 		GEM_BUG_ON(i915_vma_has_userfault(reg->vma));
 		reg->dirty = true;
 	}
+
+	mutex_unlock(&i915->ggtt.vm.mutex);
 }
 
 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
@@ -4876,7 +4883,7 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
 		mutex_unlock(&i915->drm.struct_mutex);
 
 		GEM_BUG_ON(obj->bind_count);
-		GEM_BUG_ON(obj->userfault_count);
+		GEM_BUG_ON(READ_ONCE(obj->userfault_count));
 		GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits));
 		GEM_BUG_ON(!list_empty(&obj->lut_list));
 
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index b09395d44482..77fa86670e9b 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -426,8 +426,11 @@ static inline void __eb_unreserve_vma(struct i915_vma *vma, unsigned int flags)
 {
 	GEM_BUG_ON(!(flags & __EXEC_OBJECT_HAS_PIN));
 
-	if (unlikely(flags & __EXEC_OBJECT_HAS_FENCE))
+	if (unlikely(flags & __EXEC_OBJECT_HAS_FENCE)) {
+		mutex_lock(&vma->vm->mutex);
 		__i915_vma_unpin_fence(vma);
+		mutex_unlock(&vma->vm->mutex);
+	}
 
 	__i915_vma_unpin(vma);
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
index 60fa5a8276cb..9313a8e675c8 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
@@ -188,6 +188,8 @@ static void i830_write_fence_reg(struct drm_i915_fence_reg *fence,
 static void fence_write(struct drm_i915_fence_reg *fence,
 			struct i915_vma *vma)
 {
+	lockdep_assert_held(&fence->ggtt->vm.mutex);
+
 	/* Previous access through the fence register is marshalled by
 	 * the mb() inside the fault handlers (i915_gem_release_mmaps)
 	 * and explicitly managed for internal users.
@@ -213,6 +215,8 @@ static int fence_update(struct drm_i915_fence_reg *fence,
 	struct i915_ggtt *ggtt = fence->ggtt;
 	int ret;
 
+	lockdep_assert_held(&ggtt->vm.mutex);
+
 	if (vma) {
 		if (!i915_vma_is_map_and_fenceable(vma))
 			return -EINVAL;
@@ -289,14 +293,39 @@ static int fence_update(struct drm_i915_fence_reg *fence,
 int i915_vma_put_fence(struct i915_vma *vma)
 {
 	struct drm_i915_fence_reg *fence = vma->fence;
+	int err;
 
 	if (!fence)
 		return 0;
 
-	if (fence->pin_count)
-		return -EBUSY;
+	mutex_lock(&vma->vm->mutex);
+	if (!fence->pin_count)
+		err = fence_update(fence, NULL);
+	else
+		err = -EBUSY;
+	mutex_unlock(&vma->vm->mutex);
 
-	return fence_update(fence, NULL);
+	return err;
+}
+
+void i915_vma_revoke_fence(struct i915_vma *vma)
+{
+	struct drm_i915_fence_reg *fence;
+
+	GEM_BUG_ON(!i915_vma_is_ggtt(vma));
+	lockdep_assert_held(&vma->vm->mutex);
+
+	fence = vma->fence;
+	if (!fence)
+		return;
+
+	GEM_BUG_ON(fence->pin_count);
+
+	list_move(&fence->link, &i915_vm_to_ggtt(vma->vm)->fence_list);
+	vma->fence = NULL;
+
+	fence_write(fence, NULL);
+	fence->vma = NULL;
 }
 
 static struct drm_i915_fence_reg *fence_find(struct i915_ggtt *ggtt)
@@ -337,8 +366,7 @@ static struct drm_i915_fence_reg *fence_find(struct i915_ggtt *ggtt)
  *
  * 0 on success, negative error code on failure.
  */
-int
-i915_vma_pin_fence(struct i915_vma *vma)
+int __i915_vma_pin_fence(struct i915_vma *vma)
 {
 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vma->vm);
 	struct i915_vma *set = i915_gem_object_is_tiled(vma->obj) ? vma : NULL;
@@ -349,6 +377,7 @@ i915_vma_pin_fence(struct i915_vma *vma)
 	 * must keep the device awake whilst using the fence.
 	 */
 	assert_rpm_wakelock_held(ggtt->vm.i915);
+	lockdep_assert_held(&ggtt->vm.mutex);
 
 	/* Just update our place in the LRU if our fence is getting reused. */
 	if (vma->fence) {
@@ -399,27 +428,34 @@ i915_reserve_fence(struct drm_i915_private *i915)
 	int count;
 	int ret;
 
-	lockdep_assert_held(&i915->drm.struct_mutex);
+	mutex_lock(&i915->ggtt.vm.mutex);
 
 	/* Keep at least one fence available for the display engine. */
 	count = 0;
 	list_for_each_entry(fence, &ggtt->fence_list, link)
 		count += !fence->pin_count;
-	if (count <= 1)
-		return ERR_PTR(-ENOSPC);
+	if (count <= 1) {
+		fence = ERR_PTR(-ENOSPC);
+		goto out_unlock;
+	}
 
 	fence = fence_find(ggtt);
 	if (IS_ERR(fence))
-		return fence;
+		goto out_unlock;
 
 	if (fence->vma) {
 		/* Force-remove fence from VMA */
 		ret = fence_update(fence, NULL);
-		if (ret)
-			return ERR_PTR(ret);
+		if (ret) {
+			fence = ERR_PTR(ret);
+			goto out_unlock;
+		}
 	}
 
 	list_del(&fence->link);
+
+out_unlock:
+	mutex_unlock(&i915->ggtt.vm.mutex);
 	return fence;
 }
 
@@ -431,9 +467,9 @@ i915_reserve_fence(struct drm_i915_private *i915)
  */
 void i915_unreserve_fence(struct drm_i915_fence_reg *fence)
 {
-	lockdep_assert_held(&fence->ggtt->vm.i915->drm.struct_mutex);
-
+	mutex_lock(&fence->ggtt->vm.mutex);
 	list_add(&fence->link, &fence->ggtt->fence_list);
+	mutex_unlock(&fence->ggtt->vm.mutex);
 }
 
 /**
@@ -451,8 +487,7 @@ void i915_gem_revoke_fences(struct drm_i915_private *i915)
 	struct i915_ggtt *ggtt = &i915->ggtt;
 	int i;
 
-	lockdep_assert_held(&i915->drm.struct_mutex);
-
+	mutex_lock(&ggtt->vm.mutex);
 	for (i = 0; i < ggtt->num_fence_regs; i++) {
 		struct drm_i915_fence_reg *fence = &ggtt->fence_regs[i];
 
@@ -461,6 +496,7 @@ void i915_gem_revoke_fences(struct drm_i915_private *i915)
 		if (fence->vma)
 			i915_vma_revoke_mmap(fence->vma);
 	}
+	mutex_unlock(&ggtt->vm.mutex);
 }
 
 /**
@@ -476,6 +512,7 @@ void i915_gem_restore_fences(struct drm_i915_private *i915)
 	struct i915_ggtt *ggtt = &i915->ggtt;
 	int i;
 
+	mutex_lock(&ggtt->vm.mutex);
 	for (i = 0; i < ggtt->num_fence_regs; i++) {
 		struct drm_i915_fence_reg *reg = &ggtt->fence_regs[i];
 		struct i915_vma *vma = reg->vma;
@@ -498,6 +535,7 @@ void i915_gem_restore_fences(struct drm_i915_private *i915)
 		fence_write(reg, vma);
 		reg->vma = vma;
 	}
+	mutex_unlock(&ggtt->vm.mutex);
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 60a6617ebc55..03f0a20667a9 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -866,7 +866,7 @@ void i915_vma_revoke_mmap(struct i915_vma *vma)
 	struct drm_vma_offset_node *node = &vma->obj->base.vma_node;
 	u64 vma_offset;
 
-	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
+	lockdep_assert_held(&vma->vm->mutex);
 
 	if (!i915_vma_has_userfault(vma))
 		return;
@@ -1070,6 +1070,8 @@ int i915_vma_unbind(struct i915_vma *vma)
 		return 0;
 
 	if (i915_vma_is_map_and_fenceable(vma)) {
+		mutex_lock(&vma->vm->mutex);
+
 		/*
 		 * Check that we have flushed all writes through the GGTT
 		 * before the unbind, other due to non-strict nature of those
@@ -1079,16 +1081,14 @@ int i915_vma_unbind(struct i915_vma *vma)
 		i915_vma_flush_writes(vma);
 		GEM_BUG_ON(i915_vma_has_ggtt_write(vma));
 
-		/* release the fence reg _after_ flushing */
-		ret = i915_vma_put_fence(vma);
-		if (ret)
-			return ret;
-
 		/* Force a pagefault for domain tracking on next user access */
 		i915_vma_revoke_mmap(vma);
+		i915_vma_revoke_fence(vma);
 
 		__i915_vma_iounmap(vma);
 		vma->flags &= ~I915_VMA_CAN_FENCE;
+
+		mutex_unlock(&vma->vm->mutex);
 	}
 	GEM_BUG_ON(vma->fence);
 	GEM_BUG_ON(i915_vma_has_userfault(vma));
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 1d3080603a18..0af82ff98da7 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -190,6 +190,7 @@ static inline bool i915_vma_set_userfault(struct i915_vma *vma)
 
 static inline void i915_vma_unset_userfault(struct i915_vma *vma)
 {
+	lockdep_assert_held(&vma->vm->mutex);
 	return __clear_bit(I915_VMA_USERFAULT_BIT, &vma->flags);
 }
 
@@ -378,11 +379,26 @@ static inline struct page *i915_vma_first_page(struct i915_vma *vma)
  *
  * True if the vma has a fence, false otherwise.
  */
-int i915_vma_pin_fence(struct i915_vma *vma);
+int __i915_vma_pin_fence(struct i915_vma *vma);
+static inline int i915_vma_pin_fence(struct i915_vma *vma)
+{
+	int err;
+
+	mutex_lock(&vma->vm->mutex);
+	err = __i915_vma_pin_fence(vma);
+	mutex_unlock(&vma->vm->mutex);
+
+	return err;
+}
+
 int __must_check i915_vma_put_fence(struct i915_vma *vma);
+void i915_vma_revoke_fence(struct i915_vma *vma);
 
 static inline void __i915_vma_unpin_fence(struct i915_vma *vma)
 {
+	lockdep_assert_held(&vma->vm->mutex);
+	GEM_BUG_ON(!i915_vma_is_ggtt(vma));
+
 	GEM_BUG_ON(vma->fence->pin_count <= 0);
 	vma->fence->pin_count--;
 }
@@ -399,8 +415,11 @@ static inline void
 i915_vma_unpin_fence(struct i915_vma *vma)
 {
 	/* lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); */
-	if (vma->fence)
+	if (vma->fence) {
+		mutex_lock(&vma->vm->mutex);
 		__i915_vma_unpin_fence(vma);
+		mutex_unlock(&vma->vm->mutex);
+	}
 }
 
 void i915_vma_parked(struct drm_i915_private *i915);
-- 
2.18.0
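
A side note on the naming in the i915_vma.h hunk above: the kernel commonly
pairs a double-underscore helper, which assumes the caller already holds the
relevant lock, with a plain wrapper that takes and drops that lock around it.
A minimal, self-contained stand-in for that convention (illustrative names,
with a userspace pthread mutex in place of the kernel's struct mutex; this is
not the actual i915 code):

#include <pthread.h>
#include <stdio.h>

struct vma {
	pthread_mutex_t lock;
	int fence_pins;
};

/* __vma_pin_fence: caller must already hold vma->lock */
static int __vma_pin_fence(struct vma *v)
{
	v->fence_pins++;	/* protected by vma->lock */
	return 0;
}

/* vma_pin_fence: convenience wrapper that takes the lock itself */
static int vma_pin_fence(struct vma *v)
{
	int err;

	pthread_mutex_lock(&v->lock);
	err = __vma_pin_fence(v);
	pthread_mutex_unlock(&v->lock);

	return err;
}

int main(void)
{
	struct vma v = { .lock = PTHREAD_MUTEX_INITIALIZER, .fence_pins = 0 };

	vma_pin_fence(&v);
	printf("fence pins: %d\n", v.fence_pins);
	return 0;
}

The split lets callers that already hold the lock use the __ variant directly,
which is how __i915_vma_pin_fence() is meant to be called from paths that
already hold vm->mutex in the patch above.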


* [PATCH 33/37] drm/i915: Tidy i915_gem_suspend()
  2018-06-29  7:53 [PATCH 01/37] drm/i915/gtt: Add read only pages to gen8_pte_encode Chris Wilson
                   ` (30 preceding siblings ...)
  2018-06-29  7:53 ` [PATCH 32/37] drm/i915: Convert fences to use a GGTT lock rather than struct_mutex Chris Wilson
@ 2018-06-29  7:53 ` Chris Wilson
  2018-06-29  7:53 ` [PATCH 34/37] drm/i915: Move fence-reg interface to i915_gem_fence_reg.h Chris Wilson
                   ` (8 subsequent siblings)
  40 siblings, 0 replies; 68+ messages in thread
From: Chris Wilson @ 2018-06-29  7:53 UTC (permalink / raw)
  To: intel-gfx

In the next patch, we will make a fairly minor change to flush
outstanding resets before suspend. In order to keep churn to a minimum
in that functional patch, we fix up the comments and coding style now.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c | 50 +++++++++++++++++----------------
 1 file changed, 26 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b9d6d862c830..fbc2bb5611fe 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -5052,65 +5052,67 @@ void i915_gem_sanitize(struct drm_i915_private *i915)
 	mutex_unlock(&i915->drm.struct_mutex);
 }
 
-int i915_gem_suspend(struct drm_i915_private *dev_priv)
+int i915_gem_suspend(struct drm_i915_private *i915)
 {
-	struct drm_device *dev = &dev_priv->drm;
 	int ret;
 
 	GEM_TRACE("\n");
 
-	intel_runtime_pm_get(dev_priv);
-	intel_suspend_gt_powersave(dev_priv);
+	intel_runtime_pm_get(i915);
+	intel_suspend_gt_powersave(i915);
 
-	mutex_lock(&dev->struct_mutex);
+	mutex_lock(&i915->drm.struct_mutex);
 
-	/* We have to flush all the executing contexts to main memory so
+	/*
+	 * We have to flush all the executing contexts to main memory so
 	 * that they can saved in the hibernation image. To ensure the last
 	 * context image is coherent, we have to switch away from it. That
-	 * leaves the dev_priv->kernel_context still active when
+	 * leaves the i915->kernel_context still active when
 	 * we actually suspend, and its image in memory may not match the GPU
 	 * state. Fortunately, the kernel_context is disposable and we do
 	 * not rely on its state.
 	 */
-	if (!i915_terminally_wedged(&dev_priv->gpu_error)) {
-		ret = i915_gem_switch_to_kernel_context(dev_priv);
+	if (!i915_terminally_wedged(&i915->gpu_error)) {
+		ret = i915_gem_switch_to_kernel_context(i915);
 		if (ret)
 			goto err_unlock;
 
-		ret = i915_gem_wait_for_idle(dev_priv,
+		ret = i915_gem_wait_for_idle(i915,
 					     I915_WAIT_INTERRUPTIBLE |
 					     I915_WAIT_LOCKED |
 					     I915_WAIT_FOR_IDLE_BOOST);
 		if (ret && ret != -EIO)
 			goto err_unlock;
 
-		assert_kernel_context_is_current(dev_priv);
+		assert_kernel_context_is_current(i915);
 	}
-	mutex_unlock(&dev->struct_mutex);
+	mutex_unlock(&i915->drm.struct_mutex);
 
-	intel_uc_suspend(dev_priv);
+	intel_uc_suspend(i915);
 
-	cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
-	cancel_delayed_work_sync(&dev_priv->gt.retire_work);
+	cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work);
+	cancel_delayed_work_sync(&i915->gt.retire_work);
 
-	/* As the idle_work is rearming if it detects a race, play safe and
+	/*
+	 * As the idle_work is rearming if it detects a race, play safe and
 	 * repeat the flush until it is definitely idle.
 	 */
-	drain_delayed_work(&dev_priv->gt.idle_work);
+	drain_delayed_work(&i915->gt.idle_work);
 
-	/* Assert that we sucessfully flushed all the work and
+	/*
+	 * Assert that we successfully flushed all the work and
 	 * reset the GPU back to its idle, low power state.
 	 */
-	WARN_ON(dev_priv->gt.awake);
-	if (WARN_ON(!intel_engines_are_idle(dev_priv)))
-		i915_gem_set_wedged(dev_priv); /* no hope, discard everything */
+	WARN_ON(i915->gt.awake);
+	if (WARN_ON(!intel_engines_are_idle(i915)))
+		i915_gem_set_wedged(i915); /* no hope, discard everything */
 
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put(i915);
 	return 0;
 
 err_unlock:
-	mutex_unlock(&dev->struct_mutex);
-	intel_runtime_pm_put(dev_priv);
+	mutex_unlock(&i915->drm.struct_mutex);
+	intel_runtime_pm_put(i915);
 	return ret;
 }
 
-- 
2.18.0


* [PATCH 34/37] drm/i915: Move fence-reg interface to i915_gem_fence_reg.h
  2018-06-29  7:53 [PATCH 01/37] drm/i915/gtt: Add read only pages to gen8_pte_encode Chris Wilson
                   ` (31 preceding siblings ...)
  2018-06-29  7:53 ` [PATCH 33/37] drm/i915: Tidy i915_gem_suspend() Chris Wilson
@ 2018-06-29  7:53 ` Chris Wilson
  2018-06-29  7:53 ` [PATCH 35/37] drm/i915: Dynamically allocate the array of drm_i915_gem_fence_reg Chris Wilson
                   ` (7 subsequent siblings)
  40 siblings, 0 replies; 68+ messages in thread
From: Chris Wilson @ 2018-06-29  7:53 UTC (permalink / raw)
  To: intel-gfx

Since we have a header file for i915_gem_fence_reg, let's use it for the
interface prototypes currently hidden away in the huge i915_drv.h.
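
One detail worth calling out: the i915_vma.h hunk below also drops the
#include in favour of a bare forward declaration, which is the usual way to
keep a header light when it only refers to a type through a pointer. A small
sketch of that pattern (file and struct names invented for illustration, not
the real i915 headers):

/* fake_fence_reg.h: the full definition lives in exactly one place */
#ifndef FAKE_FENCE_REG_H
#define FAKE_FENCE_REG_H

struct fake_fence_reg {
	int id;
	int pin_count;
};

#endif

/* fake_vma.h: a pointer member only needs the type's name, so a forward
 * declaration replaces the include and keeps the header dependency graph
 * small.
 */
#ifndef FAKE_VMA_H
#define FAKE_VMA_H

struct fake_fence_reg;

struct fake_vma {
	struct fake_fence_reg *fence;
};

#endif

Only code that actually dereferences the type needs the full definition, and
those .c files can include the dedicated header themselves.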

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h           | 15 ---------------
 drivers/gpu/drm/i915/i915_gem_fence_reg.h | 16 ++++++++++++++++
 drivers/gpu/drm/i915/i915_vma.h           |  2 +-
 3 files changed, 17 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 804a77d6f346..4e9d8e529c52 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -70,7 +70,6 @@
 
 #include "i915_gem.h"
 #include "i915_gem_context.h"
-#include "i915_gem_fence_reg.h"
 #include "i915_gem_object.h"
 #include "i915_gem_gtt.h"
 #include "i915_gpu_error.h"
@@ -3210,20 +3209,6 @@ i915_vm_to_ppgtt(struct i915_address_space *vm)
 	return container_of(vm, struct i915_hw_ppgtt, vm);
 }
 
-/* i915_gem_fence_reg.c */
-struct drm_i915_fence_reg *
-i915_reserve_fence(struct drm_i915_private *dev_priv);
-void i915_unreserve_fence(struct drm_i915_fence_reg *fence);
-
-void i915_gem_revoke_fences(struct drm_i915_private *dev_priv);
-void i915_gem_restore_fences(struct drm_i915_private *dev_priv);
-
-void i915_gem_detect_bit_6_swizzle(struct drm_i915_private *dev_priv);
-void i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj,
-				       struct sg_table *pages);
-void i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj,
-					 struct sg_table *pages);
-
 static inline struct i915_gem_context *
 __i915_gem_context_lookup_rcu(struct drm_i915_file_private *file_priv, u32 id)
 {
diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.h b/drivers/gpu/drm/i915/i915_gem_fence_reg.h
index c8f1d0cdfa90..c510f8efc1bb 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence_reg.h
+++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.h
@@ -27,7 +27,10 @@
 
 #include <linux/list.h>
 
+struct sg_table;
+
 struct drm_i915_private;
+struct drm_i915_gem_object;
 struct i915_ggtt;
 struct i915_vma;
 
@@ -55,4 +58,17 @@ struct drm_i915_fence_reg {
 
 void i915_ggtt_init_fences(struct i915_ggtt *ggtt);
 
+struct drm_i915_fence_reg *
+i915_reserve_fence(struct drm_i915_private *i915);
+void i915_unreserve_fence(struct drm_i915_fence_reg *fence);
+
+void i915_gem_revoke_fences(struct drm_i915_private *i915);
+void i915_gem_restore_fences(struct drm_i915_private *i915);
+
+void i915_gem_detect_bit_6_swizzle(struct drm_i915_private *i915);
+void i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj,
+				       struct sg_table *pages);
+void i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj,
+					 struct sg_table *pages);
+
 #endif
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 0af82ff98da7..c84fe8ac4ff6 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -31,12 +31,12 @@
 #include <drm/drm_mm.h>
 
 #include "i915_gem_gtt.h"
-#include "i915_gem_fence_reg.h"
 #include "i915_gem_object.h"
 
 #include "i915_request.h"
 
 enum i915_cache_level;
+struct drm_i915_fence_reg;
 
 /**
  * A VMA represents a GEM BO that is bound into an address space. Therefore, a
-- 
2.18.0


* [PATCH 35/37] drm/i915: Dynamically allocate the array of drm_i915_gem_fence_reg
  2018-06-29  7:53 [PATCH 01/37] drm/i915/gtt: Add read only pages to gen8_pte_encode Chris Wilson
                   ` (32 preceding siblings ...)
  2018-06-29  7:53 ` [PATCH 34/37] drm/i915: Move fence-reg interface to i915_gem_fence_reg.h Chris Wilson
@ 2018-06-29  7:53 ` Chris Wilson
  2018-06-29  7:53 ` [PATCH 36/37] drm/i915: Pull all the reset functionality together into i915_reset.c Chris Wilson
                   ` (6 subsequent siblings)
  40 siblings, 0 replies; 68+ messages in thread
From: Chris Wilson @ 2018-06-29  7:53 UTC (permalink / raw)
  To: intel-gfx

If we dynamically allocate the correctly sized array for the fence
registers, we can avoid the 4x overallocation on older, typically
smaller devices and avoid having to know the static layout in advance.
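
The shape of the change is the familiar swap from a worst-case static array to
an allocation sized from runtime information, plus a matching kfree on the
cleanup and error paths. A condensed sketch of that pattern (illustrative
names; the real version is the i915_ggtt_init_fences()/i915_ggtt_cleanup_fences()
pair in the diff below):

#include <linux/errno.h>
#include <linux/slab.h>

struct fake_fence_reg {
	int id;
};

struct fake_ggtt {
	struct fake_fence_reg *fence_regs;	/* was: a fixed-size array */
	int num_fence_regs;
};

static int fake_ggtt_init_fences(struct fake_ggtt *ggtt, int num)
{
	int i;

	ggtt->fence_regs = kcalloc(num, sizeof(*ggtt->fence_regs), GFP_KERNEL);
	if (!ggtt->fence_regs)
		return -ENOMEM;

	ggtt->num_fence_regs = num;
	for (i = 0; i < num; i++)
		ggtt->fence_regs[i].id = i;

	return 0;
}

static void fake_ggtt_cleanup_fences(struct fake_ggtt *ggtt)
{
	kfree(ggtt->fence_regs);	/* pairs with the kcalloc above */
}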

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c           | 33 ------------
 drivers/gpu/drm/i915/i915_gem_fence_reg.h |  2 -
 drivers/gpu/drm/i915/i915_gem_gtt.c       | 64 +++++++++++++++++++++--
 drivers/gpu/drm/i915/i915_gem_gtt.h       |  3 +-
 drivers/gpu/drm/i915/i915_vma.h           |  1 +
 5 files changed, 62 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index fbc2bb5611fe..8913443fc0f4 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -5646,39 +5646,6 @@ i915_gem_cleanup_engines(struct drm_i915_private *dev_priv)
 		dev_priv->gt.cleanup_engine(engine);
 }
 
-void i915_ggtt_init_fences(struct i915_ggtt *ggtt)
-{
-	struct drm_i915_private *dev_priv = ggtt->vm.i915;
-	int i;
-
-	if (INTEL_GEN(dev_priv) >= 7 && !IS_VALLEYVIEW(dev_priv) &&
-	    !IS_CHERRYVIEW(dev_priv))
-		ggtt->num_fence_regs = 32;
-	else if (INTEL_GEN(dev_priv) >= 4 ||
-		 IS_I945G(dev_priv) || IS_I945GM(dev_priv) ||
-		 IS_G33(dev_priv) || IS_PINEVIEW(dev_priv))
-		ggtt->num_fence_regs = 16;
-	else
-		ggtt->num_fence_regs = 8;
-
-	if (intel_vgpu_active(dev_priv))
-		ggtt->num_fence_regs = I915_READ(vgtif_reg(avail_rs.fence_num));
-
-	INIT_LIST_HEAD(&ggtt->fence_list);
-
-	/* Initialize fence registers to zero */
-	for (i = 0; i < ggtt->num_fence_regs; i++) {
-		struct drm_i915_fence_reg *fence = &ggtt->fence_regs[i];
-
-		fence->ggtt = ggtt;
-		fence->id = i;
-		list_add_tail(&fence->link, &ggtt->fence_list);
-	}
-	i915_gem_restore_fences(dev_priv);
-
-	i915_gem_detect_bit_6_swizzle(dev_priv);
-}
-
 static void i915_gem_init__mm(struct drm_i915_private *i915)
 {
 	spin_lock_init(&i915->mm.object_stat_lock);
diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.h b/drivers/gpu/drm/i915/i915_gem_fence_reg.h
index c510f8efc1bb..6e66f6b3f851 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence_reg.h
+++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.h
@@ -56,8 +56,6 @@ struct drm_i915_fence_reg {
 	bool dirty;
 };
 
-void i915_ggtt_init_fences(struct i915_ggtt *ggtt);
-
 struct drm_i915_fence_reg *
 i915_reserve_fence(struct drm_i915_private *i915);
 void i915_unreserve_fence(struct drm_i915_fence_reg *fence);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index c4b8f9c3a8cc..32ff15581f97 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -37,6 +37,7 @@
 #include <drm/i915_drm.h>
 
 #include "i915_drv.h"
+#include "i915_gem_fence_reg.h"
 #include "i915_vgpu.h"
 #include "i915_trace.h"
 #include "intel_drv.h"
@@ -2871,6 +2872,51 @@ void i915_gem_fini_aliasing_ppgtt(struct drm_i915_private *i915)
 	ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
 }
 
+static int i915_ggtt_init_fences(struct i915_ggtt *ggtt)
+{
+	struct drm_i915_private *dev_priv = ggtt->vm.i915;
+	int i;
+
+	if (INTEL_GEN(dev_priv) >= 7 &&
+	    !(IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)))
+		ggtt->num_fence_regs = 32;
+	else if (INTEL_GEN(dev_priv) >= 4 ||
+		 IS_I945G(dev_priv) || IS_I945GM(dev_priv) ||
+		 IS_G33(dev_priv) || IS_PINEVIEW(dev_priv))
+		ggtt->num_fence_regs = 16;
+	else
+		ggtt->num_fence_regs = 8;
+
+	if (intel_vgpu_active(dev_priv))
+		ggtt->num_fence_regs = I915_READ(vgtif_reg(avail_rs.fence_num));
+
+	ggtt->fence_regs = kcalloc(ggtt->num_fence_regs,
+				   sizeof(*ggtt->fence_regs),
+				   GFP_KERNEL);
+	if (!ggtt->fence_regs)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&ggtt->fence_list);
+
+	/* Initialize fence registers to zero */
+	for (i = 0; i < ggtt->num_fence_regs; i++) {
+		struct drm_i915_fence_reg *fence = &ggtt->fence_regs[i];
+
+		fence->ggtt = ggtt;
+		fence->id = i;
+		list_add_tail(&fence->link, &ggtt->fence_list);
+	}
+	i915_gem_restore_fences(dev_priv);
+
+	i915_gem_detect_bit_6_swizzle(dev_priv);
+	return 0;
+}
+
+static void i915_ggtt_cleanup_fences(struct i915_ggtt *ggtt)
+{
+	kfree(ggtt->fence_regs);
+}
+
 int i915_gem_init_ggtt(struct drm_i915_private *dev_priv)
 {
 	/* Let GEM Manage all of the aperture.
@@ -2959,6 +3005,8 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv)
 
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 
+	i915_ggtt_cleanup_fences(ggtt);
+
 	arch_phys_wc_del(ggtt->mtrr);
 	io_mapping_fini(&ggtt->iomap);
 
@@ -3568,13 +3616,15 @@ int i915_ggtt_init_hw(struct drm_i915_private *dev_priv)
 		ggtt->vm.mm.color_adjust = i915_gtt_color_adjust;
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 
-	i915_ggtt_init_fences(ggtt);
+	ret = i915_ggtt_init_fences(ggtt);
+	if (ret)
+		goto err_fini;
 
 	if (!io_mapping_init_wc(&ggtt->iomap,
 				ggtt->gmadr.start,
 				ggtt->mappable_end)) {
 		ret = -EIO;
-		goto out_gtt_cleanup;
+		goto err_fences;
 	}
 
 	ggtt->mtrr = arch_phys_wc_add(ggtt->gmadr.start, ggtt->mappable_end);
@@ -3585,12 +3635,18 @@ int i915_ggtt_init_hw(struct drm_i915_private *dev_priv)
 	 */
 	ret = i915_gem_init_stolen(dev_priv);
 	if (ret)
-		goto out_gtt_cleanup;
+		goto err_io;
 
 	return 0;
 
-out_gtt_cleanup:
+err_io:
+	arch_phys_wc_del(ggtt->mtrr);
+	io_mapping_fini(&ggtt->iomap);
+err_fences:
+	i915_ggtt_cleanup_fences(ggtt);
+err_fini:
 	ggtt->vm.cleanup(&ggtt->vm);
+	i915_address_space_fini(&ggtt->vm);
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index aae0e62141fa..89db3a05e9b1 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -38,7 +38,6 @@
 #include <linux/mm.h>
 #include <linux/pagevec.h>
 
-#include "i915_gem_fence_reg.h"
 #include "i915_request.h"
 #include "i915_selftest.h"
 #include "i915_timeline.h"
@@ -379,7 +378,7 @@ struct i915_ggtt {
 
 	/** LRU list of objects with fence regs on them. */
 	struct list_head fence_list;
-	struct drm_i915_fence_reg fence_regs[I915_MAX_NUM_FENCES];
+	struct drm_i915_fence_reg *fence_regs;
 	int num_fence_regs;
 
 	struct drm_mm_node error_capture;
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index c84fe8ac4ff6..8a23474c0f44 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -30,6 +30,7 @@
 
 #include <drm/drm_mm.h>
 
+#include "i915_gem_fence_reg.h"
 #include "i915_gem_gtt.h"
 #include "i915_gem_object.h"
 
-- 
2.18.0


* [PATCH 36/37] drm/i915: Pull all the reset functionality together into i915_reset.c
  2018-06-29  7:53 [PATCH 01/37] drm/i915/gtt: Add read only pages to gen8_pte_encode Chris Wilson
                   ` (33 preceding siblings ...)
  2018-06-29  7:53 ` [PATCH 35/37] drm/i915: Dynamically allocate the array of drm_i915_gem_fence_reg Chris Wilson
@ 2018-06-29  7:53 ` Chris Wilson
  2018-06-29  7:53 ` [PATCH 37/37] drm/i915: Remove GPU reset dependence on struct_mutex Chris Wilson
                   ` (5 subsequent siblings)
  40 siblings, 0 replies; 68+ messages in thread
From: Chris Wilson @ 2018-06-29  7:53 UTC (permalink / raw)
  To: intel-gfx

Currently, the code to reset the GPU and our state is spread across
several files. Pull the logic together into a common file.
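
Mechanically this follows the usual recipe for carving a subsystem out of
oversized files: a new header declaring just the cross-file entry points, a
new .c file as the single home for the implementation, a Makefile entry, and
an #include at each caller. A toy sketch of that shape (names invented for
illustration, not the real i915_reset interface):

/* fake_reset.h: only the entry points other files need */
#ifndef FAKE_RESET_H
#define FAKE_RESET_H

struct fake_device;			/* forward declaration is enough */

void fake_device_reset(struct fake_device *dev, const char *reason);

#endif

/* fake_reset.c: everything reset-related now lives here */
void fake_device_reset(struct fake_device *dev, const char *reason)
{
	/* reset sequencing gathered in one place instead of several files */
	(void)dev;
	(void)reason;
}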

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Makefile                 |    3 +-
 drivers/gpu/drm/i915/i915_debugfs.c           |    2 +
 drivers/gpu/drm/i915/i915_drv.c               |  207 +--
 drivers/gpu/drm/i915/i915_drv.h               |   31 +-
 drivers/gpu/drm/i915/i915_gem.c               |  465 +-----
 drivers/gpu/drm/i915/i915_irq.c               |  220 ---
 drivers/gpu/drm/i915/i915_request.c           |    1 +
 drivers/gpu/drm/i915/i915_reset.c             | 1272 +++++++++++++++++
 drivers/gpu/drm/i915/i915_reset.h             |   37 +
 drivers/gpu/drm/i915/intel_display.c          |   15 +-
 drivers/gpu/drm/i915/intel_guc.h              |    3 +
 drivers/gpu/drm/i915/intel_hangcheck.c        |    1 +
 drivers/gpu/drm/i915/intel_uc.c               |    1 +
 drivers/gpu/drm/i915/intel_uncore.c           |  415 ------
 .../drm/i915/selftests/intel_workarounds.c    |    1 +
 15 files changed, 1343 insertions(+), 1331 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_reset.c
 create mode 100644 drivers/gpu/drm/i915/i915_reset.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 9bba43da4215..012fd4dd469a 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -40,7 +40,8 @@ i915-y := i915_drv.o \
 	  i915_mm.o \
 	  i915_params.o \
 	  i915_pci.o \
-          i915_suspend.o \
+	  i915_reset.o \
+	  i915_suspend.o \
 	  i915_syncmap.o \
 	  i915_sw_fence.o \
 	  i915_sysfs.o \
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index d2c8e9edd0d5..f93ab55d61b3 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -32,6 +32,8 @@
 #include "intel_drv.h"
 #include "intel_guc_submission.h"
 
+#include "i915_reset.h"
+
 static inline struct drm_i915_private *node_to_i915(struct drm_info_node *node)
 {
 	return to_i915(node->minor->dev);
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 5c030c96b6ce..8c127c34d7e7 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -49,6 +49,7 @@
 #include "i915_drv.h"
 #include "i915_trace.h"
 #include "i915_pmu.h"
+#include "i915_reset.h"
 #include "i915_query.h"
 #include "i915_vgpu.h"
 #include "intel_drv.h"
@@ -1869,212 +1870,6 @@ static int i915_resume_switcheroo(struct drm_device *dev)
 	return i915_drm_resume(dev);
 }
 
-/**
- * i915_reset - reset chip after a hang
- * @i915: #drm_i915_private to reset
- * @stalled_mask: mask of the stalled engines with the guilty requests
- * @reason: user error message for why we are resetting
- *
- * Reset the chip.  Useful if a hang is detected. Marks the device as wedged
- * on failure.
- *
- * Caller must hold the struct_mutex.
- *
- * Procedure is fairly simple:
- *   - reset the chip using the reset reg
- *   - re-init context state
- *   - re-init hardware status page
- *   - re-init ring buffer
- *   - re-init interrupt state
- *   - re-init display
- */
-void i915_reset(struct drm_i915_private *i915,
-		unsigned int stalled_mask,
-		const char *reason)
-{
-	struct i915_gpu_error *error = &i915->gpu_error;
-	int ret;
-	int i;
-
-	GEM_TRACE("flags=%lx\n", error->flags);
-
-	might_sleep();
-	lockdep_assert_held(&i915->drm.struct_mutex);
-	GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &error->flags));
-
-	if (!test_bit(I915_RESET_HANDOFF, &error->flags))
-		return;
-
-	/* Clear any previous failed attempts at recovery. Time to try again. */
-	if (!i915_gem_unset_wedged(i915))
-		goto wakeup;
-
-	if (reason)
-		dev_notice(i915->drm.dev, "Resetting chip for %s\n", reason);
-	error->reset_count++;
-
-	disable_irq(i915->drm.irq);
-	ret = i915_gem_reset_prepare(i915);
-	if (ret) {
-		dev_err(i915->drm.dev, "GPU recovery failed\n");
-		goto taint;
-	}
-
-	if (!intel_has_gpu_reset(i915)) {
-		if (i915_modparams.reset)
-			dev_err(i915->drm.dev, "GPU reset not supported\n");
-		else
-			DRM_DEBUG_DRIVER("GPU reset disabled\n");
-		goto error;
-	}
-
-	for (i = 0; i < 3; i++) {
-		ret = intel_gpu_reset(i915, ALL_ENGINES);
-		if (ret == 0)
-			break;
-
-		msleep(100);
-	}
-	if (ret) {
-		dev_err(i915->drm.dev, "Failed to reset chip\n");
-		goto taint;
-	}
-
-	/* Ok, now get things going again... */
-
-	/*
-	 * Everything depends on having the GTT running, so we need to start
-	 * there.
-	 */
-	ret = i915_ggtt_enable_hw(i915);
-	if (ret) {
-		DRM_ERROR("Failed to re-enable GGTT following reset (%d)\n",
-			  ret);
-		goto error;
-	}
-
-	i915_gem_reset(i915, stalled_mask);
-	intel_overlay_reset(i915);
-
-	/*
-	 * Next we need to restore the context, but we don't use those
-	 * yet either...
-	 *
-	 * Ring buffer needs to be re-initialized in the KMS case, or if X
-	 * was running at the time of the reset (i.e. we weren't VT
-	 * switched away).
-	 */
-	ret = i915_gem_init_hw(i915);
-	if (ret) {
-		DRM_ERROR("Failed to initialise HW following reset (%d)\n",
-			  ret);
-		goto error;
-	}
-
-	i915_queue_hangcheck(i915);
-
-finish:
-	i915_gem_reset_finish(i915);
-	enable_irq(i915->drm.irq);
-
-wakeup:
-	clear_bit(I915_RESET_HANDOFF, &error->flags);
-	wake_up_bit(&error->flags, I915_RESET_HANDOFF);
-	return;
-
-taint:
-	/*
-	 * History tells us that if we cannot reset the GPU now, we
-	 * never will. This then impacts everything that is run
-	 * subsequently. On failing the reset, we mark the driver
-	 * as wedged, preventing further execution on the GPU.
-	 * We also want to go one step further and add a taint to the
-	 * kernel so that any subsequent faults can be traced back to
-	 * this failure. This is important for CI, where if the
-	 * GPU/driver fails we would like to reboot and restart testing
-	 * rather than continue on into oblivion. For everyone else,
-	 * the system should still plod along, but they have been warned!
-	 */
-	add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
-error:
-	i915_gem_set_wedged(i915);
-	i915_retire_requests(i915);
-	goto finish;
-}
-
-static inline int intel_gt_reset_engine(struct drm_i915_private *dev_priv,
-					struct intel_engine_cs *engine)
-{
-	return intel_gpu_reset(dev_priv, intel_engine_flag(engine));
-}
-
-/**
- * i915_reset_engine - reset GPU engine to recover from a hang
- * @engine: engine to reset
- * @msg: reason for GPU reset; or NULL for no dev_notice()
- *
- * Reset a specific GPU engine. Useful if a hang is detected.
- * Returns zero on successful reset or otherwise an error code.
- *
- * Procedure is:
- *  - identifies the request that caused the hang and it is dropped
- *  - reset engine (which will force the engine to idle)
- *  - re-init/configure engine
- */
-int i915_reset_engine(struct intel_engine_cs *engine, const char *msg)
-{
-	struct i915_gpu_error *error = &engine->i915->gpu_error;
-	struct i915_request *active_request;
-	int ret;
-
-	GEM_TRACE("%s flags=%lx\n", engine->name, error->flags);
-	GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &error->flags));
-
-	active_request = i915_gem_reset_prepare_engine(engine);
-	if (IS_ERR_OR_NULL(active_request)) {
-		/* Either the previous reset failed, or we pardon the reset. */
-		ret = PTR_ERR(active_request);
-		goto out;
-	}
-
-	if (msg)
-		dev_notice(engine->i915->drm.dev,
-			   "Resetting %s for %s\n", engine->name, msg);
-	error->reset_engine_count[engine->id]++;
-
-	if (!engine->i915->guc.execbuf_client)
-		ret = intel_gt_reset_engine(engine->i915, engine);
-	else
-		ret = intel_guc_reset_engine(&engine->i915->guc, engine);
-	if (ret) {
-		/* If we fail here, we expect to fallback to a global reset */
-		DRM_DEBUG_DRIVER("%sFailed to reset %s, ret=%d\n",
-				 engine->i915->guc.execbuf_client ? "GuC " : "",
-				 engine->name, ret);
-		goto out;
-	}
-
-	/*
-	 * The request that caused the hang is stuck on elsp, we know the
-	 * active request and can drop it, adjust head to skip the offending
-	 * request to resume executing remaining requests in the queue.
-	 */
-	i915_gem_reset_engine(engine, active_request, true);
-
-	/*
-	 * The engine and its registers (and workarounds in case of render)
-	 * have been reset to their default values. Follow the init_ring
-	 * process to program RING_MODE, HWSP and re-enable submission.
-	 */
-	ret = engine->init_hw(engine);
-	if (ret)
-		goto out;
-
-out:
-	i915_gem_reset_finish_engine(engine);
-	return ret;
-}
-
 static int i915_pm_prepare(struct device *kdev)
 {
 	struct pci_dev *pdev = to_pci_dev(kdev);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4e9d8e529c52..49903c7966fa 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2717,19 +2717,7 @@ extern const struct dev_pm_ops i915_pm_ops;
 extern int i915_driver_load(struct pci_dev *pdev,
 			    const struct pci_device_id *ent);
 extern void i915_driver_unload(struct drm_device *dev);
-extern int intel_gpu_reset(struct drm_i915_private *dev_priv, u32 engine_mask);
-extern bool intel_has_gpu_reset(struct drm_i915_private *dev_priv);
-
-extern void i915_reset(struct drm_i915_private *i915,
-		       unsigned int stalled_mask,
-		       const char *reason);
-extern int i915_reset_engine(struct intel_engine_cs *engine,
-			     const char *reason);
-
-extern bool intel_has_reset_engine(struct drm_i915_private *dev_priv);
-extern int intel_reset_guc(struct drm_i915_private *dev_priv);
-extern int intel_guc_reset_engine(struct intel_guc *guc,
-				  struct intel_engine_cs *engine);
+
 extern void intel_engine_init_hangcheck(struct intel_engine_cs *engine);
 extern void intel_hangcheck_init(struct drm_i915_private *dev_priv);
 extern unsigned long i915_chipset_val(struct drm_i915_private *dev_priv);
@@ -2774,13 +2762,6 @@ static inline void i915_queue_hangcheck(struct drm_i915_private *dev_priv)
 			   &dev_priv->gpu_error.hangcheck_work, delay);
 }
 
-__printf(4, 5)
-void i915_handle_error(struct drm_i915_private *dev_priv,
-		       u32 engine_mask,
-		       unsigned long flags,
-		       const char *fmt, ...);
-#define I915_ERROR_CAPTURE BIT(0)
-
 extern void intel_irq_init(struct drm_i915_private *dev_priv);
 extern void intel_irq_fini(struct drm_i915_private *dev_priv);
 int intel_irq_install(struct drm_i915_private *dev_priv);
@@ -3143,18 +3124,8 @@ static inline u32 i915_reset_engine_count(struct i915_gpu_error *error,
 	return READ_ONCE(error->reset_engine_count[engine->id]);
 }
 
-struct i915_request *
-i915_gem_reset_prepare_engine(struct intel_engine_cs *engine);
-int i915_gem_reset_prepare(struct drm_i915_private *dev_priv);
-void i915_gem_reset(struct drm_i915_private *dev_priv,
-		    unsigned int stalled_mask);
-void i915_gem_reset_finish_engine(struct intel_engine_cs *engine);
-void i915_gem_reset_finish(struct drm_i915_private *dev_priv);
 void i915_gem_set_wedged(struct drm_i915_private *dev_priv);
 bool i915_gem_unset_wedged(struct drm_i915_private *dev_priv);
-void i915_gem_reset_engine(struct intel_engine_cs *engine,
-			   struct i915_request *request,
-			   bool stalled);
 
 void i915_gem_init_mmio(struct drm_i915_private *i915);
 int __must_check i915_gem_init(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8913443fc0f4..8821342dfbaf 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -28,15 +28,6 @@
 #include <drm/drmP.h>
 #include <drm/drm_vma_manager.h>
 #include <drm/i915_drm.h>
-#include "i915_drv.h"
-#include "i915_gem_clflush.h"
-#include "i915_vgpu.h"
-#include "i915_trace.h"
-#include "intel_drv.h"
-#include "intel_frontbuffer.h"
-#include "intel_mocs.h"
-#include "intel_workarounds.h"
-#include "i915_gemfs.h"
 #include <linux/dma-fence-array.h>
 #include <linux/kthread.h>
 #include <linux/reservation.h>
@@ -47,6 +38,18 @@
 #include <linux/pci.h>
 #include <linux/dma-buf.h>
 
+#include "i915_drv.h"
+#include "i915_gem_clflush.h"
+#include "i915_gemfs.h"
+#include "i915_reset.h"
+#include "i915_trace.h"
+#include "i915_vgpu.h"
+
+#include "intel_drv.h"
+#include "intel_frontbuffer.h"
+#include "intel_mocs.h"
+#include "intel_workarounds.h"
+
 static void i915_gem_flush_free_objects(struct drm_i915_private *i915);
 
 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
@@ -2953,61 +2956,6 @@ i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj,
 	return 0;
 }
 
-static void i915_gem_client_mark_guilty(struct drm_i915_file_private *file_priv,
-					const struct i915_gem_context *ctx)
-{
-	unsigned int score;
-	unsigned long prev_hang;
-
-	if (i915_gem_context_is_banned(ctx))
-		score = I915_CLIENT_SCORE_CONTEXT_BAN;
-	else
-		score = 0;
-
-	prev_hang = xchg(&file_priv->hang_timestamp, jiffies);
-	if (time_before(jiffies, prev_hang + I915_CLIENT_FAST_HANG_JIFFIES))
-		score += I915_CLIENT_SCORE_HANG_FAST;
-
-	if (score) {
-		atomic_add(score, &file_priv->ban_score);
-
-		DRM_DEBUG_DRIVER("client %s: gained %u ban score, now %u\n",
-				 ctx->name, score,
-				 atomic_read(&file_priv->ban_score));
-	}
-}
-
-static void i915_gem_context_mark_guilty(struct i915_gem_context *ctx)
-{
-	unsigned int score;
-	bool banned, bannable;
-
-	atomic_inc(&ctx->guilty_count);
-
-	bannable = i915_gem_context_is_bannable(ctx);
-	score = atomic_add_return(CONTEXT_SCORE_GUILTY, &ctx->ban_score);
-	banned = score >= CONTEXT_SCORE_BAN_THRESHOLD;
-
-	/* Cool contexts don't accumulate client ban score */
-	if (!bannable)
-		return;
-
-	if (banned) {
-		DRM_DEBUG_DRIVER("context %s: guilty %d, score %u, banned\n",
-				 ctx->name, atomic_read(&ctx->guilty_count),
-				 score);
-		i915_gem_context_set_banned(ctx);
-	}
-
-	if (!IS_ERR_OR_NULL(ctx->file_priv))
-		i915_gem_client_mark_guilty(ctx->file_priv, ctx);
-}
-
-static void i915_gem_context_mark_innocent(struct i915_gem_context *ctx)
-{
-	atomic_inc(&ctx->active_count);
-}
-
 struct i915_request *
 i915_gem_find_active_request(struct intel_engine_cs *engine)
 {
@@ -3038,395 +2986,6 @@ i915_gem_find_active_request(struct intel_engine_cs *engine)
 	return active;
 }
 
-/*
- * Ensure irq handler finishes, and not run again.
- * Also return the active request so that we only search for it once.
- */
-struct i915_request *
-i915_gem_reset_prepare_engine(struct intel_engine_cs *engine)
-{
-	struct i915_request *request;
-
-	/*
-	 * During the reset sequence, we must prevent the engine from
-	 * entering RC6. As the context state is undefined until we restart
-	 * the engine, if it does enter RC6 during the reset, the state
-	 * written to the powercontext is undefined and so we may lose
-	 * GPU state upon resume, i.e. fail to restart after a reset.
-	 */
-	intel_uncore_forcewake_get(engine->i915, FORCEWAKE_ALL);
-
-	request = engine->reset.prepare(engine);
-	if (request && request->fence.error == -EIO)
-		request = ERR_PTR(-EIO); /* Previous reset failed! */
-
-	return request;
-}
-
-int i915_gem_reset_prepare(struct drm_i915_private *dev_priv)
-{
-	struct intel_engine_cs *engine;
-	struct i915_request *request;
-	enum intel_engine_id id;
-	int err = 0;
-
-	for_each_engine(engine, dev_priv, id) {
-		request = i915_gem_reset_prepare_engine(engine);
-		if (IS_ERR(request)) {
-			err = PTR_ERR(request);
-			continue;
-		}
-
-		engine->hangcheck.active_request = request;
-	}
-
-	i915_gem_revoke_fences(dev_priv);
-	intel_uc_sanitize(dev_priv);
-
-	return err;
-}
-
-static void engine_skip_context(struct i915_request *request)
-{
-	struct intel_engine_cs *engine = request->engine;
-	struct i915_gem_context *hung_ctx = request->gem_context;
-	struct i915_timeline *timeline = request->timeline;
-	unsigned long flags;
-
-	GEM_BUG_ON(timeline == &engine->timeline);
-
-	spin_lock_irqsave(&engine->timeline.lock, flags);
-	spin_lock(&timeline->lock);
-
-	list_for_each_entry_continue(request, &engine->timeline.requests, link)
-		if (request->gem_context == hung_ctx)
-			i915_request_skip(request, -EIO);
-
-	list_for_each_entry(request, &timeline->requests, link)
-		i915_request_skip(request, -EIO);
-
-	spin_unlock(&timeline->lock);
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
-}
-
-/* Returns the request if it was guilty of the hang */
-static struct i915_request *
-i915_gem_reset_request(struct intel_engine_cs *engine,
-		       struct i915_request *request,
-		       bool stalled)
-{
-	/* The guilty request will get skipped on a hung engine.
-	 *
-	 * Users of client default contexts do not rely on logical
-	 * state preserved between batches so it is safe to execute
-	 * queued requests following the hang. Non default contexts
-	 * rely on preserved state, so skipping a batch loses the
-	 * evolution of the state and it needs to be considered corrupted.
-	 * Executing more queued batches on top of corrupted state is
-	 * risky. But we take the risk by trying to advance through
-	 * the queued requests in order to make the client behaviour
-	 * more predictable around resets, by not throwing away random
-	 * amount of batches it has prepared for execution. Sophisticated
-	 * clients can use gem_reset_stats_ioctl and dma fence status
-	 * (exported via sync_file info ioctl on explicit fences) to observe
-	 * when it loses the context state and should rebuild accordingly.
-	 *
-	 * The context ban, and ultimately the client ban, mechanism are safety
-	 * valves if client submission ends up resulting in nothing more than
-	 * subsequent hangs.
-	 */
-
-	if (i915_request_completed(request)) {
-		GEM_TRACE("%s pardoned global=%d (fence %llx:%d), current %d\n",
-			  engine->name, request->global_seqno,
-			  request->fence.context, request->fence.seqno,
-			  intel_engine_get_seqno(engine));
-		stalled = false;
-	}
-
-	if (stalled) {
-		i915_gem_context_mark_guilty(request->gem_context);
-		i915_request_skip(request, -EIO);
-
-		/* If this context is now banned, skip all pending requests. */
-		if (i915_gem_context_is_banned(request->gem_context))
-			engine_skip_context(request);
-	} else {
-		/*
-		 * Since this is not the hung engine, it may have advanced
-		 * since the hang declaration. Double check by refinding
-		 * the active request at the time of the reset.
-		 */
-		request = i915_gem_find_active_request(engine);
-		if (request) {
-			unsigned long flags;
-
-			i915_gem_context_mark_innocent(request->gem_context);
-			dma_fence_set_error(&request->fence, -EAGAIN);
-
-			/* Rewind the engine to replay the incomplete rq */
-			spin_lock_irqsave(&engine->timeline.lock, flags);
-			request = list_prev_entry(request, link);
-			if (&request->link == &engine->timeline.requests)
-				request = NULL;
-			spin_unlock_irqrestore(&engine->timeline.lock, flags);
-		}
-	}
-
-	return request;
-}
-
-void i915_gem_reset_engine(struct intel_engine_cs *engine,
-			   struct i915_request *request,
-			   bool stalled)
-{
-	/*
-	 * Make sure this write is visible before we re-enable the interrupt
-	 * handlers on another CPU, as tasklet_enable() resolves to just
-	 * a compiler barrier which is insufficient for our purpose here.
-	 */
-	smp_store_mb(engine->irq_posted, 0);
-
-	if (request)
-		request = i915_gem_reset_request(engine, request, stalled);
-
-	/* Setup the CS to resume from the breadcrumb of the hung request */
-	engine->reset.reset(engine, request);
-}
-
-void i915_gem_reset(struct drm_i915_private *dev_priv,
-		    unsigned int stalled_mask)
-{
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-
-	lockdep_assert_held(&dev_priv->drm.struct_mutex);
-
-	i915_retire_requests(dev_priv);
-
-	for_each_engine(engine, dev_priv, id) {
-		struct intel_context *ce;
-
-		i915_gem_reset_engine(engine,
-				      engine->hangcheck.active_request,
-				      stalled_mask & ENGINE_MASK(id));
-		ce = fetch_and_zero(&engine->last_retired_context);
-		if (ce)
-			intel_context_unpin(ce);
-
-		/*
-		 * Ostensibily, we always want a context loaded for powersaving,
-		 * so if the engine is idle after the reset, send a request
-		 * to load our scratch kernel_context.
-		 *
-		 * More mysteriously, if we leave the engine idle after a reset,
-		 * the next userspace batch may hang, with what appears to be
-		 * an incoherent read by the CS (presumably stale TLB). An
-		 * empty request appears sufficient to paper over the glitch.
-		 */
-		if (intel_engine_is_idle(engine)) {
-			struct i915_request *rq;
-
-			rq = i915_request_alloc(engine,
-						dev_priv->kernel_context);
-			if (!IS_ERR(rq))
-				i915_request_add(rq);
-		}
-	}
-
-	i915_gem_restore_fences(dev_priv);
-}
-
-void i915_gem_reset_finish_engine(struct intel_engine_cs *engine)
-{
-	engine->reset.finish(engine);
-
-	intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL);
-}
-
-void i915_gem_reset_finish(struct drm_i915_private *dev_priv)
-{
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-
-	lockdep_assert_held(&dev_priv->drm.struct_mutex);
-
-	for_each_engine(engine, dev_priv, id) {
-		engine->hangcheck.active_request = NULL;
-		i915_gem_reset_finish_engine(engine);
-	}
-}
-
-static void nop_submit_request(struct i915_request *request)
-{
-	GEM_TRACE("%s fence %llx:%d -> -EIO\n",
-		  request->engine->name,
-		  request->fence.context, request->fence.seqno);
-	dma_fence_set_error(&request->fence, -EIO);
-
-	i915_request_submit(request);
-}
-
-static void nop_complete_submit_request(struct i915_request *request)
-{
-	unsigned long flags;
-
-	GEM_TRACE("%s fence %llx:%d -> -EIO\n",
-		  request->engine->name,
-		  request->fence.context, request->fence.seqno);
-	dma_fence_set_error(&request->fence, -EIO);
-
-	spin_lock_irqsave(&request->engine->timeline.lock, flags);
-	__i915_request_submit(request);
-	intel_engine_init_global_seqno(request->engine, request->global_seqno);
-	spin_unlock_irqrestore(&request->engine->timeline.lock, flags);
-}
-
-void i915_gem_set_wedged(struct drm_i915_private *i915)
-{
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-
-	GEM_TRACE("start\n");
-
-	if (GEM_SHOW_DEBUG()) {
-		struct drm_printer p = drm_debug_printer(__func__);
-
-		for_each_engine(engine, i915, id)
-			intel_engine_dump(engine, &p, "%s\n", engine->name);
-	}
-
-	set_bit(I915_WEDGED, &i915->gpu_error.flags);
-	smp_mb__after_atomic();
-
-	/*
-	 * First, stop submission to hw, but do not yet complete requests by
-	 * rolling the global seqno forward (since this would complete requests
-	 * for which we haven't set the fence error to EIO yet).
-	 */
-	for_each_engine(engine, i915, id) {
-		i915_gem_reset_prepare_engine(engine);
-
-		engine->submit_request = nop_submit_request;
-		engine->schedule = NULL;
-	}
-	i915->caps.scheduler = 0;
-
-	/* Even if the GPU reset fails, it should still stop the engines */
-	intel_gpu_reset(i915, ALL_ENGINES);
-
-	/*
-	 * Make sure no one is running the old callback before we proceed with
-	 * cancelling requests and resetting the completion tracking. Otherwise
-	 * we might submit a request to the hardware which never completes.
-	 */
-	synchronize_rcu();
-
-	for_each_engine(engine, i915, id) {
-		/* Mark all executing requests as skipped */
-		engine->cancel_requests(engine);
-
-		/*
-		 * Only once we've force-cancelled all in-flight requests can we
-		 * start to complete all requests.
-		 */
-		engine->submit_request = nop_complete_submit_request;
-	}
-
-	/*
-	 * Make sure no request can slip through without getting completed by
-	 * either this call here to intel_engine_init_global_seqno, or the one
-	 * in nop_complete_submit_request.
-	 */
-	synchronize_rcu();
-
-	for_each_engine(engine, i915, id) {
-		unsigned long flags;
-
-		/*
-		 * Mark all pending requests as complete so that any concurrent
-		 * (lockless) lookup doesn't try and wait upon the request as we
-		 * reset it.
-		 */
-		spin_lock_irqsave(&engine->timeline.lock, flags);
-		intel_engine_init_global_seqno(engine,
-					       intel_engine_last_submit(engine));
-		spin_unlock_irqrestore(&engine->timeline.lock, flags);
-
-		i915_gem_reset_finish_engine(engine);
-	}
-
-	GEM_TRACE("end\n");
-
-	wake_up_all(&i915->gpu_error.reset_queue);
-}
-
-bool i915_gem_unset_wedged(struct drm_i915_private *i915)
-{
-	struct i915_timeline *tl;
-
-	lockdep_assert_held(&i915->drm.struct_mutex);
-	if (!test_bit(I915_WEDGED, &i915->gpu_error.flags))
-		return true;
-
-	GEM_TRACE("start\n");
-
-	/*
-	 * Before unwedging, make sure that all pending operations
-	 * are flushed and errored out - we may have requests waiting upon
-	 * third party fences. We marked all inflight requests as EIO, and
-	 * every execbuf since returned EIO, for consistency we want all
-	 * the currently pending requests to also be marked as EIO, which
-	 * is done inside our nop_submit_request - and so we must wait.
-	 *
-	 * No more can be submitted until we reset the wedged bit.
-	 */
-	list_for_each_entry(tl, &i915->gt.timelines, link) {
-		struct i915_request *rq;
-
-		rq = i915_gem_active_peek(&tl->last_request,
-					  &i915->drm.struct_mutex);
-		if (!rq)
-			continue;
-
-		/*
-		 * We can't use our normal waiter as we want to
-		 * avoid recursively trying to handle the current
-		 * reset. The basic dma_fence_default_wait() installs
-		 * a callback for dma_fence_signal(), which is
-		 * triggered by our nop handler (indirectly, the
-		 * callback enables the signaler thread which is
-		 * woken by the nop_submit_request() advancing the seqno
-		 * and when the seqno passes the fence, the signaler
-		 * then signals the fence waking us up).
-		 */
-		if (dma_fence_default_wait(&rq->fence, true,
-					   MAX_SCHEDULE_TIMEOUT) < 0)
-			return false;
-	}
-	i915_retire_requests(i915);
-	GEM_BUG_ON(i915->gt.active_requests);
-
-	/*
-	 * Undo nop_submit_request. We prevent all new i915 requests from
-	 * being queued (by disallowing execbuf whilst wedged) so having
-	 * waited for all active requests above, we know the system is idle
-	 * and do not have to worry about a thread being inside
-	 * engine->submit_request() as we swap over. So unlike installing
-	 * the nop_submit_request on reset, we can do this from normal
-	 * context and do not require stop_machine().
-	 */
-	intel_engines_reset_default_submission(i915);
-	i915_gem_contexts_lost(i915);
-
-	GEM_TRACE("end\n");
-
-	smp_mb__before_atomic(); /* complete takeover before enabling execbuf */
-	clear_bit(I915_WEDGED, &i915->gpu_error.flags);
-
-	return true;
-}
-
 static void
 i915_gem_retire_work_handler(struct work_struct *work)
 {
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 435a4444314c..7b2f3c864b49 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2935,46 +2935,6 @@ static irqreturn_t gen8_irq_handler(int irq, void *arg)
 	return IRQ_HANDLED;
 }
 
-struct wedge_me {
-	struct delayed_work work;
-	struct drm_i915_private *i915;
-	const char *name;
-};
-
-static void wedge_me(struct work_struct *work)
-{
-	struct wedge_me *w = container_of(work, typeof(*w), work.work);
-
-	dev_err(w->i915->drm.dev,
-		"%s timed out, cancelling all in-flight rendering.\n",
-		w->name);
-	i915_gem_set_wedged(w->i915);
-}
-
-static void __init_wedge(struct wedge_me *w,
-			 struct drm_i915_private *i915,
-			 long timeout,
-			 const char *name)
-{
-	w->i915 = i915;
-	w->name = name;
-
-	INIT_DELAYED_WORK_ONSTACK(&w->work, wedge_me);
-	schedule_delayed_work(&w->work, timeout);
-}
-
-static void __fini_wedge(struct wedge_me *w)
-{
-	cancel_delayed_work_sync(&w->work);
-	destroy_delayed_work_on_stack(&w->work);
-	w->i915 = NULL;
-}
-
-#define i915_wedge_on_timeout(W, DEV, TIMEOUT)				\
-	for (__init_wedge((W), (DEV), (TIMEOUT), __func__);		\
-	     (W)->i915;							\
-	     __fini_wedge((W)))
-
 static u32
 gen11_gt_engine_identity(struct drm_i915_private * const i915,
 			 const unsigned int bank, const unsigned int bit)
@@ -3179,186 +3139,6 @@ static irqreturn_t gen11_irq_handler(int irq, void *arg)
 	return IRQ_HANDLED;
 }
 
-static void i915_reset_device(struct drm_i915_private *dev_priv,
-			      u32 engine_mask,
-			      const char *reason)
-{
-	struct i915_gpu_error *error = &dev_priv->gpu_error;
-	struct kobject *kobj = &dev_priv->drm.primary->kdev->kobj;
-	char *error_event[] = { I915_ERROR_UEVENT "=1", NULL };
-	char *reset_event[] = { I915_RESET_UEVENT "=1", NULL };
-	char *reset_done_event[] = { I915_ERROR_UEVENT "=0", NULL };
-	struct wedge_me w;
-
-	kobject_uevent_env(kobj, KOBJ_CHANGE, error_event);
-
-	DRM_DEBUG_DRIVER("resetting chip\n");
-	kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event);
-
-	/* Use a watchdog to ensure that our reset completes */
-	i915_wedge_on_timeout(&w, dev_priv, 5*HZ) {
-		intel_prepare_reset(dev_priv);
-
-		error->reason = reason;
-		error->stalled_mask = engine_mask;
-
-		/* Signal that locked waiters should reset the GPU */
-		smp_mb__before_atomic();
-		set_bit(I915_RESET_HANDOFF, &error->flags);
-		wake_up_all(&error->wait_queue);
-
-		/* Wait for anyone holding the lock to wakeup, without
-		 * blocking indefinitely on struct_mutex.
-		 */
-		do {
-			if (mutex_trylock(&dev_priv->drm.struct_mutex)) {
-				i915_reset(dev_priv, engine_mask, reason);
-				mutex_unlock(&dev_priv->drm.struct_mutex);
-			}
-		} while (wait_on_bit_timeout(&error->flags,
-					     I915_RESET_HANDOFF,
-					     TASK_UNINTERRUPTIBLE,
-					     1));
-
-		error->stalled_mask = 0;
-		error->reason = NULL;
-
-		intel_finish_reset(dev_priv);
-	}
-
-	if (!test_bit(I915_WEDGED, &error->flags))
-		kobject_uevent_env(kobj, KOBJ_CHANGE, reset_done_event);
-}
-
-static void i915_clear_error_registers(struct drm_i915_private *dev_priv)
-{
-	u32 eir;
-
-	if (!IS_GEN2(dev_priv))
-		I915_WRITE(PGTBL_ER, I915_READ(PGTBL_ER));
-
-	if (INTEL_GEN(dev_priv) < 4)
-		I915_WRITE(IPEIR, I915_READ(IPEIR));
-	else
-		I915_WRITE(IPEIR_I965, I915_READ(IPEIR_I965));
-
-	I915_WRITE(EIR, I915_READ(EIR));
-	eir = I915_READ(EIR);
-	if (eir) {
-		/*
-		 * some errors might have become stuck,
-		 * mask them.
-		 */
-		DRM_DEBUG_DRIVER("EIR stuck: 0x%08x, masking\n", eir);
-		I915_WRITE(EMR, I915_READ(EMR) | eir);
-		I915_WRITE(IIR, I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT);
-	}
-}
-
-/**
- * i915_handle_error - handle a gpu error
- * @dev_priv: i915 device private
- * @engine_mask: mask representing engines that are hung
- * @flags: control flags
- * @fmt: Error message format string
- *
- * Do some basic checking of register state at error time and
- * dump it to the syslog.  Also call i915_capture_error_state() to make
- * sure we get a record and make it available in debugfs.  Fire a uevent
- * so userspace knows something bad happened (should trigger collection
- * of a ring dump etc.).
- */
-void i915_handle_error(struct drm_i915_private *dev_priv,
-		       u32 engine_mask,
-		       unsigned long flags,
-		       const char *fmt, ...)
-{
-	struct intel_engine_cs *engine;
-	unsigned int tmp;
-	char error_msg[80];
-	char *msg = NULL;
-
-	if (fmt) {
-		va_list args;
-
-		va_start(args, fmt);
-		vscnprintf(error_msg, sizeof(error_msg), fmt, args);
-		va_end(args);
-
-		msg = error_msg;
-	}
-
-	/*
-	 * In most cases it's guaranteed that we get here with an RPM
-	 * reference held, for example because there is a pending GPU
-	 * request that won't finish until the reset is done. This
-	 * isn't the case at least when we get here by doing a
-	 * simulated reset via debugfs, so get an RPM reference.
-	 */
-	intel_runtime_pm_get(dev_priv);
-
-	engine_mask &= INTEL_INFO(dev_priv)->ring_mask;
-
-	if (flags & I915_ERROR_CAPTURE) {
-		i915_capture_error_state(dev_priv, engine_mask, msg);
-		i915_clear_error_registers(dev_priv);
-	}
-
-	/*
-	 * Try engine reset when available. We fall back to full reset if
-	 * single reset fails.
-	 */
-	if (intel_has_reset_engine(dev_priv)) {
-		for_each_engine_masked(engine, dev_priv, engine_mask, tmp) {
-			BUILD_BUG_ON(I915_RESET_MODESET >= I915_RESET_ENGINE);
-			if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
-					     &dev_priv->gpu_error.flags))
-				continue;
-
-			if (i915_reset_engine(engine, msg) == 0)
-				engine_mask &= ~intel_engine_flag(engine);
-
-			clear_bit(I915_RESET_ENGINE + engine->id,
-				  &dev_priv->gpu_error.flags);
-			wake_up_bit(&dev_priv->gpu_error.flags,
-				    I915_RESET_ENGINE + engine->id);
-		}
-	}
-
-	if (!engine_mask)
-		goto out;
-
-	/* Full reset needs the mutex, stop any other user trying to do so. */
-	if (test_and_set_bit(I915_RESET_BACKOFF, &dev_priv->gpu_error.flags)) {
-		wait_event(dev_priv->gpu_error.reset_queue,
-			   !test_bit(I915_RESET_BACKOFF,
-				     &dev_priv->gpu_error.flags));
-		goto out;
-	}
-
-	/* Prevent any other reset-engine attempt. */
-	for_each_engine(engine, dev_priv, tmp) {
-		while (test_and_set_bit(I915_RESET_ENGINE + engine->id,
-					&dev_priv->gpu_error.flags))
-			wait_on_bit(&dev_priv->gpu_error.flags,
-				    I915_RESET_ENGINE + engine->id,
-				    TASK_UNINTERRUPTIBLE);
-	}
-
-	i915_reset_device(dev_priv, engine_mask, msg);
-
-	for_each_engine(engine, dev_priv, tmp) {
-		clear_bit(I915_RESET_ENGINE + engine->id,
-			  &dev_priv->gpu_error.flags);
-	}
-
-	clear_bit(I915_RESET_BACKOFF, &dev_priv->gpu_error.flags);
-	wake_up_all(&dev_priv->gpu_error.reset_queue);
-
-out:
-	intel_runtime_pm_put(dev_priv);
-}
-
 /* Called from drm generic code, passed 'crtc' which
  * we use as a pipe index
  */
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index beb2a4429a40..041d8decd431 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -29,6 +29,7 @@
 #include <linux/sched/signal.h>
 
 #include "i915_drv.h"
+#include "i915_reset.h"
 
 static const char *i915_fence_get_driver_name(struct dma_fence *fence)
 {
diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c
new file mode 100644
index 000000000000..d8b7840c9e20
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_reset.c
@@ -0,0 +1,1272 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2008-2018 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "i915_gpu_error.h"
+#include "i915_reset.h"
+
+#include "intel_guc.h"
+
+static void engine_skip_context(struct i915_request *rq)
+{
+	struct intel_engine_cs *engine = rq->engine;
+	struct i915_gem_context *hung_ctx = rq->gem_context;
+	struct i915_timeline *timeline = rq->timeline;
+	unsigned long flags;
+
+	GEM_BUG_ON(timeline == &engine->timeline);
+
+	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock(&timeline->lock);
+
+	list_for_each_entry_continue(rq, &engine->timeline.requests, link)
+		if (rq->gem_context == hung_ctx)
+			i915_request_skip(rq, -EIO);
+
+	list_for_each_entry(rq, &timeline->requests, link)
+		i915_request_skip(rq, -EIO);
+
+	spin_unlock(&timeline->lock);
+	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+}
+
+static void client_mark_guilty(struct drm_i915_file_private *file_priv,
+			       const struct i915_gem_context *ctx)
+{
+	unsigned int score;
+	unsigned long prev_hang;
+
+	if (i915_gem_context_is_banned(ctx))
+		score = I915_CLIENT_SCORE_CONTEXT_BAN;
+	else
+		score = 0;
+
+	prev_hang = xchg(&file_priv->hang_timestamp, jiffies);
+	if (time_before(jiffies, prev_hang + I915_CLIENT_FAST_HANG_JIFFIES))
+		score += I915_CLIENT_SCORE_HANG_FAST;
+
+	if (score) {
+		atomic_add(score, &file_priv->ban_score);
+
+		DRM_DEBUG_DRIVER("client %s: gained %u ban score, now %u\n",
+				 ctx->name, score,
+				 atomic_read(&file_priv->ban_score));
+	}
+}
+
+static void context_mark_guilty(struct i915_gem_context *ctx)
+{
+	unsigned int score;
+	bool banned, bannable;
+
+	atomic_inc(&ctx->guilty_count);
+
+	bannable = i915_gem_context_is_bannable(ctx);
+	score = atomic_add_return(CONTEXT_SCORE_GUILTY, &ctx->ban_score);
+	banned = score >= CONTEXT_SCORE_BAN_THRESHOLD;
+
+	/* Cool contexts don't accumulate client ban score */
+	if (!bannable)
+		return;
+
+	if (banned) {
+		DRM_DEBUG_DRIVER("context %s: guilty %d, score %u, banned\n",
+				 ctx->name, atomic_read(&ctx->guilty_count),
+				 score);
+		i915_gem_context_set_banned(ctx);
+	}
+
+	if (!IS_ERR_OR_NULL(ctx->file_priv))
+		client_mark_guilty(ctx->file_priv, ctx);
+}
+
+static void context_mark_innocent(struct i915_gem_context *ctx)
+{
+	atomic_inc(&ctx->active_count);
+}
+
+static void gen3_stop_engine(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+	const u32 base = engine->mmio_base;
+
+	if (intel_engine_stop_cs(engine))
+		DRM_DEBUG_DRIVER("%s: timed out on STOP_RING\n", engine->name);
+
+	I915_WRITE_FW(RING_HEAD(base), I915_READ_FW(RING_TAIL(base)));
+	POSTING_READ_FW(RING_HEAD(base)); /* paranoia */
+
+	I915_WRITE_FW(RING_HEAD(base), 0);
+	I915_WRITE_FW(RING_TAIL(base), 0);
+	POSTING_READ_FW(RING_TAIL(base));
+
+	/* The ring must be empty before it is disabled */
+	I915_WRITE_FW(RING_CTL(base), 0);
+
+	/* Check acts as a post */
+	if (I915_READ_FW(RING_HEAD(base)) != 0)
+		DRM_DEBUG_DRIVER("%s: ring head not parked\n",
+				 engine->name);
+}
+
+static void i915_stop_engines(struct drm_i915_private *i915,
+			      unsigned int engine_mask)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	if (INTEL_GEN(i915) < 3)
+		return;
+
+	for_each_engine_masked(engine, i915, engine_mask, id)
+		gen3_stop_engine(engine);
+}
+
+static bool i915_in_reset(struct pci_dev *pdev)
+{
+	u8 gdrst;
+
+	pci_read_config_byte(pdev, I915_GDRST, &gdrst);
+	return gdrst & GRDOM_RESET_STATUS;
+}
+
+static int i915_do_reset(struct drm_i915_private *i915,
+			 unsigned int engine_mask)
+{
+	struct pci_dev *pdev = i915->drm.pdev;
+	int err;
+
+	/* Assert reset for at least 20 usec, and wait for acknowledgement. */
+	pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE);
+	usleep_range(50, 200);
+	err = wait_for(i915_in_reset(pdev), 500);
+
+	/* Clear the reset request. */
+	pci_write_config_byte(pdev, I915_GDRST, 0);
+	usleep_range(50, 200);
+	if (!err)
+		err = wait_for(!i915_in_reset(pdev), 500);
+
+	return err;
+}
+
+static bool g4x_reset_complete(struct pci_dev *pdev)
+{
+	u8 gdrst;
+
+	pci_read_config_byte(pdev, I915_GDRST, &gdrst);
+	return (gdrst & GRDOM_RESET_ENABLE) == 0;
+}
+
+static int g33_do_reset(struct drm_i915_private *i915, unsigned int engine_mask)
+{
+	struct pci_dev *pdev = i915->drm.pdev;
+
+	pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE);
+	return wait_for(g4x_reset_complete(pdev), 500);
+}
+
+static int g4x_do_reset(struct drm_i915_private *dev_priv,
+			unsigned int engine_mask)
+{
+	struct pci_dev *pdev = dev_priv->drm.pdev;
+	int ret;
+
+	/* WaVcpClkGateDisableForMediaReset:ctg,elk */
+	I915_WRITE(VDECCLK_GATE_D,
+		   I915_READ(VDECCLK_GATE_D) | VCP_UNIT_CLOCK_GATE_DISABLE);
+	POSTING_READ(VDECCLK_GATE_D);
+
+	pci_write_config_byte(pdev, I915_GDRST,
+			      GRDOM_MEDIA | GRDOM_RESET_ENABLE);
+	ret = wait_for(g4x_reset_complete(pdev), 500);
+	if (ret) {
+		DRM_DEBUG_DRIVER("Wait for media reset failed\n");
+		goto out;
+	}
+
+	pci_write_config_byte(pdev, I915_GDRST,
+			      GRDOM_RENDER | GRDOM_RESET_ENABLE);
+	ret = wait_for(g4x_reset_complete(pdev), 500);
+	if (ret) {
+		DRM_DEBUG_DRIVER("Wait for render reset failed\n");
+		goto out;
+	}
+
+out:
+	pci_write_config_byte(pdev, I915_GDRST, 0);
+
+	I915_WRITE(VDECCLK_GATE_D,
+		   I915_READ(VDECCLK_GATE_D) & ~VCP_UNIT_CLOCK_GATE_DISABLE);
+	POSTING_READ(VDECCLK_GATE_D);
+
+	return ret;
+}
+
+static int ironlake_do_reset(struct drm_i915_private *dev_priv,
+			     unsigned int engine_mask)
+{
+	int ret;
+
+	I915_WRITE(ILK_GDSR, ILK_GRDOM_RENDER | ILK_GRDOM_RESET_ENABLE);
+	ret = intel_wait_for_register(dev_priv,
+				      ILK_GDSR, ILK_GRDOM_RESET_ENABLE, 0,
+				      500);
+	if (ret) {
+		DRM_DEBUG_DRIVER("Wait for render reset failed\n");
+		goto out;
+	}
+
+	I915_WRITE(ILK_GDSR, ILK_GRDOM_MEDIA | ILK_GRDOM_RESET_ENABLE);
+	ret = intel_wait_for_register(dev_priv,
+				      ILK_GDSR, ILK_GRDOM_RESET_ENABLE, 0,
+				      500);
+	if (ret) {
+		DRM_DEBUG_DRIVER("Wait for media reset failed\n");
+		goto out;
+	}
+
+out:
+	I915_WRITE(ILK_GDSR, 0);
+	POSTING_READ(ILK_GDSR);
+	return ret;
+}
+
+/* Reset the hardware domains (GENX_GRDOM_*) specified by mask */
+static int gen6_hw_domain_reset(struct drm_i915_private *dev_priv,
+				u32 hw_domain_mask)
+{
+	int err;
+
+	/*
+	 * GEN6_GDRST is not in the gt power well, no need to check
+	 * for fifo space for the write or forcewake the chip for
+	 * the read
+	 */
+	I915_WRITE_FW(GEN6_GDRST, hw_domain_mask);
+
+	/* Wait for the device to ack the reset requests */
+	err = __intel_wait_for_register_fw(dev_priv,
+					   GEN6_GDRST, hw_domain_mask, 0,
+					   500, 0,
+					   NULL);
+	if (err)
+		DRM_DEBUG_DRIVER("Wait for 0x%08x engines reset failed\n",
+				 hw_domain_mask);
+
+	return err;
+}
+
+static int gen6_reset_engines(struct drm_i915_private *i915,
+			      unsigned int engine_mask)
+{
+	struct intel_engine_cs *engine;
+	const u32 hw_engine_mask[I915_NUM_ENGINES] = {
+		[RCS] = GEN6_GRDOM_RENDER,
+		[BCS] = GEN6_GRDOM_BLT,
+		[VCS] = GEN6_GRDOM_MEDIA,
+		[VCS2] = GEN8_GRDOM_MEDIA2,
+		[VECS] = GEN6_GRDOM_VECS,
+	};
+	u32 hw_mask;
+
+	if (engine_mask == ALL_ENGINES) {
+		hw_mask = GEN6_GRDOM_FULL;
+	} else {
+		unsigned int tmp;
+
+		hw_mask = 0;
+		for_each_engine_masked(engine, i915, engine_mask, tmp)
+			hw_mask |= hw_engine_mask[engine->id];
+	}
+
+	return gen6_hw_domain_reset(i915, hw_mask);
+}
+
+static int gen11_reset_engines(struct drm_i915_private *i915,
+			       unsigned int engine_mask)
+{
+	struct intel_engine_cs *engine;
+	const u32 hw_engine_mask[I915_NUM_ENGINES] = {
+		[RCS] = GEN11_GRDOM_RENDER,
+		[BCS] = GEN11_GRDOM_BLT,
+		[VCS] = GEN11_GRDOM_MEDIA,
+		[VCS2] = GEN11_GRDOM_MEDIA2,
+		[VCS3] = GEN11_GRDOM_MEDIA3,
+		[VCS4] = GEN11_GRDOM_MEDIA4,
+		[VECS] = GEN11_GRDOM_VECS,
+		[VECS2] = GEN11_GRDOM_VECS2,
+	};
+	u32 hw_mask;
+
+	BUILD_BUG_ON(VECS2 + 1 != I915_NUM_ENGINES);
+
+	if (engine_mask == ALL_ENGINES) {
+		hw_mask = GEN11_GRDOM_FULL;
+	} else {
+		unsigned int tmp;
+
+		hw_mask = 0;
+		for_each_engine_masked(engine, i915, engine_mask, tmp)
+			hw_mask |= hw_engine_mask[engine->id];
+	}
+
+	return gen6_hw_domain_reset(i915, hw_mask);
+}
+
+static int gen8_reset_engine_start(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+	int ret;
+
+	I915_WRITE_FW(RING_RESET_CTL(engine->mmio_base),
+		      _MASKED_BIT_ENABLE(RESET_CTL_REQUEST_RESET));
+
+	ret = __intel_wait_for_register_fw(dev_priv,
+					   RING_RESET_CTL(engine->mmio_base),
+					   RESET_CTL_READY_TO_RESET,
+					   RESET_CTL_READY_TO_RESET,
+					   700, 0,
+					   NULL);
+	if (ret)
+		DRM_ERROR("%s: reset request timeout\n", engine->name);
+
+	return ret;
+}
+
+static void gen8_reset_engine_cancel(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+
+	I915_WRITE_FW(RING_RESET_CTL(engine->mmio_base),
+		      _MASKED_BIT_DISABLE(RESET_CTL_REQUEST_RESET));
+}
+
+static int gen8_reset_engines(struct drm_i915_private *i915,
+			      unsigned int engine_mask)
+{
+	struct intel_engine_cs *engine;
+	unsigned int tmp;
+	int ret;
+
+	for_each_engine_masked(engine, i915, engine_mask, tmp) {
+		if (gen8_reset_engine_start(engine)) {
+			ret = -EIO;
+			goto not_ready;
+		}
+	}
+
+	if (INTEL_GEN(i915) >= 11)
+		ret = gen11_reset_engines(i915, engine_mask);
+	else
+		ret = gen6_reset_engines(i915, engine_mask);
+
+not_ready:
+	for_each_engine_masked(engine, i915, engine_mask, tmp)
+		gen8_reset_engine_cancel(engine);
+
+	return ret;
+}
+
+typedef int (*reset_func)(struct drm_i915_private *, unsigned int engine_mask);
+
+static reset_func intel_get_gpu_reset(struct drm_i915_private *i915)
+{
+	if (!i915_modparams.reset)
+		return NULL;
+
+	if (INTEL_GEN(i915) >= 8)
+		return gen8_reset_engines;
+	else if (INTEL_GEN(i915) >= 6)
+		return gen6_reset_engines;
+	else if (IS_GEN5(i915))
+		return ironlake_do_reset;
+	else if (IS_G4X(i915))
+		return g4x_do_reset;
+	else if (IS_G33(i915) || IS_PINEVIEW(i915))
+		return g33_do_reset;
+	else if (INTEL_GEN(i915) >= 3)
+		return i915_do_reset;
+	else
+		return NULL;
+}
+
+int intel_gpu_reset(struct drm_i915_private *i915, unsigned int engine_mask)
+{
+	reset_func reset = intel_get_gpu_reset(i915);
+	int retry;
+	int ret;
+
+	/*
+	 * We want to perform per-engine reset from atomic context (e.g.
+	 * softirq), which imposes the constraint that we cannot sleep.
+	 * However, experience suggests that spending a bit of time waiting
+	 * for a reset helps in various cases, so for a full-device reset
+	 * we apply the opposite rule and wait if we want to. As we should
+	 * always follow up a failed per-engine reset with a full device reset,
+	 * being a little faster, stricter and more error prone for the
+	 * atomic case seems an acceptable compromise.
+	 *
+	 * Unfortunately this leads to a bimodal routine, when the goal was
+	 * to have a single reset function that worked for resetting any
+	 * number of engines simultaneously.
+	 */
+	might_sleep_if(engine_mask == ALL_ENGINES);
+
+	/*
+	 * If the power well sleeps during the reset, the reset
+	 * request may be dropped and never completes (causing -EIO).
+	 */
+	intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);
+	for (retry = 0; retry < 3; retry++) {
+		/*
+		 * We stop engines, otherwise we might get a failed reset and a
+		 * dead gpu (on elk). Also, a gpu as modern as kbl can suffer
+		 * from a system hang if a batchbuffer is progressing when
+		 * the reset is issued, regardless of the READY_TO_RESET ack.
+		 * Thus assume it is best to stop engines on all gens
+		 * where we have a gpu reset.
+		 *
+		 * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES)
+		 *
+		 * WaMediaResetMainRingCleanup:ctg,elk (presumably)
+		 *
+		 * FIXME: Wa for more modern gens needs to be validated
+		 */
+		i915_stop_engines(i915, engine_mask);
+
+		ret = -ENODEV;
+		if (reset) {
+			GEM_TRACE("engine_mask=%x\n", engine_mask);
+			ret = reset(i915, engine_mask);
+		}
+		if (ret != -ETIMEDOUT || engine_mask != ALL_ENGINES)
+			break;
+
+		cond_resched();
+	}
+	intel_uncore_forcewake_put(i915, FORCEWAKE_ALL);
+
+	return ret;
+}
+
+bool intel_has_gpu_reset(struct drm_i915_private *i915)
+{
+	return intel_get_gpu_reset(i915) != NULL;
+}
+
+bool intel_has_reset_engine(struct drm_i915_private *i915)
+{
+	return i915->info.has_reset_engine && i915_modparams.reset >= 2;
+}
+
+int intel_reset_guc(struct drm_i915_private *i915)
+{
+	u32 guc_domain =
+		INTEL_GEN(i915) >= 11 ? GEN11_GRDOM_GUC : GEN9_GRDOM_GUC;
+	int ret;
+
+	GEM_BUG_ON(!HAS_GUC(i915));
+
+	intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);
+	ret = gen6_hw_domain_reset(i915, guc_domain);
+	intel_uncore_forcewake_put(i915, FORCEWAKE_ALL);
+
+	return ret;
+}
+
+/*
+ * Ensure the irq handler finishes, and is not run again.
+ * Also return the active request so that we only search for it once.
+ */
+static struct i915_request *
+reset_prepare_engine(struct intel_engine_cs *engine)
+{
+	struct i915_request *rq;
+
+	/*
+	 * During the reset sequence, we must prevent the engine from
+	 * entering RC6. As the context state is undefined until we restart
+	 * the engine, if it does enter RC6 during the reset, the state
+	 * written to the powercontext is undefined and so we may lose
+	 * GPU state upon resume, i.e. fail to restart after a reset.
+	 */
+	intel_uncore_forcewake_get(engine->i915, FORCEWAKE_ALL);
+
+	rq = engine->reset.prepare(engine);
+	if (rq && rq->fence.error == -EIO)
+		rq = ERR_PTR(-EIO); /* Previous reset failed! */
+
+	return rq;
+}
+
+static int reset_prepare(struct drm_i915_private *i915)
+{
+	struct intel_engine_cs *engine;
+	struct i915_request *rq;
+	enum intel_engine_id id;
+	int err = 0;
+
+	disable_irq(i915->drm.irq);
+
+	for_each_engine(engine, i915, id) {
+		rq = reset_prepare_engine(engine);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			continue;
+		}
+
+		engine->hangcheck.active_request = rq;
+	}
+
+	i915_gem_revoke_fences(i915);
+	intel_uc_sanitize(i915);
+
+	return err;
+}
+
+/* Returns the request if it was guilty of the hang */
+static struct i915_request *
+reset_request(struct intel_engine_cs *engine,
+	      struct i915_request *rq,
+	      bool stalled)
+{
+	/*
+	 * The guilty request will get skipped on a hung engine.
+	 *
+	 * Users of client default contexts do not rely on logical
+	 * state preserved between batches so it is safe to execute
+	 * queued requests following the hang. Non-default contexts
+	 * rely on preserved state, so skipping a batch loses the
+	 * evolution of the state and it needs to be considered corrupted.
+	 * Executing more queued batches on top of corrupted state is
+	 * risky. But we take the risk by trying to advance through
+	 * the queued requests in order to make the client behaviour
+	 * more predictable around resets, by not throwing away a random
+	 * number of batches it has prepared for execution. Sophisticated
+	 * clients can use gem_reset_stats_ioctl and dma fence status
+	 * (exported via the sync_file info ioctl on explicit fences) to observe
+	 * when they lose the context state and should rebuild accordingly.
+	 *
+	 * The context ban, and ultimately the client ban, mechanism are safety
+	 * valves if client submission ends up resulting in nothing more than
+	 * subsequent hangs.
+	 */
+
+	if (i915_request_completed(rq)) {
+		GEM_TRACE("%s pardoned global=%d (fence %llx:%d), current %d\n",
+			  engine->name, rq->global_seqno,
+			  rq->fence.context, rq->fence.seqno,
+			  intel_engine_get_seqno(engine));
+		stalled = false;
+	}
+
+	if (stalled) {
+		context_mark_guilty(rq->gem_context);
+		i915_request_skip(rq, -EIO);
+
+		/* If this context is now banned, skip all pending requests. */
+		if (i915_gem_context_is_banned(rq->gem_context))
+			engine_skip_context(rq);
+	} else {
+		/*
+		 * Since this is not the hung engine, it may have advanced
+		 * since the hang declaration. Double check by refinding
+		 * the active request at the time of the reset.
+		 */
+		rq = i915_gem_find_active_request(engine);
+		if (rq) {
+			unsigned long flags;
+
+			context_mark_innocent(rq->gem_context);
+			dma_fence_set_error(&rq->fence, -EAGAIN);
+
+			/* Rewind the engine to replay the incomplete rq */
+			spin_lock_irqsave(&engine->timeline.lock, flags);
+			rq = list_prev_entry(rq, link);
+			if (&rq->link == &engine->timeline.requests)
+				rq = NULL;
+			spin_unlock_irqrestore(&engine->timeline.lock, flags);
+		}
+	}
+
+	return rq;
+}
+
+static void reset_engine(struct intel_engine_cs *engine,
+			 struct i915_request *rq,
+			 bool stalled)
+{
+	/*
+	 * Make sure this write is visible before we re-enable the interrupt
+	 * handlers on another CPU, as tasklet_enable() resolves to just
+	 * a compiler barrier which is insufficient for our purpose here.
+	 */
+	smp_store_mb(engine->irq_posted, 0);
+
+	if (rq)
+		rq = reset_request(engine, rq, stalled);
+
+	/* Setup the CS to resume from the breadcrumb of the hung request */
+	engine->reset.reset(engine, rq);
+}
+
+static void gt_reset(struct drm_i915_private *i915, unsigned int stalled_mask)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	lockdep_assert_held(&i915->drm.struct_mutex);
+
+	i915_retire_requests(i915);
+
+	for_each_engine(engine, i915, id) {
+		struct intel_context *ce;
+
+		reset_engine(engine,
+			     engine->hangcheck.active_request,
+			     stalled_mask & ENGINE_MASK(id));
+		ce = fetch_and_zero(&engine->last_retired_context);
+		if (ce)
+			intel_context_unpin(ce);
+
+		/*
+		 * Ostensibly, we always want a context loaded for powersaving,
+		 * so if the engine is idle after the reset, send a request
+		 * to load our scratch kernel_context.
+		 *
+		 * More mysteriously, if we leave the engine idle after a reset,
+		 * the next userspace batch may hang, with what appears to be
+		 * an incoherent read by the CS (presumably stale TLB). An
+		 * empty request appears sufficient to paper over the glitch.
+		 */
+		if (intel_engine_is_idle(engine)) {
+			struct i915_request *rq;
+
+			rq = i915_request_alloc(engine, i915->kernel_context);
+			if (!IS_ERR(rq))
+				i915_request_add(rq);
+		}
+	}
+
+	i915_gem_restore_fences(i915);
+}
+
+static void reset_finish_engine(struct intel_engine_cs *engine)
+{
+	engine->reset.finish(engine);
+
+	intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL);
+}
+
+static void reset_finish(struct drm_i915_private *i915)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	lockdep_assert_held(&i915->drm.struct_mutex);
+
+	for_each_engine(engine, i915, id) {
+		engine->hangcheck.active_request = NULL;
+		reset_finish_engine(engine);
+	}
+
+	enable_irq(i915->drm.irq);
+}
+
+static void nop_submit_request(struct i915_request *rq)
+{
+	GEM_TRACE("%s fence %llx:%d -> -EIO\n",
+		  rq->engine->name, rq->fence.context, rq->fence.seqno);
+	dma_fence_set_error(&rq->fence, -EIO);
+
+	i915_request_submit(rq);
+}
+
+static void nop_complete_submit_request(struct i915_request *rq)
+{
+	unsigned long flags;
+
+	GEM_TRACE("%s fence %llx:%d -> -EIO\n",
+		  rq->engine->name,
+		  rq->fence.context, rq->fence.seqno);
+	dma_fence_set_error(&rq->fence, -EIO);
+
+	spin_lock_irqsave(&rq->engine->timeline.lock, flags);
+	__i915_request_submit(rq);
+	intel_engine_init_global_seqno(rq->engine, rq->global_seqno);
+	spin_unlock_irqrestore(&rq->engine->timeline.lock, flags);
+}
+
+void i915_gem_set_wedged(struct drm_i915_private *i915)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	GEM_TRACE("start\n");
+
+	if (GEM_SHOW_DEBUG()) {
+		struct drm_printer p = drm_debug_printer(__func__);
+
+		for_each_engine(engine, i915, id)
+			intel_engine_dump(engine, &p, "%s\n", engine->name);
+	}
+
+	set_bit(I915_WEDGED, &i915->gpu_error.flags);
+	smp_mb__after_atomic();
+
+	/*
+	 * First, stop submission to hw, but do not yet complete requests by
+	 * rolling the global seqno forward (since this would complete requests
+	 * for which we haven't set the fence error to EIO yet).
+	 */
+	for_each_engine(engine, i915, id) {
+		reset_prepare_engine(engine);
+
+		engine->submit_request = nop_submit_request;
+		engine->schedule = NULL;
+	}
+	i915->caps.scheduler = 0;
+
+	/* Even if the GPU reset fails, it should still stop the engines */
+	intel_gpu_reset(i915, ALL_ENGINES);
+
+	/*
+	 * Make sure no one is running the old callback before we proceed with
+	 * cancelling requests and resetting the completion tracking. Otherwise
+	 * we might submit a request to the hardware which never completes.
+	 */
+	synchronize_rcu();
+
+	for_each_engine(engine, i915, id) {
+		/* Mark all executing requests as skipped */
+		engine->cancel_requests(engine);
+
+		/*
+		 * Only once we've force-cancelled all in-flight requests can we
+		 * start to complete all requests.
+		 */
+		engine->submit_request = nop_complete_submit_request;
+	}
+
+	/*
+	 * Make sure no request can slip through without getting completed by
+	 * either this call here to intel_engine_init_global_seqno, or the one
+	 * in nop_complete_submit_request.
+	 */
+	synchronize_rcu();
+
+	for_each_engine(engine, i915, id) {
+		unsigned long flags;
+
+		/*
+		 * Mark all pending requests as complete so that any concurrent
+		 * (lockless) lookup doesn't try and wait upon the request as we
+		 * reset it.
+		 */
+		spin_lock_irqsave(&engine->timeline.lock, flags);
+		intel_engine_init_global_seqno(engine,
+					       intel_engine_last_submit(engine));
+		spin_unlock_irqrestore(&engine->timeline.lock, flags);
+
+		reset_finish_engine(engine);
+	}
+
+	GEM_TRACE("end\n");
+
+	wake_up_all(&i915->gpu_error.reset_queue);
+}
+
+bool i915_gem_unset_wedged(struct drm_i915_private *i915)
+{
+	struct i915_timeline *tl;
+
+	lockdep_assert_held(&i915->drm.struct_mutex);
+	if (!test_bit(I915_WEDGED, &i915->gpu_error.flags))
+		return true;
+
+	GEM_TRACE("start\n");
+
+	/*
+	 * Before unwedging, make sure that all pending operations
+	 * are flushed and errored out - we may have requests waiting upon
+	 * third party fences. We marked all inflight requests as EIO, and
+	 * every execbuf since has returned EIO; for consistency we want all
+	 * the currently pending requests to also be marked as EIO, which
+	 * is done inside our nop_submit_request - and so we must wait.
+	 *
+	 * No more can be submitted until we reset the wedged bit.
+	 */
+	list_for_each_entry(tl, &i915->gt.timelines, link) {
+		struct i915_request *rq;
+
+		rq = i915_gem_active_peek(&tl->last_request,
+					  &i915->drm.struct_mutex);
+		if (!rq)
+			continue;
+
+		/*
+		 * We can't use our normal waiter as we want to
+		 * avoid recursively trying to handle the current
+		 * reset. The basic dma_fence_default_wait() installs
+		 * a callback for dma_fence_signal(), which is
+		 * triggered by our nop handler (indirectly, the
+		 * callback enables the signaler thread which is
+		 * woken by the nop_submit_request() advancing the seqno
+		 * and when the seqno passes the fence, the signaler
+		 * then signals the fence waking us up).
+		 */
+		if (dma_fence_default_wait(&rq->fence, true,
+					   MAX_SCHEDULE_TIMEOUT) < 0)
+			return false;
+	}
+	i915_retire_requests(i915);
+	GEM_BUG_ON(i915->gt.active_requests);
+
+	/*
+	 * Undo nop_submit_request. We prevent all new i915 requests from
+	 * being queued (by disallowing execbuf whilst wedged) so having
+	 * waited for all active requests above, we know the system is idle
+	 * and do not have to worry about a thread being inside
+	 * engine->submit_request() as we swap over. So unlike installing
+	 * the nop_submit_request on reset, we can do this from normal
+	 * context and do not require stop_machine().
+	 */
+	intel_engines_reset_default_submission(i915);
+	i915_gem_contexts_lost(i915);
+
+	GEM_TRACE("end\n");
+
+	smp_mb__before_atomic(); /* complete takeover before enabling execbuf */
+	clear_bit(I915_WEDGED, &i915->gpu_error.flags);
+
+	return true;
+}
+
+/**
+ * i915_reset - reset chip after a hang
+ * @i915: #drm_i915_private to reset
+ * @stalled_mask: mask of the stalled engines with the guilty requests
+ * @reason: user error message for why we are resetting
+ *
+ * Reset the chip.  Useful if a hang is detected. Marks the device as wedged
+ * on failure.
+ *
+ * Caller must hold the struct_mutex.
+ *
+ * Procedure is fairly simple:
+ *   - reset the chip using the reset reg
+ *   - re-init context state
+ *   - re-init hardware status page
+ *   - re-init ring buffer
+ *   - re-init interrupt state
+ *   - re-init display
+ */
+void i915_reset(struct drm_i915_private *i915,
+		unsigned int stalled_mask,
+		const char *reason)
+{
+	struct i915_gpu_error *error = &i915->gpu_error;
+	int ret;
+	int i;
+
+	GEM_TRACE("flags=%lx\n", error->flags);
+
+	might_sleep();
+	lockdep_assert_held(&i915->drm.struct_mutex);
+	GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &error->flags));
+
+	if (!test_bit(I915_RESET_HANDOFF, &error->flags))
+		return;
+
+	/* Clear any previous failed attempts at recovery. Time to try again. */
+	if (!i915_gem_unset_wedged(i915))
+		goto wakeup;
+
+	if (reason)
+		dev_notice(i915->drm.dev, "Resetting chip for %s\n", reason);
+	error->reset_count++;
+
+	ret = reset_prepare(i915);
+	if (ret) {
+		dev_err(i915->drm.dev, "GPU recovery failed\n");
+		goto taint;
+	}
+
+	if (!intel_has_gpu_reset(i915)) {
+		if (i915_modparams.reset)
+			dev_err(i915->drm.dev, "GPU reset not supported\n");
+		else
+			DRM_DEBUG_DRIVER("GPU reset disabled\n");
+		goto error;
+	}
+
+	for (i = 0; i < 3; i++) {
+		ret = intel_gpu_reset(i915, ALL_ENGINES);
+		if (ret == 0)
+			break;
+
+		msleep(100);
+	}
+	if (ret) {
+		dev_err(i915->drm.dev, "Failed to reset chip\n");
+		goto taint;
+	}
+
+	/* Ok, now get things going again... */
+
+	/*
+	 * Everything depends on having the GTT running, so we need to start
+	 * there.
+	 */
+	ret = i915_ggtt_enable_hw(i915);
+	if (ret) {
+		DRM_ERROR("Failed to re-enable GGTT following reset (%d)\n",
+			  ret);
+		goto error;
+	}
+
+	gt_reset(i915, stalled_mask);
+	intel_overlay_reset(i915);
+
+	/*
+	 * Next we need to restore the context, but we don't use those
+	 * yet either...
+	 *
+	 * Ring buffer needs to be re-initialized in the KMS case, or if X
+	 * was running at the time of the reset (i.e. we weren't VT
+	 * switched away).
+	 */
+	ret = i915_gem_init_hw(i915);
+	if (ret) {
+		DRM_ERROR("Failed to initialise HW following reset (%d)\n",
+			  ret);
+		goto error;
+	}
+
+	i915_queue_hangcheck(i915);
+
+finish:
+	reset_finish(i915);
+wakeup:
+	clear_bit(I915_RESET_HANDOFF, &error->flags);
+	wake_up_bit(&error->flags, I915_RESET_HANDOFF);
+	return;
+
+taint:
+	/*
+	 * History tells us that if we cannot reset the GPU now, we
+	 * never will. This then impacts everything that is run
+	 * subsequently. On failing the reset, we mark the driver
+	 * as wedged, preventing further execution on the GPU.
+	 * We also want to go one step further and add a taint to the
+	 * kernel so that any subsequent faults can be traced back to
+	 * this failure. This is important for CI, where if the
+	 * GPU/driver fails we would like to reboot and restart testing
+	 * rather than continue on into oblivion. For everyone else,
+	 * the system should still plod along, but they have been warned!
+	 */
+	add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
+error:
+	i915_gem_set_wedged(i915);
+	i915_retire_requests(i915);
+	goto finish;
+}
+
+static inline int intel_gt_reset_engine(struct drm_i915_private *i915,
+					struct intel_engine_cs *engine)
+{
+	return intel_gpu_reset(i915, intel_engine_flag(engine));
+}
+
+/**
+ * i915_reset_engine - reset GPU engine to recover from a hang
+ * @engine: engine to reset
+ * @msg: reason for GPU reset; or NULL for no dev_notice()
+ *
+ * Reset a specific GPU engine. Useful if a hang is detected.
+ * Returns zero on successful reset or otherwise an error code.
+ *
+ * Procedure is:
+ *  - identify the request that caused the hang and drop it
+ *  - reset engine (which will force the engine to idle)
+ *  - re-init/configure engine
+ */
+int i915_reset_engine(struct intel_engine_cs *engine, const char *msg)
+{
+	struct i915_gpu_error *error = &engine->i915->gpu_error;
+	struct i915_request *active_request;
+	int ret;
+
+	GEM_TRACE("%s flags=%lx\n", engine->name, error->flags);
+	GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &error->flags));
+
+	active_request = reset_prepare_engine(engine);
+	if (IS_ERR_OR_NULL(active_request)) {
+		/* Either the previous reset failed, or we pardon the reset. */
+		ret = PTR_ERR(active_request);
+		goto out;
+	}
+
+	if (msg)
+		dev_notice(engine->i915->drm.dev,
+			   "Resetting %s for %s\n", engine->name, msg);
+	error->reset_engine_count[engine->id]++;
+
+	if (!engine->i915->guc.execbuf_client)
+		ret = intel_gt_reset_engine(engine->i915, engine);
+	else
+		ret = intel_guc_reset_engine(&engine->i915->guc, engine);
+	if (ret) {
+		/* If we fail here, we expect to fallback to a global reset */
+		DRM_DEBUG_DRIVER("%sFailed to reset %s, ret=%d\n",
+				 engine->i915->guc.execbuf_client ? "GuC " : "",
+				 engine->name, ret);
+		goto out;
+	}
+
+	/*
+	 * The request that caused the hang is stuck on the elsp; we know the
+	 * active request and can drop it, then adjust the head to skip the
+	 * offending request and resume executing the remaining requests in
+	 * the queue.
+	 */
+	reset_engine(engine, active_request, true);
+
+	/*
+	 * The engine and its registers (and workarounds in case of render)
+	 * have been reset to their default values. Follow the init_ring
+	 * process to program RING_MODE, HWSP and re-enable submission.
+	 */
+	ret = engine->init_hw(engine);
+	if (ret)
+		goto out;
+
+out:
+	reset_finish_engine(engine);
+	return ret;
+}
+
+struct wedge_me {
+	struct delayed_work work;
+	struct drm_i915_private *i915;
+	const char *name;
+};
+
+static void wedge_me(struct work_struct *work)
+{
+	struct wedge_me *w = container_of(work, typeof(*w), work.work);
+
+	dev_err(w->i915->drm.dev,
+		"%s timed out, cancelling all in-flight rendering.\n",
+		w->name);
+	i915_gem_set_wedged(w->i915);
+}
+
+static void __init_wedge(struct wedge_me *w,
+			 struct drm_i915_private *i915,
+			 long timeout,
+			 const char *name)
+{
+	w->i915 = i915;
+	w->name = name;
+
+	INIT_DELAYED_WORK_ONSTACK(&w->work, wedge_me);
+	schedule_delayed_work(&w->work, timeout);
+}
+
+static void __fini_wedge(struct wedge_me *w)
+{
+	cancel_delayed_work_sync(&w->work);
+	destroy_delayed_work_on_stack(&w->work);
+	w->i915 = NULL;
+}
+
+#define i915_wedge_on_timeout(W, DEV, TIMEOUT)				\
+	for (__init_wedge((W), (DEV), (TIMEOUT), __func__);		\
+	     (W)->i915;							\
+	     __fini_wedge((W)))
+
+static void i915_reset_device(struct drm_i915_private *i915,
+			      u32 engine_mask,
+			      const char *reason)
+{
+	struct i915_gpu_error *error = &i915->gpu_error;
+	struct kobject *kobj = &i915->drm.primary->kdev->kobj;
+	char *error_event[] = { I915_ERROR_UEVENT "=1", NULL };
+	char *reset_event[] = { I915_RESET_UEVENT "=1", NULL };
+	char *reset_done_event[] = { I915_ERROR_UEVENT "=0", NULL };
+	struct wedge_me w;
+
+	kobject_uevent_env(kobj, KOBJ_CHANGE, error_event);
+
+	DRM_DEBUG_DRIVER("resetting chip\n");
+	kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event);
+
+	/* Use a watchdog to ensure that our reset completes */
+	i915_wedge_on_timeout(&w, i915, 5 * HZ) {
+		intel_prepare_reset(i915);
+
+		error->reason = reason;
+		error->stalled_mask = engine_mask;
+
+		/* Signal that locked waiters should reset the GPU */
+		smp_mb__before_atomic();
+		set_bit(I915_RESET_HANDOFF, &error->flags);
+		wake_up_all(&error->wait_queue);
+
+		/*
+		 * Wait for anyone holding the lock to wakeup, without
+		 * blocking indefinitely on struct_mutex.
+		 */
+		do {
+			if (mutex_trylock(&i915->drm.struct_mutex)) {
+				i915_reset(i915, engine_mask, reason);
+				mutex_unlock(&i915->drm.struct_mutex);
+			}
+		} while (wait_on_bit_timeout(&error->flags,
+					     I915_RESET_HANDOFF,
+					     TASK_UNINTERRUPTIBLE,
+					     1));
+
+		error->stalled_mask = 0;
+		error->reason = NULL;
+
+		intel_finish_reset(i915);
+	}
+
+	if (!test_bit(I915_WEDGED, &error->flags))
+		kobject_uevent_env(kobj, KOBJ_CHANGE, reset_done_event);
+}
+
+static void i915_clear_error_registers(struct drm_i915_private *dev_priv)
+{
+	u32 eir;
+
+	if (!IS_GEN2(dev_priv))
+		I915_WRITE(PGTBL_ER, I915_READ(PGTBL_ER));
+
+	if (INTEL_GEN(dev_priv) < 4)
+		I915_WRITE(IPEIR, I915_READ(IPEIR));
+	else
+		I915_WRITE(IPEIR_I965, I915_READ(IPEIR_I965));
+
+	I915_WRITE(EIR, I915_READ(EIR));
+	eir = I915_READ(EIR);
+	if (eir) {
+		/*
+		 * some errors might have become stuck,
+		 * mask them.
+		 */
+		DRM_DEBUG_DRIVER("EIR stuck: 0x%08x, masking\n", eir);
+		I915_WRITE(EMR, I915_READ(EMR) | eir);
+		I915_WRITE(IIR, I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT);
+	}
+}
+
+/**
+ * i915_handle_error - handle a gpu error
+ * @i915: i915 device private
+ * @engine_mask: mask representing engines that are hung
+ * @flags: control flags
+ * @fmt: Error message format string
+ *
+ * Do some basic checking of register state at error time and
+ * dump it to the syslog.  Also call i915_capture_error_state() to make
+ * sure we get a record and make it available in debugfs.  Fire a uevent
+ * so userspace knows something bad happened (should trigger collection
+ * of a ring dump etc.).
+ */
+void i915_handle_error(struct drm_i915_private *i915,
+		       u32 engine_mask,
+		       unsigned long flags,
+		       const char *fmt, ...)
+{
+	struct intel_engine_cs *engine;
+	unsigned int tmp;
+	char error_msg[80];
+	char *msg = NULL;
+
+	if (fmt) {
+		va_list args;
+
+		va_start(args, fmt);
+		vscnprintf(error_msg, sizeof(error_msg), fmt, args);
+		va_end(args);
+
+		msg = error_msg;
+	}
+
+	/*
+	 * In most cases it's guaranteed that we get here with an RPM
+	 * reference held, for example because there is a pending GPU
+	 * request that won't finish until the reset is done. This
+	 * isn't the case at least when we get here by doing a
+	 * simulated reset via debugfs, so get an RPM reference.
+	 */
+	intel_runtime_pm_get(i915);
+
+	engine_mask &= INTEL_INFO(i915)->ring_mask;
+
+	if (flags & I915_ERROR_CAPTURE) {
+		i915_capture_error_state(i915, engine_mask, msg);
+		i915_clear_error_registers(i915);
+	}
+
+	/*
+	 * Try engine reset when available. We fall back to full reset if
+	 * single reset fails.
+	 */
+	if (intel_has_reset_engine(i915)) {
+		for_each_engine_masked(engine, i915, engine_mask, tmp) {
+			BUILD_BUG_ON(I915_RESET_MODESET >= I915_RESET_ENGINE);
+			if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
+					     &i915->gpu_error.flags))
+				continue;
+
+			if (i915_reset_engine(engine, msg) == 0)
+				engine_mask &= ~intel_engine_flag(engine);
+
+			clear_bit(I915_RESET_ENGINE + engine->id,
+				  &i915->gpu_error.flags);
+			wake_up_bit(&i915->gpu_error.flags,
+				    I915_RESET_ENGINE + engine->id);
+		}
+	}
+
+	if (!engine_mask)
+		goto out;
+
+	/* Full reset needs the mutex, stop any other user trying to do so. */
+	if (test_and_set_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags)) {
+		wait_event(i915->gpu_error.reset_queue,
+			   !test_bit(I915_RESET_BACKOFF,
+				     &i915->gpu_error.flags));
+		goto out;
+	}
+
+	/* Prevent any other reset-engine attempt. */
+	for_each_engine(engine, i915, tmp) {
+		while (test_and_set_bit(I915_RESET_ENGINE + engine->id,
+					&i915->gpu_error.flags))
+			wait_on_bit(&i915->gpu_error.flags,
+				    I915_RESET_ENGINE + engine->id,
+				    TASK_UNINTERRUPTIBLE);
+	}
+
+	i915_reset_device(i915, engine_mask, msg);
+
+	for_each_engine(engine, i915, tmp) {
+		clear_bit(I915_RESET_ENGINE + engine->id,
+			  &i915->gpu_error.flags);
+	}
+
+	clear_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);
+	wake_up_all(&i915->gpu_error.reset_queue);
+
+out:
+	intel_runtime_pm_put(i915);
+}
diff --git a/drivers/gpu/drm/i915/i915_reset.h b/drivers/gpu/drm/i915/i915_reset.h
new file mode 100644
index 000000000000..09422c4772dd
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_reset.h
@@ -0,0 +1,37 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2008-2018 Intel Corporation
+ */
+
+#ifndef I915_RESET_H
+#define I915_RESET_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+struct drm_i915_private;
+struct intel_engine_cs;
+struct intel_guc;
+
+__printf(4, 5)
+void i915_handle_error(struct drm_i915_private *i915,
+		       u32 engine_mask,
+		       unsigned long flags,
+		       const char *fmt, ...);
+#define I915_ERROR_CAPTURE BIT(0)
+
+void i915_reset(struct drm_i915_private *i915,
+		unsigned int stalled_mask,
+		const char *reason);
+int i915_reset_engine(struct intel_engine_cs *engine,
+		      const char *reason);
+
+bool intel_has_gpu_reset(struct drm_i915_private *i915);
+bool intel_has_reset_engine(struct drm_i915_private *i915);
+
+int intel_gpu_reset(struct drm_i915_private *i915, u32 engine_mask);
+
+int intel_reset_guc(struct drm_i915_private *i915);
+
+#endif /* I915_RESET_H */
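
(As an aside for reviewers: the fragment below sketches how a caller is expected to use the new interface, in the same spirit as the intel_hangcheck.c hookup further down. The function name and its placement are illustrative only and are not part of this series.)

#include "i915_drv.h"
#include "i915_reset.h"

/*
 * Illustrative only: report a hung engine, capture the error state and
 * let i915_handle_error() escalate from a per-engine reset to a full
 * device reset as required.
 */
static void report_hung_engine(struct intel_engine_cs *engine)
{
	i915_handle_error(engine->i915, intel_engine_flag(engine),
			  I915_ERROR_CAPTURE,
			  "%s appears hung", engine->name);
}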
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 56818a45181c..0d0f3853838a 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -33,13 +33,7 @@
 #include <linux/vgaarb.h>
 #include <drm/drm_edid.h>
 #include <drm/drmP.h>
-#include "intel_drv.h"
-#include "intel_frontbuffer.h"
 #include <drm/i915_drm.h>
-#include "i915_drv.h"
-#include "i915_gem_clflush.h"
-#include "intel_dsi.h"
-#include "i915_trace.h"
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_dp_helper.h>
@@ -49,6 +43,15 @@
 #include <linux/dma_remapping.h>
 #include <linux/reservation.h>
 
+#include "intel_drv.h"
+#include "intel_dsi.h"
+#include "intel_frontbuffer.h"
+
+#include "i915_drv.h"
+#include "i915_gem_clflush.h"
+#include "i915_reset.h"
+#include "i915_trace.h"
+
 /* Primary plane formats for gen <= 3 */
 static const uint32_t i8xx_primary_formats[] = {
 	DRM_FORMAT_C8,
diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
index 4121928a495e..df1a384c2f92 100644
--- a/drivers/gpu/drm/i915/intel_guc.h
+++ b/drivers/gpu/drm/i915/intel_guc.h
@@ -189,4 +189,7 @@ static inline void intel_guc_disable_msg(struct intel_guc *guc, u32 mask)
 	spin_unlock_irq(&guc->irq_lock);
 }
 
+int intel_guc_reset_engine(struct intel_guc *guc,
+			   struct intel_engine_cs *engine);
+
 #endif
diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c
index 2fc7a0dd0df9..5141df342884 100644
--- a/drivers/gpu/drm/i915/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/intel_hangcheck.c
@@ -23,6 +23,7 @@
  */
 
 #include "i915_drv.h"
+#include "i915_reset.h"
 
 static bool
 ipehr_is_semaphore_wait(struct intel_engine_cs *engine, u32 ipehr)
diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c
index 7c95697e1a35..88352ff7164a 100644
--- a/drivers/gpu/drm/i915/intel_uc.c
+++ b/drivers/gpu/drm/i915/intel_uc.c
@@ -26,6 +26,7 @@
 #include "intel_guc_submission.h"
 #include "intel_guc.h"
 #include "i915_drv.h"
+#include "i915_reset.h"
 
 static void guc_free_load_err_log(struct intel_guc *guc);
 
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index b892ca8396e8..1abd342e9cce 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -1698,258 +1698,6 @@ int i915_reg_read_ioctl(struct drm_device *dev,
 	return ret;
 }
 
-static void gen3_stop_engine(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-	const u32 base = engine->mmio_base;
-
-	if (intel_engine_stop_cs(engine))
-		DRM_DEBUG_DRIVER("%s: timed out on STOP_RING\n", engine->name);
-
-	I915_WRITE_FW(RING_HEAD(base), I915_READ_FW(RING_TAIL(base)));
-	POSTING_READ_FW(RING_HEAD(base)); /* paranoia */
-
-	I915_WRITE_FW(RING_HEAD(base), 0);
-	I915_WRITE_FW(RING_TAIL(base), 0);
-	POSTING_READ_FW(RING_TAIL(base));
-
-	/* The ring must be empty before it is disabled */
-	I915_WRITE_FW(RING_CTL(base), 0);
-
-	/* Check acts as a post */
-	if (I915_READ_FW(RING_HEAD(base)) != 0)
-		DRM_DEBUG_DRIVER("%s: ring head not parked\n",
-				 engine->name);
-}
-
-static void i915_stop_engines(struct drm_i915_private *dev_priv,
-			      unsigned engine_mask)
-{
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-
-	if (INTEL_GEN(dev_priv) < 3)
-		return;
-
-	for_each_engine_masked(engine, dev_priv, engine_mask, id)
-		gen3_stop_engine(engine);
-}
-
-static bool i915_in_reset(struct pci_dev *pdev)
-{
-	u8 gdrst;
-
-	pci_read_config_byte(pdev, I915_GDRST, &gdrst);
-	return gdrst & GRDOM_RESET_STATUS;
-}
-
-static int i915_do_reset(struct drm_i915_private *dev_priv, unsigned engine_mask)
-{
-	struct pci_dev *pdev = dev_priv->drm.pdev;
-	int err;
-
-	/* Assert reset for at least 20 usec, and wait for acknowledgement. */
-	pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE);
-	usleep_range(50, 200);
-	err = wait_for(i915_in_reset(pdev), 500);
-
-	/* Clear the reset request. */
-	pci_write_config_byte(pdev, I915_GDRST, 0);
-	usleep_range(50, 200);
-	if (!err)
-		err = wait_for(!i915_in_reset(pdev), 500);
-
-	return err;
-}
-
-static bool g4x_reset_complete(struct pci_dev *pdev)
-{
-	u8 gdrst;
-
-	pci_read_config_byte(pdev, I915_GDRST, &gdrst);
-	return (gdrst & GRDOM_RESET_ENABLE) == 0;
-}
-
-static int g33_do_reset(struct drm_i915_private *dev_priv, unsigned engine_mask)
-{
-	struct pci_dev *pdev = dev_priv->drm.pdev;
-
-	pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE);
-	return wait_for(g4x_reset_complete(pdev), 500);
-}
-
-static int g4x_do_reset(struct drm_i915_private *dev_priv, unsigned engine_mask)
-{
-	struct pci_dev *pdev = dev_priv->drm.pdev;
-	int ret;
-
-	/* WaVcpClkGateDisableForMediaReset:ctg,elk */
-	I915_WRITE(VDECCLK_GATE_D,
-		   I915_READ(VDECCLK_GATE_D) | VCP_UNIT_CLOCK_GATE_DISABLE);
-	POSTING_READ(VDECCLK_GATE_D);
-
-	pci_write_config_byte(pdev, I915_GDRST,
-			      GRDOM_MEDIA | GRDOM_RESET_ENABLE);
-	ret =  wait_for(g4x_reset_complete(pdev), 500);
-	if (ret) {
-		DRM_DEBUG_DRIVER("Wait for media reset failed\n");
-		goto out;
-	}
-
-	pci_write_config_byte(pdev, I915_GDRST,
-			      GRDOM_RENDER | GRDOM_RESET_ENABLE);
-	ret =  wait_for(g4x_reset_complete(pdev), 500);
-	if (ret) {
-		DRM_DEBUG_DRIVER("Wait for render reset failed\n");
-		goto out;
-	}
-
-out:
-	pci_write_config_byte(pdev, I915_GDRST, 0);
-
-	I915_WRITE(VDECCLK_GATE_D,
-		   I915_READ(VDECCLK_GATE_D) & ~VCP_UNIT_CLOCK_GATE_DISABLE);
-	POSTING_READ(VDECCLK_GATE_D);
-
-	return ret;
-}
-
-static int ironlake_do_reset(struct drm_i915_private *dev_priv,
-			     unsigned engine_mask)
-{
-	int ret;
-
-	I915_WRITE(ILK_GDSR, ILK_GRDOM_RENDER | ILK_GRDOM_RESET_ENABLE);
-	ret = intel_wait_for_register(dev_priv,
-				      ILK_GDSR, ILK_GRDOM_RESET_ENABLE, 0,
-				      500);
-	if (ret) {
-		DRM_DEBUG_DRIVER("Wait for render reset failed\n");
-		goto out;
-	}
-
-	I915_WRITE(ILK_GDSR, ILK_GRDOM_MEDIA | ILK_GRDOM_RESET_ENABLE);
-	ret = intel_wait_for_register(dev_priv,
-				      ILK_GDSR, ILK_GRDOM_RESET_ENABLE, 0,
-				      500);
-	if (ret) {
-		DRM_DEBUG_DRIVER("Wait for media reset failed\n");
-		goto out;
-	}
-
-out:
-	I915_WRITE(ILK_GDSR, 0);
-	POSTING_READ(ILK_GDSR);
-	return ret;
-}
-
-/* Reset the hardware domains (GENX_GRDOM_*) specified by mask */
-static int gen6_hw_domain_reset(struct drm_i915_private *dev_priv,
-				u32 hw_domain_mask)
-{
-	int err;
-
-	/* GEN6_GDRST is not in the gt power well, no need to check
-	 * for fifo space for the write or forcewake the chip for
-	 * the read
-	 */
-	__raw_i915_write32(dev_priv, GEN6_GDRST, hw_domain_mask);
-
-	/* Wait for the device to ack the reset requests */
-	err = __intel_wait_for_register_fw(dev_priv,
-					   GEN6_GDRST, hw_domain_mask, 0,
-					   500, 0,
-					   NULL);
-	if (err)
-		DRM_DEBUG_DRIVER("Wait for 0x%08x engines reset failed\n",
-				 hw_domain_mask);
-
-	return err;
-}
-
-/**
- * gen6_reset_engines - reset individual engines
- * @dev_priv: i915 device
- * @engine_mask: mask of intel_ring_flag() engines or ALL_ENGINES for full reset
- *
- * This function will reset the individual engines that are set in engine_mask.
- * If you provide ALL_ENGINES as mask, full global domain reset will be issued.
- *
- * Note: It is responsibility of the caller to handle the difference between
- * asking full domain reset versus reset for all available individual engines.
- *
- * Returns 0 on success, nonzero on error.
- */
-static int gen6_reset_engines(struct drm_i915_private *dev_priv,
-			      unsigned engine_mask)
-{
-	struct intel_engine_cs *engine;
-	const u32 hw_engine_mask[I915_NUM_ENGINES] = {
-		[RCS] = GEN6_GRDOM_RENDER,
-		[BCS] = GEN6_GRDOM_BLT,
-		[VCS] = GEN6_GRDOM_MEDIA,
-		[VCS2] = GEN8_GRDOM_MEDIA2,
-		[VECS] = GEN6_GRDOM_VECS,
-	};
-	u32 hw_mask;
-
-	if (engine_mask == ALL_ENGINES) {
-		hw_mask = GEN6_GRDOM_FULL;
-	} else {
-		unsigned int tmp;
-
-		hw_mask = 0;
-		for_each_engine_masked(engine, dev_priv, engine_mask, tmp)
-			hw_mask |= hw_engine_mask[engine->id];
-	}
-
-	return gen6_hw_domain_reset(dev_priv, hw_mask);
-}
-
-/**
- * gen11_reset_engines - reset individual engines
- * @dev_priv: i915 device
- * @engine_mask: mask of intel_ring_flag() engines or ALL_ENGINES for full reset
- *
- * This function will reset the individual engines that are set in engine_mask.
- * If you provide ALL_ENGINES as mask, full global domain reset will be issued.
- *
- * Note: It is responsibility of the caller to handle the difference between
- * asking full domain reset versus reset for all available individual engines.
- *
- * Returns 0 on success, nonzero on error.
- */
-static int gen11_reset_engines(struct drm_i915_private *dev_priv,
-			       unsigned engine_mask)
-{
-	struct intel_engine_cs *engine;
-	const u32 hw_engine_mask[I915_NUM_ENGINES] = {
-		[RCS] = GEN11_GRDOM_RENDER,
-		[BCS] = GEN11_GRDOM_BLT,
-		[VCS] = GEN11_GRDOM_MEDIA,
-		[VCS2] = GEN11_GRDOM_MEDIA2,
-		[VCS3] = GEN11_GRDOM_MEDIA3,
-		[VCS4] = GEN11_GRDOM_MEDIA4,
-		[VECS] = GEN11_GRDOM_VECS,
-		[VECS2] = GEN11_GRDOM_VECS2,
-	};
-	u32 hw_mask;
-
-	BUILD_BUG_ON(VECS2 + 1 != I915_NUM_ENGINES);
-
-	if (engine_mask == ALL_ENGINES) {
-		hw_mask = GEN11_GRDOM_FULL;
-	} else {
-		unsigned int tmp;
-
-		hw_mask = 0;
-		for_each_engine_masked(engine, dev_priv, engine_mask, tmp)
-			hw_mask |= hw_engine_mask[engine->id];
-	}
-
-	return gen6_hw_domain_reset(dev_priv, hw_mask);
-}
-
 /**
  * __intel_wait_for_register_fw - wait until register matches expected state
  * @dev_priv: the i915 device
@@ -2060,169 +1808,6 @@ int __intel_wait_for_register(struct drm_i915_private *dev_priv,
 	return ret;
 }
 
-static int gen8_reset_engine_start(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-	int ret;
-
-	I915_WRITE_FW(RING_RESET_CTL(engine->mmio_base),
-		      _MASKED_BIT_ENABLE(RESET_CTL_REQUEST_RESET));
-
-	ret = __intel_wait_for_register_fw(dev_priv,
-					   RING_RESET_CTL(engine->mmio_base),
-					   RESET_CTL_READY_TO_RESET,
-					   RESET_CTL_READY_TO_RESET,
-					   700, 0,
-					   NULL);
-	if (ret)
-		DRM_ERROR("%s: reset request timeout\n", engine->name);
-
-	return ret;
-}
-
-static void gen8_reset_engine_cancel(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-
-	I915_WRITE_FW(RING_RESET_CTL(engine->mmio_base),
-		      _MASKED_BIT_DISABLE(RESET_CTL_REQUEST_RESET));
-}
-
-static int gen8_reset_engines(struct drm_i915_private *dev_priv,
-			      unsigned engine_mask)
-{
-	struct intel_engine_cs *engine;
-	unsigned int tmp;
-	int ret;
-
-	for_each_engine_masked(engine, dev_priv, engine_mask, tmp) {
-		if (gen8_reset_engine_start(engine)) {
-			ret = -EIO;
-			goto not_ready;
-		}
-	}
-
-	if (INTEL_GEN(dev_priv) >= 11)
-		ret = gen11_reset_engines(dev_priv, engine_mask);
-	else
-		ret = gen6_reset_engines(dev_priv, engine_mask);
-
-not_ready:
-	for_each_engine_masked(engine, dev_priv, engine_mask, tmp)
-		gen8_reset_engine_cancel(engine);
-
-	return ret;
-}
-
-typedef int (*reset_func)(struct drm_i915_private *, unsigned engine_mask);
-
-static reset_func intel_get_gpu_reset(struct drm_i915_private *dev_priv)
-{
-	if (!i915_modparams.reset)
-		return NULL;
-
-	if (INTEL_GEN(dev_priv) >= 8)
-		return gen8_reset_engines;
-	else if (INTEL_GEN(dev_priv) >= 6)
-		return gen6_reset_engines;
-	else if (IS_GEN5(dev_priv))
-		return ironlake_do_reset;
-	else if (IS_G4X(dev_priv))
-		return g4x_do_reset;
-	else if (IS_G33(dev_priv) || IS_PINEVIEW(dev_priv))
-		return g33_do_reset;
-	else if (INTEL_GEN(dev_priv) >= 3)
-		return i915_do_reset;
-	else
-		return NULL;
-}
-
-int intel_gpu_reset(struct drm_i915_private *dev_priv, unsigned engine_mask)
-{
-	reset_func reset = intel_get_gpu_reset(dev_priv);
-	int retry;
-	int ret;
-
-	/*
-	 * We want to perform per-engine reset from atomic context (e.g.
-	 * softirq), which imposes the constraint that we cannot sleep.
-	 * However, experience suggests that spending a bit of time waiting
-	 * for a reset helps in various cases, so for a full-device reset
-	 * we apply the opposite rule and wait if we want to. As we should
-	 * always follow up a failed per-engine reset with a full device reset,
-	 * being a little faster, stricter and more error prone for the
-	 * atomic case seems an acceptable compromise.
-	 *
-	 * Unfortunately this leads to a bimodal routine, when the goal was
-	 * to have a single reset function that worked for resetting any
-	 * number of engines simultaneously.
-	 */
-	might_sleep_if(engine_mask == ALL_ENGINES);
-
-	/*
-	 * If the power well sleeps during the reset, the reset
-	 * request may be dropped and never completes (causing -EIO).
-	 */
-	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
-	for (retry = 0; retry < 3; retry++) {
-
-		/*
-		 * We stop engines, otherwise we might get failed reset and a
-		 * dead gpu (on elk). Also as modern gpu as kbl can suffer
-		 * from system hang if batchbuffer is progressing when
-		 * the reset is issued, regardless of READY_TO_RESET ack.
-		 * Thus assume it is best to stop engines on all gens
-		 * where we have a gpu reset.
-		 *
-		 * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES)
-		 *
-		 * WaMediaResetMainRingCleanup:ctg,elk (presumably)
-		 *
-		 * FIXME: Wa for more modern gens needs to be validated
-		 */
-		i915_stop_engines(dev_priv, engine_mask);
-
-		ret = -ENODEV;
-		if (reset) {
-			GEM_TRACE("engine_mask=%x\n", engine_mask);
-			ret = reset(dev_priv, engine_mask);
-		}
-		if (ret != -ETIMEDOUT || engine_mask != ALL_ENGINES)
-			break;
-
-		cond_resched();
-	}
-	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
-
-	return ret;
-}
-
-bool intel_has_gpu_reset(struct drm_i915_private *dev_priv)
-{
-	return intel_get_gpu_reset(dev_priv) != NULL;
-}
-
-bool intel_has_reset_engine(struct drm_i915_private *dev_priv)
-{
-	return (dev_priv->info.has_reset_engine &&
-		i915_modparams.reset >= 2);
-}
-
-int intel_reset_guc(struct drm_i915_private *dev_priv)
-{
-	u32 guc_domain = INTEL_GEN(dev_priv) >= 11 ? GEN11_GRDOM_GUC :
-						     GEN9_GRDOM_GUC;
-	int ret;
-
-	GEM_BUG_ON(!HAS_GUC(dev_priv));
-
-	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
-	ret = gen6_hw_domain_reset(dev_priv, guc_domain);
-	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
-
-	return ret;
-}
-
 bool intel_uncore_unclaimed_mmio(struct drm_i915_private *dev_priv)
 {
 	return check_for_unclaimed_mmio(dev_priv);
diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
index c100153cb494..40fb481bbbf4 100644
--- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
@@ -5,6 +5,7 @@
  */
 
 #include "../i915_selftest.h"
+#include "../i915_reset.h"
 
 #include "mock_context.h"
 
-- 
2.18.0
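
(Aside for anyone unfamiliar with the delayed-work watchdog that i915_wedge_on_timeout() wraps above: the userspace toy below models the same idea with plain pthreads, arming a timeout before attempting recovery and cancelling it once the guarded block completes, wedging only if the timeout fires first. It is purely illustrative and shares nothing with the driver beyond the shape of the pattern.)

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

struct wedge_watchdog {
	pthread_t thread;
	pthread_mutex_t lock;
	pthread_cond_t done;
	bool completed;
	unsigned int timeout_s;
	const char *name;
};

static void *watchdog_fn(void *arg)
{
	struct wedge_watchdog *w = arg;
	struct timespec ts;
	int ret = 0;

	clock_gettime(CLOCK_REALTIME, &ts);
	ts.tv_sec += w->timeout_s;

	pthread_mutex_lock(&w->lock);
	while (!w->completed && ret == 0)
		ret = pthread_cond_timedwait(&w->done, &w->lock, &ts);
	if (!w->completed)
		fprintf(stderr, "%s timed out, cancelling all in-flight rendering (toy)\n",
			w->name);
	pthread_mutex_unlock(&w->lock);
	return NULL;
}

static void init_wedge(struct wedge_watchdog *w, unsigned int timeout_s,
		       const char *name)
{
	pthread_mutex_init(&w->lock, NULL);
	pthread_cond_init(&w->done, NULL);
	w->completed = false;
	w->timeout_s = timeout_s;
	w->name = name;
	pthread_create(&w->thread, NULL, watchdog_fn, w);
}

static void fini_wedge(struct wedge_watchdog *w)
{
	pthread_mutex_lock(&w->lock);
	w->completed = true;
	pthread_cond_signal(&w->done);
	pthread_mutex_unlock(&w->lock);
	pthread_join(w->thread, NULL);
}

int main(void)
{
	struct wedge_watchdog w;

	init_wedge(&w, 5, __func__);	/* 5s budget, like the driver's 5 * HZ */
	sleep(1);			/* stands in for the guarded reset work */
	fini_wedge(&w);			/* completed in time: no wedge */
	return 0;
}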


* [PATCH 37/37] drm/i915: Remove GPU reset dependence on struct_mutex
  2018-06-29  7:53 [PATCH 01/37] drm/i915/gtt: Add read only pages to gen8_pte_encode Chris Wilson
                   ` (34 preceding siblings ...)
  2018-06-29  7:53 ` [PATCH 36/37] drm/i915: Pull all the reset functionality together into i915_reset.c Chris Wilson
@ 2018-06-29  7:53 ` Chris Wilson
  2018-06-29  8:58 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/37] drm/i915/gtt: Add read only pages to gen8_pte_encode Patchwork
                   ` (4 subsequent siblings)
  40 siblings, 0 replies; 68+ messages in thread
From: Chris Wilson @ 2018-06-29  7:53 UTC (permalink / raw)
  To: intel-gfx

Now that the submission backends are controlled via their own spinlocks,
with a wave of a magic wand we can lift the struct_mutex requirement
around GPU reset. That is, we allow the submission frontend (userspace)
to keep on submitting while we process the GPU reset, as we can suspend
the backend independently.

The major change is around the backoff/handoff strategy for performing
the reset. With no mutex deadlock, we no longer have to coordinate with
any waiter, and just perform the reset immediately.
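
(To make the new shape concrete: the global reset is still serialised by the RESET_BACKOFF-style bit, so concurrent hang reporters race on that bit, exactly one of them performs the device reset, and the rest simply wait for it to clear. The userspace toy below models just that race with C11 atomics; it is an illustration of the idea, not driver code.)

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static atomic_bool reset_backoff;	/* stands in for I915_RESET_BACKOFF */

static void handle_hang(long reporter)
{
	bool expected = false;

	if (!atomic_compare_exchange_strong(&reset_backoff, &expected, true)) {
		/* Another reporter owns the reset; just wait for it to end. */
		while (atomic_load(&reset_backoff))
			usleep(1000);
		return;
	}

	printf("reporter %ld performs the device reset\n", reporter);
	usleep(10000);			/* stands in for the actual reset */
	atomic_store(&reset_backoff, false);
}

static void *reporter_fn(void *arg)
{
	handle_hang((long)arg);
	return NULL;
}

int main(void)
{
	pthread_t threads[4];
	long i;

	for (i = 0; i < 4; i++)
		pthread_create(&threads[i], NULL, reporter_fn, (void *)i);
	for (i = 0; i < 4; i++)
		pthread_join(threads[i], NULL);
	return 0;
}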

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c           |   7 -
 drivers/gpu/drm/i915/i915_drv.h               |   5 -
 drivers/gpu/drm/i915/i915_gem.c               |  10 +-
 drivers/gpu/drm/i915/i915_gem_fence_reg.c     |  39 ++-
 drivers/gpu/drm/i915/i915_gem_fence_reg.h     |   6 +-
 drivers/gpu/drm/i915/i915_gpu_error.h         |  22 +-
 drivers/gpu/drm/i915/i915_request.c           |  46 ---
 drivers/gpu/drm/i915/i915_reset.c             | 315 +++++++-----------
 drivers/gpu/drm/i915/i915_reset.h             |   3 +
 drivers/gpu/drm/i915/intel_engine_cs.c        |   6 +-
 drivers/gpu/drm/i915/intel_guc_submission.c   |   5 +-
 drivers/gpu/drm/i915/intel_lrc.c              |  98 ++----
 drivers/gpu/drm/i915/intel_overlay.c          |   2 -
 drivers/gpu/drm/i915/intel_ringbuffer.c       |  89 +++--
 drivers/gpu/drm/i915/intel_ringbuffer.h       |  13 +-
 .../gpu/drm/i915/selftests/intel_hangcheck.c  |  29 +-
 16 files changed, 269 insertions(+), 426 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index f93ab55d61b3..3908b75f8168 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1319,8 +1319,6 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
 		seq_puts(m, "Wedged\n");
 	if (test_bit(I915_RESET_BACKOFF, &dev_priv->gpu_error.flags))
 		seq_puts(m, "Reset in progress: struct_mutex backoff\n");
-	if (test_bit(I915_RESET_HANDOFF, &dev_priv->gpu_error.flags))
-		seq_puts(m, "Reset in progress: reset handoff to waiter\n");
 	if (waitqueue_active(&dev_priv->gpu_error.wait_queue))
 		seq_puts(m, "Waiter holding struct mutex\n");
 	if (waitqueue_active(&dev_priv->gpu_error.reset_queue))
@@ -4063,11 +4061,6 @@ i915_wedged_set(void *data, u64 val)
 
 	i915_handle_error(i915, val, I915_ERROR_CAPTURE,
 			  "Manually set wedged engine mask = %llx", val);
-
-	wait_on_bit(&i915->gpu_error.flags,
-		    I915_RESET_HANDOFF,
-		    TASK_UNINTERRUPTIBLE);
-
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 49903c7966fa..fc80068d3c80 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3098,11 +3098,6 @@ static inline bool i915_reset_backoff(struct i915_gpu_error *error)
 	return unlikely(test_bit(I915_RESET_BACKOFF, &error->flags));
 }
 
-static inline bool i915_reset_handoff(struct i915_gpu_error *error)
-{
-	return unlikely(test_bit(I915_RESET_HANDOFF, &error->flags));
-}
-
 static inline bool i915_terminally_wedged(struct i915_gpu_error *error)
 {
 	return unlikely(test_bit(I915_WEDGED, &error->flags));
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8821342dfbaf..aef62955a33b 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4620,6 +4620,8 @@ int i915_gem_suspend(struct drm_i915_private *i915)
 	intel_runtime_pm_get(i915);
 	intel_suspend_gt_powersave(i915);
 
+	flush_workqueue(i915->wq);
+
 	mutex_lock(&i915->drm.struct_mutex);
 
 	/*
@@ -4646,11 +4648,9 @@ int i915_gem_suspend(struct drm_i915_private *i915)
 		assert_kernel_context_is_current(i915);
 	}
 	mutex_unlock(&i915->drm.struct_mutex);
+	i915_reset_flush(i915);
 
-	intel_uc_suspend(i915);
-
-	cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work);
-	cancel_delayed_work_sync(&i915->gt.retire_work);
+	drain_delayed_work(&i915->gt.retire_work);
 
 	/*
 	 * As the idle_work is rearming if it detects a race, play safe and
@@ -4658,6 +4658,8 @@ int i915_gem_suspend(struct drm_i915_private *i915)
 	 */
 	drain_delayed_work(&i915->gt.idle_work);
 
+	intel_uc_suspend(i915);
+
 	/*
 	 * Assert that we successfully flushed all the work and
 	 * reset the GPU back to its idle, low power state.
diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
index 9313a8e675c8..e458ae189f82 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
@@ -473,7 +473,7 @@ void i915_unreserve_fence(struct drm_i915_fence_reg *fence)
 }
 
 /**
- * i915_gem_revoke_fences - revoke fence state
+ * __i915_gem_revoke_fences - revoke fence state
  * @i915: i915 device private
  *
  * Removes all GTT mmappings via the fence registers. This forces any user
@@ -482,12 +482,13 @@ void i915_unreserve_fence(struct drm_i915_fence_reg *fence)
  * revoke concurrent userspace access via GTT mmaps until the hardware has been
  * reset and the fence registers have been restored.
  */
-void i915_gem_revoke_fences(struct drm_i915_private *i915)
+void __i915_gem_revoke_fences(struct drm_i915_private *i915)
 {
 	struct i915_ggtt *ggtt = &i915->ggtt;
 	int i;
 
-	mutex_lock(&ggtt->vm.mutex);
+	lockdep_assert_held(&ggtt->vm.mutex);
+
 	for (i = 0; i < ggtt->num_fence_regs; i++) {
 		struct drm_i915_fence_reg *fence = &ggtt->fence_regs[i];
 
@@ -496,23 +497,15 @@ void i915_gem_revoke_fences(struct drm_i915_private *i915)
 		if (fence->vma)
 			i915_vma_revoke_mmap(fence->vma);
 	}
-	mutex_unlock(&ggtt->vm.mutex);
 }
 
-/**
- * i915_gem_restore_fences - restore fence state
- * @i915: i915 device private
- *
- * Restore the hw fence state to match the software tracking again, to be called
- * after a gpu reset and on resume. Note that on runtime suspend we only cancel
- * the fences, to be reacquired by the user later.
- */
-void i915_gem_restore_fences(struct drm_i915_private *i915)
+void __i915_gem_restore_fences(struct drm_i915_private *i915)
 {
 	struct i915_ggtt *ggtt = &i915->ggtt;
 	int i;
 
-	mutex_lock(&ggtt->vm.mutex);
+	lockdep_assert_held(&ggtt->vm.mutex);
+
 	for (i = 0; i < ggtt->num_fence_regs; i++) {
 		struct drm_i915_fence_reg *reg = &ggtt->fence_regs[i];
 		struct i915_vma *vma = reg->vma;
@@ -535,6 +528,24 @@ void i915_gem_restore_fences(struct drm_i915_private *i915)
 		fence_write(reg, vma);
 		reg->vma = vma;
 	}
+}
+
+/**
+ * i915_gem_restore_fences - restore fence state
+ * @i915: i915 device private
+ *
+ * Restore the hw fence state to match the software tracking again, to be called
+ * after a gpu reset and on resume. Note that on runtime suspend we only cancel
+ * the fences, to be reacquired by the user later.
+ */
+void i915_gem_restore_fences(struct drm_i915_private *i915)
+{
+	struct i915_ggtt *ggtt = &i915->ggtt;
+
+	mutex_lock(&ggtt->vm.mutex);
+
+	__i915_gem_restore_fences(i915);
+
 	mutex_unlock(&ggtt->vm.mutex);
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.h b/drivers/gpu/drm/i915/i915_gem_fence_reg.h
index 6e66f6b3f851..cd50a9ec36fc 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence_reg.h
+++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.h
@@ -60,8 +60,10 @@ struct drm_i915_fence_reg *
 i915_reserve_fence(struct drm_i915_private *i915);
 void i915_unreserve_fence(struct drm_i915_fence_reg *fence);
 
-void i915_gem_revoke_fences(struct drm_i915_private *i915);
-void i915_gem_restore_fences(struct drm_i915_private *i915);
+void __i915_gem_revoke_fences(struct drm_i915_private *dev_priv);
+void __i915_gem_restore_fences(struct drm_i915_private *dev_priv);
+
+void i915_gem_restore_fences(struct drm_i915_private *dev_priv);
 
 void i915_gem_detect_bit_6_swizzle(struct drm_i915_private *i915);
 void i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index f893a4e8b783..9819952a6e49 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -193,6 +193,8 @@ struct i915_gpu_state {
 	struct i915_address_space *active_vm[I915_NUM_ENGINES];
 };
 
+struct i915_gpu_restart;
+
 struct i915_gpu_error {
 	/* For hangcheck timer */
 #define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */
@@ -243,15 +245,6 @@ struct i915_gpu_error {
 	 * i915_mutex_lock_interruptible()?). I915_RESET_BACKOFF serves a
 	 * secondary role in preventing two concurrent global reset attempts.
 	 *
-	 * #I915_RESET_HANDOFF - To perform the actual GPU reset, we need the
-	 * struct_mutex. We try to acquire the struct_mutex in the reset worker,
-	 * but it may be held by some long running waiter (that we cannot
-	 * interrupt without causing trouble). Once we are ready to do the GPU
-	 * reset, we set the I915_RESET_HANDOFF bit and wakeup any waiters. If
-	 * they already hold the struct_mutex and want to participate they can
-	 * inspect the bit and do the reset directly, otherwise the worker
-	 * waits for the struct_mutex.
-	 *
 	 * #I915_RESET_ENGINE[num_engines] - Since the driver doesn't need to
 	 * acquire the struct_mutex to reset an engine, we need an explicit
 	 * flag to prevent two concurrent reset attempts in the same engine.
@@ -265,20 +258,13 @@ struct i915_gpu_error {
 	 */
 	unsigned long flags;
 #define I915_RESET_BACKOFF	0
-#define I915_RESET_HANDOFF	1
-#define I915_RESET_MODESET	2
+#define I915_RESET_MODESET	1
 #define I915_WEDGED		(BITS_PER_LONG - 1)
 #define I915_RESET_ENGINE	(I915_WEDGED - I915_NUM_ENGINES)
 
 	/** Number of times an engine has been reset */
 	u32 reset_engine_count[I915_NUM_ENGINES];
 
-	/** Set of stalled engines with guilty requests, in the current reset */
-	u32 stalled_mask;
-
-	/** Reason for the current *global* reset */
-	const char *reason;
-
 	/**
 	 * Waitqueue to signal when a hang is detected. Used to for waiters
 	 * to release the struct_mutex for the reset to procede.
@@ -293,6 +279,8 @@ struct i915_gpu_error {
 
 	/* For missed irq/seqno simulation. */
 	unsigned long test_irq_rings;
+
+	struct i915_gpu_restart *restart;
 };
 
 struct drm_i915_error_state_buf {
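
As a reminder of how gpu_error.flags is consumed, since the bit layout just changed: the wedged and per-engine bits are tested and claimed with the ordinary atomic bitops, roughly as in the fragment below (a sketch; real call sites appear later in this patch, e.g. i915_reset_engine() and i915_gem_unset_wedged()):

	/* Terminal wedge check */
	if (test_bit(I915_WEDGED, &i915->gpu_error.flags))
		return -EIO;

	/* Claim a per-engine reset slot; sketch only */
	if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
			     &i915->gpu_error.flags))
		return -EBUSY;	/* another reset of this engine in flight */

With I915_RESET_HANDOFF gone, only BACKOFF, MODESET, the per-engine bits and WEDGED remain.
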
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 041d8decd431..f00d59df3034 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1232,18 +1232,6 @@ static bool __i915_spin_request(const struct i915_request *rq,
 	return false;
 }
 
-static bool __i915_wait_request_check_and_reset(struct i915_request *request)
-{
-	struct i915_gpu_error *error = &request->i915->gpu_error;
-
-	if (likely(!i915_reset_handoff(error)))
-		return false;
-
-	__set_current_state(TASK_RUNNING);
-	i915_reset(request->i915, error->stalled_mask, error->reason);
-	return true;
-}
-
 /**
  * i915_request_wait - wait until execution of request has finished
  * @rq: the request to wait upon
@@ -1269,17 +1257,10 @@ long i915_request_wait(struct i915_request *rq,
 {
 	const int state = flags & I915_WAIT_INTERRUPTIBLE ?
 		TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
-	wait_queue_head_t *errq = &rq->i915->gpu_error.wait_queue;
-	DEFINE_WAIT_FUNC(reset, default_wake_function);
 	DEFINE_WAIT_FUNC(exec, default_wake_function);
 	struct intel_wait wait;
 
 	might_sleep();
-#if IS_ENABLED(CONFIG_LOCKDEP)
-	GEM_BUG_ON(debug_locks &&
-		   !!lockdep_is_held(&rq->i915->drm.struct_mutex) !=
-		   !!(flags & I915_WAIT_LOCKED));
-#endif
 	GEM_BUG_ON(timeout < 0);
 
 	if (i915_request_completed(rq))
@@ -1289,10 +1270,7 @@ long i915_request_wait(struct i915_request *rq,
 		return -ETIME;
 
 	trace_i915_request_wait_begin(rq, flags);
-
 	add_wait_queue(&rq->execute, &exec);
-	if (flags & I915_WAIT_LOCKED)
-		add_wait_queue(errq, &reset);
 
 	intel_wait_init(&wait);
 
@@ -1302,10 +1280,6 @@ long i915_request_wait(struct i915_request *rq,
 		if (intel_wait_update_request(&wait, rq))
 			break;
 
-		if (flags & I915_WAIT_LOCKED &&
-		    __i915_wait_request_check_and_reset(rq))
-			continue;
-
 		if (signal_pending_state(state, current)) {
 			timeout = -ERESTARTSYS;
 			goto complete;
@@ -1335,9 +1309,6 @@ long i915_request_wait(struct i915_request *rq,
 		 */
 		goto wakeup;
 
-	if (flags & I915_WAIT_LOCKED)
-		__i915_wait_request_check_and_reset(rq);
-
 	for (;;) {
 		if (signal_pending_state(state, current)) {
 			timeout = -ERESTARTSYS;
@@ -1367,21 +1338,6 @@ long i915_request_wait(struct i915_request *rq,
 		if (__i915_request_irq_complete(rq))
 			break;
 
-		/*
-		 * If the GPU is hung, and we hold the lock, reset the GPU
-		 * and then check for completion. On a full reset, the engine's
-		 * HW seqno will be advanced passed us and we are complete.
-		 * If we do a partial reset, we have to wait for the GPU to
-		 * resume and update the breadcrumb.
-		 *
-		 * If we don't hold the mutex, we can just wait for the worker
-		 * to come along and update the breadcrumb (either directly
-		 * itself, or indirectly by recovering the GPU).
-		 */
-		if (flags & I915_WAIT_LOCKED &&
-		    __i915_wait_request_check_and_reset(rq))
-			continue;
-
 		/* Only spin if we know the GPU is processing this request */
 		if (__i915_spin_request(rq, wait.seqno, state, 2))
 			break;
@@ -1395,8 +1351,6 @@ long i915_request_wait(struct i915_request *rq,
 	intel_engine_remove_wait(rq->engine, &wait);
 complete:
 	__set_current_state(TASK_RUNNING);
-	if (flags & I915_WAIT_LOCKED)
-		remove_wait_queue(errq, &reset);
 	remove_wait_queue(&rq->execute, &exec);
 	trace_i915_request_wait_end(rq);
 
diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c
index d8b7840c9e20..a9dcee60d338 100644
--- a/drivers/gpu/drm/i915/i915_reset.c
+++ b/drivers/gpu/drm/i915/i915_reset.c
@@ -15,22 +15,23 @@ static void engine_skip_context(struct i915_request *rq)
 	struct intel_engine_cs *engine = rq->engine;
 	struct i915_gem_context *hung_ctx = rq->gem_context;
 	struct i915_timeline *timeline = rq->timeline;
-	unsigned long flags;
 
+	lockdep_assert_held(&engine->timeline.lock);
 	GEM_BUG_ON(timeline == &engine->timeline);
 
-	spin_lock_irqsave(&engine->timeline.lock, flags);
 	spin_lock(&timeline->lock);
 
-	list_for_each_entry_continue(rq, &engine->timeline.requests, link)
-		if (rq->gem_context == hung_ctx)
-			i915_request_skip(rq, -EIO);
+	if (rq->global_seqno) {
+		list_for_each_entry_continue(rq,
+					     &engine->timeline.requests, link)
+			if (rq->gem_context == hung_ctx)
+				i915_request_skip(rq, -EIO);
+	}
 
 	list_for_each_entry(rq, &timeline->requests, link)
 		i915_request_skip(rq, -EIO);
 
 	spin_unlock(&timeline->lock);
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
 }
 
 static void client_mark_guilty(struct drm_i915_file_private *file_priv,
@@ -57,7 +58,7 @@ static void client_mark_guilty(struct drm_i915_file_private *file_priv,
 	}
 }
 
-static void context_mark_guilty(struct i915_gem_context *ctx)
+static bool context_mark_guilty(struct i915_gem_context *ctx)
 {
 	unsigned int score;
 	bool banned, bannable;
@@ -70,7 +71,7 @@ static void context_mark_guilty(struct i915_gem_context *ctx)
 
 	/* Cool contexts don't accumulate client ban score */
 	if (!bannable)
-		return;
+		return false;
 
 	if (banned) {
 		DRM_DEBUG_DRIVER("context %s: guilty %d, score %u, banned\n",
@@ -81,6 +82,8 @@ static void context_mark_guilty(struct i915_gem_context *ctx)
 
 	if (!IS_ERR_OR_NULL(ctx->file_priv))
 		client_mark_guilty(ctx->file_priv, ctx);
+
+	return banned;
 }
 
 static void context_mark_innocent(struct i915_gem_context *ctx)
@@ -88,6 +91,21 @@ static void context_mark_innocent(struct i915_gem_context *ctx)
 	atomic_inc(&ctx->active_count);
 }
 
+void i915_reset_request(struct i915_request *rq, bool guilty)
+{
+	lockdep_assert_held(&rq->engine->timeline.lock);
+	GEM_BUG_ON(i915_request_completed(rq));
+
+	if (guilty) {
+		i915_request_skip(rq, -EIO);
+		if (context_mark_guilty(rq->gem_context))
+			engine_skip_context(rq);
+	} else {
+		dma_fence_set_error(&rq->fence, -EAGAIN);
+		context_mark_innocent(rq->gem_context);
+	}
+}
+
 static void gen3_stop_engine(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
@@ -483,11 +501,8 @@ int intel_reset_guc(struct drm_i915_private *i915)
  * Ensure irq handler finishes, and not run again.
  * Also return the active request so that we only search for it once.
  */
-static struct i915_request *
-reset_prepare_engine(struct intel_engine_cs *engine)
+static void reset_prepare_engine(struct intel_engine_cs *engine)
 {
-	struct i915_request *rq;
-
 	/*
 	 * During the reset sequence, we must prevent the engine from
 	 * entering RC6. As the context state is undefined until we restart
@@ -496,171 +511,78 @@ reset_prepare_engine(struct intel_engine_cs *engine)
 	 * GPU state upon resume, i.e. fail to restart after a reset.
 	 */
 	intel_uncore_forcewake_get(engine->i915, FORCEWAKE_ALL);
-
-	rq = engine->reset.prepare(engine);
-	if (rq && rq->fence.error == -EIO)
-		rq = ERR_PTR(-EIO); /* Previous reset failed! */
-
-	return rq;
+	engine->reset.prepare(engine);
 }
 
-static int reset_prepare(struct drm_i915_private *i915)
+static void reset_prepare(struct drm_i915_private *i915)
 {
 	struct intel_engine_cs *engine;
-	struct i915_request *rq;
 	enum intel_engine_id id;
-	int err = 0;
-
-	disable_irq(i915->drm.irq);
-
-	for_each_engine(engine, i915, id) {
-		rq = reset_prepare_engine(engine);
-		if (IS_ERR(rq)) {
-			err = PTR_ERR(rq);
-			continue;
-		}
 
-		engine->hangcheck.active_request = rq;
-	}
+	for_each_engine(engine, i915, id)
+		reset_prepare_engine(engine);
 
-	i915_gem_revoke_fences(i915);
 	intel_uc_sanitize(i915);
-
-	return err;
 }
 
-/* Returns the request if it was guilty of the hang */
-static struct i915_request *
-reset_request(struct intel_engine_cs *engine,
-	      struct i915_request *rq,
-	      bool stalled)
+static void gt_reset(struct drm_i915_private *i915, unsigned int stalled_mask)
 {
-	/*
-	 * The guilty request will get skipped on a hung engine.
-	 *
-	 * Users of client default contexts do not rely on logical
-	 * state preserved between batches so it is safe to execute
-	 * queued requests following the hang. Non default contexts
-	 * rely on preserved state, so skipping a batch loses the
-	 * evolution of the state and it needs to be considered corrupted.
-	 * Executing more queued batches on top of corrupted state is
-	 * risky. But we take the risk by trying to advance through
-	 * the queued requests in order to make the client behaviour
-	 * more predictable around resets, by not throwing away random
-	 * amount of batches it has prepared for execution. Sophisticated
-	 * clients can use gem_reset_stats_ioctl and dma fence status
-	 * (exported via sync_file info ioctl on explicit fences) to observe
-	 * when it loses the context state and should rebuild accordingly.
-	 *
-	 * The context ban, and ultimately the client ban, mechanism are safety
-	 * valves if client submission ends up resulting in nothing more than
-	 * subsequent hangs.
-	 */
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
 
-	if (i915_request_completed(rq)) {
-		GEM_TRACE("%s pardoned global=%d (fence %llx:%d), current %d\n",
-			  engine->name, rq->global_seqno,
-			  rq->fence.context, rq->fence.seqno,
-			  intel_engine_get_seqno(engine));
-		stalled = false;
-	}
+	mutex_lock(&i915->ggtt.vm.mutex);
+	__i915_gem_revoke_fences(i915);
 
-	if (stalled) {
-		context_mark_guilty(rq->gem_context);
-		i915_request_skip(rq, -EIO);
+	for_each_engine(engine, i915, id)
+		intel_engine_reset(engine, stalled_mask & ENGINE_MASK(id));
 
-		/* If this context is now banned, skip all pending requests. */
-		if (i915_gem_context_is_banned(rq->gem_context))
-			engine_skip_context(rq);
-	} else {
-		/*
-		 * Since this is not the hung engine, it may have advanced
-		 * since the hang declaration. Double check by refinding
-		 * the active request at the time of the reset.
-		 */
-		rq = i915_gem_find_active_request(engine);
-		if (rq) {
-			unsigned long flags;
-
-			context_mark_innocent(rq->gem_context);
-			dma_fence_set_error(&rq->fence, -EAGAIN);
-
-			/* Rewind the engine to replay the incomplete rq */
-			spin_lock_irqsave(&engine->timeline.lock, flags);
-			rq = list_prev_entry(rq, link);
-			if (&rq->link == &engine->timeline.requests)
-				rq = NULL;
-			spin_unlock_irqrestore(&engine->timeline.lock, flags);
-		}
-	}
-
-	return rq;
+	__i915_gem_restore_fences(i915);
+	mutex_unlock(&i915->ggtt.vm.mutex);
 }
 
-static void reset_engine(struct intel_engine_cs *engine,
-			 struct i915_request *rq,
-			 bool stalled)
+static void reset_finish_engine(struct intel_engine_cs *engine)
 {
-	/*
-	 * Make sure this write is visible before we re-enable the interrupt
-	 * handlers on another CPU, as tasklet_enable() resolves to just
-	 * a compiler barrier which is insufficient for our purpose here.
-	 */
-	smp_store_mb(engine->irq_posted, 0);
-
-	if (rq)
-		rq = reset_request(engine, rq, stalled);
-
-	/* Setup the CS to resume from the breadcrumb of the hung request */
-	engine->reset.reset(engine, rq);
+	engine->reset.finish(engine);
+	intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL);
 }
 
-static void gt_reset(struct drm_i915_private *i915, unsigned int stalled_mask)
+struct i915_gpu_restart {
+	struct work_struct work;
+	struct drm_i915_private *i915;
+};
+
+static void restart_work(struct work_struct *work)
 {
+	struct i915_gpu_restart *arg = container_of(work, typeof(*arg), work);
+	struct drm_i915_private *i915 = arg->i915;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 
-	lockdep_assert_held(&i915->drm.struct_mutex);
+	intel_runtime_pm_get(i915);
+	mutex_lock(&i915->drm.struct_mutex);
 
-	i915_retire_requests(i915);
+	smp_store_mb(i915->gpu_error.restart, NULL);
 
 	for_each_engine(engine, i915, id) {
-		struct intel_context *ce;
-
-		reset_engine(engine,
-			     engine->hangcheck.active_request,
-			     stalled_mask & ENGINE_MASK(id));
-		ce = fetch_and_zero(&engine->last_retired_context);
-		if (ce)
-			intel_context_unpin(ce);
+		struct i915_request *rq;
 
 		/*
 		 * Ostensibily, we always want a context loaded for powersaving,
 		 * so if the engine is idle after the reset, send a request
 		 * to load our scratch kernel_context.
-		 *
-		 * More mysteriously, if we leave the engine idle after a reset,
-		 * the next userspace batch may hang, with what appears to be
-		 * an incoherent read by the CS (presumably stale TLB). An
-		 * empty request appears sufficient to paper over the glitch.
 		 */
-		if (intel_engine_is_idle(engine)) {
-			struct i915_request *rq;
+		if (!intel_engine_is_idle(engine))
+			continue;
 
-			rq = i915_request_alloc(engine, i915->kernel_context);
-			if (!IS_ERR(rq))
-				i915_request_add(rq);
-		}
+		rq = i915_request_alloc(engine, i915->kernel_context);
+		if (!IS_ERR(rq))
+			i915_request_add(rq);
 	}
 
-	i915_gem_restore_fences(i915);
-}
-
-static void reset_finish_engine(struct intel_engine_cs *engine)
-{
-	engine->reset.finish(engine);
+	mutex_unlock(&i915->drm.struct_mutex);
+	intel_runtime_pm_put(i915);
 
-	intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL);
+	kfree(arg);
 }
 
 static void reset_finish(struct drm_i915_private *i915)
@@ -668,14 +590,26 @@ static void reset_finish(struct drm_i915_private *i915)
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 
-	lockdep_assert_held(&i915->drm.struct_mutex);
-
-	for_each_engine(engine, i915, id) {
-		engine->hangcheck.active_request = NULL;
+	for_each_engine(engine, i915, id)
 		reset_finish_engine(engine);
-	}
 
-	enable_irq(i915->drm.irq);
+	/*
+	 * Following the reset, ensure that we always reload context for
+	 * powersaving, and to correct engine->last_retired_context.
+	 */
+	if (!i915_terminally_wedged(&i915->gpu_error) &&
+	    !READ_ONCE(i915->gpu_error.restart)) {
+		struct i915_gpu_restart *arg;
+
+		arg = kmalloc(sizeof(*arg), GFP_KERNEL);
+		if (arg) {
+			arg->i915 = i915;
+			INIT_WORK(&arg->work, restart_work);
+
+			WRITE_ONCE(i915->gpu_error.restart, arg);
+			queue_work(i915->wq, &arg->work);
+		}
+	}
 }
 
 static void nop_submit_request(struct i915_request *rq)
@@ -785,7 +719,6 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
 {
 	struct i915_timeline *tl;
 
-	lockdep_assert_held(&i915->drm.struct_mutex);
 	if (!test_bit(I915_WEDGED, &i915->gpu_error.flags))
 		return true;
 
@@ -803,9 +736,9 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
 	 */
 	list_for_each_entry(tl, &i915->gt.timelines, link) {
 		struct i915_request *rq;
+		long timeout;
 
-		rq = i915_gem_active_peek(&tl->last_request,
-					  &i915->drm.struct_mutex);
+		rq = i915_gem_active_get_unlocked(&tl->last_request);
 		if (!rq)
 			continue;
 
@@ -820,12 +753,12 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
 		 * and when the seqno passes the fence, the signaler
 		 * then signals the fence waking us up).
 		 */
-		if (dma_fence_default_wait(&rq->fence, true,
-					   MAX_SCHEDULE_TIMEOUT) < 0)
+		timeout = dma_fence_default_wait(&rq->fence, true,
+						 MAX_SCHEDULE_TIMEOUT);
+		i915_request_put(rq);
+		if (timeout < 0)
 			return false;
 	}
-	i915_retire_requests(i915);
-	GEM_BUG_ON(i915->gt.active_requests);
 
 	/*
 	 * Undo nop_submit_request. We prevent all new i915 requests from
@@ -837,7 +770,6 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
 	 * context and do not require stop_machine().
 	 */
 	intel_engines_reset_default_submission(i915);
-	i915_gem_contexts_lost(i915);
 
 	GEM_TRACE("end\n");
 
@@ -877,25 +809,17 @@ void i915_reset(struct drm_i915_private *i915,
 	GEM_TRACE("flags=%lx\n", error->flags);
 
 	might_sleep();
-	lockdep_assert_held(&i915->drm.struct_mutex);
 	GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &error->flags));
 
-	if (!test_bit(I915_RESET_HANDOFF, &error->flags))
-		return;
-
 	/* Clear any previous failed attempts at recovery. Time to try again. */
 	if (!i915_gem_unset_wedged(i915))
-		goto wakeup;
+		return;
 
 	if (reason)
 		dev_notice(i915->drm.dev, "Resetting chip for %s\n", reason);
 	error->reset_count++;
 
-	ret = reset_prepare(i915);
-	if (ret) {
-		dev_err(i915->drm.dev, "GPU recovery failed\n");
-		goto taint;
-	}
+	reset_prepare(i915);
 
 	if (!intel_has_gpu_reset(i915)) {
 		if (i915_modparams.reset)
@@ -952,9 +876,6 @@ void i915_reset(struct drm_i915_private *i915,
 
 finish:
 	reset_finish(i915);
-wakeup:
-	clear_bit(I915_RESET_HANDOFF, &error->flags);
-	wake_up_bit(&error->flags, I915_RESET_HANDOFF);
 	return;
 
 taint:
@@ -973,7 +894,6 @@ void i915_reset(struct drm_i915_private *i915,
 	add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
 error:
 	i915_gem_set_wedged(i915);
-	i915_retire_requests(i915);
 	goto finish;
 }
 
@@ -999,18 +919,16 @@ static inline int intel_gt_reset_engine(struct drm_i915_private *i915,
 int i915_reset_engine(struct intel_engine_cs *engine, const char *msg)
 {
 	struct i915_gpu_error *error = &engine->i915->gpu_error;
-	struct i915_request *active_request;
 	int ret;
 
 	GEM_TRACE("%s flags=%lx\n", engine->name, error->flags);
 	GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &error->flags));
 
-	active_request = reset_prepare_engine(engine);
-	if (IS_ERR_OR_NULL(active_request)) {
-		/* Either the previous reset failed, or we pardon the reset. */
-		ret = PTR_ERR(active_request);
-		goto out;
-	}
+	if (i915_seqno_passed(intel_engine_get_seqno(engine),
+			      intel_engine_last_submit(engine)))
+		return 0;
+
+	reset_prepare_engine(engine);
 
 	if (msg)
 		dev_notice(engine->i915->drm.dev,
@@ -1034,7 +952,7 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *msg)
 	 * active request and can drop it, adjust head to skip the offending
 	 * request to resume executing remaining requests in the queue.
 	 */
-	reset_engine(engine, active_request, true);
+	intel_engine_reset(engine, true);
 
 	/*
 	 * The engine and its registers (and workarounds in case of render)
@@ -1110,30 +1028,7 @@ static void i915_reset_device(struct drm_i915_private *i915,
 	i915_wedge_on_timeout(&w, i915, 5 * HZ) {
 		intel_prepare_reset(i915);
 
-		error->reason = reason;
-		error->stalled_mask = engine_mask;
-
-		/* Signal that locked waiters should reset the GPU */
-		smp_mb__before_atomic();
-		set_bit(I915_RESET_HANDOFF, &error->flags);
-		wake_up_all(&error->wait_queue);
-
-		/*
-		 * Wait for anyone holding the lock to wakeup, without
-		 * blocking indefinitely on struct_mutex.
-		 */
-		do {
-			if (mutex_trylock(&i915->drm.struct_mutex)) {
-				i915_reset(i915, engine_mask, reason);
-				mutex_unlock(&i915->drm.struct_mutex);
-			}
-		} while (wait_on_bit_timeout(&error->flags,
-					     I915_RESET_HANDOFF,
-					     TASK_UNINTERRUPTIBLE,
-					     1));
-
-		error->stalled_mask = 0;
-		error->reason = NULL;
+		i915_reset(i915, engine_mask, reason);
 
 		intel_finish_reset(i915);
 	}
@@ -1270,3 +1165,21 @@ void i915_handle_error(struct drm_i915_private *i915,
 out:
 	intel_runtime_pm_put(i915);
 }
+
+bool i915_reset_flush(struct drm_i915_private *i915)
+{
+	int err;
+
+	cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work);
+
+	flush_workqueue(i915->wq);
+	GEM_BUG_ON(READ_ONCE(i915->gpu_error.restart));
+
+	mutex_lock(&i915->drm.struct_mutex);
+	err = i915_gem_wait_for_idle(i915,
+				     I915_WAIT_LOCKED |
+				     I915_WAIT_FOR_IDLE_BOOST);
+	mutex_unlock(&i915->drm.struct_mutex);
+
+	return !err;
+}
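
The deferred restart introduced above is a stock workqueue pattern: allocate a small context object, publish it so concurrent resets can tell a restart is already pending, and queue the work. Stripped down to its bones (illustrative names, error handling elided):

	struct restart_ctx {
		struct work_struct work;
		struct drm_i915_private *i915;
	};

	static void restart_fn(struct work_struct *work)
	{
		struct restart_ctx *arg = container_of(work, typeof(*arg), work);

		/* take struct_mutex, poke idle engines, etc. */

		kfree(arg);
	}

	static void queue_restart(struct drm_i915_private *i915)
	{
		struct restart_ctx *arg;

		arg = kmalloc(sizeof(*arg), GFP_KERNEL);
		if (!arg)
			return;	/* best effort: the reset itself already succeeded */

		arg->i915 = i915;
		INIT_WORK(&arg->work, restart_fn);
		queue_work(i915->wq, &arg->work);
	}

In the patch the published pointer (gpu_error.restart) doubles as the "already pending" flag: reset_finish() checks it with READ_ONCE() and restart_work() clears it with smp_store_mb() once it runs.
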
diff --git a/drivers/gpu/drm/i915/i915_reset.h b/drivers/gpu/drm/i915/i915_reset.h
index 09422c4772dd..39d258a82e97 100644
--- a/drivers/gpu/drm/i915/i915_reset.h
+++ b/drivers/gpu/drm/i915/i915_reset.h
@@ -27,6 +27,9 @@ void i915_reset(struct drm_i915_private *i915,
 int i915_reset_engine(struct intel_engine_cs *engine,
 		      const char *reason);
 
+void i915_reset_request(struct i915_request *rq, bool guilty);
+bool i915_reset_flush(struct drm_i915_private *i915);
+
 bool intel_has_gpu_reset(struct drm_i915_private *i915);
 bool intel_has_reset_engine(struct drm_i915_private *i915);
 
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 11aa8db47a4d..5f358e176a82 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1087,10 +1087,8 @@ void intel_engines_sanitize(struct drm_i915_private *i915)
 
 	GEM_TRACE("\n");
 
-	for_each_engine(engine, i915, id) {
-		if (engine->reset.reset)
-			engine->reset.reset(engine, NULL);
-	}
+	for_each_engine(engine, i915, id)
+		intel_engine_reset(engine, false);
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index 660c41ec71ab..df280e2085d9 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -806,8 +806,7 @@ static void guc_submission_tasklet(unsigned long data)
 		guc_dequeue(engine);
 }
 
-static struct i915_request *
-guc_reset_prepare(struct intel_engine_cs *engine)
+static void guc_reset_prepare(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 
@@ -833,8 +832,6 @@ guc_reset_prepare(struct intel_engine_cs *engine)
 	 */
 	if (engine->i915->guc.preempt_wq)
 		flush_workqueue(engine->i915->guc.preempt_wq);
-
-	return i915_gem_find_active_request(engine);
 }
 
 /*
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 0ce6b08ac64a..fa01d79dc5a3 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -137,6 +137,7 @@
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
 #include "i915_gem_render_state.h"
+#include "i915_reset.h"
 #include "i915_vgpu.h"
 #include "intel_lrc_reg.h"
 #include "intel_mocs.h"
@@ -384,9 +385,10 @@ static void unwind_wa_tail(struct i915_request *rq)
 	assert_ring_tail_valid(rq->ring, rq->tail);
 }
 
-static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
+static struct i915_request *
+__unwind_incomplete_requests(struct intel_engine_cs *engine)
 {
-	struct i915_request *rq, *rn;
+	struct i915_request *rq, *rn, *active = NULL;
 	struct list_head *uninitialized_var(pl);
 	int last_prio = I915_PRIORITY_INVALID;
 
@@ -398,7 +400,7 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
 		struct intel_engine_cs *owner;
 
 		if (i915_request_completed(rq))
-			return;
+			break;
 
 		__i915_request_unsubmit(rq);
 		unwind_wa_tail(rq);
@@ -415,7 +417,11 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
 		} else {
 			owner->submit_request(rq);
 		}
+
+		active = rq;
 	}
+
+	return active;
 }
 
 void
@@ -2107,14 +2113,13 @@ static int gen9_init_render_ring(struct intel_engine_cs *engine)
 	return 0;
 }
 
-static struct i915_request *
-execlists_reset_prepare(struct intel_engine_cs *engine)
+static void execlists_reset_prepare(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
-	struct i915_request *request, *active;
 	unsigned long flags;
 
-	GEM_TRACE("%s\n", engine->name);
+	GEM_TRACE("%s, tasklet disabled?=%d\n",
+		  engine->name, atomic_read(&execlists->tasklet.count));
 
 	/*
 	 * Prevent request submission to the hardware until we have
@@ -2126,59 +2131,20 @@ execlists_reset_prepare(struct intel_engine_cs *engine)
 	 * prevents the race.
 	 */
 	__tasklet_disable_sync_once(&execlists->tasklet);
+	GEM_BUG_ON(!reset_in_progress(execlists));
 
+	/* And flush any current direct submission. */
 	spin_lock_irqsave(&engine->timeline.lock, flags);
-
-	/*
-	 * We want to flush the pending context switches, having disabled
-	 * the tasklet above, we can assume exclusive access to the execlists.
-	 * For this allows us to catch up with an inflight preemption event,
-	 * and avoid blaming an innocent request if the stall was due to the
-	 * preemption itself.
-	 */
-	process_csb(engine);
-
-	/*
-	 * The last active request can then be no later than the last request
-	 * now in ELSP[0]. So search backwards from there, so that if the GPU
-	 * has advanced beyond the last CSB update, it will be pardoned.
-	 */
-	active = NULL;
-	request = port_request(execlists->port);
-	if (request) {
-		/*
-		 * Prevent the breadcrumb from advancing before we decide
-		 * which request is currently active.
-		 */
-		intel_engine_stop_cs(engine);
-
-		list_for_each_entry_from_reverse(request,
-						 &engine->timeline.requests,
-						 link) {
-			if (__i915_request_completed(request,
-						     request->global_seqno))
-				break;
-
-			active = request;
-		}
-	}
-
 	spin_unlock_irqrestore(&engine->timeline.lock, flags);
-
-	return active;
 }
 
-static void execlists_reset(struct intel_engine_cs *engine,
-			    struct i915_request *request)
+static void execlists_reset(struct intel_engine_cs *engine, bool stalled)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
+	struct i915_request *rq;
 	unsigned long flags;
 	u32 *regs;
 
-	GEM_TRACE("%s request global=%x, current=%d\n",
-		  engine->name, request ? request->global_seqno : 0,
-		  intel_engine_get_seqno(engine));
-
 	spin_lock_irqsave(&engine->timeline.lock, flags);
 
 	/*
@@ -2194,12 +2160,16 @@ static void execlists_reset(struct intel_engine_cs *engine,
 	reset_irq(engine);
 
 	/* Push back any incomplete requests for replay after the reset. */
-	__unwind_incomplete_requests(engine);
+	rq = __unwind_incomplete_requests(engine);
 
 	/* Following the reset, we need to reload the CSB read/write pointers */
 	reset_csb_pointers(&engine->execlists);
 
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	GEM_TRACE("%s request global=%x, current=%d\n",
+		  engine->name, rq ? rq->global_seqno : 0,
+		  intel_engine_get_seqno(engine));
+	if (!rq)
+		goto out_unlock;
 
 	/*
 	 * If the request was innocent, we leave the request in the ELSP
@@ -2212,8 +2182,9 @@ static void execlists_reset(struct intel_engine_cs *engine,
 	 * and have to at least restore the RING register in the context
 	 * image back to the expected values to skip over the guilty request.
 	 */
-	if (!request || request->fence.error != -EIO)
-		return;
+	i915_reset_request(rq, stalled);
+	if (!stalled)
+		goto out_unlock;
 
 	/*
 	 * We want a simple context + ring to execute the breadcrumb update.
@@ -2223,25 +2194,23 @@ static void execlists_reset(struct intel_engine_cs *engine,
 	 * future request will be after userspace has had the opportunity
 	 * to recreate its own state.
 	 */
-	regs = request->hw_context->lrc_reg_state;
+	regs = rq->hw_context->lrc_reg_state;
 	if (engine->pinned_default_state) {
 		memcpy(regs, /* skip restoring the vanilla PPHWSP */
 		       engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
 		       engine->context_size - PAGE_SIZE);
 	}
-	execlists_init_reg_state(regs,
-				 request->gem_context, engine, request->ring);
+	execlists_init_reg_state(regs, rq->gem_context, engine, rq->ring);
 
 	/* Move the RING_HEAD onto the breadcrumb, past the hanging batch */
-	regs[CTX_RING_BUFFER_START + 1] = i915_ggtt_offset(request->ring->vma);
+	regs[CTX_RING_BUFFER_START + 1] = i915_ggtt_offset(rq->ring->vma);
 
-	request->ring->head = intel_ring_wrap(request->ring, request->postfix);
-	regs[CTX_RING_HEAD + 1] = request->ring->head;
+	rq->ring->head = intel_ring_wrap(rq->ring, rq->postfix);
+	regs[CTX_RING_HEAD + 1] = rq->ring->head;
+	intel_ring_update_space(rq->ring);
 
-	intel_ring_update_space(request->ring);
-
-	/* Reset WaIdleLiteRestore:bdw,skl as well */
-	unwind_wa_tail(request);
+out_unlock:
+	spin_unlock_irqrestore(&engine->timeline.lock, flags);
 }
 
 static void execlists_reset_finish(struct intel_engine_cs *engine)
@@ -2261,6 +2230,7 @@ static void execlists_reset_finish(struct intel_engine_cs *engine)
 	 * serialising multiple attempts to reset so that we know that we
 	 * are the only one manipulating tasklet state.
 	 */
+	GEM_BUG_ON(!reset_in_progress(execlists));
 	__tasklet_enable_sync_once(&execlists->tasklet);
 
 	GEM_TRACE("%s\n", engine->name);
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index c2f10d899329..371eb3dbedc0 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -501,8 +501,6 @@ void intel_overlay_reset(struct drm_i915_private *dev_priv)
 	if (!overlay)
 		return;
 
-	intel_overlay_release_old_vid(overlay);
-
 	overlay->old_xscale = 0;
 	overlay->old_yscale = 0;
 	overlay->crtc = NULL;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 772fb59e6cb9..6d7ce0b6b025 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -34,6 +34,7 @@
 
 #include "i915_drv.h"
 #include "i915_gem_render_state.h"
+#include "i915_reset.h"
 #include "i915_trace.h"
 #include "intel_drv.h"
 #include "intel_workarounds.h"
@@ -535,54 +536,82 @@ static int init_ring_common(struct intel_engine_cs *engine)
 	return ret;
 }
 
-static struct i915_request *reset_prepare(struct intel_engine_cs *engine)
+static void reset_prepare(struct intel_engine_cs *engine)
 {
 	intel_engine_stop_cs(engine);
 
 	if (engine->irq_seqno_barrier)
 		engine->irq_seqno_barrier(engine);
-
-	return i915_gem_find_active_request(engine);
 }
 
-static void skip_request(struct i915_request *rq)
+static void reset_ring(struct intel_engine_cs *engine, bool stalled)
 {
-	void *vaddr = rq->ring->vaddr;
+	struct i915_timeline *tl = &engine->timeline;
+	struct i915_request *pos, *rq;
+	unsigned long flags;
 	u32 head;
 
-	head = rq->infix;
-	if (rq->postfix < head) {
-		memset32(vaddr + head, MI_NOOP,
-			 (rq->ring->size - head) / sizeof(u32));
-		head = 0;
+	rq = NULL;
+	spin_lock_irqsave(&tl->lock, flags);
+	list_for_each_entry(pos, &tl->requests, link) {
+		if (!__i915_request_completed(pos, pos->global_seqno)) {
+			rq = pos;
+			break;
+		}
 	}
-	memset32(vaddr + head, MI_NOOP, (rq->postfix - head) / sizeof(u32));
-}
-
-static void reset_ring(struct intel_engine_cs *engine, struct i915_request *rq)
-{
-	GEM_TRACE("%s seqno=%x\n", engine->name, rq ? rq->global_seqno : 0);
 
+	GEM_TRACE("%s seqno=%x, stalled? %s\n",
+		  engine->name,
+		  rq ? rq->global_seqno : 0,
+		  yesno(stalled));
 	/*
-	 * Try to restore the logical GPU state to match the continuation
-	 * of the request queue. If we skip the context/PD restore, then
-	 * the next request may try to execute assuming that its context
-	 * is valid and loaded on the GPU and so may try to access invalid
-	 * memory, prompting repeated GPU hangs.
+	 * The guilty request will get skipped on a hung engine.
 	 *
-	 * If the request was guilty, we still restore the logical state
-	 * in case the next request requires it (e.g. the aliasing ppgtt),
-	 * but skip over the hung batch.
+	 * Users of client default contexts do not rely on logical
+	 * state preserved between batches so it is safe to execute
+	 * queued requests following the hang. Non default contexts
+	 * rely on preserved state, so skipping a batch loses the
+	 * evolution of the state and it needs to be considered corrupted.
+	 * Executing more queued batches on top of corrupted state is
+	 * risky. But we take the risk by trying to advance through
+	 * the queued requests in order to make the client behaviour
+	 * more predictable around resets, by not throwing away random
+	 * amount of batches it has prepared for execution. Sophisticated
+	 * clients can use gem_reset_stats_ioctl and dma fence status
+	 * (exported via sync_file info ioctl on explicit fences) to observe
+	 * when it loses the context state and should rebuild accordingly.
 	 *
-	 * If the request was innocent, we try to replay the request with
-	 * the restored context.
+	 * The context ban, and ultimately the client ban, mechanism are safety
+	 * valves if client submission ends up resulting in nothing more than
+	 * subsequent hangs.
 	 */
+
 	if (rq) {
-		/* If the rq hung, jump to its breadcrumb and skip the batch */
-		rq->ring->head = intel_ring_wrap(rq->ring, rq->head);
-		if (rq->fence.error == -EIO)
-			skip_request(rq);
+		/*
+		 * Try to restore the logical GPU state to match the
+		 * continuation of the request queue. If we skip the
+		 * context/PD restore, then the next request may try to execute
+		 * assuming that its context is valid and loaded on the GPU and
+		 * so may try to access invalid memory, prompting repeated GPU
+		 * hangs.
+		 *
+		 * If the request was guilty, we still restore the logical
+		 * state in case the next request requires it (e.g. the
+		 * aliasing ppgtt), but skip over the hung batch.
+		 *
+		 * If the request was innocent, we try to replay the request
+		 * with the restored context.
+		 */
+		i915_reset_request(rq, stalled);
+
+		GEM_BUG_ON(rq->ring != engine->buffer);
+		head = rq->head;
+	} else {
+		head = engine->buffer->tail;
 	}
+	engine->buffer->head = intel_ring_wrap(engine->buffer, head);
+
+	spin_unlock_irqrestore(&tl->lock, flags);
 }
 
 static void reset_finish(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 05c19aa8af69..add541fadc28 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -121,7 +121,6 @@ struct intel_engine_hangcheck {
 	unsigned long action_timestamp;
 	int deadlock;
 	struct intel_instdone instdone;
-	struct i915_request *active_request;
 	bool stalled:1;
 	bool wedged:1;
 };
@@ -453,9 +452,8 @@ struct intel_engine_cs {
 	int		(*init_hw)(struct intel_engine_cs *engine);
 
 	struct {
-		struct i915_request *(*prepare)(struct intel_engine_cs *engine);
-		void (*reset)(struct intel_engine_cs *engine,
-			      struct i915_request *rq);
+		void (*prepare)(struct intel_engine_cs *engine);
+		void (*reset)(struct intel_engine_cs *engine, bool stalled);
 		void (*finish)(struct intel_engine_cs *engine);
 	} reset;
 
@@ -1086,6 +1084,13 @@ gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset)
 	return cs;
 }
 
+static inline void intel_engine_reset(struct intel_engine_cs *engine,
+				      bool stalled)
+{
+	if (engine->reset.reset)
+		engine->reset.reset(engine, stalled);
+}
+
 void intel_engines_sanitize(struct drm_i915_private *i915);
 
 static inline bool
diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
index e11df2743704..e522a02343d5 100644
--- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
@@ -390,7 +390,6 @@ static int igt_global_reset(void *arg)
 	/* Check that we can issue a global GPU reset */
 
 	global_reset_lock(i915);
-	set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags);
 
 	mutex_lock(&i915->drm.struct_mutex);
 	reset_count = i915_reset_count(&i915->gpu_error);
@@ -403,7 +402,6 @@ static int igt_global_reset(void *arg)
 	}
 	mutex_unlock(&i915->drm.struct_mutex);
 
-	GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags));
 	global_reset_unlock(i915);
 
 	if (i915_terminally_wedged(&i915->gpu_error))
@@ -513,7 +511,7 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active)
 				break;
 			}
 
-			if (!wait_for_idle(engine)) {
+			if (!i915_reset_flush(i915)) {
 				struct drm_printer p =
 					drm_info_printer(i915->drm.dev);
 
@@ -905,20 +903,13 @@ static int igt_reset_engines(void *arg)
 	return 0;
 }
 
-static u32 fake_hangcheck(struct i915_request *rq, u32 mask)
+static u32 fake_hangcheck(struct drm_i915_private *i915, u32 mask)
 {
-	struct i915_gpu_error *error = &rq->i915->gpu_error;
-	u32 reset_count = i915_reset_count(error);
+	u32 count = i915_reset_count(&i915->gpu_error);
 
-	error->stalled_mask = mask;
+	i915_reset(i915, mask, NULL);
 
-	/* set_bit() must be after we have setup the backchannel (mask) */
-	smp_mb__before_atomic();
-	set_bit(I915_RESET_HANDOFF, &error->flags);
-
-	wake_up_all(&error->wait_queue);
-
-	return reset_count;
+	return count;
 }
 
 static int igt_wait_reset(void *arg)
@@ -964,7 +955,7 @@ static int igt_wait_reset(void *arg)
 		goto out_rq;
 	}
 
-	reset_count = fake_hangcheck(rq, ALL_ENGINES);
+	reset_count = fake_hangcheck(i915, ALL_ENGINES);
 
 	timeout = i915_request_wait(rq, I915_WAIT_LOCKED, 10);
 	if (timeout < 0) {
@@ -974,7 +965,6 @@ static int igt_wait_reset(void *arg)
 		goto out_rq;
 	}
 
-	GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags));
 	if (i915_reset_count(&i915->gpu_error) == reset_count) {
 		pr_err("No GPU reset recorded!\n");
 		err = -EINVAL;
@@ -1100,12 +1090,7 @@ static int igt_reset_queue(void *arg)
 				goto fini;
 			}
 
-			reset_count = fake_hangcheck(prev, ENGINE_MASK(id));
-
-			i915_reset(i915, ENGINE_MASK(id), NULL);
-
-			GEM_BUG_ON(test_bit(I915_RESET_HANDOFF,
-					    &i915->gpu_error.flags));
+			reset_count = fake_hangcheck(i915, ENGINE_MASK(id));
 
 			if (prev->fence.error != -EIO) {
 				pr_err("GPU reset not recorded on hanging request [fence.error=%d]!\n",
-- 
2.18.0

^ permalink raw reply related	[flat|nested] 68+ messages in thread

* ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/37] drm/i915/gtt: Add read only pages to gen8_pte_encode
  2018-06-29  7:53 [PATCH 01/37] drm/i915/gtt: Add read only pages to gen8_pte_encode Chris Wilson
                   ` (35 preceding siblings ...)
  2018-06-29  7:53 ` [PATCH 37/37] drm/i915: Remove GPU reset dependence on struct_mutex Chris Wilson
@ 2018-06-29  8:58 ` Patchwork
  2018-06-29  9:14 ` ✗ Fi.CI.SPARSE: " Patchwork
                   ` (3 subsequent siblings)
  40 siblings, 0 replies; 68+ messages in thread
From: Patchwork @ 2018-06-29  8:58 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/37] drm/i915/gtt: Add read only pages to gen8_pte_encode
URL   : https://patchwork.freedesktop.org/series/45638/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
5796c40dcb3f drm/i915/gtt: Add read only pages to gen8_pte_encode
f2ea02d9cb6a drm/i915/gtt: Read-only pages for insert_entries on bdw+
-:192: WARNING:BOOL_BITFIELD: Avoid using bool as bitfield.  Prefer bool bitfields as unsigned int or u<8|16|32>
#192: FILE: drivers/gpu/drm/i915/i915_gem_gtt.h:330:
+	bool pt_kmap_wc:1;

-:195: WARNING:BOOL_BITFIELD: Avoid using bool as bitfield.  Prefer bool bitfields as unsigned int or u<8|16|32>
#195: FILE: drivers/gpu/drm/i915/i915_gem_gtt.h:333:
+	bool has_read_only:1;

-:271: WARNING:LINE_SPACING: Missing a blank line after declarations
#271: FILE: drivers/gpu/drm/i915/selftests/i915_gem_context.c:430:
+	struct drm_file *file;
+	I915_RND_STATE(prng);

-:342: ERROR:CODE_INDENT: code indent should use tabs where possible
#342: FILE: drivers/gpu/drm/i915/selftests/i915_gem_context.c:501:
+^I       ^Indwords, INTEL_INFO(i915)->num_rings);$

-:342: WARNING:SPACE_BEFORE_TAB: please, no space before tabs
#342: FILE: drivers/gpu/drm/i915/selftests/i915_gem_context.c:501:
+^I       ^Indwords, INTEL_INFO(i915)->num_rings);$

-:342: CHECK:PARENTHESIS_ALIGNMENT: Alignment should match open parenthesis
#342: FILE: drivers/gpu/drm/i915/selftests/i915_gem_context.c:501:
+	pr_info("Submitted %lu dwords (across %u engines)\n",
+	       	ndwords, INTEL_INFO(i915)->num_rings);

total: 1 errors, 4 warnings, 1 checks, 323 lines checked
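
On the BOOL_BITFIELD warnings: checkpatch asks for plain unsigned bitfields rather than bool ones, and the rewrite it suggests is mechanical. A standalone illustration (the struct names are made up; the fields mirror the flagged lines):

	/* As flagged: */
	struct flags_flagged {
		bool pt_kmap_wc:1;
		bool has_read_only:1;
	};

	/* As checkpatch would prefer: */
	struct flags_preferred {
		unsigned int pt_kmap_wc:1;
		unsigned int has_read_only:1;
	};
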
5db2b0804f50 drm/i915: Prevent writing into a read-only object via a GGTT mmap
-:182: WARNING:BOOL_BITFIELD: Avoid using bool as bitfield.  Prefer bool bitfields as unsigned int or u<8|16|32>
#182: FILE: include/drm/drm_vma_manager.h:44:
+	bool readonly:1;

total: 0 errors, 1 warnings, 0 checks, 118 lines checked
62c8b0d792a9 drm/i915: Reject attempted pwrites into a read-only object
8f1d71adde12 drm/i915/userptr: Enable read-only support on gen8+
b99af734e66a drm/i915: Move rate-limiting request retire to after submission
f38ce0a80f57 drm/i915: Move engine request retirement to intel_engine_cs
d7836ba1aadb drm/i915: Hold request reference for submission until retirement
380077f476a6 drm/i915/execlists: Switch to rb_root_cached
58dbf76c0881 drm/i915: Reserve some priority bits for internal use
7fa34185ea56 drm/i915: Combine multiple internal plists into the same i915_priolist bucket
-:163: WARNING:FUNCTION_ARGUMENTS: function definition argument 'pl' should also have an identifier name
#163: FILE: drivers/gpu/drm/i915/intel_lrc.c:367:
+	struct list_head *uninitialized_var(pl);

-:280: WARNING:FUNCTION_ARGUMENTS: function definition argument 'pl' should also have an identifier name
#280: FILE: drivers/gpu/drm/i915/intel_lrc.c:1241:
+	struct list_head *uninitialized_var(pl);

-:309: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'plist' - possible side-effects?
#309: FILE: drivers/gpu/drm/i915/intel_ringbuffer.h:197:
+#define priolist_for_each_request(it, plist, idx) \
+	for (idx = 0; idx < ARRAY_SIZE((plist)->requests); idx++) \
+		list_for_each_entry(it, &(plist)->requests[idx], sched.link)

-:309: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'idx' - possible side-effects?
#309: FILE: drivers/gpu/drm/i915/intel_ringbuffer.h:197:
+#define priolist_for_each_request(it, plist, idx) \
+	for (idx = 0; idx < ARRAY_SIZE((plist)->requests); idx++) \
+		list_for_each_entry(it, &(plist)->requests[idx], sched.link)

-:313: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'plist' - possible side-effects?
#313: FILE: drivers/gpu/drm/i915/intel_ringbuffer.h:201:
+#define priolist_for_each_request_consume(it, n, plist, idx) \
+	for (; (idx = ffs((plist)->used)); (plist)->used &= ~BIT(idx - 1)) \
+		list_for_each_entry_safe(it, n, \
+					 &(plist)->requests[idx - 1], \
+					 sched.link)

-:313: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'idx' - possible side-effects?
#313: FILE: drivers/gpu/drm/i915/intel_ringbuffer.h:201:
+#define priolist_for_each_request_consume(it, n, plist, idx) \
+	for (; (idx = ffs((plist)->used)); (plist)->used &= ~BIT(idx - 1)) \
+		list_for_each_entry_safe(it, n, \
+					 &(plist)->requests[idx - 1], \
+					 sched.link)

total: 0 errors, 2 warnings, 4 checks, 271 lines checked
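
The MACRO_ARG_REUSE checks fire because 'plist' and 'idx' each appear more than once in the macro expansion, so an argument with side effects would be evaluated repeatedly. The classic standalone illustration of the hazard:

	#define SQUARE(x) ((x) * (x))	/* reuses its argument */

	int v = 3;
	int r = SQUARE(v++);	/* expands to ((v++) * (v++)): double evaluation,
				 * and undefined behaviour to boot */

For the priolist iterators the callers pass plain variables, so these remain CHECK/warning-level notes rather than bugs.
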
0d897534417f drm/i915: Priority boost for new clients
7d685300feb5 drm/i915: Priority boost switching to an idle ring
96ca772cff30 drm/i915: Refactor export_fence() after i915_vma_move_to_active()
0513e74c8106 drm/i915: Export i915_request_skip()
b4b1dfde2a88 drm/i915: Start returning an error from i915_vma_move_to_active()
5d01124e29ba drm/i915: Track vma activity per fence.context, not per engine
-:340: WARNING:LONG_LINE: line over 100 characters
#340: FILE: drivers/gpu/drm/i915/i915_vma.c:919:
+						       &vma->vm->i915->drm.struct_mutex)->fence.context);

total: 0 errors, 1 warnings, 0 checks, 459 lines checked
b4b4f8a2fa27 drm/i915: Track the last-active inside the i915_vma
e697b7236be0 drm/i915: Stop tracking MRU activity on VMA
d6f1f6ddd451 drm/i915: Introduce the i915_user_extension_method
-:30: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#30: 
new file mode 100644

-:35: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#35: FILE: drivers/gpu/drm/i915/i915_user_extensions.c:1:
+/*

-:77: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#77: FILE: drivers/gpu/drm/i915/i915_user_extensions.h:1:
+/*

-:111: WARNING:TYPO_SPELLING: 'compatibilty' may be misspelled - perhaps 'compatibility'?
#111: FILE: include/uapi/drm/i915_drm.h:71:
+ * and backward compatibilty, is to use a list of optional structs to

total: 0 errors, 4 warnings, 0 checks, 89 lines checked
4b94cd137935 drm/i915: Extend CREATE_CONTEXT to allow inheritance ala clone()
-:346: WARNING:LINE_SPACING: Missing a blank line after declarations
#346: FILE: drivers/gpu/drm/i915/selftests/i915_gem_context.c:350:
+		struct drm_file *file;
+		IGT_TIMEOUT(end_time);

-:423: CHECK:MULTIPLE_ASSIGNMENTS: multiple assignments should be avoided
#423: FILE: drivers/gpu/drm/i915/selftests/i915_gem_context.c:414:
+		ncontexts = dw = 0;

-:494: CHECK:LINE_SPACING: Please don't use multiple blank lines
#494: FILE: drivers/gpu/drm/i915/selftests/i915_gem_context.c:480:
 
+

-:538: CHECK:MULTIPLE_ASSIGNMENTS: multiple assignments should be avoided
#538: FILE: drivers/gpu/drm/i915/selftests/i915_gem_context.c:529:
+		ncontexts = dw = 0;

-:647: WARNING:LONG_LINE: line over 100 characters
#647: FILE: include/uapi/drm/i915_drm.h:389:
+#define DRM_IOCTL_I915_GEM_CONTEXT_CREATE	DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct drm_i915_gem_context_create_v2)

-:647: WARNING:SPACING: space prohibited between function name and open parenthesis '('
#647: FILE: include/uapi/drm/i915_drm.h:389:
+#define DRM_IOCTL_I915_GEM_CONTEXT_CREATE	DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct drm_i915_gem_context_create_v2)

-:647: ERROR:COMPLEX_MACRO: Macros with complex values should be enclosed in parentheses
#647: FILE: include/uapi/drm/i915_drm.h:389:
+#define DRM_IOCTL_I915_GEM_CONTEXT_CREATE	DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct drm_i915_gem_context_create_v2)

total: 1 errors, 3 warnings, 3 checks, 564 lines checked
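
On the COMPLEX_MACRO rule in general: checkpatch wants a non-trivial macro value wrapped in parentheses so that operator precedence at the use site cannot change its meaning. A minimal illustration, unrelated to the ioctl definition quoted above:

	#define BAD_TWICE(x)	x + x		/* 3 * BAD_TWICE(2) == 8  */
	#define GOOD_TWICE(x)	((x) + (x))	/* 3 * GOOD_TWICE(2) == 12 */

The SPACING warning on the same line is simpler still: "DRM_IOWR (" just needs the stray space removed.
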
cef03f724ae0 drm/i915: Allow contexts to share a single timeline across all engines
9fe542707065 drm/i915: Fix I915_EXEC_RING_MASK
081d094df2ec drm/i915: Re-arrange execbuf so context is known before engine
ed799210e4e7 drm/i915: Allow a context to define its set of engines
24a5c97f2953 drm/i915/execlists: Flush the tasklet before unpinning
-:14: WARNING:COMMIT_LOG_LONG_LINE: Possible unwrapped commit description (prefer a maximum 75 chars per line)
#14: 
References: 60367132a214 ("drm/i915: Avoid use-after-free of ctx in request tracepoints")

-:14: ERROR:GIT_COMMIT_ID: Please use git commit description style 'commit <12+ chars of sha1> ("<title line>")' - ie: 'commit 60367132a214 ("drm/i915: Avoid use-after-free of ctx in request tracepoints")'
#14: 
References: 60367132a214 ("drm/i915: Avoid use-after-free of ctx in request tracepoints")

total: 1 errors, 1 warnings, 0 checks, 57 lines checked
970e15cf47ab drm/i915/execlists: Refactor out can_merge_rq()
35a3cf4e41b2 drm/i915: Replace nested subclassing with explicit subclasses
f84782d6c6ab RFC drm/i915: Load balancing across a virtual engine
8bf1f56ba199 drm/i915: Introduce i915_address_space.mutex
624594037120 drm/i915: Move fence register tracking to GGTT
fa045278d4b4 drm/i915: Convert fences to use a GGTT lock rather than struct_mutex
3facb74c4b93 drm/i915: Tidy i915_gem_suspend()
1a5428a95445 drm/i915: Move fence-reg interface to i915_gem_fence_reg.h
1a21e8394583 drm/i915: Dynamically allocate the array of drm_i915_gem_fence_reg
cc06844dd041 drm/i915: Pull all the reset functionality together into i915_reset.c
-:1070: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#1070: 
new file mode 100644

-:1075: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#1075: FILE: drivers/gpu/drm/i915/i915_reset.c:1:
+/*

-:1216: WARNING:TYPO_SPELLING: 'acknowledgement' may be misspelled - perhaps 'acknowledgment'?
#1216: FILE: drivers/gpu/drm/i915/i915_reset.c:142:
+	/* Assert reset for at least 20 usec, and wait for acknowledgement. */

-:1394: CHECK:LINE_SPACING: Please don't use multiple blank lines
#1394: FILE: drivers/gpu/drm/i915/i915_reset.c:320:
+
+

-:1794: WARNING:MEMORY_BARRIER: memory barrier without comment
#1794: FILE: drivers/gpu/drm/i915/i915_reset.c:720:
+	smp_mb__after_atomic();

-:2162: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'W' - possible side-effects?
#2162: FILE: drivers/gpu/drm/i915/i915_reset.c:1088:
+#define i915_wedge_on_timeout(W, DEV, TIMEOUT)				\
+	for (__init_wedge((W), (DEV), (TIMEOUT), __func__);		\
+	     (W)->i915;							\
+	     __fini_wedge((W)))

-:2173: WARNING:STATIC_CONST_CHAR_ARRAY: char * array declaration might be better as static const
#2173: FILE: drivers/gpu/drm/i915/i915_reset.c:1099:
+	char *error_event[] = { I915_ERROR_UEVENT "=1", NULL };

-:2174: WARNING:STATIC_CONST_CHAR_ARRAY: char * array declaration might be better as static const
#2174: FILE: drivers/gpu/drm/i915/i915_reset.c:1100:
+	char *reset_event[] = { I915_RESET_UEVENT "=1", NULL };

-:2175: WARNING:STATIC_CONST_CHAR_ARRAY: char * array declaration might be better as static const
#2175: FILE: drivers/gpu/drm/i915/i915_reset.c:1101:
+	char *reset_done_event[] = { I915_ERROR_UEVENT "=0", NULL };

-:2353: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#2353: FILE: drivers/gpu/drm/i915/i915_reset.h:1:
+/*

total: 0 errors, 8 warnings, 2 checks, 2807 lines checked
18b6327d9393 drm/i915: Remove GPU reset dependence on struct_mutex
-:610: WARNING:MEMORY_BARRIER: memory barrier without comment
#610: FILE: drivers/gpu/drm/i915/i915_reset.c:564:
+	smp_store_mb(i915->gpu_error.restart, NULL);

total: 0 errors, 1 warnings, 0 checks, 1238 lines checked
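
The MEMORY_BARRIER warning is purely about documentation: checkpatch wants every explicit barrier to carry a comment stating what it orders against. Something along these lines would satisfy it; the wording below is a placeholder, the real rationale is the author's to state:

	/*
	 * XXX: document the ordering here, e.g. "publish that the pending
	 * restart has been consumed before resubmitting requests; pairs
	 * with the READ_ONCE(gpu_error.restart) in reset_finish()".
	 */
	smp_store_mb(i915->gpu_error.restart, NULL);
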


^ permalink raw reply	[flat|nested] 68+ messages in thread

* ✗ Fi.CI.SPARSE: warning for series starting with [01/37] drm/i915/gtt: Add read only pages to gen8_pte_encode
  2018-06-29  7:53 [PATCH 01/37] drm/i915/gtt: Add read only pages to gen8_pte_encode Chris Wilson
                   ` (36 preceding siblings ...)
  2018-06-29  8:58 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/37] drm/i915/gtt: Add read only pages to gen8_pte_encode Patchwork
@ 2018-06-29  9:14 ` Patchwork
  2018-06-29  9:16 ` ✓ Fi.CI.BAT: success " Patchwork
                   ` (2 subsequent siblings)
  40 siblings, 0 replies; 68+ messages in thread
From: Patchwork @ 2018-06-29  9:14 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/37] drm/i915/gtt: Add read only pages to gen8_pte_encode
URL   : https://patchwork.freedesktop.org/series/45638/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Commit: drm/i915/gtt: Add read only pages to gen8_pte_encode
Okay!

Commit: drm/i915/gtt: Read-only pages for insert_entries on bdw+
+drivers/gpu/drm/i915/selftests/i915_gem_context.c:506:25: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/i915_gem_context.c:506:25: warning: expression using sizeof(void)

Commit: drm/i915: Prevent writing into a read-only object via a GGTT mmap
Okay!

Commit: drm/i915: Reject attempted pwrites into a read-only object
Okay!

Commit: drm/i915/userptr: Enable read-only support on gen8+
Okay!

Commit: drm/i915: Move rate-limiting request retire to after submission
Okay!

Commit: drm/i915: Move engine request retirement to intel_engine_cs
Okay!

Commit: drm/i915: Hold request reference for submission until retirement
Okay!

Commit: drm/i915/execlists: Switch to rb_root_cached
Okay!

Commit: drm/i915: Reserve some priority bits for internal use
Okay!

Commit: drm/i915: Combine multiple internal plists into the same i915_priolist bucket
Okay!

Commit: drm/i915: Priority boost for new clients
Okay!

Commit: drm/i915: Priority boost switching to an idle ring
Okay!

Commit: drm/i915: Refactor export_fence() after i915_vma_move_to_active()
Okay!

Commit: drm/i915: Export i915_request_skip()
Okay!

Commit: drm/i915: Start returning an error from i915_vma_move_to_active()
Okay!

Commit: drm/i915: Track vma activity per fence.context, not per engine
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3667:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3664:16: warning: expression using sizeof(void)

Commit: drm/i915: Track the last-active inside the i915_vma
Okay!

Commit: drm/i915: Stop tracking MRU activity on VMA
Okay!

Commit: drm/i915: Introduce the i915_user_extension_method
Okay!

Commit: drm/i915: Extend CREATE_CONTEXT to allow inheritance ala clone()
-O:drivers/gpu/drm/i915/selftests/i915_gem_context.c:411:25: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/selftests/i915_gem_context.c:411:25: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/i915_gem_context.c:417:33: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/i915_gem_context.c:417:33: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/i915_gem_context.c:532:33: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/i915_gem_context.c:532:33: warning: expression using sizeof(void)

Commit: drm/i915: Allow contexts to share a single timeline across all engines
Okay!

Commit: drm/i915: Fix I915_EXEC_RING_MASK
Okay!

Commit: drm/i915: Re-arrange execbuf so context is known before engine
Okay!

Commit: drm/i915: Allow a context to define its set of engines
+./include/linux/slab.h:631:13: error: undefined identifier '__builtin_mul_overflow'
+./include/linux/slab.h:631:13: warning: call with no type!

Commit: drm/i915/execlists: Flush the tasklet before unpinning
Okay!

Commit: drm/i915/execlists: Refactor out can_merge_rq()
Okay!

Commit: drm/i915: Replace nested subclassing with explicit subclasses
Okay!

Commit: RFC drm/i915: Load balancing across a virtual engine
+drivers/gpu/drm/i915/i915_gem_context.c:918:17: warning: cast removes address space of expression
+drivers/gpu/drm/i915/intel_lrc.c:744:24: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/intel_lrc.c:744:24: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/intel_ringbuffer.h:673:23: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/intel_ringbuffer.h:673:23: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/intel_ringbuffer.h:675:23: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/intel_ringbuffer.h:675:23: warning: expression using sizeof(void)

Commit: drm/i915: Introduce i915_address_space.mutex
Okay!

Commit: drm/i915: Move fence register tracking to GGTT
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3664:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3657:16: warning: expression using sizeof(void)

Commit: drm/i915: Convert fences to use a GGTT lock rather than struct_mutex
Okay!

Commit: drm/i915: Tidy i915_gem_suspend()
Okay!

Commit: drm/i915: Move fence-reg interface to i915_gem_fence_reg.h
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3657:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3642:16: warning: expression using sizeof(void)

Commit: drm/i915: Dynamically allocate the array of drm_i915_gem_fence_reg
+./include/linux/slab.h:631:13: error: not a function <noident>

Commit: drm/i915: Pull all the reset functionality together into i915_reset.c
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3642:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3613:16: warning: expression using sizeof(void)

Commit: drm/i915: Remove GPU reset dependence on struct_mutex
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3613:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3608:16: warning: expression using sizeof(void)


^ permalink raw reply	[flat|nested] 68+ messages in thread

* ✓ Fi.CI.BAT: success for series starting with [01/37] drm/i915/gtt: Add read only pages to gen8_pte_encode
  2018-06-29  7:53 [PATCH 01/37] drm/i915/gtt: Add read only pages to gen8_pte_encode Chris Wilson
                   ` (37 preceding siblings ...)
  2018-06-29  9:14 ` ✗ Fi.CI.SPARSE: " Patchwork
@ 2018-06-29  9:16 ` Patchwork
  2018-06-29 11:53 ` ✗ Fi.CI.IGT: failure " Patchwork
  2018-06-29 22:38 ` ✗ Fi.CI.BAT: failure for series starting with [01/37] drm/i915/gtt: Add read only pages to gen8_pte_encode (rev3) Patchwork
  40 siblings, 0 replies; 68+ messages in thread
From: Patchwork @ 2018-06-29  9:16 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/37] drm/i915/gtt: Add read only pages to gen8_pte_encode
URL   : https://patchwork.freedesktop.org/series/45638/
State : success

== Summary ==

= CI Bug Log - changes from CI_DRM_4403 -> Patchwork_9480 =

== Summary - SUCCESS ==

  No regressions found.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/45638/revisions/1/mbox/

== Known issues ==

  Here are the changes found in Patchwork_9480 that come from known issues:

  === IGT changes ===

    ==== Issues hit ====

    igt@kms_pipe_crc_basic@hang-read-crc-pipe-a:
      fi-skl-guc:         PASS -> FAIL (fdo#104724, fdo#103191)

    igt@kms_pipe_crc_basic@suspend-read-crc-pipe-b:
      fi-snb-2520m:       PASS -> INCOMPLETE (fdo#103713)

    
    ==== Possible fixes ====

    igt@kms_chamelium@dp-crc-fast:
      fi-kbl-7500u:       DMESG-FAIL (fdo#103841) -> PASS

    
  fdo#103191 https://bugs.freedesktop.org/show_bug.cgi?id=103191
  fdo#103713 https://bugs.freedesktop.org/show_bug.cgi?id=103713
  fdo#103841 https://bugs.freedesktop.org/show_bug.cgi?id=103841
  fdo#104724 https://bugs.freedesktop.org/show_bug.cgi?id=104724


== Participating hosts (43 -> 39) ==

  Missing    (4): fi-ilk-m540 fi-byt-squawks fi-bsw-cyan fi-hsw-4200u 


== Build changes ==

    * Linux: CI_DRM_4403 -> Patchwork_9480

  CI_DRM_4403: d5ec143981dab1497ec923c943288dbc1be673d0 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4531: a14bc8b4d69eaca189665de505e6b10cbfbb7730 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_9480: 18b6327d9393def5c12599ec1b46ee00c0b108c1 @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

18b6327d9393 drm/i915: Remove GPU reset dependence on struct_mutex
cc06844dd041 drm/i915: Pull all the reset functionality together into i915_reset.c
1a21e8394583 drm/i915: Dynamically allocate the array of drm_i915_gem_fence_reg
1a5428a95445 drm/i915: Move fence-reg interface to i915_gem_fence_reg.h
3facb74c4b93 drm/i915: Tidy i915_gem_suspend()
fa045278d4b4 drm/i915: Convert fences to use a GGTT lock rather than struct_mutex
624594037120 drm/i915: Move fence register tracking to GGTT
8bf1f56ba199 drm/i915: Introduce i915_address_space.mutex
f84782d6c6ab RFC drm/i915: Load balancing across a virtual engine
35a3cf4e41b2 drm/i915: Replace nested subclassing with explicit subclasses
970e15cf47ab drm/i915/execlists: Refactor out can_merge_rq()
24a5c97f2953 drm/i915/execlists: Flush the tasklet before unpinning
ed799210e4e7 drm/i915: Allow a context to define its set of engines
081d094df2ec drm/i915: Re-arrange execbuf so context is known before engine
9fe542707065 drm/i915: Fix I915_EXEC_RING_MASK
cef03f724ae0 drm/i915: Allow contexts to share a single timeline across all engines
4b94cd137935 drm/i915: Extend CREATE_CONTEXT to allow inheritance ala clone()
d6f1f6ddd451 drm/i915: Introduce the i915_user_extension_method
e697b7236be0 drm/i915: Stop tracking MRU activity on VMA
b4b4f8a2fa27 drm/i915: Track the last-active inside the i915_vma
5d01124e29ba drm/i915: Track vma activity per fence.context, not per engine
b4b1dfde2a88 drm/i915: Start returning an error from i915_vma_move_to_active()
0513e74c8106 drm/i915: Export i915_request_skip()
96ca772cff30 drm/i915: Refactor export_fence() after i915_vma_move_to_active()
7d685300feb5 drm/i915: Priority boost switching to an idle ring
0d897534417f drm/i915: Priority boost for new clients
7fa34185ea56 drm/i915: Combine multiple internal plists into the same i915_priolist bucket
58dbf76c0881 drm/i915: Reserve some priority bits for internal use
380077f476a6 drm/i915/execlists: Switch to rb_root_cached
d7836ba1aadb drm/i915: Hold request reference for submission until retirement
f38ce0a80f57 drm/i915: Move engine request retirement to intel_engine_cs
b99af734e66a drm/i915: Move rate-limiting request retire to after submission
8f1d71adde12 drm/i915/userptr: Enable read-only support on gen8+
62c8b0d792a9 drm/i915: Reject attempted pwrites into a read-only object
5db2b0804f50 drm/i915: Prevent writing into a read-only object via a GGTT mmap
f2ea02d9cb6a drm/i915/gtt: Read-only pages for insert_entries on bdw+
5796c40dcb3f drm/i915/gtt: Add read only pages to gen8_pte_encode

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_9480/issues.html

^ permalink raw reply	[flat|nested] 68+ messages in thread

* Re: [PATCH 06/37] drm/i915: Move rate-limiting request retire to after submission
  2018-06-29  7:53 ` [PATCH 06/37] drm/i915: Move rate-limiting request retire to after submission Chris Wilson
@ 2018-06-29 10:00   ` Tvrtko Ursulin
  2018-06-29 10:10     ` Chris Wilson
  0 siblings, 1 reply; 68+ messages in thread
From: Tvrtko Ursulin @ 2018-06-29 10:00 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 29/06/2018 08:53, Chris Wilson wrote:
> Our long standing defense against a single client from flooding the
> system with requests (causing mempressure and stalls across the whole
> system) is to retire the old request on every allocation. (By retiring
> the oldest, we try to keep returning requests back to the system in a
> steady flow.) This adds an extra step into the submission path that we
> can reduce simply by moving it to after the submission itself.
> 
> We already do try to clean up a stale request list after submission, so
> always retiring all completed requests fits in as a natural extension.

Please add to the commit message what motivated you to do this. For
example, that this or that benchmark is now better in some respect.

Regards,

Tvrtko

> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_request.c | 26 ++++++++++++++++++--------
>   1 file changed, 18 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index a2f7e9358450..4eea7a27291f 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -694,12 +694,6 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
>   	if (ret)
>   		goto err_unreserve;
>   
> -	/* Move our oldest request to the slab-cache (if not in use!) */
> -	rq = list_first_entry(&ce->ring->request_list, typeof(*rq), ring_link);
> -	if (!list_is_last(&rq->ring_link, &ce->ring->request_list) &&
> -	    i915_request_completed(rq))
> -		i915_request_retire(rq);
> -
>   	/*
>   	 * Beware: Dragons be flying overhead.
>   	 *
> @@ -1110,6 +1104,8 @@ void i915_request_add(struct i915_request *request)
>   	local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
>   
>   	/*
> +	 * Move our oldest requests to the slab-cache (if not in use!)
> +	 *
>   	 * In typical scenarios, we do not expect the previous request on
>   	 * the timeline to be still tracked by timeline->last_request if it
>   	 * has been completed. If the completed request is still here, that
> @@ -1126,8 +1122,22 @@ void i915_request_add(struct i915_request *request)
>   	 * work on behalf of others -- but instead we should benefit from
>   	 * improved resource management. (Well, that's the theory at least.)
>   	 */
> -	if (prev && i915_request_completed(prev))
> -		i915_request_retire_upto(prev);
> +	do {
> +		prev = list_first_entry(&ring->request_list,
> +					typeof(*prev), ring_link);
> +
> +		/*
> +		 * Keep the current request, the caller may not be
> +		 * expecting it to be retired (and freed!) immediately,
> +		 * and preserving one request from the client allows us to
> +		 * carry forward frequently reused state onto the next
> +		 * submission.
> +		 */
> +		if (prev == request || !i915_request_completed(prev))
> +			break;
> +
> +		i915_request_retire(prev);
> +	} while (1);
>   }
>   
>   static unsigned long local_clock_us(unsigned int *cpu)
> 

^ permalink raw reply	[flat|nested] 68+ messages in thread

* Re: [PATCH 12/37] drm/i915: Priority boost for new clients
  2018-06-29  7:53 ` [PATCH 12/37] drm/i915: Priority boost for new clients Chris Wilson
@ 2018-06-29 10:04   ` Tvrtko Ursulin
  2018-06-29 10:09     ` Chris Wilson
  0 siblings, 1 reply; 68+ messages in thread
From: Tvrtko Ursulin @ 2018-06-29 10:04 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 29/06/2018 08:53, Chris Wilson wrote:
> Taken from an idea used for FQ_CODEL, we give new request flows a
> priority boost. These flows are likely to correspond with interactive
> tasks and so be more latency sensitive than the long queues. As soon as
> the client has more than one request in the queue, further requests are
> not boosted and it settles down into ordinary steady state behaviour.
> Such small kicks dramatically help combat the starvation issue, by
> allowing each client the opportunity to run even when the system is
> under heavy throughput load (within the constraints of the user
> selected priority).

Any effect on non-micro benchmarks to mention in the commit message as 
the selling point?

> Testcase: igt/benchmarks/rrul
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> ---
>   drivers/gpu/drm/i915/i915_request.c   | 16 ++++++++++++++--
>   drivers/gpu/drm/i915/i915_scheduler.h |  4 +++-
>   2 files changed, 17 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 14bf0be6f994..2d7a785dd88c 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -1052,8 +1052,20 @@ void i915_request_add(struct i915_request *request)
>   	 */
>   	local_bh_disable();
>   	rcu_read_lock(); /* RCU serialisation for set-wedged protection */
> -	if (engine->schedule)
> -		engine->schedule(request, &request->gem_context->sched);
> +	if (engine->schedule) {
> +		struct i915_sched_attr attr = request->gem_context->sched;
> +
> +		/*
> +		 * Boost priorities to new clients (new request flows).
> +		 *
> +		 * Allow interactive/synchronous clients to jump ahead of
> +		 * the bulk clients. (FQ_CODEL)
> +		 */
> +		if (!prev || i915_request_completed(prev))
> +			attr.priority |= I915_PRIORITY_NEWCLIENT;

Now a "lucky" client can always get higher priority and keep preempting 
everyone else by just timing its submissions right. So I have big 
reservations about this one.

Regards,

Tvrtko

> +
> +		engine->schedule(request, &attr);
> +	}
>   	rcu_read_unlock();
>   	i915_sw_fence_commit(&request->submit);
>   	local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
> index 7edfad0abfd7..e9fb6c1d5e42 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.h
> +++ b/drivers/gpu/drm/i915/i915_scheduler.h
> @@ -19,12 +19,14 @@ enum {
>   	I915_PRIORITY_INVALID = INT_MIN
>   };
>   
> -#define I915_USER_PRIORITY_SHIFT 0
> +#define I915_USER_PRIORITY_SHIFT 1
>   #define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT)
>   
>   #define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT)
>   #define I915_PRIORITY_MASK (-I915_PRIORITY_COUNT)
>   
> +#define I915_PRIORITY_NEWCLIENT BIT(0)
> +
>   struct i915_sched_attr {
>   	/**
>   	 * @priority: execution and service priority
> 

^ permalink raw reply	[flat|nested] 68+ messages in thread

* Re: [PATCH 13/37] drm/i915: Priority boost switching to an idle ring
  2018-06-29  7:53 ` [PATCH 13/37] drm/i915: Priority boost switching to an idle ring Chris Wilson
@ 2018-06-29 10:08   ` Tvrtko Ursulin
  2018-06-29 10:15     ` Chris Wilson
  0 siblings, 1 reply; 68+ messages in thread
From: Tvrtko Ursulin @ 2018-06-29 10:08 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 29/06/2018 08:53, Chris Wilson wrote:
> In order to maximise concurrency between engines, if we queue a request
> to a current idle ring, reorder its dependencies to execute that request
> as early as possible and ideally improve occupancy of multiple engines
> simultaneously.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_request.c     | 6 +++++-
>   drivers/gpu/drm/i915/i915_scheduler.h   | 5 +++--
>   drivers/gpu/drm/i915/intel_ringbuffer.h | 6 ++++++
>   3 files changed, 14 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 2d7a785dd88c..d618e7127e88 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -1053,7 +1053,8 @@ void i915_request_add(struct i915_request *request)
>   	local_bh_disable();
>   	rcu_read_lock(); /* RCU serialisation for set-wedged protection */
>   	if (engine->schedule) {
> -		struct i915_sched_attr attr = request->gem_context->sched;
> +		struct i915_gem_context *ctx = request->gem_context;
> +		struct i915_sched_attr attr = ctx->sched;
>   
>   		/*
>   		 * Boost priorities to new clients (new request flows).
> @@ -1064,6 +1065,9 @@ void i915_request_add(struct i915_request *request)
>   		if (!prev || i915_request_completed(prev))
>   			attr.priority |= I915_PRIORITY_NEWCLIENT;
>   
> +		if (intel_engine_queue_is_empty(engine))
> +			attr.priority |= I915_PRIORITY_STALL;
> +
>   		engine->schedule(request, &attr);
>   	}
>   	rcu_read_unlock();
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
> index e9fb6c1d5e42..be132ceb83d9 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.h
> +++ b/drivers/gpu/drm/i915/i915_scheduler.h
> @@ -19,13 +19,14 @@ enum {
>   	I915_PRIORITY_INVALID = INT_MIN
>   };
>   
> -#define I915_USER_PRIORITY_SHIFT 1
> +#define I915_USER_PRIORITY_SHIFT 2
>   #define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT)
>   
>   #define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT)
>   #define I915_PRIORITY_MASK (-I915_PRIORITY_COUNT)
>   
> -#define I915_PRIORITY_NEWCLIENT BIT(0)
> +#define I915_PRIORITY_NEWCLIENT BIT(1)
> +#define I915_PRIORITY_STALL BIT(0)

So lower priority than new clients.

Again I have big concerns.

A client which happens to be the only one using some exotic engine would 
now be able to trash everything else running on the system, just because 
it so happens that no one else uses its favourite engine. And that's 
regardless of how much work it has queued up on other, potentially busy, 
engines.

For this and the previous patch I'd say: hello, slippery slope of 
scheduler tuning! :))

Regards,

Tvrtko

>   
>   struct i915_sched_attr {
>   	/**
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index b9ea5cee249f..b26587d0c70d 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -1080,6 +1080,12 @@ gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset)
>   
>   void intel_engines_sanitize(struct drm_i915_private *i915);
>   
> +static inline bool
> +intel_engine_queue_is_empty(const struct intel_engine_cs *engine)
> +{
> +	return RB_EMPTY_ROOT(&engine->execlists.queue.rb_root);
> +}
> +
>   bool intel_engine_is_idle(struct intel_engine_cs *engine);
>   bool intel_engines_are_idle(struct drm_i915_private *dev_priv);
>   
> 

^ permalink raw reply	[flat|nested] 68+ messages in thread

* Re: [PATCH 12/37] drm/i915: Priority boost for new clients
  2018-06-29 10:04   ` Tvrtko Ursulin
@ 2018-06-29 10:09     ` Chris Wilson
  2018-06-29 10:36       ` Tvrtko Ursulin
  0 siblings, 1 reply; 68+ messages in thread
From: Chris Wilson @ 2018-06-29 10:09 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-06-29 11:04:36)
> 
> On 29/06/2018 08:53, Chris Wilson wrote:
> > Taken from an idea used for FQ_CODEL, we give new request flows a
> > priority boost. These flows are likely to correspond with interactive
> > tasks and so be more latency sensitive than the long queues. As soon as
> > the client has more than one request in the queue, further requests are
> > not boosted and it settles down into ordinary steady state behaviour.
> > Such small kicks dramatically help combat the starvation issue, by
> > allowing each client the opportunity to run even when the system is
> > under heavy throughput load (within the constraints of the user
> > selected priority).
> 
> Any effect on non-micro benchmarks to mention in the commit message as 
> the selling point?

Desktop interactivity, subjective.
wsim showed a major impact
 
> > Testcase: igt/benchmarks/rrul
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> > ---
> >   drivers/gpu/drm/i915/i915_request.c   | 16 ++++++++++++++--
> >   drivers/gpu/drm/i915/i915_scheduler.h |  4 +++-
> >   2 files changed, 17 insertions(+), 3 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> > index 14bf0be6f994..2d7a785dd88c 100644
> > --- a/drivers/gpu/drm/i915/i915_request.c
> > +++ b/drivers/gpu/drm/i915/i915_request.c
> > @@ -1052,8 +1052,20 @@ void i915_request_add(struct i915_request *request)
> >        */
> >       local_bh_disable();
> >       rcu_read_lock(); /* RCU serialisation for set-wedged protection */
> > -     if (engine->schedule)
> > -             engine->schedule(request, &request->gem_context->sched);
> > +     if (engine->schedule) {
> > +             struct i915_sched_attr attr = request->gem_context->sched;
> > +
> > +             /*
> > +              * Boost priorities to new clients (new request flows).
> > +              *
> > +              * Allow interactive/synchronous clients to jump ahead of
> > +              * the bulk clients. (FQ_CODEL)
> > +              */
> > +             if (!prev || i915_request_completed(prev))
> > +                     attr.priority |= I915_PRIORITY_NEWCLIENT;
> 
> Now a "lucky" client can always get higher priority an keep preempting 
> everyone else by just timing it's submissions right. So I have big 
> reservations on this one.

Lucky being someone who is _idle_, everyone else being steady state. You
can't keep getting lucky and stealing the show.
-Chris

^ permalink raw reply	[flat|nested] 68+ messages in thread

* Re: [PATCH 06/37] drm/i915: Move rate-limiting request retire to after submission
  2018-06-29 10:00   ` Tvrtko Ursulin
@ 2018-06-29 10:10     ` Chris Wilson
  0 siblings, 0 replies; 68+ messages in thread
From: Chris Wilson @ 2018-06-29 10:10 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-06-29 11:00:30)
> 
> On 29/06/2018 08:53, Chris Wilson wrote:
> > Our long standing defense against a single client from flooding the
> > system with requests (causing mempressure and stalls across the whole
> > system) is to retire the old request on every allocation. (By retiring
> > the oldest, we try to keep returning requests back to the system in a
> > steady flow.) This adds an extra step into the submission path that we
> > can reduce simply by moving it to after the submission itself.
> > 
> > We already do try to clean up a stale request list after submission, so
> > always retiring all completed requests fits in as a natural extension.
> 
> Please add to the commit message what motivated you to do this. Like 
> this benchmark, or something, is now better in this or that respect. Or 
> something.

This was mentioned in

commit 9512f985c32d45ab439a210981e0d73071da395a
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Thu Jun 28 21:12:11 2018 +0100

    drm/i915/execlists: Direct submission of new requests (avoid tasklet/ksoftirqd)
-Chris

^ permalink raw reply	[flat|nested] 68+ messages in thread

* Re: [PATCH 13/37] drm/i915: Priority boost switching to an idle ring
  2018-06-29 10:08   ` Tvrtko Ursulin
@ 2018-06-29 10:15     ` Chris Wilson
  2018-06-29 10:41       ` Tvrtko Ursulin
  0 siblings, 1 reply; 68+ messages in thread
From: Chris Wilson @ 2018-06-29 10:15 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-06-29 11:08:48)
> 
> On 29/06/2018 08:53, Chris Wilson wrote:
> > In order to maximise concurrency between engines, if we queue a request
> > to a current idle ring, reorder its dependencies to execute that request
> > as early as possible and ideally improve occupancy of multiple engines
> > simultaneously.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >   drivers/gpu/drm/i915/i915_request.c     | 6 +++++-
> >   drivers/gpu/drm/i915/i915_scheduler.h   | 5 +++--
> >   drivers/gpu/drm/i915/intel_ringbuffer.h | 6 ++++++
> >   3 files changed, 14 insertions(+), 3 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> > index 2d7a785dd88c..d618e7127e88 100644
> > --- a/drivers/gpu/drm/i915/i915_request.c
> > +++ b/drivers/gpu/drm/i915/i915_request.c
> > @@ -1053,7 +1053,8 @@ void i915_request_add(struct i915_request *request)
> >       local_bh_disable();
> >       rcu_read_lock(); /* RCU serialisation for set-wedged protection */
> >       if (engine->schedule) {
> > -             struct i915_sched_attr attr = request->gem_context->sched;
> > +             struct i915_gem_context *ctx = request->gem_context;
> > +             struct i915_sched_attr attr = ctx->sched;
> >   
> >               /*
> >                * Boost priorities to new clients (new request flows).
> > @@ -1064,6 +1065,9 @@ void i915_request_add(struct i915_request *request)
> >               if (!prev || i915_request_completed(prev))
> >                       attr.priority |= I915_PRIORITY_NEWCLIENT;
> >   
> > +             if (intel_engine_queue_is_empty(engine))
> > +                     attr.priority |= I915_PRIORITY_STALL;
> > +
> >               engine->schedule(request, &attr);
> >       }
> >       rcu_read_unlock();
> > diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
> > index e9fb6c1d5e42..be132ceb83d9 100644
> > --- a/drivers/gpu/drm/i915/i915_scheduler.h
> > +++ b/drivers/gpu/drm/i915/i915_scheduler.h
> > @@ -19,13 +19,14 @@ enum {
> >       I915_PRIORITY_INVALID = INT_MIN
> >   };
> >   
> > -#define I915_USER_PRIORITY_SHIFT 1
> > +#define I915_USER_PRIORITY_SHIFT 2
> >   #define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT)
> >   
> >   #define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT)
> >   #define I915_PRIORITY_MASK (-I915_PRIORITY_COUNT)
> >   
> > -#define I915_PRIORITY_NEWCLIENT BIT(0)
> > +#define I915_PRIORITY_NEWCLIENT BIT(1)
> > +#define I915_PRIORITY_STALL BIT(0)
> 
> So lower priority than new clients.
> 
> Again I have big concerns.
> 
> A client which happens to be the only one using some exotic engine would 
> now be able to trash everything else running on the system. Just because 
> so it happens no one else uses its favourite engine. And that's 
> regardless how much work it has queued up on other, potentially busy, 
> engines.

Within the same priority level, below others, but just above steady
state. Because of that, the starvation issue here is no worse than it is
currently.
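
For reference, a minimal sketch of the resulting ordering within one user
priority level, assuming the context's sched.priority is already stored in
I915_USER_PRIORITY() form (the macro names are taken from the quoted
headers; the combination below is illustrative, not taken from the patch):

#define I915_USER_PRIORITY_SHIFT 2
#define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT)
#define I915_PRIORITY_NEWCLIENT BIT(1)
#define I915_PRIORITY_STALL     BIT(0)

/* All four requests share user priority 0; only the internal bits differ. */
int base      = I915_USER_PRIORITY(0);              /* 0b000 */
int stall     = base | I915_PRIORITY_STALL;         /* 0b001 */
int newclient = base | I915_PRIORITY_NEWCLIENT;     /* 0b010 */
int both      = newclient | I915_PRIORITY_STALL;    /* 0b011 */

/*
 * both > newclient > stall > base, while any higher user priority,
 * e.g. I915_USER_PRIORITY(1) == 0b100, still beats all of them.
 */
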
-Chris

^ permalink raw reply	[flat|nested] 68+ messages in thread

* Re: [PATCH 12/37] drm/i915: Priority boost for new clients
  2018-06-29 10:09     ` Chris Wilson
@ 2018-06-29 10:36       ` Tvrtko Ursulin
  2018-06-29 10:41         ` Chris Wilson
  2018-06-29 10:51         ` Chris Wilson
  0 siblings, 2 replies; 68+ messages in thread
From: Tvrtko Ursulin @ 2018-06-29 10:36 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 29/06/2018 11:09, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-06-29 11:04:36)
>>
>> On 29/06/2018 08:53, Chris Wilson wrote:
>>> Taken from an idea used for FQ_CODEL, we give new request flows a
>>> priority boost. These flows are likely to correspond with interactive
>>> tasks and so be more latency sensitive than the long queues. As soon as
>>> the client has more than one request in the queue, further requests are
>>> not boosted and it settles down into ordinary steady state behaviour.
>>> Such small kicks dramatically help combat the starvation issue, by
>>> allowing each client the opportunity to run even when the system is
>>> under heavy throughput load (within the constraints of the user
>>> selected priority).
>>
>> Any effect on non-micro benchmarks to mention in the commit message as
>> the selling point?
> 
> Desktop interactivity, subjective.
> wsim showed a major impact
>   
>>> Testcase: igt/benchmarks/rrul
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
>>> ---
>>>    drivers/gpu/drm/i915/i915_request.c   | 16 ++++++++++++++--
>>>    drivers/gpu/drm/i915/i915_scheduler.h |  4 +++-
>>>    2 files changed, 17 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
>>> index 14bf0be6f994..2d7a785dd88c 100644
>>> --- a/drivers/gpu/drm/i915/i915_request.c
>>> +++ b/drivers/gpu/drm/i915/i915_request.c
>>> @@ -1052,8 +1052,20 @@ void i915_request_add(struct i915_request *request)
>>>         */
>>>        local_bh_disable();
>>>        rcu_read_lock(); /* RCU serialisation for set-wedged protection */
>>> -     if (engine->schedule)
>>> -             engine->schedule(request, &request->gem_context->sched);
>>> +     if (engine->schedule) {
>>> +             struct i915_sched_attr attr = request->gem_context->sched;
>>> +
>>> +             /*
>>> +              * Boost priorities to new clients (new request flows).
>>> +              *
>>> +              * Allow interactive/synchronous clients to jump ahead of
>>> +              * the bulk clients. (FQ_CODEL)
>>> +              */
>>> +             if (!prev || i915_request_completed(prev))
>>> +                     attr.priority |= I915_PRIORITY_NEWCLIENT;
>>
>> Now a "lucky" client can always get higher priority an keep preempting
>> everyone else by just timing it's submissions right. So I have big
>> reservations on this one.
> 
> Lucky being someone who is _idle_, everyone else being steady state. You
> can't keep getting lucky and stealing the show.

Why couldn't it? All that is needed is to send a new execbuf after the 
previous one has completed.

1. First ctx A eb -> priority boost
2. Other people get back in and start executing
3. Another ctx A eb -> first has completed -> another priority boost -> 
work from 2) is preempted
4. Goto 2.

So work from 2) gets preempted for as long as this client A keeps 
submitting steadily but not too fast.
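
To make that pattern concrete, here is a minimal sketch of such a pacing
client, written with igt-style helpers purely for illustration (a
hypothetical client, not code from this series):

static void pace_one_request(int fd,
                             struct drm_i915_gem_execbuffer2 *execbuf,
                             uint32_t bo_handle)
{
        /*
         * Keep exactly one request in flight: by the time the next
         * execbuf is submitted the previous request has completed, so
         * the "!prev || i915_request_completed(prev)" check above grants
         * the NEWCLIENT boost on every submission.
         */
        for (;;) {
                gem_execbuf(fd, execbuf);   /* ~1ms of GPU work */
                gem_sync(fd, bo_handle);    /* wait for it to complete */
        }
}
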

We would need some sort of priority bump for preempted ones as well for 
this to be safer.

Regards,

Tvrtko

^ permalink raw reply	[flat|nested] 68+ messages in thread

* Re: [PATCH 12/37] drm/i915: Priority boost for new clients
  2018-06-29 10:36       ` Tvrtko Ursulin
@ 2018-06-29 10:41         ` Chris Wilson
  2018-06-29 10:51         ` Chris Wilson
  1 sibling, 0 replies; 68+ messages in thread
From: Chris Wilson @ 2018-06-29 10:41 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-06-29 11:36:50)
> 
> On 29/06/2018 11:09, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2018-06-29 11:04:36)
> >>
> >> On 29/06/2018 08:53, Chris Wilson wrote:
> >>> Taken from an idea used for FQ_CODEL, we give new request flows a
> >>> priority boost. These flows are likely to correspond with interactive
> >>> tasks and so be more latency sensitive than the long queues. As soon as
> >>> the client has more than one request in the queue, further requests are
> >>> not boosted and it settles down into ordinary steady state behaviour.
> >>> Such small kicks dramatically help combat the starvation issue, by
> >>> allowing each client the opportunity to run even when the system is
> >>> under heavy throughput load (within the constraints of the user
> >>> selected priority).
> >>
> >> Any effect on non-micro benchmarks to mention in the commit message as
> >> the selling point?
> > 
> > Desktop interactivity, subjective.
> > wsim showed a major impact
> >   
> >>> Testcase: igt/benchmarks/rrul
> >>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> >>> ---
> >>>    drivers/gpu/drm/i915/i915_request.c   | 16 ++++++++++++++--
> >>>    drivers/gpu/drm/i915/i915_scheduler.h |  4 +++-
> >>>    2 files changed, 17 insertions(+), 3 deletions(-)
> >>>
> >>> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> >>> index 14bf0be6f994..2d7a785dd88c 100644
> >>> --- a/drivers/gpu/drm/i915/i915_request.c
> >>> +++ b/drivers/gpu/drm/i915/i915_request.c
> >>> @@ -1052,8 +1052,20 @@ void i915_request_add(struct i915_request *request)
> >>>         */
> >>>        local_bh_disable();
> >>>        rcu_read_lock(); /* RCU serialisation for set-wedged protection */
> >>> -     if (engine->schedule)
> >>> -             engine->schedule(request, &request->gem_context->sched);
> >>> +     if (engine->schedule) {
> >>> +             struct i915_sched_attr attr = request->gem_context->sched;
> >>> +
> >>> +             /*
> >>> +              * Boost priorities to new clients (new request flows).
> >>> +              *
> >>> +              * Allow interactive/synchronous clients to jump ahead of
> >>> +              * the bulk clients. (FQ_CODEL)
> >>> +              */
> >>> +             if (!prev || i915_request_completed(prev))
> >>> +                     attr.priority |= I915_PRIORITY_NEWCLIENT;
> >>
> >> Now a "lucky" client can always get higher priority an keep preempting
> >> everyone else by just timing it's submissions right. So I have big
> >> reservations on this one.
> > 
> > Lucky being someone who is _idle_, everyone else being steady state. You
> > can't keep getting lucky and stealing the show.
> 
> Why couldn't it? All it is needed is to send a new execbuf after the 
> previous has completed.

That being the point.
-Chris

^ permalink raw reply	[flat|nested] 68+ messages in thread

* Re: [PATCH 13/37] drm/i915: Priority boost switching to an idle ring
  2018-06-29 10:15     ` Chris Wilson
@ 2018-06-29 10:41       ` Tvrtko Ursulin
  0 siblings, 0 replies; 68+ messages in thread
From: Tvrtko Ursulin @ 2018-06-29 10:41 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 29/06/2018 11:15, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-06-29 11:08:48)
>>
>> On 29/06/2018 08:53, Chris Wilson wrote:
>>> In order to maximise concurrency between engines, if we queue a request
>>> to a current idle ring, reorder its dependencies to execute that request
>>> as early as possible and ideally improve occupancy of multiple engines
>>> simultaneously.
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> ---
>>>    drivers/gpu/drm/i915/i915_request.c     | 6 +++++-
>>>    drivers/gpu/drm/i915/i915_scheduler.h   | 5 +++--
>>>    drivers/gpu/drm/i915/intel_ringbuffer.h | 6 ++++++
>>>    3 files changed, 14 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
>>> index 2d7a785dd88c..d618e7127e88 100644
>>> --- a/drivers/gpu/drm/i915/i915_request.c
>>> +++ b/drivers/gpu/drm/i915/i915_request.c
>>> @@ -1053,7 +1053,8 @@ void i915_request_add(struct i915_request *request)
>>>        local_bh_disable();
>>>        rcu_read_lock(); /* RCU serialisation for set-wedged protection */
>>>        if (engine->schedule) {
>>> -             struct i915_sched_attr attr = request->gem_context->sched;
>>> +             struct i915_gem_context *ctx = request->gem_context;
>>> +             struct i915_sched_attr attr = ctx->sched;
>>>    
>>>                /*
>>>                 * Boost priorities to new clients (new request flows).
>>> @@ -1064,6 +1065,9 @@ void i915_request_add(struct i915_request *request)
>>>                if (!prev || i915_request_completed(prev))
>>>                        attr.priority |= I915_PRIORITY_NEWCLIENT;
>>>    
>>> +             if (intel_engine_queue_is_empty(engine))
>>> +                     attr.priority |= I915_PRIORITY_STALL;
>>> +
>>>                engine->schedule(request, &attr);
>>>        }
>>>        rcu_read_unlock();
>>> diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
>>> index e9fb6c1d5e42..be132ceb83d9 100644
>>> --- a/drivers/gpu/drm/i915/i915_scheduler.h
>>> +++ b/drivers/gpu/drm/i915/i915_scheduler.h
>>> @@ -19,13 +19,14 @@ enum {
>>>        I915_PRIORITY_INVALID = INT_MIN
>>>    };
>>>    
>>> -#define I915_USER_PRIORITY_SHIFT 1
>>> +#define I915_USER_PRIORITY_SHIFT 2
>>>    #define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT)
>>>    
>>>    #define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT)
>>>    #define I915_PRIORITY_MASK (-I915_PRIORITY_COUNT)
>>>    
>>> -#define I915_PRIORITY_NEWCLIENT BIT(0)
>>> +#define I915_PRIORITY_NEWCLIENT BIT(1)
>>> +#define I915_PRIORITY_STALL BIT(0)
>>
>> So lower priority than new clients.
>>
>> Again I have big concerns.
>>
>> A client which happens to be the only one using some exotic engine would
>> now be able to trash everything else running on the system. Just because
>> so it happens no one else uses its favourite engine. And that's
>> regardless how much work it has queued up on other, potentially busy,
>> engines.
> 
> Within the same priority level, below others, but just above steady
> state. Because of that the starvation issue here is no worse than at
> current.

I don't follow. Currently the client which is the only one submitting to 
an engine won't cause preemption on other engines. With this patch it 
would, so it opens up a new set of unfairness issues. Or please correct 
me if I interpreted the commit message and code incorrectly.

Regards,

Tvrtko

^ permalink raw reply	[flat|nested] 68+ messages in thread

* Re: [PATCH 12/37] drm/i915: Priority boost for new clients
  2018-06-29 10:36       ` Tvrtko Ursulin
  2018-06-29 10:41         ` Chris Wilson
@ 2018-06-29 10:51         ` Chris Wilson
  2018-06-29 11:10           ` Tvrtko Ursulin
  1 sibling, 1 reply; 68+ messages in thread
From: Chris Wilson @ 2018-06-29 10:51 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-06-29 11:36:50)
> 
> On 29/06/2018 11:09, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2018-06-29 11:04:36)
> >>
> >> On 29/06/2018 08:53, Chris Wilson wrote:
> >>> Taken from an idea used for FQ_CODEL, we give new request flows a
> >>> priority boost. These flows are likely to correspond with interactive
> >>> tasks and so be more latency sensitive than the long queues. As soon as
> >>> the client has more than one request in the queue, further requests are
> >>> not boosted and it settles down into ordinary steady state behaviour.
> >>> Such small kicks dramatically help combat the starvation issue, by
> >>> allowing each client the opportunity to run even when the system is
> >>> under heavy throughput load (within the constraints of the user
> >>> selected priority).
> >>
> >> Any effect on non-micro benchmarks to mention in the commit message as
> >> the selling point?
> > 
> > Desktop interactivity, subjective.
> > wsim showed a major impact
> >   
> >>> Testcase: igt/benchmarks/rrul
> >>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> >>> ---
> >>>    drivers/gpu/drm/i915/i915_request.c   | 16 ++++++++++++++--
> >>>    drivers/gpu/drm/i915/i915_scheduler.h |  4 +++-
> >>>    2 files changed, 17 insertions(+), 3 deletions(-)
> >>>
> >>> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> >>> index 14bf0be6f994..2d7a785dd88c 100644
> >>> --- a/drivers/gpu/drm/i915/i915_request.c
> >>> +++ b/drivers/gpu/drm/i915/i915_request.c
> >>> @@ -1052,8 +1052,20 @@ void i915_request_add(struct i915_request *request)
> >>>         */
> >>>        local_bh_disable();
> >>>        rcu_read_lock(); /* RCU serialisation for set-wedged protection */
> >>> -     if (engine->schedule)
> >>> -             engine->schedule(request, &request->gem_context->sched);
> >>> +     if (engine->schedule) {
> >>> +             struct i915_sched_attr attr = request->gem_context->sched;
> >>> +
> >>> +             /*
> >>> +              * Boost priorities to new clients (new request flows).
> >>> +              *
> >>> +              * Allow interactive/synchronous clients to jump ahead of
> >>> +              * the bulk clients. (FQ_CODEL)
> >>> +              */
> >>> +             if (!prev || i915_request_completed(prev))
> >>> +                     attr.priority |= I915_PRIORITY_NEWCLIENT;
> >>
> >> Now a "lucky" client can always get higher priority an keep preempting
> >> everyone else by just timing it's submissions right. So I have big
> >> reservations on this one.
> > 
> > Lucky being someone who is _idle_, everyone else being steady state. You
> > can't keep getting lucky and stealing the show.
> 
> Why couldn't it? All it is needed is to send a new execbuf after the 
> previous has completed.
> 
> 1. First ctx A eb -> priority boost
> 2. Other people get back in and start executing
> 3. Another ctx A eb -> first has completed -> another priority boost -> 
> work from 2) is preempted
> 4. Goto 2.

So you have one client spinning; it's going to win most races and starve
the system, simply by owning struct_mutex. We give the other starving
steady-state clients an opportunity to submit ahead of the spinner when
they come to resubmit.
-Chris

^ permalink raw reply	[flat|nested] 68+ messages in thread

* Re: [PATCH 12/37] drm/i915: Priority boost for new clients
  2018-06-29 10:51         ` Chris Wilson
@ 2018-06-29 11:10           ` Tvrtko Ursulin
  2018-07-02 10:19             ` Tvrtko Ursulin
  0 siblings, 1 reply; 68+ messages in thread
From: Tvrtko Ursulin @ 2018-06-29 11:10 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 29/06/2018 11:51, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-06-29 11:36:50)
>>
>> On 29/06/2018 11:09, Chris Wilson wrote:
>>> Quoting Tvrtko Ursulin (2018-06-29 11:04:36)
>>>>
>>>> On 29/06/2018 08:53, Chris Wilson wrote:
>>>>> Taken from an idea used for FQ_CODEL, we give new request flows a
>>>>> priority boost. These flows are likely to correspond with interactive
>>>>> tasks and so be more latency sensitive than the long queues. As soon as
>>>>> the client has more than one request in the queue, further requests are
>>>>> not boosted and it settles down into ordinary steady state behaviour.
>>>>> Such small kicks dramatically help combat the starvation issue, by
>>>>> allowing each client the opportunity to run even when the system is
>>>>> under heavy throughput load (within the constraints of the user
>>>>> selected priority).
>>>>
>>>> Any effect on non-micro benchmarks to mention in the commit message as
>>>> the selling point?
>>>
>>> Desktop interactivity, subjective.
>>> wsim showed a major impact
>>>    
>>>>> Testcase: igt/benchmarks/rrul
>>>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>>> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
>>>>> ---
>>>>>     drivers/gpu/drm/i915/i915_request.c   | 16 ++++++++++++++--
>>>>>     drivers/gpu/drm/i915/i915_scheduler.h |  4 +++-
>>>>>     2 files changed, 17 insertions(+), 3 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
>>>>> index 14bf0be6f994..2d7a785dd88c 100644
>>>>> --- a/drivers/gpu/drm/i915/i915_request.c
>>>>> +++ b/drivers/gpu/drm/i915/i915_request.c
>>>>> @@ -1052,8 +1052,20 @@ void i915_request_add(struct i915_request *request)
>>>>>          */
>>>>>         local_bh_disable();
>>>>>         rcu_read_lock(); /* RCU serialisation for set-wedged protection */
>>>>> -     if (engine->schedule)
>>>>> -             engine->schedule(request, &request->gem_context->sched);
>>>>> +     if (engine->schedule) {
>>>>> +             struct i915_sched_attr attr = request->gem_context->sched;
>>>>> +
>>>>> +             /*
>>>>> +              * Boost priorities to new clients (new request flows).
>>>>> +              *
>>>>> +              * Allow interactive/synchronous clients to jump ahead of
>>>>> +              * the bulk clients. (FQ_CODEL)
>>>>> +              */
>>>>> +             if (!prev || i915_request_completed(prev))
>>>>> +                     attr.priority |= I915_PRIORITY_NEWCLIENT;
>>>>
>>>> Now a "lucky" client can always get higher priority an keep preempting
>>>> everyone else by just timing it's submissions right. So I have big
>>>> reservations on this one.
>>>
>>> Lucky being someone who is _idle_, everyone else being steady state. You
>>> can't keep getting lucky and stealing the show.
>>
>> Why couldn't it? All it is needed is to send a new execbuf after the
>> previous has completed.
>>
>> 1. First ctx A eb -> priority boost
>> 2. Other people get back in and start executing
>> 3. Another ctx A eb -> first has completed -> another priority boost ->
>> work from 2) is preempted
>> 4. Goto 2.
> 
> So you have one client spinning, it's going to win most races and starve
> the system, simply by owning struct_mutex. We give the other starving
> steady-state clients an opportunity to submit ahead of the spinner when
> they come to resubmit.

What do you mean by spinning and owning struct_mutex?

I was thinking of, for example, client A sending a 1ms batch and client B 
executing a 10ms batch.

If client A keeps submitting its batch at 1ms intervals, when will client 
B complete?

Regards,

Tvrtko

^ permalink raw reply	[flat|nested] 68+ messages in thread

* ✗ Fi.CI.IGT: failure for series starting with [01/37] drm/i915/gtt: Add read only pages to gen8_pte_encode
  2018-06-29  7:53 [PATCH 01/37] drm/i915/gtt: Add read only pages to gen8_pte_encode Chris Wilson
                   ` (38 preceding siblings ...)
  2018-06-29  9:16 ` ✓ Fi.CI.BAT: success " Patchwork
@ 2018-06-29 11:53 ` Patchwork
  2018-06-29 22:38 ` ✗ Fi.CI.BAT: failure for series starting with [01/37] drm/i915/gtt: Add read only pages to gen8_pte_encode (rev3) Patchwork
  40 siblings, 0 replies; 68+ messages in thread
From: Patchwork @ 2018-06-29 11:53 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/37] drm/i915/gtt: Add read only pages to gen8_pte_encode
URL   : https://patchwork.freedesktop.org/series/45638/
State : failure

== Summary ==

= CI Bug Log - changes from CI_DRM_4403_full -> Patchwork_9480_full =

== Summary - FAILURE ==

  Serious unknown changes coming with Patchwork_9480_full absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_9480_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  

== Possible new issues ==

  Here are the unknown changes that may have been introduced in Patchwork_9480_full:

  === IGT changes ===

    ==== Possible regressions ====

    igt@gem_ctx_param@invalid-param-set:
      shard-hsw:          PASS -> FAIL
      shard-snb:          PASS -> FAIL
      shard-glk:          PASS -> FAIL +1
      shard-apl:          PASS -> FAIL +1

    igt@gem_userptr_blits@usage-restrictions:
      shard-kbl:          PASS -> FAIL

    
== Known issues ==

  Here are the changes found in Patchwork_9480_full that come from known issues:

  === IGT changes ===

    ==== Issues hit ====

    igt@drv_selftest@live_gtt:
      shard-glk:          PASS -> INCOMPLETE (fdo#103359, k.org#198133)
      shard-apl:          PASS -> FAIL (fdo#105347)

    igt@gem_exec_schedule@pi-ringfull-vebox:
      shard-kbl:          NOTRUN -> FAIL (fdo#103158) +1

    igt@kms_flip@flip-vs-fences-interruptible:
      shard-hsw:          PASS -> DMESG-WARN (fdo#102614)

    igt@kms_flip_tiling@flip-to-x-tiled:
      shard-glk:          PASS -> FAIL (fdo#104724, fdo#103822) +1

    igt@kms_flip_tiling@flip-x-tiled:
      shard-glk:          PASS -> FAIL (fdo#104724)

    igt@kms_vblank@pipe-a-accuracy-idle:
      shard-glk:          PASS -> FAIL (fdo#102583)

    
    ==== Possible fixes ====

    igt@kms_draw_crc@draw-method-xrgb8888-render-ytiled:
      shard-glk:          FAIL (fdo#104724, fdo#103232) -> PASS

    igt@kms_flip@flip-vs-expired-vblank-interruptible:
      shard-apl:          FAIL (fdo#102887, fdo#105363) -> PASS

    igt@kms_flip_tiling@flip-to-y-tiled:
      shard-glk:          FAIL (fdo#104724, fdo#103822) -> PASS

    
  fdo#102583 https://bugs.freedesktop.org/show_bug.cgi?id=102583
  fdo#102614 https://bugs.freedesktop.org/show_bug.cgi?id=102614
  fdo#102887 https://bugs.freedesktop.org/show_bug.cgi?id=102887
  fdo#103158 https://bugs.freedesktop.org/show_bug.cgi?id=103158
  fdo#103232 https://bugs.freedesktop.org/show_bug.cgi?id=103232
  fdo#103359 https://bugs.freedesktop.org/show_bug.cgi?id=103359
  fdo#103822 https://bugs.freedesktop.org/show_bug.cgi?id=103822
  fdo#104724 https://bugs.freedesktop.org/show_bug.cgi?id=104724
  fdo#105347 https://bugs.freedesktop.org/show_bug.cgi?id=105347
  fdo#105363 https://bugs.freedesktop.org/show_bug.cgi?id=105363
  k.org#198133 https://bugzilla.kernel.org/show_bug.cgi?id=198133


== Participating hosts (5 -> 5) ==

  No changes in participating hosts


== Build changes ==

    * Linux: CI_DRM_4403 -> Patchwork_9480

  CI_DRM_4403: d5ec143981dab1497ec923c943288dbc1be673d0 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4531: a14bc8b4d69eaca189665de505e6b10cbfbb7730 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_9480: 18b6327d9393def5c12599ec1b46ee00c0b108c1 @ git://anongit.freedesktop.org/gfx-ci/linux
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_9480/shards.html

^ permalink raw reply	[flat|nested] 68+ messages in thread

* Re: [PATCH 14/37] drm/i915: Refactor export_fence() after i915_vma_move_to_active()
  2018-06-29  7:53 ` [PATCH 14/37] drm/i915: Refactor export_fence() after i915_vma_move_to_active() Chris Wilson
@ 2018-06-29 12:00   ` Tvrtko Ursulin
  0 siblings, 0 replies; 68+ messages in thread
From: Tvrtko Ursulin @ 2018-06-29 12:00 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 29/06/2018 08:53, Chris Wilson wrote:
> Currently all callers are responsible for adding the vma to the active
> timeline and then exporting its fence. Combine the two operations into
> i915_vma_move_to_active() to move all the extra handling from the
> callers to the single site.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_gem_execbuffer.c    | 47 +++++++++----------
>   drivers/gpu/drm/i915/selftests/huge_pages.c   |  4 --
>   .../drm/i915/selftests/i915_gem_coherency.c   |  4 --
>   .../gpu/drm/i915/selftests/i915_gem_context.c |  4 --
>   4 files changed, 21 insertions(+), 38 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index c2dd9b4cdace..91f20445147f 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -1166,15 +1166,9 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
>   
>   	GEM_BUG_ON(!reservation_object_test_signaled_rcu(batch->resv, true));
>   	i915_vma_move_to_active(batch, rq, 0);
> -	reservation_object_lock(batch->resv, NULL);
> -	reservation_object_add_excl_fence(batch->resv, &rq->fence);
> -	reservation_object_unlock(batch->resv);

Here the exclusive fence is no longer added, because the flags are zero.
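
A minimal sketch of the behaviour change being flagged here, assuming the
export_fence() helper added further down in this patch (illustrative only,
locking elided):

/* before: an exclusive fence was attached unconditionally */
reservation_object_add_excl_fence(batch->resv, &rq->fence);

/*
 * after: i915_vma_move_to_active(batch, rq, 0) ends in
 * export_fence(batch, rq, 0), and with flags == 0 that takes the
 * shared-fence branch instead:
 */
if (reservation_object_reserve_shared(batch->resv) == 0)
        reservation_object_add_shared_fence(batch->resv, &rq->fence);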

>   	i915_vma_unpin(batch);
>   
>   	i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
> -	reservation_object_lock(vma->resv, NULL);
> -	reservation_object_add_excl_fence(vma->resv, &rq->fence);
> -	reservation_object_unlock(vma->resv);
>   
>   	rq->batch = batch;
>   
> @@ -1771,25 +1765,6 @@ static int eb_relocate(struct i915_execbuffer *eb)
>   	return eb_relocate_slow(eb);
>   }
>   
> -static void eb_export_fence(struct i915_vma *vma,
> -			    struct i915_request *rq,
> -			    unsigned int flags)
> -{
> -	struct reservation_object *resv = vma->resv;
> -
> -	/*
> -	 * Ignore errors from failing to allocate the new fence, we can't
> -	 * handle an error right now. Worst case should be missed
> -	 * synchronisation leading to rendering corruption.
> -	 */
> -	reservation_object_lock(resv, NULL);
> -	if (flags & EXEC_OBJECT_WRITE)
> -		reservation_object_add_excl_fence(resv, &rq->fence);
> -	else if (reservation_object_reserve_shared(resv) == 0)
> -		reservation_object_add_shared_fence(resv, &rq->fence);
> -	reservation_object_unlock(resv);
> -}
> -
>   static int eb_move_to_gpu(struct i915_execbuffer *eb)
>   {
>   	const unsigned int count = eb->buffer_count;
> @@ -1844,7 +1819,6 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
>   		struct i915_vma *vma = eb->vma[i];
>   
>   		i915_vma_move_to_active(vma, eb->request, flags);
> -		eb_export_fence(vma, eb->request, flags);
>   
>   		__eb_unreserve_vma(vma, flags);
>   		vma->exec_flags = NULL;
> @@ -1884,6 +1858,25 @@ static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
>   	return true;
>   }
>   
> +static void export_fence(struct i915_vma *vma,
> +			 struct i915_request *rq,
> +			 unsigned int flags)
> +{
> +	struct reservation_object *resv = vma->resv;
> +
> +	/*
> +	 * Ignore errors from failing to allocate the new fence, we can't
> +	 * handle an error right now. Worst case should be missed
> +	 * synchronisation leading to rendering corruption.
> +	 */
> +	reservation_object_lock(resv, NULL);
> +	if (flags & EXEC_OBJECT_WRITE)
> +		reservation_object_add_excl_fence(resv, &rq->fence);
> +	else if (reservation_object_reserve_shared(resv) == 0)
> +		reservation_object_add_shared_fence(resv, &rq->fence);
> +	reservation_object_unlock(resv);
> +}
> +
>   void i915_vma_move_to_active(struct i915_vma *vma,
>   			     struct i915_request *rq,
>   			     unsigned int flags)
> @@ -1921,6 +1914,8 @@ void i915_vma_move_to_active(struct i915_vma *vma,
>   
>   	if (flags & EXEC_OBJECT_NEEDS_FENCE)
>   		i915_gem_active_set(&vma->last_fence, rq);
> +
> +	export_fence(vma, rq, flags);
>   }
>   
>   static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
> diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c
> index b5e87fcdcdae..358fc81f6c99 100644
> --- a/drivers/gpu/drm/i915/selftests/huge_pages.c
> +++ b/drivers/gpu/drm/i915/selftests/huge_pages.c
> @@ -998,10 +998,6 @@ static int gpu_write(struct i915_vma *vma,
>   
>   	i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
>   
> -	reservation_object_lock(vma->resv, NULL);
> -	reservation_object_add_excl_fence(vma->resv, &rq->fence);
> -	reservation_object_unlock(vma->resv);
> -
>   err_request:
>   	i915_request_add(rq);
>   
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
> index a4900091ae3d..11427aae0853 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
> @@ -225,10 +225,6 @@ static int gpu_set(struct drm_i915_gem_object *obj,
>   	i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
>   	i915_vma_unpin(vma);
>   
> -	reservation_object_lock(obj->resv, NULL);
> -	reservation_object_add_excl_fence(obj->resv, &rq->fence);
> -	reservation_object_unlock(obj->resv);
> -
>   	i915_request_add(rq);
>   
>   	return 0;
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> index 3169b72180d0..a78f8bdb8222 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> @@ -179,10 +179,6 @@ static int gpu_fill(struct drm_i915_gem_object *obj,
>   	i915_vma_move_to_active(vma, rq, 0);
>   	i915_vma_unpin(vma);
>   
> -	reservation_object_lock(obj->resv, NULL);
> -	reservation_object_add_excl_fence(obj->resv, &rq->fence);

Here as well the exclusive fence won't be added, due to the zero flags.

> -	reservation_object_unlock(obj->resv);
> -
>   	i915_request_add(rq);
>   
>   	return 0;
> 

There is one i915_vma_move_to_active in render state and one in gvt 
which will now have some extra processing/allocations; do we care?

Regards,

Tvrtko

^ permalink raw reply	[flat|nested] 68+ messages in thread

* Re: [PATCH 15/37] drm/i915: Export i915_request_skip()
  2018-06-29  7:53 ` [PATCH 15/37] drm/i915: Export i915_request_skip() Chris Wilson
@ 2018-06-29 12:10   ` Tvrtko Ursulin
  2018-06-29 12:15     ` Chris Wilson
  0 siblings, 1 reply; 68+ messages in thread
From: Tvrtko Ursulin @ 2018-06-29 12:10 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 29/06/2018 08:53, Chris Wilson wrote:
> In the next patch, we will want to start skipping requests on failing to
> complete their payloads. So export the utility function currently used to
> make requests inoperable following a failed gpu reset.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_gem.c     | 25 +++----------------------
>   drivers/gpu/drm/i915/i915_request.c | 21 +++++++++++++++++++++
>   drivers/gpu/drm/i915/i915_request.h |  2 ++
>   3 files changed, 26 insertions(+), 22 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index c13d5b78a02e..8ae293c75ae9 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3084,25 +3084,6 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv)
>   	return err;
>   }
>   
> -static void skip_request(struct i915_request *request)
> -{
> -	void *vaddr = request->ring->vaddr;
> -	u32 head;
> -
> -	/* As this request likely depends on state from the lost
> -	 * context, clear out all the user operations leaving the
> -	 * breadcrumb at the end (so we get the fence notifications).
> -	 */
> -	head = request->head;
> -	if (request->postfix < head) {
> -		memset(vaddr + head, 0, request->ring->size - head);
> -		head = 0;
> -	}
> -	memset(vaddr + head, 0, request->postfix - head);
> -
> -	dma_fence_set_error(&request->fence, -EIO);
> -}
> -
>   static void engine_skip_context(struct i915_request *request)
>   {
>   	struct intel_engine_cs *engine = request->engine;
> @@ -3117,10 +3098,10 @@ static void engine_skip_context(struct i915_request *request)
>   
>   	list_for_each_entry_continue(request, &engine->timeline.requests, link)
>   		if (request->gem_context == hung_ctx)
> -			skip_request(request);
> +			i915_request_skip(request, -EIO);
>   
>   	list_for_each_entry(request, &timeline->requests, link)
> -		skip_request(request);
> +		i915_request_skip(request, -EIO);
>   
>   	spin_unlock(&timeline->lock);
>   	spin_unlock_irqrestore(&engine->timeline.lock, flags);
> @@ -3163,7 +3144,7 @@ i915_gem_reset_request(struct intel_engine_cs *engine,
>   
>   	if (stalled) {
>   		i915_gem_context_mark_guilty(request->gem_context);
> -		skip_request(request);
> +		i915_request_skip(request, -EIO);
>   
>   		/* If this context is now banned, skip all pending requests. */
>   		if (i915_gem_context_is_banned(request->gem_context))
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index d618e7127e88..51f28786b0e4 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -962,6 +962,27 @@ i915_request_await_object(struct i915_request *to,
>   	return ret;
>   }
>   
> +void i915_request_skip(struct i915_request *rq, int error)
> +{
> +	void *vaddr = rq->ring->vaddr;
> +	u32 head;
> +
> +	GEM_BUG_ON(!IS_ERR_VALUE((long)error));

Why not simply error >= 0 ? Error is an integer in this function, and in 
dma_fence_set_error. In fact, looking at dma_fence_set_error it is 
already checking for valid errno.

> +	dma_fence_set_error(&rq->fence, error);

Was at the end before code movement.

> +
> +	/*
> +	 * As this request likely depends on state from the lost
> +	 * context, clear out all the user operations leaving the
> +	 * breadcrumb at the end (so we get the fence notifications).
> +	 */
> +	head = rq->infix;

The original assignment is head = rq->head. It looks to be the same, but
why change it in a code movement patch?

> +	if (rq->postfix < head) {
> +		memset(vaddr + head, 0, rq->ring->size - head);
> +		head = 0;
> +	}
> +	memset(vaddr + head, 0, rq->postfix - head);
> +}
> +
>   /*
>    * NB: This function is not allowed to fail. Doing so would mean the the
>    * request is not being tracked for completion but the work itself is
> diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
> index 7ee220ded9c9..a355a081485f 100644
> --- a/drivers/gpu/drm/i915/i915_request.h
> +++ b/drivers/gpu/drm/i915/i915_request.h
> @@ -258,6 +258,8 @@ void i915_request_add(struct i915_request *rq);
>   void __i915_request_submit(struct i915_request *request);
>   void i915_request_submit(struct i915_request *request);
>   
> +void i915_request_skip(struct i915_request *request, int error);
> +
>   void __i915_request_unsubmit(struct i915_request *request);
>   void i915_request_unsubmit(struct i915_request *request);
>   
> 

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 68+ messages in thread

* Re: [PATCH 15/37] drm/i915: Export i915_request_skip()
  2018-06-29 12:10   ` Tvrtko Ursulin
@ 2018-06-29 12:15     ` Chris Wilson
  0 siblings, 0 replies; 68+ messages in thread
From: Chris Wilson @ 2018-06-29 12:15 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-06-29 13:10:32)
> 
> On 29/06/2018 08:53, Chris Wilson wrote:
> > In the next patch, we will want to start skipping requests on failing to
> > complete their payloads. So export the utility function currently used to
> > make requests inoperable following a failed gpu reset.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >   drivers/gpu/drm/i915/i915_gem.c     | 25 +++----------------------
> >   drivers/gpu/drm/i915/i915_request.c | 21 +++++++++++++++++++++
> >   drivers/gpu/drm/i915/i915_request.h |  2 ++
> >   3 files changed, 26 insertions(+), 22 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index c13d5b78a02e..8ae293c75ae9 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -3084,25 +3084,6 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv)
> >       return err;
> >   }
> >   
> > -static void skip_request(struct i915_request *request)
> > -{
> > -     void *vaddr = request->ring->vaddr;
> > -     u32 head;
> > -
> > -     /* As this request likely depends on state from the lost
> > -      * context, clear out all the user operations leaving the
> > -      * breadcrumb at the end (so we get the fence notifications).
> > -      */
> > -     head = request->head;
> > -     if (request->postfix < head) {
> > -             memset(vaddr + head, 0, request->ring->size - head);
> > -             head = 0;
> > -     }
> > -     memset(vaddr + head, 0, request->postfix - head);
> > -
> > -     dma_fence_set_error(&request->fence, -EIO);
> > -}
> > -
> >   static void engine_skip_context(struct i915_request *request)
> >   {
> >       struct intel_engine_cs *engine = request->engine;
> > @@ -3117,10 +3098,10 @@ static void engine_skip_context(struct i915_request *request)
> >   
> >       list_for_each_entry_continue(request, &engine->timeline.requests, link)
> >               if (request->gem_context == hung_ctx)
> > -                     skip_request(request);
> > +                     i915_request_skip(request, -EIO);
> >   
> >       list_for_each_entry(request, &timeline->requests, link)
> > -             skip_request(request);
> > +             i915_request_skip(request, -EIO);
> >   
> >       spin_unlock(&timeline->lock);
> >       spin_unlock_irqrestore(&engine->timeline.lock, flags);
> > @@ -3163,7 +3144,7 @@ i915_gem_reset_request(struct intel_engine_cs *engine,
> >   
> >       if (stalled) {
> >               i915_gem_context_mark_guilty(request->gem_context);
> > -             skip_request(request);
> > +             i915_request_skip(request, -EIO);
> >   
> >               /* If this context is now banned, skip all pending requests. */
> >               if (i915_gem_context_is_banned(request->gem_context))
> > diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> > index d618e7127e88..51f28786b0e4 100644
> > --- a/drivers/gpu/drm/i915/i915_request.c
> > +++ b/drivers/gpu/drm/i915/i915_request.c
> > @@ -962,6 +962,27 @@ i915_request_await_object(struct i915_request *to,
> >       return ret;
> >   }
> >   
> > +void i915_request_skip(struct i915_request *rq, int error)
> > +{
> > +     void *vaddr = rq->ring->vaddr;
> > +     u32 head;
> > +
> > +     GEM_BUG_ON(!IS_ERR_VALUE((long)error));
> 
> Why not simply error >= 0 ?

Because anything < -MAX_ERRNO would also be invalid, as the error gets
reported to userspace.
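
Roughly (going by the linux/err.h definitions, where MAX_ERRNO is 4095,
so IS_ERR_VALUE(x) is true only for x in [-MAX_ERRNO, -1]):

	GEM_BUG_ON(error >= 0);                 /* rejects non-errors only */
	GEM_BUG_ON(!IS_ERR_VALUE((long)error)); /* also rejects e.g. -5000 */

The second form documents that the value must be a real errno, not just
negative.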

> Error is an integer in this function, and in 
> dma_fence_set_error. In fact, looking at dma_fence_set_error it is 
> already checking for valid errno.
> 
> > +     dma_fence_set_error(&rq->fence, error);
> 
> Was at the end before code movement.

Pulled it up to document the range of valid error.

> > +
> > +     /*
> > +      * As this request likely depends on state from the lost
> > +      * context, clear out all the user operations leaving the
> > +      * breadcrumb at the end (so we get the fence notifications).
> > +      */
> > +     head = rq->infix;
> 
> The original assignment is head = rq->head. It looks to be the same, but
> why change it in a code movement patch?

For correctness in the callers of i915_request_skip. The point was to
export a function that could be used rather than having to duplicate all
but a line.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 68+ messages in thread

* Re: [PATCH 16/37] drm/i915: Start returning an error from i915_vma_move_to_active()
  2018-06-29  7:53 ` [PATCH 16/37] drm/i915: Start returning an error from i915_vma_move_to_active() Chris Wilson
@ 2018-06-29 12:17   ` Tvrtko Ursulin
  0 siblings, 0 replies; 68+ messages in thread
From: Tvrtko Ursulin @ 2018-06-29 12:17 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 29/06/2018 08:53, Chris Wilson wrote:
> Handling such a late error in request construction is tricky, but to
> accommodate future patches which may allocate here, we potentially could
> err. To handle the error after already adjusting global state to track
> the new request, we must finish and submit the request. But we don't
> want to use the request as not everything is being tracked by it, so we
> opt to cancel the commands inside the request.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/gvt/scheduler.c          |  6 ++++-
>   drivers/gpu/drm/i915/i915_drv.h               |  6 ++---
>   drivers/gpu/drm/i915/i915_gem_execbuffer.c    | 25 +++++++++++++------
>   drivers/gpu/drm/i915/i915_gem_render_state.c  |  2 +-
>   drivers/gpu/drm/i915/selftests/huge_pages.c   |  9 +++++--
>   .../drm/i915/selftests/i915_gem_coherency.c   |  4 +--
>   .../gpu/drm/i915/selftests/i915_gem_context.c | 12 +++++++--
>   .../gpu/drm/i915/selftests/i915_gem_object.c  |  7 +++---
>   drivers/gpu/drm/i915/selftests/i915_request.c |  8 ++++--
>   .../gpu/drm/i915/selftests/intel_hangcheck.c  | 11 ++++++--
>   drivers/gpu/drm/i915/selftests/intel_lrc.c    | 11 ++++++--
>   .../drm/i915/selftests/intel_workarounds.c    |  5 +++-
>   12 files changed, 78 insertions(+), 28 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
> index 928818f218f7..b0e566956b8d 100644
> --- a/drivers/gpu/drm/i915/gvt/scheduler.c
> +++ b/drivers/gpu/drm/i915/gvt/scheduler.c
> @@ -476,7 +476,11 @@ static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)
>   			i915_gem_obj_finish_shmem_access(bb->obj);
>   			bb->accessing = false;
>   
> -			i915_vma_move_to_active(bb->vma, workload->req, 0);
> +			ret = i915_vma_move_to_active(bb->vma,
> +						      workload->req,
> +						      0);
> +			if (ret)
> +				goto err;
>   		}
>   	}
>   	return 0;
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index af783a32abd8..33e5ac8bf0b8 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -3104,9 +3104,9 @@ i915_gem_obj_finish_shmem_access(struct drm_i915_gem_object *obj)
>   }
>   
>   int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
> -void i915_vma_move_to_active(struct i915_vma *vma,
> -			     struct i915_request *rq,
> -			     unsigned int flags);
> +int __must_check i915_vma_move_to_active(struct i915_vma *vma,
> +					 struct i915_request *rq,
> +					 unsigned int flags);
>   int i915_gem_dumb_create(struct drm_file *file_priv,
>   			 struct drm_device *dev,
>   			 struct drm_mode_create_dumb *args);
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 91f20445147f..97136e4ce91d 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -1165,12 +1165,16 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
>   		goto err_request;
>   
>   	GEM_BUG_ON(!reservation_object_test_signaled_rcu(batch->resv, true));
> -	i915_vma_move_to_active(batch, rq, 0);
> -	i915_vma_unpin(batch);
> +	err = i915_vma_move_to_active(batch, rq, 0);
> +	if (err)
> +		goto skip_request;
>   
> -	i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
> +	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
> +	if (err)
> +		goto skip_request;
>   
>   	rq->batch = batch;
> +	i915_vma_unpin(batch);
>   
>   	cache->rq = rq;
>   	cache->rq_cmd = cmd;
> @@ -1179,6 +1183,8 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
>   	/* Return with batch mapping (cmd) still pinned */
>   	return 0;
>   
> +skip_request:
> +	i915_request_skip(rq, err);
>   err_request:
>   	i915_request_add(rq);
>   err_unpin:
> @@ -1818,7 +1824,11 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
>   		unsigned int flags = eb->flags[i];
>   		struct i915_vma *vma = eb->vma[i];
>   
> -		i915_vma_move_to_active(vma, eb->request, flags);
> +		err = i915_vma_move_to_active(vma, eb->request, flags);
> +		if (unlikely(err)) {
> +			i915_request_skip(eb->request, err);
> +			return err;
> +		}
>   
>   		__eb_unreserve_vma(vma, flags);
>   		vma->exec_flags = NULL;
> @@ -1877,9 +1887,9 @@ static void export_fence(struct i915_vma *vma,
>   	reservation_object_unlock(resv);
>   }
>   
> -void i915_vma_move_to_active(struct i915_vma *vma,
> -			     struct i915_request *rq,
> -			     unsigned int flags)
> +int i915_vma_move_to_active(struct i915_vma *vma,
> +			    struct i915_request *rq,
> +			    unsigned int flags)
>   {
>   	struct drm_i915_gem_object *obj = vma->obj;
>   	const unsigned int idx = rq->engine->id;
> @@ -1916,6 +1926,7 @@ void i915_vma_move_to_active(struct i915_vma *vma,
>   		i915_gem_active_set(&vma->last_fence, rq);
>   
>   	export_fence(vma, rq, flags);
> +	return 0;
>   }
>   
>   static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
> diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
> index 3210cedfa46c..90baf9086d0a 100644
> --- a/drivers/gpu/drm/i915/i915_gem_render_state.c
> +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
> @@ -222,7 +222,7 @@ int i915_gem_render_state_emit(struct i915_request *rq)
>   			goto err_unpin;
>   	}
>   
> -	i915_vma_move_to_active(so.vma, rq, 0);
> +	err = i915_vma_move_to_active(so.vma, rq, 0);
>   err_unpin:
>   	i915_vma_unpin(so.vma);
>   err_vma:
> diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c
> index 358fc81f6c99..d33e20940e0a 100644
> --- a/drivers/gpu/drm/i915/selftests/huge_pages.c
> +++ b/drivers/gpu/drm/i915/selftests/huge_pages.c
> @@ -985,7 +985,10 @@ static int gpu_write(struct i915_vma *vma,
>   		goto err_request;
>   	}
>   
> -	i915_vma_move_to_active(batch, rq, 0);
> +	err = i915_vma_move_to_active(batch, rq, 0);
> +	if (err)
> +		goto err_request;
> +
>   	i915_gem_object_set_active_reference(batch->obj);
>   	i915_vma_unpin(batch);
>   	i915_vma_close(batch);
> @@ -996,7 +999,9 @@ static int gpu_write(struct i915_vma *vma,
>   	if (err)
>   		goto err_request;
>   
> -	i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
> +	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
> +	if (err)
> +		i915_request_skip(rq, err);
>   
>   err_request:
>   	i915_request_add(rq);
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
> index 11427aae0853..328585459c67 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
> @@ -222,12 +222,12 @@ static int gpu_set(struct drm_i915_gem_object *obj,
>   	}
>   	intel_ring_advance(rq, cs);
>   
> -	i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
> +	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
>   	i915_vma_unpin(vma);
>   
>   	i915_request_add(rq);
>   
> -	return 0;
> +	return err;
>   }
>   
>   static bool always_valid(struct drm_i915_private *i915)
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> index a78f8bdb8222..5bbffe9fcdda 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> @@ -171,18 +171,26 @@ static int gpu_fill(struct drm_i915_gem_object *obj,
>   	if (err)
>   		goto err_request;
>   
> -	i915_vma_move_to_active(batch, rq, 0);
> +	err = i915_vma_move_to_active(batch, rq, 0);
> +	if (err)
> +		goto skip_request;
> +
> +	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
> +	if (err)
> +		goto skip_request;
> +
>   	i915_gem_object_set_active_reference(batch->obj);
>   	i915_vma_unpin(batch);
>   	i915_vma_close(batch);
>   
> -	i915_vma_move_to_active(vma, rq, 0);
>   	i915_vma_unpin(vma);
>   
>   	i915_request_add(rq);
>   
>   	return 0;
>   
> +skip_request:
> +	i915_request_skip(rq, err);
>   err_request:
>   	i915_request_add(rq);
>   err_batch:
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
> index 549707b9d738..eb863b45b7c7 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
> @@ -454,12 +454,13 @@ static int make_obj_busy(struct drm_i915_gem_object *obj)
>   		return PTR_ERR(rq);
>   	}
>   
> -	i915_vma_move_to_active(vma, rq, 0);
> +	err = i915_vma_move_to_active(vma, rq, 0);
>   	i915_request_add(rq);
>   
> -	i915_gem_object_set_active_reference(obj);
> +	__i915_gem_object_release_unless_active(obj);
>   	i915_vma_unpin(vma);
> -	return 0;
> +
> +	return err;
>   }
>   
>   static bool assert_mmap_offset(struct drm_i915_private *i915,
> diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
> index 521ae4a90ddf..c27f77a24ce0 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_request.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_request.c
> @@ -678,7 +678,9 @@ static int live_all_engines(void *arg)
>   			i915_gem_object_set_active_reference(batch->obj);
>   		}
>   
> -		i915_vma_move_to_active(batch, request[id], 0);
> +		err = i915_vma_move_to_active(batch, request[id], 0);
> +		GEM_BUG_ON(err);
> +
>   		i915_request_get(request[id]);
>   		i915_request_add(request[id]);
>   	}
> @@ -788,7 +790,9 @@ static int live_sequential_engines(void *arg)
>   		GEM_BUG_ON(err);
>   		request[id]->batch = batch;
>   
> -		i915_vma_move_to_active(batch, request[id], 0);
> +		err = i915_vma_move_to_active(batch, request[id], 0);
> +		GEM_BUG_ON(err);
> +
>   		i915_gem_object_set_active_reference(batch->obj);
>   		i915_vma_get(batch);
>   
> diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
> index fe7d3190ebfe..e11df2743704 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
> @@ -130,13 +130,19 @@ static int emit_recurse_batch(struct hang *h,
>   	if (err)
>   		goto unpin_vma;
>   
> -	i915_vma_move_to_active(vma, rq, 0);
> +	err = i915_vma_move_to_active(vma, rq, 0);
> +	if (err)
> +		goto unpin_hws;
> +
>   	if (!i915_gem_object_has_active_reference(vma->obj)) {
>   		i915_gem_object_get(vma->obj);
>   		i915_gem_object_set_active_reference(vma->obj);
>   	}
>   
> -	i915_vma_move_to_active(hws, rq, 0);
> +	err = i915_vma_move_to_active(hws, rq, 0);
> +	if (err)
> +		goto unpin_hws;
> +
>   	if (!i915_gem_object_has_active_reference(hws->obj)) {
>   		i915_gem_object_get(hws->obj);
>   		i915_gem_object_set_active_reference(hws->obj);
> @@ -205,6 +211,7 @@ static int emit_recurse_batch(struct hang *h,
>   
>   	err = rq->engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, flags);
>   
> +unpin_hws:
>   	i915_vma_unpin(hws);
>   unpin_vma:
>   	i915_vma_unpin(vma);
> diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
> index 90841415cf7a..146908b90787 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
> @@ -104,13 +104,19 @@ static int emit_recurse_batch(struct spinner *spin,
>   	if (err)
>   		goto unpin_vma;
>   
> -	i915_vma_move_to_active(vma, rq, 0);
> +	err = i915_vma_move_to_active(vma, rq, 0);
> +	if (err)
> +		goto unpin_hws;
> +
>   	if (!i915_gem_object_has_active_reference(vma->obj)) {
>   		i915_gem_object_get(vma->obj);
>   		i915_gem_object_set_active_reference(vma->obj);
>   	}
>   
> -	i915_vma_move_to_active(hws, rq, 0);
> +	err = i915_vma_move_to_active(hws, rq, 0);
> +	if (err)
> +		goto unpin_hws;
> +
>   	if (!i915_gem_object_has_active_reference(hws->obj)) {
>   		i915_gem_object_get(hws->obj);
>   		i915_gem_object_set_active_reference(hws->obj);
> @@ -134,6 +140,7 @@ static int emit_recurse_batch(struct spinner *spin,
>   
>   	err = rq->engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, 0);
>   
> +unpin_hws:
>   	i915_vma_unpin(hws);
>   unpin_vma:
>   	i915_vma_unpin(vma);
> diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
> index e1ea2d2bedd2..c100153cb494 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
> @@ -49,6 +49,10 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
>   		goto err_pin;
>   	}
>   
> +	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
> +	if (err)
> +		goto err_req;
> +
>   	srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
>   	if (INTEL_GEN(ctx->i915) >= 8)
>   		srm++;
> @@ -67,7 +71,6 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
>   	}
>   	intel_ring_advance(rq, cs);
>   
> -	i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
>   	reservation_object_lock(vma->resv, NULL);
>   	reservation_object_add_excl_fence(vma->resv, &rq->fence);
>   	reservation_object_unlock(vma->resv);
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 68+ messages in thread

* Re: [PATCH 17/37] drm/i915: Track vma activity per fence.context, not per engine
  2018-06-29  7:53 ` [PATCH 17/37] drm/i915: Track vma activity per fence.context, not per engine Chris Wilson
@ 2018-06-29 14:54   ` Tvrtko Ursulin
  2018-06-29 15:03     ` Chris Wilson
  2018-06-29 15:08     ` Tvrtko Ursulin
  2018-06-29 22:03   ` [PATCH v2] " Chris Wilson
  1 sibling, 2 replies; 68+ messages in thread
From: Tvrtko Ursulin @ 2018-06-29 14:54 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 29/06/2018 08:53, Chris Wilson wrote:
> In the next patch, we will want to be able to use more flexible request
> timelines that can hop between engines. From the vma pov, we can then
> not rely on the binding of this request to an engine and so can not
> ensure that different requests are ordered through a per-engine
> timeline, and so we must track activity of all timelines. (We track
> activity on the vma itself to prevent unbinding from HW before the HW
> has finished accessing it.)
> 
> For now, let's just ignore the potential issue with trying to use 64b
> indices with radixtrees on 32b machines, it's unlikely to be a problem
> in practice...
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_drv.h            |   3 -
>   drivers/gpu/drm/i915/i915_gem_execbuffer.c |  61 -------
>   drivers/gpu/drm/i915/i915_gem_gtt.c        |   4 +-
>   drivers/gpu/drm/i915/i915_gpu_error.c      |  14 +-
>   drivers/gpu/drm/i915/i915_gpu_error.h      |   2 +-
>   drivers/gpu/drm/i915/i915_request.h        |   1 +
>   drivers/gpu/drm/i915/i915_vma.c            | 180 ++++++++++++++++++---
>   drivers/gpu/drm/i915/i915_vma.h            |  42 ++---
>   8 files changed, 173 insertions(+), 134 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 33e5ac8bf0b8..a786a3b6686f 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -3104,9 +3104,6 @@ i915_gem_obj_finish_shmem_access(struct drm_i915_gem_object *obj)
>   }
>   
>   int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
> -int __must_check i915_vma_move_to_active(struct i915_vma *vma,
> -					 struct i915_request *rq,
> -					 unsigned int flags);
>   int i915_gem_dumb_create(struct drm_file *file_priv,
>   			 struct drm_device *dev,
>   			 struct drm_mode_create_dumb *args);
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 97136e4ce91d..3f0c612d42e7 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -1868,67 +1868,6 @@ static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
>   	return true;
>   }
>   
> -static void export_fence(struct i915_vma *vma,
> -			 struct i915_request *rq,
> -			 unsigned int flags)
> -{
> -	struct reservation_object *resv = vma->resv;
> -
> -	/*
> -	 * Ignore errors from failing to allocate the new fence, we can't
> -	 * handle an error right now. Worst case should be missed
> -	 * synchronisation leading to rendering corruption.
> -	 */
> -	reservation_object_lock(resv, NULL);
> -	if (flags & EXEC_OBJECT_WRITE)
> -		reservation_object_add_excl_fence(resv, &rq->fence);
> -	else if (reservation_object_reserve_shared(resv) == 0)
> -		reservation_object_add_shared_fence(resv, &rq->fence);
> -	reservation_object_unlock(resv);
> -}
> -
> -int i915_vma_move_to_active(struct i915_vma *vma,
> -			    struct i915_request *rq,
> -			    unsigned int flags)
> -{
> -	struct drm_i915_gem_object *obj = vma->obj;
> -	const unsigned int idx = rq->engine->id;
> -
> -	lockdep_assert_held(&rq->i915->drm.struct_mutex);
> -	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
> -
> -	/*
> -	 * Add a reference if we're newly entering the active list.
> -	 * The order in which we add operations to the retirement queue is
> -	 * vital here: mark_active adds to the start of the callback list,
> -	 * such that subsequent callbacks are called first. Therefore we
> -	 * add the active reference first and queue for it to be dropped
> -	 * *last*.
> -	 */
> -	if (!i915_vma_is_active(vma))
> -		obj->active_count++;
> -	i915_vma_set_active(vma, idx);
> -	i915_gem_active_set(&vma->last_read[idx], rq);
> -	list_move_tail(&vma->vm_link, &vma->vm->active_list);
> -
> -	obj->write_domain = 0;
> -	if (flags & EXEC_OBJECT_WRITE) {
> -		obj->write_domain = I915_GEM_DOMAIN_RENDER;
> -
> -		if (intel_fb_obj_invalidate(obj, ORIGIN_CS))
> -			i915_gem_active_set(&obj->frontbuffer_write, rq);
> -
> -		obj->read_domains = 0;
> -	}
> -	obj->read_domains |= I915_GEM_GPU_DOMAINS;
> -
> -	if (flags & EXEC_OBJECT_NEEDS_FENCE)
> -		i915_gem_active_set(&vma->last_fence, rq);
> -
> -	export_fence(vma, rq, flags);
> -	return 0;
> -}
> -
>   static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
>   {
>   	u32 *cs;
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index f46d873a7530..acc779770e47 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -2006,7 +2006,6 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
>   	struct drm_i915_private *i915 = ppgtt->base.vm.i915;
>   	struct i915_ggtt *ggtt = &i915->ggtt;
>   	struct i915_vma *vma;
> -	int i;
>   
>   	GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
>   	GEM_BUG_ON(size > ggtt->vm.total);
> @@ -2015,8 +2014,7 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
>   	if (!vma)
>   		return ERR_PTR(-ENOMEM);
>   
> -	for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
> -		init_request_active(&vma->last_read[i], NULL);
> +	INIT_RADIX_TREE(&vma->active_rt, GFP_KERNEL);
>   	init_request_active(&vma->last_fence, NULL);
>   
>   	vma->vm = &ggtt->vm;
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index df524c9cad40..8c81cf3aa182 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -335,21 +335,16 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m,
>   				struct drm_i915_error_buffer *err,
>   				int count)
>   {
> -	int i;
> -
>   	err_printf(m, "%s [%d]:\n", name, count);
>   
>   	while (count--) {
> -		err_printf(m, "    %08x_%08x %8u %02x %02x [ ",
> +		err_printf(m, "    %08x_%08x %8u %02x %02x %02x",
>   			   upper_32_bits(err->gtt_offset),
>   			   lower_32_bits(err->gtt_offset),
>   			   err->size,
>   			   err->read_domains,
> -			   err->write_domain);
> -		for (i = 0; i < I915_NUM_ENGINES; i++)
> -			err_printf(m, "%02x ", err->rseqno[i]);
> -
> -		err_printf(m, "] %02x", err->wseqno);
> +			   err->write_domain,
> +			   err->wseqno);
>   		err_puts(m, tiling_flag(err->tiling));
>   		err_puts(m, dirty_flag(err->dirty));
>   		err_puts(m, purgeable_flag(err->purgeable));
> @@ -1021,13 +1016,10 @@ static void capture_bo(struct drm_i915_error_buffer *err,
>   		       struct i915_vma *vma)
>   {
>   	struct drm_i915_gem_object *obj = vma->obj;
> -	int i;
>   
>   	err->size = obj->base.size;
>   	err->name = obj->base.name;
>   
> -	for (i = 0; i < I915_NUM_ENGINES; i++)
> -		err->rseqno[i] = __active_get_seqno(&vma->last_read[i]);
>   	err->wseqno = __active_get_seqno(&obj->frontbuffer_write);
>   	err->engine = __active_get_engine_id(&obj->frontbuffer_write);
>   
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
> index 58910f1dc67c..f893a4e8b783 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.h
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.h
> @@ -177,7 +177,7 @@ struct i915_gpu_state {
>   	struct drm_i915_error_buffer {
>   		u32 size;
>   		u32 name;
> -		u32 rseqno[I915_NUM_ENGINES], wseqno;
> +		u32 wseqno;
>   		u64 gtt_offset;
>   		u32 read_domains;
>   		u32 write_domain;
> diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
> index a355a081485f..e1c9365dfefb 100644
> --- a/drivers/gpu/drm/i915/i915_request.h
> +++ b/drivers/gpu/drm/i915/i915_request.h
> @@ -380,6 +380,7 @@ static inline void
>   init_request_active(struct i915_gem_active *active,
>   		    i915_gem_retire_fn retire)
>   {
> +	RCU_INIT_POINTER(active->request, NULL);
>   	INIT_LIST_HEAD(&active->link);
>   	active->retire = retire ?: i915_gem_retire_noop;
>   }
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index d0e606e9b27a..23852417dcbd 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -63,18 +63,18 @@ static void vma_print_allocator(struct i915_vma *vma, const char *reason)
>   
>   #endif
>   
> +struct i915_vma_active {
> +	struct i915_gem_active base;
> +	struct i915_vma *vma;
> +};
> +
>   static void
> -i915_vma_retire(struct i915_gem_active *active, struct i915_request *rq)
> +__i915_vma_retire(struct i915_vma *vma, struct i915_request *rq)
>   {
> -	const unsigned int idx = rq->engine->id;
> -	struct i915_vma *vma =
> -		container_of(active, struct i915_vma, last_read[idx]);
>   	struct drm_i915_gem_object *obj = vma->obj;
>   
> -	GEM_BUG_ON(!i915_vma_has_active_engine(vma, idx));
> -
> -	i915_vma_clear_active(vma, idx);
> -	if (i915_vma_is_active(vma))
> +	GEM_BUG_ON(!i915_vma_is_active(vma));
> +	if (--vma->active_count)
>   		return;
>   
>   	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
> @@ -108,6 +108,19 @@ i915_vma_retire(struct i915_gem_active *active, struct i915_request *rq)
>   	}
>   }
>   
> +static void
> +i915_vma_retire(struct i915_gem_active *base, struct i915_request *rq)
> +{
> +	struct i915_vma_active *active =
> +		container_of(base, typeof(*active), base);
> +	struct i915_vma *vma = active->vma;
> +
> +	GEM_BUG_ON(base != radix_tree_lookup(&vma->active_rt,
> +					     rq->fence.context));
> +
> +	__i915_vma_retire(vma, rq);
> +}
> +
>   static struct i915_vma *
>   vma_create(struct drm_i915_gem_object *obj,
>   	   struct i915_address_space *vm,
> @@ -115,7 +128,6 @@ vma_create(struct drm_i915_gem_object *obj,
>   {
>   	struct i915_vma *vma;
>   	struct rb_node *rb, **p;
> -	int i;
>   
>   	/* The aliasing_ppgtt should never be used directly! */
>   	GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->vm);
> @@ -124,8 +136,8 @@ vma_create(struct drm_i915_gem_object *obj,
>   	if (vma == NULL)
>   		return ERR_PTR(-ENOMEM);
>   
> -	for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
> -		init_request_active(&vma->last_read[i], i915_vma_retire);
> +	INIT_RADIX_TREE(&vma->active_rt, GFP_KERNEL);
> +
>   	init_request_active(&vma->last_fence, NULL);
>   	vma->vm = vm;
>   	vma->ops = &vm->vma_ops;
> @@ -778,13 +790,12 @@ void i915_vma_reopen(struct i915_vma *vma)
>   static void __i915_vma_destroy(struct i915_vma *vma)
>   {
>   	struct drm_i915_private *i915 = vma->vm->i915;
> -	int i;
> +	struct radix_tree_iter iter;
> +	void __rcu **slot;
>   
>   	GEM_BUG_ON(vma->node.allocated);
>   	GEM_BUG_ON(vma->fence);
>   
> -	for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
> -		GEM_BUG_ON(i915_gem_active_isset(&vma->last_read[i]));
>   	GEM_BUG_ON(i915_gem_active_isset(&vma->last_fence));
>   
>   	list_del(&vma->obj_link);
> @@ -795,6 +806,17 @@ static void __i915_vma_destroy(struct i915_vma *vma)
>   	if (!i915_vma_is_ggtt(vma))
>   		i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm));
>   
> +	rcu_read_lock();
> +	radix_tree_for_each_slot(slot, &vma->active_rt, &iter, 0) {
> +		struct i915_vma_active *active = rcu_dereference_raw(*slot);
> +
> +		GEM_BUG_ON(i915_gem_active_isset(&active->base));
> +		kfree(active);
> +
> +		radix_tree_delete(&vma->active_rt, iter.index);
> +	}
> +	rcu_read_unlock();
> +
>   	kmem_cache_free(i915->vmas, vma);
>   }
>   
> @@ -859,9 +881,111 @@ void i915_vma_revoke_mmap(struct i915_vma *vma)
>   		list_del(&vma->obj->userfault_link);
>   }
>   
> +static void export_fence(struct i915_vma *vma,
> +			 struct i915_request *rq,
> +			 unsigned int flags)
> +{
> +	struct reservation_object *resv = vma->resv;
> +
> +	/*
> +	 * Ignore errors from failing to allocate the new fence, we can't
> +	 * handle an error right now. Worst case should be missed
> +	 * synchronisation leading to rendering corruption.
> +	 */
> +	reservation_object_lock(resv, NULL);
> +	if (flags & EXEC_OBJECT_WRITE)
> +		reservation_object_add_excl_fence(resv, &rq->fence);
> +	else if (reservation_object_reserve_shared(resv) == 0)
> +		reservation_object_add_shared_fence(resv, &rq->fence);
> +	reservation_object_unlock(resv);
> +}
> +
> +static struct i915_gem_active *lookup_active(struct i915_vma *vma, u64 idx)
> +{
> +	struct i915_vma_active *active;
> +	int err;
> +
> +	/*
> +	 * XXX Note that the radix_tree uses unsigned longs for it indices,
> +	 * a problem for us on i386 with 32bit longs. However, the likelihood
> +	 * of 2 timelines being used on the same VMA aliasing is minimal,
> +	 * and further reduced by that both timelines must be active
> +	 * simultaneously to confuse us.
> +	 */
> +	active = radix_tree_lookup(&vma->active_rt, idx);
> +	if (likely(active)) {
> +		GEM_BUG_ON(i915_gem_active_isset(&active->base) &&
> +			   idx != i915_gem_active_peek(&active->base,
> +						       &vma->vm->i915->drm.struct_mutex)->fence.context);
> +		return &active->base;
> +	}
> +
> +	active = kmalloc(sizeof(*active), GFP_KERNEL);
> +	if (unlikely(!active))
> +		return ERR_PTR(-ENOMEM);
> +
> +	init_request_active(&active->base, i915_vma_retire);
> +	active->vma = vma;
> +
> +	err = radix_tree_insert(&vma->active_rt, idx, active);
> +	if (unlikely(err)) {
> +		kfree(active);
> +		return ERR_PTR(err);
> +	}
> +
> +	return &active->base;
> +}
> +
> +int i915_vma_move_to_active(struct i915_vma *vma,
> +			    struct i915_request *rq,
> +			    unsigned int flags)
> +{
> +	struct drm_i915_gem_object *obj = vma->obj;
> +	struct i915_gem_active *active;
> +
> +	lockdep_assert_held(&rq->i915->drm.struct_mutex);
> +	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
> +
> +	active = lookup_active(vma, rq->fence.context);

Never mind the radix tree, but fence.context is u64 as well. And 
assigned values are continuously incrementing so once >4G of contexts 
are created and destroyed aliasing is guaranteed with the kernel 
context, or any old one.

It is probably IGT abuse territory, but a) can we be sure it will not 
open up some exploit, and b) can we swallow this problem ourselves?
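
Roughly, since the tree key is an unsigned long, a 32-bit build silently
truncates the context id (hypothetical values):

	u64 ctx_a = 1;                /* e.g. the kernel context      */
	u64 ctx_b = 1 + (1ull << 32); /* allocated ~4G contexts later */

	/* with 32-bit longs both collapse to index 1 */
	radix_tree_lookup(&vma->active_rt, ctx_a);
	radix_tree_lookup(&vma->active_rt, ctx_b);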

Regards,

Tvrtko

> +	if (IS_ERR(active))
> +		return PTR_ERR(active);
> +
> +	/*
> +	 * Add a reference if we're newly entering the active list.
> +	 * The order in which we add operations to the retirement queue is
> +	 * vital here: mark_active adds to the start of the callback list,
> +	 * such that subsequent callbacks are called first. Therefore we
> +	 * add the active reference first and queue for it to be dropped
> +	 * *last*.
> +	 */
> +	if (!i915_gem_active_isset(active) && !vma->active_count++) {
> +		list_move_tail(&vma->vm_link, &vma->vm->active_list);
> +		obj->active_count++;
> +	}
> +	i915_gem_active_set(active, rq);
> +	GEM_BUG_ON(!i915_vma_is_active(vma));
> +	GEM_BUG_ON(!obj->active_count);
> +
> +	obj->write_domain = 0;
> +	if (flags & EXEC_OBJECT_WRITE) {
> +		obj->write_domain = I915_GEM_DOMAIN_RENDER;
> +
> +		if (intel_fb_obj_invalidate(obj, ORIGIN_CS))
> +			i915_gem_active_set(&obj->frontbuffer_write, rq);
> +
> +		obj->read_domains = 0;
> +	}
> +	obj->read_domains |= I915_GEM_GPU_DOMAINS;
> +
> +	if (flags & EXEC_OBJECT_NEEDS_FENCE)
> +		i915_gem_active_set(&vma->last_fence, rq);
> +
> +	export_fence(vma, rq, flags);
> +	return 0;
> +}
> +
>   int i915_vma_unbind(struct i915_vma *vma)
>   {
> -	unsigned long active;
>   	int ret;
>   
>   	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
> @@ -871,9 +995,9 @@ int i915_vma_unbind(struct i915_vma *vma)
>   	 * have side-effects such as unpinning or even unbinding this vma.
>   	 */
>   	might_sleep();
> -	active = i915_vma_get_active(vma);
> -	if (active) {
> -		int idx;
> +	if (i915_vma_is_active(vma)) {
> +		struct radix_tree_iter iter;
> +		void __rcu **slot;
>   
>   		/*
>   		 * When a closed VMA is retired, it is unbound - eek.
> @@ -890,18 +1014,24 @@ int i915_vma_unbind(struct i915_vma *vma)
>   		 */
>   		__i915_vma_pin(vma);
>   
> -		for_each_active(active, idx) {
> -			ret = i915_gem_active_retire(&vma->last_read[idx],
> +		rcu_read_lock();
> +		radix_tree_for_each_slot(slot, &vma->active_rt, &iter, 0) {
> +			struct i915_vma_active *active =
> +				rcu_dereference_raw(*slot);
> +			rcu_read_unlock();
> +
> +			ret = i915_gem_active_retire(&active->base,
>   						     &vma->vm->i915->drm.struct_mutex);
>   			if (ret)
> -				break;
> -		}
> +				goto unpin;
>   
> -		if (!ret) {
> -			ret = i915_gem_active_retire(&vma->last_fence,
> -						     &vma->vm->i915->drm.struct_mutex);
> +			rcu_read_lock();
>   		}
> +		rcu_read_unlock();
>   
> +		ret = i915_gem_active_retire(&vma->last_fence,
> +					     &vma->vm->i915->drm.struct_mutex);
> +unpin:
>   		__i915_vma_unpin(vma);
>   		if (ret)
>   			return ret;
> diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
> index 66a228931517..94fdf4917e95 100644
> --- a/drivers/gpu/drm/i915/i915_vma.h
> +++ b/drivers/gpu/drm/i915/i915_vma.h
> @@ -26,6 +26,7 @@
>   #define __I915_VMA_H__
>   
>   #include <linux/io-mapping.h>
> +#include <linux/radix-tree.h>
>   
>   #include <drm/drm_mm.h>
>   
> @@ -94,8 +95,8 @@ struct i915_vma {
>   #define I915_VMA_USERFAULT	BIT(I915_VMA_USERFAULT_BIT)
>   #define I915_VMA_GGTT_WRITE	BIT(12)
>   
> -	unsigned int active;
> -	struct i915_gem_active last_read[I915_NUM_ENGINES];
> +	unsigned int active_count;
> +	struct radix_tree_root active_rt;
>   	struct i915_gem_active last_fence;
>   
>   	/**
> @@ -138,6 +139,15 @@ i915_vma_instance(struct drm_i915_gem_object *obj,
>   
>   void i915_vma_unpin_and_release(struct i915_vma **p_vma);
>   
> +static inline bool i915_vma_is_active(struct i915_vma *vma)
> +{
> +	return vma->active_count;
> +}
> +
> +int __must_check i915_vma_move_to_active(struct i915_vma *vma,
> +					 struct i915_request *rq,
> +					 unsigned int flags);
> +
>   static inline bool i915_vma_is_ggtt(const struct i915_vma *vma)
>   {
>   	return vma->flags & I915_VMA_GGTT;
> @@ -187,34 +197,6 @@ static inline bool i915_vma_has_userfault(const struct i915_vma *vma)
>   	return test_bit(I915_VMA_USERFAULT_BIT, &vma->flags);
>   }
>   
> -static inline unsigned int i915_vma_get_active(const struct i915_vma *vma)
> -{
> -	return vma->active;
> -}
> -
> -static inline bool i915_vma_is_active(const struct i915_vma *vma)
> -{
> -	return i915_vma_get_active(vma);
> -}
> -
> -static inline void i915_vma_set_active(struct i915_vma *vma,
> -				       unsigned int engine)
> -{
> -	vma->active |= BIT(engine);
> -}
> -
> -static inline void i915_vma_clear_active(struct i915_vma *vma,
> -					 unsigned int engine)
> -{
> -	vma->active &= ~BIT(engine);
> -}
> -
> -static inline bool i915_vma_has_active_engine(const struct i915_vma *vma,
> -					      unsigned int engine)
> -{
> -	return vma->active & BIT(engine);
> -}
> -
>   static inline u32 i915_ggtt_offset(const struct i915_vma *vma)
>   {
>   	GEM_BUG_ON(!i915_vma_is_ggtt(vma));
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 68+ messages in thread

* Re: [PATCH 17/37] drm/i915: Track vma activity per fence.context, not per engine
  2018-06-29 14:54   ` Tvrtko Ursulin
@ 2018-06-29 15:03     ` Chris Wilson
  2018-06-29 15:34       ` Chris Wilson
  2018-06-29 15:08     ` Tvrtko Ursulin
  1 sibling, 1 reply; 68+ messages in thread
From: Chris Wilson @ 2018-06-29 15:03 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-06-29 15:54:02)
> 
> On 29/06/2018 08:53, Chris Wilson wrote:
> > In the next patch, we will want to be able to use more flexible request
> > timelines that can hop between engines. From the vma pov, we can then
> > not rely on the binding of this request to an engine and so can not
> > ensure that different requests are ordered through a per-engine
> > timeline, and so we must track activity of all timelines. (We track
> > activity on the vma itself to prevent unbinding from HW before the HW
> > has finished accessing it.)
> > 
> > For now, let's just ignore the potential issue with trying to use 64b
> > indices with radixtrees on 32b machines, it's unlikely to be a problem
> > in practice...

> > +static struct i915_gem_active *lookup_active(struct i915_vma *vma, u64 idx)
> > +{
> > +     struct i915_vma_active *active;
> > +     int err;
> > +
> > +     /*
> > +      * XXX Note that the radix_tree uses unsigned longs for it indices,
> > +      * a problem for us on i386 with 32bit longs. However, the likelihood
> > +      * of 2 timelines being used on the same VMA aliasing is minimal,
> > +      * and further reduced by that both timelines must be active
> > +      * simultaneously to confuse us.
> > +      */
> > +     active = radix_tree_lookup(&vma->active_rt, idx);
> > +     if (likely(active)) {
> > +             GEM_BUG_ON(i915_gem_active_isset(&active->base) &&
> > +                        idx != i915_gem_active_peek(&active->base,
> > +                                                    &vma->vm->i915->drm.struct_mutex)->fence.context);
> > +             return &active->base;
> > +     }
> > +
> > +     active = kmalloc(sizeof(*active), GFP_KERNEL);
> > +     if (unlikely(!active))
> > +             return ERR_PTR(-ENOMEM);
> > +
> > +     init_request_active(&active->base, i915_vma_retire);
> > +     active->vma = vma;
> > +
> > +     err = radix_tree_insert(&vma->active_rt, idx, active);
> > +     if (unlikely(err)) {
> > +             kfree(active);
> > +             return ERR_PTR(err);
> > +     }
> > +
> > +     return &active->base;
> > +}
> > +
> > +int i915_vma_move_to_active(struct i915_vma *vma,
> > +                         struct i915_request *rq,
> > +                         unsigned int flags)
> > +{
> > +     struct drm_i915_gem_object *obj = vma->obj;
> > +     struct i915_gem_active *active;
> > +
> > +     lockdep_assert_held(&rq->i915->drm.struct_mutex);
> > +     GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
> > +
> > +     active = lookup_active(vma, rq->fence.context);
> 
> Never mind the radix tree, but fence.context is u64 as well. And 
> assigned values are continuously incrementing so once >4G of contexts 
> are created and destroyed aliasing is guaranteed with the kernel 
> context, or any old one.

As I said, I don't think it matters because you need to alias active
fences and put a GEM_BUG_ON for you to hit. In the next patch, it
becomes ever harder to hit.

The alternative is yet another custom radixtree, and there is nothing
preventing us substituting one radixtree implementation for another here.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 68+ messages in thread

* Re: [PATCH 17/37] drm/i915: Track vma activity per fence.context, not per engine
  2018-06-29 14:54   ` Tvrtko Ursulin
  2018-06-29 15:03     ` Chris Wilson
@ 2018-06-29 15:08     ` Tvrtko Ursulin
  2018-06-29 15:36       ` Chris Wilson
  1 sibling, 1 reply; 68+ messages in thread
From: Tvrtko Ursulin @ 2018-06-29 15:08 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 29/06/2018 15:54, Tvrtko Ursulin wrote:

[snip]

>> +int i915_vma_move_to_active(struct i915_vma *vma,
>> +                struct i915_request *rq,
>> +                unsigned int flags)
>> +{
>> +    struct drm_i915_gem_object *obj = vma->obj;
>> +    struct i915_gem_active *active;
>> +
>> +    lockdep_assert_held(&rq->i915->drm.struct_mutex);
>> +    GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
>> +
>> +    active = lookup_active(vma, rq->fence.context);
> 
> Never mind the radix tree, but fence.context is u64 as well. And 
> assigned values are continuously incrementing so once >4G of contexts 
> are created and destroyed aliasing is guaranteed with the kernel 
> context, or any old one.
> 
> It is probably IGT abuse territory, but a) can we be sure it will not 
> open up some exploit, and b) can we swallow this problem ourselves?

Hm.. key the radix tree with the timeline pointer instead? 1:1 to 
fence.context, natural long, and automatic lifetime management.
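
Something like this, so the key stays a native unsigned long on both
32-bit and 64-bit builds (field name from memory, purely illustrative):

	/* a timeline pointer is unique for as long as it is active */
	active = lookup_active(vma, (unsigned long)rq->timeline);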

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 68+ messages in thread

* Re: [PATCH 17/37] drm/i915: Track vma activity per fence.context, not per engine
  2018-06-29 15:03     ` Chris Wilson
@ 2018-06-29 15:34       ` Chris Wilson
  0 siblings, 0 replies; 68+ messages in thread
From: Chris Wilson @ 2018-06-29 15:34 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Chris Wilson (2018-06-29 16:03:31)
> Quoting Tvrtko Ursulin (2018-06-29 15:54:02)
> > 
> > On 29/06/2018 08:53, Chris Wilson wrote:
> > > In the next patch, we will want to be able to use more flexible request
> > > timelines that can hop between engines. From the vma pov, we can then
> > > not rely on the binding of this request to an engine and so can not
> > > ensure that different requests are ordered through a per-engine
> > > timeline, and so we must track activity of all timelines. (We track
> > > activity on the vma itself to prevent unbinding from HW before the HW
> > > has finished accessing it.)
> > > 
> > > For now, let's just ignore the potential issue with trying to use 64b
> > > indices with radixtrees on 32b machines, it's unlikely to be a problem
> > > in practice...
> 
> > > +static struct i915_gem_active *lookup_active(struct i915_vma *vma, u64 idx)
> > > +{
> > > +     struct i915_vma_active *active;
> > > +     int err;
> > > +
> > > +     /*
> > > +      * XXX Note that the radix_tree uses unsigned longs for it indices,
> > > +      * a problem for us on i386 with 32bit longs. However, the likelihood
> > > +      * of 2 timelines being used on the same VMA aliasing is minimal,
> > > +      * and further reduced by that both timelines must be active
> > > +      * simultaneously to confuse us.
> > > +      */
> > > +     active = radix_tree_lookup(&vma->active_rt, idx);
> > > +     if (likely(active)) {
> > > +             GEM_BUG_ON(i915_gem_active_isset(&active->base) &&
> > > +                        idx != i915_gem_active_peek(&active->base,
> > > +                                                    &vma->vm->i915->drm.struct_mutex)->fence.context);
> > > +             return &active->base;
> > > +     }
> > > +
> > > +     active = kmalloc(sizeof(*active), GFP_KERNEL);
> > > +     if (unlikely(!active))
> > > +             return ERR_PTR(-ENOMEM);
> > > +
> > > +     init_request_active(&active->base, i915_vma_retire);
> > > +     active->vma = vma;
> > > +
> > > +     err = radix_tree_insert(&vma->active_rt, idx, active);
> > > +     if (unlikely(err)) {
> > > +             kfree(active);
> > > +             return ERR_PTR(err);
> > > +     }
> > > +
> > > +     return &active->base;
> > > +}
> > > +
> > > +int i915_vma_move_to_active(struct i915_vma *vma,
> > > +                         struct i915_request *rq,
> > > +                         unsigned int flags)
> > > +{
> > > +     struct drm_i915_gem_object *obj = vma->obj;
> > > +     struct i915_gem_active *active;
> > > +
> > > +     lockdep_assert_held(&rq->i915->drm.struct_mutex);
> > > +     GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
> > > +
> > > +     active = lookup_active(vma, rq->fence.context);
> > 
> > Never mind the radix tree, but fence.context is u64 as well. And 
> > assigned values are continuously incrementing so once >4G of contexts 
> > are created and destroyed aliasing is guaranteed with the kernel 
> > context, or any old one.
> 
> As I said, I don't think it matters because you need to alias active
> fences and put a GEM_BUG_ON for you to hit. In the next patch, it
> becomes ever harder to hit.
> 
> The alternative is yet another custom radixtree, and there is nothing
> preventing us substituting one radixtree implementation for another here.

An alternative to the radixtree would be an rbtree. With the cache that
shouldn't be too bad.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 68+ messages in thread

* Re: [PATCH 17/37] drm/i915: Track vma activity per fence.context, not per engine
  2018-06-29 15:08     ` Tvrtko Ursulin
@ 2018-06-29 15:36       ` Chris Wilson
  2018-06-29 15:39         ` Chris Wilson
  0 siblings, 1 reply; 68+ messages in thread
From: Chris Wilson @ 2018-06-29 15:36 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-06-29 16:08:40)
> 
> On 29/06/2018 15:54, Tvrtko Ursulin wrote:
> 
> [snip]
> 
> >> +int i915_vma_move_to_active(struct i915_vma *vma,
> >> +                struct i915_request *rq,
> >> +                unsigned int flags)
> >> +{
> >> +    struct drm_i915_gem_object *obj = vma->obj;
> >> +    struct i915_gem_active *active;
> >> +
> >> +    lockdep_assert_held(&rq->i915->drm.struct_mutex);
> >> +    GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
> >> +
> >> +    active = lookup_active(vma, rq->fence.context);
> > 
> > Never mind the radix tree, but fence.context is u64 as well. And 
> > assigned values are continuously incrementing so once >4G of contexts 
> > are created and destroyed aliasing is guaranteed with the kernel 
> > context, or any old one.
> > 
> > It is probably IGT abuse territory, but a) can we be sure it will not 
> > open up some exploit, and b) can we swallow this problem ourselves?
> 
> Hm.. key the radix tree with the timeline pointer instead? 1:1 to 
> fence.context, natural long, and automatic lifetime management.

Let's see.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 68+ messages in thread

* Re: [PATCH 17/37] drm/i915: Track vma activity per fence.context, not per engine
  2018-06-29 15:36       ` Chris Wilson
@ 2018-06-29 15:39         ` Chris Wilson
  2018-07-02  9:38           ` Tvrtko Ursulin
  0 siblings, 1 reply; 68+ messages in thread
From: Chris Wilson @ 2018-06-29 15:39 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Chris Wilson (2018-06-29 16:36:22)
> Quoting Tvrtko Ursulin (2018-06-29 16:08:40)
> > 
> > On 29/06/2018 15:54, Tvrtko Ursulin wrote:
> > 
> > [snip]
> > 
> > >> +int i915_vma_move_to_active(struct i915_vma *vma,
> > >> +                struct i915_request *rq,
> > >> +                unsigned int flags)
> > >> +{
> > >> +    struct drm_i915_gem_object *obj = vma->obj;
> > >> +    struct i915_gem_active *active;
> > >> +
> > >> +    lockdep_assert_held(&rq->i915->drm.struct_mutex);
> > >> +    GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
> > >> +
> > >> +    active = lookup_active(vma, rq->fence.context);
> > > 
> > > Never mind the radix tree, but fence.context is u64 as well. And 
> > > assigned values are continuously incrementing so once >4G of contexts 
> > > are created and destroyed aliasing is guaranteed with the kernel 
> > > context, or any old one.
> > > 
> > > It is probably IGT abuse territory, but a) can we be sure it will not 
> > > open up some exploit, and b) can we swallow this problem ourselves?
> > 
> > Hm.. key the radix tree with the timeline pointer instead? 1:1 to 
> > fence.context, natural long, and automatic lifetime management.
> 
> Let's see.

The counter-argument is density. Timeline pointers are going to be much
sparser than my expectations around context ids (even with the silly
incrementing u64).
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 68+ messages in thread

* [PATCH v2] drm/i915: Track the last-active inside the i915_vma
  2018-06-29  7:53 ` [PATCH 18/37] drm/i915: Track the last-active inside the i915_vma Chris Wilson
@ 2018-06-29 22:01   ` Chris Wilson
  0 siblings, 0 replies; 68+ messages in thread
From: Chris Wilson @ 2018-06-29 22:01 UTC (permalink / raw)
  To: intel-gfx

Using a VMA on more than one timeline concurrently is the exception
rather than the rule (using it concurrently on multiple engines). As we
expect to only use one active tracker, store the most recently used
tracker inside the i915_vma itself and only fall back to the rbtree if
we need two or more concurrent active trackers.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_vma.c | 36 +++++++++++++++++++++++++++++++--
 drivers/gpu/drm/i915/i915_vma.h |  1 +
 2 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 2faad2a1d00e..9b69d24c5cf1 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -119,6 +119,12 @@ i915_vma_retire(struct i915_gem_active *base, struct i915_request *rq)
 	__i915_vma_retire(active->vma, rq);
 }
 
+static void
+i915_vma_last_retire(struct i915_gem_active *base, struct i915_request *rq)
+{
+	__i915_vma_retire(container_of(base, struct i915_vma, last_active), rq);
+}
+
 static struct i915_vma *
 vma_create(struct drm_i915_gem_object *obj,
 	   struct i915_address_space *vm,
@@ -136,6 +142,7 @@ vma_create(struct drm_i915_gem_object *obj,
 
 	vma->active = RB_ROOT;
 
+	init_request_active(&vma->last_active, i915_vma_last_retire);
 	init_request_active(&vma->last_fence, NULL);
 	vma->vm = vm;
 	vma->ops = &vm->vma_ops;
@@ -895,6 +902,15 @@ static struct i915_gem_active *lookup_active(struct i915_vma *vma, u64 idx)
 {
 	struct i915_vma_active *active;
 	struct rb_node **p, *parent;
+	struct i915_request *old;
+
+	old = i915_gem_active_raw(&vma->last_active,
+				  &vma->vm->i915->drm.struct_mutex);
+	if (!old || old->fence.context == idx)
+		goto out;
+
+	/* Move the currently active fence into the rbtree */
+	idx = old->fence.context;
 
 	parent = NULL;
 	p = &vma->active.rb_node;
@@ -903,7 +919,7 @@ static struct i915_gem_active *lookup_active(struct i915_vma *vma, u64 idx)
 
 		active = rb_entry(parent, struct i915_vma_active, node);
 		if (active->timeline == idx)
-			return &active->base;
+			goto replace;
 
 		if (active->timeline < idx)
 			p = &parent->rb_right;
@@ -922,7 +938,18 @@ static struct i915_gem_active *lookup_active(struct i915_vma *vma, u64 idx)
 	rb_link_node(&active->node, parent, p);
 	rb_insert_color(&active->node, &vma->active);
 
-	return &active->base;
+replace:
+	if (i915_gem_active_isset(&active->base)) {
+		__list_del_entry(&active->base.link);
+		vma->active_count--;
+		GEM_BUG_ON(!vma->active_count);
+	}
+	GEM_BUG_ON(list_empty(&vma->last_active.link));
+	list_replace_init(&vma->last_active.link, &active->base.link);
+	active->base.request = fetch_and_zero(&vma->last_active.request);
+
+out:
+	return &vma->last_active;
 }
 
 int i915_vma_move_to_active(struct i915_vma *vma,
@@ -1002,6 +1029,11 @@ int i915_vma_unbind(struct i915_vma *vma)
 		 */
 		__i915_vma_pin(vma);
 
+		ret = i915_gem_active_retire(&vma->last_active,
+					     &vma->vm->i915->drm.struct_mutex);
+		if (ret)
+			goto unpin;
+
 		rbtree_postorder_for_each_entry_safe(active, n,
 						     &vma->active, node) {
 			ret = i915_gem_active_retire(&active->base,
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index c297b0a0dc47..f06d66377107 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -97,6 +97,7 @@ struct i915_vma {
 
 	unsigned int active_count;
 	struct rb_root active;
+	struct i915_gem_active last_active;
 	struct i915_gem_active last_fence;
 
 	/**
-- 
2.18.0


* [PATCH v2] drm/i915: Track vma activity per fence.context, not per engine
  2018-06-29  7:53 ` [PATCH 17/37] drm/i915: Track vma activity per fence.context, not per engine Chris Wilson
  2018-06-29 14:54   ` Tvrtko Ursulin
@ 2018-06-29 22:03   ` Chris Wilson
  1 sibling, 0 replies; 68+ messages in thread
From: Chris Wilson @ 2018-06-29 22:03 UTC (permalink / raw)
  To: intel-gfx

In the next patch, we will want to be able to use more flexible request
timelines that can hop between engines. From the vma's point of view, we
can then no longer rely on this request being bound to one engine, and so
cannot ensure that different requests are ordered through a per-engine
timeline; we must therefore track the activity of all timelines. (We
track activity on the vma itself to prevent it being unbound from the HW
before the HW has finished accessing it.)

v2: Switch to an rbtree for 32b safety (since using a u64 as a radixtree
index is fraught with aliasing of unsigned longs).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c   |   3 -
 drivers/gpu/drm/i915/i915_gpu_error.c |  14 +---
 drivers/gpu/drm/i915/i915_gpu_error.h |   2 +-
 drivers/gpu/drm/i915/i915_request.h   |   1 +
 drivers/gpu/drm/i915/i915_vma.c       | 112 +++++++++++++++++++-------
 drivers/gpu/drm/i915/i915_vma.h       |  46 +++--------
 6 files changed, 98 insertions(+), 80 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index f46d873a7530..52e7bed477c2 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2006,7 +2006,6 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
 	struct drm_i915_private *i915 = ppgtt->base.vm.i915;
 	struct i915_ggtt *ggtt = &i915->ggtt;
 	struct i915_vma *vma;
-	int i;
 
 	GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
 	GEM_BUG_ON(size > ggtt->vm.total);
@@ -2015,8 +2014,6 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
 	if (!vma)
 		return ERR_PTR(-ENOMEM);
 
-	for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
-		init_request_active(&vma->last_read[i], NULL);
 	init_request_active(&vma->last_fence, NULL);
 
 	vma->vm = &ggtt->vm;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index df524c9cad40..8c81cf3aa182 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -335,21 +335,16 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m,
 				struct drm_i915_error_buffer *err,
 				int count)
 {
-	int i;
-
 	err_printf(m, "%s [%d]:\n", name, count);
 
 	while (count--) {
-		err_printf(m, "    %08x_%08x %8u %02x %02x [ ",
+		err_printf(m, "    %08x_%08x %8u %02x %02x %02x",
 			   upper_32_bits(err->gtt_offset),
 			   lower_32_bits(err->gtt_offset),
 			   err->size,
 			   err->read_domains,
-			   err->write_domain);
-		for (i = 0; i < I915_NUM_ENGINES; i++)
-			err_printf(m, "%02x ", err->rseqno[i]);
-
-		err_printf(m, "] %02x", err->wseqno);
+			   err->write_domain,
+			   err->wseqno);
 		err_puts(m, tiling_flag(err->tiling));
 		err_puts(m, dirty_flag(err->dirty));
 		err_puts(m, purgeable_flag(err->purgeable));
@@ -1021,13 +1016,10 @@ static void capture_bo(struct drm_i915_error_buffer *err,
 		       struct i915_vma *vma)
 {
 	struct drm_i915_gem_object *obj = vma->obj;
-	int i;
 
 	err->size = obj->base.size;
 	err->name = obj->base.name;
 
-	for (i = 0; i < I915_NUM_ENGINES; i++)
-		err->rseqno[i] = __active_get_seqno(&vma->last_read[i]);
 	err->wseqno = __active_get_seqno(&obj->frontbuffer_write);
 	err->engine = __active_get_engine_id(&obj->frontbuffer_write);
 
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index 58910f1dc67c..f893a4e8b783 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -177,7 +177,7 @@ struct i915_gpu_state {
 	struct drm_i915_error_buffer {
 		u32 size;
 		u32 name;
-		u32 rseqno[I915_NUM_ENGINES], wseqno;
+		u32 wseqno;
 		u64 gtt_offset;
 		u32 read_domains;
 		u32 write_domain;
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index a355a081485f..e1c9365dfefb 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -380,6 +380,7 @@ static inline void
 init_request_active(struct i915_gem_active *active,
 		    i915_gem_retire_fn retire)
 {
+	RCU_INIT_POINTER(active->request, NULL);
 	INIT_LIST_HEAD(&active->link);
 	active->retire = retire ?: i915_gem_retire_noop;
 }
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 7635c27e7e8b..2faad2a1d00e 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -63,18 +63,20 @@ static void vma_print_allocator(struct i915_vma *vma, const char *reason)
 
 #endif
 
+struct i915_vma_active {
+	struct i915_gem_active base;
+	struct i915_vma *vma;
+	struct rb_node node;
+	u64 timeline;
+};
+
 static void
-i915_vma_retire(struct i915_gem_active *active, struct i915_request *rq)
+__i915_vma_retire(struct i915_vma *vma, struct i915_request *rq)
 {
-	const unsigned int idx = rq->engine->id;
-	struct i915_vma *vma =
-		container_of(active, struct i915_vma, last_read[idx]);
 	struct drm_i915_gem_object *obj = vma->obj;
 
-	GEM_BUG_ON(!i915_vma_has_active_engine(vma, idx));
-
-	i915_vma_clear_active(vma, idx);
-	if (i915_vma_is_active(vma))
+	GEM_BUG_ON(!i915_vma_is_active(vma));
+	if (--vma->active_count)
 		return;
 
 	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
@@ -108,6 +110,15 @@ i915_vma_retire(struct i915_gem_active *active, struct i915_request *rq)
 	}
 }
 
+static void
+i915_vma_retire(struct i915_gem_active *base, struct i915_request *rq)
+{
+	struct i915_vma_active *active =
+		container_of(base, typeof(*active), base);
+
+	__i915_vma_retire(active->vma, rq);
+}
+
 static struct i915_vma *
 vma_create(struct drm_i915_gem_object *obj,
 	   struct i915_address_space *vm,
@@ -115,7 +126,6 @@ vma_create(struct drm_i915_gem_object *obj,
 {
 	struct i915_vma *vma;
 	struct rb_node *rb, **p;
-	int i;
 
 	/* The aliasing_ppgtt should never be used directly! */
 	GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->vm);
@@ -124,8 +134,8 @@ vma_create(struct drm_i915_gem_object *obj,
 	if (vma == NULL)
 		return ERR_PTR(-ENOMEM);
 
-	for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
-		init_request_active(&vma->last_read[i], i915_vma_retire);
+	vma->active = RB_ROOT;
+
 	init_request_active(&vma->last_fence, NULL);
 	vma->vm = vm;
 	vma->ops = &vm->vma_ops;
@@ -778,13 +788,11 @@ void i915_vma_reopen(struct i915_vma *vma)
 static void __i915_vma_destroy(struct i915_vma *vma)
 {
 	struct drm_i915_private *i915 = vma->vm->i915;
-	int i;
+	struct i915_vma_active *iter, *n;
 
 	GEM_BUG_ON(vma->node.allocated);
 	GEM_BUG_ON(vma->fence);
 
-	for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
-		GEM_BUG_ON(i915_gem_active_isset(&vma->last_read[i]));
 	GEM_BUG_ON(i915_gem_active_isset(&vma->last_fence));
 
 	list_del(&vma->obj_link);
@@ -795,6 +803,11 @@ static void __i915_vma_destroy(struct i915_vma *vma)
 	if (!i915_vma_is_ggtt(vma))
 		i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm));
 
+	rbtree_postorder_for_each_entry_safe(iter, n, &vma->active, node) {
+		GEM_BUG_ON(i915_gem_active_isset(&iter->base));
+		kfree(iter);
+	}
+
 	kmem_cache_free(i915->vmas, vma);
 }
 
@@ -878,16 +891,54 @@ static void export_fence(struct i915_vma *vma,
 	reservation_object_unlock(resv);
 }
 
+static struct i915_gem_active *lookup_active(struct i915_vma *vma, u64 idx)
+{
+	struct i915_vma_active *active;
+	struct rb_node **p, *parent;
+
+	parent = NULL;
+	p = &vma->active.rb_node;
+	while (*p) {
+		parent = *p;
+
+		active = rb_entry(parent, struct i915_vma_active, node);
+		if (active->timeline == idx)
+			return &active->base;
+
+		if (active->timeline < idx)
+			p = &parent->rb_right;
+		else
+			p = &parent->rb_left;
+	}
+
+	active = kmalloc(sizeof(*active), GFP_KERNEL);
+	if (unlikely(!active))
+		return ERR_PTR(-ENOMEM);
+
+	init_request_active(&active->base, i915_vma_retire);
+	active->vma = vma;
+	active->timeline = idx;
+
+	rb_link_node(&active->node, parent, p);
+	rb_insert_color(&active->node, &vma->active);
+
+	return &active->base;
+}
+
 int i915_vma_move_to_active(struct i915_vma *vma,
 			    struct i915_request *rq,
 			    unsigned int flags)
 {
 	struct drm_i915_gem_object *obj = vma->obj;
-	const unsigned int idx = rq->engine->id;
+	struct i915_gem_active *active;
 
 	lockdep_assert_held(&rq->i915->drm.struct_mutex);
 	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
 
+	active = lookup_active(vma, rq->fence.context);
+	if (IS_ERR(active))
+		return PTR_ERR(active);
+
 	/*
 	 * Add a reference if we're newly entering the active list.
 	 * The order in which we add operations to the retirement queue is
@@ -896,11 +947,13 @@ int i915_vma_move_to_active(struct i915_vma *vma,
 	 * add the active reference first and queue for it to be dropped
 	 * *last*.
 	 */
-	if (!i915_vma_is_active(vma))
+	if (!i915_gem_active_isset(active) && !vma->active_count++) {
+		list_move_tail(&vma->vm_link, &vma->vm->active_list);
 		obj->active_count++;
-	i915_vma_set_active(vma, idx);
-	i915_gem_active_set(&vma->last_read[idx], rq);
-	list_move_tail(&vma->vm_link, &vma->vm->active_list);
+	}
+	i915_gem_active_set(active, rq);
+	GEM_BUG_ON(!i915_vma_is_active(vma));
+	GEM_BUG_ON(!obj->active_count);
 
 	obj->write_domain = 0;
 	if (flags & EXEC_OBJECT_WRITE) {
@@ -922,7 +975,6 @@ int i915_vma_move_to_active(struct i915_vma *vma,
 
 int i915_vma_unbind(struct i915_vma *vma)
 {
-	unsigned long active;
 	int ret;
 
 	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
@@ -932,9 +984,8 @@ int i915_vma_unbind(struct i915_vma *vma)
 	 * have side-effects such as unpinning or even unbinding this vma.
 	 */
 	might_sleep();
-	active = i915_vma_get_active(vma);
-	if (active) {
-		int idx;
+	if (i915_vma_is_active(vma)) {
+		struct i915_vma_active *active, *n;
 
 		/*
 		 * When a closed VMA is retired, it is unbound - eek.
@@ -951,18 +1002,17 @@ int i915_vma_unbind(struct i915_vma *vma)
 		 */
 		__i915_vma_pin(vma);
 
-		for_each_active(active, idx) {
-			ret = i915_gem_active_retire(&vma->last_read[idx],
+		rbtree_postorder_for_each_entry_safe(active, n,
+						     &vma->active, node) {
+			ret = i915_gem_active_retire(&active->base,
 						     &vma->vm->i915->drm.struct_mutex);
 			if (ret)
-				break;
-		}
-
-		if (!ret) {
-			ret = i915_gem_active_retire(&vma->last_fence,
-						     &vma->vm->i915->drm.struct_mutex);
+				goto unpin;
 		}
 
+		ret = i915_gem_active_retire(&vma->last_fence,
+					     &vma->vm->i915->drm.struct_mutex);
+unpin:
 		__i915_vma_unpin(vma);
 		if (ret)
 			return ret;
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index a218b689e418..c297b0a0dc47 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -26,6 +26,7 @@
 #define __I915_VMA_H__
 
 #include <linux/io-mapping.h>
+#include <linux/rbtree.h>
 
 #include <drm/drm_mm.h>
 
@@ -94,8 +95,8 @@ struct i915_vma {
 #define I915_VMA_USERFAULT	BIT(I915_VMA_USERFAULT_BIT)
 #define I915_VMA_GGTT_WRITE	BIT(12)
 
-	unsigned int active;
-	struct i915_gem_active last_read[I915_NUM_ENGINES];
+	unsigned int active_count;
+	struct rb_root active;
 	struct i915_gem_active last_fence;
 
 	/**
@@ -138,6 +139,15 @@ i915_vma_instance(struct drm_i915_gem_object *obj,
 
 void i915_vma_unpin_and_release(struct i915_vma **p_vma);
 
+static inline bool i915_vma_is_active(struct i915_vma *vma)
+{
+	return vma->active_count;
+}
+
+int __must_check i915_vma_move_to_active(struct i915_vma *vma,
+					 struct i915_request *rq,
+					 unsigned int flags);
+
 static inline bool i915_vma_is_ggtt(const struct i915_vma *vma)
 {
 	return vma->flags & I915_VMA_GGTT;
@@ -187,38 +197,6 @@ static inline bool i915_vma_has_userfault(const struct i915_vma *vma)
 	return test_bit(I915_VMA_USERFAULT_BIT, &vma->flags);
 }
 
-static inline unsigned int i915_vma_get_active(const struct i915_vma *vma)
-{
-	return vma->active;
-}
-
-static inline bool i915_vma_is_active(const struct i915_vma *vma)
-{
-	return i915_vma_get_active(vma);
-}
-
-static inline void i915_vma_set_active(struct i915_vma *vma,
-				       unsigned int engine)
-{
-	vma->active |= BIT(engine);
-}
-
-static inline void i915_vma_clear_active(struct i915_vma *vma,
-					 unsigned int engine)
-{
-	vma->active &= ~BIT(engine);
-}
-
-static inline bool i915_vma_has_active_engine(const struct i915_vma *vma,
-					      unsigned int engine)
-{
-	return vma->active & BIT(engine);
-}
-
-int __must_check i915_vma_move_to_active(struct i915_vma *vma,
-					 struct i915_request *rq,
-					 unsigned int flags);
-
 static inline u32 i915_ggtt_offset(const struct i915_vma *vma)
 {
 	GEM_BUG_ON(!i915_vma_is_ggtt(vma));
-- 
2.18.0


* ✗ Fi.CI.BAT: failure for series starting with [01/37] drm/i915/gtt: Add read only pages to gen8_pte_encode (rev3)
  2018-06-29  7:53 [PATCH 01/37] drm/i915/gtt: Add read only pages to gen8_pte_encode Chris Wilson
                   ` (39 preceding siblings ...)
  2018-06-29 11:53 ` ✗ Fi.CI.IGT: failure " Patchwork
@ 2018-06-29 22:38 ` Patchwork
  40 siblings, 0 replies; 68+ messages in thread
From: Patchwork @ 2018-06-29 22:38 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/37] drm/i915/gtt: Add read only pages to gen8_pte_encode (rev3)
URL   : https://patchwork.freedesktop.org/series/45638/
State : failure

== Summary ==

Applying: drm/i915/gtt: Add read only pages to gen8_pte_encode
Applying: drm/i915/gtt: Read-only pages for insert_entries on bdw+
Applying: drm/i915: Prevent writing into a read-only object via a GGTT mmap
Applying: drm/i915: Reject attempted pwrites into a read-only object
Applying: drm/i915/userptr: Enable read-only support on gen8+
Applying: drm/i915: Move rate-limiting request retire to after submission
Applying: drm/i915: Move engine request retirement to intel_engine_cs
Applying: drm/i915: Hold request reference for submission until retirement
Applying: drm/i915/execlists: Switch to rb_root_cached
Applying: drm/i915: Reserve some priority bits for internal use
Applying: drm/i915: Combine multiple internal plists into the same i915_priolist bucket
Applying: drm/i915: Priority boost for new clients
Applying: drm/i915: Priority boost switching to an idle ring
Applying: drm/i915: Refactor export_fence() after i915_vma_move_to_active()
Using index info to reconstruct a base tree...
M	drivers/gpu/drm/i915/selftests/i915_gem_context.c
Falling back to patching base and 3-way merge...
Auto-merging drivers/gpu/drm/i915/selftests/i915_gem_context.c
Applying: drm/i915: Export i915_request_skip()
Applying: drm/i915: Start returning an error from i915_vma_move_to_active()
Using index info to reconstruct a base tree...
M	drivers/gpu/drm/i915/i915_drv.h
M	drivers/gpu/drm/i915/selftests/i915_gem_context.c
M	drivers/gpu/drm/i915/selftests/i915_gem_object.c
Falling back to patching base and 3-way merge...
Auto-merging drivers/gpu/drm/i915/selftests/i915_gem_object.c
CONFLICT (content): Merge conflict in drivers/gpu/drm/i915/selftests/i915_gem_object.c
Auto-merging drivers/gpu/drm/i915/selftests/i915_gem_context.c
CONFLICT (content): Merge conflict in drivers/gpu/drm/i915/selftests/i915_gem_context.c
Auto-merging drivers/gpu/drm/i915/i915_drv.h
error: Failed to merge in the changes.
Patch failed at 0016 drm/i915: Start returning an error from i915_vma_move_to_active()
Use 'git am --show-current-patch' to see the failed patch
When you have resolved this problem, run "git am --continue".
If you prefer to skip this patch, run "git am --skip" instead.
To restore the original branch and stop patching, run "git am --abort".


* Re: [PATCH 17/37] drm/i915: Track vma activity per fence.context, not per engine
  2018-06-29 15:39         ` Chris Wilson
@ 2018-07-02  9:38           ` Tvrtko Ursulin
  0 siblings, 0 replies; 68+ messages in thread
From: Tvrtko Ursulin @ 2018-07-02  9:38 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 29/06/2018 16:39, Chris Wilson wrote:
> Quoting Chris Wilson (2018-06-29 16:36:22)
>> Quoting Tvrtko Ursulin (2018-06-29 16:08:40)
>>>
>>> On 29/06/2018 15:54, Tvrtko Ursulin wrote:
>>>
>>> [snip]
>>>
>>>>> +int i915_vma_move_to_active(struct i915_vma *vma,
>>>>> +                struct i915_request *rq,
>>>>> +                unsigned int flags)
>>>>> +{
>>>>> +    struct drm_i915_gem_object *obj = vma->obj;
>>>>> +    struct i915_gem_active *active;
>>>>> +
>>>>> +    lockdep_assert_held(&rq->i915->drm.struct_mutex);
>>>>> +    GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
>>>>> +
>>>>> +    active = lookup_active(vma, rq->fence.context);
>>>>
>>>> Never mind the radix tree, but fence.context is u64 as well. And
>>>> assigned values are continuously incrementing so once >4G of contexts
>>>> are created and destroyed aliasing is guaranteed with the kernel
>>>> context, or any old one.
>>>>
>>>> It is probably IGT abuse territory, but a) can we be sure it will not
>>>> open up some exploit, and b) can we swallow this problem ourselves?
>>>
>>> Hm.. key the radix tree with the timeline pointer instead? 1:1 to
>>> fence.context, natural long, and automatic lifetime management.
>>
>> Let's see.
> 
> The counter-argument is density: timeline pointers are going to be much
> sparser than my expectations around context ids (even with the silly
> incrementing u64).

Or maybe something built on the basis of i915_syncmap? Same keys and 
same lifetime, just the stored data is different.

Regards,

Tvrtko

* Re: [PATCH 12/37] drm/i915: Priority boost for new clients
  2018-06-29 11:10           ` Tvrtko Ursulin
@ 2018-07-02 10:19             ` Tvrtko Ursulin
  0 siblings, 0 replies; 68+ messages in thread
From: Tvrtko Ursulin @ 2018-07-02 10:19 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 29/06/2018 12:10, Tvrtko Ursulin wrote:
> 
> On 29/06/2018 11:51, Chris Wilson wrote:
>> Quoting Tvrtko Ursulin (2018-06-29 11:36:50)
>>>
>>> On 29/06/2018 11:09, Chris Wilson wrote:
>>>> Quoting Tvrtko Ursulin (2018-06-29 11:04:36)
>>>>>
>>>>> On 29/06/2018 08:53, Chris Wilson wrote:
>>>>>> Taken from an idea used for FQ_CODEL, we give new request flows a
>>>>>> priority boost. These flows are likely to correspond with interactive
>>>>>> tasks and so be more latency sensitive than the long queues. As 
>>>>>> soon as
>>>>>> the client has more than one request in the queue, further 
>>>>>> requests are
>>>>>> not boosted and it settles down into ordinary steady state behaviour.
>>>>>> Such small kicks dramatically help combat the starvation issue, by
>>>>>> allowing each client the opportunity to run even when the system is
>>>>>> under heavy throughput load (within the constraints of the user
>>>>>> selected priority).
>>>>>
>>>>> Any effect on non-micro benchmarks to mention in the commit message as
>>>>> the selling point?
>>>>
>>>> Desktop interactivity, subjective.
>>>> wsim showed a major impact
>>>>>> Testcase: igt/benchmarks/rrul
>>>>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>>>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>>>> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
>>>>>> ---
>>>>>>     drivers/gpu/drm/i915/i915_request.c   | 16 ++++++++++++++--
>>>>>>     drivers/gpu/drm/i915/i915_scheduler.h |  4 +++-
>>>>>>     2 files changed, 17 insertions(+), 3 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/gpu/drm/i915/i915_request.c 
>>>>>> b/drivers/gpu/drm/i915/i915_request.c
>>>>>> index 14bf0be6f994..2d7a785dd88c 100644
>>>>>> --- a/drivers/gpu/drm/i915/i915_request.c
>>>>>> +++ b/drivers/gpu/drm/i915/i915_request.c
>>>>>> @@ -1052,8 +1052,20 @@ void i915_request_add(struct i915_request 
>>>>>> *request)
>>>>>>          */
>>>>>>         local_bh_disable();
>>>>>>         rcu_read_lock(); /* RCU serialisation for set-wedged 
>>>>>> protection */
>>>>>> -     if (engine->schedule)
>>>>>> -             engine->schedule(request, 
>>>>>> &request->gem_context->sched);
>>>>>> +     if (engine->schedule) {
>>>>>> +             struct i915_sched_attr attr = 
>>>>>> request->gem_context->sched;
>>>>>> +
>>>>>> +             /*
>>>>>> +              * Boost priorities to new clients (new request flows).
>>>>>> +              *
>>>>>> +              * Allow interactive/synchronous clients to jump 
>>>>>> ahead of
>>>>>> +              * the bulk clients. (FQ_CODEL)
>>>>>> +              */
>>>>>> +             if (!prev || i915_request_completed(prev))
>>>>>> +                     attr.priority |= I915_PRIORITY_NEWCLIENT;
>>>>>
>>>>> Now a "lucky" client can always get higher priority and keep preempting
>>>>> everyone else by just timing its submissions right. So I have big
>>>>> reservations on this one.
>>>>
>>>> Lucky being someone who is _idle_, everyone else being steady state. 
>>>> You
>>>> can't keep getting lucky and stealing the show.
>>>
>>> Why couldn't it? All that is needed is to send a new execbuf after the
>>> previous has completed.
>>>
>>> 1. First ctx A eb -> priority boost
>>> 2. Other people get back in and start executing
>>> 3. Another ctx A eb -> first has completed -> another priority boost ->
>>> work from 2) is preempted
>>> 4. Goto 2.
>>
>> So you have one client spinning, it's going to win most races and starve
>> the system, simply by owning struct_mutex. We give the other starving
>> steady-state clients an opportunity to submit ahead of the spinner when
>> they come to resubmit.
> 
> What do you mean by spinning and owning struct_mutex?
> 
> I was thinking, for example, of client A sending a 1ms batch and client B 
> executing a 10ms batch.
> 
> If client A keeps submitting its batch at 1ms intervals when will client 
> B complete?

I was thinking over the weekend that a scheme where we check that at 
least something from a different timeline has been completed on an 
engine might be acceptable. Effectively that may be just checking that a 
global seqno greater than that of the previous request on this timeline 
has been completed.
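
As a rough sketch of that test (hypothetical helper, not driver code; the
two seqno values are assumed to come from the engine's completed seqno
and from the previous request on this timeline):

#include <stdint.h>
#include <stdbool.h>

/*
 * Treat a submission as a "new client" if the engine has completed a
 * request beyond the previous request on this timeline, i.e. some other
 * timeline's work has run in between (wrap-safe signed seqno compare).
 */
bool boost_as_new_client(uint32_t engine_completed_seqno,
			 uint32_t prev_request_global_seqno)
{
	return (int32_t)(engine_completed_seqno -
			 prev_request_global_seqno) > 0;
}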

But then there is a problem with the virtual engine which would make the 
test too strict - it could fail to apply the new client boost depending 
on which physical engine the request got assigned to. Unless we take the 
position that too strict is better than too lax.

A secondary problem is the dependency chains. Just because a 
client/request is classified as new on an engine doesn't mean it is OK 
to boost all its dependencies.

But applying the "something executed since its last submission = new 
client" rule across the whole dependency chain feels perhaps too 
complicated. It would require a pass over the dep list, checking this 
criterion on all involved engines, before the new client boost could be 
applied.

Thoughts?

Regards,

Tvrtko

* Re: [PATCH 09/37] drm/i915/execlists: Switch to rb_root_cached
  2018-06-29  7:53 ` [PATCH 09/37] drm/i915/execlists: Switch to rb_root_cached Chris Wilson
@ 2018-07-11 13:20   ` Tvrtko Ursulin
  0 siblings, 0 replies; 68+ messages in thread
From: Tvrtko Ursulin @ 2018-07-11 13:20 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 29/06/2018 08:53, Chris Wilson wrote:
> The kernel recently gained an augmented rbtree with the purpose of
> caching the leftmost element of the rbtree, a frequent optimisation to
> avoid calls to rb_first() which is also employed by the
> execlists->queue. Switch from our open-coded cache to the library.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/intel_engine_cs.c      |  7 ++---
>   drivers/gpu/drm/i915/intel_guc_submission.c | 12 +++-----
>   drivers/gpu/drm/i915/intel_lrc.c            | 32 +++++++--------------
>   drivers/gpu/drm/i915/intel_ringbuffer.h     |  7 +----
>   4 files changed, 19 insertions(+), 39 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index 457003311b74..14d5b673fc27 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -464,8 +464,7 @@ static void intel_engine_init_execlist(struct intel_engine_cs *engine)
>   	GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);
>   
>   	execlists->queue_priority = INT_MIN;
> -	execlists->queue = RB_ROOT;
> -	execlists->first = NULL;
> +	execlists->queue = RB_ROOT_CACHED;
>   }
>   
>   /**
> @@ -1000,7 +999,7 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine)
>   	}
>   
>   	/* ELSP is empty, but there are ready requests? E.g. after reset */
> -	if (READ_ONCE(engine->execlists.first))
> +	if (!RB_EMPTY_ROOT(&engine->execlists.queue.rb_root))
>   		return false;
>   
>   	/* Ring stopped? */
> @@ -1615,7 +1614,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
>   	last = NULL;
>   	count = 0;
>   	drm_printf(m, "\t\tQueue priority: %d\n", execlists->queue_priority);
> -	for (rb = execlists->first; rb; rb = rb_next(rb)) {
> +	for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
>   		struct i915_priolist *p =
>   			rb_entry(rb, typeof(*p), node);
>   
> diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
> index 05449f636d94..9a2c6856a71e 100644
> --- a/drivers/gpu/drm/i915/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/intel_guc_submission.c
> @@ -694,9 +694,6 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
>   
>   	lockdep_assert_held(&engine->timeline.lock);
>   
> -	rb = execlists->first;
> -	GEM_BUG_ON(rb_first(&execlists->queue) != rb);
> -
>   	if (port_isset(port)) {
>   		if (intel_engine_has_preemption(engine)) {
>   			struct guc_preempt_work *preempt_work =
> @@ -718,7 +715,7 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
>   	}
>   	GEM_BUG_ON(port_isset(port));
>   
> -	while (rb) {
> +	while ((rb = rb_first_cached(&execlists->queue))) {
>   		struct i915_priolist *p = to_priolist(rb);
>   		struct i915_request *rq, *rn;
>   
> @@ -743,15 +740,13 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
>   			submit = true;
>   		}
>   
> -		rb = rb_next(rb);
> -		rb_erase(&p->node, &execlists->queue);
> +		rb_erase_cached(&p->node, &execlists->queue);
>   		INIT_LIST_HEAD(&p->requests);
>   		if (p->priority != I915_PRIORITY_NORMAL)
>   			kmem_cache_free(engine->i915->priorities, p);
>   	}
>   done:
>   	execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN;
> -	execlists->first = rb;
>   	if (submit)
>   		port_assign(port, last);
>   	if (last)
> @@ -760,7 +755,8 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
>   	/* We must always keep the beast fed if we have work piled up */
>   	GEM_BUG_ON(port_isset(execlists->port) &&
>   		   !execlists_is_active(execlists, EXECLISTS_ACTIVE_USER));
> -	GEM_BUG_ON(execlists->first && !port_isset(execlists->port));
> +	GEM_BUG_ON(rb_first_cached(&execlists->queue) &&
> +		   !port_isset(execlists->port));
>   
>   	return submit;
>   }
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index a6bc50d7195e..422753c8641f 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -273,7 +273,7 @@ lookup_priolist(struct intel_engine_cs *engine, int prio)
>   find_priolist:
>   	/* most positive priority is scheduled first, equal priorities fifo */
>   	rb = NULL;
> -	parent = &execlists->queue.rb_node;
> +	parent = &execlists->queue.rb_root.rb_node;
>   	while (*parent) {
>   		rb = *parent;
>   		p = to_priolist(rb);
> @@ -311,10 +311,7 @@ lookup_priolist(struct intel_engine_cs *engine, int prio)
>   	p->priority = prio;
>   	INIT_LIST_HEAD(&p->requests);
>   	rb_link_node(&p->node, rb, parent);
> -	rb_insert_color(&p->node, &execlists->queue);
> -
> -	if (first)
> -		execlists->first = &p->node;
> +	rb_insert_color_cached(&p->node, &execlists->queue, first);
>   
>   	return p;
>   }
> @@ -598,9 +595,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   	 * and context switches) submission.
>   	 */
>   
> -	rb = execlists->first;
> -	GEM_BUG_ON(rb_first(&execlists->queue) != rb);
> -
>   	if (last) {
>   		/*
>   		 * Don't resubmit or switch until all outstanding
> @@ -662,7 +656,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   		last->tail = last->wa_tail;
>   	}
>   
> -	while (rb) {
> +	while ((rb = rb_first_cached(&execlists->queue))) {
>   		struct i915_priolist *p = to_priolist(rb);
>   		struct i915_request *rq, *rn;
>   
> @@ -721,8 +715,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   			submit = true;
>   		}
>   
> -		rb = rb_next(rb);
> -		rb_erase(&p->node, &execlists->queue);
> +		rb_erase_cached(&p->node, &execlists->queue);
>   		INIT_LIST_HEAD(&p->requests);
>   		if (p->priority != I915_PRIORITY_NORMAL)
>   			kmem_cache_free(engine->i915->priorities, p);
> @@ -748,14 +741,14 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   	execlists->queue_priority =
>   		port != execlists->port ? rq_prio(last) : INT_MIN;
>   
> -	execlists->first = rb;
>   	if (submit) {
>   		port_assign(port, last);
>   		execlists_submit_ports(engine);
>   	}
>   
>   	/* We must always keep the beast fed if we have work piled up */
> -	GEM_BUG_ON(execlists->first && !port_isset(execlists->port));
> +	GEM_BUG_ON(rb_first_cached(&execlists->queue) &&
> +		   !port_isset(execlists->port));
>   
>   	/* Re-evaluate the executing context setup after each preemptive kick */
>   	if (last)
> @@ -915,8 +908,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
>   	}
>   
>   	/* Flush the queued requests to the timeline list (for retiring). */
> -	rb = execlists->first;
> -	while (rb) {
> +	while ((rb = rb_first_cached(&execlists->queue))) {
>   		struct i915_priolist *p = to_priolist(rb);
>   
>   		list_for_each_entry_safe(rq, rn, &p->requests, sched.link) {
> @@ -926,8 +918,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
>   			__i915_request_submit(rq);
>   		}
>   
> -		rb = rb_next(rb);
> -		rb_erase(&p->node, &execlists->queue);
> +		rb_erase_cached(&p->node, &execlists->queue);
>   		INIT_LIST_HEAD(&p->requests);
>   		if (p->priority != I915_PRIORITY_NORMAL)
>   			kmem_cache_free(engine->i915->priorities, p);
> @@ -936,8 +927,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
>   	/* Remaining _unready_ requests will be nop'ed when submitted */
>   
>   	execlists->queue_priority = INT_MIN;
> -	execlists->queue = RB_ROOT;
> -	execlists->first = NULL;
> +	execlists->queue = RB_ROOT_CACHED;
>   	GEM_BUG_ON(port_isset(execlists->port));
>   
>   	spin_unlock_irqrestore(&engine->timeline.lock, flags);
> @@ -1183,7 +1173,7 @@ static void execlists_submit_request(struct i915_request *request)
>   
>   	queue_request(engine, &request->sched, rq_prio(request));
>   
> -	GEM_BUG_ON(!engine->execlists.first);
> +	GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
>   	GEM_BUG_ON(list_empty(&request->sched.link));
>   
>   	submit_queue(engine, rq_prio(request));
> @@ -2036,7 +2026,7 @@ static void execlists_reset_finish(struct intel_engine_cs *engine)
>   	struct intel_engine_execlists * const execlists = &engine->execlists;
>   
>   	/* After a GPU reset, we may have requests to replay */
> -	if (execlists->first)
> +	if (!RB_EMPTY_ROOT(&execlists->queue.rb_root))
>   		tasklet_schedule(&execlists->tasklet);
>   
>   	/*
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index a1aff360d0ce..923875500828 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -292,12 +292,7 @@ struct intel_engine_execlists {
>   	/**
>   	 * @queue: queue of requests, in priority lists
>   	 */
> -	struct rb_root queue;
> -
> -	/**
> -	 * @first: leftmost level in priority @queue
> -	 */
> -	struct rb_node *first;
> +	struct rb_root_cached queue;
>   
>   	/**
>   	 * @csb_read: control register for Context Switch buffer
> 

All checks out AFAICT. Nice that we can now simplify!
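
For anyone unfamiliar with the cached flavour, the generic pattern the
patch switches to looks roughly like this (a minimal sketch against the
plain <linux/rbtree.h> API, not i915 code; ordering follows the
most-positive-priority-first convention of lookup_priolist()):

#include <linux/rbtree.h>

struct prio_node {
	struct rb_node node;
	int prio;
};

/* Insert while tracking whether the new node becomes the leftmost
 * element, so that rb_first_cached() remains O(1). */
static void prio_insert(struct rb_root_cached *root, struct prio_node *pn)
{
	struct rb_node **link = &root->rb_root.rb_node, *parent = NULL;
	bool leftmost = true;

	while (*link) {
		struct prio_node *cur;

		parent = *link;
		cur = rb_entry(parent, struct prio_node, node);
		if (pn->prio > cur->prio) {
			link = &parent->rb_left;
		} else {
			link = &parent->rb_right;
			leftmost = false;
		}
	}

	rb_link_node(&pn->node, parent, link);
	rb_insert_color_cached(&pn->node, root, leftmost);
}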

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

end of thread

Thread overview: 68+ messages
2018-06-29  7:53 [PATCH 01/37] drm/i915/gtt: Add read only pages to gen8_pte_encode Chris Wilson
2018-06-29  7:53 ` [PATCH 02/37] drm/i915/gtt: Read-only pages for insert_entries on bdw+ Chris Wilson
2018-06-29  7:53 ` [PATCH 03/37] drm/i915: Prevent writing into a read-only object via a GGTT mmap Chris Wilson
2018-06-29  7:53 ` [PATCH 04/37] drm/i915: Reject attempted pwrites into a read-only object Chris Wilson
2018-06-29  7:53 ` [PATCH 05/37] drm/i915/userptr: Enable read-only support on gen8+ Chris Wilson
2018-06-29  7:53 ` [PATCH 06/37] drm/i915: Move rate-limiting request retire to after submission Chris Wilson
2018-06-29 10:00   ` Tvrtko Ursulin
2018-06-29 10:10     ` Chris Wilson
2018-06-29  7:53 ` [PATCH 07/37] drm/i915: Move engine request retirement to intel_engine_cs Chris Wilson
2018-06-29  7:53 ` [PATCH 08/37] drm/i915: Hold request reference for submission until retirement Chris Wilson
2018-06-29  7:53 ` [PATCH 09/37] drm/i915/execlists: Switch to rb_root_cached Chris Wilson
2018-07-11 13:20   ` Tvrtko Ursulin
2018-06-29  7:53 ` [PATCH 10/37] drm/i915: Reserve some priority bits for internal use Chris Wilson
2018-06-29  7:53 ` [PATCH 11/37] drm/i915: Combine multiple internal plists into the same i915_priolist bucket Chris Wilson
2018-06-29  7:53 ` [PATCH 12/37] drm/i915: Priority boost for new clients Chris Wilson
2018-06-29 10:04   ` Tvrtko Ursulin
2018-06-29 10:09     ` Chris Wilson
2018-06-29 10:36       ` Tvrtko Ursulin
2018-06-29 10:41         ` Chris Wilson
2018-06-29 10:51         ` Chris Wilson
2018-06-29 11:10           ` Tvrtko Ursulin
2018-07-02 10:19             ` Tvrtko Ursulin
2018-06-29  7:53 ` [PATCH 13/37] drm/i915: Priority boost switching to an idle ring Chris Wilson
2018-06-29 10:08   ` Tvrtko Ursulin
2018-06-29 10:15     ` Chris Wilson
2018-06-29 10:41       ` Tvrtko Ursulin
2018-06-29  7:53 ` [PATCH 14/37] drm/i915: Refactor export_fence() after i915_vma_move_to_active() Chris Wilson
2018-06-29 12:00   ` Tvrtko Ursulin
2018-06-29  7:53 ` [PATCH 15/37] drm/i915: Export i915_request_skip() Chris Wilson
2018-06-29 12:10   ` Tvrtko Ursulin
2018-06-29 12:15     ` Chris Wilson
2018-06-29  7:53 ` [PATCH 16/37] drm/i915: Start returning an error from i915_vma_move_to_active() Chris Wilson
2018-06-29 12:17   ` Tvrtko Ursulin
2018-06-29  7:53 ` [PATCH 17/37] drm/i915: Track vma activity per fence.context, not per engine Chris Wilson
2018-06-29 14:54   ` Tvrtko Ursulin
2018-06-29 15:03     ` Chris Wilson
2018-06-29 15:34       ` Chris Wilson
2018-06-29 15:08     ` Tvrtko Ursulin
2018-06-29 15:36       ` Chris Wilson
2018-06-29 15:39         ` Chris Wilson
2018-07-02  9:38           ` Tvrtko Ursulin
2018-06-29 22:03   ` [PATCH v2] " Chris Wilson
2018-06-29  7:53 ` [PATCH 18/37] drm/i915: Track the last-active inside the i915_vma Chris Wilson
2018-06-29 22:01   ` [PATCH v2] " Chris Wilson
2018-06-29  7:53 ` [PATCH 19/37] drm/i915: Stop tracking MRU activity on VMA Chris Wilson
2018-06-29  7:53 ` [PATCH 20/37] drm/i915: Introduce the i915_user_extension_method Chris Wilson
2018-06-29  7:53 ` [PATCH 21/37] drm/i915: Extend CREATE_CONTEXT to allow inheritance ala clone() Chris Wilson
2018-06-29  7:53 ` [PATCH 22/37] drm/i915: Allow contexts to share a single timeline across all engines Chris Wilson
2018-06-29  7:53 ` [PATCH 23/37] drm/i915: Fix I915_EXEC_RING_MASK Chris Wilson
2018-06-29  7:53 ` [PATCH 24/37] drm/i915: Re-arrange execbuf so context is known before engine Chris Wilson
2018-06-29  7:53 ` [PATCH 25/37] drm/i915: Allow a context to define its set of engines Chris Wilson
2018-06-29  7:53 ` [PATCH 26/37] drm/i915/execlists: Flush the tasklet before unpinning Chris Wilson
2018-06-29  7:53 ` [PATCH 27/37] drm/i915/execlists: Refactor out can_merge_rq() Chris Wilson
2018-06-29  7:53 ` [PATCH 28/37] drm/i915: Replace nested subclassing with explicit subclasses Chris Wilson
2018-06-29  7:53 ` [PATCH 29/37] RFC drm/i915: Load balancing across a virtual engine Chris Wilson
2018-06-29  7:53 ` [PATCH 30/37] drm/i915: Introduce i915_address_space.mutex Chris Wilson
2018-06-29  7:53 ` [PATCH 31/37] drm/i915: Move fence register tracking to GGTT Chris Wilson
2018-06-29  7:53 ` [PATCH 32/37] drm/i915: Convert fences to use a GGTT lock rather than struct_mutex Chris Wilson
2018-06-29  7:53 ` [PATCH 33/37] drm/i915: Tidy i915_gem_suspend() Chris Wilson
2018-06-29  7:53 ` [PATCH 34/37] drm/i915: Move fence-reg interface to i915_gem_fence_reg.h Chris Wilson
2018-06-29  7:53 ` [PATCH 35/37] drm/i915: Dynamically allocate the array of drm_i915_gem_fence_reg Chris Wilson
2018-06-29  7:53 ` [PATCH 36/37] drm/i915: Pull all the reset functionality together into i915_reset.c Chris Wilson
2018-06-29  7:53 ` [PATCH 37/37] drm/i915: Remove GPU reset dependence on struct_mutex Chris Wilson
2018-06-29  8:58 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/37] drm/i915/gtt: Add read only pages to gen8_pte_encode Patchwork
2018-06-29  9:14 ` ✗ Fi.CI.SPARSE: " Patchwork
2018-06-29  9:16 ` ✓ Fi.CI.BAT: success " Patchwork
2018-06-29 11:53 ` ✗ Fi.CI.IGT: failure " Patchwork
2018-06-29 22:38 ` ✗ Fi.CI.BAT: failure for series starting with [01/37] drm/i915/gtt: Add read only pages to gen8_pte_encode (rev3) Patchwork
