All of lore.kernel.org
 help / color / mirror / Atom feed
* [Intel-gfx] [PATCH] drm/i915: Refine VT-d scanout workaround
@ 2021-02-10 23:39 Chris Wilson
  2021-02-11  0:28 ` [Intel-gfx] ✓ Fi.CI.BAT: success for " Patchwork
                   ` (3 more replies)
  0 siblings, 4 replies; 9+ messages in thread
From: Chris Wilson @ 2021-02-10 23:39 UTC (permalink / raw)
  To: intel-gfx; +Cc: Matthew Auld, Chris Wilson

VT-d may cause overfetch of the scanout PTE, both before and after the
vma (depending on the scanout orientation). bspec recommends that we
provide a tile-row in either direction, and suggests using 160 PTE,
warning that the accesses will wrap around the ends of the GGTT.
Currently, we fill the entire GGTT with scratch pages when using VT-d to
always ensure there are valid entries around every vma, including
scanout. However, writing every PTE is slow as on recent devices we
perform 8MiB of uncached writes, incurring an extra 100ms during resume.

If instead we focus on only putting guard pages around scanout, we can
avoid touching the whole GGTT. To avoid having to introduce extra nodes
around each scanout vma, we adjust the scanout drm_mm_node to be smaller
than the allocated space, and fix up the extra PTE during dma binding.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_domain.c |  3 ++
 drivers/gpu/drm/i915/gt/intel_ggtt.c       | 37 ++++++++--------------
 drivers/gpu/drm/i915/i915_gem_gtt.h        |  1 +
 drivers/gpu/drm/i915/i915_vma.c            | 23 ++++++++++++++
 drivers/gpu/drm/i915/i915_vma_types.h      |  1 +
 5 files changed, 41 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index 0478b069c202..9f2ccc255ca1 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -345,6 +345,9 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 	if (ret)
 		goto err;
 
+	if (intel_scanout_needs_vtd_wa(i915))
+		flags |= PIN_VTD;
+
 	/*
 	 * As the user may map the buffer once pinned in the display plane
 	 * (e.g. libkms for the bootup splash), we have to ensure that we
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index b0b8ded834f0..416f77f48561 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -238,6 +238,11 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
 
 	gte = (gen8_pte_t __iomem *)ggtt->gsm;
 	gte += vma->node.start / I915_GTT_PAGE_SIZE;
+
+	end = gte - vma->guard / I915_GTT_PAGE_SIZE;
+	while (end < gte)
+		gen8_set_pte(end++, vm->scratch[0]->encode);
+
 	end = gte + vma->node.size / I915_GTT_PAGE_SIZE;
 
 	for_each_sgt_daddr(addr, iter, vma->pages)
@@ -245,6 +250,7 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
 	GEM_BUG_ON(gte > end);
 
 	/* Fill the allocated but "unused" space beyond the end of the buffer */
+	end += vma->guard / I915_GTT_PAGE_SIZE;
 	while (gte < end)
 		gen8_set_pte(gte++, vm->scratch[0]->encode);
 
@@ -289,6 +295,11 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
 
 	gte = (gen6_pte_t __iomem *)ggtt->gsm;
 	gte += vma->node.start / I915_GTT_PAGE_SIZE;
+
+	end = gte - vma->guard / I915_GTT_PAGE_SIZE;
+	while (end < gte)
+		gen8_set_pte(end++, vm->scratch[0]->encode);
+
 	end = gte + vma->node.size / I915_GTT_PAGE_SIZE;
 
 	for_each_sgt_daddr(addr, iter, vma->pages)
@@ -296,6 +307,7 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
 	GEM_BUG_ON(gte > end);
 
 	/* Fill the allocated but "unused" space beyond the end of the buffer */
+	end += vma->guard / I915_GTT_PAGE_SIZE;
 	while (gte < end)
 		iowrite32(vm->scratch[0]->encode, gte++);
 
@@ -311,27 +323,6 @@ static void nop_clear_range(struct i915_address_space *vm,
 {
 }
 
-static void gen8_ggtt_clear_range(struct i915_address_space *vm,
-				  u64 start, u64 length)
-{
-	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
-	unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
-	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
-	const gen8_pte_t scratch_pte = vm->scratch[0]->encode;
-	gen8_pte_t __iomem *gtt_base =
-		(gen8_pte_t __iomem *)ggtt->gsm + first_entry;
-	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
-	int i;
-
-	if (WARN(num_entries > max_entries,
-		 "First entry = %d; Num entries = %d (max=%d)\n",
-		 first_entry, num_entries, max_entries))
-		num_entries = max_entries;
-
-	for (i = 0; i < num_entries; i++)
-		gen8_set_pte(&gtt_base[i], scratch_pte);
-}
-
 static void bxt_vtd_ggtt_wa(struct i915_address_space *vm)
 {
 	/*
@@ -898,8 +889,6 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
 	ggtt->vm.cleanup = gen6_gmch_remove;
 	ggtt->vm.insert_page = gen8_ggtt_insert_page;
 	ggtt->vm.clear_range = nop_clear_range;
-	if (intel_scanout_needs_vtd_wa(i915))
-		ggtt->vm.clear_range = gen8_ggtt_clear_range;
 
 	ggtt->vm.insert_entries = gen8_ggtt_insert_entries;
 
@@ -1045,7 +1034,7 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt)
 	ggtt->vm.alloc_pt_dma = alloc_pt_dma;
 
 	ggtt->vm.clear_range = nop_clear_range;
-	if (!HAS_FULL_PPGTT(i915) || intel_scanout_needs_vtd_wa(i915))
+	if (!HAS_FULL_PPGTT(i915))
 		ggtt->vm.clear_range = gen6_ggtt_clear_range;
 	ggtt->vm.insert_page = gen6_ggtt_insert_page;
 	ggtt->vm.insert_entries = gen6_ggtt_insert_entries;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index c9b0ee5e1d23..8a2dfc7144cf 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -41,6 +41,7 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
 #define PIN_HIGH		BIT_ULL(5)
 #define PIN_OFFSET_BIAS		BIT_ULL(6)
 #define PIN_OFFSET_FIXED	BIT_ULL(7)
+#define PIN_VTD			BIT_ULL(8)
 
 #define PIN_GLOBAL		BIT_ULL(10) /* I915_VMA_GLOBAL_BIND */
 #define PIN_USER		BIT_ULL(11) /* I915_VMA_LOCAL_BIND */
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index caa9b041616b..dccd36ff1a6d 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -38,6 +38,8 @@
 #include "i915_trace.h"
 #include "i915_vma.h"
 
+#define VTD_GUARD roundup_pow_of_two(160 * SZ_4K) /* 160 PTE padding */
+
 static struct i915_global_vma {
 	struct i915_global base;
 	struct kmem_cache *slab_vmas;
@@ -552,6 +554,9 @@ bool i915_vma_misplaced(const struct i915_vma *vma,
 	    vma->node.start != (flags & PIN_OFFSET_MASK))
 		return true;
 
+	if (flags & PIN_VTD && vma->guard < VTD_GUARD)
+		return true;
+
 	return false;
 }
 
@@ -637,6 +642,13 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 				  alignment, vma->fence_alignment);
 	}
 
+	/* VT-d requires padding before/after the vma */
+	if (flags & PIN_VTD) {
+		alignment = max_t(typeof(alignment), alignment, VTD_GUARD);
+		vma->guard = alignment;
+		size += 2 * vma->guard;
+	}
+
 	GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
 	GEM_BUG_ON(!IS_ALIGNED(alignment, I915_GTT_MIN_ALIGNMENT));
 	GEM_BUG_ON(!is_power_of_2(alignment));
@@ -725,6 +737,11 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 
 	list_add_tail(&vma->vm_link, &vma->vm->bound_list);
 
+	if (flags & PIN_VTD) {
+		vma->node.start += vma->guard;
+		vma->node.size -= 2 * vma->guard;
+	}
+
 	return 0;
 }
 
@@ -734,6 +751,12 @@ i915_vma_detach(struct i915_vma *vma)
 	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
 	GEM_BUG_ON(i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND));
 
+	if (vma->guard) {
+		vma->node.start -= vma->guard;
+		vma->node.size += 2 * vma->guard;
+		vma->guard = 0;
+	}
+
 	/*
 	 * And finally now the object is completely decoupled from this
 	 * vma, we can drop its hold on the backing storage and allow
diff --git a/drivers/gpu/drm/i915/i915_vma_types.h b/drivers/gpu/drm/i915/i915_vma_types.h
index f5cb848b7a7e..bafec4e0b042 100644
--- a/drivers/gpu/drm/i915/i915_vma_types.h
+++ b/drivers/gpu/drm/i915/i915_vma_types.h
@@ -190,6 +190,7 @@ struct i915_vma {
 
 	u32 fence_size;
 	u32 fence_alignment;
+	u32 guard;
 
 	/**
 	 * Count of the number of times this vma has been opened by different
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [Intel-gfx] ✓ Fi.CI.BAT: success for drm/i915: Refine VT-d scanout workaround
  2021-02-10 23:39 [Intel-gfx] [PATCH] drm/i915: Refine VT-d scanout workaround Chris Wilson
@ 2021-02-11  0:28 ` Patchwork
  2021-02-11 13:16 ` [Intel-gfx] ✓ Fi.CI.IGT: " Patchwork
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 9+ messages in thread
From: Patchwork @ 2021-02-11  0:28 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx


[-- Attachment #1.1: Type: text/plain, Size: 2863 bytes --]

== Series Details ==

Series: drm/i915: Refine VT-d scanout workaround
URL   : https://patchwork.freedesktop.org/series/86967/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_9761 -> Patchwork_19656
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/index.html

Known issues
------------

  Here are the changes found in Patchwork_19656 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_exec_suspend@basic-s0:
    - fi-glk-dsi:         [PASS][1] -> [DMESG-WARN][2] ([i915#2943])
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/fi-glk-dsi/igt@gem_exec_suspend@basic-s0.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/fi-glk-dsi/igt@gem_exec_suspend@basic-s0.html

  * igt@gem_tiled_blits@basic:
    - fi-tgl-y:           [PASS][3] -> [DMESG-WARN][4] ([i915#402]) +1 similar issue
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/fi-tgl-y/igt@gem_tiled_blits@basic.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/fi-tgl-y/igt@gem_tiled_blits@basic.html

  * igt@kms_chamelium@dp-edid-read:
    - fi-kbl-7500u:       [PASS][5] -> [DMESG-WARN][6] ([i915#165])
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/fi-kbl-7500u/igt@kms_chamelium@dp-edid-read.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/fi-kbl-7500u/igt@kms_chamelium@dp-edid-read.html

  
#### Possible fixes ####

  * igt@prime_self_import@basic-with_two_bos:
    - fi-tgl-y:           [DMESG-WARN][7] ([i915#402]) -> [PASS][8] +1 similar issue
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/fi-tgl-y/igt@prime_self_import@basic-with_two_bos.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/fi-tgl-y/igt@prime_self_import@basic-with_two_bos.html

  
  [i915#165]: https://gitlab.freedesktop.org/drm/intel/issues/165
  [i915#2943]: https://gitlab.freedesktop.org/drm/intel/issues/2943
  [i915#402]: https://gitlab.freedesktop.org/drm/intel/issues/402


Participating hosts (44 -> 40)
------------------------------

  Missing    (4): fi-ilk-m540 fi-bsw-cyan fi-bdw-samus fi-hsw-4200u 


Build changes
-------------

  * Linux: CI_DRM_9761 -> Patchwork_19656

  CI-20190529: 20190529
  CI_DRM_9761: fc52fc2a7332bd301f802ca3a0444a8fb9fe4f7f @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_6001: d0d6f5e14ef181c93e4b503b05d9c18fa480e09d @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_19656: cb230263531710c4a26ddcd496b272493ea49eda @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

cb2302635317 drm/i915: Refine VT-d scanout workaround

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/index.html

[-- Attachment #1.2: Type: text/html, Size: 3617 bytes --]

[-- Attachment #2: Type: text/plain, Size: 160 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Intel-gfx] ✓ Fi.CI.IGT: success for drm/i915: Refine VT-d scanout workaround
  2021-02-10 23:39 [Intel-gfx] [PATCH] drm/i915: Refine VT-d scanout workaround Chris Wilson
  2021-02-11  0:28 ` [Intel-gfx] ✓ Fi.CI.BAT: success for " Patchwork
@ 2021-02-11 13:16 ` Patchwork
  2021-02-11 14:25 ` [Intel-gfx] [PATCH] " Matthew Auld
  2021-02-11 16:05 ` Ville Syrjälä
  3 siblings, 0 replies; 9+ messages in thread
From: Patchwork @ 2021-02-11 13:16 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx


[-- Attachment #1.1: Type: text/plain, Size: 30263 bytes --]

== Series Details ==

Series: drm/i915: Refine VT-d scanout workaround
URL   : https://patchwork.freedesktop.org/series/86967/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_9761_full -> Patchwork_19656_full
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  

Known issues
------------

  Here are the changes found in Patchwork_19656_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_exec_fair@basic-deadline:
    - shard-kbl:          [PASS][1] -> [FAIL][2] ([i915#2846])
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-kbl1/igt@gem_exec_fair@basic-deadline.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-kbl3/igt@gem_exec_fair@basic-deadline.html

  * igt@gem_exec_fair@basic-none@vcs0:
    - shard-kbl:          [PASS][3] -> [FAIL][4] ([i915#2842])
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-kbl2/igt@gem_exec_fair@basic-none@vcs0.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-kbl6/igt@gem_exec_fair@basic-none@vcs0.html

  * igt@gem_exec_fair@basic-pace-share@rcs0:
    - shard-tglb:         [PASS][5] -> [FAIL][6] ([i915#2842]) +2 similar issues
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-tglb3/igt@gem_exec_fair@basic-pace-share@rcs0.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-tglb5/igt@gem_exec_fair@basic-pace-share@rcs0.html

  * igt@gem_exec_fair@basic-pace@vcs1:
    - shard-iclb:         NOTRUN -> [FAIL][7] ([i915#2842])
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-iclb4/igt@gem_exec_fair@basic-pace@vcs1.html

  * igt@gem_exec_nop@basic-sequential:
    - shard-iclb:         [PASS][8] -> [DMESG-WARN][9] ([i915#1226]) +38 similar issues
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-iclb7/igt@gem_exec_nop@basic-sequential.html
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-iclb8/igt@gem_exec_nop@basic-sequential.html

  * igt@gem_huc_copy@huc-copy:
    - shard-tglb:         [PASS][10] -> [SKIP][11] ([i915#2190])
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-tglb2/igt@gem_huc_copy@huc-copy.html
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-tglb6/igt@gem_huc_copy@huc-copy.html

  * igt@gem_pread@exhaustion:
    - shard-apl:          NOTRUN -> [WARN][12] ([i915#2658])
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-apl6/igt@gem_pread@exhaustion.html
    - shard-skl:          NOTRUN -> [WARN][13] ([i915#2658])
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-skl6/igt@gem_pread@exhaustion.html

  * igt@gem_userptr_blits@process-exit-mmap@wb:
    - shard-apl:          NOTRUN -> [SKIP][14] ([fdo#109271] / [i915#1699]) +3 similar issues
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-apl6/igt@gem_userptr_blits@process-exit-mmap@wb.html

  * igt@gen9_exec_parse@batch-without-end:
    - shard-tglb:         NOTRUN -> [SKIP][15] ([fdo#112306])
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-tglb3/igt@gen9_exec_parse@batch-without-end.html

  * igt@gen9_exec_parse@bb-oversize:
    - shard-tglb:         NOTRUN -> [SKIP][16] ([i915#2527])
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-tglb3/igt@gen9_exec_parse@bb-oversize.html

  * igt@i915_hangman@engine-error@vecs0:
    - shard-kbl:          NOTRUN -> [SKIP][17] ([fdo#109271]) +38 similar issues
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-kbl2/igt@i915_hangman@engine-error@vecs0.html

  * igt@i915_pm_dc@dc6-psr:
    - shard-iclb:         [PASS][18] -> [FAIL][19] ([i915#454])
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-iclb7/igt@i915_pm_dc@dc6-psr.html
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-iclb4/igt@i915_pm_dc@dc6-psr.html

  * igt@i915_pm_lpsp@screens-disabled:
    - shard-tglb:         NOTRUN -> [SKIP][20] ([i915#1902])
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-tglb3/igt@i915_pm_lpsp@screens-disabled.html

  * igt@i915_pm_rpm@system-suspend-execbuf:
    - shard-skl:          [PASS][21] -> [INCOMPLETE][22] ([i915#151])
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-skl3/igt@i915_pm_rpm@system-suspend-execbuf.html
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-skl5/igt@i915_pm_rpm@system-suspend-execbuf.html

  * igt@kms_chamelium@vga-hpd-for-each-pipe:
    - shard-kbl:          NOTRUN -> [SKIP][23] ([fdo#109271] / [fdo#111827]) +2 similar issues
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-kbl2/igt@kms_chamelium@vga-hpd-for-each-pipe.html

  * igt@kms_color_chamelium@pipe-b-ctm-0-75:
    - shard-apl:          NOTRUN -> [SKIP][24] ([fdo#109271] / [fdo#111827]) +12 similar issues
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-apl3/igt@kms_color_chamelium@pipe-b-ctm-0-75.html

  * igt@kms_color_chamelium@pipe-c-ctm-negative:
    - shard-skl:          NOTRUN -> [SKIP][25] ([fdo#109271] / [fdo#111827]) +1 similar issue
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-skl8/igt@kms_color_chamelium@pipe-c-ctm-negative.html

  * igt@kms_color_chamelium@pipe-d-ctm-red-to-blue:
    - shard-tglb:         NOTRUN -> [SKIP][26] ([fdo#109284] / [fdo#111827]) +1 similar issue
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-tglb3/igt@kms_color_chamelium@pipe-d-ctm-red-to-blue.html

  * igt@kms_cursor_crc@pipe-b-cursor-64x21-offscreen:
    - shard-skl:          NOTRUN -> [FAIL][27] ([i915#54])
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-skl8/igt@kms_cursor_crc@pipe-b-cursor-64x21-offscreen.html

  * igt@kms_cursor_crc@pipe-c-cursor-512x512-sliding:
    - shard-tglb:         NOTRUN -> [SKIP][28] ([fdo#109279])
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-tglb3/igt@kms_cursor_crc@pipe-c-cursor-512x512-sliding.html

  * igt@kms_cursor_crc@pipe-c-cursor-64x21-offscreen:
    - shard-skl:          [PASS][29] -> [FAIL][30] ([i915#54]) +4 similar issues
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-skl5/igt@kms_cursor_crc@pipe-c-cursor-64x21-offscreen.html
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-skl5/igt@kms_cursor_crc@pipe-c-cursor-64x21-offscreen.html

  * igt@kms_fbcon_fbt@fbc-suspend:
    - shard-kbl:          [PASS][31] -> [INCOMPLETE][32] ([i915#155] / [i915#180] / [i915#636])
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-kbl3/igt@kms_fbcon_fbt@fbc-suspend.html
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-kbl3/igt@kms_fbcon_fbt@fbc-suspend.html

  * igt@kms_flip@flip-vs-expired-vblank-interruptible@c-edp1:
    - shard-skl:          [PASS][33] -> [FAIL][34] ([i915#79])
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-skl10/igt@kms_flip@flip-vs-expired-vblank-interruptible@c-edp1.html
   [34]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-skl1/igt@kms_flip@flip-vs-expired-vblank-interruptible@c-edp1.html

  * igt@kms_flip@flip-vs-suspend-interruptible@c-dp1:
    - shard-apl:          [PASS][35] -> [DMESG-WARN][36] ([i915#180]) +2 similar issues
   [35]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-apl4/igt@kms_flip@flip-vs-suspend-interruptible@c-dp1.html
   [36]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-apl7/igt@kms_flip@flip-vs-suspend-interruptible@c-dp1.html

  * igt@kms_flip@plain-flip-fb-recreate@b-edp1:
    - shard-skl:          [PASS][37] -> [FAIL][38] ([i915#2122]) +1 similar issue
   [37]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-skl2/igt@kms_flip@plain-flip-fb-recreate@b-edp1.html
   [38]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-skl7/igt@kms_flip@plain-flip-fb-recreate@b-edp1.html

  * igt@kms_flip_scaled_crc@flip-32bpp-ytile-to-32bpp-ytileccs:
    - shard-tglb:         NOTRUN -> [SKIP][39] ([i915#2587])
   [39]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-tglb3/igt@kms_flip_scaled_crc@flip-32bpp-ytile-to-32bpp-ytileccs.html
    - shard-apl:          NOTRUN -> [FAIL][40] ([i915#2641])
   [40]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-apl3/igt@kms_flip_scaled_crc@flip-32bpp-ytile-to-32bpp-ytileccs.html

  * igt@kms_flip_scaled_crc@flip-32bpp-ytile-to-32bpp-ytilegen12rcccs:
    - shard-apl:          NOTRUN -> [SKIP][41] ([fdo#109271] / [i915#2672])
   [41]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-apl6/igt@kms_flip_scaled_crc@flip-32bpp-ytile-to-32bpp-ytilegen12rcccs.html

  * igt@kms_frontbuffer_tracking@fbc-2p-primscrn-shrfb-plflip-blt:
    - shard-skl:          NOTRUN -> [SKIP][42] ([fdo#109271]) +20 similar issues
   [42]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-skl8/igt@kms_frontbuffer_tracking@fbc-2p-primscrn-shrfb-plflip-blt.html

  * igt@kms_frontbuffer_tracking@fbc-modesetfrombusy:
    - shard-glk:          [PASS][43] -> [DMESG-WARN][44] ([i915#118] / [i915#95]) +22 similar issues
   [43]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-glk4/igt@kms_frontbuffer_tracking@fbc-modesetfrombusy.html
   [44]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-glk6/igt@kms_frontbuffer_tracking@fbc-modesetfrombusy.html

  * igt@kms_frontbuffer_tracking@fbcpsr-2p-scndscrn-pri-shrfb-draw-mmap-wc:
    - shard-tglb:         NOTRUN -> [SKIP][45] ([fdo#111825]) +2 similar issues
   [45]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-tglb3/igt@kms_frontbuffer_tracking@fbcpsr-2p-scndscrn-pri-shrfb-draw-mmap-wc.html

  * igt@kms_frontbuffer_tracking@fbcpsr-rgb101010-draw-blt:
    - shard-iclb:         [PASS][46] -> [SKIP][47] ([i915#668]) +2 similar issues
   [46]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-iclb5/igt@kms_frontbuffer_tracking@fbcpsr-rgb101010-draw-blt.html
   [47]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-iclb2/igt@kms_frontbuffer_tracking@fbcpsr-rgb101010-draw-blt.html

  * igt@kms_pipe_crc_basic@compare-crc-sanitycheck-pipe-d:
    - shard-apl:          NOTRUN -> [SKIP][48] ([fdo#109271] / [i915#533])
   [48]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-apl7/igt@kms_pipe_crc_basic@compare-crc-sanitycheck-pipe-d.html

  * igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a:
    - shard-kbl:          [PASS][49] -> [DMESG-WARN][50] ([i915#180]) +5 similar issues
   [49]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-kbl4/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a.html
   [50]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-kbl4/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a.html

  * igt@kms_plane_alpha_blend@pipe-a-constant-alpha-max:
    - shard-apl:          NOTRUN -> [FAIL][51] ([fdo#108145] / [i915#265])
   [51]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-apl6/igt@kms_plane_alpha_blend@pipe-a-constant-alpha-max.html

  * igt@kms_plane_alpha_blend@pipe-c-alpha-transparent-fb:
    - shard-kbl:          NOTRUN -> [FAIL][52] ([i915#265])
   [52]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-kbl2/igt@kms_plane_alpha_blend@pipe-c-alpha-transparent-fb.html

  * igt@kms_plane_scaling@scaler-with-clipping-clamping@pipe-c-scaler-with-clipping-clamping:
    - shard-apl:          NOTRUN -> [SKIP][53] ([fdo#109271] / [i915#2733])
   [53]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-apl7/igt@kms_plane_scaling@scaler-with-clipping-clamping@pipe-c-scaler-with-clipping-clamping.html

  * igt@kms_psr2_sf@overlay-primary-update-sf-dmg-area-2:
    - shard-apl:          NOTRUN -> [SKIP][54] ([fdo#109271] / [i915#658]) +1 similar issue
   [54]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-apl7/igt@kms_psr2_sf@overlay-primary-update-sf-dmg-area-2.html

  * igt@kms_psr2_su@frontbuffer:
    - shard-skl:          NOTRUN -> [SKIP][55] ([fdo#109271] / [i915#658])
   [55]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-skl8/igt@kms_psr2_su@frontbuffer.html

  * igt@kms_psr@psr2_sprite_mmap_gtt:
    - shard-iclb:         [PASS][56] -> [SKIP][57] ([fdo#109441]) +1 similar issue
   [56]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-iclb2/igt@kms_psr@psr2_sprite_mmap_gtt.html
   [57]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-iclb6/igt@kms_psr@psr2_sprite_mmap_gtt.html

  * igt@kms_vblank@pipe-d-wait-forked-hang:
    - shard-apl:          NOTRUN -> [SKIP][58] ([fdo#109271]) +77 similar issues
   [58]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-apl3/igt@kms_vblank@pipe-d-wait-forked-hang.html

  * igt@prime_nv_api@i915_self_import_to_different_fd:
    - shard-tglb:         NOTRUN -> [SKIP][59] ([fdo#109291])
   [59]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-tglb3/igt@prime_nv_api@i915_self_import_to_different_fd.html

  
#### Possible fixes ####

  * igt@gem_ctx_persistence@close-replace-race:
    - shard-glk:          [TIMEOUT][60] ([i915#2918]) -> [PASS][61]
   [60]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-glk9/igt@gem_ctx_persistence@close-replace-race.html
   [61]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-glk9/igt@gem_ctx_persistence@close-replace-race.html

  * igt@gem_eio@in-flight-suspend:
    - shard-apl:          [DMESG-WARN][62] ([i915#1037] / [i915#180]) -> [PASS][63]
   [62]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-apl6/igt@gem_eio@in-flight-suspend.html
   [63]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-apl6/igt@gem_eio@in-flight-suspend.html

  * igt@gem_exec_fair@basic-none@vcs1:
    - shard-kbl:          [FAIL][64] ([i915#2842]) -> [PASS][65] +1 similar issue
   [64]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-kbl2/igt@gem_exec_fair@basic-none@vcs1.html
   [65]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-kbl6/igt@gem_exec_fair@basic-none@vcs1.html

  * igt@gem_exec_fair@basic-pace@bcs0:
    - shard-tglb:         [FAIL][66] ([i915#2842]) -> [PASS][67]
   [66]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-tglb7/igt@gem_exec_fair@basic-pace@bcs0.html
   [67]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-tglb5/igt@gem_exec_fair@basic-pace@bcs0.html

  * igt@gem_exec_fair@basic-throttle@rcs0:
    - shard-glk:          [FAIL][68] ([i915#2842]) -> [PASS][69] +1 similar issue
   [68]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-glk9/igt@gem_exec_fair@basic-throttle@rcs0.html
   [69]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-glk4/igt@gem_exec_fair@basic-throttle@rcs0.html
    - shard-iclb:         [FAIL][70] ([i915#2842]) -> [PASS][71]
   [70]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-iclb1/igt@gem_exec_fair@basic-throttle@rcs0.html
   [71]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-iclb5/igt@gem_exec_fair@basic-throttle@rcs0.html

  * igt@gem_exec_schedule@u-fairslice@rcs0:
    - shard-tglb:         [DMESG-WARN][72] ([i915#2803]) -> [PASS][73]
   [72]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-tglb7/igt@gem_exec_schedule@u-fairslice@rcs0.html
   [73]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-tglb3/igt@gem_exec_schedule@u-fairslice@rcs0.html

  * igt@gem_sync@basic-each:
    - shard-apl:          [INCOMPLETE][74] ([i915#2944]) -> [PASS][75]
   [74]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-apl3/igt@gem_sync@basic-each.html
   [75]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-apl3/igt@gem_sync@basic-each.html

  * igt@gem_vm_create@destroy-race:
    - shard-tglb:         [TIMEOUT][76] ([i915#2795]) -> [PASS][77]
   [76]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-tglb8/igt@gem_vm_create@destroy-race.html
   [77]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-tglb3/igt@gem_vm_create@destroy-race.html

  * igt@i915_pm_rpm@system-suspend-modeset:
    - shard-skl:          [INCOMPLETE][78] ([i915#146] / [i915#151]) -> [PASS][79]
   [78]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-skl1/igt@i915_pm_rpm@system-suspend-modeset.html
   [79]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-skl8/igt@i915_pm_rpm@system-suspend-modeset.html

  * igt@i915_suspend@forcewake:
    - shard-kbl:          [INCOMPLETE][80] ([i915#155] / [i915#636]) -> [PASS][81]
   [80]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-kbl6/igt@i915_suspend@forcewake.html
   [81]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-kbl2/igt@i915_suspend@forcewake.html

  * igt@kms_cursor_crc@pipe-a-cursor-64x64-onscreen:
    - shard-skl:          [FAIL][82] ([i915#54]) -> [PASS][83] +4 similar issues
   [82]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-skl3/igt@kms_cursor_crc@pipe-a-cursor-64x64-onscreen.html
   [83]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-skl6/igt@kms_cursor_crc@pipe-a-cursor-64x64-onscreen.html

  * igt@kms_cursor_legacy@flip-vs-cursor-varying-size:
    - shard-tglb:         [FAIL][84] ([i915#2346]) -> [PASS][85]
   [84]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-tglb7/igt@kms_cursor_legacy@flip-vs-cursor-varying-size.html
   [85]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-tglb5/igt@kms_cursor_legacy@flip-vs-cursor-varying-size.html

  * igt@kms_draw_crc@draw-method-rgb565-blt-ytiled:
    - shard-skl:          [DMESG-WARN][86] ([i915#1982]) -> [PASS][87] +1 similar issue
   [86]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-skl3/igt@kms_draw_crc@draw-method-rgb565-blt-ytiled.html
   [87]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-skl6/igt@kms_draw_crc@draw-method-rgb565-blt-ytiled.html

  * igt@kms_fbcon_fbt@fbc-suspend:
    - shard-apl:          [INCOMPLETE][88] ([i915#180]) -> [PASS][89]
   [88]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-apl2/igt@kms_fbcon_fbt@fbc-suspend.html
   [89]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-apl7/igt@kms_fbcon_fbt@fbc-suspend.html

  * igt@kms_flip@flip-vs-absolute-wf_vblank@a-edp1:
    - shard-skl:          [FAIL][90] ([i915#2122]) -> [PASS][91]
   [90]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-skl10/igt@kms_flip@flip-vs-absolute-wf_vblank@a-edp1.html
   [91]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-skl8/igt@kms_flip@flip-vs-absolute-wf_vblank@a-edp1.html

  * igt@kms_flip@flip-vs-expired-vblank@a-hdmi-a2:
    - shard-glk:          [FAIL][92] ([i915#79]) -> [PASS][93] +1 similar issue
   [92]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-glk7/igt@kms_flip@flip-vs-expired-vblank@a-hdmi-a2.html
   [93]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-glk2/igt@kms_flip@flip-vs-expired-vblank@a-hdmi-a2.html

  * igt@kms_hdr@bpc-switch:
    - shard-kbl:          [DMESG-WARN][94] ([i915#180] / [i915#78]) -> [PASS][95]
   [94]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-kbl2/igt@kms_hdr@bpc-switch.html
   [95]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-kbl6/igt@kms_hdr@bpc-switch.html

  * igt@kms_plane@plane-panning-bottom-right-suspend-pipe-a-planes:
    - shard-apl:          [DMESG-WARN][96] ([i915#180] / [i915#533]) -> [PASS][97]
   [96]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-apl1/igt@kms_plane@plane-panning-bottom-right-suspend-pipe-a-planes.html
   [97]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-apl6/igt@kms_plane@plane-panning-bottom-right-suspend-pipe-a-planes.html

  * igt@kms_plane_alpha_blend@pipe-c-constant-alpha-min:
    - shard-skl:          [FAIL][98] ([fdo#108145] / [i915#265]) -> [PASS][99]
   [98]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-skl7/igt@kms_plane_alpha_blend@pipe-c-constant-alpha-min.html
   [99]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-skl9/igt@kms_plane_alpha_blend@pipe-c-constant-alpha-min.html

  * igt@kms_psr@psr2_primary_mmap_cpu:
    - shard-iclb:         [SKIP][100] ([fdo#109441]) -> [PASS][101] +2 similar issues
   [100]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-iclb1/igt@kms_psr@psr2_primary_mmap_cpu.html
   [101]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-iclb2/igt@kms_psr@psr2_primary_mmap_cpu.html

  * igt@kms_vblank@pipe-a-ts-continuation-dpms-suspend:
    - shard-skl:          [INCOMPLETE][102] ([i915#146] / [i915#198]) -> [PASS][103]
   [102]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-skl3/igt@kms_vblank@pipe-a-ts-continuation-dpms-suspend.html
   [103]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-skl6/igt@kms_vblank@pipe-a-ts-continuation-dpms-suspend.html

  * igt@perf@polling-parameterized:
    - shard-skl:          [FAIL][104] ([i915#1542]) -> [PASS][105]
   [104]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-skl7/igt@perf@polling-parameterized.html
   [105]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-skl9/igt@perf@polling-parameterized.html

  * {igt@sysfs_clients@recycle-many}:
    - shard-glk:          [FAIL][106] ([i915#3028]) -> [PASS][107]
   [106]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-glk5/igt@sysfs_clients@recycle-many.html
   [107]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-glk1/igt@sysfs_clients@recycle-many.html
    - shard-skl:          [FAIL][108] ([i915#3028]) -> [PASS][109]
   [108]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-skl3/igt@sysfs_clients@recycle-many.html
   [109]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-skl6/igt@sysfs_clients@recycle-many.html

  * igt@testdisplay:
    - shard-kbl:          [DMESG-WARN][110] ([i915#165] / [i915#180] / [i915#78]) -> [PASS][111] +1 similar issue
   [110]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-kbl2/igt@testdisplay.html
   [111]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-kbl6/igt@testdisplay.html

  
#### Warnings ####

  * igt@i915_pm_rc6_residency@rc6-idle:
    - shard-iclb:         [WARN][112] ([i915#2684]) -> [WARN][113] ([i915#1804] / [i915#2684])
   [112]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-iclb5/igt@i915_pm_rc6_residency@rc6-idle.html
   [113]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-iclb7/igt@i915_pm_rc6_residency@rc6-idle.html

  * igt@kms_flip_scaled_crc@flip-64bpp-ytile-to-32bpp-ytile:
    - shard-kbl:          [DMESG-FAIL][114] ([i915#165]) -> [FAIL][115] ([i915#2641])
   [114]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-kbl2/igt@kms_flip_scaled_crc@flip-64bpp-ytile-to-32bpp-ytile.html
   [115]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-kbl6/igt@kms_flip_scaled_crc@flip-64bpp-ytile-to-32bpp-ytile.html

  * igt@kms_psr2_sf@plane-move-sf-dmg-area-0:
    - shard-iclb:         [SKIP][116] ([i915#658]) -> [SKIP][117] ([i915#2920]) +1 similar issue
   [116]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-iclb5/igt@kms_psr2_sf@plane-move-sf-dmg-area-0.html
   [117]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-iclb2/igt@kms_psr2_sf@plane-move-sf-dmg-area-0.html

  * igt@kms_psr2_sf@primary-plane-update-sf-dmg-area-2:
    - shard-iclb:         [SKIP][118] ([i915#2920]) -> [SKIP][119] ([i915#658]) +2 similar issues
   [118]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-iclb2/igt@kms_psr2_sf@primary-plane-update-sf-dmg-area-2.html
   [119]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-iclb6/igt@kms_psr2_sf@primary-plane-update-sf-dmg-area-2.html

  * igt@runner@aborted:
    - shard-kbl:          ([FAIL][120], [FAIL][121], [FAIL][122]) ([i915#2295] / [i915#3002]) -> ([FAIL][123], [FAIL][124], [FAIL][125], [FAIL][126], [FAIL][127], [FAIL][128], [FAIL][129], [FAIL][130], [FAIL][131], [FAIL][132]) ([i915#1814] / [i915#2295] / [i915#2505] / [i915#3002] / [i915#92])
   [120]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-kbl4/igt@runner@aborted.html
   [121]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-kbl2/igt@runner@aborted.html
   [122]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-kbl2/igt@runner@aborted.html
   [123]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-kbl3/igt@runner@aborted.html
   [124]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-kbl4/igt@runner@aborted.html
   [125]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-kbl3/igt@runner@aborted.html
   [126]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-kbl4/igt@runner@aborted.html
   [127]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-kbl6/igt@runner@aborted.html
   [128]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-kbl6/igt@runner@aborted.html
   [129]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-kbl3/igt@runner@aborted.html
   [130]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-kbl2/igt@runner@aborted.html
   [131]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-kbl3/igt@runner@aborted.html
   [132]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-kbl3/igt@runner@aborted.html
    - shard-apl:          ([FAIL][133], [FAIL][134], [FAIL][135], [FAIL][136], [FAIL][137], [FAIL][138], [FAIL][139]) ([i915#1610] / [i915#1814] / [i915#2295] / [i915#2722] / [i915#3002] / [i915#62]) -> ([FAIL][140], [FAIL][141], [FAIL][142], [FAIL][143], [FAIL][144], [FAIL][145]) ([fdo#109271] / [i915#2295] / [i915#3002])
   [133]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-apl6/igt@runner@aborted.html
   [134]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-apl2/igt@runner@aborted.html
   [135]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-apl7/igt@runner@aborted.html
   [136]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-apl2/igt@runner@aborted.html
   [137]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-apl7/igt@runner@aborted.html
   [138]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-apl1/igt@runner@aborted.html
   [139]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-apl3/igt@runner@aborted.html
   [140]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-apl2/igt@runner@aborted.html
   [141]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-apl7/igt@runner@aborted.html
   [142]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-apl1/igt@runner@aborted.html
   [143]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-apl6/igt@runner@aborted.html
   [144]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-apl3/igt@runner@aborted.html
   [145]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-apl1/igt@runner@aborted.html
    - shard-tglb:         ([FAIL][146], [FAIL][147], [FAIL][148], [FAIL][149], [FAIL][150]) ([i915#1602] / [i915#2295] / [i915#2426] / [i915#2667] / [i915#2803] / [i915#3002] / [i915#3030]) -> ([FAIL][151], [FAIL][152], [FAIL][153], [FAIL][154]) ([i915#1602] / [i915#2295] / [i915#2667] / [i915#3002] / [i915#3030])
   [146]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-tglb7/igt@runner@aborted.html
   [147]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-tglb5/igt@runner@aborted.html
   [148]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-tglb6/igt@runner@aborted.html
   [149]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-tglb7/igt@runner@aborted.html
   [150]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-tglb2/igt@runner@aborted.html
   [151]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-tglb2/igt@runner@aborted.html
   [152]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-tglb7/igt@runner@aborted.html
   [153]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-tglb7/igt@runner@aborted.html
   [154]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-tglb8/igt@runner@aborted.html
    - shard-skl:          ([FAIL][155], [FAIL][156], [FAIL][157]) ([i915#1436] / [i915#2295] / [i915#3002]) -> ([FAIL][158], [FAIL][159]) ([i915#2295] / [i915#3002])
   [155]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-skl4/igt@runner@aborted.html
   [156]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-skl1/igt@runner@aborted.html
   [157]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9761/shard-skl9/igt@runner@aborted.html
   [158]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-skl2/igt@runner@aborted.html
   [159]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/shard-skl10/igt@runner@aborted.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#108145]: https://bugs.freedesktop.org/show_bug.cgi?id=108145
  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [fdo#109279]: https://bugs.freedesktop.org/show_bug.cgi?id=109279
  [fdo#109284]: https://bugs.freedesktop.org/show_bug.cgi?id=109284
  [fdo#109291]: https://bugs.freedesktop.org/show_bug.cgi?id=109291
  [fdo#109441]: https://bugs.freedesktop.org/show_bug.cgi?id=109441
  [fdo#111825]: https://bugs.freedesktop.org/show_bug.cgi?id=111825
  [fdo#111827]: https://bugs.freedesktop.org/show_bug.cgi?id=111827
  [fdo#112306]: https://bugs.freedesktop.org/show_bug.cgi?id=112306
  [i915#1037]: https://gitlab.freedesktop.org/drm/intel/issues/1037
  [i915#118]: https://gitlab.freedesktop.org/drm/intel/issues/118
  [i915#1226]: https://gitlab.freedesktop.org/drm/intel/issues/1226
  [i915#1436]: https://gitlab.freedesktop.org/drm/intel/issues/1436

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19656/index.html

[-- Attachment #1.2: Type: text/html, Size: 34348 bytes --]

[-- Attachment #2: Type: text/plain, Size: 160 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915: Refine VT-d scanout workaround
  2021-02-10 23:39 [Intel-gfx] [PATCH] drm/i915: Refine VT-d scanout workaround Chris Wilson
  2021-02-11  0:28 ` [Intel-gfx] ✓ Fi.CI.BAT: success for " Patchwork
  2021-02-11 13:16 ` [Intel-gfx] ✓ Fi.CI.IGT: " Patchwork
@ 2021-02-11 14:25 ` Matthew Auld
  2021-02-11 15:19   ` Chris Wilson
  2021-02-11 16:05 ` Ville Syrjälä
  3 siblings, 1 reply; 9+ messages in thread
From: Matthew Auld @ 2021-02-11 14:25 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On 10/02/2021 23:39, Chris Wilson wrote:
> VT-d may cause overfetch of the scanout PTE, both before and after the
> vma (depending on the scanout orientation). bspec recommends that we
> provide a tile-row in either directions, and suggests using 160 PTE,
> warning that the accesses will wrap around the ends of the GGTT.
> Currently, we fill the entire GGTT with scratch pages when using VT-d to
> always ensure there are valid entries around every vma, including
> scanout. However, writing every PTE is slow as on recent devices we
> perform 8MiB of uncached writes, incurring an extra 100ms during resume.
> 
> If instead we focus on only putting guard pages around scanout, we can
> avoid touching the whole GGTT. To avoid having to introduce extra nodes
> around each scanout vma, we adjust the scanout drm_mm_node to be smaller
> than the allocated space, and fixup the extra PTE during dma binding.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> Cc: Matthew Auld <matthew.auld@intel.com>
> ---
>   drivers/gpu/drm/i915/gem/i915_gem_domain.c |  3 ++
>   drivers/gpu/drm/i915/gt/intel_ggtt.c       | 37 ++++++++--------------
>   drivers/gpu/drm/i915/i915_gem_gtt.h        |  1 +
>   drivers/gpu/drm/i915/i915_vma.c            | 23 ++++++++++++++
>   drivers/gpu/drm/i915/i915_vma_types.h      |  1 +
>   5 files changed, 41 insertions(+), 24 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
> index 0478b069c202..9f2ccc255ca1 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
> @@ -345,6 +345,9 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
>   	if (ret)
>   		goto err;
>   
> +	if (intel_scanout_needs_vtd_wa(i915))
> +		flags |= PIN_VTD;
> +
>   	/*
>   	 * As the user may map the buffer once pinned in the display plane
>   	 * (e.g. libkms for the bootup splash), we have to ensure that we
> diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> index b0b8ded834f0..416f77f48561 100644
> --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> @@ -238,6 +238,11 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
>   
>   	gte = (gen8_pte_t __iomem *)ggtt->gsm;
>   	gte += vma->node.start / I915_GTT_PAGE_SIZE;
> +
> +	end = gte - vma->guard / I915_GTT_PAGE_SIZE;
> +	while (end < gte)
> +		gen8_set_pte(end++, vm->scratch[0]->encode);
> +
>   	end = gte + vma->node.size / I915_GTT_PAGE_SIZE;
>   
>   	for_each_sgt_daddr(addr, iter, vma->pages)
> @@ -245,6 +250,7 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
>   	GEM_BUG_ON(gte > end);
>   
>   	/* Fill the allocated but "unused" space beyond the end of the buffer */
> +	end += vma->guard / I915_GTT_PAGE_SIZE;
>   	while (gte < end)
>   		gen8_set_pte(gte++, vm->scratch[0]->encode);
>   
> @@ -289,6 +295,11 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
>   
>   	gte = (gen6_pte_t __iomem *)ggtt->gsm;
>   	gte += vma->node.start / I915_GTT_PAGE_SIZE;
> +
> +	end = gte - vma->guard / I915_GTT_PAGE_SIZE;
> +	while (end < gte)
> +		gen8_set_pte(end++, vm->scratch[0]->encode);
> +
>   	end = gte + vma->node.size / I915_GTT_PAGE_SIZE;
>   
>   	for_each_sgt_daddr(addr, iter, vma->pages)
> @@ -296,6 +307,7 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
>   	GEM_BUG_ON(gte > end);
>   
>   	/* Fill the allocated but "unused" space beyond the end of the buffer */
> +	end += vma->guard / I915_GTT_PAGE_SIZE;
>   	while (gte < end)
>   		iowrite32(vm->scratch[0]->encode, gte++);
>   
> @@ -311,27 +323,6 @@ static void nop_clear_range(struct i915_address_space *vm,
>   {
>   }
>   
> -static void gen8_ggtt_clear_range(struct i915_address_space *vm,
> -				  u64 start, u64 length)
> -{
> -	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
> -	unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
> -	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
> -	const gen8_pte_t scratch_pte = vm->scratch[0]->encode;
> -	gen8_pte_t __iomem *gtt_base =
> -		(gen8_pte_t __iomem *)ggtt->gsm + first_entry;
> -	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
> -	int i;
> -
> -	if (WARN(num_entries > max_entries,
> -		 "First entry = %d; Num entries = %d (max=%d)\n",
> -		 first_entry, num_entries, max_entries))
> -		num_entries = max_entries;
> -
> -	for (i = 0; i < num_entries; i++)
> -		gen8_set_pte(&gtt_base[i], scratch_pte);
> -}
> -
>   static void bxt_vtd_ggtt_wa(struct i915_address_space *vm)
>   {
>   	/*
> @@ -898,8 +889,6 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
>   	ggtt->vm.cleanup = gen6_gmch_remove;
>   	ggtt->vm.insert_page = gen8_ggtt_insert_page;
>   	ggtt->vm.clear_range = nop_clear_range;
> -	if (intel_scanout_needs_vtd_wa(i915))
> -		ggtt->vm.clear_range = gen8_ggtt_clear_range;
>   
>   	ggtt->vm.insert_entries = gen8_ggtt_insert_entries;
>   
> @@ -1045,7 +1034,7 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt)
>   	ggtt->vm.alloc_pt_dma = alloc_pt_dma;
>   
>   	ggtt->vm.clear_range = nop_clear_range;
> -	if (!HAS_FULL_PPGTT(i915) || intel_scanout_needs_vtd_wa(i915))
> +	if (!HAS_FULL_PPGTT(i915))
>   		ggtt->vm.clear_range = gen6_ggtt_clear_range;
>   	ggtt->vm.insert_page = gen6_ggtt_insert_page;
>   	ggtt->vm.insert_entries = gen6_ggtt_insert_entries;
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index c9b0ee5e1d23..8a2dfc7144cf 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -41,6 +41,7 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
>   #define PIN_HIGH		BIT_ULL(5)
>   #define PIN_OFFSET_BIAS		BIT_ULL(6)
>   #define PIN_OFFSET_FIXED	BIT_ULL(7)
> +#define PIN_VTD			BIT_ULL(8)
>   
>   #define PIN_GLOBAL		BIT_ULL(10) /* I915_VMA_GLOBAL_BIND */
>   #define PIN_USER		BIT_ULL(11) /* I915_VMA_LOCAL_BIND */
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index caa9b041616b..dccd36ff1a6d 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -38,6 +38,8 @@
>   #include "i915_trace.h"
>   #include "i915_vma.h"
>   
> +#define VTD_GUARD roundup_pow_of_two(160 * SZ_4K) /* 160 PTE padding */
> +
>   static struct i915_global_vma {
>   	struct i915_global base;
>   	struct kmem_cache *slab_vmas;
> @@ -552,6 +554,9 @@ bool i915_vma_misplaced(const struct i915_vma *vma,
>   	    vma->node.start != (flags & PIN_OFFSET_MASK))
>   		return true;
>   
> +	if (flags & PIN_VTD && vma->guard < VTD_GUARD)
> +		return true;
> +
>   	return false;
>   }
>   
> @@ -637,6 +642,13 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
>   				  alignment, vma->fence_alignment);
>   	}
>   
> +	/* VT-d requires padding before/after the vma */
> +	if (flags & PIN_VTD) {
> +		alignment = max_t(typeof(alignment), alignment, VTD_GUARD);
> +		vma->guard = alignment;
> +		size += 2 * vma->guard;
> +	}
> +
>   	GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
>   	GEM_BUG_ON(!IS_ALIGNED(alignment, I915_GTT_MIN_ALIGNMENT));
>   	GEM_BUG_ON(!is_power_of_2(alignment));
> @@ -725,6 +737,11 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
>   
>   	list_add_tail(&vma->vm_link, &vma->vm->bound_list);
>   
> +	if (flags & PIN_VTD) {
> +		vma->node.start += vma->guard;
> +		vma->node.size -= 2 * vma->guard;
> +	}
> +

So we adjust the node to be twice as large, reserve it in the mm, and 
then shrink it again here. Seems a little scary to modify 
node.start/node.size after insertion?

Just wondering if this might upset something like evict_for_node, where 
we inspect the allocated nodes over some range, rather than holes, and 
since we shrunk the node it might get confused into thinking the padding 
is actually free space. Or maybe that doesn't really matter much for the 
GGTT?


>   	return 0;
>   }
>   
> @@ -734,6 +751,12 @@ i915_vma_detach(struct i915_vma *vma)
>   	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
>   	GEM_BUG_ON(i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND));
>   
> +	if (vma->guard) {
> +		vma->node.start -= vma->guard;
> +		vma->node.size += 2 * vma->guard;
> +		vma->guard = 0;
> +	}
> +
>   	/*
>   	 * And finally now the object is completely decoupled from this
>   	 * vma, we can drop its hold on the backing storage and allow
> diff --git a/drivers/gpu/drm/i915/i915_vma_types.h b/drivers/gpu/drm/i915/i915_vma_types.h
> index f5cb848b7a7e..bafec4e0b042 100644
> --- a/drivers/gpu/drm/i915/i915_vma_types.h
> +++ b/drivers/gpu/drm/i915/i915_vma_types.h
> @@ -190,6 +190,7 @@ struct i915_vma {
>   
>   	u32 fence_size;
>   	u32 fence_alignment;
> +	u32 guard;
>   
>   	/**
>   	 * Count of the number of times this vma has been opened by different
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915: Refine VT-d scanout workaround
  2021-02-11 14:25 ` [Intel-gfx] [PATCH] " Matthew Auld
@ 2021-02-11 15:19   ` Chris Wilson
  2021-02-11 17:00     ` Matthew Auld
  0 siblings, 1 reply; 9+ messages in thread
From: Chris Wilson @ 2021-02-11 15:19 UTC (permalink / raw)
  To: Matthew Auld, intel-gfx

Quoting Matthew Auld (2021-02-11 14:25:41)
> On 10/02/2021 23:39, Chris Wilson wrote:
> > VT-d may cause overfetch of the scanout PTE, both before and after the
> > vma (depending on the scanout orientation). bspec recommends that we
> > provide a tile-row in either directions, and suggests using 160 PTE,
> > warning that the accesses will wrap around the ends of the GGTT.
> > Currently, we fill the entire GGTT with scratch pages when using VT-d to
> > always ensure there are valid entries around every vma, including
> > scanout. However, writing every PTE is slow as on recent devices we
> > perform 8MiB of uncached writes, incurring an extra 100ms during resume.
> > 
> > If instead we focus on only putting guard pages around scanout, we can
> > avoid touching the whole GGTT. To avoid having to introduce extra nodes
> > around each scanout vma, we adjust the scanout drm_mm_node to be smaller
> > than the allocated space, and fixup the extra PTE during dma binding.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > Cc: Matthew Auld <matthew.auld@intel.com>
> > ---
> >   drivers/gpu/drm/i915/gem/i915_gem_domain.c |  3 ++
> >   drivers/gpu/drm/i915/gt/intel_ggtt.c       | 37 ++++++++--------------
> >   drivers/gpu/drm/i915/i915_gem_gtt.h        |  1 +
> >   drivers/gpu/drm/i915/i915_vma.c            | 23 ++++++++++++++
> >   drivers/gpu/drm/i915/i915_vma_types.h      |  1 +
> >   5 files changed, 41 insertions(+), 24 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
> > index 0478b069c202..9f2ccc255ca1 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
> > @@ -345,6 +345,9 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
> >       if (ret)
> >               goto err;
> >   
> > +     if (intel_scanout_needs_vtd_wa(i915))
> > +             flags |= PIN_VTD;
> > +
> >       /*
> >        * As the user may map the buffer once pinned in the display plane
> >        * (e.g. libkms for the bootup splash), we have to ensure that we
> > diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> > index b0b8ded834f0..416f77f48561 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> > @@ -238,6 +238,11 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
> >   
> >       gte = (gen8_pte_t __iomem *)ggtt->gsm;
> >       gte += vma->node.start / I915_GTT_PAGE_SIZE;
> > +
> > +     end = gte - vma->guard / I915_GTT_PAGE_SIZE;
> > +     while (end < gte)
> > +             gen8_set_pte(end++, vm->scratch[0]->encode);
> > +
> >       end = gte + vma->node.size / I915_GTT_PAGE_SIZE;
> >   
> >       for_each_sgt_daddr(addr, iter, vma->pages)
> > @@ -245,6 +250,7 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
> >       GEM_BUG_ON(gte > end);
> >   
> >       /* Fill the allocated but "unused" space beyond the end of the buffer */
> > +     end += vma->guard / I915_GTT_PAGE_SIZE;
> >       while (gte < end)
> >               gen8_set_pte(gte++, vm->scratch[0]->encode);
> >   
> > @@ -289,6 +295,11 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
> >   
> >       gte = (gen6_pte_t __iomem *)ggtt->gsm;
> >       gte += vma->node.start / I915_GTT_PAGE_SIZE;
> > +
> > +     end = gte - vma->guard / I915_GTT_PAGE_SIZE;
> > +     while (end < gte)
> > +             gen8_set_pte(end++, vm->scratch[0]->encode);
> > +
> >       end = gte + vma->node.size / I915_GTT_PAGE_SIZE;
> >   
> >       for_each_sgt_daddr(addr, iter, vma->pages)
> > @@ -296,6 +307,7 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
> >       GEM_BUG_ON(gte > end);
> >   
> >       /* Fill the allocated but "unused" space beyond the end of the buffer */
> > +     end += vma->guard / I915_GTT_PAGE_SIZE;
> >       while (gte < end)
> >               iowrite32(vm->scratch[0]->encode, gte++);
> >   
> > @@ -311,27 +323,6 @@ static void nop_clear_range(struct i915_address_space *vm,
> >   {
> >   }
> >   
> > -static void gen8_ggtt_clear_range(struct i915_address_space *vm,
> > -                               u64 start, u64 length)
> > -{
> > -     struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
> > -     unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
> > -     unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
> > -     const gen8_pte_t scratch_pte = vm->scratch[0]->encode;
> > -     gen8_pte_t __iomem *gtt_base =
> > -             (gen8_pte_t __iomem *)ggtt->gsm + first_entry;
> > -     const int max_entries = ggtt_total_entries(ggtt) - first_entry;
> > -     int i;
> > -
> > -     if (WARN(num_entries > max_entries,
> > -              "First entry = %d; Num entries = %d (max=%d)\n",
> > -              first_entry, num_entries, max_entries))
> > -             num_entries = max_entries;
> > -
> > -     for (i = 0; i < num_entries; i++)
> > -             gen8_set_pte(&gtt_base[i], scratch_pte);
> > -}
> > -
> >   static void bxt_vtd_ggtt_wa(struct i915_address_space *vm)
> >   {
> >       /*
> > @@ -898,8 +889,6 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
> >       ggtt->vm.cleanup = gen6_gmch_remove;
> >       ggtt->vm.insert_page = gen8_ggtt_insert_page;
> >       ggtt->vm.clear_range = nop_clear_range;
> > -     if (intel_scanout_needs_vtd_wa(i915))
> > -             ggtt->vm.clear_range = gen8_ggtt_clear_range;
> >   
> >       ggtt->vm.insert_entries = gen8_ggtt_insert_entries;
> >   
> > @@ -1045,7 +1034,7 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt)
> >       ggtt->vm.alloc_pt_dma = alloc_pt_dma;
> >   
> >       ggtt->vm.clear_range = nop_clear_range;
> > -     if (!HAS_FULL_PPGTT(i915) || intel_scanout_needs_vtd_wa(i915))
> > +     if (!HAS_FULL_PPGTT(i915))
> >               ggtt->vm.clear_range = gen6_ggtt_clear_range;
> >       ggtt->vm.insert_page = gen6_ggtt_insert_page;
> >       ggtt->vm.insert_entries = gen6_ggtt_insert_entries;
> > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> > index c9b0ee5e1d23..8a2dfc7144cf 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> > @@ -41,6 +41,7 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
> >   #define PIN_HIGH            BIT_ULL(5)
> >   #define PIN_OFFSET_BIAS             BIT_ULL(6)
> >   #define PIN_OFFSET_FIXED    BIT_ULL(7)
> > +#define PIN_VTD                      BIT_ULL(8)
> >   
> >   #define PIN_GLOBAL          BIT_ULL(10) /* I915_VMA_GLOBAL_BIND */
> >   #define PIN_USER            BIT_ULL(11) /* I915_VMA_LOCAL_BIND */
> > diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> > index caa9b041616b..dccd36ff1a6d 100644
> > --- a/drivers/gpu/drm/i915/i915_vma.c
> > +++ b/drivers/gpu/drm/i915/i915_vma.c
> > @@ -38,6 +38,8 @@
> >   #include "i915_trace.h"
> >   #include "i915_vma.h"
> >   
> > +#define VTD_GUARD roundup_pow_of_two(160 * SZ_4K) /* 160 PTE padding */
> > +
> >   static struct i915_global_vma {
> >       struct i915_global base;
> >       struct kmem_cache *slab_vmas;
> > @@ -552,6 +554,9 @@ bool i915_vma_misplaced(const struct i915_vma *vma,
> >           vma->node.start != (flags & PIN_OFFSET_MASK))
> >               return true;
> >   
> > +     if (flags & PIN_VTD && vma->guard < VTD_GUARD)
> > +             return true;
> > +
> >       return false;
> >   }
> >   
> > @@ -637,6 +642,13 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
> >                                 alignment, vma->fence_alignment);
> >       }
> >   
> > +     /* VT-d requires padding before/after the vma */
> > +     if (flags & PIN_VTD) {
> > +             alignment = max_t(typeof(alignment), alignment, VTD_GUARD);
> > +             vma->guard = alignment;
> > +             size += 2 * vma->guard;
> > +     }
> > +
> >       GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
> >       GEM_BUG_ON(!IS_ALIGNED(alignment, I915_GTT_MIN_ALIGNMENT));
> >       GEM_BUG_ON(!is_power_of_2(alignment));
> > @@ -725,6 +737,11 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
> >   
> >       list_add_tail(&vma->vm_link, &vma->vm->bound_list);
> >   
> > +     if (flags & PIN_VTD) {
> > +             vma->node.start += vma->guard;
> > +             vma->node.size -= 2 * vma->guard;
> > +     }
> > +
> 
> So we adjust the node to be twice as large, reserve it in the mm, and 
> then shrink it again here. Seems a little scary to modify 
> node.start/node.size after insertion?

Definitely scary. So far it feels like the most viable approach. At
least the concept of including a red-zone around the vma/drm_mm_node as
part of that structure seems useful, and familiar.
 
> Just wondering if this might upset something like evict_for_node, where 
> we inspect the allocated nodes over some range, rather than holes, and 
> since we shrunk the node it might get confused into thinking the padding 
> is actually free space. Or maybe that doesn't really matter much for the 
> GGTT?

The stumbling block I hit earlier and then forgot about is that the
drm_mm_node's computed hole_size uses the adjacent drm_mm_node.start. So
as we've adjusted node.start, it will later on declare a bigger hole and
reuse part of our guard pages, which we will then clobber on resume.

(There are also consequences for the address interval tree, but that will
continue to work correctly as we won't move the node.start beyond the
allocated space.)

Hmm. Fortunately, we have wrapped all users of vma->node.start, at least
for ggtt, with i915_ggtt_offset(). So it is feasible to adjust the
offset by vma->guard there.

Let's try that.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915: Refine VT-d scanout workaround
  2021-02-10 23:39 [Intel-gfx] [PATCH] drm/i915: Refine VT-d scanout workaround Chris Wilson
                   ` (2 preceding siblings ...)
  2021-02-11 14:25 ` [Intel-gfx] [PATCH] " Matthew Auld
@ 2021-02-11 16:05 ` Ville Syrjälä
  2021-02-11 16:24   ` Chris Wilson
  3 siblings, 1 reply; 9+ messages in thread
From: Ville Syrjälä @ 2021-02-11 16:05 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx, Matthew Auld

On Wed, Feb 10, 2021 at 11:39:46PM +0000, Chris Wilson wrote:
> VT-d may cause overfetch of the scanout PTE, both before and after the
> vma (depending on the scanout orientation). bspec recommends that we
> provide a tile-row in either directions, and suggests using 160 PTE,
> warning that the accesses will wrap around the ends of the GGTT.
> Currently, we fill the entire GGTT with scratch pages when using VT-d to
> always ensure there are valid entries around every vma, including
> scanout. However, writing every PTE is slow as on recent devices we
> perform 8MiB of uncached writes, incurring an extra 100ms during resume.
> 
> If instead we focus on only putting guard pages around scanout, we can
> avoid touching the whole GGTT. To avoid having to introduce extra nodes
> around each scanout vma, we adjust the scanout drm_mm_node to be smaller
> than the allocated space, and fixup the extra PTE during dma binding.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> Cc: Matthew Auld <matthew.auld@intel.com>
> ---
>  drivers/gpu/drm/i915/gem/i915_gem_domain.c |  3 ++
>  drivers/gpu/drm/i915/gt/intel_ggtt.c       | 37 ++++++++--------------
>  drivers/gpu/drm/i915/i915_gem_gtt.h        |  1 +
>  drivers/gpu/drm/i915/i915_vma.c            | 23 ++++++++++++++
>  drivers/gpu/drm/i915/i915_vma_types.h      |  1 +
>  5 files changed, 41 insertions(+), 24 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
> index 0478b069c202..9f2ccc255ca1 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
> @@ -345,6 +345,9 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
>  	if (ret)
>  		goto err;
>  
> +	if (intel_scanout_needs_vtd_wa(i915))
> +		flags |= PIN_VTD;
> +
>  	/*
>  	 * As the user may map the buffer once pinned in the display plane
>  	 * (e.g. libkms for the bootup splash), we have to ensure that we
> diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> index b0b8ded834f0..416f77f48561 100644
> --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> @@ -238,6 +238,11 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
>  
>  	gte = (gen8_pte_t __iomem *)ggtt->gsm;
>  	gte += vma->node.start / I915_GTT_PAGE_SIZE;
> +
> +	end = gte - vma->guard / I915_GTT_PAGE_SIZE;
> +	while (end < gte)
> +		gen8_set_pte(end++, vm->scratch[0]->encode);
> +
>  	end = gte + vma->node.size / I915_GTT_PAGE_SIZE;
>  
>  	for_each_sgt_daddr(addr, iter, vma->pages)
> @@ -245,6 +250,7 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
>  	GEM_BUG_ON(gte > end);
>  
>  	/* Fill the allocated but "unused" space beyond the end of the buffer */
> +	end += vma->guard / I915_GTT_PAGE_SIZE;
>  	while (gte < end)
>  		gen8_set_pte(gte++, vm->scratch[0]->encode);
>  
> @@ -289,6 +295,11 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
>  
>  	gte = (gen6_pte_t __iomem *)ggtt->gsm;
>  	gte += vma->node.start / I915_GTT_PAGE_SIZE;
> +
> +	end = gte - vma->guard / I915_GTT_PAGE_SIZE;
> +	while (end < gte)
> +		gen8_set_pte(end++, vm->scratch[0]->encode);
> +
>  	end = gte + vma->node.size / I915_GTT_PAGE_SIZE;
>  
>  	for_each_sgt_daddr(addr, iter, vma->pages)
> @@ -296,6 +307,7 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
>  	GEM_BUG_ON(gte > end);
>  
>  	/* Fill the allocated but "unused" space beyond the end of the buffer */
> +	end += vma->guard / I915_GTT_PAGE_SIZE;
>  	while (gte < end)
>  		iowrite32(vm->scratch[0]->encode, gte++);
>  
> @@ -311,27 +323,6 @@ static void nop_clear_range(struct i915_address_space *vm,
>  {
>  }
>  
> -static void gen8_ggtt_clear_range(struct i915_address_space *vm,
> -				  u64 start, u64 length)
> -{
> -	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
> -	unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
> -	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
> -	const gen8_pte_t scratch_pte = vm->scratch[0]->encode;
> -	gen8_pte_t __iomem *gtt_base =
> -		(gen8_pte_t __iomem *)ggtt->gsm + first_entry;
> -	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
> -	int i;
> -
> -	if (WARN(num_entries > max_entries,
> -		 "First entry = %d; Num entries = %d (max=%d)\n",
> -		 first_entry, num_entries, max_entries))
> -		num_entries = max_entries;
> -
> -	for (i = 0; i < num_entries; i++)
> -		gen8_set_pte(&gtt_base[i], scratch_pte);
> -}
> -
>  static void bxt_vtd_ggtt_wa(struct i915_address_space *vm)
>  {
>  	/*
> @@ -898,8 +889,6 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
>  	ggtt->vm.cleanup = gen6_gmch_remove;
>  	ggtt->vm.insert_page = gen8_ggtt_insert_page;
>  	ggtt->vm.clear_range = nop_clear_range;
> -	if (intel_scanout_needs_vtd_wa(i915))
> -		ggtt->vm.clear_range = gen8_ggtt_clear_range;
>  
>  	ggtt->vm.insert_entries = gen8_ggtt_insert_entries;
>  
> @@ -1045,7 +1034,7 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt)
>  	ggtt->vm.alloc_pt_dma = alloc_pt_dma;
>  
>  	ggtt->vm.clear_range = nop_clear_range;
> -	if (!HAS_FULL_PPGTT(i915) || intel_scanout_needs_vtd_wa(i915))
> +	if (!HAS_FULL_PPGTT(i915))
>  		ggtt->vm.clear_range = gen6_ggtt_clear_range;
>  	ggtt->vm.insert_page = gen6_ggtt_insert_page;
>  	ggtt->vm.insert_entries = gen6_ggtt_insert_entries;
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index c9b0ee5e1d23..8a2dfc7144cf 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -41,6 +41,7 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
>  #define PIN_HIGH		BIT_ULL(5)
>  #define PIN_OFFSET_BIAS		BIT_ULL(6)
>  #define PIN_OFFSET_FIXED	BIT_ULL(7)
> +#define PIN_VTD			BIT_ULL(8)
>  
>  #define PIN_GLOBAL		BIT_ULL(10) /* I915_VMA_GLOBAL_BIND */
>  #define PIN_USER		BIT_ULL(11) /* I915_VMA_LOCAL_BIND */
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index caa9b041616b..dccd36ff1a6d 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -38,6 +38,8 @@
>  #include "i915_trace.h"
>  #include "i915_vma.h"
>  
> +#define VTD_GUARD roundup_pow_of_two(160 * SZ_4K) /* 160 PTE padding */
> +
>  static struct i915_global_vma {
>  	struct i915_global base;
>  	struct kmem_cache *slab_vmas;
> @@ -552,6 +554,9 @@ bool i915_vma_misplaced(const struct i915_vma *vma,
>  	    vma->node.start != (flags & PIN_OFFSET_MASK))
>  		return true;
>  
> +	if (flags & PIN_VTD && vma->guard < VTD_GUARD)
> +		return true;
> +
>  	return false;
>  }
>  
> @@ -637,6 +642,13 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
>  				  alignment, vma->fence_alignment);
>  	}
>  
> +	/* VT-d requires padding before/after the vma */
> +	if (flags & PIN_VTD) {
> +		alignment = max_t(typeof(alignment), alignment, VTD_GUARD);
> +		vma->guard = alignment;
> +		size += 2 * vma->guard;
> +	}
> +
>  	GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
>  	GEM_BUG_ON(!IS_ALIGNED(alignment, I915_GTT_MIN_ALIGNMENT));
>  	GEM_BUG_ON(!is_power_of_2(alignment));
> @@ -725,6 +737,11 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
>  
>  	list_add_tail(&vma->vm_link, &vma->vm->bound_list);
>  
> +	if (flags & PIN_VTD) {
> +		vma->node.start += vma->guard;

Was a bit worried for a second that this might give the display
a potentially misaligned vma start. But looks like you did consider
all that: VTD_GUARD==POT, alignment + guard both get bumped
to the max(). So AFAICS should guarantee everyone is happy.

I guess we're now wasting a lot more ggtt address space though?
Not sure if anyone has ever been at risk of running out though.
And DPT should help with this on new platforms.

> +		vma->node.size -= 2 * vma->guard;
> +	}
> +
>  	return 0;
>  }
>  
> @@ -734,6 +751,12 @@ i915_vma_detach(struct i915_vma *vma)
>  	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
>  	GEM_BUG_ON(i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND));
>  
> +	if (vma->guard) {
> +		vma->node.start -= vma->guard;
> +		vma->node.size += 2 * vma->guard;
> +		vma->guard = 0;
> +	}
> +
>  	/*
>  	 * And finally now the object is completely decoupled from this
>  	 * vma, we can drop its hold on the backing storage and allow
> diff --git a/drivers/gpu/drm/i915/i915_vma_types.h b/drivers/gpu/drm/i915/i915_vma_types.h
> index f5cb848b7a7e..bafec4e0b042 100644
> --- a/drivers/gpu/drm/i915/i915_vma_types.h
> +++ b/drivers/gpu/drm/i915/i915_vma_types.h
> @@ -190,6 +190,7 @@ struct i915_vma {
>  
>  	u32 fence_size;
>  	u32 fence_alignment;
> +	u32 guard;
>  
>  	/**
>  	 * Count of the number of times this vma has been opened by different
> -- 
> 2.20.1

-- 
Ville Syrjälä
Intel
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915: Refine VT-d scanout workaround
  2021-02-11 16:05 ` Ville Syrjälä
@ 2021-02-11 16:24   ` Chris Wilson
  0 siblings, 0 replies; 9+ messages in thread
From: Chris Wilson @ 2021-02-11 16:24 UTC (permalink / raw)
  To: Ville Syrjälä; +Cc: intel-gfx, Matthew Auld

Quoting Ville Syrjälä (2021-02-11 16:05:59)
> On Wed, Feb 10, 2021 at 11:39:46PM +0000, Chris Wilson wrote:
> > @@ -637,6 +642,13 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
> >                                 alignment, vma->fence_alignment);
> >       }
> >  
> > +     /* VT-d requires padding before/after the vma */
> > +     if (flags & PIN_VTD) {
> > +             alignment = max_t(typeof(alignment), alignment, VTD_GUARD);
> > +             vma->guard = alignment;
> > +             size += 2 * vma->guard;
> > +     }
> > +
> >       GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
> >       GEM_BUG_ON(!IS_ALIGNED(alignment, I915_GTT_MIN_ALIGNMENT));
> >       GEM_BUG_ON(!is_power_of_2(alignment));
> > @@ -725,6 +737,11 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
> >  
> >       list_add_tail(&vma->vm_link, &vma->vm->bound_list);
> >  
> > +     if (flags & PIN_VTD) {
> > +             vma->node.start += vma->guard;
> 
> Was a bit worried for a second that this might give the display
> a potentially misaligned vma start. But looks like you did consider
> all that: VTD_GUARD==POT, alignment + guard both get bumped
> to the max(). So AFAICS should guarantee everyone is happy.
> 
> I guess we're now wasting a lot more ggtt address space though?
> Not sure if anyone has ever been at risk of running out though.
> And DPT should help with this on new platforms.

Definitely this is a considerable bloat to most scanout buffers, which
for the sake of argument let's say are 8MiB. Still enough room for a flip
chain within the mappable portion, and when we get to scanouts that are
large enough to consume the majority of the GGTT, the fixed 2MiB of
padding is lost in the noise.

So handwaving it shouldn't lead to noticeably more thrashing of the
GGTT for existing platforms. There's too much recycling and too little
reuse of scanouts in current display systems for my liking, so the extra
25% overhead in GGTT updates is more likely to be a concern. (Though it
does balance out in that we now skip the clear after use.)
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915: Refine VT-d scanout workaround
  2021-02-11 15:19   ` Chris Wilson
@ 2021-02-11 17:00     ` Matthew Auld
  2021-02-11 17:17       ` Chris Wilson
  0 siblings, 1 reply; 9+ messages in thread
From: Matthew Auld @ 2021-02-11 17:00 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On 11/02/2021 15:19, Chris Wilson wrote:
> Quoting Matthew Auld (2021-02-11 14:25:41)
>> On 10/02/2021 23:39, Chris Wilson wrote:
>>> VT-d may cause overfetch of the scanout PTE, both before and after the
>>> vma (depending on the scanout orientation). bspec recommends that we
>>> provide a tile-row in either directions, and suggests using 160 PTE,
>>> warning that the accesses will wrap around the ends of the GGTT.
>>> Currently, we fill the entire GGTT with scratch pages when using VT-d to
>>> always ensure there are valid entries around every vma, including
>>> scanout. However, writing every PTE is slow as on recent devices we
>>> perform 8MiB of uncached writes, incurring an extra 100ms during resume.
>>>
>>> If instead we focus on only putting guard pages around scanout, we can
>>> avoid touching the whole GGTT. To avoid having to introduce extra nodes
>>> around each scanout vma, we adjust the scanout drm_mm_node to be smaller
>>> than the allocated space, and fixup the extra PTE during dma binding.
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
>>> Cc: Matthew Auld <matthew.auld@intel.com>
>>> ---
>>>    drivers/gpu/drm/i915/gem/i915_gem_domain.c |  3 ++
>>>    drivers/gpu/drm/i915/gt/intel_ggtt.c       | 37 ++++++++--------------
>>>    drivers/gpu/drm/i915/i915_gem_gtt.h        |  1 +
>>>    drivers/gpu/drm/i915/i915_vma.c            | 23 ++++++++++++++
>>>    drivers/gpu/drm/i915/i915_vma_types.h      |  1 +
>>>    5 files changed, 41 insertions(+), 24 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
>>> index 0478b069c202..9f2ccc255ca1 100644
>>> --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
>>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
>>> @@ -345,6 +345,9 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
>>>        if (ret)
>>>                goto err;
>>>    
>>> +     if (intel_scanout_needs_vtd_wa(i915))
>>> +             flags |= PIN_VTD;
>>> +
>>>        /*
>>>         * As the user may map the buffer once pinned in the display plane
>>>         * (e.g. libkms for the bootup splash), we have to ensure that we
>>> diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
>>> index b0b8ded834f0..416f77f48561 100644
>>> --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
>>> +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
>>> @@ -238,6 +238,11 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
>>>    
>>>        gte = (gen8_pte_t __iomem *)ggtt->gsm;
>>>        gte += vma->node.start / I915_GTT_PAGE_SIZE;
>>> +
>>> +     end = gte - vma->guard / I915_GTT_PAGE_SIZE;
>>> +     while (end < gte)
>>> +             gen8_set_pte(end++, vm->scratch[0]->encode);
>>> +
>>>        end = gte + vma->node.size / I915_GTT_PAGE_SIZE;
>>>    
>>>        for_each_sgt_daddr(addr, iter, vma->pages)
>>> @@ -245,6 +250,7 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
>>>        GEM_BUG_ON(gte > end);
>>>    
>>>        /* Fill the allocated but "unused" space beyond the end of the buffer */
>>> +     end += vma->guard / I915_GTT_PAGE_SIZE;
>>>        while (gte < end)
>>>                gen8_set_pte(gte++, vm->scratch[0]->encode);
>>>    
>>> @@ -289,6 +295,11 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
>>>    
>>>        gte = (gen6_pte_t __iomem *)ggtt->gsm;
>>>        gte += vma->node.start / I915_GTT_PAGE_SIZE;
>>> +
>>> +     end = gte - vma->guard / I915_GTT_PAGE_SIZE;
>>> +     while (end < gte)
>>> +             gen8_set_pte(end++, vm->scratch[0]->encode);
>>> +
>>>        end = gte + vma->node.size / I915_GTT_PAGE_SIZE;
>>>    
>>>        for_each_sgt_daddr(addr, iter, vma->pages)
>>> @@ -296,6 +307,7 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
>>>        GEM_BUG_ON(gte > end);
>>>    
>>>        /* Fill the allocated but "unused" space beyond the end of the buffer */
>>> +     end += vma->guard / I915_GTT_PAGE_SIZE;
>>>        while (gte < end)
>>>                iowrite32(vm->scratch[0]->encode, gte++);
>>>    
>>> @@ -311,27 +323,6 @@ static void nop_clear_range(struct i915_address_space *vm,
>>>    {
>>>    }
>>>    
>>> -static void gen8_ggtt_clear_range(struct i915_address_space *vm,
>>> -                               u64 start, u64 length)
>>> -{
>>> -     struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
>>> -     unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
>>> -     unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
>>> -     const gen8_pte_t scratch_pte = vm->scratch[0]->encode;
>>> -     gen8_pte_t __iomem *gtt_base =
>>> -             (gen8_pte_t __iomem *)ggtt->gsm + first_entry;
>>> -     const int max_entries = ggtt_total_entries(ggtt) - first_entry;
>>> -     int i;
>>> -
>>> -     if (WARN(num_entries > max_entries,
>>> -              "First entry = %d; Num entries = %d (max=%d)\n",
>>> -              first_entry, num_entries, max_entries))
>>> -             num_entries = max_entries;
>>> -
>>> -     for (i = 0; i < num_entries; i++)
>>> -             gen8_set_pte(&gtt_base[i], scratch_pte);
>>> -}
>>> -
>>>    static void bxt_vtd_ggtt_wa(struct i915_address_space *vm)
>>>    {
>>>        /*
>>> @@ -898,8 +889,6 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
>>>        ggtt->vm.cleanup = gen6_gmch_remove;
>>>        ggtt->vm.insert_page = gen8_ggtt_insert_page;
>>>        ggtt->vm.clear_range = nop_clear_range;
>>> -     if (intel_scanout_needs_vtd_wa(i915))
>>> -             ggtt->vm.clear_range = gen8_ggtt_clear_range;
>>>    
>>>        ggtt->vm.insert_entries = gen8_ggtt_insert_entries;
>>>    
>>> @@ -1045,7 +1034,7 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt)
>>>        ggtt->vm.alloc_pt_dma = alloc_pt_dma;
>>>    
>>>        ggtt->vm.clear_range = nop_clear_range;
>>> -     if (!HAS_FULL_PPGTT(i915) || intel_scanout_needs_vtd_wa(i915))
>>> +     if (!HAS_FULL_PPGTT(i915))
>>>                ggtt->vm.clear_range = gen6_ggtt_clear_range;
>>>        ggtt->vm.insert_page = gen6_ggtt_insert_page;
>>>        ggtt->vm.insert_entries = gen6_ggtt_insert_entries;
>>> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
>>> index c9b0ee5e1d23..8a2dfc7144cf 100644
>>> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
>>> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
>>> @@ -41,6 +41,7 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
>>>    #define PIN_HIGH            BIT_ULL(5)
>>>    #define PIN_OFFSET_BIAS             BIT_ULL(6)
>>>    #define PIN_OFFSET_FIXED    BIT_ULL(7)
>>> +#define PIN_VTD                      BIT_ULL(8)
>>>    
>>>    #define PIN_GLOBAL          BIT_ULL(10) /* I915_VMA_GLOBAL_BIND */
>>>    #define PIN_USER            BIT_ULL(11) /* I915_VMA_LOCAL_BIND */
>>> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
>>> index caa9b041616b..dccd36ff1a6d 100644
>>> --- a/drivers/gpu/drm/i915/i915_vma.c
>>> +++ b/drivers/gpu/drm/i915/i915_vma.c
>>> @@ -38,6 +38,8 @@
>>>    #include "i915_trace.h"
>>>    #include "i915_vma.h"
>>>    
>>> +#define VTD_GUARD roundup_pow_of_two(160 * SZ_4K) /* 160 PTE padding */
>>> +
>>>    static struct i915_global_vma {
>>>        struct i915_global base;
>>>        struct kmem_cache *slab_vmas;
>>> @@ -552,6 +554,9 @@ bool i915_vma_misplaced(const struct i915_vma *vma,
>>>            vma->node.start != (flags & PIN_OFFSET_MASK))
>>>                return true;
>>>    
>>> +     if (flags & PIN_VTD && vma->guard < VTD_GUARD)
>>> +             return true;
>>> +
>>>        return false;
>>>    }
>>>    
>>> @@ -637,6 +642,13 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
>>>                                  alignment, vma->fence_alignment);
>>>        }
>>>    
>>> +     /* VT-d requires padding before/after the vma */
>>> +     if (flags & PIN_VTD) {
>>> +             alignment = max_t(typeof(alignment), alignment, VTD_GUARD);
>>> +             vma->guard = alignment;
>>> +             size += 2 * vma->guard;
>>> +     }
>>> +
>>>        GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
>>>        GEM_BUG_ON(!IS_ALIGNED(alignment, I915_GTT_MIN_ALIGNMENT));
>>>        GEM_BUG_ON(!is_power_of_2(alignment));
>>> @@ -725,6 +737,11 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
>>>    
>>>        list_add_tail(&vma->vm_link, &vma->vm->bound_list);
>>>    
>>> +     if (flags & PIN_VTD) {
>>> +             vma->node.start += vma->guard;
>>> +             vma->node.size -= 2 * vma->guard;
>>> +     }
>>> +
>>
>> So we adjust the node to be twice as large, reserve it in the mm, and
>> then shrink it again here. Seems a little scary to modify
>> node.start/node.size after insertion?
> 
> Definitely scary. So far it feels like the most viable approach. At
> least the concept of including a red-zone around the vma/drm_mm_node as
> part of that structure seems useful, and familiar.

Throwing some color_adjust at it might be another option to consider. 
Maybe something like:

+static void i915_ggtt_color_adjust_vdt(const struct drm_mm_node *node,
+                                      unsigned long color,
+                                      u64 *start,
+                                      u64 *end)
+{
+       if (color == COLOR_VDT) {
+               *start += VDT_PADDING;
+               *end -= VDT_PADDING;
+               return;
+       }
+
+       if (node->allocated && node->color == COLOR_VDT)
+               *start += VDT_PADDING;
+
+       node = list_next_entry(node, node_list);
+       if (node->allocated && node->color == COLOR_VDT)
+               *end -= VDT_PADDING;
+}

But not sure if I would call that simpler. Plus we need to add more 
special casing in the eviction code. And I guess the cache coloring 
might also be active here, which might be nasty. Also we end up with a 
bunch of holes in the address space that are unusable, yet the mm search 
will keep hitting them anyway. Ok, I guess that's what you meant with 
not introducing "extra nodes". Hmmm.

>   
>> Just wondering if this might upset something like evict_for_node, where
>> we inspect the allocated nodes over some range, rather than holes, and
>> since we shrunk the node it might get confused into thinking the padding
>> is actually free space. Or maybe that doesn't really matter much for the
>> GGTT?
> 
> The stumbling block I had earlier then forgot about is the drm_mm_node's
> computed hole_size uses the adjacent drm_mm_node.start. So as we've
> adjusted the node.start, it will later on declare a bigger hole and
> reuse part of our guard pages. That we will then clobber on resume.
> 
> (There's also consequences for the address interval tree, but that will
> continue to work correctly as we won't move the node.start beyond the
> allocated space.)
> 
> Hmm. Fortunately, we have wrapped all users of vma->node.start, at least
> for ggtt, with i915_ggtt_offset(). So it is feasible to adjust the
> offset by vma->guard there.
> 
> Let's try that.
> -Chris
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915: Refine VT-d scanout workaround
  2021-02-11 17:00     ` Matthew Auld
@ 2021-02-11 17:17       ` Chris Wilson
  0 siblings, 0 replies; 9+ messages in thread
From: Chris Wilson @ 2021-02-11 17:17 UTC (permalink / raw)
  To: Matthew Auld, intel-gfx

Quoting Matthew Auld (2021-02-11 17:00:20)
> Throwing some color_adjust at it might be another option to consider. 
> Maybe something like:
> 
> +static void i915_ggtt_color_adjust_vdt(const struct drm_mm_node *node,
> +                                      unsigned long color,
> +                                      u64 *start,
> +                                      u64 *end)
> +{
> +       if (color == COLOR_VDT) {
> +               *start += VDT_PADDING;
> +               *end -= VDT_PADDING;
> +               return;
> +       }
> +
> +       if (node->allocated && node->color == COLOR_VDT)
> +               *start += VDT_PADDING;
> +
> +       node = list_next_entry(node, node_list);
> +       if (node->allocated && node->color == COLOR_VDT)
> +               *end -= VDT_PADDING;
> +}
> 
> But not sure if I would call that simpler. Plus we need to add more 
> special casing in the eviction code. And I guess the cache coloring 
> might also be active here, which might be nasty. Also we end up with a 
> bunch of holes in the address space that are unusable, yet the mm search 
> will keep hitting them anyway. Ok, I guess that's what you meant with 
> not introducing "extra nodes". Hmmm.

I considered trying to use coloring, but the problem I found was in
knowing whether or not to fill outside of the vma with scratch pages. We
would have to look up each PTE to check it is not in use, then fill with
scratch. And if we removed a neighbour, it would have to check to see if
it should replace the guard pages. (And it's further complicated by the
VT-d wrap-around: an object at the beginning of the GGTT would
overfetch into the pages at the other end of the GGTT.)

I felt that making an explicit reservation and accounting the VT-d
overfetch to the scanout vma would save a lot of hassle. The PTE are
accounted for (will not be reused, and safe to clear after resume) and
dedicated as scratch for the overfetch, and the overfetch cannot wrap
around as we force the vma to be away from the edges.

Packing the information into the single scanout i915_vma so that we do a
single padded i915_vma_insert seemed to be much easier to manage than
having to do additional i915_vma_inserts on either side (and so we have
to somehow manage searching for enough space for all 3 in the first call
etc). Plus i915_vma is already massive :|
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2021-02-11 17:17 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-02-10 23:39 [Intel-gfx] [PATCH] drm/i915: Refine VT-d scanout workaround Chris Wilson
2021-02-11  0:28 ` [Intel-gfx] ✓ Fi.CI.BAT: success for " Patchwork
2021-02-11 13:16 ` [Intel-gfx] ✓ Fi.CI.IGT: " Patchwork
2021-02-11 14:25 ` [Intel-gfx] [PATCH] " Matthew Auld
2021-02-11 15:19   ` Chris Wilson
2021-02-11 17:00     ` Matthew Auld
2021-02-11 17:17       ` Chris Wilson
2021-02-11 16:05 ` Ville Syrjälä
2021-02-11 16:24   ` Chris Wilson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.