* [Intel-gfx] [PATCH 1/3] drm/i915: Wrap all access to i915_vma.node.start|size
@ 2021-02-12 10:22 Chris Wilson
  2021-02-12 10:22 ` [Intel-gfx] [PATCH 2/3] drm/i915: Introduce guard pages to i915_vma Chris Wilson
                   ` (4 more replies)
  0 siblings, 5 replies; 17+ messages in thread
From: Chris Wilson @ 2021-02-12 10:22 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

We already wrap i915_vma.node.start for use with the GGTT, as there we
can perform additional sanity checks that the node belongs to the GGTT
and fits within the 32b registers. In the next couple of patches, we
will introduce guard pages around the objects _inside_ the drm_mm_node
allocation. That is, we will offset the vma->pages so that the first
page is at drm_mm_node.start + vma->guard (not 0 as is currently the
case). Users must then no longer use i915_vma.node.start directly, but
account for the guard offset; all such users are therefore converted to
an i915_vma_offset() wrapper.

The notable exceptions are the selftests that test the exact
behaviour of i915_vma_pin/i915_vma_insert.
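
As an illustration, the typical call-site conversion looks like the
following (a minimal sketch; the wrappers themselves are added to
i915_vma.h in the hunk below):

	/* Before: raw drm_mm_node access, unaware of any guard padding */
	*cs++ = lower_32_bits(vma->node.start);
	*cs++ = upper_32_bits(vma->node.start);

	/* After: guard-aware accessor returning the offset of the object */
	*cs++ = lower_32_bits(i915_vma_offset(vma));
	*cs++ = upper_32_bits(i915_vma_offset(vma));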

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/display/intel_fbdev.c    |  6 ++--
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 29 ++++++++++---------
 drivers/gpu/drm/i915/gem/i915_gem_mman.c      |  2 +-
 .../gpu/drm/i915/gem/i915_gem_object_blt.c    |  2 +-
 drivers/gpu/drm/i915/gem/i915_gem_shrinker.c  |  2 +-
 drivers/gpu/drm/i915/gem/i915_gem_tiling.c    |  4 +--
 .../gpu/drm/i915/gem/selftests/huge_pages.c   |  2 +-
 .../i915/gem/selftests/i915_gem_client_blt.c  | 11 +++----
 .../drm/i915/gem/selftests/i915_gem_context.c | 16 ++++++----
 .../drm/i915/gem/selftests/i915_gem_mman.c    |  2 +-
 .../drm/i915/gem/selftests/igt_gem_utils.c    |  4 +--
 drivers/gpu/drm/i915/gt/gen6_ppgtt.c          |  2 +-
 drivers/gpu/drm/i915/gt/gen7_renderclear.c    |  2 +-
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c          |  8 ++---
 drivers/gpu/drm/i915/gt/intel_ggtt.c          |  5 ++--
 drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c  |  3 +-
 drivers/gpu/drm/i915/gt/intel_ppgtt.c         |  7 +++--
 drivers/gpu/drm/i915/gt/intel_renderstate.c   |  2 +-
 .../gpu/drm/i915/gt/intel_ring_submission.c   |  2 +-
 drivers/gpu/drm/i915/gt/selftest_execlists.c  |  8 ++---
 drivers/gpu/drm/i915/gt/selftest_hangcheck.c  | 14 +++++----
 .../drm/i915/gt/selftest_ring_submission.c    |  2 +-
 drivers/gpu/drm/i915/gt/selftest_rps.c        | 12 ++++----
 drivers/gpu/drm/i915/i915_debugfs.c           |  3 +-
 drivers/gpu/drm/i915/i915_gpu_error.c         |  4 +--
 drivers/gpu/drm/i915/i915_perf.c              |  2 +-
 drivers/gpu/drm/i915/i915_vma.c               | 21 +++++++-------
 drivers/gpu/drm/i915/i915_vma.h               | 18 ++++++++++--
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 12 ++++++--
 drivers/gpu/drm/i915/selftests/i915_request.c |  8 ++---
 .../gpu/drm/i915/selftests/i915_scheduler.c   |  4 +--
 drivers/gpu/drm/i915/selftests/igt_spinner.c  |  9 ++++--
 32 files changed, 134 insertions(+), 94 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c
index 07db8e83f98e..2a72fb3c8416 100644
--- a/drivers/gpu/drm/i915/display/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/display/intel_fbdev.c
@@ -237,8 +237,8 @@ static int intelfb_create(struct drm_fb_helper *helper,
 
 	/* Our framebuffer is the entirety of fbdev's system memory */
 	info->fix.smem_start =
-		(unsigned long)(ggtt->gmadr.start + vma->node.start);
-	info->fix.smem_len = vma->node.size;
+		(unsigned long)(ggtt->gmadr.start + i915_ggtt_offset(vma));
+	info->fix.smem_len = vma->size;
 
 	vaddr = i915_vma_pin_iomap(vma);
 	if (IS_ERR(vaddr)) {
@@ -248,7 +248,7 @@ static int intelfb_create(struct drm_fb_helper *helper,
 		goto out_unpin;
 	}
 	info->screen_base = vaddr;
-	info->screen_size = vma->node.size;
+	info->screen_size = vma->size;
 
 	drm_fb_helper_fill_info(info, &ifbdev->helper, sizes);
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index fe170186dd42..f75f6751cf3f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -368,22 +368,23 @@ eb_vma_misplaced(const struct drm_i915_gem_exec_object2 *entry,
 		 const struct i915_vma *vma,
 		 unsigned int flags)
 {
-	if (vma->node.size < entry->pad_to_size)
+	const u64 start = i915_vma_offset(vma);
+	const u64 size = i915_vma_size(vma);
+
+	if (size < entry->pad_to_size)
 		return true;
 
-	if (entry->alignment && !IS_ALIGNED(vma->node.start, entry->alignment))
+	if (entry->alignment && !IS_ALIGNED(start, entry->alignment))
 		return true;
 
-	if (flags & EXEC_OBJECT_PINNED &&
-	    vma->node.start != entry->offset)
+	if (flags & EXEC_OBJECT_PINNED && start != entry->offset)
 		return true;
 
-	if (flags & __EXEC_OBJECT_NEEDS_BIAS &&
-	    vma->node.start < BATCH_OFFSET_BIAS)
+	if (flags & __EXEC_OBJECT_NEEDS_BIAS && start < BATCH_OFFSET_BIAS)
 		return true;
 
 	if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) &&
-	    (vma->node.start + vma->node.size + 4095) >> 32)
+	    (start + size + 4095) >> 32)
 		return true;
 
 	if (flags & __EXEC_OBJECT_NEEDS_MAP &&
@@ -607,8 +608,8 @@ static int eb_reserve_vma(struct i915_execbuffer *eb,
 	if (err)
 		return err;
 
-	if (entry->offset != vma->node.start) {
-		entry->offset = vma->node.start | UPDATE;
+	if (entry->offset != i915_vma_offset(vma)) {
+		entry->offset = i915_vma_offset(vma) | UPDATE;
 		eb->args->flags |= __EXEC_HAS_RELOC;
 	}
 
@@ -899,8 +900,8 @@ static int eb_validate_vmas(struct i915_execbuffer *eb)
 			return err;
 
 		if (eb_pin_vma(eb, entry, ev)) {
-			if (entry->offset != vma->node.start) {
-				entry->offset = vma->node.start | UPDATE;
+			if (entry->offset != i915_vma_offset(vma)) {
+				entry->offset = i915_vma_offset(vma) | UPDATE;
 				eb->args->flags |= __EXEC_HAS_RELOC;
 			}
 		} else {
@@ -1173,7 +1174,7 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
 			if (err) /* no inactive aperture space, use cpu reloc */
 				return NULL;
 		} else {
-			cache->node.start = vma->node.start;
+			cache->node.start = i915_ggtt_offset(vma);
 			cache->node.mm = (void *)vma;
 		}
 	}
@@ -1458,7 +1459,7 @@ static int __reloc_entry_gpu(struct i915_execbuffer *eb,
 	else if (IS_ERR(batch))
 		return false;
 
-	addr = gen8_canonical_addr(vma->node.start + offset);
+	addr = gen8_canonical_addr(i915_vma_offset(vma) + offset);
 	if (gen >= 8) {
 		if (offset & 7) {
 			*batch++ = MI_STORE_DWORD_IMM_GEN4;
@@ -1622,7 +1623,7 @@ eb_relocate_entry(struct i915_execbuffer *eb,
 	 * more work needs to be done.
 	 */
 	if (!DBG_FORCE_RELOC &&
-	    gen8_canonical_addr(target->vma->node.start) == reloc->presumed_offset)
+	    gen8_canonical_addr(i915_vma_offset(target->vma)) == reloc->presumed_offset)
 		return 0;
 
 	/* Check that the relocation address is valid... */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index ec28a6cde49b..02cb84285dbb 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -365,7 +365,7 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf)
 	/* Finally, remap it using the new GTT offset */
 	ret = remap_io_mapping(area,
 			       area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT),
-			       (ggtt->gmadr.start + vma->node.start) >> PAGE_SHIFT,
+			       (ggtt->gmadr.start + i915_ggtt_offset(vma)) >> PAGE_SHIFT,
 			       min_t(u64, vma->size, area->vm_end - area->vm_start),
 			       &ggtt->iomap);
 	if (ret)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
index d6dac21fce0b..c6222a99d89b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
@@ -62,7 +62,7 @@ struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce,
 	}
 
 	rem = vma->size;
-	offset = vma->node.start;
+	offset = i915_vma_offset(vma);
 
 	do {
 		u32 size = min_t(u64, rem, block_size);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
index b64a0788381f..7a1a90cd8bcd 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
@@ -376,7 +376,7 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
 	mutex_lock(&i915->ggtt.vm.mutex);
 	list_for_each_entry_safe(vma, next,
 				 &i915->ggtt.vm.bound_list, vm_link) {
-		unsigned long count = vma->node.size >> PAGE_SHIFT;
+		unsigned long count = i915_vma_size(vma) >> PAGE_SHIFT;
 
 		if (!vma->iomap || i915_vma_is_active(vma))
 			continue;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
index d589d3d81085..b92bcb04f563 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
@@ -166,11 +166,11 @@ static bool i915_vma_fence_prepare(struct i915_vma *vma,
 		return true;
 
 	size = i915_gem_fence_size(i915, vma->size, tiling_mode, stride);
-	if (vma->node.size < size)
+	if (i915_vma_size(vma) < size)
 		return false;
 
 	alignment = i915_gem_fence_alignment(i915, vma->size, tiling_mode, stride);
-	if (!IS_ALIGNED(vma->node.start, alignment))
+	if (!IS_ALIGNED(i915_ggtt_offset(vma), alignment))
 		return false;
 
 	return true;
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
index 10ee24b252dd..4b50af149cfc 100644
--- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
@@ -377,7 +377,7 @@ static int igt_check_page_sizes(struct i915_vma *vma)
 	 * Maintaining alignment is required to utilise huge pages in the ppGGT.
 	 */
 	if (i915_gem_object_is_lmem(obj) &&
-	    IS_ALIGNED(vma->node.start, SZ_2M) &&
+	    IS_ALIGNED(i915_vma_offset(vma), SZ_2M) &&
 	    vma->page_sizes.sg & SZ_2M &&
 	    vma->page_sizes.gtt < SZ_2M) {
 		pr_err("gtt pages mismatch for LMEM, expected 2M GTT pages, sg(%u), gtt(%u)\n",
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
index 175581724d44..586b57b8d957 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
@@ -212,14 +212,14 @@ static int prepare_blit(const struct tiled_blits *t,
 	*cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | dst_pitch;
 	*cs++ = 0;
 	*cs++ = t->height << 16 | t->width;
-	*cs++ = lower_32_bits(dst->vma->node.start);
+	*cs++ = lower_32_bits(i915_vma_offset(dst->vma));
 	if (use_64b_reloc)
-		*cs++ = upper_32_bits(dst->vma->node.start);
+		*cs++ = upper_32_bits(i915_vma_offset(dst->vma));
 	*cs++ = 0;
 	*cs++ = src_pitch;
-	*cs++ = lower_32_bits(src->vma->node.start);
+	*cs++ = lower_32_bits(i915_vma_offset(src->vma));
 	if (use_64b_reloc)
-		*cs++ = upper_32_bits(src->vma->node.start);
+		*cs++ = upper_32_bits(i915_vma_offset(src->vma));
 
 	*cs++ = MI_BATCH_BUFFER_END;
 
@@ -435,7 +435,7 @@ static int pin_buffer(struct i915_vma *vma, u64 addr)
 {
 	int err;
 
-	if (drm_mm_node_allocated(&vma->node) && vma->node.start != addr) {
+	if (drm_mm_node_allocated(&vma->node) && i915_vma_offset(vma) != addr) {
 		err = i915_vma_unbind(vma);
 		if (err)
 			return err;
@@ -445,6 +445,7 @@ static int pin_buffer(struct i915_vma *vma, u64 addr)
 	if (err)
 		return err;
 
+	GEM_BUG_ON(i915_vma_offset(vma) != addr);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index df949320f2b5..39fc548d927a 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -905,8 +905,8 @@ static int rpcs_query_batch(struct drm_i915_gem_object *rpcs, struct i915_vma *v
 
 	*cmd++ = MI_STORE_REGISTER_MEM_GEN8;
 	*cmd++ = i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE);
-	*cmd++ = lower_32_bits(vma->node.start);
-	*cmd++ = upper_32_bits(vma->node.start);
+	*cmd++ = lower_32_bits(i915_vma_offset(vma));
+	*cmd++ = upper_32_bits(i915_vma_offset(vma));
 	*cmd = MI_BATCH_BUFFER_END;
 
 	__i915_gem_object_flush_map(rpcs, 0, 64);
@@ -1561,7 +1561,10 @@ static int write_to_scratch(struct i915_gem_context *ctx,
 			goto skip_request;
 	}
 
-	err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, 0);
+	err = engine->emit_bb_start(rq,
+				    i915_vma_offset(vma),
+				    i915_vma_size(vma),
+				    0);
 	if (err)
 		goto skip_request;
 
@@ -1668,7 +1671,7 @@ static int read_from_scratch(struct i915_gem_context *ctx,
 		*cmd++ = offset;
 		*cmd++ = MI_STORE_REGISTER_MEM | MI_USE_GGTT;
 		*cmd++ = reg;
-		*cmd++ = vma->node.start + result;
+		*cmd++ = i915_vma_offset(vma) + result;
 		*cmd = MI_BATCH_BUFFER_END;
 
 		i915_gem_object_flush_map(obj);
@@ -1699,7 +1702,10 @@ static int read_from_scratch(struct i915_gem_context *ctx,
 			goto skip_request;
 	}
 
-	err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, flags);
+	err = engine->emit_bb_start(rq,
+				    i915_vma_offset(vma),
+				    i915_vma_size(vma),
+				    flags);
 	if (err)
 		goto skip_request;
 
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index 39293d98f34d..d37706807703 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -1145,7 +1145,7 @@ static int __igt_mmap_gpu(struct drm_i915_private *i915,
 		if (err == 0)
 			err = i915_vma_move_to_active(vma, rq, 0);
 
-		err = engine->emit_bb_start(rq, vma->node.start, 0, 0);
+		err = engine->emit_bb_start(rq, i915_vma_offset(vma), 0, 0);
 		i915_request_get(rq);
 		i915_request_add(rq);
 
diff --git a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c
index b7e064667d39..e967e0fb79c5 100644
--- a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c
+++ b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c
@@ -62,8 +62,8 @@ igt_emit_store_dw(struct i915_vma *vma,
 		goto err;
 	}
 
-	GEM_BUG_ON(offset + (count - 1) * PAGE_SIZE > vma->node.size);
-	offset += vma->node.start;
+	GEM_BUG_ON(offset + (count - 1) * PAGE_SIZE > i915_vma_size(vma));
+	offset += i915_vma_offset(vma);
 
 	for (n = 0; n < count; n++) {
 		if (gen >= 8) {
diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
index e08dff376339..d16cfa75c03a 100644
--- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
@@ -111,7 +111,7 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
 {
 	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
 	struct i915_page_directory * const pd = ppgtt->pd;
-	unsigned int first_entry = vma->node.start / I915_GTT_PAGE_SIZE;
+	unsigned int first_entry = i915_vma_offset(vma) / I915_GTT_PAGE_SIZE;
 	unsigned int act_pt = first_entry / GEN6_PTES;
 	unsigned int act_pte = first_entry % GEN6_PTES;
 	const u32 pte_encode = vm->pte_encode(0, cache_level, flags);
diff --git a/drivers/gpu/drm/i915/gt/gen7_renderclear.c b/drivers/gpu/drm/i915/gt/gen7_renderclear.c
index de575fdb033f..52e6f08d0ced 100644
--- a/drivers/gpu/drm/i915/gt/gen7_renderclear.c
+++ b/drivers/gpu/drm/i915/gt/gen7_renderclear.c
@@ -105,7 +105,7 @@ static u32 batch_offset(const struct batch_chunk *bc, u32 *cs)
 
 static u32 batch_addr(const struct batch_chunk *bc)
 {
-	return bc->vma->node.start;
+	return i915_vma_offset(bc->vma);
 }
 
 static void batch_add(struct batch_chunk *bc, const u32 d)
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 176c19633412..9d88d81c30d2 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -419,7 +419,7 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
 {
 	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
 	unsigned int rem = sg_dma_len(iter->sg);
-	u64 start = vma->node.start;
+	u64 start = i915_vma_offset(vma);
 
 	GEM_BUG_ON(!i915_vm_is_4lvl(vma->vm));
 
@@ -502,8 +502,8 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
 		if (maybe_64K != -1 &&
 		    (index == I915_PDES ||
 		     (i915_vm_has_scratch_64K(vma->vm) &&
-		      !iter->sg && IS_ALIGNED(vma->node.start +
-					      vma->node.size,
+		      !iter->sg && IS_ALIGNED(i915_vma_offset(vma) +
+					      i915_vma_size(vma),
 					      I915_GTT_PAGE_SIZE_2M)))) {
 			vaddr = kmap_atomic_px(pd);
 			vaddr[maybe_64K] |= GEN8_PDE_IPS_64K;
@@ -547,7 +547,7 @@ static void gen8_ppgtt_insert(struct i915_address_space *vm,
 	if (vma->page_sizes.sg > I915_GTT_PAGE_SIZE) {
 		gen8_ppgtt_insert_huge(vma, &iter, cache_level, flags);
 	} else  {
-		u64 idx = vma->node.start >> GEN8_PTE_SHIFT;
+		u64 idx = i915_vma_offset(vma) >> GEN8_PTE_SHIFT;
 
 		do {
 			struct i915_page_directory * const pdp =
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index b0b8ded834f0..c5803c434d33 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -440,7 +440,8 @@ static void i915_ggtt_insert_entries(struct i915_address_space *vm,
 	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
 		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
 
-	intel_gtt_insert_sg_entries(vma->pages, vma->node.start >> PAGE_SHIFT,
+	intel_gtt_insert_sg_entries(vma->pages,
+				    i915_ggtt_offset(vma) >> PAGE_SHIFT,
 				    flags);
 }
 
@@ -631,7 +632,7 @@ static void aliasing_gtt_unbind_vma(struct i915_address_space *vm,
 				    struct i915_vma *vma)
 {
 	if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
-		vm->clear_range(vm, vma->node.start, vma->size);
+		vm->clear_range(vm, i915_ggtt_offset(vma), vma->size);
 
 	if (i915_vma_is_bound(vma, I915_VMA_LOCAL_BIND))
 		ppgtt_unbind_vma(&i915_vm_to_ggtt(vm)->alias->vm, vma);
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
index e891552611d5..87160e3fed37 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
@@ -215,7 +215,8 @@ static int fence_update(struct i915_fence_reg *fence,
 				return ret;
 		}
 
-		fence->start = vma->node.start;
+		GEM_BUG_ON(vma->fence_size > i915_vma_size(vma));
+		fence->start = i915_ggtt_offset(vma);
 		fence->size = vma->fence_size;
 		fence->stride = i915_gem_object_get_stride(vma->obj);
 		fence->tiling = i915_gem_object_get_tiling(vma->obj);
diff --git a/drivers/gpu/drm/i915/gt/intel_ppgtt.c b/drivers/gpu/drm/i915/gt/intel_ppgtt.c
index a91955af50a6..0d11894aa848 100644
--- a/drivers/gpu/drm/i915/gt/intel_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ppgtt.c
@@ -186,7 +186,10 @@ void ppgtt_bind_vma(struct i915_address_space *vm,
 	u32 pte_flags;
 
 	if (!test_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma))) {
-		vm->allocate_va_range(vm, stash, vma->node.start, vma->size);
+		GEM_BUG_ON(vma->size > i915_vma_size(vma));
+		vm->allocate_va_range(vm, stash,
+				      i915_vma_offset(vma),
+				      vma->size);
 		set_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma));
 	}
 
@@ -204,7 +207,7 @@ void ppgtt_bind_vma(struct i915_address_space *vm,
 void ppgtt_unbind_vma(struct i915_address_space *vm, struct i915_vma *vma)
 {
 	if (test_and_clear_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma)))
-		vm->clear_range(vm, vma->node.start, vma->size);
+		vm->clear_range(vm, i915_vma_offset(vma), vma->size);
 }
 
 static unsigned long pd_count(u64 size, int shift)
diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.c b/drivers/gpu/drm/i915/gt/intel_renderstate.c
index 0f7c0a148b80..214a154665b5 100644
--- a/drivers/gpu/drm/i915/gt/intel_renderstate.c
+++ b/drivers/gpu/drm/i915/gt/intel_renderstate.c
@@ -61,7 +61,7 @@ static int render_state_setup(struct intel_renderstate *so,
 		u32 s = rodata->batch[i];
 
 		if (i * 4  == rodata->reloc[reloc_index]) {
-			u64 r = s + so->vma->node.start;
+			u64 r = s + i915_vma_offset(so->vma);
 
 			s = lower_32_bits(r);
 			if (HAS_64BIT_RELOC(i915)) {
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index 282089d64789..2c75e3fa6350 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -824,7 +824,7 @@ static int clear_residuals(struct i915_request *rq)
 	}
 
 	ret = engine->emit_bb_start(rq,
-				    engine->wa_ctx.vma->node.start, 0,
+				    i915_vma_offset(engine->wa_ctx.vma), 0,
 				    0);
 	if (ret)
 		return ret;
diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c
index f625c29023ea..5c8a6ccdf024 100644
--- a/drivers/gpu/drm/i915/gt/selftest_execlists.c
+++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c
@@ -2704,8 +2704,8 @@ static int create_gang(struct intel_engine_cs *engine,
 		MI_SEMAPHORE_POLL |
 		MI_SEMAPHORE_SAD_EQ_SDD;
 	*cs++ = 0;
-	*cs++ = lower_32_bits(vma->node.start);
-	*cs++ = upper_32_bits(vma->node.start);
+	*cs++ = lower_32_bits(i915_vma_offset(vma));
+	*cs++ = upper_32_bits(i915_vma_offset(vma));
 
 	if (*prev) {
 		u64 offset = (*prev)->batch->node.start;
@@ -2736,7 +2736,7 @@ static int create_gang(struct intel_engine_cs *engine,
 		err = i915_vma_move_to_active(vma, rq, 0);
 	if (!err)
 		err = rq->engine->emit_bb_start(rq,
-						vma->node.start,
+						i915_vma_offset(vma),
 						PAGE_SIZE, 0);
 	i915_vma_unlock(vma);
 	i915_request_add(rq);
@@ -3486,7 +3486,7 @@ static int smoke_submit(struct preempt_smoke *smoke,
 			err = i915_vma_move_to_active(vma, rq, 0);
 		if (!err)
 			err = rq->engine->emit_bb_start(rq,
-							vma->node.start,
+							i915_vma_offset(vma),
 							PAGE_SIZE, 0);
 		i915_vma_unlock(vma);
 	}
diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index cdb0ceff3be1..8540011c39cc 100644
--- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -192,8 +192,8 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine)
 
 		*batch++ = MI_NOOP;
 		*batch++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
-		*batch++ = lower_32_bits(vma->node.start);
-		*batch++ = upper_32_bits(vma->node.start);
+		*batch++ = lower_32_bits(i915_vma_offset(vma));
+		*batch++ = upper_32_bits(i915_vma_offset(vma));
 	} else if (INTEL_GEN(gt->i915) >= 6) {
 		*batch++ = MI_STORE_DWORD_IMM_GEN4;
 		*batch++ = 0;
@@ -206,7 +206,7 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine)
 
 		*batch++ = MI_NOOP;
 		*batch++ = MI_BATCH_BUFFER_START | 1 << 8;
-		*batch++ = lower_32_bits(vma->node.start);
+		*batch++ = lower_32_bits(i915_vma_offset(vma));
 	} else if (INTEL_GEN(gt->i915) >= 4) {
 		*batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
 		*batch++ = 0;
@@ -219,7 +219,7 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine)
 
 		*batch++ = MI_NOOP;
 		*batch++ = MI_BATCH_BUFFER_START | 2 << 6;
-		*batch++ = lower_32_bits(vma->node.start);
+		*batch++ = lower_32_bits(i915_vma_offset(vma));
 	} else {
 		*batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
 		*batch++ = lower_32_bits(hws_address(hws, rq));
@@ -231,7 +231,7 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine)
 
 		*batch++ = MI_NOOP;
 		*batch++ = MI_BATCH_BUFFER_START | 2 << 6;
-		*batch++ = lower_32_bits(vma->node.start);
+		*batch++ = lower_32_bits(i915_vma_offset(vma));
 	}
 	*batch++ = MI_BATCH_BUFFER_END; /* not reached */
 	intel_gt_chipset_flush(engine->gt);
@@ -246,7 +246,9 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine)
 	if (INTEL_GEN(gt->i915) <= 5)
 		flags |= I915_DISPATCH_SECURE;
 
-	err = rq->engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, flags);
+	err = rq->engine->emit_bb_start(rq,
+					i915_vma_offset(vma),
+					PAGE_SIZE, flags);
 
 cancel_rq:
 	if (err) {
diff --git a/drivers/gpu/drm/i915/gt/selftest_ring_submission.c b/drivers/gpu/drm/i915/gt/selftest_ring_submission.c
index 6cd9f6bc240c..92d0dc0ab68b 100644
--- a/drivers/gpu/drm/i915/gt/selftest_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/selftest_ring_submission.c
@@ -50,7 +50,7 @@ static struct i915_vma *create_wally(struct intel_engine_cs *engine)
 	} else {
 		*cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
 	}
-	*cs++ = vma->node.start + 4000;
+	*cs++ = i915_vma_offset(vma) + 4000;
 	*cs++ = STACK_MAGIC;
 
 	*cs++ = MI_BATCH_BUFFER_END;
diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c
index 967641fee42a..1f58db52670c 100644
--- a/drivers/gpu/drm/i915/gt/selftest_rps.c
+++ b/drivers/gpu/drm/i915/gt/selftest_rps.c
@@ -119,14 +119,14 @@ create_spin_counter(struct intel_engine_cs *engine,
 		if (srm) {
 			*cs++ = MI_STORE_REGISTER_MEM_GEN8;
 			*cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
-			*cs++ = lower_32_bits(vma->node.start + end * sizeof(*cs));
-			*cs++ = upper_32_bits(vma->node.start + end * sizeof(*cs));
+			*cs++ = lower_32_bits(i915_vma_offset(vma) + end * sizeof(*cs));
+			*cs++ = upper_32_bits(i915_vma_offset(vma) + end * sizeof(*cs));
 		}
 	}
 
 	*cs++ = MI_BATCH_BUFFER_START_GEN8;
-	*cs++ = lower_32_bits(vma->node.start + loop * sizeof(*cs));
-	*cs++ = upper_32_bits(vma->node.start + loop * sizeof(*cs));
+	*cs++ = lower_32_bits(i915_vma_offset(vma) + loop * sizeof(*cs));
+	*cs++ = upper_32_bits(i915_vma_offset(vma) + loop * sizeof(*cs));
 	GEM_BUG_ON(cs - base > end);
 
 	i915_gem_object_flush_map(obj);
@@ -655,7 +655,7 @@ int live_rps_frequency_cs(void *arg)
 			err = i915_vma_move_to_active(vma, rq, 0);
 		if (!err)
 			err = rq->engine->emit_bb_start(rq,
-							vma->node.start,
+							i915_vma_offset(vma),
 							PAGE_SIZE, 0);
 		i915_request_add(rq);
 		if (err)
@@ -796,7 +796,7 @@ int live_rps_frequency_srm(void *arg)
 			err = i915_vma_move_to_active(vma, rq, 0);
 		if (!err)
 			err = rq->engine->emit_bb_start(rq,
-							vma->node.start,
+							i915_vma_offset(vma),
 							PAGE_SIZE, 0);
 		i915_request_add(rq);
 		if (err)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 51133b8fabb4..a65a99fa5331 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -158,7 +158,8 @@ i915_debugfs_describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 
 		seq_printf(m, " (%sgtt offset: %08llx, size: %08llx, pages: %s",
 			   i915_vma_is_ggtt(vma) ? "g" : "pp",
-			   vma->node.start, vma->node.size,
+			   i915_vma_offset(vma),
+			   i915_vma_size(vma),
 			   stringify_page_sizes(vma->page_sizes.gtt, NULL, 0));
 		if (i915_vma_is_ggtt(vma)) {
 			switch (vma->ggtt_view.type) {
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 291f5b818925..c7abe4ee61be 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1011,8 +1011,8 @@ i915_vma_coredump_create(const struct intel_gt *gt,
 	strcpy(dst->name, name);
 	dst->next = NULL;
 
-	dst->gtt_offset = vma->node.start;
-	dst->gtt_size = vma->node.size;
+	dst->gtt_offset = i915_vma_offset(vma);
+	dst->gtt_size = i915_vma_size(vma);
 	dst->gtt_page_sizes = vma->page_sizes.gtt;
 	dst->num_pages = num_pages;
 	dst->page_count = 0;
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 93f3e2c5c89a..3e1421755aa6 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1985,7 +1985,7 @@ emit_oa_config(struct i915_perf_stream *stream,
 		goto err_add_request;
 
 	err = rq->engine->emit_bb_start(rq,
-					vma->node.start, 0,
+					i915_vma_offset(vma), 0,
 					I915_DISPATCH_SECURE);
 	if (err)
 		goto err_add_request;
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index caa9b041616b..17fe455bd770 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -382,7 +382,7 @@ int i915_vma_bind(struct i915_vma *vma,
 	u32 vma_flags;
 
 	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
-	GEM_BUG_ON(vma->size > vma->node.size);
+	GEM_BUG_ON(vma->size > i915_vma_size(vma));
 
 	if (GEM_DEBUG_WARN_ON(range_overflows(vma->node.start,
 					      vma->node.size,
@@ -459,8 +459,8 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
 	ptr = READ_ONCE(vma->iomap);
 	if (ptr == NULL) {
 		ptr = io_mapping_map_wc(&i915_vm_to_ggtt(vma->vm)->iomap,
-					vma->node.start,
-					vma->node.size);
+					i915_vma_offset(vma),
+					i915_vma_size(vma));
 		if (ptr == NULL) {
 			err = -ENOMEM;
 			goto err;
@@ -534,22 +534,22 @@ bool i915_vma_misplaced(const struct i915_vma *vma,
 	if (test_bit(I915_VMA_ERROR_BIT, __i915_vma_flags(vma)))
 		return true;
 
-	if (vma->node.size < size)
+	if (i915_vma_size(vma) < size)
 		return true;
 
 	GEM_BUG_ON(alignment && !is_power_of_2(alignment));
-	if (alignment && !IS_ALIGNED(vma->node.start, alignment))
+	if (alignment && !IS_ALIGNED(i915_vma_offset(vma), alignment))
 		return true;
 
 	if (flags & PIN_MAPPABLE && !i915_vma_is_map_and_fenceable(vma))
 		return true;
 
 	if (flags & PIN_OFFSET_BIAS &&
-	    vma->node.start < (flags & PIN_OFFSET_MASK))
+	    i915_vma_offset(vma) < (flags & PIN_OFFSET_MASK))
 		return true;
 
 	if (flags & PIN_OFFSET_FIXED &&
-	    vma->node.start != (flags & PIN_OFFSET_MASK))
+	    i915_vma_offset(vma) != (flags & PIN_OFFSET_MASK))
 		return true;
 
 	return false;
@@ -562,10 +562,11 @@ void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
 	GEM_BUG_ON(!i915_vma_is_ggtt(vma));
 	GEM_BUG_ON(!vma->fence_size);
 
-	fenceable = (vma->node.size >= vma->fence_size &&
-		     IS_ALIGNED(vma->node.start, vma->fence_alignment));
+	fenceable = (i915_vma_size(vma) >= vma->fence_size &&
+		     IS_ALIGNED(i915_vma_offset(vma), vma->fence_alignment));
 
-	mappable = vma->node.start + vma->fence_size <= i915_vm_to_ggtt(vma->vm)->mappable_end;
+	mappable = (i915_vma_offset(vma) + vma->fence_size <=
+		    i915_vm_to_ggtt(vma->vm)->mappable_end);
 
 	if (mappable && fenceable)
 		set_bit(I915_VMA_CAN_FENCE_BIT, __i915_vma_flags(vma));
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index a64adc8c883b..5049655a24c1 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -110,13 +110,25 @@ static inline bool i915_vma_is_closed(const struct i915_vma *vma)
 	return !list_empty(&vma->closed_link);
 }
 
+static inline u64 i915_vma_size(const struct i915_vma *vma)
+{
+       GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
+       return vma->node.size;
+}
+
+static inline u64 i915_vma_offset(const struct i915_vma *vma)
+{
+       GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
+       return vma->node.start;
+}
+
 static inline u32 i915_ggtt_offset(const struct i915_vma *vma)
 {
 	GEM_BUG_ON(!i915_vma_is_ggtt(vma));
 	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
-	GEM_BUG_ON(upper_32_bits(vma->node.start));
-	GEM_BUG_ON(upper_32_bits(vma->node.start + vma->node.size - 1));
-	return lower_32_bits(vma->node.start);
+	GEM_BUG_ON(upper_32_bits(i915_vma_offset(vma)));
+	GEM_BUG_ON(upper_32_bits(i915_vma_offset(vma) + i915_vma_size(vma) - 1));
+	return lower_32_bits(i915_vma_offset(vma));
 }
 
 static inline u32 i915_ggtt_pin_bias(struct i915_vma *vma)
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
index c1adea8765a9..d33ef0cc02ea 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
@@ -231,6 +231,10 @@ static int lowlevel_hole(struct i915_address_space *vm,
 	if (!mock_vma)
 		return -ENOMEM;
 
+	__set_bit(DRM_MM_NODE_ALLOCATED_BIT, &mock_vma->node.flags);
+	if (i915_is_ggtt(vm))
+		__set_bit(I915_VMA_GGTT_BIT, __i915_vma_flags(mock_vma));
+
 	/* Keep creating larger objects until one cannot fit into the hole */
 	for (size = 12; (hole_end - hole_start) >> size; size++) {
 		I915_RND_SUBSTATE(prng, seed_prng);
@@ -1941,6 +1945,7 @@ static int igt_cs_tlb(void *arg)
 			i915_vm_free_pt_stash(vm, &stash);
 
 			/* Prime the TLB with the dummy pages */
+			__set_bit(DRM_MM_NODE_ALLOCATED_BIT, &vma->node.flags);
 			for (i = 0; i < count; i++) {
 				vma->node.start = offset + i * PAGE_SIZE;
 				vm->insert_entries(vm, vma, I915_CACHE_NONE, 0);
@@ -1952,8 +1957,9 @@ static int igt_cs_tlb(void *arg)
 				}
 				i915_request_put(rq);
 			}
-
 			vma->ops->clear_pages(vma);
+			__clear_bit(DRM_MM_NODE_ALLOCATED_BIT,
+				    &vma->node.flags);
 
 			err = context_sync(ce);
 			if (err) {
@@ -1973,6 +1979,7 @@ static int igt_cs_tlb(void *arg)
 				goto end;
 
 			/* Replace the TLB with target batches */
+			__set_bit(DRM_MM_NODE_ALLOCATED_BIT, &vma->node.flags);
 			for (i = 0; i < count; i++) {
 				struct i915_request *rq;
 				u32 *cs = batch + i * 64 / sizeof(*cs);
@@ -2005,8 +2012,9 @@ static int igt_cs_tlb(void *arg)
 				i915_request_put(rq);
 			}
 			end_spin(batch, count - 1);
-
 			vma->ops->clear_pages(vma);
+			__clear_bit(DRM_MM_NODE_ALLOCATED_BIT,
+				    &vma->node.flags);
 
 			err = context_sync(ce);
 			if (err) {
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index 8035ea7565ed..70278be5711b 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -789,14 +789,14 @@ static struct i915_vma *recursive_batch(struct drm_i915_private *i915)
 
 	if (gen >= 8) {
 		*cmd++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
-		*cmd++ = lower_32_bits(vma->node.start);
-		*cmd++ = upper_32_bits(vma->node.start);
+		*cmd++ = lower_32_bits(i915_vma_offset(vma));
+		*cmd++ = upper_32_bits(i915_vma_offset(vma));
 	} else if (gen >= 6) {
 		*cmd++ = MI_BATCH_BUFFER_START | 1 << 8;
-		*cmd++ = lower_32_bits(vma->node.start);
+		*cmd++ = lower_32_bits(i915_vma_offset(vma));
 	} else {
 		*cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
-		*cmd++ = lower_32_bits(vma->node.start);
+		*cmd++ = lower_32_bits(i915_vma_offset(vma));
 	}
 	*cmd++ = MI_BATCH_BUFFER_END; /* terminate early in case of error */
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_scheduler.c b/drivers/gpu/drm/i915/selftests/i915_scheduler.c
index f54bdbeaa48b..6ed27896391b 100644
--- a/drivers/gpu/drm/i915/selftests/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/selftests/i915_scheduler.c
@@ -568,8 +568,8 @@ __write_timestamp(struct intel_engine_cs *engine,
 
 	*cs++ = MI_STORE_REGISTER_MEM + use_64b;
 	*cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(engine->mmio_base));
-	*cs++ = lower_32_bits(vma->node.start) + sizeof(u32) * slot;
-	*cs++ = upper_32_bits(vma->node.start);
+	*cs++ = lower_32_bits(i915_vma_offset(vma)) + sizeof(u32) * slot;
+	*cs++ = upper_32_bits(i915_vma_offset(vma));
 	intel_ring_advance(rq, cs);
 
 	i915_request_get(rq);
diff --git a/drivers/gpu/drm/i915/selftests/igt_spinner.c b/drivers/gpu/drm/i915/selftests/igt_spinner.c
index 0e6c1ea0082a..28469743de1c 100644
--- a/drivers/gpu/drm/i915/selftests/igt_spinner.c
+++ b/drivers/gpu/drm/i915/selftests/igt_spinner.c
@@ -163,8 +163,8 @@ igt_spinner_create_request(struct igt_spinner *spin,
 		*batch++ = MI_BATCH_BUFFER_START;
 	else
 		*batch++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
-	*batch++ = lower_32_bits(vma->node.start);
-	*batch++ = upper_32_bits(vma->node.start);
+	*batch++ = lower_32_bits(i915_vma_offset(vma));
+	*batch++ = upper_32_bits(i915_vma_offset(vma));
 
 	*batch++ = MI_BATCH_BUFFER_END; /* not reached */
 
@@ -179,7 +179,10 @@ igt_spinner_create_request(struct igt_spinner *spin,
 	flags = 0;
 	if (INTEL_GEN(rq->engine->i915) <= 5)
 		flags |= I915_DISPATCH_SECURE;
-	err = engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, flags);
+	err = engine->emit_bb_start(rq,
+				    i915_vma_offset(vma),
+				    PAGE_SIZE,
+				    flags);
 
 cancel_rq:
 	if (err) {
-- 
2.20.1


* [Intel-gfx] [PATCH 2/3] drm/i915: Introduce guard pages to i915_vma
  2021-02-12 10:22 [Intel-gfx] [PATCH 1/3] drm/i915: Wrap all access to i915_vma.node.start|size Chris Wilson
@ 2021-02-12 10:22 ` Chris Wilson
  2021-02-12 13:43   ` Matthew Auld
  2021-02-12 10:22 ` [Intel-gfx] [PATCH 3/3] drm/i915: Refine VT-d scanout workaround Chris Wilson
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 17+ messages in thread
From: Chris Wilson @ 2021-02-12 10:22 UTC (permalink / raw)
  To: intel-gfx; +Cc: Matthew Auld, Chris Wilson

Introduce the concept of padding the i915_vma with guard pages before
and after. The major consequence is that all ordinary uses of i915_vma
must use i915_vma_offset/i915_vma_size and not i915_vma.node.start/size
directly, as the drm_mm_node will include the guard pages that surround
our object.

So in this patch, we look for all uses of i915_vma->node.start that
instead need to include the guard offset and switch them to
i915_vma_offset(), and in a few cases to i915_ggtt_offset(). Notable
exceptions are the selftests, which expect exact behaviour.

The biggest conundrum is how exactly to mix requesting a fixed address
with guard pages, particularly through the existing uABI. The user does
not know about guard pages, so they must be transparent to the user, and
the execobj.offset must be that of the object itself, excluding the
guard. A PIN_OFFSET_FIXED address must therefore be exclusive of the
guard pages.
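
Concretely, when the user requests a fixed offset, the reservation is
shifted down by the guard so that execobj.offset continues to name the
object itself (a sketch of the i915_gem_gtt_reserve() hunk below):

	/* The node starts one guard before the user-visible offset */
	ret = i915_gem_gtt_reserve(vma->vm, &vma->node,
				   size, offset - guard, color,
				   flags);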

In the next patch, we start using guard pages for scanout objects. While
these are limited to GGTT vma, on a few platforms these vma (or at least
an alias of the vma) are shared with userspace, so we may leak the
existence of such guards if we are not careful to ensure that the
execobj.offset is transparent and excludes the guards. (On such platforms,
without full-ppgtt, userspace has to use relocations, so the presence of
more untouchable regions within its GTT should be of no further issue.)
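
To make the new layout concrete, the drm_mm_node now brackets the
object with scratch-filled guard pages (a sketch matching the accessors
below):

	/*
	 * node.start                                node.start + node.size
	 * |<- guard ->|<-------- object -------->|<- guard ->|
	 *             ^
	 *             i915_vma_offset(vma) = node.start + guard
	 *
	 * i915_vma_size(vma) = node.size - 2 * guard
	 */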

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_ggtt.c  | 12 ++++++++++--
 drivers/gpu/drm/i915/i915_vma.c       | 10 +++++++---
 drivers/gpu/drm/i915/i915_vma.h       |  8 ++++----
 drivers/gpu/drm/i915/i915_vma_types.h |  3 ++-
 4 files changed, 23 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index c5803c434d33..6b326138e765 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -238,8 +238,12 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
 
 	gte = (gen8_pte_t __iomem *)ggtt->gsm;
 	gte += vma->node.start / I915_GTT_PAGE_SIZE;
-	end = gte + vma->node.size / I915_GTT_PAGE_SIZE;
 
+	end = gte + vma->guard / I915_GTT_PAGE_SIZE;
+	while (gte < end)
+		gen8_set_pte(gte++, vm->scratch[0]->encode);
+
+	end += (vma->node.size - vma->guard) / I915_GTT_PAGE_SIZE;
 	for_each_sgt_daddr(addr, iter, vma->pages)
 		gen8_set_pte(gte++, pte_encode | addr);
 	GEM_BUG_ON(gte > end);
@@ -289,8 +293,12 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
 
 	gte = (gen6_pte_t __iomem *)ggtt->gsm;
 	gte += vma->node.start / I915_GTT_PAGE_SIZE;
-	end = gte + vma->node.size / I915_GTT_PAGE_SIZE;
 
+	end = gte + vma->guard / I915_GTT_PAGE_SIZE;
+	while (gte < end)
+		gen8_set_pte(gte++, vm->scratch[0]->encode);
+
+	end += (vma->node.size - vma->guard) / I915_GTT_PAGE_SIZE;
 	for_each_sgt_daddr(addr, iter, vma->pages)
 		iowrite32(vm->pte_encode(addr, level, flags), gte++);
 	GEM_BUG_ON(gte > end);
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 17fe455bd770..155f510b4cc6 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -623,7 +623,7 @@ bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long color)
 static int
 i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 {
-	unsigned long color;
+	unsigned long color, guard;
 	u64 start, end;
 	int ret;
 
@@ -631,13 +631,16 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 	GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
 
 	size = max(size, vma->size);
-	alignment = max(alignment, vma->display_alignment);
+	alignment = max_t(typeof(alignment), alignment, vma->display_alignment);
 	if (flags & PIN_MAPPABLE) {
 		size = max_t(typeof(size), size, vma->fence_size);
 		alignment = max_t(typeof(alignment),
 				  alignment, vma->fence_alignment);
 	}
 
+	guard = 0;
+	size += 2 * guard;
+
 	GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
 	GEM_BUG_ON(!IS_ALIGNED(alignment, I915_GTT_MIN_ALIGNMENT));
 	GEM_BUG_ON(!is_power_of_2(alignment));
@@ -674,7 +677,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 			return -EINVAL;
 
 		ret = i915_gem_gtt_reserve(vma->vm, &vma->node,
-					   size, offset, color,
+					   size, offset - guard, color,
 					   flags);
 		if (ret)
 			return ret;
@@ -725,6 +728,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 	GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, color));
 
 	list_add_tail(&vma->vm_link, &vma->vm->bound_list);
+	vma->guard = guard;
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 5049655a24c1..fddd2359b392 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -112,14 +112,14 @@ static inline bool i915_vma_is_closed(const struct i915_vma *vma)
 
 static inline u64 i915_vma_size(const struct i915_vma *vma)
 {
-       GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
-       return vma->node.size;
+	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
+	return vma->node.size - 2 * vma->guard;
 }
 
 static inline u64 i915_vma_offset(const struct i915_vma *vma)
 {
-       GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
-       return vma->node.start;
+	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
+	return vma->node.start + vma->guard;
 }
 
 static inline u32 i915_ggtt_offset(const struct i915_vma *vma)
diff --git a/drivers/gpu/drm/i915/i915_vma_types.h b/drivers/gpu/drm/i915/i915_vma_types.h
index f5cb848b7a7e..f2e4c61c889f 100644
--- a/drivers/gpu/drm/i915/i915_vma_types.h
+++ b/drivers/gpu/drm/i915/i915_vma_types.h
@@ -182,14 +182,15 @@ struct i915_vma {
 	struct i915_fence_reg *fence;
 
 	u64 size;
-	u64 display_alignment;
 	struct i915_page_sizes page_sizes;
 
 	/* mmap-offset associated with fencing for this vma */
 	struct i915_mmap_offset	*mmo;
 
+	u32 guard;
 	u32 fence_size;
 	u32 fence_alignment;
+	u32 display_alignment;
 
 	/**
 	 * Count of the number of times this vma has been opened by different
-- 
2.20.1


* [Intel-gfx] [PATCH 3/3] drm/i915: Refine VT-d scanout workaround
  2021-02-12 10:22 [Intel-gfx] [PATCH 1/3] drm/i915: Wrap all access to i915_vma.node.start|size Chris Wilson
  2021-02-12 10:22 ` [Intel-gfx] [PATCH 2/3] drm/i915: Introduce guard pages to i915_vma Chris Wilson
@ 2021-02-12 10:22 ` Chris Wilson
  2021-02-12 13:46   ` Matthew Auld
  2021-02-12 10:58 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/3] drm/i915: Wrap all access to i915_vma.node.start|size Patchwork
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 17+ messages in thread
From: Chris Wilson @ 2021-02-12 10:22 UTC (permalink / raw)
  To: intel-gfx; +Cc: Matthew Auld, Chris Wilson

VT-d may cause overfetch of the scanout PTE, both before and after the
vma (depending on the scanout orientation). The bspec recommends that we
provide a tile-row of padding in either direction, and suggests using
160 PTEs, warning that the accesses will wrap around the ends of the
GGTT. Currently, we fill the entire GGTT with scratch pages when using
VT-d to ensure there are always valid entries around every vma,
including scanout. However, writing every PTE is slow; on recent devices
we perform 8MiB of uncached writes, incurring an extra 100ms during
resume.

If instead we focus on putting guard pages only around scanout, we can
avoid touching the whole GGTT. To avoid having to introduce extra nodes
around each scanout vma, we adjust the scanout drm_mm_node to be smaller
than the allocated space, and fix up the extra PTEs during dma binding.

v2: Rather than modifying drm_mm_node.start, which is still used by the
drm_mm itself, apply the guard as an adjustment of node.start at the
point of use.
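
For scale, the guard applied here rounds the recommended 160 PTEs up to
the next power of two, which also becomes the minimum alignment (per
the VTD_GUARD definition below):

	/* 160 PTEs * 4KiB = 640KiB, rounded up to a 1MiB guard */
	#define VTD_GUARD roundup_pow_of_two(160 * SZ_4K)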

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_domain.c |  3 +++
 drivers/gpu/drm/i915/gt/intel_ggtt.c       | 25 +---------------------
 drivers/gpu/drm/i915/i915_gem_gtt.h        |  1 +
 drivers/gpu/drm/i915/i915_vma.c            | 10 +++++++++
 4 files changed, 15 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index 0478b069c202..9f2ccc255ca1 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -345,6 +345,9 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 	if (ret)
 		goto err;
 
+	if (intel_scanout_needs_vtd_wa(i915))
+		flags |= PIN_VTD;
+
 	/*
 	 * As the user may map the buffer once pinned in the display plane
 	 * (e.g. libkms for the bootup splash), we have to ensure that we
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 6b326138e765..251b50884d1c 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -319,27 +319,6 @@ static void nop_clear_range(struct i915_address_space *vm,
 {
 }
 
-static void gen8_ggtt_clear_range(struct i915_address_space *vm,
-				  u64 start, u64 length)
-{
-	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
-	unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
-	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
-	const gen8_pte_t scratch_pte = vm->scratch[0]->encode;
-	gen8_pte_t __iomem *gtt_base =
-		(gen8_pte_t __iomem *)ggtt->gsm + first_entry;
-	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
-	int i;
-
-	if (WARN(num_entries > max_entries,
-		 "First entry = %d; Num entries = %d (max=%d)\n",
-		 first_entry, num_entries, max_entries))
-		num_entries = max_entries;
-
-	for (i = 0; i < num_entries; i++)
-		gen8_set_pte(&gtt_base[i], scratch_pte);
-}
-
 static void bxt_vtd_ggtt_wa(struct i915_address_space *vm)
 {
 	/*
@@ -907,8 +886,6 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
 	ggtt->vm.cleanup = gen6_gmch_remove;
 	ggtt->vm.insert_page = gen8_ggtt_insert_page;
 	ggtt->vm.clear_range = nop_clear_range;
-	if (intel_scanout_needs_vtd_wa(i915))
-		ggtt->vm.clear_range = gen8_ggtt_clear_range;
 
 	ggtt->vm.insert_entries = gen8_ggtt_insert_entries;
 
@@ -1054,7 +1031,7 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt)
 	ggtt->vm.alloc_pt_dma = alloc_pt_dma;
 
 	ggtt->vm.clear_range = nop_clear_range;
-	if (!HAS_FULL_PPGTT(i915) || intel_scanout_needs_vtd_wa(i915))
+	if (!HAS_FULL_PPGTT(i915))
 		ggtt->vm.clear_range = gen6_ggtt_clear_range;
 	ggtt->vm.insert_page = gen6_ggtt_insert_page;
 	ggtt->vm.insert_entries = gen6_ggtt_insert_entries;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index c9b0ee5e1d23..8a2dfc7144cf 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -41,6 +41,7 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
 #define PIN_HIGH		BIT_ULL(5)
 #define PIN_OFFSET_BIAS		BIT_ULL(6)
 #define PIN_OFFSET_FIXED	BIT_ULL(7)
+#define PIN_VTD			BIT_ULL(8)
 
 #define PIN_GLOBAL		BIT_ULL(10) /* I915_VMA_GLOBAL_BIND */
 #define PIN_USER		BIT_ULL(11) /* I915_VMA_LOCAL_BIND */
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 155f510b4cc6..929d2a1a20b8 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -38,6 +38,8 @@
 #include "i915_trace.h"
 #include "i915_vma.h"
 
+#define VTD_GUARD roundup_pow_of_two(160 * SZ_4K) /* 160 PTE padding */
+
 static struct i915_global_vma {
 	struct i915_global base;
 	struct kmem_cache *slab_vmas;
@@ -552,6 +554,9 @@ bool i915_vma_misplaced(const struct i915_vma *vma,
 	    i915_vma_offset(vma) != (flags & PIN_OFFSET_MASK))
 		return true;
 
+	if (flags & PIN_VTD && vma->guard < VTD_GUARD)
+		return true;
+
 	return false;
 }
 
@@ -638,7 +643,12 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 				  alignment, vma->fence_alignment);
 	}
 
+	/* VT-d requires padding before/after the vma */
 	guard = 0;
+	if (flags & PIN_VTD) {
+		alignment = max_t(typeof(alignment), alignment, VTD_GUARD);
+		guard = alignment;
+	}
 	size += 2 * guard;
 
 	GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
-- 
2.20.1


* [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/3] drm/i915: Wrap all access to i915_vma.node.start|size
  2021-02-12 10:22 [Intel-gfx] [PATCH 1/3] drm/i915: Wrap all access to i915_vma.node.start|size Chris Wilson
  2021-02-12 10:22 ` [Intel-gfx] [PATCH 2/3] drm/i915: Introduce guard pages to i915_vma Chris Wilson
  2021-02-12 10:22 ` [Intel-gfx] [PATCH 3/3] drm/i915: Refine VT-d scanout workaround Chris Wilson
@ 2021-02-12 10:58 ` Patchwork
  2021-02-12 11:32 ` [Intel-gfx] ✗ Fi.CI.BAT: failure " Patchwork
  2021-02-12 12:55 ` [Intel-gfx] [PATCH 1/3] " Matthew Auld
  4 siblings, 0 replies; 17+ messages in thread
From: Patchwork @ 2021-02-12 10:58 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/3] drm/i915: Wrap all access to i915_vma.node.start|size
URL   : https://patchwork.freedesktop.org/series/87013/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
fbe6f8e9939d drm/i915: Wrap all access to i915_vma.node.start|size
-:720: WARNING:LEADING_SPACE: please, no spaces at the start of a line
#720: FILE: drivers/gpu/drm/i915/i915_vma.h:115:
+       GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));$

-:721: WARNING:LEADING_SPACE: please, no spaces at the start of a line
#721: FILE: drivers/gpu/drm/i915/i915_vma.h:116:
+       return vma->node.size;$

-:726: WARNING:LEADING_SPACE: please, no spaces at the start of a line
#726: FILE: drivers/gpu/drm/i915/i915_vma.h:121:
+       GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));$

-:727: WARNING:LEADING_SPACE: please, no spaces at the start of a line
#727: FILE: drivers/gpu/drm/i915/i915_vma.h:122:
+       return vma->node.start;$

total: 0 errors, 4 warnings, 0 checks, 649 lines checked
14325abd66e0 drm/i915: Introduce guard pages to i915_vma
eb0c0d8b37f3 drm/i915: Refine VT-d scanout workaround



* [Intel-gfx] ✗ Fi.CI.BAT: failure for series starting with [1/3] drm/i915: Wrap all access to i915_vma.node.start|size
  2021-02-12 10:22 [Intel-gfx] [PATCH 1/3] drm/i915: Wrap all access to i915_vma.node.start|size Chris Wilson
                   ` (2 preceding siblings ...)
  2021-02-12 10:58 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/3] drm/i915: Wrap all access to i915_vma.node.start|size Patchwork
@ 2021-02-12 11:32 ` Patchwork
  2021-02-12 12:55 ` [Intel-gfx] [PATCH 1/3] " Matthew Auld
  4 siblings, 0 replies; 17+ messages in thread
From: Patchwork @ 2021-02-12 11:32 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx


== Series Details ==

Series: series starting with [1/3] drm/i915: Wrap all access to i915_vma.node.start|size
URL   : https://patchwork.freedesktop.org/series/87013/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_9769 -> Patchwork_19668
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with Patchwork_19668 absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_19668, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19668/index.html

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_19668:

### IGT changes ###

#### Possible regressions ####

  * igt@kms_busy@basic@flip:
    - fi-ivb-3770:        [PASS][1] -> [INCOMPLETE][2]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9769/fi-ivb-3770/igt@kms_busy@basic@flip.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19668/fi-ivb-3770/igt@kms_busy@basic@flip.html

  
#### Suppressed ####

  The following results come from untrusted machines, tests, or statuses.
  They do not affect the overall result.

  * igt@amdgpu/amd_cs_nop@sync-gfx0:
    - {fi-ehl-2}:         NOTRUN -> [SKIP][3] +20 similar issues
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19668/fi-ehl-2/igt@amdgpu/amd_cs_nop@sync-gfx0.html

  * igt@i915_module_load@reload:
    - {fi-ehl-2}:         NOTRUN -> [DMESG-WARN][4] +38 similar issues
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19668/fi-ehl-2/igt@i915_module_load@reload.html

  * igt@i915_selftest@live@gt_engines:
    - {fi-ehl-2}:         NOTRUN -> [DMESG-FAIL][5]
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19668/fi-ehl-2/igt@i915_selftest@live@gt_engines.html

  
Known issues
------------

  Here are the changes found in Patchwork_19668 that come from known issues:

### CI changes ###

#### Possible fixes ####

  * boot:
    - {fi-ehl-2}:         [FAIL][6] ([i915#2992]) -> [PASS][7]
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9769/fi-ehl-2/boot.html
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19668/fi-ehl-2/boot.html

  

### IGT changes ###

#### Issues hit ####

  * igt@amdgpu/amd_basic@cs-compute:
    - fi-cfl-guc:         NOTRUN -> [SKIP][8] ([fdo#109271]) +25 similar issues
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19668/fi-cfl-guc/igt@amdgpu/amd_basic@cs-compute.html

  * igt@amdgpu/amd_basic@query-info:
    - fi-bsw-kefka:       NOTRUN -> [SKIP][9] ([fdo#109271]) +17 similar issues
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19668/fi-bsw-kefka/igt@amdgpu/amd_basic@query-info.html

  * igt@amdgpu/amd_basic@semaphore:
    - fi-bdw-5557u:       NOTRUN -> [SKIP][10] ([fdo#109271]) +26 similar issues
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19668/fi-bdw-5557u/igt@amdgpu/amd_basic@semaphore.html

  * igt@amdgpu/amd_basic@userptr:
    - fi-byt-j1900:       NOTRUN -> [SKIP][11] ([fdo#109271]) +17 similar issues
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19668/fi-byt-j1900/igt@amdgpu/amd_basic@userptr.html

  * igt@core_hotunplug@unbind-rebind:
    - fi-bdw-5557u:       NOTRUN -> [WARN][12] ([i915#2283])
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19668/fi-bdw-5557u/igt@core_hotunplug@unbind-rebind.html

  * igt@i915_module_load@reload:
    - fi-tgl-y:           [PASS][13] -> [DMESG-WARN][14] ([i915#1982] / [k.org#205379])
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9769/fi-tgl-y/igt@i915_module_load@reload.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19668/fi-tgl-y/igt@i915_module_load@reload.html

  * igt@kms_chamelium@dp-crc-fast:
    - fi-bdw-5557u:       NOTRUN -> [SKIP][15] ([fdo#109271] / [fdo#111827]) +8 similar issues
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19668/fi-bdw-5557u/igt@kms_chamelium@dp-crc-fast.html

  * igt@kms_chamelium@dp-edid-read:
    - fi-cfl-guc:         NOTRUN -> [SKIP][16] ([fdo#109271] / [fdo#111827]) +8 similar issues
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19668/fi-cfl-guc/igt@kms_chamelium@dp-edid-read.html

  * igt@kms_pipe_crc_basic@compare-crc-sanitycheck-pipe-d:
    - fi-cfl-guc:         NOTRUN -> [SKIP][17] ([fdo#109271] / [i915#533])
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19668/fi-cfl-guc/igt@kms_pipe_crc_basic@compare-crc-sanitycheck-pipe-d.html

  * igt@runner@aborted:
    - fi-ivb-3770:        NOTRUN -> [FAIL][18] ([i915#2426])
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19668/fi-ivb-3770/igt@runner@aborted.html

  
#### Possible fixes ####

  * igt@fbdev@read:
    - fi-tgl-y:           [DMESG-WARN][19] ([i915#402]) -> [PASS][20] +1 similar issue
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9769/fi-tgl-y/igt@fbdev@read.html
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19668/fi-tgl-y/igt@fbdev@read.html

  * igt@i915_pm_rpm@module-reload:
    - fi-byt-j1900:       [INCOMPLETE][21] ([i915#142] / [i915#2405]) -> [PASS][22]
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9769/fi-byt-j1900/igt@i915_pm_rpm@module-reload.html
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19668/fi-byt-j1900/igt@i915_pm_rpm@module-reload.html

  * igt@i915_selftest@live@execlists:
    - fi-bsw-kefka:       [INCOMPLETE][23] ([i915#2940]) -> [PASS][24]
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9769/fi-bsw-kefka/igt@i915_selftest@live@execlists.html
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19668/fi-bsw-kefka/igt@i915_selftest@live@execlists.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [fdo#111827]: https://bugs.freedesktop.org/show_bug.cgi?id=111827
  [i915#142]: https://gitlab.freedesktop.org/drm/intel/issues/142
  [i915#1982]: https://gitlab.freedesktop.org/drm/intel/issues/1982
  [i915#2283]: https://gitlab.freedesktop.org/drm/intel/issues/2283
  [i915#2405]: https://gitlab.freedesktop.org/drm/intel/issues/2405
  [i915#2426]: https://gitlab.freedesktop.org/drm/intel/issues/2426
  [i915#2940]: https://gitlab.freedesktop.org/drm/intel/issues/2940
  [i915#2992]: https://gitlab.freedesktop.org/drm/intel/issues/2992
  [i915#402]: https://gitlab.freedesktop.org/drm/intel/issues/402
  [i915#533]: https://gitlab.freedesktop.org/drm/intel/issues/533
  [k.org#205379]: https://bugzilla.kernel.org/show_bug.cgi?id=205379


Participating hosts (44 -> 40)
------------------------------

  Additional (1): fi-cfl-guc 
  Missing    (5): fi-ilk-m540 fi-hsw-4200u fi-bsw-cyan fi-ctg-p8600 fi-bdw-samus 


Build changes
-------------

  * Linux: CI_DRM_9769 -> Patchwork_19668

  CI-20190529: 20190529
  CI_DRM_9769: 2b48b43b8f964195fc483aff9468a8b75177b08b @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_6001: d0d6f5e14ef181c93e4b503b05d9c18fa480e09d @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_19668: eb0c0d8b37f30c652b2ebec61e744d4aa738128f @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

eb0c0d8b37f3 drm/i915: Refine VT-d scanout workaround
14325abd66e0 drm/i915: Introduce guard pages to i915_vma
fbe6f8e9939d drm/i915: Wrap all access to i915_vma.node.start|size

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19668/index.html

[-- Attachment #1.2: Type: text/html, Size: 9296 bytes --]

[-- Attachment #2: Type: text/plain, Size: 160 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [Intel-gfx] [PATCH 1/3] drm/i915: Wrap all access to i915_vma.node.start|size
  2021-02-12 10:22 [Intel-gfx] [PATCH 1/3] drm/i915: Wrap all access to i915_vma.node.start|size Chris Wilson
                   ` (3 preceding siblings ...)
  2021-02-12 11:32 ` [Intel-gfx] ✗ Fi.CI.BAT: failure " Patchwork
@ 2021-02-12 12:55 ` Matthew Auld
  4 siblings, 0 replies; 17+ messages in thread
From: Matthew Auld @ 2021-02-12 12:55 UTC (permalink / raw)
  To: Chris Wilson; +Cc: Intel Graphics Development

On Fri, 12 Feb 2021 at 10:22, Chris Wilson <chris@chris-wilson.co.uk> wrote:
>
> We already wrap i915_vma.node.start for use with the GGTT, as there we
> can perform additional sanity checks that the node belongs to the GGTT
> and fits within the 32b registers. In the next couple of patches, we
> will introduce guard pages around the objects _inside_ the drm_mm_node
> allocation. That is we will offset the vma->pages so that the first page
> is at drm_mm_node.start + vma->guard (not 0 as is currently the case).
> All users must then not use i915_vma.node.start directly, but compute
> the guard offset, thus all users are converted to use a
> i915_vma_offset() wrapper.
>
> The notable exceptions are the selftests that are testing exact
> behaviour of i915_vma_pin/i915_vma_insert.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---

<snip>

> @@ -562,10 +562,11 @@ void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
>         GEM_BUG_ON(!i915_vma_is_ggtt(vma));
>         GEM_BUG_ON(!vma->fence_size);
>
> -       fenceable = (vma->node.size >= vma->fence_size &&
> -                    IS_ALIGNED(vma->node.start, vma->fence_alignment));
> +       fenceable = (i915_vma_size(vma) >= vma->fence_size &&
> +                    IS_ALIGNED(i915_vma_offset(vma), vma->fence_alignment));
>
> -       mappable = vma->node.start + vma->fence_size <= i915_vm_to_ggtt(vma->vm)->mappable_end;
> +       mappable = (i915_vma_offset(vma) + vma->fence_size <=
> +                   i915_vm_to_ggtt(vma->vm)->mappable_end);

i915_ggtt_offset(vma) could be used here.
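
For illustration, the shape that suggestion would take (a sketch only,
not the committed hunk):

	mappable = (i915_ggtt_offset(vma) + vma->fence_size <=
		    i915_vm_to_ggtt(vma->vm)->mappable_end);

i915_ggtt_offset() returns the offset as a u32 after the GGTT sanity
checks, which suits a comparison against the mappable aperture.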

Reviewed-by: Matthew Auld <matthew.auld@intel.com>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [Intel-gfx] [PATCH 2/3] drm/i915: Introduce guard pages to i915_vma
  2021-02-12 10:22 ` [Intel-gfx] [PATCH 2/3] drm/i915: Introduce guard pages to i915_vma Chris Wilson
@ 2021-02-12 13:43   ` Matthew Auld
  2021-02-12 13:55     ` Chris Wilson
  0 siblings, 1 reply; 17+ messages in thread
From: Matthew Auld @ 2021-02-12 13:43 UTC (permalink / raw)
  To: Chris Wilson; +Cc: Intel Graphics Development, Matthew Auld

On Fri, 12 Feb 2021 at 10:22, Chris Wilson <chris@chris-wilson.co.uk> wrote:
>
> Introduce the concept of padding the i915_vma with guard pages before
> and aft. The major consequence is that all ordinary uses of i915_vma
> must use i915_vma_offset/i915_vma_size and not i915_vma.node.start/size
> directly, as the drm_mm_node will include the guard pages that surround
> our object.
>
> So in this patch, we look for all uses of i915_vma->node.start that
> instead need to include the guard offset and switch them to
> i915_vma_offset(), and in a few cases to i915_ggtt_offset(). Notable
> exceptions are the selftests, which expect exact behaviour.
>
> The biggest conundrum is how exactly to mix requesting a fixed address
> with guard pages, particularly through the existing uABI. The user does
> not know about guard pages, so they must be transparent to the user, and
> so the execobj.offset must be that of the object itself excluding the
> guard. So a PIN_OFFSET_FIXED must then be exclusive of the guard pages.
>
> In the next patch, we start using guard pages for scanout objects. While
> these are limited to GGTT vma, on a few platforms these vma (or at least
> an alias of the vma) are shared with userspace, so we may leak the
> existence of such guards if we are not careful to ensure that the
> execobj.offset is transparent and excludes the guards. (On such platforms,
> without full-ppgtt, userspace has to use relocations so the presence of
> more untouchable regions within its GTT should be of no further issue.)
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Matthew Auld <matthew.auld@intel.com>
> ---
>  drivers/gpu/drm/i915/gt/intel_ggtt.c  | 12 ++++++++++--
>  drivers/gpu/drm/i915/i915_vma.c       | 10 +++++++---
>  drivers/gpu/drm/i915/i915_vma.h       |  8 ++++----
>  drivers/gpu/drm/i915/i915_vma_types.h |  3 ++-
>  4 files changed, 23 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> index c5803c434d33..6b326138e765 100644
> --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> @@ -238,8 +238,12 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
>
>         gte = (gen8_pte_t __iomem *)ggtt->gsm;
>         gte += vma->node.start / I915_GTT_PAGE_SIZE;
> -       end = gte + vma->node.size / I915_GTT_PAGE_SIZE;
>
> +       end = gte + vma->guard / I915_GTT_PAGE_SIZE;
> +       while (gte < end)
> +               gen8_set_pte(gte++, vm->scratch[0]->encode);
> +
> +       end += (vma->node.size - vma->guard) / I915_GTT_PAGE_SIZE;
>         for_each_sgt_daddr(addr, iter, vma->pages)
>                 gen8_set_pte(gte++, pte_encode | addr);
>         GEM_BUG_ON(gte > end);
> @@ -289,8 +293,12 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
>
>         gte = (gen6_pte_t __iomem *)ggtt->gsm;
>         gte += vma->node.start / I915_GTT_PAGE_SIZE;
> -       end = gte + vma->node.size / I915_GTT_PAGE_SIZE;
>
> +       end = gte + vma->guard / I915_GTT_PAGE_SIZE;
> +       while (gte < end)
> +               gen8_set_pte(gte++, vm->scratch[0]->encode);
> +
> +       end += (vma->node.size - vma->guard) / I915_GTT_PAGE_SIZE;
>         for_each_sgt_daddr(addr, iter, vma->pages)
>                 iowrite32(vm->pte_encode(addr, level, flags), gte++);
>         GEM_BUG_ON(gte > end);
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index 17fe455bd770..155f510b4cc6 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -623,7 +623,7 @@ bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long color)
>  static int
>  i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
>  {
> -       unsigned long color;
> +       unsigned long color, guard;
>         u64 start, end;
>         int ret;
>
> @@ -631,13 +631,16 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
>         GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
>
>         size = max(size, vma->size);
> -       alignment = max(alignment, vma->display_alignment);
> +       alignment = max_t(typeof(alignment), alignment, vma->display_alignment);
>         if (flags & PIN_MAPPABLE) {
>                 size = max_t(typeof(size), size, vma->fence_size);
>                 alignment = max_t(typeof(alignment),
>                                   alignment, vma->fence_alignment);
>         }
>
> +       guard = 0;
> +       size += 2 * guard;
> +
>         GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
>         GEM_BUG_ON(!IS_ALIGNED(alignment, I915_GTT_MIN_ALIGNMENT));
>         GEM_BUG_ON(!is_power_of_2(alignment));
> @@ -674,7 +677,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
>                         return -EINVAL;

Should we bother adjusting the overflow checks to account for the
padding? Also maybe check that offset >= guard?

        if (flags & PIN_OFFSET_FIXED) {
                u64 offset = flags & PIN_OFFSET_MASK;
+
+               if (offset < guard)
+                       return -ENOSPC;
+
                if (!IS_ALIGNED(offset, alignment) ||
-                   range_overflows(offset, size, end))
+                   range_overflows(offset, size - 2 * guard, end))
                        return -EINVAL;

If the padding spills over the end, maybe it's ok, so long as the
"real" range doesn't spill over, like if it needs to be placed within
the mappable range. Otherwise maybe something like
range_overflows(offset - guard, size - guard, end)?

Also bumping the alignment constraint for the OFFSET_FIXED case does
seem a little scary? Is it not possible to just add the required
padding around both ends of the provided offset?

>
>                 ret = i915_gem_gtt_reserve(vma->vm, &vma->node,
> -                                          size, offset, color,
> +                                          size, offset - guard, color,
>                                            flags);
>                 if (ret)
>                         return ret;
> @@ -725,6 +728,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
>         GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, color));
>
>         list_add_tail(&vma->vm_link, &vma->vm->bound_list);
> +       vma->guard = guard;
>
>         return 0;
>  }
> diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
> index 5049655a24c1..fddd2359b392 100644
> --- a/drivers/gpu/drm/i915/i915_vma.h
> +++ b/drivers/gpu/drm/i915/i915_vma.h
> @@ -112,14 +112,14 @@ static inline bool i915_vma_is_closed(const struct i915_vma *vma)
>
>  static inline u64 i915_vma_size(const struct i915_vma *vma)
>  {
> -       GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
> -       return vma->node.size;
> +       GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
> +       return vma->node.size - 2 * vma->guard;
>  }
>
>  static inline u64 i915_vma_offset(const struct i915_vma *vma)
>  {
> -       GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
> -       return vma->node.start;
> +       GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
> +       return vma->node.start + vma->guard;
>  }
>
>  static inline u32 i915_ggtt_offset(const struct i915_vma *vma)
> diff --git a/drivers/gpu/drm/i915/i915_vma_types.h b/drivers/gpu/drm/i915/i915_vma_types.h
> index f5cb848b7a7e..f2e4c61c889f 100644
> --- a/drivers/gpu/drm/i915/i915_vma_types.h
> +++ b/drivers/gpu/drm/i915/i915_vma_types.h
> @@ -182,14 +182,15 @@ struct i915_vma {
>         struct i915_fence_reg *fence;
>
>         u64 size;
> -       u64 display_alignment;
>         struct i915_page_sizes page_sizes;
>
>         /* mmap-offset associated with fencing for this vma */
>         struct i915_mmap_offset *mmo;
>
> +       u32 guard;
>         u32 fence_size;
>         u32 fence_alignment;
> +       u32 display_alignment;
>
>         /**
>          * Count of the number of times this vma has been opened by different
> --
> 2.20.1
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [Intel-gfx] [PATCH 3/3] drm/i915: Refine VT-d scanout workaround
  2021-02-12 10:22 ` [Intel-gfx] [PATCH 3/3] drm/i915: Refine VT-d scanout workaround Chris Wilson
@ 2021-02-12 13:46   ` Matthew Auld
  0 siblings, 0 replies; 17+ messages in thread
From: Matthew Auld @ 2021-02-12 13:46 UTC (permalink / raw)
  To: Chris Wilson; +Cc: Intel Graphics Development, Matthew Auld

On Fri, 12 Feb 2021 at 10:22, Chris Wilson <chris@chris-wilson.co.uk> wrote:
>
> VT-d may cause overfetch of the scanout PTE, both before and after the
> vma (depending on the scanout orientation). bspec recommends that we
> provide a tile-row in either direction, and suggests using 160 PTEs,
> warning that the accesses will wrap around the ends of the GGTT.
> Currently, we fill the entire GGTT with scratch pages when using VT-d to
> always ensure there are valid entries around every vma, including
> scanout. However, writing every PTE is slow as on recent devices we
> perform 8MiB of uncached writes, incurring an extra 100ms during resume.
>
> If instead we focus on only putting guard pages around scanout, we can
> avoid touching the whole GGTT. To avoid having to introduce extra nodes
> around each scanout vma, we adjust the scanout drm_mm_node to be smaller
> than the allocated space, and fixup the extra PTE during dma binding.
>
> v2: Move the guard from modifying drm_mm_node.start, which is still used
> by the drm_mm itself, into an adjustment of node.start at the point of
> use.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> Cc: Matthew Auld <matthew.auld@intel.com>

Yeah, that does look much better,
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [Intel-gfx] [PATCH 2/3] drm/i915: Introduce guard pages to i915_vma
  2021-02-12 13:43   ` Matthew Auld
@ 2021-02-12 13:55     ` Chris Wilson
  0 siblings, 0 replies; 17+ messages in thread
From: Chris Wilson @ 2021-02-12 13:55 UTC (permalink / raw)
  To: Matthew Auld; +Cc: Intel Graphics Development, Matthew Auld

Quoting Matthew Auld (2021-02-12 13:43:46)
> On Fri, 12 Feb 2021 at 10:22, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> >
> > Introduce the concept of padding the i915_vma with guard pages before
> > and aft. The major consequence is that all ordinary uses of i915_vma
> > must use i915_vma_offset/i915_vma_size and not i915_vma.node.start/size
> > directly, as the drm_mm_node will include the guard pages that surround
> > our object.
> >
> > So in this patch, we look for all uses of i915_vma->node.start that
> > instead need to include the guard offset and switch them to
> > i915_vma_offset(), and in a few cases to i915_ggtt_offset(). Notable
> > exceptions are the selftests, which expect exact behaviour.
> >
> > The biggest conundrum is how exactly to mix requesting a fixed address
> > with guard pages, particularly through the existing uABI. The user does
> > not know about guard pages, so they must be transparent to the user, and
> > so the execobj.offset must be that of the object itself excluding the
> > guard. So a PIN_OFFSET_FIXED must then be exclusive of the guard pages.
> >
> > In the next patch, we start using guard pages for scanout objects. While
> > these are limited to GGTT vma, on a few platforms these vma (or at least
> > an alias of the vma) are shared with userspace, so we may leak the
> > existence of such guards if we are not careful to ensure that the
> > execobj.offset is transparent and excludes the guards. (On such platforms,
> > without full-ppgtt, userspace has to use relocations so the presence of
> > more untouchable regions within its GTT should be of no further issue.)
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Matthew Auld <matthew.auld@intel.com>
> > ---
> >  drivers/gpu/drm/i915/gt/intel_ggtt.c  | 12 ++++++++++--
> >  drivers/gpu/drm/i915/i915_vma.c       | 10 +++++++---
> >  drivers/gpu/drm/i915/i915_vma.h       |  8 ++++----
> >  drivers/gpu/drm/i915/i915_vma_types.h |  3 ++-
> >  4 files changed, 23 insertions(+), 10 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> > index c5803c434d33..6b326138e765 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> > @@ -238,8 +238,12 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
> >
> >         gte = (gen8_pte_t __iomem *)ggtt->gsm;
> >         gte += vma->node.start / I915_GTT_PAGE_SIZE;
> > -       end = gte + vma->node.size / I915_GTT_PAGE_SIZE;
> >
> > +       end = gte + vma->guard / I915_GTT_PAGE_SIZE;
> > +       while (gte < end)
> > +               gen8_set_pte(gte++, vm->scratch[0]->encode);
> > +
> > +       end += (vma->node.size - vma->guard) / I915_GTT_PAGE_SIZE;
> >         for_each_sgt_daddr(addr, iter, vma->pages)
> >                 gen8_set_pte(gte++, pte_encode | addr);
> >         GEM_BUG_ON(gte > end);
> > @@ -289,8 +293,12 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
> >
> >         gte = (gen6_pte_t __iomem *)ggtt->gsm;
> >         gte += vma->node.start / I915_GTT_PAGE_SIZE;
> > -       end = gte + vma->node.size / I915_GTT_PAGE_SIZE;
> >
> > +       end = gte + vma->guard / I915_GTT_PAGE_SIZE;
> > +       while (gte < end)
> > +               gen8_set_pte(gte++, vm->scratch[0]->encode);
> > +
> > +       end += (vma->node.size - vma->guard) / I915_GTT_PAGE_SIZE;
> >         for_each_sgt_daddr(addr, iter, vma->pages)
> >                 iowrite32(vm->pte_encode(addr, level, flags), gte++);
> >         GEM_BUG_ON(gte > end);
> > diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> > index 17fe455bd770..155f510b4cc6 100644
> > --- a/drivers/gpu/drm/i915/i915_vma.c
> > +++ b/drivers/gpu/drm/i915/i915_vma.c
> > @@ -623,7 +623,7 @@ bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long color)
> >  static int
> >  i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
> >  {
> > -       unsigned long color;
> > +       unsigned long color, guard;
> >         u64 start, end;
> >         int ret;
> >
> > @@ -631,13 +631,16 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
> >         GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
> >
> >         size = max(size, vma->size);
> > -       alignment = max(alignment, vma->display_alignment);
> > +       alignment = max_t(typeof(alignment), alignment, vma->display_alignment);
> >         if (flags & PIN_MAPPABLE) {
> >                 size = max_t(typeof(size), size, vma->fence_size);
> >                 alignment = max_t(typeof(alignment),
> >                                   alignment, vma->fence_alignment);
> >         }
> >
> > +       guard = 0;
> > +       size += 2 * guard;
> > +
> >         GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
> >         GEM_BUG_ON(!IS_ALIGNED(alignment, I915_GTT_MIN_ALIGNMENT));
> >         GEM_BUG_ON(!is_power_of_2(alignment));
> > @@ -674,7 +677,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
> >                         return -EINVAL;
> 
> Should we bother adjusting the overflows checking to account for the
> padding? Also maybe check that offset >= guard?

offset >= guard is only needed for the pinned (PIN_OFFSET_FIXED) case.

And yes, it is sensible that we check we don't overflow with adding a guard.
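
For illustration, the shape those checks take in the later revisions
collected in this thread (a sketch; the error value is an assumption,
following the existing too-large-for-aperture path):

	GEM_BUG_ON(2 * guard > end);	/* the guards alone must fit */
	if (size > end - 2 * guard)	/* object plus both guards must fit */
		return -E2BIG;		/* assumed error path */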

> 
>         if (flags & PIN_OFFSET_FIXED) {
>                 u64 offset = flags & PIN_OFFSET_MASK;
> +
> +               if (offset < guard)
> +                       return -ENOSPC;
> +
>                 if (!IS_ALIGNED(offset, alignment) ||
> -                   range_overflows(offset, size, end))
> +                   range_overflows(offset, size - 2 * guard, end))
>                         return -EINVAL;
> 
> If the padding spills over the end, maybe it's ok, so long as the
> "real" range doesn't spill over, like if it needs to be placed within
> the mappable range. Otherwise maybe something like
> range_overflows(offset - guard, size - guard, end)?

My thought was that the user has no knowledge of the guard pages, nor
direct access to them [in theory], so they should not be penalised for
their placement, as far as is possible.

That we have to prevent wrap around and so restrict total placement to
[guard, end - guard] unfortunately belies our attempt to be
undetectable.

So the -EINVAL check should be against the full range, but we should
also do

if (offset < guard  || offset > gtt_end - guard)
	return -ENOSPC;
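
As a concrete illustration (hypothetical numbers): with a 4KiB guard, a
user-fixed offset of 0 would need the node to begin at -4KiB, while an
offset above gtt_end - guard leaves no room for the trailing guard; both
placements are unavailable rather than malformed, hence -ENOSPC and not
-EINVAL.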


> Also bumping the alignment constraint for the OFFSET_FIXED case does
> seem a little scary? Is it not possible to just add the required
> padding around both ends of the provided offset?

Hmm. We don't need to bump the alignment; we just need to make sure the
guard is a multiple of the alignment (so that start + guard still meets
the alignment constraints).
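
Something along these lines (a sketch; the v2/v3 revisions later in
this thread adopt exactly this in i915_vma_insert):

	guard = vma->guard;		 /* retain guard across rebinds */
	guard = ALIGN(guard, alignment); /* start + guard stays aligned */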
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [Intel-gfx] [PATCH 2/3] drm/i915: Introduce guard pages to i915_vma
  2022-11-09 17:40 [PATCH 0/3] add guard padding around i915_vma Andi Shyti
@ 2022-11-09 17:40 ` Andi Shyti
  0 siblings, 0 replies; 17+ messages in thread
From: Andi Shyti @ 2022-11-09 17:40 UTC (permalink / raw)
  To: intel-gfx, dri-devel; +Cc: Thomas Hellström, Chris Wilson, Matthew Auld

From: Chris Wilson <chris@chris-wilson.co.uk>

Introduce the concept of padding the i915_vma with guard pages before
and after. The major consequence is that all ordinary uses of i915_vma
must use i915_vma_offset/i915_vma_size and not i915_vma.node.start/size
directly, as the drm_mm_node will include the guard pages that surround
our object.
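
As a rough picture of the resulting layout (an illustrative sketch of
the helpers this patch introduces):

	node.start                                node.start + node.size
	|<- guard ->|<------- vma->pages -------->|<- guard ->|

	i915_vma_offset(vma) == node.start + guard
	i915_vma_size(vma)   == node.size - 2 * guard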

The biggest conundrum is how exactly to mix requesting a fixed address
with guard pages, particularly through the existing uABI. The user does
not know about guard pages, so they must be transparent to the user, and
so the execobj.offset must be that of the object itself excluding the
guard. So a PIN_OFFSET_FIXED must then be exclusive of the guard pages.
The caveat is that some placements will be impossible with guard pages,
as wraparounds need to be avoided, and the vma itself will require a
larger node. We must not report EINVAL but ENOSPC as these are unavailable
locations within the GTT rather than conflicting user requirements.

In the next patch, we start using guard pages for scanout objects. While
these are limited to GGTT vma, on a few platforms these vma (or at least
an alias of the vma) are shared with userspace, so we may leak the
existence of such guards if we are not careful to ensure that the
execobj.offset is transparent and excludes the guards. (On such platforms
like ivb, without full-ppgtt, userspace has to use relocations so the
presence of more untouchable regions within its GTT should be of no further
issue.)

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Tejas Upadhyay <tejaskumarx.surendrakumar.upadhyay@intel.com>
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Signed-off-by: Andi Shyti <andi.shyti@linux.intel.com>
---
 drivers/gpu/drm/i915/gt/intel_ggtt.c     | 14 ++++++++----
 drivers/gpu/drm/i915/i915_gem_gtt.h      |  3 ++-
 drivers/gpu/drm/i915/i915_vma.c          | 27 ++++++++++++++++++------
 drivers/gpu/drm/i915/i915_vma.h          |  5 +++--
 drivers/gpu/drm/i915/i915_vma_resource.c |  4 ++--
 drivers/gpu/drm/i915/i915_vma_resource.h |  7 +++++-
 drivers/gpu/drm/i915/i915_vma_types.h    |  3 ++-
 7 files changed, 46 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 2518cebbf931c..0dd81b18716c3 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -287,8 +287,11 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
 	 */
 
 	gte = (gen8_pte_t __iomem *)ggtt->gsm;
-	gte += vma_res->start / I915_GTT_PAGE_SIZE;
-	end = gte + vma_res->node_size / I915_GTT_PAGE_SIZE;
+	gte += (vma_res->start - vma_res->guard) / I915_GTT_PAGE_SIZE;
+	end = gte + vma_res->guard / I915_GTT_PAGE_SIZE;
+	while (gte < end)
+		gen8_set_pte(gte++, vm->scratch[0]->encode);
+	end += (vma_res->node_size + vma_res->guard) / I915_GTT_PAGE_SIZE;
 
 	for_each_sgt_daddr(addr, iter, vma_res->bi.pages)
 		gen8_set_pte(gte++, pte_encode | addr);
@@ -338,9 +341,12 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
 	dma_addr_t addr;
 
 	gte = (gen6_pte_t __iomem *)ggtt->gsm;
-	gte += vma_res->start / I915_GTT_PAGE_SIZE;
-	end = gte + vma_res->node_size / I915_GTT_PAGE_SIZE;
+	gte += (vma_res->start - vma_res->guard) / I915_GTT_PAGE_SIZE;
 
+	end = gte + vma_res->guard / I915_GTT_PAGE_SIZE;
+	while (gte < end)
+		iowrite32(vm->scratch[0]->encode, gte++);
+	end += (vma_res->node_size + vma_res->guard) / I915_GTT_PAGE_SIZE;
 	for_each_sgt_daddr(addr, iter, vma_res->bi.pages)
 		iowrite32(vm->pte_encode(addr, level, flags), gte++);
 	GEM_BUG_ON(gte > end);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 8c2f57eb5ddaa..2434197830523 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -44,7 +44,8 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
 #define PIN_HIGH		BIT_ULL(5)
 #define PIN_OFFSET_BIAS		BIT_ULL(6)
 #define PIN_OFFSET_FIXED	BIT_ULL(7)
-#define PIN_VALIDATE		BIT_ULL(8) /* validate placement only, no need to call unpin() */
+#define PIN_OFFSET_GUARD	BIT_ULL(8)
+#define PIN_VALIDATE		BIT_ULL(9) /* validate placement only, no need to call unpin() */
 
 #define PIN_GLOBAL		BIT_ULL(10) /* I915_VMA_GLOBAL_BIND */
 #define PIN_USER		BIT_ULL(11) /* I915_VMA_LOCAL_BIND */
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 2be43885a2b5d..ff24a654565f6 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -417,7 +417,7 @@ i915_vma_resource_init_from_vma(struct i915_vma_resource *vma_res,
 			       obj->mm.rsgt, i915_gem_object_is_readonly(obj),
 			       i915_gem_object_is_lmem(obj), obj->mm.region,
 			       vma->ops, vma->private, __i915_vma_offset(vma),
-			       __i915_vma_size(vma), vma->size);
+			       __i915_vma_size(vma), vma->size, vma->guard);
 }
 
 /**
@@ -747,7 +747,7 @@ static int
 i915_vma_insert(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 		u64 size, u64 alignment, u64 flags)
 {
-	unsigned long color;
+	unsigned long color, guard;
 	u64 start, end;
 	int ret;
 
@@ -755,7 +755,7 @@ i915_vma_insert(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 	GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
 
 	size = max(size, vma->size);
-	alignment = max(alignment, vma->display_alignment);
+	alignment = max_t(typeof(alignment), alignment, vma->display_alignment);
 	if (flags & PIN_MAPPABLE) {
 		size = max_t(typeof(size), size, vma->fence_size);
 		alignment = max_t(typeof(alignment),
@@ -766,6 +766,9 @@ i915_vma_insert(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 	GEM_BUG_ON(!IS_ALIGNED(alignment, I915_GTT_MIN_ALIGNMENT));
 	GEM_BUG_ON(!is_power_of_2(alignment));
 
+	guard = vma->guard; /* retain guard across rebinds */
+	guard = ALIGN(guard, alignment);
+
 	start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
 	GEM_BUG_ON(!IS_ALIGNED(start, I915_GTT_PAGE_SIZE));
 
@@ -775,6 +778,7 @@ i915_vma_insert(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 	if (flags & PIN_ZONE_4G)
 		end = min_t(u64, end, (1ULL << 32) - I915_GTT_PAGE_SIZE);
 	GEM_BUG_ON(!IS_ALIGNED(end, I915_GTT_PAGE_SIZE));
+	GEM_BUG_ON(2 * guard > end);
 
 	alignment = max(alignment, i915_vm_obj_min_alignment(vma->vm, vma->obj));
 
@@ -782,7 +786,7 @@ i915_vma_insert(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 	 * aperture has, reject it early before evicting everything in a vain
 	 * attempt to find space.
 	 */
-	if (size > end) {
+	if (size > end - 2 * guard) {
 		DRM_DEBUG("Attempting to bind an object larger than the aperture: request=%llu > %s aperture=%llu\n",
 			  size, flags & PIN_MAPPABLE ? "mappable" : "total",
 			  end);
@@ -799,13 +803,23 @@ i915_vma_insert(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 		if (!IS_ALIGNED(offset, alignment) ||
 		    range_overflows(offset, size, end))
 			return -EINVAL;
+		/*
+		 * The caller knows not of the guard added by others and
+		 * requests for the offset of the start of its buffer
+		 * to be fixed, which may not be the same as the position
+		 * of the vma->node due to the guard pages.
+		 */
+		if (offset < guard || offset + size > end - guard)
+			return -ENOSPC;
 
 		ret = i915_gem_gtt_reserve(vma->vm, ww, &vma->node,
-					   size, offset, color,
-					   flags);
+					   size + 2 * guard,
+					   offset - guard,
+					   color, flags);
 		if (ret)
 			return ret;
 	} else {
+		size += 2 * guard;
 		/*
 		 * We only support huge gtt pages through the 48b PPGTT,
 		 * however we also don't want to force any alignment for
@@ -853,6 +867,7 @@ i915_vma_insert(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 	GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, color));
 
 	list_move_tail(&vma->vm_link, &vma->vm->bound_list);
+	vma->guard = guard;
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 0954f8a933869..6acc986190137 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -127,7 +127,7 @@ static inline bool i915_vma_is_closed(const struct i915_vma *vma)
 /* Internal use only. */
 static inline u64 __i915_vma_size(const struct i915_vma *vma)
 {
-	return vma->node.size;
+	return vma->node.size - 2 * vma->guard;
 }
 
 /**
@@ -149,7 +149,8 @@ static inline u64 i915_vma_size(const struct i915_vma *vma)
 /* Internal use only. */
 static inline u64 __i915_vma_offset(const struct i915_vma *vma)
 {
-	return vma->node.start;
+	/* The actual start of the vma->pages is after the guard pages. */
+	return vma->node.start + vma->guard;
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/i915_vma_resource.c b/drivers/gpu/drm/i915/i915_vma_resource.c
index de1342dbfa128..6ba7a7feceba1 100644
--- a/drivers/gpu/drm/i915/i915_vma_resource.c
+++ b/drivers/gpu/drm/i915/i915_vma_resource.c
@@ -34,8 +34,8 @@ static struct kmem_cache *slab_vma_resources;
  * and removal of fences increases as O(ln(pending_unbinds)) instead of
  * O(1) for a single fence without interval tree.
  */
-#define VMA_RES_START(_node) ((_node)->start)
-#define VMA_RES_LAST(_node) ((_node)->start + (_node)->node_size - 1)
+#define VMA_RES_START(_node) ((_node)->start - (_node)->guard)
+#define VMA_RES_LAST(_node) ((_node)->start + (_node)->node_size + (_node)->guard - 1)
 INTERVAL_TREE_DEFINE(struct i915_vma_resource, rb,
 		     u64, __subtree_last,
 		     VMA_RES_START, VMA_RES_LAST, static, vma_res_itree);
diff --git a/drivers/gpu/drm/i915/i915_vma_resource.h b/drivers/gpu/drm/i915/i915_vma_resource.h
index 54edf3739ca0b..c1864e3d0b43e 100644
--- a/drivers/gpu/drm/i915/i915_vma_resource.h
+++ b/drivers/gpu/drm/i915/i915_vma_resource.h
@@ -57,6 +57,7 @@ struct i915_page_sizes {
  * @node_size: Size of the allocated range manager node with padding
  * subtracted.
  * @vma_size: Bind size.
+ * @guard: The size of guard area preceding and trailing the bind.
  * @page_sizes_gtt: Resulting page sizes from the bind operation.
  * @bound_flags: Flags indicating binding status.
  * @allocated: Backend private data. TODO: Should move into @private.
@@ -115,6 +116,7 @@ struct i915_vma_resource {
 	u64 start;
 	u64 node_size;
 	u64 vma_size;
+	u32 guard;
 	u32 page_sizes_gtt;
 
 	u32 bound_flags;
@@ -179,6 +181,7 @@ static inline void i915_vma_resource_put(struct i915_vma_resource *vma_res)
  * @start: Offset into the address space of bind range start after padding.
  * @node_size: Size of the allocated range manager node minus padding.
  * @size: Bind size.
+ * @guard: The size of the guard area preceding and trailing the bind.
  *
  * Initializes a vma resource allocated using i915_vma_resource_alloc().
  * The reason for having separate allocate and initialize function is that
@@ -197,7 +200,8 @@ static inline void i915_vma_resource_init(struct i915_vma_resource *vma_res,
 					  void *private,
 					  u64 start,
 					  u64 node_size,
-					  u64 size)
+					  u64 size,
+					  u32 guard)
 {
 	__i915_vma_resource_init(vma_res);
 	vma_res->vm = vm;
@@ -215,6 +219,7 @@ static inline void i915_vma_resource_init(struct i915_vma_resource *vma_res,
 	vma_res->start = start;
 	vma_res->node_size = node_size;
 	vma_res->vma_size = size;
+	vma_res->guard = guard;
 }
 
 static inline void i915_vma_resource_fini(struct i915_vma_resource *vma_res)
diff --git a/drivers/gpu/drm/i915/i915_vma_types.h b/drivers/gpu/drm/i915/i915_vma_types.h
index ec0f6c9f57d02..77fda2244d161 100644
--- a/drivers/gpu/drm/i915/i915_vma_types.h
+++ b/drivers/gpu/drm/i915/i915_vma_types.h
@@ -197,14 +197,15 @@ struct i915_vma {
 	struct i915_fence_reg *fence;
 
 	u64 size;
-	u64 display_alignment;
 	struct i915_page_sizes page_sizes;
 
 	/* mmap-offset associated with fencing for this vma */
 	struct i915_mmap_offset	*mmo;
 
+	u32 guard; /* padding allocated around vma->pages within the node */
 	u32 fence_size;
 	u32 fence_alignment;
+	u32 display_alignment;
 
 	/**
 	 * Count of the number of times this vma has been opened by different
-- 
2.37.2


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [Intel-gfx] [PATCH 2/3] drm/i915: Introduce guard pages to i915_vma
  2022-11-09 16:49 [PATCH 0/3] add guard padding around i915_vma Andi Shyti
@ 2022-11-09 16:49 ` Andi Shyti
  0 siblings, 0 replies; 17+ messages in thread
From: Andi Shyti @ 2022-11-09 16:49 UTC (permalink / raw)
  To: intel-gfx, dri-devel; +Cc: Chris Wilson, Matthew Auld, Tejas Upadhyay

From: Chris Wilson <chris@chris-wilson.co.uk>

Introduce the concept of padding the i915_vma with guard pages before
and after. The major consequence is that all ordinary uses of i915_vma
must use i915_vma_offset/i915_vma_size and not i915_vma.node.start/size
directly, as the drm_mm_node will include the guard pages that surround
our object.

The biggest conundrum is how exactly to mix requesting a fixed address
with guard pages, particularly through the existing uABI. The user does
not know about guard pages, so they must be transparent to the user, and
so the execobj.offset must be that of the object itself excluding the
guard. So a PIN_OFFSET_FIXED must then be exclusive of the guard pages.
The caveat is that some placements will be impossible with guard pages,
as wraparounds need to be avoided, and the vma itself will require a
larger node. We must not report EINVAL but ENOSPC as these are unavailable
locations within the GTT rather than conflicting user requirements.

In the next patch, we start using guard pages for scanout objects. While
these are limited to GGTT vma, on a few platforms these vma (or at least
an alias of the vma) are shared with userspace, so we may leak the
existence of such guards if we are not careful to ensure that the
execobj.offset is transparent and excludes the guards. (On such platforms
like ivb, without full-ppgtt, userspace has to use relocations so the
presence of more untouchable regions within its GTT should be of no further
issue.)

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Signed-off-by: Andi Shyti <andi.shyti@linux.intel.com>
---
 drivers/gpu/drm/i915/gt/intel_ggtt.c  | 12 ++++++++++--
 drivers/gpu/drm/i915/i915_gem_gtt.h   |  1 +
 drivers/gpu/drm/i915/i915_vma.c       | 28 ++++++++++++++++++++++-----
 drivers/gpu/drm/i915/i915_vma.h       |  5 +++--
 drivers/gpu/drm/i915/i915_vma_types.h |  3 ++-
 5 files changed, 39 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 6549b8e3adbfc..717135f5bf5a6 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -266,8 +266,12 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
 
 	gte = (gen8_pte_t __iomem *)ggtt->gsm;
 	gte += vma->node.start / I915_GTT_PAGE_SIZE;
-	end = gte + vma->node.size / I915_GTT_PAGE_SIZE;
 
+	end = gte + vma->guard / I915_GTT_PAGE_SIZE;
+	while (gte < end)
+		gen8_set_pte(gte++, vm->scratch[0]->encode);
+
+	end += (vma->node.size - vma->guard) / I915_GTT_PAGE_SIZE;
 	for_each_sgt_daddr(addr, iter, vma->pages)
 		gen8_set_pte(gte++, pte_encode | addr);
 	GEM_BUG_ON(gte > end);
@@ -317,8 +321,12 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
 
 	gte = (gen6_pte_t __iomem *)ggtt->gsm;
 	gte += vma->node.start / I915_GTT_PAGE_SIZE;
-	end = gte + vma->node.size / I915_GTT_PAGE_SIZE;
 
+	end = gte + vma->guard / I915_GTT_PAGE_SIZE;
+	while (gte < end)
+		gen8_set_pte(gte++, vm->scratch[0]->encode);
+
+	end += (vma->node.size - vma->guard) / I915_GTT_PAGE_SIZE;
 	for_each_sgt_daddr(addr, iter, vma->pages)
 		iowrite32(vm->pte_encode(addr, level, flags), gte++);
 	GEM_BUG_ON(gte > end);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index c9b0ee5e1d237..f3ae9afdee158 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -41,6 +41,7 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
 #define PIN_HIGH		BIT_ULL(5)
 #define PIN_OFFSET_BIAS		BIT_ULL(6)
 #define PIN_OFFSET_FIXED	BIT_ULL(7)
+#define PIN_OFFSET_GUARD	BIT_ULL(8)
 
 #define PIN_GLOBAL		BIT_ULL(10) /* I915_VMA_GLOBAL_BIND */
 #define PIN_USER		BIT_ULL(11) /* I915_VMA_LOCAL_BIND */
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 4b4a6dcdc676b..bf0f8d7e13405 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -633,7 +633,7 @@ bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long color)
 static int
 i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 {
-	unsigned long color;
+	unsigned long color, guard;
 	u64 start, end;
 	int ret;
 
@@ -641,7 +641,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 	GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
 
 	size = max(size, vma->size);
-	alignment = max(alignment, vma->display_alignment);
+	alignment = max_t(typeof(alignment), alignment, vma->display_alignment);
 	if (flags & PIN_MAPPABLE) {
 		size = max_t(typeof(size), size, vma->fence_size);
 		alignment = max_t(typeof(alignment),
@@ -652,6 +652,9 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 	GEM_BUG_ON(!IS_ALIGNED(alignment, I915_GTT_MIN_ALIGNMENT));
 	GEM_BUG_ON(!is_power_of_2(alignment));
 
+	guard = vma->guard; /* retain guard across rebinds */
+	guard = ALIGN(guard, alignment);
+
 	start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
 	GEM_BUG_ON(!IS_ALIGNED(start, I915_GTT_PAGE_SIZE));
 
@@ -661,12 +664,13 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 	if (flags & PIN_ZONE_4G)
 		end = min_t(u64, end, (1ULL << 32) - I915_GTT_PAGE_SIZE);
 	GEM_BUG_ON(!IS_ALIGNED(end, I915_GTT_PAGE_SIZE));
+	GEM_BUG_ON(2 * guard > end);
 
 	/* If binding the object/GGTT view requires more space than the entire
 	 * aperture has, reject it early before evicting everything in a vain
 	 * attempt to find space.
 	 */
-	if (size > end) {
+	if (size > end - 2 * guard) {
 		DRM_DEBUG("Attempting to bind an object larger than the aperture: request=%llu > %s aperture=%llu\n",
 			  size, flags & PIN_MAPPABLE ? "mappable" : "total",
 			  end);
@@ -679,16 +683,29 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 
 	if (flags & PIN_OFFSET_FIXED) {
 		u64 offset = flags & PIN_OFFSET_MASK;
+
 		if (!IS_ALIGNED(offset, alignment) ||
 		    range_overflows(offset, size, end))
 			return -EINVAL;
 
+		/*
+		 * The caller knows not of the guard added by others and
+		 * requests for the offset of the start of its buffer
+		 * to be fixed, which may not be the same as the position
+		 * of the vma->node due to the guard pages.
+		 */
+		if (offset < guard || offset + size > end - guard)
+			return -ENOSPC;
+
 		ret = i915_gem_gtt_reserve(vma->vm, &vma->node,
-					   size, offset, color,
-					   flags);
+					   size + 2 * guard,
+					   offset - guard,
+					   color, flags);
 		if (ret)
 			return ret;
 	} else {
+		size += 2 * guard;
+
 		/*
 		 * We only support huge gtt pages through the 48b PPGTT,
 		 * however we also don't want to force any alignment for
@@ -735,6 +752,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 	GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, color));
 
 	list_add_tail(&vma->vm_link, &vma->vm->bound_list);
+	vma->guard = guard;
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index caf6e251782d8..cb5189922ab8a 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -121,12 +121,13 @@ static inline bool i915_vma_is_closed(const struct i915_vma *vma)
 static inline u64 i915_vma_size(const struct i915_vma *vma)
 {
 	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
-	return vma->node.size;
+	return vma->node.size - 2 * vma->guard;
 }
 
 static inline u64 __i915_vma_offset(const struct i915_vma *vma)
 {
-	return vma->node.start;
+	/* The actual start of the vma->pages is after the guard pages. */
+	return vma->node.start + vma->guard;
 }
 
 static inline u64 i915_vma_offset(const struct i915_vma *vma)
diff --git a/drivers/gpu/drm/i915/i915_vma_types.h b/drivers/gpu/drm/i915/i915_vma_types.h
index 4ee6e54799f48..07c72d623c40f 100644
--- a/drivers/gpu/drm/i915/i915_vma_types.h
+++ b/drivers/gpu/drm/i915/i915_vma_types.h
@@ -196,14 +196,15 @@ struct i915_vma {
 	struct i915_fence_reg *fence;
 
 	u64 size;
-	u64 display_alignment;
 	struct i915_page_sizes page_sizes;
 
 	/* mmap-offset associated with fencing for this vma */
 	struct i915_mmap_offset	*mmo;
 
+	u32 guard; /* padding allocated around vma->pages within the node */
 	u32 fence_size;
 	u32 fence_alignment;
+	u32 display_alignment;
 
 	/**
 	 * Count of the number of times this vma has been opened by different
-- 
2.37.2


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [Intel-gfx] [PATCH 2/3] drm/i915: Introduce guard pages to i915_vma
  2021-12-02  8:31 [Intel-gfx] [PATCH 0/3] Replace VT-d workaround with guard pages Tejas Upadhyay
@ 2021-12-02  8:31 ` Tejas Upadhyay
  0 siblings, 0 replies; 17+ messages in thread
From: Tejas Upadhyay @ 2021-12-02  8:31 UTC (permalink / raw)
  To: intel-gfx

From: Chris Wilson <chris@chris-wilson.co.uk>

Introduce the concept of padding the i915_vma with guard pages before
and aft. The major consequence is that all ordinary uses of i915_vma
must use i915_vma_offset/i915_vma_size and not i915_vma.node.start/size
directly, as the drm_mm_node will include the guard pages that surround
our object.

The biggest conundrum is how exactly to mix requesting a fixed address
with guard pages, particularly through the existing uABI. The user does
not know about guard pages, so they must be transparent to the user, and
so the execobj.offset must be that of the object itself excluding the
guard. So a PIN_OFFSET_FIXED must then be exclusive of the guard pages.
The caveat is that some placements will be impossible with guard pages,
as wraparounds need to be avoided, and the vma itself will require a
larger node. We must not report EINVAL but ENOSPC as these are
unavailable locations within the GTT rather than conflicting user
requirements.

In the next patch, we start using guard pages for scanout objects. While
these are limited to GGTT vma, on a few platforms these vma (or at least
an alias of the vma) are shared with userspace, so we may leak the
existence of such guards if we are not careful to ensure that the
execobj.offset is transparent and excludes the guards. (On such platforms
like ivb, without full-ppgtt, userspace has to use relocations so the
presence of more untouchable regions within its GTT should be of no further
issue.)

v2: Include the guard range in the overflow checks and placement
restrictions.

v3: Fix the check on the placement upper bound. The requested user offset
is relative to the guard offset (not the node.start) and so we should
not include the initial guard offset again when computing the upper
bound of the node.
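
Concretely (an illustrative restatement): for a requested offset the
node occupies [offset - guard, offset + size + guard), so it fits if and
only if offset >= guard and offset + size <= end - guard; counting the
leading guard a second time in the upper bound would wrongly reject
valid placements.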

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Tejas Upadhyay <tejaskumarx.surendrakumar.upadhyay@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_ggtt.c  | 12 ++++++++++--
 drivers/gpu/drm/i915/i915_vma.c       | 26 +++++++++++++++++++++-----
 drivers/gpu/drm/i915/i915_vma.h       |  5 +++--
 drivers/gpu/drm/i915/i915_vma_types.h |  3 ++-
 4 files changed, 36 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 07133f0c529e..282ed6dd3ca2 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -256,8 +256,12 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
 
 	gte = (gen8_pte_t __iomem *)ggtt->gsm;
 	gte += vma->node.start / I915_GTT_PAGE_SIZE;
-	end = gte + vma->node.size / I915_GTT_PAGE_SIZE;
 
+	end = gte + vma->guard / I915_GTT_PAGE_SIZE;
+	while (gte < end)
+		gen8_set_pte(gte++, vm->scratch[0]->encode);
+
+	end += (vma->node.size - vma->guard) / I915_GTT_PAGE_SIZE;
 	for_each_sgt_daddr(addr, iter, vma->pages)
 		gen8_set_pte(gte++, pte_encode | addr);
 	GEM_BUG_ON(gte > end);
@@ -307,8 +311,12 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
 
 	gte = (gen6_pte_t __iomem *)ggtt->gsm;
 	gte += vma->node.start / I915_GTT_PAGE_SIZE;
-	end = gte + vma->node.size / I915_GTT_PAGE_SIZE;
 
+	end = gte + vma->guard / I915_GTT_PAGE_SIZE;
+	while (gte < end)
+		gen8_set_pte(gte++, vm->scratch[0]->encode);
+
+	end += (vma->node.size - vma->guard) / I915_GTT_PAGE_SIZE;
 	for_each_sgt_daddr(addr, iter, vma->pages)
 		iowrite32(vm->pte_encode(addr, level, flags), gte++);
 	GEM_BUG_ON(gte > end);
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 10473ce8a047..080ffa583edf 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -658,7 +658,7 @@ bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long color)
 static int
 i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 {
-	unsigned long color;
+	unsigned long color, guard;
 	u64 start, end;
 	int ret;
 
@@ -666,7 +666,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 	GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
 
 	size = max(size, vma->size);
-	alignment = max(alignment, vma->display_alignment);
+	alignment = max_t(typeof(alignment), alignment, vma->display_alignment);
 	if (flags & PIN_MAPPABLE) {
 		size = max_t(typeof(size), size, vma->fence_size);
 		alignment = max_t(typeof(alignment),
@@ -677,6 +677,9 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 	GEM_BUG_ON(!IS_ALIGNED(alignment, I915_GTT_MIN_ALIGNMENT));
 	GEM_BUG_ON(!is_power_of_2(alignment));
 
+	guard = vma->guard; /* retain guard across rebinds */
+	guard = ALIGN(guard, alignment);
+
 	start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
 	GEM_BUG_ON(!IS_ALIGNED(start, I915_GTT_PAGE_SIZE));
 
@@ -686,12 +689,13 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 	if (flags & PIN_ZONE_4G)
 		end = min_t(u64, end, (1ULL << 32) - I915_GTT_PAGE_SIZE);
 	GEM_BUG_ON(!IS_ALIGNED(end, I915_GTT_PAGE_SIZE));
+	GEM_BUG_ON(2 * guard > end);
 
 	/* If binding the object/GGTT view requires more space than the entire
 	 * aperture has, reject it early before evicting everything in a vain
 	 * attempt to find space.
 	 */
-	if (size > end) {
+	if (size > end - 2 * guard) {
 		DRM_DEBUG("Attempting to bind an object larger than the aperture: request=%llu > %s aperture=%llu\n",
 			  size, flags & PIN_MAPPABLE ? "mappable" : "total",
 			  end);
@@ -707,13 +711,24 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 		if (!IS_ALIGNED(offset, alignment) ||
 		    range_overflows(offset, size, end))
 			return -EINVAL;
+		/*
+		 * The caller knows not of the guard added by others and
+		 * requests for the offset of the start of its buffer
+		 * to be fixed, which may not be the same as the position
+		 * of the vma->node due to the guard pages.
+		 */
+		if (offset < guard || offset + size > end - guard)
+			return -ENOSPC;
+
 
 		ret = i915_gem_gtt_reserve(vma->vm, &vma->node,
-					   size, offset, color,
-					   flags);
+					   size + 2 * guard,
+					   offset - guard,
+					   color, flags);
 		if (ret)
 			return ret;
 	} else {
+		size += 2 * guard;
 		/*
 		 * We only support huge gtt pages through the 48b PPGTT,
 		 * however we also don't want to force any alignment for
@@ -760,6 +775,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 	GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, color));
 
 	list_add_tail(&vma->vm_link, &vma->vm->bound_list);
+	vma->guard = guard;
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 1d13d619ff86..d35ce4e7170c 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -128,12 +128,13 @@ static inline bool i915_vma_is_closed(const struct i915_vma *vma)
 static inline u64 i915_vma_size(const struct i915_vma *vma)
 {
 	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
-	return vma->node.size;
+	return vma->node.size - 2 * vma->guard;
 }
 
 static inline u64 __i915_vma_offset(const struct i915_vma *vma)
 {
-	return vma->node.start;
+	/* The actual start of the vma->pages is after the guard pages. */
+	return vma->node.start + vma->guard;
 }
 
 static inline u64 i915_vma_offset(const struct i915_vma *vma)
diff --git a/drivers/gpu/drm/i915/i915_vma_types.h b/drivers/gpu/drm/i915/i915_vma_types.h
index f03fa96a1701..a224daf1044e 100644
--- a/drivers/gpu/drm/i915/i915_vma_types.h
+++ b/drivers/gpu/drm/i915/i915_vma_types.h
@@ -195,14 +195,15 @@ struct i915_vma {
 	struct i915_fence_reg *fence;
 
 	u64 size;
-	u64 display_alignment;
 	struct i915_page_sizes page_sizes;
 
 	/* mmap-offset associated with fencing for this vma */
 	struct i915_mmap_offset	*mmo;
 
+	u32 guard; /* padding allocated around vma->pages within the node */
 	u32 fence_size;
 	u32 fence_alignment;
+	u32 display_alignment;
 
 	/**
 	 * Count of the number of times this vma has been opened by different
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [Intel-gfx] [PATCH 2/3] drm/i915: Introduce guard pages to i915_vma
  2021-02-15 19:31       ` Matthew Auld
@ 2021-02-15 20:29         ` Chris Wilson
  0 siblings, 0 replies; 17+ messages in thread
From: Chris Wilson @ 2021-02-15 20:29 UTC (permalink / raw)
  To: Matthew Auld; +Cc: Intel Graphics Development, Matthew Auld

Quoting Matthew Auld (2021-02-15 19:31:39)
> On Mon, 15 Feb 2021 at 18:15, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> >
> > Quoting Matthew Auld (2021-02-15 18:04:08)
> > > On Mon, 15 Feb 2021 at 15:56, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > > >
> > > > Introduce the concept of padding the i915_vma with guard pages before
> > > > and aft. The major consequence is that all ordinary uses of i915_vma
> > > > must use i915_vma_offset/i915_vma_size and not i915_vma.node.start/size
> > > > directly, as the drm_mm_node will include the guard pages that surround
> > > > our object.
> > > >
> > > > The biggest conundrum is how exactly to mix requesting a fixed address
> > > > with guard pages, particularly through the existing uABI. The user does
> > > > not know about guard pages, so they must be transparent to the user, and
> > > > so the execobj.offset must be that of the object itself excluding the
> > > > guard. So a PIN_OFFSET_FIXED must then be exclusive of the guard pages.
> > > > The caveat is that some placements will be impossible with guard pages,
> > > > as wraparounds need to be avoided, and the vma itself will require a
> > > > larger node. We must not report EINVAL but ENOSPC as these are
> > > > unavailable locations within the GTT rather than conflicting user
> > > > requirements.
> > > >
> > > > In the next patch, we start using guard pages for scanout objects. While
> > > > these are limited to GGTT vma, on a few platforms these vma (or at least
> > > > an alias of the vma) are shared with userspace, so we may leak the
> > > > existence of such guards if we are not careful to ensure that the
> > > > execobj.offset is transparent and excludes the guards. (On such platforms
> > > > like ivb, without full-ppgtt, userspace has to use relocations so the
> > > > presence of more untouchable regions within its GTT should be of no further
> > > > issue.)
> > > >
> > > > v2: Include the guard range in the overflow checks and placement
> > > > restrictions.
> > > >
> > > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > > Cc: Matthew Auld <matthew.auld@intel.com>
> > > > ---
> > > >  drivers/gpu/drm/i915/gt/intel_ggtt.c  | 12 ++++++++++--
> > > >  drivers/gpu/drm/i915/i915_vma.c       | 28 ++++++++++++++++++++++-----
> > > >  drivers/gpu/drm/i915/i915_vma.h       |  5 +++--
> > > >  drivers/gpu/drm/i915/i915_vma_types.h |  3 ++-
> > > >  4 files changed, 38 insertions(+), 10 deletions(-)
> > > >
> > > > diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> > > > index c5803c434d33..6b326138e765 100644
> > > > --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
> > > > +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> > > > @@ -238,8 +238,12 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
> > > >
> > > >         gte = (gen8_pte_t __iomem *)ggtt->gsm;
> > > >         gte += vma->node.start / I915_GTT_PAGE_SIZE;
> > > > -       end = gte + vma->node.size / I915_GTT_PAGE_SIZE;
> > > >
> > > > +       end = gte + vma->guard / I915_GTT_PAGE_SIZE;
> > > > +       while (gte < end)
> > > > +               gen8_set_pte(gte++, vm->scratch[0]->encode);
> > > > +
> > > > +       end += (vma->node.size - vma->guard) / I915_GTT_PAGE_SIZE;
> > > >         for_each_sgt_daddr(addr, iter, vma->pages)
> > > >                 gen8_set_pte(gte++, pte_encode | addr);
> > > >         GEM_BUG_ON(gte > end);
> > > > @@ -289,8 +293,12 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
> > > >
> > > >         gte = (gen6_pte_t __iomem *)ggtt->gsm;
> > > >         gte += vma->node.start / I915_GTT_PAGE_SIZE;
> > > > -       end = gte + vma->node.size / I915_GTT_PAGE_SIZE;
> > > >
> > > > +       end = gte + vma->guard / I915_GTT_PAGE_SIZE;
> > > > +       while (gte < end)
> > > > +               gen8_set_pte(gte++, vm->scratch[0]->encode);
> > > > +
> > > > +       end += (vma->node.size - vma->guard) / I915_GTT_PAGE_SIZE;
> > > >         for_each_sgt_daddr(addr, iter, vma->pages)
> > > >                 iowrite32(vm->pte_encode(addr, level, flags), gte++);
> > > >         GEM_BUG_ON(gte > end);
> > > > diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> > > > index 748f5ea1ba04..31d0f8b64ec0 100644
> > > > --- a/drivers/gpu/drm/i915/i915_vma.c
> > > > +++ b/drivers/gpu/drm/i915/i915_vma.c
> > > > @@ -623,7 +623,7 @@ bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long color)
> > > >  static int
> > > >  i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
> > > >  {
> > > > -       unsigned long color;
> > > > +       unsigned long color, guard;
> > > >         u64 start, end;
> > > >         int ret;
> > > >
> > > > @@ -631,7 +631,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
> > > >         GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
> > > >
> > > >         size = max(size, vma->size);
> > > > -       alignment = max(alignment, vma->display_alignment);
> > > > +       alignment = max_t(typeof(alignment), alignment, vma->display_alignment);
> > > >         if (flags & PIN_MAPPABLE) {
> > > >                 size = max_t(typeof(size), size, vma->fence_size);
> > > >                 alignment = max_t(typeof(alignment),
> > > > @@ -642,6 +642,9 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
> > > >         GEM_BUG_ON(!IS_ALIGNED(alignment, I915_GTT_MIN_ALIGNMENT));
> > > >         GEM_BUG_ON(!is_power_of_2(alignment));
> > > >
> > > > +       guard = vma->guard; /* retain guard across rebinds */
> > > > +       guard = ALIGN(guard, alignment);
> > > > +
> > > >         start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
> > > >         GEM_BUG_ON(!IS_ALIGNED(start, I915_GTT_PAGE_SIZE));
> > > >
> > > > @@ -651,12 +654,13 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
> > > >         if (flags & PIN_ZONE_4G)
> > > >                 end = min_t(u64, end, (1ULL << 32) - I915_GTT_PAGE_SIZE);
> > > >         GEM_BUG_ON(!IS_ALIGNED(end, I915_GTT_PAGE_SIZE));
> > > > +       GEM_BUG_ON(2 * guard > end);
> > > >
> > > >         /* If binding the object/GGTT view requires more space than the entire
> > > >          * aperture has, reject it early before evicting everything in a vain
> > > >          * attempt to find space.
> > > >          */
> > > > -       if (size > end) {
> > > > +       if (size > end - 2 * guard) {
> > > >                 DRM_DEBUG("Attempting to bind an object larger than the aperture: request=%llu > %s aperture=%llu\n",
> > > >                           size, flags & PIN_MAPPABLE ? "mappable" : "total",
> > > >                           end);
> > > > @@ -669,16 +673,29 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
> > > >
> > > >         if (flags & PIN_OFFSET_FIXED) {
> > > >                 u64 offset = flags & PIN_OFFSET_MASK;
> > > > +
> > > >                 if (!IS_ALIGNED(offset, alignment) ||
> > > >                     range_overflows(offset, size, end))
> > > >                         return -EINVAL;
> > > >
> > > > +               /*
> > > > +                * The caller knows not of the guard added by others and
> > > > +                * requests for the offset of the start of its buffer
> > > > +                * to be fixed, which may not be the same as the position
> > > > +                * of the vma->node due to the guard pages.
> > > > +                */
> > > > +               if (offset < guard || offset > end - size - 2 * guard)
> > >
> > > (offset < guard || offset + size > end - guard)?
> >
> > Padding is afterwards as well as before; so total size grows by 2 *
> > guard. And since VT-d says that the overfetch wraps past the end of the
> > GTT to the front, we can't simply shrink the node if it abuts the end.
> >
> > So I'm confident the right check is end - size - 2*guard, and since we
> > already check that 2*guard is less than end, and size is less than end
> > minus the 2*guard, overflows have been caught.
> 
> I thought here the range [offset, offset + size] sits in between both
> sides of the padding.

Yes.

> The offset < guard checks the left side,

Yes.

> and so
> offset > end - size - guard will check the right side,

D'oh. You're right. Since the user offset is itself node.start + guard.

> but I didn't
> see a check for size > guard anywhere, hence the above.

size here is still the user size, which is independent of the guard;
at this point there's no interaction between size and guard, and we
just need to check that offset + size + guard is within range.

We have checked that end - size - 2*guard does not overflow, so we also
know that end - size - guard will not overflow. But as we have checked
for overflows already, we can use whatever construction looks more
natural.
-Chris
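
A minimal standalone sketch of the check the thread converges on (the
user offset is node.start + guard, so the upper bound becomes
end - size - guard); the function name and types below are
illustrative, not the driver's:

#include <stdbool.h>
#include <stdint.h>

/*
 * 'offset' and 'size' are the user's view of the buffer, guard pages
 * excluded; the reserved node would span
 * [offset - guard, offset + size + guard).
 */
static bool fixed_offset_fits(uint64_t offset, uint64_t size,
                              uint64_t guard, uint64_t end)
{
        /*
         * Mirrors the earlier i915_vma_insert() checks; guard is a
         * small page-aligned value, so none of the subtractions below
         * can wrap once these preconditions hold.
         */
        if (2 * guard > end || size > end - 2 * guard)
                return false;

        if (offset < guard)     /* no room for the leading guard */
                return false;
        if (offset > end - size - guard) /* trailing guard crosses end */
                return false;

        return true;
}

For example, with end = 1ull << 30 and guard = 4096, offset = 0 is
rejected because the leading guard would fall below the start of the
GTT, and any offset above end - size - 4096 is rejected because the
trailing guard would run past the end.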

* Re: [Intel-gfx] [PATCH 2/3] drm/i915: Introduce guard pages to i915_vma
  2021-02-15 18:15     ` Chris Wilson
@ 2021-02-15 19:31       ` Matthew Auld
  2021-02-15 20:29         ` Chris Wilson
  0 siblings, 1 reply; 17+ messages in thread
From: Matthew Auld @ 2021-02-15 19:31 UTC (permalink / raw)
  To: Chris Wilson; +Cc: Intel Graphics Development, Matthew Auld

On Mon, 15 Feb 2021 at 18:15, Chris Wilson <chris@chris-wilson.co.uk> wrote:
>
> Quoting Matthew Auld (2021-02-15 18:04:08)
> > On Mon, 15 Feb 2021 at 15:56, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > >
> > > [snip]
> > > +               /*
> > > +                * The caller knows not of the guard added by others and
> > > +                * requests for the offset of the start of its buffer
> > > +                * to be fixed, which may not be the same as the position
> > > +                * of the vma->node due to the guard pages.
> > > +                */
> > > +               if (offset < guard || offset > end - size - 2 * guard)
> >
> > (offset < guard || offset + size > end - guard)?
>
> Padding is afterwards as well as before; so total size grows by 2 *
> guard. And since VT-d says that the overfetch wraps past the end of the
> GTT to the front, we can't simply shrink the node if it abuts the end.
>
> So I'm confident the right check is end - size - 2*guard, and since we
> already check that 2*guard is less than end, and size is less than end
> minus the 2*guard, overflows have been caught.

I thought here the range [offset, offset + size] sits in between both
sides of the padding. The offset < guard checks the left side, and so
offset > end - size - guard will check the right side, but I didn't
see a check for size > guard anywhere, hence the above.

>
> The only real bother is that the restriction there is against the end
> of the GTT and not the end of the pin zone. C'est la vie.
> -Chris
>
>
> >
> > Reviewed-by: Matthew Auld <matthew.auld@intel.com>
> >

* Re: [Intel-gfx] [PATCH 2/3] drm/i915: Introduce guard pages to i915_vma
  2021-02-15 18:04   ` Matthew Auld
@ 2021-02-15 18:15     ` Chris Wilson
  2021-02-15 19:31       ` Matthew Auld
  0 siblings, 1 reply; 17+ messages in thread
From: Chris Wilson @ 2021-02-15 18:15 UTC (permalink / raw)
  To: Matthew Auld; +Cc: Intel Graphics Development, Matthew Auld

Quoting Matthew Auld (2021-02-15 18:04:08)
> On Mon, 15 Feb 2021 at 15:56, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> >
> > [snip]
> > +               /*
> > +                * The caller knows not of the guard added by others and
> > +                * requests for the offset of the start of its buffer
> > +                * to be fixed, which may not be the same as the position
> > +                * of the vma->node due to the guard pages.
> > +                */
> > +               if (offset < guard || offset > end - size - 2 * guard)
> 
> (offset < guard || offset + size > end - guard)?

Padding is afterwards as well as before; so total size grows by 2 *
guard. And since VT-d says that the overfetch wraps past the end of the
GTT to the front, we can't simply shrink the node if it abuts the end.

So I'm confident the right check is end - size - 2*guard, and since we
already check that 2*guard is less than end, and size is less than end
minus the 2*guard, overflows have been caught.

The only real bother is that the restriction there is against the end
of the GTT and not the end of the pin zone. C'est la vie.
-Chris


> 
> Reviewed-by: Matthew Auld <matthew.auld@intel.com>
>

* Re: [Intel-gfx] [PATCH 2/3] drm/i915: Introduce guard pages to i915_vma
  2021-02-15 15:56 ` [Intel-gfx] [PATCH 2/3] drm/i915: Introduce guard pages to i915_vma Chris Wilson
@ 2021-02-15 18:04   ` Matthew Auld
  2021-02-15 18:15     ` Chris Wilson
  0 siblings, 1 reply; 17+ messages in thread
From: Matthew Auld @ 2021-02-15 18:04 UTC (permalink / raw)
  To: Chris Wilson; +Cc: Intel Graphics Development, Matthew Auld

On Mon, 15 Feb 2021 at 15:56, Chris Wilson <chris@chris-wilson.co.uk> wrote:
>
> [snip]
> +               /*
> +                * The caller knows not of the guard added by others and
> +                * requests for the offset of the start of its buffer
> +                * to be fixed, which may not be the same as the position
> +                * of the vma->node due to the guard pages.
> +                */
> +               if (offset < guard || offset > end - size - 2 * guard)

(offset < guard || offset + size > end - guard)?

Reviewed-by: Matthew Auld <matthew.auld@intel.com>

* [Intel-gfx] [PATCH 2/3] drm/i915: Introduce guard pages to i915_vma
  2021-02-15 15:56 Chris Wilson
@ 2021-02-15 15:56 ` Chris Wilson
  2021-02-15 18:04   ` Matthew Auld
  0 siblings, 1 reply; 17+ messages in thread
From: Chris Wilson @ 2021-02-15 15:56 UTC (permalink / raw)
  To: intel-gfx; +Cc: Matthew Auld, Chris Wilson

Introduce the concept of padding the i915_vma with guard pages fore
and aft. The major consequence is that all ordinary uses of i915_vma
must use i915_vma_offset/i915_vma_size and not i915_vma.node.start/size
directly, as the drm_mm_node will include the guard pages that surround
our object.

The biggest conundrum is how exactly to mix requesting a fixed address
with guard pages, particularly through the existing uABI. The user does
not know about guard pages, so they must be transparent to the user,
and so the execobj.offset must be that of the object itself excluding
the guard. So a PIN_OFFSET_FIXED must then be exclusive of the guard
pages. The caveat is that some placements will be impossible with
guard pages, as wraparounds need to be avoided, and the vma itself
will require a larger node. We must then report not EINVAL but ENOSPC,
as these are unavailable locations within the GTT rather than
conflicting user requirements.

In the next patch, we start using guard pages for scanout objects.
While these are limited to GGTT vma, on a few platforms these vma (or
at least an alias of the vma) are shared with userspace, so we may
leak the existence of such guards if we are not careful to ensure that
the execobj.offset is transparent and excludes the guards. (On
platforms like ivb, without full-ppgtt, userspace has to use
relocations, so the presence of more untouchable regions within its
GTT should be of no further issue.)

v2: Include the guard range in the overflow checks and placement
restrictions.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_ggtt.c  | 12 ++++++++++--
 drivers/gpu/drm/i915/i915_vma.c       | 28 ++++++++++++++++++++++-----
 drivers/gpu/drm/i915/i915_vma.h       |  5 +++--
 drivers/gpu/drm/i915/i915_vma_types.h |  3 ++-
 4 files changed, 38 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index c5803c434d33..6b326138e765 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -238,8 +238,12 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
 
 	gte = (gen8_pte_t __iomem *)ggtt->gsm;
 	gte += vma->node.start / I915_GTT_PAGE_SIZE;
-	end = gte + vma->node.size / I915_GTT_PAGE_SIZE;
 
+	end = gte + vma->guard / I915_GTT_PAGE_SIZE;
+	while (gte < end)
+		gen8_set_pte(gte++, vm->scratch[0]->encode);
+
+	end += (vma->node.size - vma->guard) / I915_GTT_PAGE_SIZE;
 	for_each_sgt_daddr(addr, iter, vma->pages)
 		gen8_set_pte(gte++, pte_encode | addr);
 	GEM_BUG_ON(gte > end);
@@ -289,8 +293,12 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
 
 	gte = (gen6_pte_t __iomem *)ggtt->gsm;
 	gte += vma->node.start / I915_GTT_PAGE_SIZE;
-	end = gte + vma->node.size / I915_GTT_PAGE_SIZE;
 
+	end = gte + vma->guard / I915_GTT_PAGE_SIZE;
+	while (gte < end)
+		gen8_set_pte(gte++, vm->scratch[0]->encode);
+
+	end += (vma->node.size - vma->guard) / I915_GTT_PAGE_SIZE;
 	for_each_sgt_daddr(addr, iter, vma->pages)
 		iowrite32(vm->pte_encode(addr, level, flags), gte++);
 	GEM_BUG_ON(gte > end);
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 748f5ea1ba04..31d0f8b64ec0 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -623,7 +623,7 @@ bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long color)
 static int
 i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 {
-	unsigned long color;
+	unsigned long color, guard;
 	u64 start, end;
 	int ret;
 
@@ -631,7 +631,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 	GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
 
 	size = max(size, vma->size);
-	alignment = max(alignment, vma->display_alignment);
+	alignment = max_t(typeof(alignment), alignment, vma->display_alignment);
 	if (flags & PIN_MAPPABLE) {
 		size = max_t(typeof(size), size, vma->fence_size);
 		alignment = max_t(typeof(alignment),
@@ -642,6 +642,9 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 	GEM_BUG_ON(!IS_ALIGNED(alignment, I915_GTT_MIN_ALIGNMENT));
 	GEM_BUG_ON(!is_power_of_2(alignment));
 
+	guard = vma->guard; /* retain guard across rebinds */
+	guard = ALIGN(guard, alignment);
+
 	start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
 	GEM_BUG_ON(!IS_ALIGNED(start, I915_GTT_PAGE_SIZE));
 
@@ -651,12 +654,13 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 	if (flags & PIN_ZONE_4G)
 		end = min_t(u64, end, (1ULL << 32) - I915_GTT_PAGE_SIZE);
 	GEM_BUG_ON(!IS_ALIGNED(end, I915_GTT_PAGE_SIZE));
+	GEM_BUG_ON(2 * guard > end);
 
 	/* If binding the object/GGTT view requires more space than the entire
 	 * aperture has, reject it early before evicting everything in a vain
 	 * attempt to find space.
 	 */
-	if (size > end) {
+	if (size > end - 2 * guard) {
 		DRM_DEBUG("Attempting to bind an object larger than the aperture: request=%llu > %s aperture=%llu\n",
 			  size, flags & PIN_MAPPABLE ? "mappable" : "total",
 			  end);
@@ -669,16 +673,29 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 
 	if (flags & PIN_OFFSET_FIXED) {
 		u64 offset = flags & PIN_OFFSET_MASK;
+
 		if (!IS_ALIGNED(offset, alignment) ||
 		    range_overflows(offset, size, end))
 			return -EINVAL;
 
+		/*
+		 * The caller knows not of the guard added by others and
+		 * requests for the offset of the start of its buffer
+		 * to be fixed, which may not be the same as the position
+		 * of the vma->node due to the guard pages.
+		 */
+		if (offset < guard || offset > end - size - 2 * guard)
+			return -ENOSPC;
+
 		ret = i915_gem_gtt_reserve(vma->vm, &vma->node,
-					   size, offset, color,
-					   flags);
+					   size + 2 * guard,
+					   offset - guard,
+					   color, flags);
 		if (ret)
 			return ret;
 	} else {
+		size += 2 * guard;
+
 		/*
 		 * We only support huge gtt pages through the 48b PPGTT,
 		 * however we also don't want to force any alignment for
@@ -725,6 +742,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 	GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, color));
 
 	list_add_tail(&vma->vm_link, &vma->vm->bound_list);
+	vma->guard = guard;
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 3066d36b2ccd..5004ece1eb9a 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -113,12 +113,13 @@ static inline bool i915_vma_is_closed(const struct i915_vma *vma)
 static inline u64 i915_vma_size(const struct i915_vma *vma)
 {
 	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
-	return vma->node.size;
+	return vma->node.size - 2 * vma->guard;
 }
 
 static inline u64 __i915_vma_offset(const struct i915_vma *vma)
 {
-	return vma->node.start;
+	/* The actual start of the vma->pages is after the guard pages. */
+	return vma->node.start + vma->guard;
 }
 
 static inline u64 i915_vma_offset(const struct i915_vma *vma)
diff --git a/drivers/gpu/drm/i915/i915_vma_types.h b/drivers/gpu/drm/i915/i915_vma_types.h
index f5cb848b7a7e..6e55a71543ea 100644
--- a/drivers/gpu/drm/i915/i915_vma_types.h
+++ b/drivers/gpu/drm/i915/i915_vma_types.h
@@ -182,14 +182,15 @@ struct i915_vma {
 	struct i915_fence_reg *fence;
 
 	u64 size;
-	u64 display_alignment;
 	struct i915_page_sizes page_sizes;
 
 	/* mmap-offset associated with fencing for this vma */
 	struct i915_mmap_offset	*mmo;
 
+	u32 guard; /* padding allocated around vma->pages within the node */
 	u32 fence_size;
 	u32 fence_alignment;
+	u32 display_alignment;
 
 	/**
 	 * Count of the number of times this vma has been opened by different
-- 
2.20.1
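
For orientation, a minimal sketch of the layout this patch creates,
using hypothetical standalone types rather than the driver's structs;
vma_offset()/vma_size() below mirror the i915_vma.h hunk, and
insert_entries() follows the shape of the gen8_ggtt_insert_entries()
hunk:

#include <stdint.h>

/*
 * node.start                                  node.start + node.size
 * |<- guard ->|<------- object pages ------->|<- guard ->|
 *             ^ vma_offset()                 ^ vma_offset() + vma_size()
 */
struct node { uint64_t start, size; };

struct vma {
        struct node node;  /* includes the guard pages on both sides */
        uint64_t guard;    /* bytes of padding before and after the pages */
};

static uint64_t vma_offset(const struct vma *v)
{
        return v->node.start + v->guard;        /* first object page */
}

static uint64_t vma_size(const struct vma *v)
{
        return v->node.size - 2 * v->guard;     /* object pages only */
}

/*
 * Scratch entries are written under the leading guard, then the object
 * pages follow. The trailing guard is not written here; as in the
 * patch, it is assumed to still hold scratch entries from the previous
 * clear of the range.
 */
static void insert_entries(const struct vma *v, uint64_t *ptes,
                           uint64_t scratch, const uint64_t *addrs,
                           uint64_t page_size)
{
        uint64_t *pte = ptes + v->node.start / page_size;
        uint64_t i;

        for (i = 0; i < v->guard / page_size; i++)
                *pte++ = scratch;               /* leading guard */
        for (i = 0; i < vma_size(v) / page_size; i++)
                *pte++ = addrs[i];              /* object pages */
}

This is also why PIN_OFFSET_FIXED reserves size + 2 * guard at
offset - guard above: the node, not the user, absorbs the padding, so
execobj.offset stays transparent.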


end of thread, other threads:[~2022-11-09 17:42 UTC | newest]

Thread overview: 17+ messages
2021-02-12 10:22 [Intel-gfx] [PATCH 1/3] drm/i915: Wrap all access to i915_vma.node.start|size Chris Wilson
2021-02-12 10:22 ` [Intel-gfx] [PATCH 2/3] drm/i915: Introduce guard pages to i915_vma Chris Wilson
2021-02-12 13:43   ` Matthew Auld
2021-02-12 13:55     ` Chris Wilson
2021-02-12 10:22 ` [Intel-gfx] [PATCH 3/3] drm/i915: Refine VT-d scanout workaround Chris Wilson
2021-02-12 13:46   ` Matthew Auld
2021-02-12 10:58 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/3] drm/i915: Wrap all access to i915_vma.node.start|size Patchwork
2021-02-12 11:32 ` [Intel-gfx] ✗ Fi.CI.BAT: failure " Patchwork
2021-02-12 12:55 ` [Intel-gfx] [PATCH 1/3] " Matthew Auld
2021-02-15 15:56 Chris Wilson
2021-02-15 15:56 ` [Intel-gfx] [PATCH 2/3] drm/i915: Introduce guard pages to i915_vma Chris Wilson
2021-02-15 18:04   ` Matthew Auld
2021-02-15 18:15     ` Chris Wilson
2021-02-15 19:31       ` Matthew Auld
2021-02-15 20:29         ` Chris Wilson
2021-12-02  8:31 [Intel-gfx] [PATCH 0/3] Replace VT-d workaround with guard pages Tejas Upadhyay
2021-12-02  8:31 ` [Intel-gfx] [PATCH 2/3] drm/i915: Introduce guard pages to i915_vma Tejas Upadhyay
2022-11-09 16:49 [PATCH 0/3] add guard patting around i915_vma Andi Shyti
2022-11-09 16:49 ` [Intel-gfx] [PATCH 2/3] drm/i915: Introduce guard pages to i915_vma Andi Shyti
2022-11-09 17:40 [PATCH 0/3] add guard padding around i915_vma Andi Shyti
2022-11-09 17:40 ` [Intel-gfx] [PATCH 2/3] drm/i915: Introduce guard pages to i915_vma Andi Shyti
