From: Zbigniew Kempczyński
To: igt-dev@lists.freedesktop.org
Date: Fri, 2 Sep 2022 09:52:16 +0200
Message-Id: <20220902075227.50690-2-zbigniew.kempczynski@intel.com>
In-Reply-To: <20220902075227.50690-1-zbigniew.kempczynski@intel.com>
References: <20220902075227.50690-1-zbigniew.kempczynski@intel.com>
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
Subject: [igt-dev] [PATCH i-g-t v2 01/12] lib: Fix off-by-one-page in 48b obj.flags

From: Chris Wilson

The kernel checks that the last byte of the object will fit inside the
32b GTT window unless the object is marked as being suitable for use
with 48b addressing. However, the spinner only checked the start of
the object, which, depending on the mix of size and alignment, could
allow the object to straddle the 32b boundary without being marked as
48b capable.

Always set the 48b flag on all objects where the ppGTT merits it. In
the process, fix the one location where we failed to write the upper
32b of the address into the instruction.

Signed-off-by: Chris Wilson
---
 lib/igt_dummyload.c | 63 ++++++++++++++++++++++-----------------------
 1 file changed, 31 insertions(+), 32 deletions(-)
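The off-by-one-page is easiest to see in isolation. The sketch below is
not part of the patch; the start_needs_48b()/object_needs_48b() helpers
and the example offsets are made up for illustration. It shows why
checking only the start of an object is not enough once the object may
straddle the 32b boundary:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Naive check: looks only at the first byte of the object. */
static bool start_needs_48b(uint64_t offset)
{
	return offset >> 32;
}

/* What the kernel actually validates: the last byte must fit in 32b. */
static bool object_needs_48b(uint64_t offset, uint64_t size)
{
	return (offset + size - 1) >> 32;
}

int main(void)
{
	const uint64_t page = 4096;
	/* A two-page object starting one page below the 32b boundary. */
	const uint64_t offset = (1ull << 32) - page;
	const uint64_t size = 2 * page;

	assert(!start_needs_48b(offset));       /* start fits: flag not set */
	assert(object_needs_48b(offset, size)); /* but the last byte does not fit */

	return 0;
}

The patch avoids the per-object check altogether: when half the GTT
aperture already needs more than 32 bits (gem_aperture_size(fd) / 2
has bits above 32), every object gets EXEC_OBJECT_SUPPORTS_48B_ADDRESS
up front.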
diff --git a/lib/igt_dummyload.c b/lib/igt_dummyload.c
index dc1bd51e08..17ae21f567 100644
--- a/lib/igt_dummyload.c
+++ b/lib/igt_dummyload.c
@@ -100,12 +100,21 @@ emit_recursive_batch(igt_spin_t *spin,
 	struct drm_i915_gem_execbuffer2 *execbuf;
 	struct drm_i915_gem_exec_object2 *obj;
 	unsigned int flags[GEM_MAX_ENGINES];
+	unsigned int objflags = 0;
 	unsigned int nengine;
 	int fence_fd = -1;
-	uint64_t addr, addr_scratch, ahnd = opts->ahnd, objflags = 0;
+	uint64_t addr, addr_scratch, ahnd = opts->ahnd;
 	uint32_t *cs;
 	int i;
 
+	igt_assert(!(opts->ctx && opts->ctx_id));
+
+	r = memset(relocs, 0, sizeof(relocs));
+	execbuf = memset(&spin->execbuf, 0, sizeof(spin->execbuf));
+	execbuf->rsvd1 = opts->ctx ? opts->ctx->id : opts->ctx_id;
+	execbuf->flags = I915_EXEC_NO_RELOC;
+	obj = memset(spin->obj, 0, sizeof(spin->obj));
+
 	/*
 	 * Pick a random location for our spinner et al.
 	 *
@@ -121,8 +130,12 @@ emit_recursive_batch(igt_spin_t *spin,
 	 * that wrap.
 	 */
 
+	addr = gem_aperture_size(fd) / 2;
+	if (addr >> 32)
+		objflags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
+
 	if (!ahnd) {
-		addr = gem_aperture_size(fd) / 2;
+		addr /= 2;
 		if (addr >> 31)
 			addr = 1u << 31;
 		addr += random() % addr / 2;
@@ -131,8 +144,6 @@ emit_recursive_batch(igt_spin_t *spin,
 		objflags |= EXEC_OBJECT_PINNED;
 	}
 
-	igt_assert(!(opts->ctx && opts->ctx_id));
-
 	nengine = 0;
 	if (opts->engine == ALL_ENGINES) {
 		struct intel_execution_engine2 *engine;
@@ -150,11 +161,6 @@ emit_recursive_batch(igt_spin_t *spin,
 	}
 	igt_require(nengine);
 
-	memset(relocs, 0, sizeof(relocs));
-	execbuf = memset(&spin->execbuf, 0, sizeof(spin->execbuf));
-	execbuf->flags = I915_EXEC_NO_RELOC;
-	obj = memset(spin->obj, 0, sizeof(spin->obj));
-
 	obj[BATCH].handle = handle_create(fd, BATCH_SIZE, opts->flags,
 					  &spin->batch);
 	if (!spin->batch) {
@@ -175,9 +181,7 @@ emit_recursive_batch(igt_spin_t *spin,
 					       BATCH_SIZE, 0,
 					       ALLOC_STRATEGY_LOW_TO_HIGH);
 	obj[BATCH].offset = CANONICAL(addr);
-	obj[BATCH].flags |= objflags;
-	if (obj[BATCH].offset >= (1ull << 32))
-		obj[BATCH].flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
+	obj[BATCH].flags = objflags;
 
 	addr += BATCH_SIZE;
 
@@ -192,27 +196,23 @@ emit_recursive_batch(igt_spin_t *spin,
 
 		obj[SCRATCH].handle = opts->dependency;
 		obj[SCRATCH].offset = CANONICAL(addr_scratch);
-		obj[SCRATCH].flags |= objflags;
-		if (obj[SCRATCH].offset >= (1ull << 32))
-			obj[SCRATCH].flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
+		obj[SCRATCH].flags = objflags;
 
 		if (!(opts->flags & IGT_SPIN_SOFTDEP)) {
 			obj[SCRATCH].flags |= EXEC_OBJECT_WRITE;
 
 			/* dummy write to dependency */
-			r = &relocs[obj[BATCH].relocation_count++];
 			r->presumed_offset = obj[SCRATCH].offset;
 			r->target_handle = obj[SCRATCH].handle;
 			r->offset = sizeof(uint32_t) * 1020;
 			r->delta = 0;
 			r->read_domains = I915_GEM_DOMAIN_RENDER;
 			r->write_domain = I915_GEM_DOMAIN_RENDER;
+			r++;
 		}
 
 		execbuf->buffer_count++;
 	} else if (opts->flags & IGT_SPIN_POLL_RUN) {
-		r = &relocs[obj[BATCH].relocation_count++];
-
 		igt_assert(!opts->dependency);
 		if (gen == 4 || gen == 5) {
@@ -244,6 +244,7 @@ emit_recursive_batch(igt_spin_t *spin,
 						       ALLOC_STRATEGY_LOW_TO_HIGH);
 		addr += 4096; /* guard page */
 		obj[SCRATCH].offset = CANONICAL(addr);
+		obj[SCRATCH].flags = objflags;
 		addr += 4096;
 
 		igt_assert_eq(spin->poll[SPIN_POLL_START_IDX], 0);
@@ -253,10 +254,6 @@ emit_recursive_batch(igt_spin_t *spin,
 		r->offset = sizeof(uint32_t) * 1;
 		r->delta = sizeof(uint32_t) * SPIN_POLL_START_IDX;
 
-		obj[SCRATCH].flags |= objflags;
-		if (obj[SCRATCH].offset >= (1ull << 32))
-			obj[SCRATCH].flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
-
 		*cs++ = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
 		if (gen >= 8) {
@@ -274,6 +271,7 @@ emit_recursive_batch(igt_spin_t *spin,
 		*cs++ = 1;
 
 		execbuf->buffer_count++;
+		r++;
 	}
 
 	spin->handle = obj[BATCH].handle;
@@ -314,8 +312,6 @@ emit_recursive_batch(igt_spin_t *spin,
 	 * no matter how they modify it (from either the GPU or CPU).
 	 */
 	if (gen >= 8) { /* arbitrary cutoff between ring/execlists submission */
-		r = &relocs[obj[BATCH].relocation_count++];
-
 		/*
 		 * On Sandybridge+ the comparison is a strict greater-than:
 		 * if the value at spin->condition is greater than BB_END,
@@ -334,15 +330,17 @@ emit_recursive_batch(igt_spin_t *spin,
 		r->offset = (cs + 2 - spin->batch) * sizeof(*cs);
 		r->read_domains = I915_GEM_DOMAIN_COMMAND;
 		r->delta = (spin->condition - spin->batch) * sizeof(*cs);
+		igt_assert(r->delta < 4096);
 
 		*cs++ = MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE | 2;
 		*cs++ = MI_BATCH_BUFFER_END;
 		*cs++ = r->presumed_offset + r->delta;
-		*cs++ = 0;
+		*cs++ = r->presumed_offset >> 32;
+
+		r++;
 	}
 
 	/* recurse */
-	r = &relocs[obj[BATCH].relocation_count++];
 	r->target_handle = obj[BATCH].handle;
 	r->presumed_offset = obj[BATCH].offset;
 	r->offset = (cs + 1 - spin->batch) * sizeof(*cs);
@@ -363,11 +361,10 @@ emit_recursive_batch(igt_spin_t *spin,
 		*cs = r->presumed_offset + r->delta;
 		cs++;
 	}
-	obj[BATCH].relocs_ptr = to_user_pointer(relocs);
+	r++;
 
 	execbuf->buffers_ptr = to_user_pointer(obj +
 					       (2 - execbuf->buffer_count));
-	execbuf->rsvd1 = opts->ctx ? opts->ctx->id : opts->ctx_id;
 
 	if (opts->flags & IGT_SPIN_FENCE_OUT)
 		execbuf->flags |= I915_EXEC_FENCE_OUT;
@@ -382,14 +379,16 @@ emit_recursive_batch(igt_spin_t *spin,
 		execbuf->rsvd2 = opts->fence;
 	}
 
+	/* For the allocator we have to get rid of relocation_count */
+	if (!ahnd) {
+		obj[BATCH].relocs_ptr = to_user_pointer(relocs);
+		obj[BATCH].relocation_count = r - relocs;
+	}
+
 	for (i = 0; i < nengine; i++) {
 		execbuf->flags &= ~ENGINE_MASK;
 		execbuf->flags |= flags[i];
 
-		/* For the allocator we have to get rid of relocation_count */
-		for (int j = 0; j < ARRAY_SIZE(spin->obj) && ahnd; j++)
-			spin->obj[j].relocation_count = 0;
-
 		gem_execbuf_wr(fd, execbuf);
 
 		if (opts->flags & IGT_SPIN_FENCE_OUT) {
-- 
2.34.1