From: Chris Wilson <chris@chris-wilson.co.uk> To: intel-gfx@lists.freedesktop.org Cc: igt-dev@lists.freedesktop.org Subject: [PATCH i-g-t 6/8] i915/gem_exec_nop: poll-sequential requires ordering between rings Date: Wed, 30 Jan 2019 09:54:58 +0000 [thread overview] Message-ID: <20190130095500.23596-6-chris@chris-wilson.co.uk> (raw) In-Reply-To: <20190130095500.23596-1-chris@chris-wilson.co.uk> In order to correctly serialise the order of execution between rings, we need to flag the scratch address as being written. Make it so. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> --- tests/i915/gem_exec_nop.c | 152 +++++++++++++++++++++++++++++++++----- 1 file changed, 133 insertions(+), 19 deletions(-) diff --git a/tests/i915/gem_exec_nop.c b/tests/i915/gem_exec_nop.c index 59a08ad08..b91b4d0f6 100644 --- a/tests/i915/gem_exec_nop.c +++ b/tests/i915/gem_exec_nop.c @@ -104,7 +104,7 @@ static double nop_on_ring(int fd, uint32_t handle, unsigned ring_id, return elapsed(&start, &now); } -static void poll_ring(int fd, unsigned ring, const char *name, int timeout) +static void poll_ring(int fd, unsigned engine, const char *name, int timeout) { const int gen = intel_gen(intel_get_drm_devid(fd)); const uint32_t MI_ARB_CHK = 0x5 << 23; @@ -112,29 +112,17 @@ static void poll_ring(int fd, unsigned ring, const char *name, int timeout) struct drm_i915_gem_exec_object2 obj; struct drm_i915_gem_relocation_entry reloc[4], *r; uint32_t *bbe[2], *state, *batch; - unsigned engines[16], nengine, flags; struct timespec tv = {}; unsigned long cycles; + unsigned flags; uint64_t elapsed; flags = I915_EXEC_NO_RELOC; if (gen == 4 || gen == 5) flags |= I915_EXEC_SECURE; - nengine = 0; - if (ring == ALL_ENGINES) { - for_each_physical_engine(fd, ring) { - if (!gem_can_store_dword(fd, ring)) - continue; - - engines[nengine++] = ring; - } - } else { - gem_require_ring(fd, ring); - igt_require(gem_can_store_dword(fd, ring)); - engines[nengine++] = ring; - } - igt_require(nengine); + 
gem_require_ring(fd, engine); + igt_require(gem_can_store_dword(fd, engine)); memset(&obj, 0, sizeof(obj)); obj.handle = gem_create(fd, 4096); @@ -198,7 +186,7 @@ static void poll_ring(int fd, unsigned ring, const char *name, int timeout) memset(&execbuf, 0, sizeof(execbuf)); execbuf.buffers_ptr = to_user_pointer(&obj); execbuf.buffer_count = 1; - execbuf.flags = engines[0]; + execbuf.flags = engine | flags; cycles = 0; do { @@ -208,7 +196,6 @@ static void poll_ring(int fd, unsigned ring, const char *name, int timeout) execbuf.batch_start_offset = (bbe[idx] - batch) * sizeof(*batch) - 64; - execbuf.flags = engines[cycles % nengine] | flags; gem_execbuf(fd, &execbuf); *bbe[!idx] = MI_BATCH_BUFFER_END; @@ -227,6 +214,133 @@ static void poll_ring(int fd, unsigned ring, const char *name, int timeout) gem_close(fd, obj.handle); } +static void poll_sequential(int fd, const char *name, int timeout) +{ + const int gen = intel_gen(intel_get_drm_devid(fd)); + const uint32_t MI_ARB_CHK = 0x5 << 23; + struct drm_i915_gem_execbuffer2 execbuf; + struct drm_i915_gem_exec_object2 obj[2]; + struct drm_i915_gem_relocation_entry reloc[4], *r; + uint32_t *bbe[2], *state, *batch; + unsigned engines[16], nengine, engine, flags; + struct timespec tv = {}; + unsigned long cycles; + uint64_t elapsed; + bool cached; + + flags = I915_EXEC_NO_RELOC; + if (gen == 4 || gen == 5) + flags |= I915_EXEC_SECURE; + + nengine = 0; + for_each_physical_engine(fd, engine) { + if (!gem_can_store_dword(fd, engine)) + continue; + + engines[nengine++] = engine; + } + igt_require(nengine); + + memset(obj, 0, sizeof(obj)); + obj[0].handle = gem_create(fd, 4096); + obj[0].flags = EXEC_OBJECT_WRITE; + cached = __gem_set_caching(fd, obj[0].handle, 1) == 0; + obj[1].handle = gem_create(fd, 4096); + obj[1].relocs_ptr = to_user_pointer(reloc); + obj[1].relocation_count = ARRAY_SIZE(reloc); + + r = memset(reloc, 0, sizeof(reloc)); + batch = gem_mmap__wc(fd, obj[1].handle, 0, 4096, PROT_WRITE); + + for (unsigned int 
start_offset = 0; + start_offset <= 128; + start_offset += 128) { + uint32_t *b = batch + start_offset / sizeof(*batch); + + r->target_handle = obj[0].handle; + r->offset = (b - batch + 1) * sizeof(uint32_t); + r->delta = 0; + r->read_domains = I915_GEM_DOMAIN_RENDER; + r->write_domain = I915_GEM_DOMAIN_RENDER; + + *b = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0); + if (gen >= 8) { + *++b = r->delta; + *++b = 0; + } else if (gen >= 4) { + r->offset += sizeof(uint32_t); + *++b = 0; + *++b = r->delta; + } else { + *b -= 1; + *++b = r->delta; + } + *++b = start_offset != 0; + r++; + + b = batch + (start_offset + 64) / sizeof(*batch); + bbe[start_offset != 0] = b; + *b++ = MI_ARB_CHK; + + r->target_handle = obj[1].handle; + r->offset = (b - batch + 1) * sizeof(uint32_t); + r->read_domains = I915_GEM_DOMAIN_COMMAND; + r->delta = start_offset + 64; + if (gen >= 8) { + *b++ = MI_BATCH_BUFFER_START | 1 << 8 | 1; + *b++ = r->delta; + *b++ = 0; + } else if (gen >= 6) { + *b++ = MI_BATCH_BUFFER_START | 1 << 8; + *b++ = r->delta; + } else { + *b++ = MI_BATCH_BUFFER_START | 2 << 6; + if (gen < 4) + r->delta |= 1; + *b++ = r->delta; + } + r++; + } + igt_assert(r == reloc + ARRAY_SIZE(reloc)); + + if (cached) + state = gem_mmap__cpu(fd, obj[0].handle, 0, 4096, PROT_READ); + else + state = gem_mmap__wc(fd, obj[0].handle, 0, 4096, PROT_READ); + + memset(&execbuf, 0, sizeof(execbuf)); + execbuf.buffers_ptr = to_user_pointer(obj); + execbuf.buffer_count = ARRAY_SIZE(obj); + + cycles = 0; + do { + unsigned int idx = ++cycles & 1; + + *bbe[idx] = MI_ARB_CHK; + execbuf.batch_start_offset = + (bbe[idx] - batch) * sizeof(*batch) - 64; + + execbuf.flags = engines[cycles % nengine] | flags; + gem_execbuf(fd, &execbuf); + + *bbe[!idx] = MI_BATCH_BUFFER_END; + __sync_synchronize(); + + while (READ_ONCE(*state) != idx) + ; + } while ((elapsed = igt_nsec_elapsed(&tv)) >> 30 < timeout); + *bbe[cycles & 1] = MI_BATCH_BUFFER_END; + gem_sync(fd, obj[1].handle); + + igt_info("%s completed %ld 
cycles: %.3f us\n", + name, cycles, elapsed*1e-3/cycles); + + munmap(state, 4096); + munmap(batch, 4096); + gem_close(fd, obj[1].handle); + gem_close(fd, obj[0].handle); +} + static void single(int fd, uint32_t handle, unsigned ring_id, const char *ring_name) { @@ -813,7 +927,7 @@ igt_main } igt_subtest("poll-sequential") - poll_ring(device, ALL_ENGINES, "Sequential", 20); + poll_sequential(device, "Sequential", 20); igt_subtest("headless") { /* Requires master for changing display modes */ -- 2.20.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
WARNING: multiple messages have this Message-ID (diff)
From: Chris Wilson <chris@chris-wilson.co.uk> To: intel-gfx@lists.freedesktop.org Cc: igt-dev@lists.freedesktop.org Subject: [igt-dev] [PATCH i-g-t 6/8] i915/gem_exec_nop: poll-sequential requires ordering between rings Date: Wed, 30 Jan 2019 09:54:58 +0000 [thread overview] Message-ID: <20190130095500.23596-6-chris@chris-wilson.co.uk> (raw) In-Reply-To: <20190130095500.23596-1-chris@chris-wilson.co.uk> In order to correctly serialise the order of execution between rings, we need to flag the scratch address as being written. Make it so. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> --- tests/i915/gem_exec_nop.c | 152 +++++++++++++++++++++++++++++++++----- 1 file changed, 133 insertions(+), 19 deletions(-) diff --git a/tests/i915/gem_exec_nop.c b/tests/i915/gem_exec_nop.c index 59a08ad08..b91b4d0f6 100644 --- a/tests/i915/gem_exec_nop.c +++ b/tests/i915/gem_exec_nop.c @@ -104,7 +104,7 @@ static double nop_on_ring(int fd, uint32_t handle, unsigned ring_id, return elapsed(&start, &now); } -static void poll_ring(int fd, unsigned ring, const char *name, int timeout) +static void poll_ring(int fd, unsigned engine, const char *name, int timeout) { const int gen = intel_gen(intel_get_drm_devid(fd)); const uint32_t MI_ARB_CHK = 0x5 << 23; @@ -112,29 +112,17 @@ static void poll_ring(int fd, unsigned ring, const char *name, int timeout) struct drm_i915_gem_exec_object2 obj; struct drm_i915_gem_relocation_entry reloc[4], *r; uint32_t *bbe[2], *state, *batch; - unsigned engines[16], nengine, flags; struct timespec tv = {}; unsigned long cycles; + unsigned flags; uint64_t elapsed; flags = I915_EXEC_NO_RELOC; if (gen == 4 || gen == 5) flags |= I915_EXEC_SECURE; - nengine = 0; - if (ring == ALL_ENGINES) { - for_each_physical_engine(fd, ring) { - if (!gem_can_store_dword(fd, ring)) - continue; - - engines[nengine++] = ring; - } - } else { - gem_require_ring(fd, ring); - igt_require(gem_can_store_dword(fd, ring)); - engines[nengine++] = ring; - } - igt_require(nengine); + 
gem_require_ring(fd, engine); + igt_require(gem_can_store_dword(fd, engine)); memset(&obj, 0, sizeof(obj)); obj.handle = gem_create(fd, 4096); @@ -198,7 +186,7 @@ static void poll_ring(int fd, unsigned ring, const char *name, int timeout) memset(&execbuf, 0, sizeof(execbuf)); execbuf.buffers_ptr = to_user_pointer(&obj); execbuf.buffer_count = 1; - execbuf.flags = engines[0]; + execbuf.flags = engine | flags; cycles = 0; do { @@ -208,7 +196,6 @@ static void poll_ring(int fd, unsigned ring, const char *name, int timeout) execbuf.batch_start_offset = (bbe[idx] - batch) * sizeof(*batch) - 64; - execbuf.flags = engines[cycles % nengine] | flags; gem_execbuf(fd, &execbuf); *bbe[!idx] = MI_BATCH_BUFFER_END; @@ -227,6 +214,133 @@ static void poll_ring(int fd, unsigned ring, const char *name, int timeout) gem_close(fd, obj.handle); } +static void poll_sequential(int fd, const char *name, int timeout) +{ + const int gen = intel_gen(intel_get_drm_devid(fd)); + const uint32_t MI_ARB_CHK = 0x5 << 23; + struct drm_i915_gem_execbuffer2 execbuf; + struct drm_i915_gem_exec_object2 obj[2]; + struct drm_i915_gem_relocation_entry reloc[4], *r; + uint32_t *bbe[2], *state, *batch; + unsigned engines[16], nengine, engine, flags; + struct timespec tv = {}; + unsigned long cycles; + uint64_t elapsed; + bool cached; + + flags = I915_EXEC_NO_RELOC; + if (gen == 4 || gen == 5) + flags |= I915_EXEC_SECURE; + + nengine = 0; + for_each_physical_engine(fd, engine) { + if (!gem_can_store_dword(fd, engine)) + continue; + + engines[nengine++] = engine; + } + igt_require(nengine); + + memset(obj, 0, sizeof(obj)); + obj[0].handle = gem_create(fd, 4096); + obj[0].flags = EXEC_OBJECT_WRITE; + cached = __gem_set_caching(fd, obj[0].handle, 1) == 0; + obj[1].handle = gem_create(fd, 4096); + obj[1].relocs_ptr = to_user_pointer(reloc); + obj[1].relocation_count = ARRAY_SIZE(reloc); + + r = memset(reloc, 0, sizeof(reloc)); + batch = gem_mmap__wc(fd, obj[1].handle, 0, 4096, PROT_WRITE); + + for (unsigned int 
start_offset = 0; + start_offset <= 128; + start_offset += 128) { + uint32_t *b = batch + start_offset / sizeof(*batch); + + r->target_handle = obj[0].handle; + r->offset = (b - batch + 1) * sizeof(uint32_t); + r->delta = 0; + r->read_domains = I915_GEM_DOMAIN_RENDER; + r->write_domain = I915_GEM_DOMAIN_RENDER; + + *b = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0); + if (gen >= 8) { + *++b = r->delta; + *++b = 0; + } else if (gen >= 4) { + r->offset += sizeof(uint32_t); + *++b = 0; + *++b = r->delta; + } else { + *b -= 1; + *++b = r->delta; + } + *++b = start_offset != 0; + r++; + + b = batch + (start_offset + 64) / sizeof(*batch); + bbe[start_offset != 0] = b; + *b++ = MI_ARB_CHK; + + r->target_handle = obj[1].handle; + r->offset = (b - batch + 1) * sizeof(uint32_t); + r->read_domains = I915_GEM_DOMAIN_COMMAND; + r->delta = start_offset + 64; + if (gen >= 8) { + *b++ = MI_BATCH_BUFFER_START | 1 << 8 | 1; + *b++ = r->delta; + *b++ = 0; + } else if (gen >= 6) { + *b++ = MI_BATCH_BUFFER_START | 1 << 8; + *b++ = r->delta; + } else { + *b++ = MI_BATCH_BUFFER_START | 2 << 6; + if (gen < 4) + r->delta |= 1; + *b++ = r->delta; + } + r++; + } + igt_assert(r == reloc + ARRAY_SIZE(reloc)); + + if (cached) + state = gem_mmap__cpu(fd, obj[0].handle, 0, 4096, PROT_READ); + else + state = gem_mmap__wc(fd, obj[0].handle, 0, 4096, PROT_READ); + + memset(&execbuf, 0, sizeof(execbuf)); + execbuf.buffers_ptr = to_user_pointer(obj); + execbuf.buffer_count = ARRAY_SIZE(obj); + + cycles = 0; + do { + unsigned int idx = ++cycles & 1; + + *bbe[idx] = MI_ARB_CHK; + execbuf.batch_start_offset = + (bbe[idx] - batch) * sizeof(*batch) - 64; + + execbuf.flags = engines[cycles % nengine] | flags; + gem_execbuf(fd, &execbuf); + + *bbe[!idx] = MI_BATCH_BUFFER_END; + __sync_synchronize(); + + while (READ_ONCE(*state) != idx) + ; + } while ((elapsed = igt_nsec_elapsed(&tv)) >> 30 < timeout); + *bbe[cycles & 1] = MI_BATCH_BUFFER_END; + gem_sync(fd, obj[1].handle); + + igt_info("%s completed %ld 
cycles: %.3f us\n", + name, cycles, elapsed*1e-3/cycles); + + munmap(state, 4096); + munmap(batch, 4096); + gem_close(fd, obj[1].handle); + gem_close(fd, obj[0].handle); +} + static void single(int fd, uint32_t handle, unsigned ring_id, const char *ring_name) { @@ -813,7 +927,7 @@ igt_main } igt_subtest("poll-sequential") - poll_ring(device, ALL_ENGINES, "Sequential", 20); + poll_sequential(device, "Sequential", 20); igt_subtest("headless") { /* Requires master for changing display modes */ -- 2.20.1 _______________________________________________ igt-dev mailing list igt-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/igt-dev
next prev parent reply other threads:[~2019-01-30 9:54 UTC|newest] Thread overview: 24+ messages / expand[flat|nested] mbox.gz Atom feed top 2019-01-30 9:54 [PATCH i-g-t 1/8] lib: Skip unused fork helpers Chris Wilson 2019-01-30 9:54 ` [igt-dev] " Chris Wilson 2019-01-30 9:54 ` [PATCH i-g-t 2/8] i915/gem_eio: Check for allow-hang prior to issuing a reset Chris Wilson 2019-01-30 9:54 ` [igt-dev] " Chris Wilson 2019-01-30 11:08 ` Mika Kuoppala 2019-01-30 11:08 ` [igt-dev] [Intel-gfx] " Mika Kuoppala 2019-01-30 9:54 ` [PATCH i-g-t 3/8] i915/gem_exec_latency: Normalize results into ns Chris Wilson 2019-01-30 9:54 ` [Intel-gfx] " Chris Wilson 2019-01-30 9:54 ` [PATCH i-g-t 4/8] i915/gem_exec_latency: Eliminate the wakeup penalty Chris Wilson 2019-01-30 9:54 ` [igt-dev] " Chris Wilson 2019-01-30 9:54 ` [PATCH i-g-t 5/8] i915/gem_exec_schedule: Verify that using HW semaphores doesn't block Chris Wilson 2019-01-30 9:54 ` [Intel-gfx] " Chris Wilson 2019-01-30 9:54 ` Chris Wilson [this message] 2019-01-30 9:54 ` [igt-dev] [PATCH i-g-t 6/8] i915/gem_exec_nop: poll-sequential requires ordering between rings Chris Wilson 2019-01-30 9:54 ` [PATCH i-g-t 7/8] i915/gem_sync: Make switch-default asymmetric Chris Wilson 2019-01-30 9:54 ` [igt-dev] " Chris Wilson 2019-01-30 9:55 ` [PATCH i-g-t 8/8] intel-ci: Drop gem_exec_nop from BAT Chris Wilson 2019-01-30 9:55 ` [igt-dev] " Chris Wilson 2019-01-30 11:12 ` Mika Kuoppala 2019-01-30 11:12 ` [igt-dev] [Intel-gfx] " Mika Kuoppala 2019-01-30 10:50 ` [igt-dev] ✓ Fi.CI.BAT: success for series starting with [i-g-t,1/8] lib: Skip unused fork helpers Patchwork 2019-01-30 11:07 ` [PATCH i-g-t 1/8] " Mika Kuoppala 2019-01-30 11:07 ` [igt-dev] [Intel-gfx] " Mika Kuoppala 2019-01-30 12:47 ` [igt-dev] ✓ Fi.CI.IGT: success for series starting with [i-g-t,1/8] " Patchwork
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20190130095500.23596-6-chris@chris-wilson.co.uk \ --to=chris@chris-wilson.co.uk \ --cc=igt-dev@lists.freedesktop.org \ --cc=intel-gfx@lists.freedesktop.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link. Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes; see the mirroring instructions for how to clone and mirror all data and code used by this external index.