* [PATCH i-g-t] i915/gem_exec_balancer: Check for scheduling bonded-pairs on the same engine
@ 2019-09-20 22:26 ` Chris Wilson
  0 siblings, 0 replies; 13+ messages in thread
From: Chris Wilson @ 2019-09-20 22:26 UTC (permalink / raw)
  To: intel-gfx; +Cc: igt-dev

The expectation for bonded submission is that the requests are run
concurrently, in parallel on multiple engines. However, the lack of
constraints in the scheduler's selection, combined with timeslicing,
means that the bonded requests may be run in the opposite order on the
same engine. With just the right pair of requests, this can cause a GPU
hang (or at least trigger the hangchecker); in the best (worst) case,
execution runs several times slower than ideal.
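
In outline, the new subtest pairs, via a submit fence:

	A: a batch that uses MI_SEMAPHORE_WAIT to poll for B's running
	   ack, then MI_STORE_DWORD_IMM to write MI_BATCH_BUFFER_END
	   into B's batch
	B: an unpreemptable spinner

so A can only complete once B is actually executing. If the scheduler
stacks both onto a single engine with A in front, A blocks that engine
waiting on a semaphore that B can never signal.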

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 tests/i915/gem_exec_balancer.c | 151 +++++++++++++++++++++++++++++++++
 1 file changed, 151 insertions(+)

diff --git a/tests/i915/gem_exec_balancer.c b/tests/i915/gem_exec_balancer.c
index 407dc0eca..e4fe75747 100644
--- a/tests/i915/gem_exec_balancer.c
+++ b/tests/i915/gem_exec_balancer.c
@@ -30,6 +30,15 @@
 
 IGT_TEST_DESCRIPTION("Exercise in-kernel load-balancing");
 
+#define MI_SEMAPHORE_WAIT		(0x1c << 23)
+#define   MI_SEMAPHORE_POLL             (1 << 15)
+#define   MI_SEMAPHORE_SAD_GT_SDD       (0 << 12)
+#define   MI_SEMAPHORE_SAD_GTE_SDD      (1 << 12)
+#define   MI_SEMAPHORE_SAD_LT_SDD       (2 << 12)
+#define   MI_SEMAPHORE_SAD_LTE_SDD      (3 << 12)
+#define   MI_SEMAPHORE_SAD_EQ_SDD       (4 << 12)
+#define   MI_SEMAPHORE_SAD_NEQ_SDD      (5 << 12)
+
 #define INSTANCE_COUNT (1 << I915_PMU_SAMPLE_INSTANCE_BITS)
 
 static size_t sizeof_load_balance(int count)
@@ -694,6 +703,145 @@ static void bonded(int i915, unsigned int flags)
 	gem_context_destroy(i915, master);
 }
 
+static unsigned int offset_in_page(void *addr)
+{
+	return (uintptr_t)addr & 4095;
+}
+
+static uint32_t create_semaphore_to_spinner(int i915, igt_spin_t *spin)
+{
+	uint32_t *cs, *map;
+	uint32_t handle;
+
+	handle = gem_create(i915, 4096);
+	cs = map = gem_mmap__cpu(i915, handle, 0, 4096, PROT_WRITE);
+
+	/* Wait until the spinner is running */
+	*cs++ = MI_SEMAPHORE_WAIT |
+		MI_SEMAPHORE_POLL |
+		MI_SEMAPHORE_SAD_NEQ_SDD |
+		(4 - 2);
+	*cs++ = 0;
+	*cs++ = spin->obj[0].offset + 4 * SPIN_POLL_START_IDX;
+	*cs++ = 0;
+
+	/* Then cancel the spinner */
+	*cs++ = MI_STORE_DWORD_IMM;
+	*cs++ = spin->obj[IGT_SPIN_BATCH].offset +
+		offset_in_page(spin->condition);
+	*cs++ = 0;
+	*cs++ = MI_BATCH_BUFFER_END;
+
+	*cs++ = MI_BATCH_BUFFER_END;
+	munmap(map, 4096);
+
+	return handle;
+}
+
+static void bonded_slice(int i915)
+{
+	uint32_t ctx;
+	int *stop;
+
+	igt_require(gem_scheduler_has_semaphores(i915));
+
+	stop = mmap(0, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
+	igt_assert(stop != MAP_FAILED);
+
+	ctx = gem_context_create(i915);
+
+	for (int class = 0; class < 32; class++) {
+		struct i915_engine_class_instance *siblings;
+		struct drm_i915_gem_exec_object2 obj[3] = {};
+		struct drm_i915_gem_execbuffer2 eb = {};
+		unsigned int count;
+		igt_spin_t *spin;
+
+		siblings = list_engines(i915, 1u << class, &count);
+		if (!siblings)
+			continue;
+
+		if (count < 2) {
+			free(siblings);
+			continue;
+		}
+
+		/*
+		 * A: semaphore wait on spinner; cancel spinner
+		 * B: unpreemptable spinner
+		 *
+		 * A waits for running ack from B, if scheduled on the same
+		 * engine -> hang.
+		 *
+		 * C+: background load across engines
+		 */
+
+		set_load_balancer(i915, ctx, siblings, count, NULL);
+
+		spin = __igt_spin_new(i915,
+				      .ctx = ctx,
+				      .flags = (IGT_SPIN_NO_PREEMPTION |
+						IGT_SPIN_POLL_RUN));
+		igt_spin_end(spin); /* we just want its address for later */
+		gem_sync(i915, spin->handle);
+		igt_spin_reset(spin);
+
+		obj[0] = spin->obj[0];
+		obj[1] = spin->obj[1];
+		obj[2].handle = create_semaphore_to_spinner(i915, spin);
+
+		eb.buffers_ptr = to_user_pointer(obj);
+		eb.rsvd1 = ctx;
+
+		*stop = 0;
+		igt_fork(child, count + 1) {
+			igt_list_del(&spin->link);
+
+			ctx = gem_context_clone(i915, ctx,
+						I915_CONTEXT_CLONE_ENGINES, 0);
+
+			while (!READ_ONCE(*stop)) {
+				spin = igt_spin_new(i915,
+						    .ctx = ctx,
+						    .engine = (1 + rand() % count),
+						    .flags = IGT_SPIN_POLL_RUN);
+				igt_spin_busywait_until_started(spin);
+				usleep(50000);
+				igt_spin_free(i915, spin);
+			}
+
+			gem_context_destroy(i915, ctx);
+		}
+
+		igt_until_timeout(5) {
+			igt_spin_reset(spin);
+
+			/* A: Submit the semaphore wait */
+			eb.buffer_count = 3;
+			eb.flags = (1 + rand() % count) | I915_EXEC_FENCE_OUT;
+			gem_execbuf_wr(i915, &eb);
+
+			/* B: Submit the spinner (in parallel) */
+			eb.buffer_count = 2;
+			eb.flags = 0 | I915_EXEC_FENCE_SUBMIT;
+			eb.rsvd2 >>= 32;
+			gem_execbuf(i915, &eb);
+			close(eb.rsvd2);
+
+			gem_sync(i915, obj[2].handle);
+		}
+
+		*stop = 1;
+		igt_waitchildren();
+
+		gem_close(i915, obj[2].handle);
+		igt_spin_free(i915, spin);
+	}
+
+	gem_context_destroy(i915, ctx);
+	munmap(stop, 4096);
+}
+
 static void indices(int i915)
 {
 	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, I915_EXEC_RING_MASK + 1);
@@ -1320,6 +1468,9 @@ igt_main
 	igt_subtest("bonded-cork")
 		bonded(i915, CORK);
 
+	igt_subtest("bonded-slice")
+		bonded_slice(i915);
+
 	igt_fixture {
 		igt_stop_hang_detector();
 	}
-- 
2.23.0


* [igt-dev] ✗ GitLab.Pipeline: warning for i915/gem_exec_balancer: Check for scheduling bonded-pairs on the same engine
  2019-09-20 22:26 ` [igt-dev] " Chris Wilson
@ 2019-09-20 22:40 ` Patchwork
  -1 siblings, 0 replies; 13+ messages in thread
From: Patchwork @ 2019-09-20 22:40 UTC (permalink / raw)
  To: Chris Wilson; +Cc: igt-dev

== Series Details ==

Series: i915/gem_exec_balancer: Check for scheduling bonded-pairs on the same engine
URL   : https://patchwork.freedesktop.org/series/67024/
State : warning

== Summary ==

ERROR! This series introduces new undocumented tests:

gem_exec_balancer@bonded-slice

Can you document them as per the requirement in the [CONTRIBUTING.md]?

[Documentation] has more details on how to do this.

Here are a few examples:
https://gitlab.freedesktop.org/drm/igt-gpu-tools/commit/0316695d03aa46108296b27f3982ec93200c7a6e
https://gitlab.freedesktop.org/drm/igt-gpu-tools/commit/443cc658e1e6b492ee17bf4f4d891029eb7a205d
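
For instance, something along these lines (hypothetical wording, using
the igt_describe() convention from the [Documentation] link):

	igt_describe("Check that a bonded pair of requests is not "
		     "scheduled in opposite order on a single engine "
		     "when timeslicing.");
	igt_subtest("bonded-slice")
		bonded_slice(i915);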

Thanks in advance!

[CONTRIBUTING.md]: https://gitlab.freedesktop.org/drm/igt-gpu-tools/blob/master/CONTRIBUTING.md#L19
[Documentation]: https://drm.pages.freedesktop.org/igt-gpu-tools/igt-gpu-tools-Core.html#igt-describe

Other than that, pipeline status: SUCCESS.

see https://gitlab.freedesktop.org/gfx-ci/igt-ci-tags/pipelines/65263 for more details

== Logs ==

For more details see: https://gitlab.freedesktop.org/gfx-ci/igt-ci-tags/pipelines/65263

* [igt-dev] ✓ Fi.CI.BAT: success for i915/gem_exec_balancer: Check for scheduling bonded-pairs on the same engine
  2019-09-20 22:26 ` [igt-dev] " Chris Wilson
@ 2019-09-20 23:11 ` Patchwork
  -1 siblings, 0 replies; 13+ messages in thread
From: Patchwork @ 2019-09-20 23:11 UTC (permalink / raw)
  To: Chris Wilson; +Cc: igt-dev

== Series Details ==

Series: i915/gem_exec_balancer: Check for scheduling bonded-pairs on the same engine
URL   : https://patchwork.freedesktop.org/series/67024/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_6932 -> IGTPW_3483
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/67024/revisions/1/mbox/

Known issues
------------

  Here are the changes found in IGTPW_3483 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_exec_suspend@basic-s4-devices:
    - fi-blb-e6850:       [PASS][1] -> [INCOMPLETE][2] ([fdo#107718])
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6932/fi-blb-e6850/igt@gem_exec_suspend@basic-s4-devices.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3483/fi-blb-e6850/igt@gem_exec_suspend@basic-s4-devices.html

  * igt@kms_frontbuffer_tracking@basic:
    - fi-icl-u3:          [PASS][3] -> [FAIL][4] ([fdo#103167])
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6932/fi-icl-u3/igt@kms_frontbuffer_tracking@basic.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3483/fi-icl-u3/igt@kms_frontbuffer_tracking@basic.html

  * igt@prime_vgem@basic-fence-read:
    - fi-icl-u3:          [PASS][5] -> [DMESG-WARN][6] ([fdo#107724]) +1 similar issue
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6932/fi-icl-u3/igt@prime_vgem@basic-fence-read.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3483/fi-icl-u3/igt@prime_vgem@basic-fence-read.html

  
#### Possible fixes ####

  * igt@gem_exec_suspend@basic:
    - {fi-icl-u4}:        [FAIL][7] ([fdo#111699]) -> [PASS][8]
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6932/fi-icl-u4/igt@gem_exec_suspend@basic.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3483/fi-icl-u4/igt@gem_exec_suspend@basic.html

  * igt@gem_mmap_gtt@basic-small-bo:
    - fi-icl-u3:          [DMESG-WARN][9] ([fdo#107724]) -> [PASS][10]
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6932/fi-icl-u3/igt@gem_mmap_gtt@basic-small-bo.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3483/fi-icl-u3/igt@gem_mmap_gtt@basic-small-bo.html

  * igt@i915_module_load@reload:
    - fi-icl-u3:          [DMESG-WARN][11] ([fdo#107724] / [fdo#111214]) -> [PASS][12]
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6932/fi-icl-u3/igt@i915_module_load@reload.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3483/fi-icl-u3/igt@i915_module_load@reload.html

  
#### Warnings ####

  * igt@kms_chamelium@hdmi-hpd-fast:
    - fi-kbl-7500u:       [FAIL][13] ([fdo#111407]) -> [FAIL][14] ([fdo#111045] / [fdo#111096])
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6932/fi-kbl-7500u/igt@kms_chamelium@hdmi-hpd-fast.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3483/fi-kbl-7500u/igt@kms_chamelium@hdmi-hpd-fast.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#103167]: https://bugs.freedesktop.org/show_bug.cgi?id=103167
  [fdo#107718]: https://bugs.freedesktop.org/show_bug.cgi?id=107718
  [fdo#107724]: https://bugs.freedesktop.org/show_bug.cgi?id=107724
  [fdo#111045]: https://bugs.freedesktop.org/show_bug.cgi?id=111045
  [fdo#111096]: https://bugs.freedesktop.org/show_bug.cgi?id=111096
  [fdo#111214]: https://bugs.freedesktop.org/show_bug.cgi?id=111214
  [fdo#111407]: https://bugs.freedesktop.org/show_bug.cgi?id=111407
  [fdo#111699]: https://bugs.freedesktop.org/show_bug.cgi?id=111699


Participating hosts (54 -> 33)
------------------------------

  Additional (1): fi-tgl-u2 
  Missing    (22): fi-kbl-soraka fi-bdw-gvtdvm fi-icl-u2 fi-skl-6260u fi-apl-guc fi-pnv-d510 fi-icl-y fi-skl-lmem fi-icl-guc fi-hsw-4770r fi-cml-u2 fi-bdw-5557u fi-bsw-n3050 fi-elk-e7500 fi-ilk-m540 fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-kbl-guc fi-cfl-8109u fi-byt-clapper fi-bdw-samus 


Build changes
-------------

  * CI: CI-20190529 -> None
  * IGT: IGT_5195 -> IGTPW_3483

  CI-20190529: 20190529
  CI_DRM_6932: f539beb004edb5b82925c10324f7cf4c5b4dbcc5 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGTPW_3483: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3483/
  IGT_5195: ea29372bb4e261a0a8da371a1f434131750f18e0 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools



== Testlist changes ==

+igt@gem_exec_balancer@bonded-slice

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3483/

* [igt-dev] ✗ Fi.CI.IGT: failure for i915/gem_exec_balancer: Check for scheduling bonded-pairs on the same engine
  2019-09-20 22:26 ` [igt-dev] " Chris Wilson
@ 2019-09-22 12:24 ` Patchwork
  -1 siblings, 0 replies; 13+ messages in thread
From: Patchwork @ 2019-09-22 12:24 UTC (permalink / raw)
  To: Chris Wilson; +Cc: igt-dev

== Series Details ==

Series: i915/gem_exec_balancer: Check for scheduling bonded-pairs on the same engine
URL   : https://patchwork.freedesktop.org/series/67024/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_6932_full -> IGTPW_3483_full
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with IGTPW_3483_full absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in IGTPW_3483_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/67024/revisions/1/mbox/

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in IGTPW_3483_full:

### IGT changes ###

#### Possible regressions ####

  * {igt@gem_exec_balancer@bonded-slice} (NEW):
    - shard-kbl:          NOTRUN -> [DMESG-FAIL][1]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3483/shard-kbl6/igt@gem_exec_balancer@bonded-slice.html
    - shard-iclb:         NOTRUN -> [DMESG-FAIL][2]
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3483/shard-iclb1/igt@gem_exec_balancer@bonded-slice.html

  * igt@kms_flip@flip-vs-suspend-interruptible:
    - shard-iclb:         [PASS][3] -> [DMESG-WARN][4]
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6932/shard-iclb1/igt@kms_flip@flip-vs-suspend-interruptible.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3483/shard-iclb6/igt@kms_flip@flip-vs-suspend-interruptible.html

  * igt@runner@aborted:
    - shard-kbl:          NOTRUN -> [FAIL][5]
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3483/shard-kbl6/igt@runner@aborted.html

  
New tests
---------

  New tests have been introduced between CI_DRM_6932_full and IGTPW_3483_full:

### New IGT tests (1) ###

  * igt@gem_exec_balancer@bonded-slice:
    - Statuses : 2 dmesg-fail(s) 2 pass(s) 2 skip(s)
    - Exec time: [0.0, 15.85] s

  

Known issues
------------

  Here are the changes found in IGTPW_3483_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_exec_schedule@preempt-other-bsd:
    - shard-iclb:         [PASS][6] -> [SKIP][7] ([fdo#111325]) +3 similar issues
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6932/shard-iclb3/igt@gem_exec_schedule@preempt-other-bsd.html
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3483/shard-iclb4/igt@gem_exec_schedule@preempt-other-bsd.html

  * igt@kms_cursor_crc@pipe-a-cursor-suspend:
    - shard-kbl:          [PASS][8] -> [DMESG-WARN][9] ([fdo#108566]) +3 similar issues
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6932/shard-kbl2/igt@kms_cursor_crc@pipe-a-cursor-suspend.html
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3483/shard-kbl1/igt@kms_cursor_crc@pipe-a-cursor-suspend.html

  * igt@kms_cursor_legacy@2x-long-cursor-vs-flip-atomic:
    - shard-hsw:          [PASS][10] -> [FAIL][11] ([fdo#105767])
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6932/shard-hsw4/igt@kms_cursor_legacy@2x-long-cursor-vs-flip-atomic.html
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3483/shard-hsw6/igt@kms_cursor_legacy@2x-long-cursor-vs-flip-atomic.html

  * igt@kms_flip@flip-vs-expired-vblank-interruptible:
    - shard-apl:          [PASS][12] -> [FAIL][13] ([fdo#105363])
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6932/shard-apl6/igt@kms_flip@flip-vs-expired-vblank-interruptible.html
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3483/shard-apl2/igt@kms_flip@flip-vs-expired-vblank-interruptible.html

  * igt@kms_frontbuffer_tracking@fbc-suspend:
    - shard-apl:          [PASS][14] -> [DMESG-WARN][15] ([fdo#108566]) +5 similar issues
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6932/shard-apl1/igt@kms_frontbuffer_tracking@fbc-suspend.html
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3483/shard-apl4/igt@kms_frontbuffer_tracking@fbc-suspend.html

  * igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-spr-indfb-draw-render:
    - shard-iclb:         [PASS][16] -> [FAIL][17] ([fdo#103167]) +1 similar issue
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6932/shard-iclb6/igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-spr-indfb-draw-render.html
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3483/shard-iclb8/igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-spr-indfb-draw-render.html

  * igt@kms_psr@psr2_cursor_render:
    - shard-iclb:         [PASS][18] -> [SKIP][19] ([fdo#109441]) +3 similar issues
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6932/shard-iclb2/igt@kms_psr@psr2_cursor_render.html
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3483/shard-iclb5/igt@kms_psr@psr2_cursor_render.html

  * igt@prime_vgem@fence-wait-bsd2:
    - shard-iclb:         [PASS][20] -> [SKIP][21] ([fdo#109276]) +18 similar issues
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6932/shard-iclb2/igt@prime_vgem@fence-wait-bsd2.html
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3483/shard-iclb6/igt@prime_vgem@fence-wait-bsd2.html

  
#### Possible fixes ####

  * igt@gem_exec_balancer@smoke:
    - shard-iclb:         [SKIP][22] ([fdo#110854]) -> [PASS][23]
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6932/shard-iclb5/igt@gem_exec_balancer@smoke.html
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3483/shard-iclb2/igt@gem_exec_balancer@smoke.html

  * igt@gem_exec_schedule@preempt-queue-bsd1:
    - shard-iclb:         [SKIP][24] ([fdo#109276]) -> [PASS][25] +13 similar issues
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6932/shard-iclb8/igt@gem_exec_schedule@preempt-queue-bsd1.html
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3483/shard-iclb4/igt@gem_exec_schedule@preempt-queue-bsd1.html

  * igt@gem_exec_schedule@preemptive-hang-bsd:
    - shard-iclb:         [SKIP][26] ([fdo#111325]) -> [PASS][27] +5 similar issues
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6932/shard-iclb2/igt@gem_exec_schedule@preemptive-hang-bsd.html
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3483/shard-iclb3/igt@gem_exec_schedule@preemptive-hang-bsd.html

  * igt@gem_vm_create@isolation:
    - shard-apl:          [INCOMPLETE][28] ([fdo#103927]) -> [PASS][29]
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6932/shard-apl7/igt@gem_vm_create@isolation.html
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3483/shard-apl6/igt@gem_vm_create@isolation.html

  * igt@kms_cursor_legacy@2x-long-flip-vs-cursor-atomic:
    - shard-glk:          [FAIL][30] ([fdo#104873]) -> [PASS][31]
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6932/shard-glk4/igt@kms_cursor_legacy@2x-long-flip-vs-cursor-atomic.html
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3483/shard-glk2/igt@kms_cursor_legacy@2x-long-flip-vs-cursor-atomic.html

  * igt@kms_frontbuffer_tracking@fbc-rgb565-draw-blt:
    - shard-iclb:         [FAIL][32] ([fdo#103167]) -> [PASS][33] +2 similar issues
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6932/shard-iclb2/igt@kms_frontbuffer_tracking@fbc-rgb565-draw-blt.html
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3483/shard-iclb5/igt@kms_frontbuffer_tracking@fbc-rgb565-draw-blt.html

  * igt@kms_pipe_crc_basic@suspend-read-crc-pipe-b:
    - shard-apl:          [DMESG-WARN][34] ([fdo#108566]) -> [PASS][35] +4 similar issues
   [34]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6932/shard-apl7/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-b.html
   [35]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3483/shard-apl8/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-b.html

  * igt@kms_pipe_crc_basic@suspend-read-crc-pipe-c:
    - shard-kbl:          [DMESG-WARN][36] ([fdo#103313]) -> [PASS][37]
   [36]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6932/shard-kbl3/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-c.html
   [37]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3483/shard-kbl7/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-c.html

  * igt@kms_plane_lowres@pipe-a-tiling-y:
    - shard-iclb:         [FAIL][38] ([fdo#103166]) -> [PASS][39]
   [38]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6932/shard-iclb8/igt@kms_plane_lowres@pipe-a-tiling-y.html
   [39]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3483/shard-iclb3/igt@kms_plane_lowres@pipe-a-tiling-y.html

  * igt@kms_psr2_su@frontbuffer:
    - shard-iclb:         [SKIP][40] ([fdo#109642] / [fdo#111068]) -> [PASS][41]
   [40]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6932/shard-iclb6/igt@kms_psr2_su@frontbuffer.html
   [41]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3483/shard-iclb2/igt@kms_psr2_su@frontbuffer.html

  * igt@kms_psr@psr2_no_drrs:
    - shard-iclb:         [SKIP][42] ([fdo#109441]) -> [PASS][43] +2 similar issues
   [42]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6932/shard-iclb5/igt@kms_psr@psr2_no_drrs.html
   [43]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3483/shard-iclb2/igt@kms_psr@psr2_no_drrs.html

  * igt@kms_setmode@basic:
    - shard-apl:          [FAIL][44] ([fdo#99912]) -> [PASS][45]
   [44]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6932/shard-apl3/igt@kms_setmode@basic.html
   [45]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3483/shard-apl6/igt@kms_setmode@basic.html

  
#### Warnings ####

  * igt@gem_mocs_settings@mocs-settings-bsd2:
    - shard-iclb:         [FAIL][46] ([fdo#111330]) -> [SKIP][47] ([fdo#109276]) +1 similar issue
   [46]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6932/shard-iclb4/igt@gem_mocs_settings@mocs-settings-bsd2.html
   [47]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3483/shard-iclb6/igt@gem_mocs_settings@mocs-settings-bsd2.html

  * igt@kms_dp_dsc@basic-dsc-enable-edp:
    - shard-iclb:         [SKIP][48] ([fdo#109349]) -> [DMESG-WARN][49] ([fdo#107724])
   [48]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6932/shard-iclb5/igt@kms_dp_dsc@basic-dsc-enable-edp.html
   [49]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3483/shard-iclb2/igt@kms_dp_dsc@basic-dsc-enable-edp.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#103166]: https://bugs.freedesktop.org/show_bug.cgi?id=103166
  [fdo#103167]: https://bugs.freedesktop.org/show_bug.cgi?id=103167
  [fdo#103313]: https://bugs.freedesktop.org/show_bug.cgi?id=103313
  [fdo#103927]: https://bugs.freedesktop.org/show_bug.cgi?id=103927
  [fdo#104873]: https://bugs.freedesktop.org/show_bug.cgi?id=104873
  [fdo#105363]: https://bugs.freedesktop.org/show_bug.cgi?id=105363
  [fdo#105411]: https://bugs.freedesktop.org/show_bug.cgi?id=105411
  [fdo#105767]: https://bugs.freedesktop.org/show_bug.cgi?id=105767
  [fdo#107724]: https://bugs.freedesktop.org/show_bug.cgi?id=107724
  [fdo#108566]: https://bugs.freedesktop.org/show_bug.cgi?id=108566
  [fdo#109276]: https://bugs.freedesktop.org/show_bug.cgi?id=109276
  [fdo#109349]: https://bugs.freedesktop.org/show_bug.cgi?id=109349
  [fdo#109441]: https://bugs.freedesktop.org/show_bug.cgi?id=109441
  [fdo#109642]: https://bugs.freedesktop.org/show_bug.cgi?id=109642
  [fdo#110854]: https://bugs.freedesktop.org/show_bug.cgi?id=110854
  [fdo#111068]: https://bugs.freedesktop.org/show_bug.cgi?id=111068
  [fdo#111325]: https://bugs.freedesktop.org/show_bug.cgi?id=111325
  [fdo#111330]: https://bugs.freedesktop.org/show_bug.cgi?id=111330
  [fdo#111757]: https://bugs.freedesktop.org/show_bug.cgi?id=111757
  [fdo#99912]: https://bugs.freedesktop.org/show_bug.cgi?id=99912


Participating hosts (9 -> 6)
------------------------------

  Missing    (3): pig-skl-6260u shard-skl pig-hsw-4770r 


Build changes
-------------

  * CI: CI-20190529 -> None
  * IGT: IGT_5195 -> IGTPW_3483
  * Piglit: piglit_4509 -> None

  CI-20190529: 20190529
  CI_DRM_6932: f539beb004edb5b82925c10324f7cf4c5b4dbcc5 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGTPW_3483: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3483/
  IGT_5195: ea29372bb4e261a0a8da371a1f434131750f18e0 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3483/

* Re: [igt-dev] [PATCH i-g-t] i915/gem_exec_balancer: Check for scheduling bonded-pairs on the same engine
  2019-09-20 22:26 ` [igt-dev] " Chris Wilson
@ 2019-09-23 14:29   ` Tvrtko Ursulin
  -1 siblings, 0 replies; 13+ messages in thread
From: Tvrtko Ursulin @ 2019-09-23 14:29 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: igt-dev


On 20/09/2019 23:26, Chris Wilson wrote:
> The expectation for bonded submission is that the requests are run
> concurrently, in parallel on multiple engines. However, the lack of
> constraints in the scheduler's selection, combined with timeslicing,
> means that the bonded requests may be run in the opposite order on the
> same engine. With just the right pair of requests, this can cause a
> GPU hang (or at least trigger the hangchecker); in the best (worst)
> case, execution runs several times slower than ideal.

I don't see any bonding being set up?

[comes back later]

Oh, you used only the submit fence and not actual bonds. But you also
don't use the virtual engine at all?

> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   tests/i915/gem_exec_balancer.c | 151 +++++++++++++++++++++++++++++++++
>   1 file changed, 151 insertions(+)
> 
> diff --git a/tests/i915/gem_exec_balancer.c b/tests/i915/gem_exec_balancer.c
> index 407dc0eca..e4fe75747 100644
> --- a/tests/i915/gem_exec_balancer.c
> +++ b/tests/i915/gem_exec_balancer.c
> @@ -30,6 +30,15 @@
>   
>   IGT_TEST_DESCRIPTION("Exercise in-kernel load-balancing");
>   
> +#define MI_SEMAPHORE_WAIT		(0x1c << 23)
> +#define   MI_SEMAPHORE_POLL             (1 << 15)
> +#define   MI_SEMAPHORE_SAD_GT_SDD       (0 << 12)
> +#define   MI_SEMAPHORE_SAD_GTE_SDD      (1 << 12)
> +#define   MI_SEMAPHORE_SAD_LT_SDD       (2 << 12)
> +#define   MI_SEMAPHORE_SAD_LTE_SDD      (3 << 12)
> +#define   MI_SEMAPHORE_SAD_EQ_SDD       (4 << 12)
> +#define   MI_SEMAPHORE_SAD_NEQ_SDD      (5 << 12)
> +
>   #define INSTANCE_COUNT (1 << I915_PMU_SAMPLE_INSTANCE_BITS)
>   
>   static size_t sizeof_load_balance(int count)
> @@ -694,6 +703,145 @@ static void bonded(int i915, unsigned int flags)
>   	gem_context_destroy(i915, master);
>   }
>   
> +static unsigned int offset_in_page(void *addr)
> +{
> +	return (uintptr_t)addr & 4095;
> +}
> +
> +static uint32_t create_semaphore_to_spinner(int i915, igt_spin_t *spin)
> +{
> +	uint32_t *cs, *map;
> +	uint32_t handle;
> +
> +	handle = gem_create(i915, 4096);
> +	cs = map = gem_mmap__cpu(i915, handle, 0, 4096, PROT_WRITE);
> +
> +	/* Wait until the spinner is running */
> +	*cs++ = MI_SEMAPHORE_WAIT |
> +		MI_SEMAPHORE_POLL |
> +		MI_SEMAPHORE_SAD_NEQ_SDD |
> +		(4 - 2);
> +	*cs++ = 0;
> +	*cs++ = spin->obj[0].offset + 4 * SPIN_POLL_START_IDX;
> +	*cs++ = 0;
> +
> +	/* Then cancel the spinner */
> +	*cs++ = MI_STORE_DWORD_IMM;
> +	*cs++ = spin->obj[IGT_SPIN_BATCH].offset +
> +		offset_in_page(spin->condition);
> +	*cs++ = 0;
> +	*cs++ = MI_BATCH_BUFFER_END;
> +
> +	*cs++ = MI_BATCH_BUFFER_END;
> +	munmap(map, 4096);
> +
> +	return handle;
> +}
> +
> +static void bonded_slice(int i915)
> +{
> +	uint32_t ctx;
> +	int *stop;
> +
> +	igt_require(gem_scheduler_has_semaphores(i915));
> +
> +	stop = mmap(0, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
> +	igt_assert(stop != MAP_FAILED);
> +
> +	ctx = gem_context_create(i915);
> +
> +	for (int class = 0; class < 32; class++) {
> +		struct i915_engine_class_instance *siblings;
> +		struct drm_i915_gem_exec_object2 obj[3] = {};
> +		struct drm_i915_gem_execbuffer2 eb = {};
> +		unsigned int count;
> +		igt_spin_t *spin;
> +
> +		siblings = list_engines(i915, 1u << class, &count);
> +		if (!siblings)
> +			continue;
> +
> +		if (count < 2) {
> +			free(siblings);
> +			continue;
> +		}
> +
> +		/*
> +		 * A: semaphore wait on spinner; cancel spinner
> +		 * B: unpreemptable spinner
> +		 *
> +		 * A waits for running ack from B, if scheduled on the same
> +		 * engine -> hang.
> +		 *
> +		 * C+: background load across engines
> +		 */
> +
> +		set_load_balancer(i915, ctx, siblings, count, NULL);
> +
> +		spin = __igt_spin_new(i915,
> +				      .ctx = ctx,
> +				      .flags = (IGT_SPIN_NO_PREEMPTION |
> +						IGT_SPIN_POLL_RUN));
> +		igt_spin_end(spin); /* we just want its address for later */
> +		gem_sync(i915, spin->handle);
> +		igt_spin_reset(spin);
> +
> +		obj[0] = spin->obj[0];
> +		obj[1] = spin->obj[1];
> +		obj[2].handle = create_semaphore_to_spinner(i915, spin);
> +
> +		eb.buffers_ptr = to_user_pointer(obj);
> +		eb.rsvd1 = ctx;
> +
> +		*stop = 0;
> +		igt_fork(child, count + 1) {
> +			igt_list_del(&spin->link);
> +
> +			ctx = gem_context_clone(i915, ctx,
> +						I915_CONTEXT_CLONE_ENGINES, 0);
> +
> +			while (!READ_ONCE(*stop)) {
> +				spin = igt_spin_new(i915,
> +						    .ctx = ctx,
> +						    .engine = (1 + rand() % count),

With "count + 1" children and rand load my end up uneven across engines 
- are you happy with that?

> +						    .flags = IGT_SPIN_POLL_RUN);
> +				igt_spin_busywait_until_started(spin);
> +				usleep(50000);

50ms, hm, ideally there should be a pipe signal before the parent
starts the test, so we know the children have started. Otherwise the
parent can finish before they even start, no?
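
Something along these lines is what I have in mind (a hypothetical
sketch, not tested):

	int ack[2];
	char token = 0;

	igt_assert(pipe(ack) == 0);
	igt_fork(child, count + 1) {
		/* ... bring up the first background spinner ... */
		write(ack[1], &token, 1); /* tell the parent we are up */
		/* ... noise loop as before ... */
	}
	for (int n = 0; n < count + 1; n++)
		read(ack[0], &token, 1); /* one ack per child */
	/* only now start the igt_until_timeout(5) loop */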

> +				igt_spin_free(i915, spin);
> +			}
> +
> +			gem_context_destroy(i915, ctx);
> +		}
> +
> +		igt_until_timeout(5) {
> +			igt_spin_reset(spin);

What is the reset for?

> +
> +			/* A: Submit the semaphore wait */
> +			eb.buffer_count = 3;
> +			eb.flags = (1 + rand() % count) | I915_EXEC_FENCE_OUT;
> +			gem_execbuf_wr(i915, &eb);
> +
> +			/* B: Submit the spinner (in parallel) */

How can they run in parallel when it is the same context, so they are implicitly in order?

Regards,

Tvrtko

> +			eb.buffer_count = 2;
> +			eb.flags = 0 | I915_EXEC_FENCE_SUBMIT;
> +			eb.rsvd2 >>= 32;
> +			gem_execbuf(i915, &eb);
> +			close(eb.rsvd2);
> +
> +			gem_sync(i915, obj[2].handle);
> +		}
> +
> +		*stop = 1;
> +		igt_waitchildren();
> +
> +		gem_close(i915, obj[2].handle);
> +		igt_spin_free(i915, spin);
> +	}
> +
> +	gem_context_destroy(i915, ctx);
> +	munmap(stop, 4096);
> +}
> +
>   static void indices(int i915)
>   {
>   	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, I915_EXEC_RING_MASK + 1);
> @@ -1320,6 +1468,9 @@ igt_main
>   	igt_subtest("bonded-cork")
>   		bonded(i915, CORK);
>   
> +	igt_subtest("bonded-slice")
> +		bonded_slice(i915);
> +
>   	igt_fixture {
>   		igt_stop_hang_detector();
>   	}
> 

* Re: [igt-dev] [PATCH i-g-t] i915/gem_exec_balancer: Check for scheduling bonded-pairs on the same engine
  2019-09-23 14:29   ` Tvrtko Ursulin
@ 2019-09-23 15:43     ` Chris Wilson
  -1 siblings, 0 replies; 13+ messages in thread
From: Chris Wilson @ 2019-09-23 15:43 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx; +Cc: igt-dev

Quoting Tvrtko Ursulin (2019-09-23 15:29:11)
> 
> On 20/09/2019 23:26, Chris Wilson wrote:
> > The expectation for bonded submission is that the requests are run
> > concurrently, in parallel on multiple engines. However, the lack of
> > constraints in the scheduler's selection, combined with timeslicing,
> > means that the bonded requests may be run in the opposite order on
> > the same engine. With just the right pair of requests, this can cause
> > a GPU hang (or at least trigger the hangchecker); in the best (worst)
> > case, execution runs several times slower than ideal.
> 
> I don't see any bonding being set up?
> 
> [comes back later]
> 
> Oh, you used only the submit fence and not actual bonds. But you also 
> don't use the virtual engine at all?

A is using either of the two real engines; B is using the virtual
engine to select the other available engine. Bonding in this case just
means that the requests are tied together to run in parallel.
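
For reference, the engine map that set_load_balancer() installs in the
context is laid out as:

	engines[0]        -> the virtual engine, balancing across all
	                     siblings of the class
	engines[1..count] -> the physical siblings themselves

so eb.flags = 1 + rand() % count pins A to one real engine, while
eb.flags = 0 sends B to the virtual engine, which should pick the
other.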

> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > ---
> >   tests/i915/gem_exec_balancer.c | 151 +++++++++++++++++++++++++++++++++
> >   1 file changed, 151 insertions(+)
> > 
> > diff --git a/tests/i915/gem_exec_balancer.c b/tests/i915/gem_exec_balancer.c
> > index 407dc0eca..e4fe75747 100644
> > --- a/tests/i915/gem_exec_balancer.c
> > +++ b/tests/i915/gem_exec_balancer.c
> > @@ -30,6 +30,15 @@
> >   
> >   IGT_TEST_DESCRIPTION("Exercise in-kernel load-balancing");
> >   
> > +#define MI_SEMAPHORE_WAIT            (0x1c << 23)
> > +#define   MI_SEMAPHORE_POLL             (1 << 15)
> > +#define   MI_SEMAPHORE_SAD_GT_SDD       (0 << 12)
> > +#define   MI_SEMAPHORE_SAD_GTE_SDD      (1 << 12)
> > +#define   MI_SEMAPHORE_SAD_LT_SDD       (2 << 12)
> > +#define   MI_SEMAPHORE_SAD_LTE_SDD      (3 << 12)
> > +#define   MI_SEMAPHORE_SAD_EQ_SDD       (4 << 12)
> > +#define   MI_SEMAPHORE_SAD_NEQ_SDD      (5 << 12)
> > +
> >   #define INSTANCE_COUNT (1 << I915_PMU_SAMPLE_INSTANCE_BITS)
> >   
> >   static size_t sizeof_load_balance(int count)
> > @@ -694,6 +703,145 @@ static void bonded(int i915, unsigned int flags)
> >       gem_context_destroy(i915, master);
> >   }
> >   
> > +static unsigned int offset_in_page(void *addr)
> > +{
> > +     return (uintptr_t)addr & 4095;
> > +}
> > +
> > +static uint32_t create_semaphore_to_spinner(int i915, igt_spin_t *spin)
> > +{
> > +     uint32_t *cs, *map;
> > +     uint32_t handle;
> > +
> > +     handle = gem_create(i915, 4096);
> > +     cs = map = gem_mmap__cpu(i915, handle, 0, 4096, PROT_WRITE);
> > +
> > +     /* Wait until the spinner is running */
> > +     *cs++ = MI_SEMAPHORE_WAIT |
> > +             MI_SEMAPHORE_POLL |
> > +             MI_SEMAPHORE_SAD_NEQ_SDD |
> > +             (4 - 2);
> > +     *cs++ = 0;
> > +     *cs++ = spin->obj[0].offset + 4 * SPIN_POLL_START_IDX;
> > +     *cs++ = 0;
> > +
> > +     /* Then cancel the spinner */
> > +     *cs++ = MI_STORE_DWORD_IMM;
> > +     *cs++ = spin->obj[IGT_SPIN_BATCH].offset +
> > +             offset_in_page(spin->condition);
> > +     *cs++ = 0;
> > +     *cs++ = MI_BATCH_BUFFER_END;
> > +
> > +     *cs++ = MI_BATCH_BUFFER_END;
> > +     munmap(map, 4096);
> > +
> > +     return handle;
> > +}
> > +
> > +static void bonded_slice(int i915)
> > +{
> > +     uint32_t ctx;
> > +     int *stop;
> > +
> > +     igt_require(gem_scheduler_has_semaphores(i915));
> > +
> > +     stop = mmap(0, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
> > +     igt_assert(stop != MAP_FAILED);
> > +
> > +     ctx = gem_context_create(i915);
> > +
> > +     for (int class = 0; class < 32; class++) {
> > +             struct i915_engine_class_instance *siblings;
> > +             struct drm_i915_gem_exec_object2 obj[3] = {};
> > +             struct drm_i915_gem_execbuffer2 eb = {};
> > +             unsigned int count;
> > +             igt_spin_t *spin;
> > +
> > +             siblings = list_engines(i915, 1u << class, &count);
> > +             if (!siblings)
> > +                     continue;
> > +
> > +             if (count < 2) {
> > +                     free(siblings);
> > +                     continue;
> > +             }
> > +
> > +             /*
> > +              * A: semaphore wait on spinner; cancel spinner
> > +              * B: unpreemptable spinner
> > +              *
> > +              * A waits for running ack from B, if scheduled on the same
> > +              * engine -> hang.
> > +              *
> > +              * C+: background load across engines
> > +              */
> > +
> > +             set_load_balancer(i915, ctx, siblings, count, NULL);
> > +
> > +             spin = __igt_spin_new(i915,
> > +                                   .ctx = ctx,
> > +                                   .flags = (IGT_SPIN_NO_PREEMPTION |
> > +                                             IGT_SPIN_POLL_RUN));
> > +             igt_spin_end(spin); /* we just want its address for later */
> > +             gem_sync(i915, spin->handle);
> > +             igt_spin_reset(spin);
> > +
> > +             obj[0] = spin->obj[0];
> > +             obj[1] = spin->obj[1];
> > +             obj[2].handle = create_semaphore_to_spinner(i915, spin);
> > +
> > +             eb.buffers_ptr = to_user_pointer(obj);
> > +             eb.rsvd1 = ctx;
> > +
> > +             *stop = 0;
> > +             igt_fork(child, count + 1) {
> > +                     igt_list_del(&spin->link);
> > +
> > +                     ctx = gem_context_clone(i915, ctx,
> > +                                             I915_CONTEXT_CLONE_ENGINES, 0);
> > +
> > +                     while (!READ_ONCE(*stop)) {
> > +                             spin = igt_spin_new(i915,
> > +                                                 .ctx = ctx,
> > +                                                 .engine = (1 + rand() % count),
> 
> With "count + 1" children and rand load my end up uneven across engines 
> - are you happy with that?

It's using rand, it's going to be uneven. count + 1 isn't significant in
any way. I stopped as soon as I had the test reliably hitting the issue.

> > +                                                 .flags = IGT_SPIN_POLL_RUN);
> > +                             igt_spin_busywait_until_started(spin);
> > +                             usleep(50000);
> 
> 50ms, hm, ideally there should be a pipe signal before the parent
> starts the test, so we know the children have started. Otherwise the
> parent can finish before they even start, no?

The children are just to provide noise. The requirement is that we have
enough load across the system to cause timeslicing to kick in.
 
> > +                             igt_spin_free(i915, spin);
> > +                     }
> > +
> > +                     gem_context_destroy(i915, ctx);
> > +             }
> > +
> > +             igt_until_timeout(5) {
> > +                     igt_spin_reset(spin);
> 
> What is the reset for?

We are reusing the spinner inside the loop.

> > +
> > +                     /* A: Submit the semaphore wait */
> > +                     eb.buffer_count = 3;
> > +                     eb.flags = (1 + rand() % count) | I915_EXEC_FENCE_OUT;
> > +                     gem_execbuf_wr(i915, &eb);
> > +
> > +                     /* B: Submit the spinner (in parallel) */
> 
> How can they run in parallel when it is the same context, so they are implicitly in order?

Different engines with different timelines, using the submit fence to
request parallel execution.
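
To spell out the plumbing used in the test loop (a condensed excerpt,
comments mine):

	/* A: pinned to one sibling; ask for an output fence */
	eb.flags = (1 + rand() % count) | I915_EXEC_FENCE_OUT;
	gem_execbuf_wr(i915, &eb); /* fence fd returned in rsvd2 >> 32 */

	/* B: the virtual engine; couple its submission to A's fence */
	eb.flags = 0 | I915_EXEC_FENCE_SUBMIT;
	eb.rsvd2 >>= 32; /* the in-fence is read from the low 32 bits */
	gem_execbuf(i915, &eb);
	close(eb.rsvd2);

Unlike I915_EXEC_FENCE_IN, which would order B after A's completion,
the submit fence only gates B on A being submitted, i.e. it asks for
the pair to execute at the same time.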
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [igt-dev] [PATCH i-g-t] i915/gem_exec_balancer: Check for scheduling bonded-pairs on the same engine
@ 2019-09-23 15:43     ` Chris Wilson
  0 siblings, 0 replies; 13+ messages in thread
From: Chris Wilson @ 2019-09-23 15:43 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx; +Cc: igt-dev, Tvrtko Ursulin

Quoting Tvrtko Ursulin (2019-09-23 15:29:11)
> 
> On 20/09/2019 23:26, Chris Wilson wrote:
> > The expectation for bonded submission is that they are run concurrently,
> > in parallel on multiple engines. However, given a lack of constraints in
> > the scheduler's selection combined with timeslicing could mean that the
> > bonded requests could be run in opposite order on the same engine. With
> > just the right pair of requests, this can cause a GPU hang (or at least
> > trigger hangchecker), best (worst) case would be execution running
> > several times slower than ideal.
> 
> I don't see any bonding being setup?
> 
> [comes back later]
> 
> Oh you used only the submit fence and not actually bonds. But you also 
> don't use the virtual engine at all?

A is using either of the 2 real engines, B is using the virtual engine
to select the other available engine. Bonding in this case is just that
the requests are bonded together to run in parallel.

> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > ---
> >   tests/i915/gem_exec_balancer.c | 151 +++++++++++++++++++++++++++++++++
> >   1 file changed, 151 insertions(+)
> > 
> > diff --git a/tests/i915/gem_exec_balancer.c b/tests/i915/gem_exec_balancer.c
> > index 407dc0eca..e4fe75747 100644
> > --- a/tests/i915/gem_exec_balancer.c
> > +++ b/tests/i915/gem_exec_balancer.c
> > @@ -30,6 +30,15 @@
> >   
> >   IGT_TEST_DESCRIPTION("Exercise in-kernel load-balancing");
> >   
> > +#define MI_SEMAPHORE_WAIT            (0x1c << 23)
> > +#define   MI_SEMAPHORE_POLL             (1 << 15)
> > +#define   MI_SEMAPHORE_SAD_GT_SDD       (0 << 12)
> > +#define   MI_SEMAPHORE_SAD_GTE_SDD      (1 << 12)
> > +#define   MI_SEMAPHORE_SAD_LT_SDD       (2 << 12)
> > +#define   MI_SEMAPHORE_SAD_LTE_SDD      (3 << 12)
> > +#define   MI_SEMAPHORE_SAD_EQ_SDD       (4 << 12)
> > +#define   MI_SEMAPHORE_SAD_NEQ_SDD      (5 << 12)
> > +
> >   #define INSTANCE_COUNT (1 << I915_PMU_SAMPLE_INSTANCE_BITS)
> >   
> >   static size_t sizeof_load_balance(int count)
> > @@ -694,6 +703,145 @@ static void bonded(int i915, unsigned int flags)
> >       gem_context_destroy(i915, master);
> >   }
> >   
> > +static unsigned int offset_in_page(void *addr)
> > +{
> > +     return (uintptr_t)addr & 4095;
> > +}
> > +
> > +static uint32_t create_semaphore_to_spinner(int i915, igt_spin_t *spin)
> > +{
> > +     uint32_t *cs, *map;
> > +     uint32_t handle;
> > +
> > +     handle = gem_create(i915, 4096);
> > +     cs = map = gem_mmap__cpu(i915, handle, 0, 4096, PROT_WRITE);
> > +
> > +     /* Wait until the spinner is running */
> > +     *cs++ = MI_SEMAPHORE_WAIT |
> > +             MI_SEMAPHORE_POLL |
> > +             MI_SEMAPHORE_SAD_NEQ_SDD |
> > +             (4 - 2);
> > +     *cs++ = 0;
> > +     *cs++ = spin->obj[0].offset + 4 * SPIN_POLL_START_IDX;
> > +     *cs++ = 0;
> > +
> > +     /* Then cancel the spinner */
> > +     *cs++ = MI_STORE_DWORD_IMM;
> > +     *cs++ = spin->obj[IGT_SPIN_BATCH].offset +
> > +             offset_in_page(spin->condition);
> > +     *cs++ = 0;
> > +     *cs++ = MI_BATCH_BUFFER_END;
> > +
> > +     *cs++ = MI_BATCH_BUFFER_END;
> > +     munmap(map, 4096);
> > +
> > +     return handle;
> > +}
> > +
> > +static void bonded_slice(int i915)
> > +{
> > +     uint32_t ctx;
> > +     int *stop;
> > +
> > +     igt_require(gem_scheduler_has_semaphores(i915));
> > +
> > +     stop = mmap(0, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
> > +     igt_assert(stop != MAP_FAILED);
> > +
> > +     ctx = gem_context_create(i915);
> > +
> > +     for (int class = 0; class < 32; class++) {
> > +             struct i915_engine_class_instance *siblings;
> > +             struct drm_i915_gem_exec_object2 obj[3] = {};
> > +             struct drm_i915_gem_execbuffer2 eb = {};
> > +             unsigned int count;
> > +             igt_spin_t *spin;
> > +
> > +             siblings = list_engines(i915, 1u << class, &count);
> > +             if (!siblings)
> > +                     continue;
> > +
> > +             if (count < 2) {
> > +                     free(siblings);
> > +                     continue;
> > +             }
> > +
> > +             /*
> > +              * A: semaphore wait on spinner; cancel spinner
> > +              * B: unpreemptable spinner
> > +              *
> > +              * A waits for running ack from B, if scheduled on the same
> > +              * engine -> hang.
> > +              *
> > +              * C+: background load across engines
> > +              */
> > +
> > +             set_load_balancer(i915, ctx, siblings, count, NULL);
> > +
> > +             spin = __igt_spin_new(i915,
> > +                                   .ctx = ctx,
> > +                                   .flags = (IGT_SPIN_NO_PREEMPTION |
> > +                                             IGT_SPIN_POLL_RUN));
> > +             igt_spin_end(spin); /* we just want its address for later */
> > +             gem_sync(i915, spin->handle);
> > +             igt_spin_reset(spin);
> > +
> > +             obj[0] = spin->obj[0];
> > +             obj[1] = spin->obj[1];
> > +             obj[2].handle = create_semaphore_to_spinner(i915, spin);
> > +
> > +             eb.buffers_ptr = to_user_pointer(obj);
> > +             eb.rsvd1 = ctx;
> > +
> > +             *stop = 0;
> > +             igt_fork(child, count + 1) {
> > +                     igt_list_del(&spin->link);
> > +
> > +                     ctx = gem_context_clone(i915, ctx,
> > +                                             I915_CONTEXT_CLONE_ENGINES, 0);
> > +
> > +                     while (!READ_ONCE(*stop)) {
> > +                             spin = igt_spin_new(i915,
> > +                                                 .ctx = ctx,
> > +                                                 .engine = (1 + rand() % count),
> 
> With "count + 1" children and rand load my end up uneven across engines 
> - are you happy with that?

It's using rand, so it's going to be uneven. count + 1 isn't significant
in any way. I stopped as soon as I had the test reliably hitting the issue.

> > +                                                 .flags = IGT_SPIN_POLL_RUN);
> > +                             igt_spin_busywait_until_started(spin);
> > +                             usleep(50000);
> 
> 50ms, hm, ideally there should be a pipe signal before parent starts the 
> test to know children have started. Otherwise parent can finish before 
> they even start, no?

The children are just to provide noise. The requirement is that we have
enough load across the system to cause timeslicing to kick in.
 
> > +                             igt_spin_free(i915, spin);
> > +                     }
> > +
> > +                     gem_context_destroy(i915, ctx);
> > +             }
> > +
> > +             igt_until_timeout(5) {
> > +                     igt_spin_reset(spin);
> 
> What is the reset for?

We are reusing the spinner inside the loop.

> > +
> > +                     /* A: Submit the semaphore wait */
> > +                     eb.buffer_count = 3;
> > +                     eb.flags = (1 + rand() % count) | I915_EXEC_FENCE_OUT;
> > +                     gem_execbuf_wr(i915, &eb);
> > +
> > +                     /* B: Submit the spinner (in parallel) */
> 
> How in parallel when it is the same context so they are implicitly in order?

Different engines with different timelines, using the submit fence to
request parallel execution.
-Chris

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [igt-dev] [PATCH i-g-t] i915/gem_exec_balancer: Check for scheduling bonded-pairs on the same engine
  2019-09-23 15:43     ` Chris Wilson
@ 2019-09-23 16:21       ` Tvrtko Ursulin
  -1 siblings, 0 replies; 13+ messages in thread
From: Tvrtko Ursulin @ 2019-09-23 16:21 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: igt-dev


On 23/09/2019 16:43, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-09-23 15:29:11)
>>
>> On 20/09/2019 23:26, Chris Wilson wrote:
>>> The expectation for bonded submission is that they are run concurrently,
>>> in parallel on multiple engines. However, given a lack of constraints in
>>> the scheduler's selection combined with timeslicing could mean that the
>>> bonded requests could be run in opposite order on the same engine. With
>>> just the right pair of requests, this can cause a GPU hang (or at least
>>> trigger hangchecker), best (worst) case would be execution running
>>> several times slower than ideal.
>>
>> I don't see any bonding being setup?
>>
>> [comes back later]
>>
>> Oh you used only the submit fence and not actually bonds. But you also
>> don't use the virtual engine at all?
> 
> A is using either of the 2 real engines, B is using the virtual engine
> to select the other available engine. Bonding in this case is just that
> the requests are bonded together to run in parallel.
> 
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>> ---
>>>    tests/i915/gem_exec_balancer.c | 151 +++++++++++++++++++++++++++++++++
>>>    1 file changed, 151 insertions(+)
>>>
>>> diff --git a/tests/i915/gem_exec_balancer.c b/tests/i915/gem_exec_balancer.c
>>> index 407dc0eca..e4fe75747 100644
>>> --- a/tests/i915/gem_exec_balancer.c
>>> +++ b/tests/i915/gem_exec_balancer.c
>>> @@ -30,6 +30,15 @@
>>>    
>>>    IGT_TEST_DESCRIPTION("Exercise in-kernel load-balancing");
>>>    
>>> +#define MI_SEMAPHORE_WAIT            (0x1c << 23)
>>> +#define   MI_SEMAPHORE_POLL             (1 << 15)
>>> +#define   MI_SEMAPHORE_SAD_GT_SDD       (0 << 12)
>>> +#define   MI_SEMAPHORE_SAD_GTE_SDD      (1 << 12)
>>> +#define   MI_SEMAPHORE_SAD_LT_SDD       (2 << 12)
>>> +#define   MI_SEMAPHORE_SAD_LTE_SDD      (3 << 12)
>>> +#define   MI_SEMAPHORE_SAD_EQ_SDD       (4 << 12)
>>> +#define   MI_SEMAPHORE_SAD_NEQ_SDD      (5 << 12)
>>> +
>>>    #define INSTANCE_COUNT (1 << I915_PMU_SAMPLE_INSTANCE_BITS)
>>>    
>>>    static size_t sizeof_load_balance(int count)
>>> @@ -694,6 +703,145 @@ static void bonded(int i915, unsigned int flags)
>>>        gem_context_destroy(i915, master);
>>>    }
>>>    
>>> +static unsigned int offset_in_page(void *addr)
>>> +{
>>> +     return (uintptr_t)addr & 4095;
>>> +}
>>> +
>>> +static uint32_t create_semaphore_to_spinner(int i915, igt_spin_t *spin)
>>> +{
>>> +     uint32_t *cs, *map;
>>> +     uint32_t handle;
>>> +
>>> +     handle = gem_create(i915, 4096);
>>> +     cs = map = gem_mmap__cpu(i915, handle, 0, 4096, PROT_WRITE);
>>> +
>>> +     /* Wait until the spinner is running */
>>> +     *cs++ = MI_SEMAPHORE_WAIT |
>>> +             MI_SEMAPHORE_POLL |
>>> +             MI_SEMAPHORE_SAD_NEQ_SDD |
>>> +             (4 - 2);
>>> +     *cs++ = 0;
>>> +     *cs++ = spin->obj[0].offset + 4 * SPIN_POLL_START_IDX;
>>> +     *cs++ = 0;
>>> +
>>> +     /* Then cancel the spinner */
>>> +     *cs++ = MI_STORE_DWORD_IMM;
>>> +     *cs++ = spin->obj[IGT_SPIN_BATCH].offset +
>>> +             offset_in_page(spin->condition);
>>> +     *cs++ = 0;
>>> +     *cs++ = MI_BATCH_BUFFER_END;
>>> +
>>> +     *cs++ = MI_BATCH_BUFFER_END;
>>> +     munmap(map, 4096);
>>> +
>>> +     return handle;
>>> +}
>>> +
>>> +static void bonded_slice(int i915)
>>> +{
>>> +     uint32_t ctx;
>>> +     int *stop;
>>> +
>>> +     igt_require(gem_scheduler_has_semaphores(i915));
>>> +
>>> +     stop = mmap(0, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
>>> +     igt_assert(stop != MAP_FAILED);
>>> +
>>> +     ctx = gem_context_create(i915);
>>> +
>>> +     for (int class = 0; class < 32; class++) {
>>> +             struct i915_engine_class_instance *siblings;
>>> +             struct drm_i915_gem_exec_object2 obj[3] = {};
>>> +             struct drm_i915_gem_execbuffer2 eb = {};
>>> +             unsigned int count;
>>> +             igt_spin_t *spin;
>>> +
>>> +             siblings = list_engines(i915, 1u << class, &count);
>>> +             if (!siblings)
>>> +                     continue;
>>> +
>>> +             if (count < 2) {
>>> +                     free(siblings);
>>> +                     continue;
>>> +             }
>>> +
>>> +             /*
>>> +              * A: semaphore wait on spinner; cancel spinner
>>> +              * B: unpreemptable spinner
>>> +              *
>>> +              * A waits for running ack from B, if scheduled on the same
>>> +              * engine -> hang.
>>> +              *
>>> +              * C+: background load across engines
>>> +              */
>>> +
>>> +             set_load_balancer(i915, ctx, siblings, count, NULL);
>>> +
>>> +             spin = __igt_spin_new(i915,
>>> +                                   .ctx = ctx,
>>> +                                   .flags = (IGT_SPIN_NO_PREEMPTION |
>>> +                                             IGT_SPIN_POLL_RUN));
>>> +             igt_spin_end(spin); /* we just want its address for later */
>>> +             gem_sync(i915, spin->handle);
>>> +             igt_spin_reset(spin);
>>> +
>>> +             obj[0] = spin->obj[0];
>>> +             obj[1] = spin->obj[1];

igt_assert_eq(IGT_SPIN_BATCH, 1);

?

>>> +             obj[2].handle = create_semaphore_to_spinner(i915, spin);
>>> +
>>> +             eb.buffers_ptr = to_user_pointer(obj);
>>> +             eb.rsvd1 = ctx;
>>> +
>>> +             *stop = 0;
>>> +             igt_fork(child, count + 1) {
>>> +                     igt_list_del(&spin->link);
>>> +
>>> +                     ctx = gem_context_clone(i915, ctx,
>>> +                                             I915_CONTEXT_CLONE_ENGINES, 0);
>>> +
>>> +                     while (!READ_ONCE(*stop)) {
>>> +                             spin = igt_spin_new(i915,
>>> +                                                 .ctx = ctx,
>>> +                                                 .engine = (1 + rand() % count),
>>
>> With "count + 1" children and rand load my end up uneven across engines
>> - are you happy with that?
> 
> It's using rand, so it's going to be uneven. count + 1 isn't significant
> in any way. I stopped as soon as I had the test reliably hitting the issue.
> 
>>> +                                                 .flags = IGT_SPIN_POLL_RUN);
>>> +                             igt_spin_busywait_until_started(spin);
>>> +                             usleep(50000);
>>
>> 50ms, hm, ideally there should be a pipe signal before parent starts the
>> test to know children have started. Otherwise parent can finish before
>> they even start, no?
> 
> The children are just to provide noise. The requirement is that we have
> enough load across the system to cause timeslicing to kick in.
>   
>>> +                             igt_spin_free(i915, spin);
>>> +                     }
>>> +
>>> +                     gem_context_destroy(i915, ctx);
>>> +             }
>>> +
>>> +             igt_until_timeout(5) {
>>> +                     igt_spin_reset(spin);
>>
>> What is the reset for?
> 
> We are reusing the spinner inside the loop.
> 
>>> +
>>> +                     /* A: Submit the semaphore wait */
>>> +                     eb.buffer_count = 3;
>>> +                     eb.flags = (1 + rand() % count) | I915_EXEC_FENCE_OUT;
>>> +                     gem_execbuf_wr(i915, &eb);
>>> +
>>> +                     /* B: Submit the spinner (in parallel) */
>>
>> How in parallel when it is the same context so they are implicitly in order?
> 
> Different engines with different timelines, using the submit fence to
> request parallel execution.

Yeah, I kind of missed a few things. For completeness you should also
add a flavour which actually sets up the bond so the "if (bond)" path
in virtual_bond_execute is also exercised. But this looks good.

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko



^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [igt-dev] [PATCH i-g-t] i915/gem_exec_balancer: Check for scheduling bonded-pairs on the same engine
  2019-09-23 16:21       ` Tvrtko Ursulin
@ 2019-09-23 18:11         ` Chris Wilson
  -1 siblings, 0 replies; 13+ messages in thread
From: Chris Wilson @ 2019-09-23 18:11 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx; +Cc: igt-dev

Quoting Tvrtko Ursulin (2019-09-23 17:21:50)
> 
> On 23/09/2019 16:43, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2019-09-23 15:29:11)
> >>
> >> On 20/09/2019 23:26, Chris Wilson wrote:
> >>> +             spin = __igt_spin_new(i915,
> >>> +                                   .ctx = ctx,
> >>> +                                   .flags = (IGT_SPIN_NO_PREEMPTION |
> >>> +                                             IGT_SPIN_POLL_RUN));
> >>> +             igt_spin_end(spin); /* we just want its address for later */
> >>> +             gem_sync(i915, spin->handle);
> >>> +             igt_spin_reset(spin);
> >>> +
> >>> +             obj[0] = spin->obj[0];
> >>> +             obj[1] = spin->obj[1];
> 
> igt_assert_eq(IGT_SPIN_BATCH, 1);

Sensible enough; it's always going to be tied to the implementation, as
we play tricks from the gpu.
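
Something like this where obj[] is filled, I presume (a sketch; the
slots mirror igt_spin's internal layout):

	igt_assert_eq(IGT_SPIN_BATCH, 1); /* obj[1] must be the spin batch */
	obj[0] = spin->obj[0];
	obj[IGT_SPIN_BATCH] = spin->obj[IGT_SPIN_BATCH];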

> >>> +                     /* A: Submit the semaphore wait */
> >>> +                     eb.buffer_count = 3;
> >>> +                     eb.flags = (1 + rand() % count) | I915_EXEC_FENCE_OUT;
> >>> +                     gem_execbuf_wr(i915, &eb);
> >>> +
> >>> +                     /* B: Submit the spinner (in parallel) */
> >>
> >> How in parallel when it is the same context so they are implicitly in order?
> > 
> > Different engines with different timelines, using the submit fence to
> > request parallel execution.
> 
> Yeah, I kind of missed a few things. For completeness you should also
> add a flavour which actually sets up the bond so the "if (bond)" path
> in virtual_bond_execute is also exercised. But this looks good.

I'll try to capture the questions in better comments. I was avoiding
setting up the explicit bonds mostly because I was amazed to get a
working test, and I was getting lost in the combinatorics of making it
explicit :) (i.e. what should we do if we have more engines in the set).
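
For the two-sibling case here I'd expect the explicit bond to look
something like this (untested sketch using the i915_drm.h helpers;
assuming the balancer sits in slot 0 of the engine map):

	I915_DEFINE_CONTEXT_ENGINES_BOND(bond, 1);

	memset(&bond, 0, sizeof(bond));
	bond.base.name = I915_CONTEXT_ENGINES_EXT_BOND;
	bond.master = siblings[0];	/* the engine A is submitted to */
	bond.virtual_index = 0;		/* the balancer's slot in the map */
	bond.num_bonds = 1;
	bond.engines[0] = siblings[1];	/* restrict B to the other sibling */

	set_load_balancer(i915, ctx, siblings, count, &bond);

It's choosing master/engines[] once there are more than two siblings
that turns into a combinatorial mess.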
-Chris

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2019-09-23 18:11 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-09-20 22:26 [PATCH i-g-t] i915/gem_exec_balancer: Check for scheduling bonded-pairs on the same engine Chris Wilson
2019-09-20 22:26 ` [igt-dev] " Chris Wilson
2019-09-20 22:40 ` [igt-dev] ✗ GitLab.Pipeline: warning for " Patchwork
2019-09-20 23:11 ` [igt-dev] ✓ Fi.CI.BAT: success " Patchwork
2019-09-22 12:24 ` [igt-dev] ✗ Fi.CI.IGT: failure " Patchwork
2019-09-23 14:29 ` [igt-dev] [PATCH i-g-t] " Tvrtko Ursulin
2019-09-23 14:29   ` Tvrtko Ursulin
2019-09-23 15:43   ` Chris Wilson
2019-09-23 15:43     ` Chris Wilson
2019-09-23 16:21     ` Tvrtko Ursulin
2019-09-23 16:21       ` Tvrtko Ursulin
2019-09-23 18:11       ` Chris Wilson
2019-09-23 18:11         ` Chris Wilson
