All of lore.kernel.org
 help / color / mirror / Atom feed
* [Intel-gfx] [PATCH i-g-t 0/2] tests/i915/perf: Add stress / race exercises
@ 2023-01-31  9:17 ` Janusz Krzysztofik
  0 siblings, 0 replies; 34+ messages in thread
From: Janusz Krzysztofik @ 2023-01-31  9:17 UTC (permalink / raw)
  To: Petri Latvala, Arkadiusz Hiler, Kamil Konieczny
  Cc: igt-dev, intel-gfx, Chris Wilson

Users reported oopses on list corruptions when using i915 perf with a
number of concurrently running graphics applications.  That indicates we
are currently missing some important tests for such scenarios.  Cover
that gap.

Chris Wilson (1):
  i915/perf: Stress opening of new perf streams against existing
    contexts

Janusz Krzysztofik (1):
  tests/i915/perf: Exercise barrier race

 tests/i915/perf.c | 111 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 111 insertions(+)

-- 
2.25.1


^ permalink raw reply	[flat|nested] 34+ messages in thread

* [igt-dev] [PATCH i-g-t 0/2] tests/i915/perf: Add stress / race exercises
@ 2023-01-31  9:17 ` Janusz Krzysztofik
  0 siblings, 0 replies; 34+ messages in thread
From: Janusz Krzysztofik @ 2023-01-31  9:17 UTC (permalink / raw)
  To: Petri Latvala, Arkadiusz Hiler, Kamil Konieczny
  Cc: igt-dev, intel-gfx, Chris Wilson

Users reported oopses on list corruptions when using i915 perf with a
number of concurrently running graphics applications.  That indicates we
are currently missing some important tests for such scenarios.  Cover
that gap.

Chris Wilson (1):
  i915/perf: Stress opening of new perf streams against existing
    contexts

Janusz Krzysztofik (1):
  tests/i915/perf: Exercise barrier race

 tests/i915/perf.c | 111 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 111 insertions(+)

-- 
2.25.1

^ permalink raw reply	[flat|nested] 34+ messages in thread

* [Intel-gfx] [PATCH i-g-t 1/2] i915/perf: Stress opening of new perf streams against existing contexts
  2023-01-31  9:17 ` [igt-dev] " Janusz Krzysztofik
@ 2023-01-31  9:17   ` Janusz Krzysztofik
  -1 siblings, 0 replies; 34+ messages in thread
From: Janusz Krzysztofik @ 2023-01-31  9:17 UTC (permalink / raw)
  To: Petri Latvala, Arkadiusz Hiler, Kamil Konieczny
  Cc: igt-dev, intel-gfx, Chris Wilson

From: Chris Wilson <chris.p.wilson@linux.intel.com>

Try opening the perf stream while there is a flurry of activity on the
system, both new and old contexts. This will exercise the ability of the
driver to modify those contexts to work with perf.

References: https://gitlab.freedesktop.org/drm/intel/-/issues/6333
Signed-off-by: Chris Wilson <chris.p.wilson@linux.intel.com>
Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik@linux.intel.com>
Cc: Andi Shyti <andi.shyti@linux.intel.com>
---
 tests/i915/perf.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 74 insertions(+)

diff --git a/tests/i915/perf.c b/tests/i915/perf.c
index dd1f1ac399..e33cacc443 100644
--- a/tests/i915/perf.c
+++ b/tests/i915/perf.c
@@ -4885,6 +4885,71 @@ test_whitelisted_registers_userspace_config(void)
 	i915_perf_remove_config(drm_fd, config_id);
 }
 
+static void test_open_race(const struct intel_execution_engine2 *e, int timeout)
+{
+	int *done;
+
+	done = mmap(0, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
+	igt_assert(done != MAP_FAILED);
+
+	igt_fork(child, 1) {
+		uint64_t properties[] = {
+			DRM_I915_PERF_PROP_SAMPLE_OA, true,
+			DRM_I915_PERF_PROP_OA_METRICS_SET, test_set->perf_oa_metrics_set,
+			DRM_I915_PERF_PROP_OA_FORMAT, test_set->perf_oa_format,
+			DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp_1_millisec,
+		};
+		struct drm_i915_perf_open_param param = {
+			.flags = I915_PERF_FLAG_FD_CLOEXEC,
+			.num_properties = sizeof(properties) / 16,
+			.properties_ptr = to_user_pointer(properties),
+		};
+		unsigned long count = 0;
+
+		do {
+			__perf_close(__perf_open(drm_fd, &param, false));
+			count++;
+		} while (!READ_ONCE(*done));
+
+		igt_info("Completed %lu cycles\n", count);
+	}
+
+	for (int persistence = 0; persistence <= 1; persistence++) {
+		igt_fork(child, sysconf(_SC_NPROCESSORS_ONLN)) {
+			int i915 = gem_reopen_driver(drm_fd);
+
+			do {
+				igt_spin_t *spin;
+				uint64_t ahnd;
+				uint32_t ctx;
+
+				ctx = gem_context_create_for_engine(i915, e->class, e->instance);
+				gem_context_set_persistence(i915, ctx, persistence);
+
+				spin = __igt_spin_new(i915, ctx, .ahnd = ahnd);
+				for (int i = random() % 7; i--; ) {
+					if (random() & 1) {
+						igt_spin_end(spin);
+						gem_sync(i915, spin->handle);
+						igt_spin_reset(spin);
+					}
+					gem_execbuf(i915, &spin->execbuf);
+				}
+
+				gem_context_destroy(i915, ctx);
+				igt_spin_free(i915, spin);
+				put_ahnd(ahnd);
+			} while (!READ_ONCE(*done));
+		}
+	}
+
+	sleep(timeout);
+	*done = 1;
+	igt_waitchildren();
+
+	munmap(done, 4096);
+}
+
 static unsigned
 read_i915_module_ref(void)
 {
@@ -5259,6 +5324,15 @@ igt_main
 	igt_subtest("whitelisted-registers-userspace-config")
 		test_whitelisted_registers_userspace_config();
 
+	igt_subtest_with_dynamic("open-race") {
+		const struct intel_execution_engine2 *e;
+
+		for_each_physical_engine(drm_fd, e)
+			if (e->class == I915_ENGINE_CLASS_RENDER)
+				igt_dynamic_f("%s", e->name)
+					test_open_race(e, 5);
+	}
+
 	igt_fixture {
 		/* leave sysctl options in their default state... */
 		write_u64_file("/proc/sys/dev/i915/oa_max_sample_rate", 100000);
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [igt-dev] [PATCH i-g-t 1/2] i915/perf: Stress opening of new perf streams against existing contexts
@ 2023-01-31  9:17   ` Janusz Krzysztofik
  0 siblings, 0 replies; 34+ messages in thread
From: Janusz Krzysztofik @ 2023-01-31  9:17 UTC (permalink / raw)
  To: Petri Latvala, Arkadiusz Hiler, Kamil Konieczny
  Cc: igt-dev, intel-gfx, Chris Wilson

From: Chris Wilson <chris.p.wilson@linux.intel.com>

Try opening the perf stream while there is a flurry of activity on the
system, both new and old contexts. This will exercise the ability of the
driver to modify those contexts to work with perf.

References: https://gitlab.freedesktop.org/drm/intel/-/issues/6333
Signed-off-by: Chris Wilson <chris.p.wilson@linux.intel.com>
Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik@linux.intel.com>
Cc: Andi Shyti <andi.shyti@linux.intel.com>
---
 tests/i915/perf.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 74 insertions(+)

diff --git a/tests/i915/perf.c b/tests/i915/perf.c
index dd1f1ac399..e33cacc443 100644
--- a/tests/i915/perf.c
+++ b/tests/i915/perf.c
@@ -4885,6 +4885,71 @@ test_whitelisted_registers_userspace_config(void)
 	i915_perf_remove_config(drm_fd, config_id);
 }
 
+static void test_open_race(const struct intel_execution_engine2 *e, int timeout)
+{
+	int *done;
+
+	done = mmap(0, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
+	igt_assert(done != MAP_FAILED);
+
+	igt_fork(child, 1) {
+		uint64_t properties[] = {
+			DRM_I915_PERF_PROP_SAMPLE_OA, true,
+			DRM_I915_PERF_PROP_OA_METRICS_SET, test_set->perf_oa_metrics_set,
+			DRM_I915_PERF_PROP_OA_FORMAT, test_set->perf_oa_format,
+			DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp_1_millisec,
+		};
+		struct drm_i915_perf_open_param param = {
+			.flags = I915_PERF_FLAG_FD_CLOEXEC,
+			.num_properties = sizeof(properties) / 16,
+			.properties_ptr = to_user_pointer(properties),
+		};
+		unsigned long count = 0;
+
+		do {
+			__perf_close(__perf_open(drm_fd, &param, false));
+			count++;
+		} while (!READ_ONCE(*done));
+
+		igt_info("Completed %lu cycles\n", count);
+	}
+
+	for (int persistence = 0; persistence <= 1; persistence++) {
+		igt_fork(child, sysconf(_SC_NPROCESSORS_ONLN)) {
+			int i915 = gem_reopen_driver(drm_fd);
+
+			do {
+				igt_spin_t *spin;
+				uint64_t ahnd;
+				uint32_t ctx;
+
+				ctx = gem_context_create_for_engine(i915, e->class, e->instance);
+				gem_context_set_persistence(i915, ctx, persistence);
+
+				spin = __igt_spin_new(i915, ctx, .ahnd = ahnd);
+				for (int i = random() % 7; i--; ) {
+					if (random() & 1) {
+						igt_spin_end(spin);
+						gem_sync(i915, spin->handle);
+						igt_spin_reset(spin);
+					}
+					gem_execbuf(i915, &spin->execbuf);
+				}
+
+				gem_context_destroy(i915, ctx);
+				igt_spin_free(i915, spin);
+				put_ahnd(ahnd);
+			} while (!READ_ONCE(*done));
+		}
+	}
+
+	sleep(timeout);
+	*done = 1;
+	igt_waitchildren();
+
+	munmap(done, 4096);
+}
+
 static unsigned
 read_i915_module_ref(void)
 {
@@ -5259,6 +5324,15 @@ igt_main
 	igt_subtest("whitelisted-registers-userspace-config")
 		test_whitelisted_registers_userspace_config();
 
+	igt_subtest_with_dynamic("open-race") {
+		const struct intel_execution_engine2 *e;
+
+		for_each_physical_engine(drm_fd, e)
+			if (e->class == I915_ENGINE_CLASS_RENDER)
+				igt_dynamic_f("%s", e->name)
+					test_open_race(e, 5);
+	}
+
 	igt_fixture {
 		/* leave sysctl options in their default state... */
 		write_u64_file("/proc/sys/dev/i915/oa_max_sample_rate", 100000);
-- 
2.25.1

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [Intel-gfx] [PATCH i-g-t 2/2] tests/i915/perf: Exercise barrier race
  2023-01-31  9:17 ` [igt-dev] " Janusz Krzysztofik
@ 2023-01-31  9:17   ` Janusz Krzysztofik
  -1 siblings, 0 replies; 34+ messages in thread
From: Janusz Krzysztofik @ 2023-01-31  9:17 UTC (permalink / raw)
  To: Petri Latvala, Arkadiusz Hiler, Kamil Konieczny
  Cc: igt-dev, intel-gfx, Chris Wilson

Add a new subtest focused on exercising interaction between perf open /
close operations, which replace active barriers with perf requests, and
concurrent barrier preallocate / acquire operations performed during
context first pin / last unpin.

References: https://gitlab.freedesktop.org/drm/intel/-/issues/6333
Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik@linux.intel.com>
Cc: Chris Wilson <chris.p.wilson@linux.intel.com>
---
 tests/i915/perf.c | 41 +++++++++++++++++++++++++++++++++++++++--
 1 file changed, 39 insertions(+), 2 deletions(-)

diff --git a/tests/i915/perf.c b/tests/i915/perf.c
index e33cacc443..11a3ec21ab 100644
--- a/tests/i915/perf.c
+++ b/tests/i915/perf.c
@@ -39,6 +39,7 @@
 #include <math.h>
 
 #include "i915/gem.h"
+#include "i915/gem_create.h"
 #include "i915/perf.h"
 #include "igt.h"
 #include "igt_perf.h"
@@ -4885,7 +4886,27 @@ test_whitelisted_registers_userspace_config(void)
 	i915_perf_remove_config(drm_fd, config_id);
 }
 
-static void test_open_race(const struct intel_execution_engine2 *e, int timeout)
+static void gem_exec_nop(int i915, const struct intel_execution_engine2 *e)
+{
+	const uint32_t bbe = MI_BATCH_BUFFER_END;
+	struct drm_i915_gem_exec_object2 obj = { };
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+	};
+
+	obj.handle = gem_create(i915, 4096);
+	gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
+
+	execbuf.flags = e->flags;
+	gem_execbuf(i915, &execbuf);
+
+	gem_sync(i915, obj.handle);
+	gem_close(i915, obj.handle);
+}
+
+static void test_open_race(const struct intel_execution_engine2 *e, int timeout,
+			   bool use_spin)
 {
 	int *done;
 
@@ -4926,6 +4947,12 @@ static void test_open_race(const struct intel_execution_engine2 *e, int timeout)
 				ctx = gem_context_create_for_engine(i915, e->class, e->instance);
 				gem_context_set_persistence(i915, ctx, persistence);
 
+				if (!use_spin) {
+					gem_exec_nop(i915, e);
+					gem_context_destroy(i915, ctx);
+					continue;
+				}
+
 				spin = __igt_spin_new(i915, ctx, .ahnd = ahnd);
 				for (int i = random() % 7; i--; ) {
 					if (random() & 1) {
@@ -5330,7 +5357,17 @@ igt_main
 		for_each_physical_engine(drm_fd, e)
 			if (e->class == I915_ENGINE_CLASS_RENDER)
 				igt_dynamic_f("%s", e->name)
-					test_open_race(e, 5);
+					test_open_race(e, 5, true);
+	}
+
+	igt_describe("Exercise perf open/close against intensive barrier preallocate/acquire");
+	igt_subtest_with_dynamic("barrier-race") {
+		const struct intel_execution_engine2 *e;
+
+		for_each_physical_engine(drm_fd, e)
+			if (e->class == I915_ENGINE_CLASS_RENDER)
+				igt_dynamic_f("%s", e->name)
+					test_open_race(e, 5, false);
 	}
 
 	igt_fixture {
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [igt-dev] [PATCH i-g-t 2/2] tests/i915/perf: Exercise barrier race
@ 2023-01-31  9:17   ` Janusz Krzysztofik
  0 siblings, 0 replies; 34+ messages in thread
From: Janusz Krzysztofik @ 2023-01-31  9:17 UTC (permalink / raw)
  To: Petri Latvala, Arkadiusz Hiler, Kamil Konieczny
  Cc: igt-dev, intel-gfx, Chris Wilson

Add a new subtest focused on exercising interaction between perf open /
close operations, which replace active barriers with perf requests, and
concurrent barrier preallocate / acquire operations performed during
context first pin / last unpin.

References: https://gitlab.freedesktop.org/drm/intel/-/issues/6333
Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik@linux.intel.com>
Cc: Chris Wilson <chris.p.wilson@linux.intel.com>
---
 tests/i915/perf.c | 41 +++++++++++++++++++++++++++++++++++++++--
 1 file changed, 39 insertions(+), 2 deletions(-)

diff --git a/tests/i915/perf.c b/tests/i915/perf.c
index e33cacc443..11a3ec21ab 100644
--- a/tests/i915/perf.c
+++ b/tests/i915/perf.c
@@ -39,6 +39,7 @@
 #include <math.h>
 
 #include "i915/gem.h"
+#include "i915/gem_create.h"
 #include "i915/perf.h"
 #include "igt.h"
 #include "igt_perf.h"
@@ -4885,7 +4886,27 @@ test_whitelisted_registers_userspace_config(void)
 	i915_perf_remove_config(drm_fd, config_id);
 }
 
-static void test_open_race(const struct intel_execution_engine2 *e, int timeout)
+static void gem_exec_nop(int i915, const struct intel_execution_engine2 *e)
+{
+	const uint32_t bbe = MI_BATCH_BUFFER_END;
+	struct drm_i915_gem_exec_object2 obj = { };
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+	};
+
+	obj.handle = gem_create(i915, 4096);
+	gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
+
+	execbuf.flags = e->flags;
+	gem_execbuf(i915, &execbuf);
+
+	gem_sync(i915, obj.handle);
+	gem_close(i915, obj.handle);
+}
+
+static void test_open_race(const struct intel_execution_engine2 *e, int timeout,
+			   bool use_spin)
 {
 	int *done;
 
@@ -4926,6 +4947,12 @@ static void test_open_race(const struct intel_execution_engine2 *e, int timeout)
 				ctx = gem_context_create_for_engine(i915, e->class, e->instance);
 				gem_context_set_persistence(i915, ctx, persistence);
 
+				if (!use_spin) {
+					gem_exec_nop(i915, e);
+					gem_context_destroy(i915, ctx);
+					continue;
+				}
+
 				spin = __igt_spin_new(i915, ctx, .ahnd = ahnd);
 				for (int i = random() % 7; i--; ) {
 					if (random() & 1) {
@@ -5330,7 +5357,17 @@ igt_main
 		for_each_physical_engine(drm_fd, e)
 			if (e->class == I915_ENGINE_CLASS_RENDER)
 				igt_dynamic_f("%s", e->name)
-					test_open_race(e, 5);
+					test_open_race(e, 5, true);
+	}
+
+	igt_describe("Exercise perf open/close against intensive barrier preallocate/acquire");
+	igt_subtest_with_dynamic("barrier-race") {
+		const struct intel_execution_engine2 *e;
+
+		for_each_physical_engine(drm_fd, e)
+			if (e->class == I915_ENGINE_CLASS_RENDER)
+				igt_dynamic_f("%s", e->name)
+					test_open_race(e, 5, false);
 	}
 
 	igt_fixture {
-- 
2.25.1

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [igt-dev] ✓ Fi.CI.BAT: success for tests/i915/perf: Add stress / race exercises
  2023-01-31  9:17 ` [igt-dev] " Janusz Krzysztofik
                   ` (2 preceding siblings ...)
  (?)
@ 2023-01-31 11:16 ` Patchwork
  -1 siblings, 0 replies; 34+ messages in thread
From: Patchwork @ 2023-01-31 11:16 UTC (permalink / raw)
  To: Janusz Krzysztofik; +Cc: igt-dev

[-- Attachment #1: Type: text/plain, Size: 4424 bytes --]

== Series Details ==

Series: tests/i915/perf: Add stress / race exercises
URL   : https://patchwork.freedesktop.org/series/113522/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_12672 -> IGTPW_8419
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_8419/index.html

Participating hosts (26 -> 25)
------------------------------

  Missing    (1): fi-snb-2520m 

Known issues
------------

  Here are the changes found in IGTPW_8419 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@i915_selftest@live@workarounds:
    - bat-dg1-5:          [PASS][1] -> [ABORT][2] ([i915#4983] / [i915#7981])
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12672/bat-dg1-5/igt@i915_selftest@live@workarounds.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_8419/bat-dg1-5/igt@i915_selftest@live@workarounds.html

  
#### Possible fixes ####

  * igt@i915_pm_rpm@basic-rte:
    - {bat-adlp-6}:       [ABORT][3] ([i915#7977]) -> [PASS][4]
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12672/bat-adlp-6/igt@i915_pm_rpm@basic-rte.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_8419/bat-adlp-6/igt@i915_pm_rpm@basic-rte.html

  * igt@i915_selftest@live@hangcheck:
    - {bat-dg2-8}:        [ABORT][5] -> [PASS][6]
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12672/bat-dg2-8/igt@i915_selftest@live@hangcheck.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_8419/bat-dg2-8/igt@i915_selftest@live@hangcheck.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#109295]: https://bugs.freedesktop.org/show_bug.cgi?id=109295
  [i915#3291]: https://gitlab.freedesktop.org/drm/intel/issues/3291
  [i915#3301]: https://gitlab.freedesktop.org/drm/intel/issues/3301
  [i915#3708]: https://gitlab.freedesktop.org/drm/intel/issues/3708
  [i915#4613]: https://gitlab.freedesktop.org/drm/intel/issues/4613
  [i915#4983]: https://gitlab.freedesktop.org/drm/intel/issues/4983
  [i915#5354]: https://gitlab.freedesktop.org/drm/intel/issues/5354
  [i915#6367]: https://gitlab.freedesktop.org/drm/intel/issues/6367
  [i915#6621]: https://gitlab.freedesktop.org/drm/intel/issues/6621
  [i915#6645]: https://gitlab.freedesktop.org/drm/intel/issues/6645
  [i915#6997]: https://gitlab.freedesktop.org/drm/intel/issues/6997
  [i915#7828]: https://gitlab.freedesktop.org/drm/intel/issues/7828
  [i915#7913]: https://gitlab.freedesktop.org/drm/intel/issues/7913
  [i915#7977]: https://gitlab.freedesktop.org/drm/intel/issues/7977
  [i915#7981]: https://gitlab.freedesktop.org/drm/intel/issues/7981


Build changes
-------------

  * CI: CI-20190529 -> None
  * IGT: IGT_7143 -> IGTPW_8419

  CI-20190529: 20190529
  CI_DRM_12672: df5c31e78609626a1278ad79305cd408f403ceac @ git://anongit.freedesktop.org/gfx-ci/linux
  IGTPW_8419: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_8419/index.html
  IGT_7143: c7b12dcc460fc2348e1fa7f4dcb791bb82e29e44 @ https://gitlab.freedesktop.org/drm/igt-gpu-tools.git


Testlist changes
----------------

+igt@perf@barrier-race
+igt@perf@open-race
-igt@v3d/v3d_submit_cl@bad-bo
-igt@v3d/v3d_submit_cl@bad-extension
-igt@v3d/v3d_submit_cl@bad-flag
-igt@v3d/v3d_submit_cl@bad-in-sync
-igt@v3d/v3d_submit_cl@bad-multisync-extension
-igt@v3d/v3d_submit_cl@bad-multisync-in-sync
-igt@v3d/v3d_submit_cl@bad-multisync-out-sync
-igt@v3d/v3d_submit_cl@bad-multisync-pad
-igt@v3d/v3d_submit_cl@bad-pad
-igt@v3d/v3d_submit_cl@bad-perfmon
-igt@v3d/v3d_submit_cl@job-perfmon
-igt@v3d/v3d_submit_cl@multiple-job-submission
-igt@v3d/v3d_submit_cl@multisync-out-syncs
-igt@v3d/v3d_submit_cl@multi-and-single-sync
-igt@v3d/v3d_submit_cl@simple-flush-cache
-igt@v3d/v3d_submit_cl@single-in-sync
-igt@v3d/v3d_submit_cl@single-out-sync
-igt@v3d/v3d_submit_cl@valid-multisync-submission
-igt@v3d/v3d_submit_cl@valid-submission
-igt@v3d/v3d_wait_bo@bad-bo
-igt@v3d/v3d_wait_bo@bad-pad
-igt@v3d/v3d_wait_bo@map-bo-0ns
-igt@v3d/v3d_wait_bo@map-bo-1ns
-igt@v3d/v3d_wait_bo@unused-bo-0ns
-igt@v3d/v3d_wait_bo@unused-bo-1ns
-igt@v3d/v3d_wait_bo@used-bo
-igt@v3d/v3d_wait_bo@used-bo-0ns
-igt@v3d/v3d_wait_bo@used-bo-1ns

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_8419/index.html

[-- Attachment #2: Type: text/html, Size: 4438 bytes --]

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Intel-gfx] [PATCH i-g-t 1/2] i915/perf: Stress opening of new perf streams against existing contexts
  2023-01-31  9:17   ` [igt-dev] " Janusz Krzysztofik
@ 2023-01-31 11:59     ` Kamil Konieczny
  -1 siblings, 0 replies; 34+ messages in thread
From: Kamil Konieczny @ 2023-01-31 11:59 UTC (permalink / raw)
  To: Janusz Krzysztofik
  Cc: Arkadiusz Hiler, intel-gfx, igt-dev, Petri Latvala, Chris Wilson

Hi,

On 2023-01-31 at 10:17:30 +0100, Janusz Krzysztofik wrote:
> From: Chris Wilson <chris.p.wilson@linux.intel.com>
> 
> Try opening the perf stream while there is a flurry of activity on the
> system, both new and old contexts. This will exercise the ability of the
> driver to modify those contexts to work with perf.
> 
> References: https://gitlab.freedesktop.org/drm/intel/-/issues/6333
> Signed-off-by: Chris Wilson <chris.p.wilson@linux.intel.com>
> Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik@linux.intel.com>
> Cc: Andi Shyti <andi.shyti@linux.intel.com>
> ---
>  tests/i915/perf.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 74 insertions(+)
> 
> diff --git a/tests/i915/perf.c b/tests/i915/perf.c
> index dd1f1ac399..e33cacc443 100644
> --- a/tests/i915/perf.c
> +++ b/tests/i915/perf.c
> @@ -4885,6 +4885,71 @@ test_whitelisted_registers_userspace_config(void)
>  	i915_perf_remove_config(drm_fd, config_id);
>  }
>  
> +static void test_open_race(const struct intel_execution_engine2 *e, int timeout)
> +{
> +	int *done;
> +
> +	done = mmap(0, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
> +	igt_assert(done != MAP_FAILED);
> +
> +	igt_fork(child, 1) {
> +		uint64_t properties[] = {
> +			DRM_I915_PERF_PROP_SAMPLE_OA, true,
> +			DRM_I915_PERF_PROP_OA_METRICS_SET, test_set->perf_oa_metrics_set,
> +			DRM_I915_PERF_PROP_OA_FORMAT, test_set->perf_oa_format,
> +			DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp_1_millisec,
> +		};
> +		struct drm_i915_perf_open_param param = {
> +			.flags = I915_PERF_FLAG_FD_CLOEXEC,
> +			.num_properties = sizeof(properties) / 16,
> +			.properties_ptr = to_user_pointer(properties),
> +		};
> +		unsigned long count = 0;
> +
> +		do {
> +			__perf_close(__perf_open(drm_fd, &param, false));
> +			count++;
> +		} while (!READ_ONCE(*done));
> +
> +		igt_info("Completed %lu cycles\n", count);
> +	}
> +
> +	for (int persistence = 0; persistence <= 1; persistence++) {
> +		igt_fork(child, sysconf(_SC_NPROCESSORS_ONLN)) {
> +			int i915 = gem_reopen_driver(drm_fd);
> +
> +			do {
> +				igt_spin_t *spin;
> +				uint64_t ahnd;
> +				uint32_t ctx;
> +
> +				ctx = gem_context_create_for_engine(i915, e->class, e->instance);
> +				gem_context_set_persistence(i915, ctx, persistence);
> +
> +				spin = __igt_spin_new(i915, ctx, .ahnd = ahnd);
> +				for (int i = random() % 7; i--; ) {
> +					if (random() & 1) {
> +						igt_spin_end(spin);
> +						gem_sync(i915, spin->handle);
> +						igt_spin_reset(spin);
> +					}
> +					gem_execbuf(i915, &spin->execbuf);
> +				}
> +
> +				gem_context_destroy(i915, ctx);
> +				igt_spin_free(i915, spin);
> +				put_ahnd(ahnd);
> +			} while (!READ_ONCE(*done));
> +		}
> +	}
> +
> +	sleep(timeout);
> +	*done = 1;
> +	igt_waitchildren();
> +
> +	munmap(done, 4096);
> +}
> +
>  static unsigned
>  read_i915_module_ref(void)
>  {
> @@ -5259,6 +5324,15 @@ igt_main
>  	igt_subtest("whitelisted-registers-userspace-config")
>  		test_whitelisted_registers_userspace_config();
>  

Please add description to new test.

Regards,
Kamil

> +	igt_subtest_with_dynamic("open-race") {
> +		const struct intel_execution_engine2 *e;
> +
> +		for_each_physical_engine(drm_fd, e)
> +			if (e->class == I915_ENGINE_CLASS_RENDER)
> +				igt_dynamic_f("%s", e->name)
> +					test_open_race(e, 5);
> +	}
> +
>  	igt_fixture {
>  		/* leave sysctl options in their default state... */
>  		write_u64_file("/proc/sys/dev/i915/oa_max_sample_rate", 100000);
> -- 
> 2.25.1
> 

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [igt-dev] [PATCH i-g-t 1/2] i915/perf: Stress opening of new perf streams against existing contexts
@ 2023-01-31 11:59     ` Kamil Konieczny
  0 siblings, 0 replies; 34+ messages in thread
From: Kamil Konieczny @ 2023-01-31 11:59 UTC (permalink / raw)
  To: Janusz Krzysztofik; +Cc: intel-gfx, igt-dev, Chris Wilson

Hi,

On 2023-01-31 at 10:17:30 +0100, Janusz Krzysztofik wrote:
> From: Chris Wilson <chris.p.wilson@linux.intel.com>
> 
> Try opening the perf stream while there is a flurry of activity on the
> system, both new and old contexts. This will exercise the ability of the
> driver to modify those contexts to work with perf.
> 
> References: https://gitlab.freedesktop.org/drm/intel/-/issues/6333
> Signed-off-by: Chris Wilson <chris.p.wilson@linux.intel.com>
> Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik@linux.intel.com>
> Cc: Andi Shyti <andi.shyti@linux.intel.com>
> ---
>  tests/i915/perf.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 74 insertions(+)
> 
> diff --git a/tests/i915/perf.c b/tests/i915/perf.c
> index dd1f1ac399..e33cacc443 100644
> --- a/tests/i915/perf.c
> +++ b/tests/i915/perf.c
> @@ -4885,6 +4885,71 @@ test_whitelisted_registers_userspace_config(void)
>  	i915_perf_remove_config(drm_fd, config_id);
>  }
>  
> +static void test_open_race(const struct intel_execution_engine2 *e, int timeout)
> +{
> +	int *done;
> +
> +	done = mmap(0, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
> +	igt_assert(done != MAP_FAILED);
> +
> +	igt_fork(child, 1) {
> +		uint64_t properties[] = {
> +			DRM_I915_PERF_PROP_SAMPLE_OA, true,
> +			DRM_I915_PERF_PROP_OA_METRICS_SET, test_set->perf_oa_metrics_set,
> +			DRM_I915_PERF_PROP_OA_FORMAT, test_set->perf_oa_format,
> +			DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp_1_millisec,
> +		};
> +		struct drm_i915_perf_open_param param = {
> +			.flags = I915_PERF_FLAG_FD_CLOEXEC,
> +			.num_properties = sizeof(properties) / 16,
> +			.properties_ptr = to_user_pointer(properties),
> +		};
> +		unsigned long count = 0;
> +
> +		do {
> +			__perf_close(__perf_open(drm_fd, &param, false));
> +			count++;
> +		} while (!READ_ONCE(*done));
> +
> +		igt_info("Completed %lu cycles\n", count);
> +	}
> +
> +	for (int persistence = 0; persistence <= 1; persistence++) {
> +		igt_fork(child, sysconf(_SC_NPROCESSORS_ONLN)) {
> +			int i915 = gem_reopen_driver(drm_fd);
> +
> +			do {
> +				igt_spin_t *spin;
> +				uint64_t ahnd;
> +				uint32_t ctx;
> +
> +				ctx = gem_context_create_for_engine(i915, e->class, e->instance);
> +				gem_context_set_persistence(i915, ctx, persistence);
> +
> +				spin = __igt_spin_new(i915, ctx, .ahnd = ahnd);
> +				for (int i = random() % 7; i--; ) {
> +					if (random() & 1) {
> +						igt_spin_end(spin);
> +						gem_sync(i915, spin->handle);
> +						igt_spin_reset(spin);
> +					}
> +					gem_execbuf(i915, &spin->execbuf);
> +				}
> +
> +				gem_context_destroy(i915, ctx);
> +				igt_spin_free(i915, spin);
> +				put_ahnd(ahnd);
> +			} while (!READ_ONCE(*done));
> +		}
> +	}
> +
> +	sleep(timeout);
> +	*done = 1;
> +	igt_waitchildren();
> +
> +	munmap(done, 4096);
> +}
> +
>  static unsigned
>  read_i915_module_ref(void)
>  {
> @@ -5259,6 +5324,15 @@ igt_main
>  	igt_subtest("whitelisted-registers-userspace-config")
>  		test_whitelisted_registers_userspace_config();
>  

Please add description to new test.

Regards,
Kamil

> +	igt_subtest_with_dynamic("open-race") {
> +		const struct intel_execution_engine2 *e;
> +
> +		for_each_physical_engine(drm_fd, e)
> +			if (e->class == I915_ENGINE_CLASS_RENDER)
> +				igt_dynamic_f("%s", e->name)
> +					test_open_race(e, 5);
> +	}
> +
>  	igt_fixture {
>  		/* leave sysctl options in their default state... */
>  		write_u64_file("/proc/sys/dev/i915/oa_max_sample_rate", 100000);
> -- 
> 2.25.1
> 

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Intel-gfx] [PATCH i-g-t 1/2] i915/perf: Stress opening of new perf streams against existing contexts
  2023-01-31 11:59     ` [igt-dev] " Kamil Konieczny
@ 2023-01-31 14:14       ` Janusz Krzysztofik
  -1 siblings, 0 replies; 34+ messages in thread
From: Janusz Krzysztofik @ 2023-01-31 14:14 UTC (permalink / raw)
  To: Kamil Konieczny
  Cc: Arkadiusz Hiler, intel-gfx, igt-dev, Petri Latvala, Chris Wilson

Hi Kamil,

Thanks for review.

On Tuesday, 31 January 2023 12:59:10 CET Kamil Konieczny wrote:
...
> > @@ -5259,6 +5324,15 @@ igt_main
> >  	igt_subtest("whitelisted-registers-userspace-config")
> >  		test_whitelisted_registers_userspace_config();
> >  
> 
> Please add description to new test.

Sure, please expect v2 with this fixed.

Thanks,
Janusz



^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [igt-dev] [PATCH i-g-t 1/2] i915/perf: Stress opening of new perf streams against existing contexts
@ 2023-01-31 14:14       ` Janusz Krzysztofik
  0 siblings, 0 replies; 34+ messages in thread
From: Janusz Krzysztofik @ 2023-01-31 14:14 UTC (permalink / raw)
  To: Kamil Konieczny; +Cc: intel-gfx, igt-dev, Chris Wilson

Hi Kamil,

Thanks for review.

On Tuesday, 31 January 2023 12:59:10 CET Kamil Konieczny wrote:
...
> > @@ -5259,6 +5324,15 @@ igt_main
> >  	igt_subtest("whitelisted-registers-userspace-config")
> >  		test_whitelisted_registers_userspace_config();
> >  
> 
> Please add description to new test.

Sure, please expect v2 with this fixed.

Thanks,
Janusz


^ permalink raw reply	[flat|nested] 34+ messages in thread

* [igt-dev] ✓ Fi.CI.IGT: success for tests/i915/perf: Add stress / race exercises
  2023-01-31  9:17 ` [igt-dev] " Janusz Krzysztofik
                   ` (3 preceding siblings ...)
  (?)
@ 2023-01-31 15:08 ` Patchwork
  -1 siblings, 0 replies; 34+ messages in thread
From: Patchwork @ 2023-01-31 15:08 UTC (permalink / raw)
  To: Janusz Krzysztofik; +Cc: igt-dev

[-- Attachment #1: Type: text/plain, Size: 19497 bytes --]

== Series Details ==

Series: tests/i915/perf: Add stress / race exercises
URL   : https://patchwork.freedesktop.org/series/113522/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_12672_full -> IGTPW_8419_full
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_8419/index.html

Participating hosts (10 -> 11)
------------------------------

  Additional (1): shard-rkl0 

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in IGTPW_8419_full:

### IGT changes ###

#### Possible regressions ####

  * {igt@perf@barrier-race@rcs0} (NEW):
    - {shard-rkl}:        NOTRUN -> [ABORT][1]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_8419/shard-rkl-3/igt@perf@barrier-race@rcs0.html

  * {igt@perf@open-race@rcs0} (NEW):
    - {shard-rkl}:        NOTRUN -> [FAIL][2]
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_8419/shard-rkl-1/igt@perf@open-race@rcs0.html
    - {shard-dg1}:        NOTRUN -> [FAIL][3]
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_8419/shard-dg1-17/igt@perf@open-race@rcs0.html

  
New tests
---------

  New tests have been introduced between CI_DRM_12672_full and IGTPW_8419_full:

### New IGT tests (4) ###

  * igt@perf@barrier-race:
    - Statuses :
    - Exec time: [None] s

  * igt@perf@barrier-race@rcs0:
    - Statuses : 1 abort(s) 2 pass(s)
    - Exec time: [0.0] s

  * igt@perf@open-race:
    - Statuses :
    - Exec time: [None] s

  * igt@perf@open-race@rcs0:
    - Statuses : 2 fail(s) 2 pass(s)
    - Exec time: [0.0] s

  

Known issues
------------

  Here are the changes found in IGTPW_8419_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@kms_flip@flip-vs-expired-vblank@b-hdmi-a2:
    - shard-glk:          [PASS][4] -> [FAIL][5] ([i915#79])
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12672/shard-glk4/igt@kms_flip@flip-vs-expired-vblank@b-hdmi-a2.html
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_8419/shard-glk5/igt@kms_flip@flip-vs-expired-vblank@b-hdmi-a2.html

  
#### Possible fixes ####

  * igt@gem_eio@reset-stress:
    - {shard-dg1}:        [FAIL][6] ([i915#5784]) -> [PASS][7]
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12672/shard-dg1-16/igt@gem_eio@reset-stress.html
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_8419/shard-dg1-17/igt@gem_eio@reset-stress.html

  * igt@gem_exec_fair@basic-deadline:
    - {shard-rkl}:        [FAIL][8] ([i915#2846]) -> [PASS][9]
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12672/shard-rkl-2/igt@gem_exec_fair@basic-deadline.html
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_8419/shard-rkl-1/igt@gem_exec_fair@basic-deadline.html

  * igt@gem_exec_fair@basic-pace-share@rcs0:
    - shard-glk:          [FAIL][10] ([i915#2842]) -> [PASS][11]
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12672/shard-glk9/igt@gem_exec_fair@basic-pace-share@rcs0.html
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_8419/shard-glk1/igt@gem_exec_fair@basic-pace-share@rcs0.html

  * igt@gem_exec_fair@basic-pace-solo@rcs0:
    - {shard-rkl}:        [FAIL][12] ([i915#2842]) -> [PASS][13]
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12672/shard-rkl-2/igt@gem_exec_fair@basic-pace-solo@rcs0.html
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_8419/shard-rkl-5/igt@gem_exec_fair@basic-pace-solo@rcs0.html

  * igt@gem_exec_reloc@basic-wc-read-noreloc:
    - {shard-rkl}:        [SKIP][14] ([i915#3281]) -> [PASS][15] +6 similar issues
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12672/shard-rkl-3/igt@gem_exec_reloc@basic-wc-read-noreloc.html
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_8419/shard-rkl-5/igt@gem_exec_reloc@basic-wc-read-noreloc.html

  * igt@gem_set_tiling_vs_pwrite:
    - {shard-rkl}:        [SKIP][16] ([i915#3282]) -> [PASS][17] +3 similar issues
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12672/shard-rkl-4/igt@gem_set_tiling_vs_pwrite.html
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_8419/shard-rkl-5/igt@gem_set_tiling_vs_pwrite.html

  * igt@gen9_exec_parse@bb-chained:
    - {shard-rkl}:        [SKIP][18] ([i915#2527]) -> [PASS][19] +2 similar issues
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12672/shard-rkl-3/igt@gen9_exec_parse@bb-chained.html
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_8419/shard-rkl-5/igt@gen9_exec_parse@bb-chained.html

  * igt@i915_pm_rc6_residency@rc6-idle@vcs0:
    - {shard-rkl}:        [WARN][20] ([i915#2681]) -> [PASS][21]
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12672/shard-rkl-5/igt@i915_pm_rc6_residency@rc6-idle@vcs0.html
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_8419/shard-rkl-3/igt@i915_pm_rc6_residency@rc6-idle@vcs0.html

  * igt@i915_pm_rpm@modeset-lpsp-stress-no-wait:
    - {shard-rkl}:        [SKIP][22] ([i915#1397]) -> [PASS][23]
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12672/shard-rkl-3/igt@i915_pm_rpm@modeset-lpsp-stress-no-wait.html
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_8419/shard-rkl-6/igt@i915_pm_rpm@modeset-lpsp-stress-no-wait.html
    - {shard-tglu}:       [SKIP][24] ([i915#1397]) -> [PASS][25]
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12672/shard-tglu-6/igt@i915_pm_rpm@modeset-lpsp-stress-no-wait.html
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_8419/shard-tglu-8/igt@i915_pm_rpm@modeset-lpsp-stress-no-wait.html

  * igt@i915_pm_rpm@pm-tiling:
    - {shard-rkl}:        [SKIP][26] ([fdo#109308]) -> [PASS][27]
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12672/shard-rkl-4/igt@i915_pm_rpm@pm-tiling.html
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_8419/shard-rkl-6/igt@i915_pm_rpm@pm-tiling.html

  * igt@kms_ccs@pipe-a-bad-pixel-format-y_tiled_gen12_rc_ccs:
    - {shard-rkl}:        [SKIP][28] ([i915#1845] / [i915#4098]) -> [PASS][29] +9 similar issues
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12672/shard-rkl-3/igt@kms_ccs@pipe-a-bad-pixel-format-y_tiled_gen12_rc_ccs.html
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_8419/shard-rkl-6/igt@kms_ccs@pipe-a-bad-pixel-format-y_tiled_gen12_rc_ccs.html

  * igt@kms_frontbuffer_tracking@fbc-1p-primscrn-spr-indfb-draw-render:
    - {shard-tglu}:       [SKIP][30] ([i915#1849]) -> [PASS][31] +2 similar issues
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12672/shard-tglu-6/igt@kms_frontbuffer_tracking@fbc-1p-primscrn-spr-indfb-draw-render.html
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_8419/shard-tglu-8/igt@kms_frontbuffer_tracking@fbc-1p-primscrn-spr-indfb-draw-render.html

  * igt@kms_frontbuffer_tracking@psr-1p-primscrn-indfb-plflip-blt:
    - {shard-rkl}:        [SKIP][32] ([i915#1849] / [i915#4098]) -> [PASS][33] +6 similar issues
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12672/shard-rkl-4/igt@kms_frontbuffer_tracking@psr-1p-primscrn-indfb-plflip-blt.html
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_8419/shard-rkl-6/igt@kms_frontbuffer_tracking@psr-1p-primscrn-indfb-plflip-blt.html

  * igt@kms_plane@plane-position-hole-dpms@pipe-b-planes:
    - {shard-rkl}:        [SKIP][34] ([i915#1849]) -> [PASS][35] +1 similar issue
   [34]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12672/shard-rkl-5/igt@kms_plane@plane-position-hole-dpms@pipe-b-planes.html
   [35]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_8419/shard-rkl-6/igt@kms_plane@plane-position-hole-dpms@pipe-b-planes.html

  * igt@kms_psr@primary_render:
    - {shard-rkl}:        [SKIP][36] ([i915#1072]) -> [PASS][37]
   [36]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12672/shard-rkl-3/igt@kms_psr@primary_render.html
   [37]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_8419/shard-rkl-6/igt@kms_psr@primary_render.html

  * igt@kms_rotation_crc@bad-tiling:
    - {shard-tglu}:       [SKIP][38] ([i915#7651]) -> [PASS][39] +8 similar issues
   [38]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12672/shard-tglu-6/igt@kms_rotation_crc@bad-tiling.html
   [39]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_8419/shard-tglu-2/igt@kms_rotation_crc@bad-tiling.html

  * igt@kms_vblank@pipe-c-wait-busy:
    - {shard-tglu}:       [SKIP][40] ([i915#1845] / [i915#7651]) -> [PASS][41] +1 similar issue
   [40]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12672/shard-tglu-6/igt@kms_vblank@pipe-c-wait-busy.html
   [41]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_8419/shard-tglu-8/igt@kms_vblank@pipe-c-wait-busy.html

  * igt@perf@gen12-oa-tlb-invalidate:
    - {shard-rkl}:        [SKIP][42] ([fdo#109289]) -> [PASS][43] +1 similar issue
   [42]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12672/shard-rkl-5/igt@perf@gen12-oa-tlb-invalidate.html
   [43]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_8419/shard-rkl-3/igt@perf@gen12-oa-tlb-invalidate.html

  * igt@prime_vgem@basic-write:
    - {shard-rkl}:        [SKIP][44] ([fdo#109295] / [i915#3291] / [i915#3708]) -> [PASS][45]
   [44]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12672/shard-rkl-3/igt@prime_vgem@basic-write.html
   [45]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_8419/shard-rkl-5/igt@prime_vgem@basic-write.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [IGT#2]: https://gitlab.freedesktop.org/drm/igt-gpu-tools/issues/2
  [fdo#103375]: https://bugs.freedesktop.org/show_bug.cgi?id=103375
  [fdo#109274]: https://bugs.freedesktop.org/show_bug.cgi?id=109274
  [fdo#109279]: https://bugs.freedesktop.org/show_bug.cgi?id=109279
  [fdo#109280]: https://bugs.freedesktop.org/show_bug.cgi?id=109280
  [fdo#109283]: https://bugs.freedesktop.org/show_bug.cgi?id=109283
  [fdo#109285]: https://bugs.freedesktop.org/show_bug.cgi?id=109285
  [fdo#109289]: https://bugs.freedesktop.org/show_bug.cgi?id=109289
  [fdo#109291]: https://bugs.freedesktop.org/show_bug.cgi?id=109291
  [fdo#109295]: https://bugs.freedesktop.org/show_bug.cgi?id=109295
  [fdo#109308]: https://bugs.freedesktop.org/show_bug.cgi?id=109308
  [fdo#109312]: https://bugs.freedesktop.org/show_bug.cgi?id=109312
  [fdo#109315]: https://bugs.freedesktop.org/show_bug.cgi?id=109315
  [fdo#109506]: https://bugs.freedesktop.org/show_bug.cgi?id=109506
  [fdo#109642]: https://bugs.freedesktop.org/show_bug.cgi?id=109642
  [fdo#110189]: https://bugs.freedesktop.org/show_bug.cgi?id=110189
  [fdo#110723]: https://bugs.freedesktop.org/show_bug.cgi?id=110723
  [fdo#111068]: https://bugs.freedesktop.org/show_bug.cgi?id=111068
  [fdo#111614]: https://bugs.freedesktop.org/show_bug.cgi?id=111614
  [fdo#111615]: https://bugs.freedesktop.org/show_bug.cgi?id=111615
  [fdo#111644]: https://bugs.freedesktop.org/show_bug.cgi?id=111644
  [fdo#111656]: https://bugs.freedesktop.org/show_bug.cgi?id=111656
  [fdo#111825]: https://bugs.freedesktop.org/show_bug.cgi?id=111825
  [fdo#111827]: https://bugs.freedesktop.org/show_bug.cgi?id=111827
  [fdo#112283]: https://bugs.freedesktop.org/show_bug.cgi?id=112283
  [i915#1072]: https://gitlab.freedesktop.org/drm/intel/issues/1072
  [i915#132]: https://gitlab.freedesktop.org/drm/intel/issues/132
  [i915#1397]: https://gitlab.freedesktop.org/drm/intel/issues/1397
  [i915#1722]: https://gitlab.freedesktop.org/drm/intel/issues/1722
  [i915#1825]: https://gitlab.freedesktop.org/drm/intel/issues/1825
  [i915#1839]: https://gitlab.freedesktop.org/drm/intel/issues/1839
  [i915#1845]: https://gitlab.freedesktop.org/drm/intel/issues/1845
  [i915#1849]: https://gitlab.freedesktop.org/drm/intel/issues/1849
  [i915#1902]: https://gitlab.freedesktop.org/drm/intel/issues/1902
  [i915#2437]: https://gitlab.freedesktop.org/drm/intel/issues/2437
  [i915#2527]: https://gitlab.freedesktop.org/drm/intel/issues/2527
  [i915#2575]: https://gitlab.freedesktop.org/drm/intel/issues/2575
  [i915#2582]: https://gitlab.freedesktop.org/drm/intel/issues/2582
  [i915#2587]: https://gitlab.freedesktop.org/drm/intel/issues/2587
  [i915#2658]: https://gitlab.freedesktop.org/drm/intel/issues/2658
  [i915#2672]: https://gitlab.freedesktop.org/drm/intel/issues/2672
  [i915#2681]: https://gitlab.freedesktop.org/drm/intel/issues/2681
  [i915#2705]: https://gitlab.freedesktop.org/drm/intel/issues/2705
  [i915#280]: https://gitlab.freedesktop.org/drm/intel/issues/280
  [i915#2842]: https://gitlab.freedesktop.org/drm/intel/issues/2842
  [i915#2846]: https://gitlab.freedesktop.org/drm/intel/issues/2846
  [i915#2856]: https://gitlab.freedesktop.org/drm/intel/issues/2856
  [i915#2920]: https://gitlab.freedesktop.org/drm/intel/issues/2920
  [i915#3116]: https://gitlab.freedesktop.org/drm/intel/issues/3116
  [i915#3281]: https://gitlab.freedesktop.org/drm/intel/issues/3281
  [i915#3282]: https://gitlab.freedesktop.org/drm/intel/issues/3282
  [i915#3291]: https://gitlab.freedesktop.org/drm/intel/issues/3291
  [i915#3297]: https://gitlab.freedesktop.org/drm/intel/issues/3297
  [i915#3318]: https://gitlab.freedesktop.org/drm/intel/issues/3318
  [i915#3359]: https://gitlab.freedesktop.org/drm/intel/issues/3359
  [i915#3458]: https://gitlab.freedesktop.org/drm/intel/issues/3458
  [i915#3469]: https://gitlab.freedesktop.org/drm/intel/issues/3469
  [i915#3528]: https://gitlab.freedesktop.org/drm/intel/issues/3528
  [i915#3539]: https://gitlab.freedesktop.org/drm/intel/issues/3539
  [i915#3547]: https://gitlab.freedesktop.org/drm/intel/issues/3547
  [i915#3555]: https://gitlab.freedesktop.org/drm/intel/issues/3555
  [i915#3591]: https://gitlab.freedesktop.org/drm/intel/issues/3591
  [i915#3637]: https://gitlab.freedesktop.org/drm/intel/issues/3637
  [i915#3638]: https://gitlab.freedesktop.org/drm/intel/issues/3638
  [i915#3689]: https://gitlab.freedesktop.org/drm/intel/issues/3689
  [i915#3708]: https://gitlab.freedesktop.org/drm/intel/issues/3708
  [i915#3734]: https://gitlab.freedesktop.org/drm/intel/issues/3734
  [i915#3742]: https://gitlab.freedesktop.org/drm/intel/issues/3742
  [i915#3804]: https://gitlab.freedesktop.org/drm/intel/issues/3804
  [i915#3826]: https://gitlab.freedesktop.org/drm/intel/issues/3826
  [i915#3886]: https://gitlab.freedesktop.org/drm/intel/issues/3886
  [i915#3952]: https://gitlab.freedesktop.org/drm/intel/issues/3952
  [i915#404]: https://gitlab.freedesktop.org/drm/intel/issues/404
  [i915#4070]: https://gitlab.freedesktop.org/drm/intel/issues/4070
  [i915#4077]: https://gitlab.freedesktop.org/drm/intel/issues/4077
  [i915#4078]: https://gitlab.freedesktop.org/drm/intel/issues/4078
  [i915#4079]: https://gitlab.freedesktop.org/drm/intel/issues/4079
  [i915#4083]: https://gitlab.freedesktop.org/drm/intel/issues/4083
  [i915#4098]: https://gitlab.freedesktop.org/drm/intel/issues/4098
  [i915#4103]: https://gitlab.freedesktop.org/drm/intel/issues/4103
  [i915#4213]: https://gitlab.freedesktop.org/drm/intel/issues/4213
  [i915#426]: https://gitlab.freedesktop.org/drm/intel/issues/426
  [i915#4270]: https://gitlab.freedesktop.org/drm/intel/issues/4270
  [i915#4281]: https://gitlab.freedesktop.org/drm/intel/issues/4281
  [i915#4387]: https://gitlab.freedesktop.org/drm/intel/issues/4387
  [i915#4538]: https://gitlab.freedesktop.org/drm/intel/issues/4538
  [i915#4565]: https://gitlab.freedesktop.org/drm/intel/issues/4565
  [i915#4613]: https://gitlab.freedesktop.org/drm/intel/issues/4613
  [i915#4771]: https://gitlab.freedesktop.org/drm/intel/issues/4771
  [i915#4812]: https://gitlab.freedesktop.org/drm/intel/issues/4812
  [i915#4833]: https://gitlab.freedesktop.org/drm/intel/issues/4833
  [i915#4852]: https://gitlab.freedesktop.org/drm/intel/issues/4852
  [i915#4860]: https://gitlab.freedesktop.org/drm/intel/issues/4860
  [i915#4877]: https://gitlab.freedesktop.org/drm/intel/issues/4877
  [i915#4879]: https://gitlab.freedesktop.org/drm/intel/issues/4879
  [i915#4880]: https://gitlab.freedesktop.org/drm/intel/issues/4880
  [i915#4881]: https://gitlab.freedesktop.org/drm/intel/issues/4881
  [i915#4884]: https://gitlab.freedesktop.org/drm/intel/issues/4884
  [i915#4885]: https://gitlab.freedesktop.org/drm/intel/issues/4885
  [i915#5176]: https://gitlab.freedesktop.org/drm/intel/issues/5176
  [i915#5235]: https://gitlab.freedesktop.org/drm/intel/issues/5235
  [i915#5286]: https://gitlab.freedesktop.org/drm/intel/issues/5286
  [i915#5289]: https://gitlab.freedesktop.org/drm/intel/issues/5289
  [i915#5325]: https://gitlab.freedesktop.org/drm/intel/issues/5325
  [i915#533]: https://gitlab.freedesktop.org/drm/intel/issues/533
  [i915#5439]: https://gitlab.freedesktop.org/drm/intel/issues/5439
  [i915#5563]: https://gitlab.freedesktop.org/drm/intel/issues/5563
  [i915#5784]: https://gitlab.freedesktop.org/drm/intel/issues/5784
  [i915#6095]: https://gitlab.freedesktop.org/drm/intel/issues/6095
  [i915#6117]: https://gitlab.freedesktop.org/drm/intel/issues/6117
  [i915#6230]: https://gitlab.freedesktop.org/drm/intel/issues/6230
  [i915#6248]: https://gitlab.freedesktop.org/drm/intel/issues/6248
  [i915#6301]: https://gitlab.freedesktop.org/drm/intel/issues/6301
  [i915#6335]: https://gitlab.freedesktop.org/drm/intel/issues/6335
  [i915#6412]: https://gitlab.freedesktop.org/drm/intel/issues/6412
  [i915#6433]: https://gitlab.freedesktop.org/drm/intel/issues/6433
  [i915#6493]: https://gitlab.freedesktop.org/drm/intel/issues/6493
  [i915#6497]: https://gitlab.freedesktop.org/drm/intel/issues/6497
  [i915#658]: https://gitlab.freedesktop.org/drm/intel/issues/658
  [i915#6590]: https://gitlab.freedesktop.org/drm/intel/issues/6590
  [i915#6621]: https://gitlab.freedesktop.org/drm/intel/issues/6621
  [i915#6768]: https://gitlab.freedesktop.org/drm/intel/issues/6768
  [i915#6944]: https://gitlab.freedesktop.org/drm/intel/issues/6944
  [i915#6953]: https://gitlab.freedesktop.org/drm/intel/issues/6953
  [i915#7037]: https://gitlab.freedesktop.org/drm/intel/issues/7037
  [i915#7116]: https://gitlab.freedesktop.org/drm/intel/issues/7116
  [i915#7118]: https://gitlab.freedesktop.org/drm/intel/issues/7118
  [i915#7128]: https://gitlab.freedesktop.org/drm/intel/issues/7128
  [i915#7294]: https://gitlab.freedesktop.org/drm/intel/issues/7294
  [i915#7561]: https://gitlab.freedesktop.org/drm/intel/issues/7561
  [i915#7582]: https://gitlab.freedesktop.org/drm/intel/issues/7582
  [i915#7651]: https://gitlab.freedesktop.org/drm/intel/issues/7651
  [i915#7697]: https://gitlab.freedesktop.org/drm/intel/issues/7697
  [i915#7701]: https://gitlab.freedesktop.org/drm/intel/issues/7701
  [i915#7711]: https://gitlab.freedesktop.org/drm/intel/issues/7711
  [i915#7742]: https://gitlab.freedesktop.org/drm/intel/issues/7742
  [i915#7828]: https://gitlab.freedesktop.org/drm/intel/issues/7828
  [i915#79]: https://gitlab.freedesktop.org/drm/intel/issues/79
  [i915#7949]: https://gitlab.freedesktop.org/drm/intel/issues/7949
  [i915#7957]: https://gitlab.freedesktop.org/drm/intel/issues/7957


Build changes
-------------

  * CI: CI-20190529 -> None
  * IGT: IGT_7143 -> IGTPW_8419

  CI-20190529: 20190529
  CI_DRM_12672: df5c31e78609626a1278ad79305cd408f403ceac @ git://anongit.freedesktop.org/gfx-ci/linux
  IGTPW_8419: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_8419/index.html
  IGT_7143: c7b12dcc460fc2348e1fa7f4dcb791bb82e29e44 @ https://gitlab.freedesktop.org/drm/igt-gpu-tools.git

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_8419/index.html

[-- Attachment #2: Type: text/html, Size: 13058 bytes --]

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Intel-gfx] [igt-dev] [PATCH i-g-t 0/2] tests/i915/perf: Add stress / race exercises
  2023-01-31  9:17 ` [igt-dev] " Janusz Krzysztofik
@ 2023-01-31 16:19   ` Dixit, Ashutosh
  -1 siblings, 0 replies; 34+ messages in thread
From: Dixit, Ashutosh @ 2023-01-31 16:19 UTC (permalink / raw)
  To: Janusz Krzysztofik
  Cc: igt-dev, Arkadiusz Hiler, intel-gfx, Petri Latvala, Chris Wilson

On Tue, 31 Jan 2023 01:17:29 -0800, Janusz Krzysztofik wrote:
>

Hi Janusz,

> Users reported oopses on list corruptions when using i915 perf with a
> number of concurrently running graphics applications.  That indicates we
> are currently missing some important tests for such scenarios.  Cover
> that gap.

Do these oops etc. have anything to do with perf itself or rather with
persistence or non-persistence not properly supported with GuC? We should
have seen such failures with persistence tests (with GuC) itself so I am
wondering if there's any point of dragging perf into these already muddy
waters? Such failures should be isolated first with other tests without
mixing perf into this IMO.

Thanks.
--
Ashutosh

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [igt-dev] [PATCH i-g-t 0/2] tests/i915/perf: Add stress / race exercises
@ 2023-01-31 16:19   ` Dixit, Ashutosh
  0 siblings, 0 replies; 34+ messages in thread
From: Dixit, Ashutosh @ 2023-01-31 16:19 UTC (permalink / raw)
  To: Janusz Krzysztofik; +Cc: igt-dev, intel-gfx, Chris Wilson

On Tue, 31 Jan 2023 01:17:29 -0800, Janusz Krzysztofik wrote:
>

Hi Janusz,

> Users reported oopses on list corruptions when using i915 perf with a
> number of concurrently running graphics applications.  That indicates we
> are currently missing some important tests for such scenarios.  Cover
> that gap.

Do these oops etc. have anything to do with perf itself or rather with
persistence or non-persistence not properly supported with GuC? We should
have seen such failures with persistence tests (with GuC) itself so I am
wondering if there's any point of dragging perf into these already muddy
waters? Such failures should be isolated first with other tests without
mixing perf into this IMO.

Thanks.
--
Ashutosh

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Intel-gfx] [igt-dev] [PATCH i-g-t 0/2] tests/i915/perf: Add stress / race exercises
  2023-01-31 16:19   ` Dixit, Ashutosh
@ 2023-01-31 16:55     ` Dixit, Ashutosh
  -1 siblings, 0 replies; 34+ messages in thread
From: Dixit, Ashutosh @ 2023-01-31 16:55 UTC (permalink / raw)
  To: Janusz Krzysztofik
  Cc: igt-dev, Arkadiusz Hiler, intel-gfx, Petri Latvala, Chris Wilson

On Tue, 31 Jan 2023 08:19:48 -0800, Dixit, Ashutosh wrote:
>
> On Tue, 31 Jan 2023 01:17:29 -0800, Janusz Krzysztofik wrote:
> >
>
> Hi Janusz,
>
> > Users reported oopses on list corruptions when using i915 perf with a
> > number of concurrently running graphics applications.  That indicates we
> > are currently missing some important tests for such scenarios.  Cover
> > that gap.
>
> Do these oops etc. have anything to do with perf itself or rather with
> persistence or non-persistence not properly supported with GuC? We should
> have seen such failures with persistence tests (with GuC) itself so I am
> wondering if there's any point of dragging perf into these already muddy
> waters? Such failures should be isolated first with other tests without
> mixing perf into this IMO.

Basically failures in these tests indicate defects in which subsystem? If
the failures do not indicate defects in perf then these tests should not be
added as perf tests. E.g. if failures indicate defects in GuC subsystem
then they should be added as GuC tests.

Otherwise it gets hard to dispostion bugs which are filed due to these
failures. The bugs come to the wrong team and then have to be sent to the
correct team etc.

Thanks.
--
Ashutosh

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [igt-dev] [PATCH i-g-t 0/2] tests/i915/perf: Add stress / race exercises
@ 2023-01-31 16:55     ` Dixit, Ashutosh
  0 siblings, 0 replies; 34+ messages in thread
From: Dixit, Ashutosh @ 2023-01-31 16:55 UTC (permalink / raw)
  To: Janusz Krzysztofik; +Cc: igt-dev, intel-gfx, Chris Wilson

On Tue, 31 Jan 2023 08:19:48 -0800, Dixit, Ashutosh wrote:
>
> On Tue, 31 Jan 2023 01:17:29 -0800, Janusz Krzysztofik wrote:
> >
>
> Hi Janusz,
>
> > Users reported oopses on list corruptions when using i915 perf with a
> > number of concurrently running graphics applications.  That indicates we
> > are currently missing some important tests for such scenarios.  Cover
> > that gap.
>
> Do these oops etc. have anything to do with perf itself or rather with
> persistence or non-persistence not properly supported with GuC? We should
> have seen such failures with persistence tests (with GuC) itself so I am
> wondering if there's any point of dragging perf into these already muddy
> waters? Such failures should be isolated first with other tests without
> mixing perf into this IMO.

Basically failures in these tests indicate defects in which subsystem? If
the failures do not indicate defects in perf then these tests should not be
added as perf tests. E.g. if failures indicate defects in GuC subsystem
then they should be added as GuC tests.

Otherwise it gets hard to dispostion bugs which are filed due to these
failures. The bugs come to the wrong team and then have to be sent to the
correct team etc.

Thanks.
--
Ashutosh

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Intel-gfx] [igt-dev] [PATCH i-g-t 0/2] tests/i915/perf: Add stress / race exercises
  2023-01-31 16:19   ` Dixit, Ashutosh
@ 2023-01-31 17:36     ` Janusz Krzysztofik
  -1 siblings, 0 replies; 34+ messages in thread
From: Janusz Krzysztofik @ 2023-01-31 17:36 UTC (permalink / raw)
  To: Dixit, Ashutosh
  Cc: igt-dev, Arkadiusz Hiler, intel-gfx, Petri Latvala, Chris Wilson

On Tuesday, 31 January 2023 17:19:48 CET Dixit, Ashutosh wrote:
> On Tue, 31 Jan 2023 01:17:29 -0800, Janusz Krzysztofik wrote:
> >
> 
> Hi Janusz,
> 
> > Users reported oopses on list corruptions when using i915 perf with a
> > number of concurrently running graphics applications.  That indicates we
> > are currently missing some important tests for such scenarios.  Cover
> > that gap.
> 
> Do these oops etc. have anything to do with perf itself or rather with
> persistence or non-persistence not properly supported with GuC? 

My root cause analysis has revealed that these list corruptions are actually 
caused by a bug in barrier processing, then no, they are not persistence nor 
GuC related.  For details, please see my preliminary (still a bit buggy, but 
otherwise valid) fix, so far sent only to trybot:
https://patchwork.freedesktop.org/series/113268/

> We should
> have seen such failures with persistence tests (with GuC) itself so I am
> wondering if there's any point of dragging perf into these already muddy
> waters? Such failures should be isolated first with other tests without
> mixing perf into this IMO.

I see your point, but unfortunately things are not that easy.  My 
investigation has lead me to a conclusion that the bug within the barrier 
processing code is now addressed, to some extent, and probably not 
intentionally, by a kind of workaround that makes it really hard to reproduce 
without any interaction from an external user that tries to replace a barrier 
with its own request.  And I can see a very limited number of such users, one 
of them being perf.

The first patch was developed by Chris still before I found the the root cause 
of the issue.  Since the bug seemed strictly perf related at that point in 
time, that's probably why Chris decided to add the new subtest to perf.  As 
such, that subtest is more general than just focused on triggering the list 
corruption bug, and it pretty belongs to perf, I believe.

Since Chris' subtest didn't help in triggering the list corruption, I've 
developed a new subtest that can do it.  Since it is almost identical to the 
one Chris added, I decided to reuse his code, then add my new subtest to perf 
as well.  But maybe you are right that my subtest better fits to another test. 
not perf.  I'll think this over.

I hope this clarifies things for you.

Thanks,
Janusz

> 
> Thanks.
> --
> Ashutosh
> 





^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [igt-dev] [PATCH i-g-t 0/2] tests/i915/perf: Add stress / race exercises
@ 2023-01-31 17:36     ` Janusz Krzysztofik
  0 siblings, 0 replies; 34+ messages in thread
From: Janusz Krzysztofik @ 2023-01-31 17:36 UTC (permalink / raw)
  To: Dixit, Ashutosh; +Cc: igt-dev, intel-gfx, Chris Wilson

On Tuesday, 31 January 2023 17:19:48 CET Dixit, Ashutosh wrote:
> On Tue, 31 Jan 2023 01:17:29 -0800, Janusz Krzysztofik wrote:
> >
> 
> Hi Janusz,
> 
> > Users reported oopses on list corruptions when using i915 perf with a
> > number of concurrently running graphics applications.  That indicates we
> > are currently missing some important tests for such scenarios.  Cover
> > that gap.
> 
> Do these oops etc. have anything to do with perf itself or rather with
> persistence or non-persistence not properly supported with GuC? 

My root cause analysis has revealed that these list corruptions are actually 
caused by a bug in barrier processing, then no, they are not persistence nor 
GuC related.  For details, please see my preliminary (still a bit buggy, but 
otherwise valid) fix, so far sent only to trybot:
https://patchwork.freedesktop.org/series/113268/

> We should
> have seen such failures with persistence tests (with GuC) itself so I am
> wondering if there's any point of dragging perf into these already muddy
> waters? Such failures should be isolated first with other tests without
> mixing perf into this IMO.

I see your point, but unfortunately things are not that easy.  My 
investigation has lead me to a conclusion that the bug within the barrier 
processing code is now addressed, to some extent, and probably not 
intentionally, by a kind of workaround that makes it really hard to reproduce 
without any interaction from an external user that tries to replace a barrier 
with its own request.  And I can see a very limited number of such users, one 
of them being perf.

The first patch was developed by Chris still before I found the the root cause 
of the issue.  Since the bug seemed strictly perf related at that point in 
time, that's probably why Chris decided to add the new subtest to perf.  As 
such, that subtest is more general than just focused on triggering the list 
corruption bug, and it pretty belongs to perf, I believe.

Since Chris' subtest didn't help in triggering the list corruption, I've 
developed a new subtest that can do it.  Since it is almost identical to the 
one Chris added, I decided to reuse his code, then add my new subtest to perf 
as well.  But maybe you are right that my subtest better fits to another test. 
not perf.  I'll think this over.

I hope this clarifies things for you.

Thanks,
Janusz

> 
> Thanks.
> --
> Ashutosh
> 




^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Intel-gfx] [igt-dev] [PATCH i-g-t 0/2] tests/i915/perf: Add stress / race exercises
  2023-01-31 16:55     ` Dixit, Ashutosh
@ 2023-01-31 17:56       ` Janusz Krzysztofik
  -1 siblings, 0 replies; 34+ messages in thread
From: Janusz Krzysztofik @ 2023-01-31 17:56 UTC (permalink / raw)
  To: Dixit, Ashutosh
  Cc: igt-dev, Arkadiusz Hiler, intel-gfx, Petri Latvala, Chris Wilson

On Tuesday, 31 January 2023 17:55:54 CET Dixit, Ashutosh wrote:
> On Tue, 31 Jan 2023 08:19:48 -0800, Dixit, Ashutosh wrote:
> >
> > On Tue, 31 Jan 2023 01:17:29 -0800, Janusz Krzysztofik wrote:
> > >
> >
> > Hi Janusz,
> >
> > > Users reported oopses on list corruptions when using i915 perf with a
> > > number of concurrently running graphics applications.  That indicates we
> > > are currently missing some important tests for such scenarios.  Cover
> > > that gap.
> >
> > Do these oops etc. have anything to do with perf itself or rather with
> > persistence or non-persistence not properly supported with GuC? We should
> > have seen such failures with persistence tests (with GuC) itself so I am
> > wondering if there's any point of dragging perf into these already muddy
> > waters? Such failures should be isolated first with other tests without
> > mixing perf into this IMO.
> 
> Basically failures in these tests indicate defects in which subsystem? If
> the failures do not indicate defects in perf then these tests should not be
> added as perf tests. E.g. if failures indicate defects in GuC subsystem
> then they should be added as GuC tests.

But how can a tests know in advance what bugs, in which particular subsystems, 
it is ever going to hit?  If it could, we wouldn't need any root cause 
analysis, only tests telling us which bug from a predicted set was hit.
For me your vision seems to assume an environment without cross-subsystem 
dependencies, where a test is only capable of triggering bugs in a particular 
subsystem and never in others.  That's not possible in reality, I believe, we 
need root cause analysis to tell.

> 
> Otherwise it gets hard to dispostion bugs which are filed due to these
> failures. The bugs come to the wrong team and then have to be sent to the
> correct team etc.

In my opinion, all parties, whether validation, or bug filling, or 
development, must do their job with care.  Assigning bugs to teams by test 
name, not by a signature of the issue found in test output or system logs, 
doesn't seem to be the best practice to me.

Thanks,
Janusz

> 
> Thanks.
> --
> Ashutosh
> 





^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [igt-dev] [PATCH i-g-t 0/2] tests/i915/perf: Add stress / race exercises
@ 2023-01-31 17:56       ` Janusz Krzysztofik
  0 siblings, 0 replies; 34+ messages in thread
From: Janusz Krzysztofik @ 2023-01-31 17:56 UTC (permalink / raw)
  To: Dixit, Ashutosh; +Cc: igt-dev, intel-gfx, Chris Wilson

On Tuesday, 31 January 2023 17:55:54 CET Dixit, Ashutosh wrote:
> On Tue, 31 Jan 2023 08:19:48 -0800, Dixit, Ashutosh wrote:
> >
> > On Tue, 31 Jan 2023 01:17:29 -0800, Janusz Krzysztofik wrote:
> > >
> >
> > Hi Janusz,
> >
> > > Users reported oopses on list corruptions when using i915 perf with a
> > > number of concurrently running graphics applications.  That indicates we
> > > are currently missing some important tests for such scenarios.  Cover
> > > that gap.
> >
> > Do these oops etc. have anything to do with perf itself or rather with
> > persistence or non-persistence not properly supported with GuC? We should
> > have seen such failures with persistence tests (with GuC) itself so I am
> > wondering if there's any point of dragging perf into these already muddy
> > waters? Such failures should be isolated first with other tests without
> > mixing perf into this IMO.
> 
> Basically failures in these tests indicate defects in which subsystem? If
> the failures do not indicate defects in perf then these tests should not be
> added as perf tests. E.g. if failures indicate defects in GuC subsystem
> then they should be added as GuC tests.

But how can a tests know in advance what bugs, in which particular subsystems, 
it is ever going to hit?  If it could, we wouldn't need any root cause 
analysis, only tests telling us which bug from a predicted set was hit.
For me your vision seems to assume an environment without cross-subsystem 
dependencies, where a test is only capable of triggering bugs in a particular 
subsystem and never in others.  That's not possible in reality, I believe, we 
need root cause analysis to tell.

> 
> Otherwise it gets hard to dispostion bugs which are filed due to these
> failures. The bugs come to the wrong team and then have to be sent to the
> correct team etc.

In my opinion, all parties, whether validation, or bug filling, or 
development, must do their job with care.  Assigning bugs to teams by test 
name, not by a signature of the issue found in test output or system logs, 
doesn't seem to be the best practice to me.

Thanks,
Janusz

> 
> Thanks.
> --
> Ashutosh
> 




^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Intel-gfx] [igt-dev] [PATCH i-g-t 0/2] tests/i915/perf: Add stress / race exercises
  2023-01-31 17:36     ` Janusz Krzysztofik
@ 2023-01-31 18:36       ` Dixit, Ashutosh
  -1 siblings, 0 replies; 34+ messages in thread
From: Dixit, Ashutosh @ 2023-01-31 18:36 UTC (permalink / raw)
  To: Janusz Krzysztofik
  Cc: igt-dev, Arkadiusz Hiler, intel-gfx, Petri Latvala, Chris Wilson

On Tue, 31 Jan 2023 09:36:30 -0800, Janusz Krzysztofik wrote:
>
> Since Chris' subtest didn't help in triggering the list corruption, I've
> developed a new subtest that can do it.  Since it is almost identical to the
> one Chris added, I decided to reuse his code, then add my new subtest to perf
> as well.  But maybe you are right that my subtest better fits to another test.
> not perf.  I'll think this over.
>
> I hope this clarifies things for you.

Thanks Janusz. Of course the test is valid but because it is not triggering
bugs in perf perhaps it is better added to a different IGT file. Maybe
gem_ctx_persistence? Maybe perf is also ok. Anyway something to think
about.

Thanks.
--
Ashutosh

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [igt-dev] [PATCH i-g-t 0/2] tests/i915/perf: Add stress / race exercises
@ 2023-01-31 18:36       ` Dixit, Ashutosh
  0 siblings, 0 replies; 34+ messages in thread
From: Dixit, Ashutosh @ 2023-01-31 18:36 UTC (permalink / raw)
  To: Janusz Krzysztofik; +Cc: igt-dev, intel-gfx, Chris Wilson

On Tue, 31 Jan 2023 09:36:30 -0800, Janusz Krzysztofik wrote:
>
> Since Chris' subtest didn't help in triggering the list corruption, I've
> developed a new subtest that can do it.  Since it is almost identical to the
> one Chris added, I decided to reuse his code, then add my new subtest to perf
> as well.  But maybe you are right that my subtest better fits to another test.
> not perf.  I'll think this over.
>
> I hope this clarifies things for you.

Thanks Janusz. Of course the test is valid but because it is not triggering
bugs in perf perhaps it is better added to a different IGT file. Maybe
gem_ctx_persistence? Maybe perf is also ok. Anyway something to think
about.

Thanks.
--
Ashutosh

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Intel-gfx] [igt-dev] [PATCH i-g-t 0/2] tests/i915/perf: Add stress / race exercises
  2023-01-31 18:36       ` Dixit, Ashutosh
@ 2023-02-01 15:51         ` Janusz Krzysztofik
  -1 siblings, 0 replies; 34+ messages in thread
From: Janusz Krzysztofik @ 2023-02-01 15:51 UTC (permalink / raw)
  To: Dixit, Ashutosh
  Cc: igt-dev, Arkadiusz Hiler, intel-gfx, Petri Latvala, Chris Wilson

Hi Ashutosh,

On Tuesday, 31 January 2023 19:36:50 CET Dixit, Ashutosh wrote:
> On Tue, 31 Jan 2023 09:36:30 -0800, Janusz Krzysztofik wrote:
> >
> > Since Chris' subtest didn't help in triggering the list corruption, I've
> > developed a new subtest that can do it.  Since it is almost identical to the
> > one Chris added, I decided to reuse his code, then add my new subtest to perf
> > as well.  But maybe you are right that my subtest better fits to another test.
> > not perf.  I'll think this over.
> >
> > I hope this clarifies things for you.
> 
> Thanks Janusz. Of course the test is valid but because it is not triggering
> bugs in perf perhaps it is better added to a different IGT file. Maybe
> gem_ctx_persistence? Maybe perf is also ok. Anyway something to think
> about.

I raised this point on today's IGT workroup meeting, asking for opinion.  
Since the new subtests depend on perf, we agreed that what needs to be done 
first is to provide an IGT library with perf helpers.  As soon as it is 
available, other tests can use it if needed.  Since I'm not familiar with 
perf, and I have other tasks in my queue, I've no time to spend on learning 
perf and working on that library.  Then, for now I'll keep the new subtests 
inside the perf test.  We can move them elsewhere at a later time, when the 
library is ready.

Thanks,
Janusz

> 
> Thanks.
> --
> Ashutosh
> 





^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [igt-dev] [PATCH i-g-t 0/2] tests/i915/perf: Add stress / race exercises
@ 2023-02-01 15:51         ` Janusz Krzysztofik
  0 siblings, 0 replies; 34+ messages in thread
From: Janusz Krzysztofik @ 2023-02-01 15:51 UTC (permalink / raw)
  To: Dixit, Ashutosh; +Cc: igt-dev, intel-gfx, Chris Wilson

Hi Ashutosh,

On Tuesday, 31 January 2023 19:36:50 CET Dixit, Ashutosh wrote:
> On Tue, 31 Jan 2023 09:36:30 -0800, Janusz Krzysztofik wrote:
> >
> > Since Chris' subtest didn't help in triggering the list corruption, I've
> > developed a new subtest that can do it.  Since it is almost identical to the
> > one Chris added, I decided to reuse his code, then add my new subtest to perf
> > as well.  But maybe you are right that my subtest better fits to another test.
> > not perf.  I'll think this over.
> >
> > I hope this clarifies things for you.
> 
> Thanks Janusz. Of course the test is valid but because it is not triggering
> bugs in perf perhaps it is better added to a different IGT file. Maybe
> gem_ctx_persistence? Maybe perf is also ok. Anyway something to think
> about.

I raised this point on today's IGT workroup meeting, asking for opinion.  
Since the new subtests depend on perf, we agreed that what needs to be done 
first is to provide an IGT library with perf helpers.  As soon as it is 
available, other tests can use it if needed.  Since I'm not familiar with 
perf, and I have other tasks in my queue, I've no time to spend on learning 
perf and working on that library.  Then, for now I'll keep the new subtests 
inside the perf test.  We can move them elsewhere at a later time, when the 
library is ready.

Thanks,
Janusz

> 
> Thanks.
> --
> Ashutosh
> 




^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Intel-gfx] [PATCH i-g-t 2/2] tests/i915/perf: Exercise barrier race
  2023-01-31  9:17   ` [igt-dev] " Janusz Krzysztofik
@ 2023-02-01 18:21     ` Kamil Konieczny
  -1 siblings, 0 replies; 34+ messages in thread
From: Kamil Konieczny @ 2023-02-01 18:21 UTC (permalink / raw)
  To: igt-dev; +Cc: Arkadiusz Hiler, intel-gfx, Petri Latvala, Chris Wilson

Hi Janusz,

please send patches to igt ML and add other addresses to cc:
I have one nit, see below.

On 2023-01-31 at 10:17:31 +0100, Janusz Krzysztofik wrote:
> Add a new subtest focused on exercising interaction between perf open /
> close operations, which replace active barriers with perf requests, and
> concurrent barrier preallocate / acquire operations performed during
> context first pin / last unpin.
> 
> References: https://gitlab.freedesktop.org/drm/intel/-/issues/6333
> Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik@linux.intel.com>
> Cc: Chris Wilson <chris.p.wilson@linux.intel.com>
> ---
>  tests/i915/perf.c | 41 +++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 39 insertions(+), 2 deletions(-)
> 
> diff --git a/tests/i915/perf.c b/tests/i915/perf.c
> index e33cacc443..11a3ec21ab 100644
> --- a/tests/i915/perf.c
> +++ b/tests/i915/perf.c
> @@ -39,6 +39,7 @@
>  #include <math.h>
>  
>  #include "i915/gem.h"
> +#include "i915/gem_create.h"
>  #include "i915/perf.h"
>  #include "igt.h"
>  #include "igt_perf.h"
> @@ -4885,7 +4886,27 @@ test_whitelisted_registers_userspace_config(void)
>  	i915_perf_remove_config(drm_fd, config_id);
>  }
>  
> -static void test_open_race(const struct intel_execution_engine2 *e, int timeout)
> +static void gem_exec_nop(int i915, const struct intel_execution_engine2 *e)
> +{
> +	const uint32_t bbe = MI_BATCH_BUFFER_END;
> +	struct drm_i915_gem_exec_object2 obj = { };
> +	struct drm_i915_gem_execbuffer2 execbuf = {
> +		.buffers_ptr = to_user_pointer(&obj),
> +		.buffer_count = 1,
> +	};
> +
> +	obj.handle = gem_create(i915, 4096);
> +	gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
> +
> +	execbuf.flags = e->flags;
> +	gem_execbuf(i915, &execbuf);
> +
> +	gem_sync(i915, obj.handle);
> +	gem_close(i915, obj.handle);
> +}
> +
> +static void test_open_race(const struct intel_execution_engine2 *e, int timeout,
> +			   bool use_spin)
-------------------------- ^
This is not the best way to develop new code, it may be good
for fast development but imho it is better to refactor existing
code and avoiding to add bool logic into function.
It can be done later as this is revealing the bug.

>  {
>  	int *done;
>  
> @@ -4926,6 +4947,12 @@ static void test_open_race(const struct intel_execution_engine2 *e, int timeout)
>  				ctx = gem_context_create_for_engine(i915, e->class, e->instance);
>  				gem_context_set_persistence(i915, ctx, persistence);
>  
> +				if (!use_spin) {
> +					gem_exec_nop(i915, e);
> +					gem_context_destroy(i915, ctx);
> +					continue;
> +				}
> +
>  				spin = __igt_spin_new(i915, ctx, .ahnd = ahnd);
>  				for (int i = random() % 7; i--; ) {
>  					if (random() & 1) {
> @@ -5330,7 +5357,17 @@ igt_main
>  		for_each_physical_engine(drm_fd, e)
>  			if (e->class == I915_ENGINE_CLASS_RENDER)
>  				igt_dynamic_f("%s", e->name)
> -					test_open_race(e, 5);
> +					test_open_race(e, 5, true);
> +	}
> +

Please add here some TODO comments from discussions and a note
which will help bug filling team to classify that.

Regards,
Kamil

> +	igt_describe("Exercise perf open/close against intensive barrier preallocate/acquire");
> +	igt_subtest_with_dynamic("barrier-race") {
> +		const struct intel_execution_engine2 *e;
> +
> +		for_each_physical_engine(drm_fd, e)
> +			if (e->class == I915_ENGINE_CLASS_RENDER)
> +				igt_dynamic_f("%s", e->name)
> +					test_open_race(e, 5, false);
>  	}
>  
>  	igt_fixture {
> -- 
> 2.25.1
> 

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [igt-dev] [PATCH i-g-t 2/2] tests/i915/perf: Exercise barrier race
@ 2023-02-01 18:21     ` Kamil Konieczny
  0 siblings, 0 replies; 34+ messages in thread
From: Kamil Konieczny @ 2023-02-01 18:21 UTC (permalink / raw)
  To: igt-dev; +Cc: intel-gfx, Chris Wilson

Hi Janusz,

please send patches to igt ML and add other addresses to cc:
I have one nit, see below.

On 2023-01-31 at 10:17:31 +0100, Janusz Krzysztofik wrote:
> Add a new subtest focused on exercising interaction between perf open /
> close operations, which replace active barriers with perf requests, and
> concurrent barrier preallocate / acquire operations performed during
> context first pin / last unpin.
> 
> References: https://gitlab.freedesktop.org/drm/intel/-/issues/6333
> Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik@linux.intel.com>
> Cc: Chris Wilson <chris.p.wilson@linux.intel.com>
> ---
>  tests/i915/perf.c | 41 +++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 39 insertions(+), 2 deletions(-)
> 
> diff --git a/tests/i915/perf.c b/tests/i915/perf.c
> index e33cacc443..11a3ec21ab 100644
> --- a/tests/i915/perf.c
> +++ b/tests/i915/perf.c
> @@ -39,6 +39,7 @@
>  #include <math.h>
>  
>  #include "i915/gem.h"
> +#include "i915/gem_create.h"
>  #include "i915/perf.h"
>  #include "igt.h"
>  #include "igt_perf.h"
> @@ -4885,7 +4886,27 @@ test_whitelisted_registers_userspace_config(void)
>  	i915_perf_remove_config(drm_fd, config_id);
>  }
>  
> -static void test_open_race(const struct intel_execution_engine2 *e, int timeout)
> +static void gem_exec_nop(int i915, const struct intel_execution_engine2 *e)
> +{
> +	const uint32_t bbe = MI_BATCH_BUFFER_END;
> +	struct drm_i915_gem_exec_object2 obj = { };
> +	struct drm_i915_gem_execbuffer2 execbuf = {
> +		.buffers_ptr = to_user_pointer(&obj),
> +		.buffer_count = 1,
> +	};
> +
> +	obj.handle = gem_create(i915, 4096);
> +	gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
> +
> +	execbuf.flags = e->flags;
> +	gem_execbuf(i915, &execbuf);
> +
> +	gem_sync(i915, obj.handle);
> +	gem_close(i915, obj.handle);
> +}
> +
> +static void test_open_race(const struct intel_execution_engine2 *e, int timeout,
> +			   bool use_spin)
-------------------------- ^
This is not the best way to develop new code, it may be good
for fast development but imho it is better to refactor existing
code and avoiding to add bool logic into function.
It can be done later as this is revealing the bug.

>  {
>  	int *done;
>  
> @@ -4926,6 +4947,12 @@ static void test_open_race(const struct intel_execution_engine2 *e, int timeout)
>  				ctx = gem_context_create_for_engine(i915, e->class, e->instance);
>  				gem_context_set_persistence(i915, ctx, persistence);
>  
> +				if (!use_spin) {
> +					gem_exec_nop(i915, e);
> +					gem_context_destroy(i915, ctx);
> +					continue;
> +				}
> +
>  				spin = __igt_spin_new(i915, ctx, .ahnd = ahnd);
>  				for (int i = random() % 7; i--; ) {
>  					if (random() & 1) {
> @@ -5330,7 +5357,17 @@ igt_main
>  		for_each_physical_engine(drm_fd, e)
>  			if (e->class == I915_ENGINE_CLASS_RENDER)
>  				igt_dynamic_f("%s", e->name)
> -					test_open_race(e, 5);
> +					test_open_race(e, 5, true);
> +	}
> +

Please add here some TODO comments from discussions and a note
which will help bug filling team to classify that.

Regards,
Kamil

> +	igt_describe("Exercise perf open/close against intensive barrier preallocate/acquire");
> +	igt_subtest_with_dynamic("barrier-race") {
> +		const struct intel_execution_engine2 *e;
> +
> +		for_each_physical_engine(drm_fd, e)
> +			if (e->class == I915_ENGINE_CLASS_RENDER)
> +				igt_dynamic_f("%s", e->name)
> +					test_open_race(e, 5, false);
>  	}
>  
>  	igt_fixture {
> -- 
> 2.25.1
> 

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Intel-gfx] [PATCH i-g-t 2/2] tests/i915/perf: Exercise barrier race
  2023-02-01 18:21     ` [igt-dev] " Kamil Konieczny
@ 2023-02-01 19:16       ` Janusz Krzysztofik
  -1 siblings, 0 replies; 34+ messages in thread
From: Janusz Krzysztofik @ 2023-02-01 19:16 UTC (permalink / raw)
  To: Kamil Konieczny
  Cc: Arkadiusz Hiler, intel-gfx, igt-dev, Petri Latvala, Chris Wilson

Hi Kamil,

On Wednesday, 1 February 2023 19:21:57 CET Kamil Konieczny wrote:
> Hi Janusz,
> 
> please send patches to igt ML and add other addresses to cc:
> I have one nit, see below.
> 
> On 2023-01-31 at 10:17:31 +0100, Janusz Krzysztofik wrote:
> > Add a new subtest focused on exercising interaction between perf open /
> > close operations, which replace active barriers with perf requests, and
> > concurrent barrier preallocate / acquire operations performed during
> > context first pin / last unpin.
> > 
> > References: https://gitlab.freedesktop.org/drm/intel/-/issues/6333
> > Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik@linux.intel.com>
> > Cc: Chris Wilson <chris.p.wilson@linux.intel.com>
> > ---
> >  tests/i915/perf.c | 41 +++++++++++++++++++++++++++++++++++++++--
> >  1 file changed, 39 insertions(+), 2 deletions(-)
> > 
> > diff --git a/tests/i915/perf.c b/tests/i915/perf.c
> > index e33cacc443..11a3ec21ab 100644
> > --- a/tests/i915/perf.c
> > +++ b/tests/i915/perf.c
> > @@ -39,6 +39,7 @@
> >  #include <math.h>
> >  
> >  #include "i915/gem.h"
> > +#include "i915/gem_create.h"
> >  #include "i915/perf.h"
> >  #include "igt.h"
> >  #include "igt_perf.h"
> > @@ -4885,7 +4886,27 @@ test_whitelisted_registers_userspace_config(void)
> >  	i915_perf_remove_config(drm_fd, config_id);
> >  }
> >  
> > -static void test_open_race(const struct intel_execution_engine2 *e, int timeout)
> > +static void gem_exec_nop(int i915, const struct intel_execution_engine2 *e)
> > +{
> > +	const uint32_t bbe = MI_BATCH_BUFFER_END;
> > +	struct drm_i915_gem_exec_object2 obj = { };
> > +	struct drm_i915_gem_execbuffer2 execbuf = {
> > +		.buffers_ptr = to_user_pointer(&obj),
> > +		.buffer_count = 1,
> > +	};
> > +
> > +	obj.handle = gem_create(i915, 4096);
> > +	gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
> > +
> > +	execbuf.flags = e->flags;
> > +	gem_execbuf(i915, &execbuf);
> > +
> > +	gem_sync(i915, obj.handle);
> > +	gem_close(i915, obj.handle);
> > +}
> > +
> > +static void test_open_race(const struct intel_execution_engine2 *e, int timeout,
> > +			   bool use_spin)
> -------------------------- ^
> This is not the best way to develop new code, it may be good
> for fast development but imho it is better to refactor existing
> code and avoiding to add bool logic into function.

I'm not sure what you mean.  By refactoring, do you mean moving the code 
common to both processing paths out to a separate helper, then call it from 
two distinct functions, each implementing only one scenario?  What's wrong 
with passing flags that select one of alternative processing paths within one 
function?  Or are you just not happy with bool type?

> It can be done later as this is revealing the bug.
> 
> >  {
> >  	int *done;
> >  
> > @@ -4926,6 +4947,12 @@ static void test_open_race(const struct intel_execution_engine2 *e, int timeout)
> >  				ctx = gem_context_create_for_engine(i915, e->class, e->instance);
> >  				gem_context_set_persistence(i915, ctx, persistence);
> >  
> > +				if (!use_spin) {
> > +					gem_exec_nop(i915, e);
> > +					gem_context_destroy(i915, ctx);
> > +					continue;
> > +				}
> > +
> >  				spin = __igt_spin_new(i915, ctx, .ahnd = ahnd);
> >  				for (int i = random() % 7; i--; ) {
> >  					if (random() & 1) {
> > @@ -5330,7 +5357,17 @@ igt_main
> >  		for_each_physical_engine(drm_fd, e)
> >  			if (e->class == I915_ENGINE_CLASS_RENDER)
> >  				igt_dynamic_f("%s", e->name)
> > -					test_open_race(e, 5);
> > +					test_open_race(e, 5, true);
> > +	}
> > +
> 
> Please add here some TODO comments from discussions and a note
> which will help bug filling team to classify that.

TODO about moving the subtest to a different test -- OK.  But instructions for 
Bug Filling team?  Hmm, what if this subtest triggers a completely different 
bug in the future?  Are we going to update the comments then?  Do Bug Filling 
team members really read the sources while classifying bugs?

Please have another look at an example of expected dmesg from the currently 
triggered bug:

<4> [192.634015] list_del corruption. prev->next should be ffff8881479d34e0, but was ffff888108af4ce0. (prev=ffff888127335160)
<4> [192.634032] WARNING: CPU: 1 PID: 1286 at lib/list_debug.c:59 __list_del_entry_valid+0xb7/0xe0
<4> [192.634041] Modules linked in: vgem drm_shmem_helper fuse snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic ledtrig_audio x86_pkg_temp_thermal coretemp i915 prime_numbers i2c_algo_bit kvm_intel ttm snd_hda_intel snd_intel_dspcfg kvm drm_buddy mei_pxp snd_hda_codec smsc75xx irqbypass mei_hdcp e1000e drm_display_helper usbnet snd_hwdep crct10dif_pclmul wmi_bmof mii crc32_pclmul ghash_clmulni_intel igc snd_hda_core drm_kms_helper ptp mei_me i2c_i801 syscopyarea snd_pcm pps_core sysfillrect i2c_smbus mei sysimgblt video intel_lpss_pci wmi
<4> [192.634156] CPU: 1 PID: 1286 Comm: perf Not tainted 6.2.0-rc6-CI_DRM_12672-gdf5c31e78609+ #1
<4> [192.634161] Hardware name: Intel Corporation Rocket Lake Client Platform/RocketLake S SODIMM RVP, BIOS RKLSFWI1.R00.1435.A00.2010232019 10/23/2020
<4> [192.634165] RIP: 0010:__list_del_entry_valid+0xb7/0xe0
<4> [192.634170] Code: cc cc cc 48 89 ca 48 c7 c7 78 00 46 82 e8 0d 7e 61 00 0f 0b 31 c0 c3 cc cc cc cc 4c 89 c2 48 c7 c7 b0 00 46 82 e8 f5 7d 61 00 <0f> 0b 31 c0 c3 cc cc cc cc 48 89 d1 4c 89 c6 4c 89 ca 48 c7 c7 f8
<4> [192.634174] RSP: 0018:ffffc90001f2fcb0 EFLAGS: 00010082
<4> [192.634180] RAX: 0000000000000000 RBX: ffff88812f8dec40 RCX: 0000000000000000
<4> [192.634184] RDX: 0000000000000003 RSI: ffffffff823d7c88 RDI: 00000000ffffffff
<4> [192.634187] RBP: ffff8881479d34d8 R08: 0000000000000000 R09: ffffc90001f2fb60
<4> [192.634191] R10: 0000000000000001 R11: 0000000000000001 R12: ffff88812f8df440
<4> [192.634194] R13: ffff88812f8df440 R14: ffff8881479d34e0 R15: 0000000000000246
<4> [192.634197] FS:  00007fa38f9514c0(0000) GS:ffff888450080000(0000) knlGS:0000000000000000
<4> [192.634201] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
<4> [192.634205] CR2: 00007fa392c76000 CR3: 000000010809e003 CR4: 0000000000770ee0
<4> [192.634208] PKRU: 55555554
<4> [192.634211] Call Trace:
<4> [192.634214]  <TASK>
<4> [192.634218]  __i915_active_fence_set+0x71/0xf0 [i915]
<4> [192.634412]  __i915_request_commit+0x2e1/0x5b0 [i915]
<4> [192.634597]  i915_request_add+0xa6/0x330 [i915]
<4> [192.634783]  gen8_modify_context+0xc2/0x220 [i915]
<4> [192.634958]  oa_configure_all_contexts.isra.0+0x183/0x400 [i915]
<4> [192.635137]  gen12_disable_metric_set+0x98/0x160 [i915]
<4> [192.635305]  i915_oa_stream_destroy+0x3c/0x330 [i915]
<4> [192.635480]  i915_perf_destroy_locked+0x22/0x80 [i915]
<4> [192.635646]  i915_perf_release+0x35/0x50 [i915]
<4> [192.635808]  __fput+0x95/0x260

Isn't such information from actual bug occurrence, contained in CI reports the 
Bug Filling team is working with, more meaningful than any comment added to 
the test source code?  How can a test know in advance what bugs it will ever 
trigger, and who exactly will be responsible for fixing them?

Thanks,
Janusz

> 
> Regards,
> Kamil
> 
> > +	igt_describe("Exercise perf open/close against intensive barrier preallocate/acquire");
> > +	igt_subtest_with_dynamic("barrier-race") {
> > +		const struct intel_execution_engine2 *e;
> > +
> > +		for_each_physical_engine(drm_fd, e)
> > +			if (e->class == I915_ENGINE_CLASS_RENDER)
> > +				igt_dynamic_f("%s", e->name)
> > +					test_open_race(e, 5, false);
> >  	}
> >  
> >  	igt_fixture {
> 





^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [igt-dev] [PATCH i-g-t 2/2] tests/i915/perf: Exercise barrier race
@ 2023-02-01 19:16       ` Janusz Krzysztofik
  0 siblings, 0 replies; 34+ messages in thread
From: Janusz Krzysztofik @ 2023-02-01 19:16 UTC (permalink / raw)
  To: Kamil Konieczny; +Cc: intel-gfx, igt-dev, Chris Wilson

Hi Kamil,

On Wednesday, 1 February 2023 19:21:57 CET Kamil Konieczny wrote:
> Hi Janusz,
> 
> please send patches to igt ML and add other addresses to cc:
> I have one nit, see below.
> 
> On 2023-01-31 at 10:17:31 +0100, Janusz Krzysztofik wrote:
> > Add a new subtest focused on exercising interaction between perf open /
> > close operations, which replace active barriers with perf requests, and
> > concurrent barrier preallocate / acquire operations performed during
> > context first pin / last unpin.
> > 
> > References: https://gitlab.freedesktop.org/drm/intel/-/issues/6333
> > Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik@linux.intel.com>
> > Cc: Chris Wilson <chris.p.wilson@linux.intel.com>
> > ---
> >  tests/i915/perf.c | 41 +++++++++++++++++++++++++++++++++++++++--
> >  1 file changed, 39 insertions(+), 2 deletions(-)
> > 
> > diff --git a/tests/i915/perf.c b/tests/i915/perf.c
> > index e33cacc443..11a3ec21ab 100644
> > --- a/tests/i915/perf.c
> > +++ b/tests/i915/perf.c
> > @@ -39,6 +39,7 @@
> >  #include <math.h>
> >  
> >  #include "i915/gem.h"
> > +#include "i915/gem_create.h"
> >  #include "i915/perf.h"
> >  #include "igt.h"
> >  #include "igt_perf.h"
> > @@ -4885,7 +4886,27 @@ test_whitelisted_registers_userspace_config(void)
> >  	i915_perf_remove_config(drm_fd, config_id);
> >  }
> >  
> > -static void test_open_race(const struct intel_execution_engine2 *e, int timeout)
> > +static void gem_exec_nop(int i915, const struct intel_execution_engine2 *e)
> > +{
> > +	const uint32_t bbe = MI_BATCH_BUFFER_END;
> > +	struct drm_i915_gem_exec_object2 obj = { };
> > +	struct drm_i915_gem_execbuffer2 execbuf = {
> > +		.buffers_ptr = to_user_pointer(&obj),
> > +		.buffer_count = 1,
> > +	};
> > +
> > +	obj.handle = gem_create(i915, 4096);
> > +	gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
> > +
> > +	execbuf.flags = e->flags;
> > +	gem_execbuf(i915, &execbuf);
> > +
> > +	gem_sync(i915, obj.handle);
> > +	gem_close(i915, obj.handle);
> > +}
> > +
> > +static void test_open_race(const struct intel_execution_engine2 *e, int timeout,
> > +			   bool use_spin)
> -------------------------- ^
> This is not the best way to develop new code, it may be good
> for fast development but imho it is better to refactor existing
> code and avoiding to add bool logic into function.

I'm not sure what you mean.  By refactoring, do you mean moving the code 
common to both processing paths out to a separate helper, then call it from 
two distinct functions, each implementing only one scenario?  What's wrong 
with passing flags that select one of alternative processing paths within one 
function?  Or are you just not happy with bool type?

> It can be done later as this is revealing the bug.
> 
> >  {
> >  	int *done;
> >  
> > @@ -4926,6 +4947,12 @@ static void test_open_race(const struct intel_execution_engine2 *e, int timeout)
> >  				ctx = gem_context_create_for_engine(i915, e->class, e->instance);
> >  				gem_context_set_persistence(i915, ctx, persistence);
> >  
> > +				if (!use_spin) {
> > +					gem_exec_nop(i915, e);
> > +					gem_context_destroy(i915, ctx);
> > +					continue;
> > +				}
> > +
> >  				spin = __igt_spin_new(i915, ctx, .ahnd = ahnd);
> >  				for (int i = random() % 7; i--; ) {
> >  					if (random() & 1) {
> > @@ -5330,7 +5357,17 @@ igt_main
> >  		for_each_physical_engine(drm_fd, e)
> >  			if (e->class == I915_ENGINE_CLASS_RENDER)
> >  				igt_dynamic_f("%s", e->name)
> > -					test_open_race(e, 5);
> > +					test_open_race(e, 5, true);
> > +	}
> > +
> 
> Please add here some TODO comments from discussions and a note
> which will help bug filling team to classify that.

TODO about moving the subtest to a different test -- OK.  But instructions for 
Bug Filling team?  Hmm, what if this subtest triggers a completely different 
bug in the future?  Are we going to update the comments then?  Do Bug Filling 
team members really read the sources while classifying bugs?

Please have another look at an example of expected dmesg from the currently 
triggered bug:

<4> [192.634015] list_del corruption. prev->next should be ffff8881479d34e0, but was ffff888108af4ce0. (prev=ffff888127335160)
<4> [192.634032] WARNING: CPU: 1 PID: 1286 at lib/list_debug.c:59 __list_del_entry_valid+0xb7/0xe0
<4> [192.634041] Modules linked in: vgem drm_shmem_helper fuse snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic ledtrig_audio x86_pkg_temp_thermal coretemp i915 prime_numbers i2c_algo_bit kvm_intel ttm snd_hda_intel snd_intel_dspcfg kvm drm_buddy mei_pxp snd_hda_codec smsc75xx irqbypass mei_hdcp e1000e drm_display_helper usbnet snd_hwdep crct10dif_pclmul wmi_bmof mii crc32_pclmul ghash_clmulni_intel igc snd_hda_core drm_kms_helper ptp mei_me i2c_i801 syscopyarea snd_pcm pps_core sysfillrect i2c_smbus mei sysimgblt video intel_lpss_pci wmi
<4> [192.634156] CPU: 1 PID: 1286 Comm: perf Not tainted 6.2.0-rc6-CI_DRM_12672-gdf5c31e78609+ #1
<4> [192.634161] Hardware name: Intel Corporation Rocket Lake Client Platform/RocketLake S SODIMM RVP, BIOS RKLSFWI1.R00.1435.A00.2010232019 10/23/2020
<4> [192.634165] RIP: 0010:__list_del_entry_valid+0xb7/0xe0
<4> [192.634170] Code: cc cc cc 48 89 ca 48 c7 c7 78 00 46 82 e8 0d 7e 61 00 0f 0b 31 c0 c3 cc cc cc cc 4c 89 c2 48 c7 c7 b0 00 46 82 e8 f5 7d 61 00 <0f> 0b 31 c0 c3 cc cc cc cc 48 89 d1 4c 89 c6 4c 89 ca 48 c7 c7 f8
<4> [192.634174] RSP: 0018:ffffc90001f2fcb0 EFLAGS: 00010082
<4> [192.634180] RAX: 0000000000000000 RBX: ffff88812f8dec40 RCX: 0000000000000000
<4> [192.634184] RDX: 0000000000000003 RSI: ffffffff823d7c88 RDI: 00000000ffffffff
<4> [192.634187] RBP: ffff8881479d34d8 R08: 0000000000000000 R09: ffffc90001f2fb60
<4> [192.634191] R10: 0000000000000001 R11: 0000000000000001 R12: ffff88812f8df440
<4> [192.634194] R13: ffff88812f8df440 R14: ffff8881479d34e0 R15: 0000000000000246
<4> [192.634197] FS:  00007fa38f9514c0(0000) GS:ffff888450080000(0000) knlGS:0000000000000000
<4> [192.634201] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
<4> [192.634205] CR2: 00007fa392c76000 CR3: 000000010809e003 CR4: 0000000000770ee0
<4> [192.634208] PKRU: 55555554
<4> [192.634211] Call Trace:
<4> [192.634214]  <TASK>
<4> [192.634218]  __i915_active_fence_set+0x71/0xf0 [i915]
<4> [192.634412]  __i915_request_commit+0x2e1/0x5b0 [i915]
<4> [192.634597]  i915_request_add+0xa6/0x330 [i915]
<4> [192.634783]  gen8_modify_context+0xc2/0x220 [i915]
<4> [192.634958]  oa_configure_all_contexts.isra.0+0x183/0x400 [i915]
<4> [192.635137]  gen12_disable_metric_set+0x98/0x160 [i915]
<4> [192.635305]  i915_oa_stream_destroy+0x3c/0x330 [i915]
<4> [192.635480]  i915_perf_destroy_locked+0x22/0x80 [i915]
<4> [192.635646]  i915_perf_release+0x35/0x50 [i915]
<4> [192.635808]  __fput+0x95/0x260

Isn't such information from actual bug occurrence, contained in CI reports the 
Bug Filling team is working with, more meaningful than any comment added to 
the test source code?  How can a test know in advance what bugs it will ever 
trigger, and who exactly will be responsible for fixing them?

Thanks,
Janusz

> 
> Regards,
> Kamil
> 
> > +	igt_describe("Exercise perf open/close against intensive barrier preallocate/acquire");
> > +	igt_subtest_with_dynamic("barrier-race") {
> > +		const struct intel_execution_engine2 *e;
> > +
> > +		for_each_physical_engine(drm_fd, e)
> > +			if (e->class == I915_ENGINE_CLASS_RENDER)
> > +				igt_dynamic_f("%s", e->name)
> > +					test_open_race(e, 5, false);
> >  	}
> >  
> >  	igt_fixture {
> 




^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Intel-gfx] [PATCH i-g-t 2/2] tests/i915/perf: Exercise barrier race
  2023-02-01 19:16       ` [igt-dev] " Janusz Krzysztofik
@ 2023-02-02 10:19         ` Kamil Konieczny
  -1 siblings, 0 replies; 34+ messages in thread
From: Kamil Konieczny @ 2023-02-02 10:19 UTC (permalink / raw)
  To: igt-dev; +Cc: Arkadiusz Hiler, intel-gfx, Petri Latvala, Chris Wilson

Hi Janusz,

On 2023-02-01 at 20:16:11 +0100, Janusz Krzysztofik wrote:
> Hi Kamil,
> 
> On Wednesday, 1 February 2023 19:21:57 CET Kamil Konieczny wrote:
> > Hi Janusz,
> > 
> > please send patches to igt ML and add other addresses to cc:
> > I have one nit, see below.
> > 
> > On 2023-01-31 at 10:17:31 +0100, Janusz Krzysztofik wrote:
> > > Add a new subtest focused on exercising interaction between perf open /
> > > close operations, which replace active barriers with perf requests, and
> > > concurrent barrier preallocate / acquire operations performed during
> > > context first pin / last unpin.
> > > 
> > > References: https://gitlab.freedesktop.org/drm/intel/-/issues/6333
> > > Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik@linux.intel.com>
> > > Cc: Chris Wilson <chris.p.wilson@linux.intel.com>
> > > ---
> > >  tests/i915/perf.c | 41 +++++++++++++++++++++++++++++++++++++++--
> > >  1 file changed, 39 insertions(+), 2 deletions(-)
> > > 
> > > diff --git a/tests/i915/perf.c b/tests/i915/perf.c
> > > index e33cacc443..11a3ec21ab 100644
> > > --- a/tests/i915/perf.c
> > > +++ b/tests/i915/perf.c
> > > @@ -39,6 +39,7 @@
> > >  #include <math.h>
> > >  
> > >  #include "i915/gem.h"
> > > +#include "i915/gem_create.h"
> > >  #include "i915/perf.h"
> > >  #include "igt.h"
> > >  #include "igt_perf.h"
> > > @@ -4885,7 +4886,27 @@ test_whitelisted_registers_userspace_config(void)
> > >  	i915_perf_remove_config(drm_fd, config_id);
> > >  }
> > >  
> > > -static void test_open_race(const struct intel_execution_engine2 *e, int timeout)
> > > +static void gem_exec_nop(int i915, const struct intel_execution_engine2 *e)
> > > +{
> > > +	const uint32_t bbe = MI_BATCH_BUFFER_END;
> > > +	struct drm_i915_gem_exec_object2 obj = { };
> > > +	struct drm_i915_gem_execbuffer2 execbuf = {
> > > +		.buffers_ptr = to_user_pointer(&obj),
> > > +		.buffer_count = 1,
> > > +	};
> > > +
> > > +	obj.handle = gem_create(i915, 4096);
> > > +	gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
> > > +
> > > +	execbuf.flags = e->flags;
> > > +	gem_execbuf(i915, &execbuf);
> > > +
> > > +	gem_sync(i915, obj.handle);
> > > +	gem_close(i915, obj.handle);
> > > +}
> > > +
> > > +static void test_open_race(const struct intel_execution_engine2 *e, int timeout,
> > > +			   bool use_spin)
> > -------------------------- ^
> > This is not the best way to develop new code, it may be good
> > for fast development but imho it is better to refactor existing
> > code and avoiding to add bool logic into function.
> 
> I'm not sure what you mean.  By refactoring, do you mean moving the code 
> common to both processing paths out to a separate helper, then call it from 
> two distinct functions, each implementing only one scenario?  What's wrong 
> with passing flags that select one of alternative processing paths within one 
> function?  Or are you just not happy with bool type?

It is not wrong unless there is only one such var but even then
readability of code is reduced.

> 
> > It can be done later as this is revealing the bug.
> > 
> > >  {
> > >  	int *done;
> > >  
> > > @@ -4926,6 +4947,12 @@ static void test_open_race(const struct intel_execution_engine2 *e, int timeout)
> > >  				ctx = gem_context_create_for_engine(i915, e->class, e->instance);
> > >  				gem_context_set_persistence(i915, ctx, persistence);
> > >  
> > > +				if (!use_spin) {
> > > +					gem_exec_nop(i915, e);
> > > +					gem_context_destroy(i915, ctx);
> > > +					continue;
> > > +				}
> > > +
> > >  				spin = __igt_spin_new(i915, ctx, .ahnd = ahnd);
> > >  				for (int i = random() % 7; i--; ) {
> > >  					if (random() & 1) {
> > > @@ -5330,7 +5357,17 @@ igt_main
> > >  		for_each_physical_engine(drm_fd, e)
> > >  			if (e->class == I915_ENGINE_CLASS_RENDER)
> > >  				igt_dynamic_f("%s", e->name)
> > > -					test_open_race(e, 5);
> > > +					test_open_race(e, 5, true);
> > > +	}
> > > +
> > 
> > Please add here some TODO comments from discussions and a note
> > which will help bug filling team to classify that.
> 
> TODO about moving the subtest to a different test -- OK.  But instructions for 
> Bug Filling team?  Hmm, what if this subtest triggers a completely different 
> bug in the future?  Are we going to update the comments then?  Do Bug Filling 
> team members really read the sources while classifying bugs?
> 
> Please have another look at an example of expected dmesg from the currently 
> triggered bug:
> 
> <4> [192.634015] list_del corruption. prev->next should be ffff8881479d34e0, but was ffff888108af4ce0. (prev=ffff888127335160)
> <4> [192.634032] WARNING: CPU: 1 PID: 1286 at lib/list_debug.c:59 __list_del_entry_valid+0xb7/0xe0

Ok, I see that it is kernel bug here. You are right,
please add only TODO here.

Regards,
Kamil

> <4> [192.634041] Modules linked in: vgem drm_shmem_helper fuse snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic ledtrig_audio x86_pkg_temp_thermal coretemp i915 prime_numbers i2c_algo_bit kvm_intel ttm snd_hda_intel snd_intel_dspcfg kvm drm_buddy mei_pxp snd_hda_codec smsc75xx irqbypass mei_hdcp e1000e drm_display_helper usbnet snd_hwdep crct10dif_pclmul wmi_bmof mii crc32_pclmul ghash_clmulni_intel igc snd_hda_core drm_kms_helper ptp mei_me i2c_i801 syscopyarea snd_pcm pps_core sysfillrect i2c_smbus mei sysimgblt video intel_lpss_pci wmi
> <4> [192.634156] CPU: 1 PID: 1286 Comm: perf Not tainted 6.2.0-rc6-CI_DRM_12672-gdf5c31e78609+ #1
> <4> [192.634161] Hardware name: Intel Corporation Rocket Lake Client Platform/RocketLake S SODIMM RVP, BIOS RKLSFWI1.R00.1435.A00.2010232019 10/23/2020
> <4> [192.634165] RIP: 0010:__list_del_entry_valid+0xb7/0xe0
> <4> [192.634170] Code: cc cc cc 48 89 ca 48 c7 c7 78 00 46 82 e8 0d 7e 61 00 0f 0b 31 c0 c3 cc cc cc cc 4c 89 c2 48 c7 c7 b0 00 46 82 e8 f5 7d 61 00 <0f> 0b 31 c0 c3 cc cc cc cc 48 89 d1 4c 89 c6 4c 89 ca 48 c7 c7 f8
> <4> [192.634174] RSP: 0018:ffffc90001f2fcb0 EFLAGS: 00010082
> <4> [192.634180] RAX: 0000000000000000 RBX: ffff88812f8dec40 RCX: 0000000000000000
> <4> [192.634184] RDX: 0000000000000003 RSI: ffffffff823d7c88 RDI: 00000000ffffffff
> <4> [192.634187] RBP: ffff8881479d34d8 R08: 0000000000000000 R09: ffffc90001f2fb60
> <4> [192.634191] R10: 0000000000000001 R11: 0000000000000001 R12: ffff88812f8df440
> <4> [192.634194] R13: ffff88812f8df440 R14: ffff8881479d34e0 R15: 0000000000000246
> <4> [192.634197] FS:  00007fa38f9514c0(0000) GS:ffff888450080000(0000) knlGS:0000000000000000
> <4> [192.634201] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> <4> [192.634205] CR2: 00007fa392c76000 CR3: 000000010809e003 CR4: 0000000000770ee0
> <4> [192.634208] PKRU: 55555554
> <4> [192.634211] Call Trace:
> <4> [192.634214]  <TASK>
> <4> [192.634218]  __i915_active_fence_set+0x71/0xf0 [i915]
> <4> [192.634412]  __i915_request_commit+0x2e1/0x5b0 [i915]
> <4> [192.634597]  i915_request_add+0xa6/0x330 [i915]
> <4> [192.634783]  gen8_modify_context+0xc2/0x220 [i915]
> <4> [192.634958]  oa_configure_all_contexts.isra.0+0x183/0x400 [i915]
> <4> [192.635137]  gen12_disable_metric_set+0x98/0x160 [i915]
> <4> [192.635305]  i915_oa_stream_destroy+0x3c/0x330 [i915]
> <4> [192.635480]  i915_perf_destroy_locked+0x22/0x80 [i915]
> <4> [192.635646]  i915_perf_release+0x35/0x50 [i915]
> <4> [192.635808]  __fput+0x95/0x260
> 
> Isn't such information from actual bug occurrence, contained in CI reports the 
> Bug Filling team is working with, more meaningful than any comment added to 
> the test source code?  How can a test know in advance what bugs it will ever 
> trigger, and who exactly will be responsible for fixing them?
> 
> Thanks,
> Janusz
> 
> > 
> > Regards,
> > Kamil
> > 
> > > +	igt_describe("Exercise perf open/close against intensive barrier preallocate/acquire");
> > > +	igt_subtest_with_dynamic("barrier-race") {
> > > +		const struct intel_execution_engine2 *e;
> > > +
> > > +		for_each_physical_engine(drm_fd, e)
> > > +			if (e->class == I915_ENGINE_CLASS_RENDER)
> > > +				igt_dynamic_f("%s", e->name)
> > > +					test_open_race(e, 5, false);
> > >  	}
> > >  
> > >  	igt_fixture {
> > 
> 
> 
> 
> 

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [igt-dev] [PATCH i-g-t 2/2] tests/i915/perf: Exercise barrier race
@ 2023-02-02 10:19         ` Kamil Konieczny
  0 siblings, 0 replies; 34+ messages in thread
From: Kamil Konieczny @ 2023-02-02 10:19 UTC (permalink / raw)
  To: igt-dev; +Cc: intel-gfx, Chris Wilson

Hi Janusz,

On 2023-02-01 at 20:16:11 +0100, Janusz Krzysztofik wrote:
> Hi Kamil,
> 
> On Wednesday, 1 February 2023 19:21:57 CET Kamil Konieczny wrote:
> > Hi Janusz,
> > 
> > please send patches to igt ML and add other addresses to cc:
> > I have one nit, see below.
> > 
> > On 2023-01-31 at 10:17:31 +0100, Janusz Krzysztofik wrote:
> > > Add a new subtest focused on exercising interaction between perf open /
> > > close operations, which replace active barriers with perf requests, and
> > > concurrent barrier preallocate / acquire operations performed during
> > > context first pin / last unpin.
> > > 
> > > References: https://gitlab.freedesktop.org/drm/intel/-/issues/6333
> > > Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik@linux.intel.com>
> > > Cc: Chris Wilson <chris.p.wilson@linux.intel.com>
> > > ---
> > >  tests/i915/perf.c | 41 +++++++++++++++++++++++++++++++++++++++--
> > >  1 file changed, 39 insertions(+), 2 deletions(-)
> > > 
> > > diff --git a/tests/i915/perf.c b/tests/i915/perf.c
> > > index e33cacc443..11a3ec21ab 100644
> > > --- a/tests/i915/perf.c
> > > +++ b/tests/i915/perf.c
> > > @@ -39,6 +39,7 @@
> > >  #include <math.h>
> > >  
> > >  #include "i915/gem.h"
> > > +#include "i915/gem_create.h"
> > >  #include "i915/perf.h"
> > >  #include "igt.h"
> > >  #include "igt_perf.h"
> > > @@ -4885,7 +4886,27 @@ test_whitelisted_registers_userspace_config(void)
> > >  	i915_perf_remove_config(drm_fd, config_id);
> > >  }
> > >  
> > > -static void test_open_race(const struct intel_execution_engine2 *e, int timeout)
> > > +static void gem_exec_nop(int i915, const struct intel_execution_engine2 *e)
> > > +{
> > > +	const uint32_t bbe = MI_BATCH_BUFFER_END;
> > > +	struct drm_i915_gem_exec_object2 obj = { };
> > > +	struct drm_i915_gem_execbuffer2 execbuf = {
> > > +		.buffers_ptr = to_user_pointer(&obj),
> > > +		.buffer_count = 1,
> > > +	};
> > > +
> > > +	obj.handle = gem_create(i915, 4096);
> > > +	gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
> > > +
> > > +	execbuf.flags = e->flags;
> > > +	gem_execbuf(i915, &execbuf);
> > > +
> > > +	gem_sync(i915, obj.handle);
> > > +	gem_close(i915, obj.handle);
> > > +}
> > > +
> > > +static void test_open_race(const struct intel_execution_engine2 *e, int timeout,
> > > +			   bool use_spin)
> > -------------------------- ^
> > This is not the best way to develop new code, it may be good
> > for fast development but imho it is better to refactor existing
> > code and avoiding to add bool logic into function.
> 
> I'm not sure what you mean.  By refactoring, do you mean moving the code 
> common to both processing paths out to a separate helper, then call it from 
> two distinct functions, each implementing only one scenario?  What's wrong 
> with passing flags that select one of alternative processing paths within one 
> function?  Or are you just not happy with bool type?

It is not wrong unless there is only one such var but even then
readability of code is reduced.

> 
> > It can be done later as this is revealing the bug.
> > 
> > >  {
> > >  	int *done;
> > >  
> > > @@ -4926,6 +4947,12 @@ static void test_open_race(const struct intel_execution_engine2 *e, int timeout)
> > >  				ctx = gem_context_create_for_engine(i915, e->class, e->instance);
> > >  				gem_context_set_persistence(i915, ctx, persistence);
> > >  
> > > +				if (!use_spin) {
> > > +					gem_exec_nop(i915, e);
> > > +					gem_context_destroy(i915, ctx);
> > > +					continue;
> > > +				}
> > > +
> > >  				spin = __igt_spin_new(i915, ctx, .ahnd = ahnd);
> > >  				for (int i = random() % 7; i--; ) {
> > >  					if (random() & 1) {
> > > @@ -5330,7 +5357,17 @@ igt_main
> > >  		for_each_physical_engine(drm_fd, e)
> > >  			if (e->class == I915_ENGINE_CLASS_RENDER)
> > >  				igt_dynamic_f("%s", e->name)
> > > -					test_open_race(e, 5);
> > > +					test_open_race(e, 5, true);
> > > +	}
> > > +
> > 
> > Please add here some TODO comments from discussions and a note
> > which will help bug filling team to classify that.
> 
> TODO about moving the subtest to a different test -- OK.  But instructions for 
> Bug Filling team?  Hmm, what if this subtest triggers a completely different 
> bug in the future?  Are we going to update the comments then?  Do Bug Filling 
> team members really read the sources while classifying bugs?
> 
> Please have another look at an example of expected dmesg from the currently 
> triggered bug:
> 
> <4> [192.634015] list_del corruption. prev->next should be ffff8881479d34e0, but was ffff888108af4ce0. (prev=ffff888127335160)
> <4> [192.634032] WARNING: CPU: 1 PID: 1286 at lib/list_debug.c:59 __list_del_entry_valid+0xb7/0xe0

Ok, I see that it is kernel bug here. You are right,
please add only TODO here.

Regards,
Kamil

> <4> [192.634041] Modules linked in: vgem drm_shmem_helper fuse snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic ledtrig_audio x86_pkg_temp_thermal coretemp i915 prime_numbers i2c_algo_bit kvm_intel ttm snd_hda_intel snd_intel_dspcfg kvm drm_buddy mei_pxp snd_hda_codec smsc75xx irqbypass mei_hdcp e1000e drm_display_helper usbnet snd_hwdep crct10dif_pclmul wmi_bmof mii crc32_pclmul ghash_clmulni_intel igc snd_hda_core drm_kms_helper ptp mei_me i2c_i801 syscopyarea snd_pcm pps_core sysfillrect i2c_smbus mei sysimgblt video intel_lpss_pci wmi
> <4> [192.634156] CPU: 1 PID: 1286 Comm: perf Not tainted 6.2.0-rc6-CI_DRM_12672-gdf5c31e78609+ #1
> <4> [192.634161] Hardware name: Intel Corporation Rocket Lake Client Platform/RocketLake S SODIMM RVP, BIOS RKLSFWI1.R00.1435.A00.2010232019 10/23/2020
> <4> [192.634165] RIP: 0010:__list_del_entry_valid+0xb7/0xe0
> <4> [192.634170] Code: cc cc cc 48 89 ca 48 c7 c7 78 00 46 82 e8 0d 7e 61 00 0f 0b 31 c0 c3 cc cc cc cc 4c 89 c2 48 c7 c7 b0 00 46 82 e8 f5 7d 61 00 <0f> 0b 31 c0 c3 cc cc cc cc 48 89 d1 4c 89 c6 4c 89 ca 48 c7 c7 f8
> <4> [192.634174] RSP: 0018:ffffc90001f2fcb0 EFLAGS: 00010082
> <4> [192.634180] RAX: 0000000000000000 RBX: ffff88812f8dec40 RCX: 0000000000000000
> <4> [192.634184] RDX: 0000000000000003 RSI: ffffffff823d7c88 RDI: 00000000ffffffff
> <4> [192.634187] RBP: ffff8881479d34d8 R08: 0000000000000000 R09: ffffc90001f2fb60
> <4> [192.634191] R10: 0000000000000001 R11: 0000000000000001 R12: ffff88812f8df440
> <4> [192.634194] R13: ffff88812f8df440 R14: ffff8881479d34e0 R15: 0000000000000246
> <4> [192.634197] FS:  00007fa38f9514c0(0000) GS:ffff888450080000(0000) knlGS:0000000000000000
> <4> [192.634201] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> <4> [192.634205] CR2: 00007fa392c76000 CR3: 000000010809e003 CR4: 0000000000770ee0
> <4> [192.634208] PKRU: 55555554
> <4> [192.634211] Call Trace:
> <4> [192.634214]  <TASK>
> <4> [192.634218]  __i915_active_fence_set+0x71/0xf0 [i915]
> <4> [192.634412]  __i915_request_commit+0x2e1/0x5b0 [i915]
> <4> [192.634597]  i915_request_add+0xa6/0x330 [i915]
> <4> [192.634783]  gen8_modify_context+0xc2/0x220 [i915]
> <4> [192.634958]  oa_configure_all_contexts.isra.0+0x183/0x400 [i915]
> <4> [192.635137]  gen12_disable_metric_set+0x98/0x160 [i915]
> <4> [192.635305]  i915_oa_stream_destroy+0x3c/0x330 [i915]
> <4> [192.635480]  i915_perf_destroy_locked+0x22/0x80 [i915]
> <4> [192.635646]  i915_perf_release+0x35/0x50 [i915]
> <4> [192.635808]  __fput+0x95/0x260
> 
> Isn't such information from actual bug occurrence, contained in CI reports the 
> Bug Filling team is working with, more meaningful than any comment added to 
> the test source code?  How can a test know in advance what bugs it will ever 
> trigger, and who exactly will be responsible for fixing them?
> 
> Thanks,
> Janusz
> 
> > 
> > Regards,
> > Kamil
> > 
> > > +	igt_describe("Exercise perf open/close against intensive barrier preallocate/acquire");
> > > +	igt_subtest_with_dynamic("barrier-race") {
> > > +		const struct intel_execution_engine2 *e;
> > > +
> > > +		for_each_physical_engine(drm_fd, e)
> > > +			if (e->class == I915_ENGINE_CLASS_RENDER)
> > > +				igt_dynamic_f("%s", e->name)
> > > +					test_open_race(e, 5, false);
> > >  	}
> > >  
> > >  	igt_fixture {
> > 
> 
> 
> 
> 

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Intel-gfx] [igt-dev] [PATCH i-g-t 0/2] tests/i915/perf: Add stress / race exercises
  2023-01-31  9:17 ` [igt-dev] " Janusz Krzysztofik
@ 2023-02-03 19:21   ` Umesh Nerlige Ramappa
  -1 siblings, 0 replies; 34+ messages in thread
From: Umesh Nerlige Ramappa @ 2023-02-03 19:21 UTC (permalink / raw)
  To: Janusz Krzysztofik
  Cc: igt-dev, Arkadiusz Hiler, intel-gfx, Petri Latvala, Chris Wilson

On Tue, Jan 31, 2023 at 10:17:29AM +0100, Janusz Krzysztofik wrote:
>Users reported oopses on list corruptions when using i915 perf with a
>number of concurrently running graphics applications.  That indicates we
>are currently missing some important tests for such scenarios.  Cover
>that gap.

Any bug ids that were filed for the issue? Also what were the parameters 
passed to perf OA when the issues were reported? This could help 
actually root cause the issue and have a test closer to the actual use 
case.

Regards,
Umesh
>
>Chris Wilson (1):
>  i915/perf: Stress opening of new perf streams against existing
>    contexts
>
>Janusz Krzysztofik (1):
>  tests/i915/perf: Exercise barrier race
>
> tests/i915/perf.c | 111 ++++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 111 insertions(+)
>
>-- 
>2.25.1
>

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [igt-dev] [PATCH i-g-t 0/2] tests/i915/perf: Add stress / race exercises
@ 2023-02-03 19:21   ` Umesh Nerlige Ramappa
  0 siblings, 0 replies; 34+ messages in thread
From: Umesh Nerlige Ramappa @ 2023-02-03 19:21 UTC (permalink / raw)
  To: Janusz Krzysztofik; +Cc: igt-dev, intel-gfx, Chris Wilson

On Tue, Jan 31, 2023 at 10:17:29AM +0100, Janusz Krzysztofik wrote:
>Users reported oopses on list corruptions when using i915 perf with a
>number of concurrently running graphics applications.  That indicates we
>are currently missing some important tests for such scenarios.  Cover
>that gap.

Any bug ids that were filed for the issue? Also what were the parameters 
passed to perf OA when the issues were reported? This could help 
actually root cause the issue and have a test closer to the actual use 
case.

Regards,
Umesh
>
>Chris Wilson (1):
>  i915/perf: Stress opening of new perf streams against existing
>    contexts
>
>Janusz Krzysztofik (1):
>  tests/i915/perf: Exercise barrier race
>
> tests/i915/perf.c | 111 ++++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 111 insertions(+)
>
>-- 
>2.25.1
>

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Intel-gfx] [igt-dev] [PATCH i-g-t 0/2] tests/i915/perf: Add stress / race exercises
  2023-02-03 19:21   ` Umesh Nerlige Ramappa
@ 2023-02-03 20:59     ` Janusz Krzysztofik
  -1 siblings, 0 replies; 34+ messages in thread
From: Janusz Krzysztofik @ 2023-02-03 20:59 UTC (permalink / raw)
  To: Umesh Nerlige Ramappa
  Cc: igt-dev, Arkadiusz Hiler, intel-gfx, Petri Latvala, Chris Wilson

Hi Umesh,

Thanks for taking a look.

On Friday, 3 February 2023 20:21:38 CET Umesh Nerlige Ramappa wrote:
> On Tue, Jan 31, 2023 at 10:17:29AM +0100, Janusz Krzysztofik wrote:
> >Users reported oopses on list corruptions when using i915 perf with a
> >number of concurrently running graphics applications.  That indicates we
> >are currently missing some important tests for such scenarios.  Cover
> >that gap.
> 
> Any bug ids that were filed for the issue? 

Yes, https://gitlab.freedesktop.org/drm/intel/issues/6333.

> Also what were the parameters 
> passed to perf OA when the issues were reported? This could help 
> actually root cause the issue and have a test closer to the actual use 
> case.

Root cause has been already identified, and it lays not in perf, only in 
barriers handling code, see https://patchwork.freedesktop.org/series/113636/ 
(submitted to trybot so far, soon to the public list).

Thanks,
Janusz

> 
> Regards,
> Umesh
> >
> >Chris Wilson (1):
> >  i915/perf: Stress opening of new perf streams against existing
> >    contexts
> >
> >Janusz Krzysztofik (1):
> >  tests/i915/perf: Exercise barrier race
> >
> > tests/i915/perf.c | 111 ++++++++++++++++++++++++++++++++++++++++++++++
> > 1 file changed, 111 insertions(+)
> >
> 





^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [igt-dev] [PATCH i-g-t 0/2] tests/i915/perf: Add stress / race exercises
@ 2023-02-03 20:59     ` Janusz Krzysztofik
  0 siblings, 0 replies; 34+ messages in thread
From: Janusz Krzysztofik @ 2023-02-03 20:59 UTC (permalink / raw)
  To: Umesh Nerlige Ramappa; +Cc: igt-dev, intel-gfx, Chris Wilson

Hi Umesh,

Thanks for taking a look.

On Friday, 3 February 2023 20:21:38 CET Umesh Nerlige Ramappa wrote:
> On Tue, Jan 31, 2023 at 10:17:29AM +0100, Janusz Krzysztofik wrote:
> >Users reported oopses on list corruptions when using i915 perf with a
> >number of concurrently running graphics applications.  That indicates we
> >are currently missing some important tests for such scenarios.  Cover
> >that gap.
> 
> Any bug ids that were filed for the issue? 

Yes, https://gitlab.freedesktop.org/drm/intel/issues/6333.

> Also what were the parameters 
> passed to perf OA when the issues were reported? This could help 
> actually root cause the issue and have a test closer to the actual use 
> case.

Root cause has been already identified, and it lays not in perf, only in 
barriers handling code, see https://patchwork.freedesktop.org/series/113636/ 
(submitted to trybot so far, soon to the public list).

Thanks,
Janusz

> 
> Regards,
> Umesh
> >
> >Chris Wilson (1):
> >  i915/perf: Stress opening of new perf streams against existing
> >    contexts
> >
> >Janusz Krzysztofik (1):
> >  tests/i915/perf: Exercise barrier race
> >
> > tests/i915/perf.c | 111 ++++++++++++++++++++++++++++++++++++++++++++++
> > 1 file changed, 111 insertions(+)
> >
> 




^ permalink raw reply	[flat|nested] 34+ messages in thread

end of thread, other threads:[~2023-02-03 20:59 UTC | newest]

Thread overview: 34+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-01-31  9:17 [Intel-gfx] [PATCH i-g-t 0/2] tests/i915/perf: Add stress / race exercises Janusz Krzysztofik
2023-01-31  9:17 ` [igt-dev] " Janusz Krzysztofik
2023-01-31  9:17 ` [Intel-gfx] [PATCH i-g-t 1/2] i915/perf: Stress opening of new perf streams against existing contexts Janusz Krzysztofik
2023-01-31  9:17   ` [igt-dev] " Janusz Krzysztofik
2023-01-31 11:59   ` [Intel-gfx] " Kamil Konieczny
2023-01-31 11:59     ` [igt-dev] " Kamil Konieczny
2023-01-31 14:14     ` [Intel-gfx] " Janusz Krzysztofik
2023-01-31 14:14       ` [igt-dev] " Janusz Krzysztofik
2023-01-31  9:17 ` [Intel-gfx] [PATCH i-g-t 2/2] tests/i915/perf: Exercise barrier race Janusz Krzysztofik
2023-01-31  9:17   ` [igt-dev] " Janusz Krzysztofik
2023-02-01 18:21   ` [Intel-gfx] " Kamil Konieczny
2023-02-01 18:21     ` [igt-dev] " Kamil Konieczny
2023-02-01 19:16     ` [Intel-gfx] " Janusz Krzysztofik
2023-02-01 19:16       ` [igt-dev] " Janusz Krzysztofik
2023-02-02 10:19       ` [Intel-gfx] " Kamil Konieczny
2023-02-02 10:19         ` [igt-dev] " Kamil Konieczny
2023-01-31 11:16 ` [igt-dev] ✓ Fi.CI.BAT: success for tests/i915/perf: Add stress / race exercises Patchwork
2023-01-31 15:08 ` [igt-dev] ✓ Fi.CI.IGT: " Patchwork
2023-01-31 16:19 ` [Intel-gfx] [igt-dev] [PATCH i-g-t 0/2] " Dixit, Ashutosh
2023-01-31 16:19   ` Dixit, Ashutosh
2023-01-31 16:55   ` [Intel-gfx] " Dixit, Ashutosh
2023-01-31 16:55     ` Dixit, Ashutosh
2023-01-31 17:56     ` [Intel-gfx] " Janusz Krzysztofik
2023-01-31 17:56       ` Janusz Krzysztofik
2023-01-31 17:36   ` [Intel-gfx] " Janusz Krzysztofik
2023-01-31 17:36     ` Janusz Krzysztofik
2023-01-31 18:36     ` [Intel-gfx] " Dixit, Ashutosh
2023-01-31 18:36       ` Dixit, Ashutosh
2023-02-01 15:51       ` [Intel-gfx] " Janusz Krzysztofik
2023-02-01 15:51         ` Janusz Krzysztofik
2023-02-03 19:21 ` [Intel-gfx] " Umesh Nerlige Ramappa
2023-02-03 19:21   ` Umesh Nerlige Ramappa
2023-02-03 20:59   ` [Intel-gfx] " Janusz Krzysztofik
2023-02-03 20:59     ` Janusz Krzysztofik

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.