* [PATCH i-g-t 01/16] i915/gem_exec_schedule: Semaphore priority fixups
From: Chris Wilson @ 2019-05-08 10:09 UTC
  To: intel-gfx; +Cc: igt-dev

A stray git add from my test boxen -- we were not being careful enough
to preserve priority and ordering to match the implicit policies.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
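Notes: the added gem_wait() with a 1ns timeout is a poke, not a wait
for completion -- assuming the non-asserting igt gem_wait() that
returns the remaining time, this merely nudges the cancelling batch
out to the hardware before its handle is closed. In sketch:

	int64_t poke = 1;	/* 1ns: flush submission, don't block */

	gem_execbuf(i915, &eb);		/* submit the cancel batch */
	gem_wait(i915, cancel, &poke);	/* push it towards the hardware */
	gem_close(i915, cancel);	/* only now drop the handle */
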
 tests/i915/gem_exec_schedule.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/i915/gem_exec_schedule.c b/tests/i915/gem_exec_schedule.c
index 330e8a54e..77a264a6a 100644
--- a/tests/i915/gem_exec_schedule.c
+++ b/tests/i915/gem_exec_schedule.c
@@ -507,6 +507,7 @@ static void semaphore_resolve(int i915)
 		uint32_t handle, cancel;
 		uint32_t *cs, *map;
 		igt_spin_t *spin;
+		int64_t poke = 1;
 
 		if (!gem_can_store_dword(i915, engine))
 			continue;
@@ -587,6 +588,7 @@ static void semaphore_resolve(int i915)
 		eb.buffer_count = 2;
 		eb.rsvd1 = inner;
 		gem_execbuf(i915, &eb);
+		gem_wait(i915, cancel, &poke);
 		gem_close(i915, cancel);
 
 		gem_sync(i915, handle); /* To hang unless cancel runs! */
-- 
2.20.1

* [PATCH i-g-t 02/16] drm-uapi: Import i915_drm.h up to 53073249452d
From: Chris Wilson @ 2019-05-08 10:09 UTC
  To: intel-gfx; +Cc: igt-dev

commit 53073249452d307b66c2ab9a4b5ebf94db534ad6
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Thu Jan 25 17:55:58 2018 +0000

    drm/i915: Allow contexts to share a single timeline across all engines

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
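The import adds two ioctls and one context parameter. A minimal sketch
of how they compose (raw ioctls, error handling elided; ctx_a and
ctx_b are assumed to be existing context ids on the same fd):

	struct drm_i915_gem_vm_control vm = {};
	struct drm_i915_gem_context_param arg = {
		.param = I915_CONTEXT_PARAM_VM,
	};

	/* allocate a fresh ppGTT */
	ioctl(i915, DRM_IOCTL_I915_GEM_VM_CREATE, &vm);

	/* point both contexts at the same address space */
	arg.value = vm.vm_id;
	arg.ctx_id = ctx_a;
	ioctl(i915, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &arg);
	arg.ctx_id = ctx_b;
	ioctl(i915, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &arg);

	/* drop our handle; the ppGTT persists while contexts use it */
	ioctl(i915, DRM_IOCTL_I915_GEM_VM_DESTROY, &vm);
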
 include/drm-uapi/i915_drm.h | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/include/drm-uapi/i915_drm.h b/include/drm-uapi/i915_drm.h
index e01b3e1fd..1b0488a81 100644
--- a/include/drm-uapi/i915_drm.h
+++ b/include/drm-uapi/i915_drm.h
@@ -355,6 +355,8 @@ typedef struct _drm_i915_sarea {
 #define DRM_I915_PERF_ADD_CONFIG	0x37
 #define DRM_I915_PERF_REMOVE_CONFIG	0x38
 #define DRM_I915_QUERY			0x39
+#define DRM_I915_GEM_VM_CREATE		0x3a
+#define DRM_I915_GEM_VM_DESTROY		0x3b
 /* Must be kept compact -- no holes */
 
 #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
@@ -415,6 +417,8 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_PERF_ADD_CONFIG	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_ADD_CONFIG, struct drm_i915_perf_oa_config)
 #define DRM_IOCTL_I915_PERF_REMOVE_CONFIG	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_REMOVE_CONFIG, __u64)
 #define DRM_IOCTL_I915_QUERY			DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_QUERY, struct drm_i915_query)
+#define DRM_IOCTL_I915_GEM_VM_CREATE	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_VM_CREATE, struct drm_i915_gem_vm_control)
+#define DRM_IOCTL_I915_GEM_VM_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_VM_DESTROY, struct drm_i915_gem_vm_control)
 
 /* Allow drivers to submit batchbuffers directly to hardware, relying
  * on the security mechanisms provided by hardware.
@@ -1464,8 +1468,9 @@ struct drm_i915_gem_context_create_ext {
 	__u32 ctx_id; /* output: id of new context*/
 	__u32 flags;
 #define I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS	(1u << 0)
+#define I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE	(1u << 1)
 #define I915_CONTEXT_CREATE_FLAGS_UNKNOWN \
-	(-(I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS << 1))
+	(-(I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE << 1))
 	__u64 extensions;
 };
 
@@ -1507,6 +1512,17 @@ struct drm_i915_gem_context_param {
  * On creation, all new contexts are marked as recoverable.
  */
 #define I915_CONTEXT_PARAM_RECOVERABLE	0x8
+
+	/*
+	 * The id of the associated virtual memory address space (ppGTT) of
+	 * this context. Can be retrieved and passed to another context
+	 * (on the same fd) for both to use the same ppGTT and so share
+	 * address layouts, and avoid reloading the page tables on context
+	 * switches between themselves.
+	 *
+	 * See DRM_I915_GEM_VM_CREATE and DRM_I915_GEM_VM_DESTROY.
+	 */
+#define I915_CONTEXT_PARAM_VM		0x9
 /* Must be kept compact -- no holes and well documented */
 
 	__u64 value;
-- 
2.20.1

* [PATCH i-g-t 03/16] i915: Add gem_vm_create
From: Chris Wilson @ 2019-05-08 10:09 UTC
  To: intel-gfx; +Cc: igt-dev

Exercise basic creation and swapping between new address spaces.

v2: Check isolation: the same vm_id on different fds must indeed be
different VMs.
v3: Cross-check with CREATE_EXT_SETPARAM

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
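The new helpers follow the usual igt split between a hard-asserting
wrapper and a __gem_ variant that returns -errno. Typical use, as in
the test below:

	uint32_t vm_id;

	gem_require_vm(i915);		/* skip if VM_CREATE is unsupported */

	vm_id = gem_vm_create(i915);	/* asserts success, vm_id != 0 */
	/* ... attach to contexts via I915_CONTEXT_PARAM_VM ... */
	gem_vm_destroy(i915, vm_id);

	/* non-asserting probe */
	if (__gem_vm_create(i915, &vm_id) == 0)
		gem_vm_destroy(i915, vm_id);
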
 lib/Makefile.sources       |   2 +
 lib/i915/gem_vm.c          | 130 ++++++++++++
 lib/i915/gem_vm.h          |  38 ++++
 lib/meson.build            |   1 +
 tests/Makefile.sources     |   1 +
 tests/i915/gem_vm_create.c | 412 +++++++++++++++++++++++++++++++++++++
 tests/meson.build          |   1 +
 7 files changed, 585 insertions(+)
 create mode 100644 lib/i915/gem_vm.c
 create mode 100644 lib/i915/gem_vm.h
 create mode 100644 tests/i915/gem_vm_create.c

diff --git a/lib/Makefile.sources b/lib/Makefile.sources
index 976858238..891f65b96 100644
--- a/lib/Makefile.sources
+++ b/lib/Makefile.sources
@@ -13,6 +13,8 @@ lib_source_list =	 	\
 	i915/gem_ring.c	\
 	i915/gem_mman.c	\
 	i915/gem_mman.h	\
+	i915/gem_vm.c	\
+	i915/gem_vm.h	\
 	i915_3d.h		\
 	i915_reg.h		\
 	i915_pciids.h		\
diff --git a/lib/i915/gem_vm.c b/lib/i915/gem_vm.c
new file mode 100644
index 000000000..9a022a56c
--- /dev/null
+++ b/lib/i915/gem_vm.c
@@ -0,0 +1,130 @@
+/*
+ * Copyright © 2019 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <errno.h>
+#include <string.h>
+
+#include "ioctl_wrappers.h"
+#include "drmtest.h"
+
+#include "i915/gem_vm.h"
+
+/**
+ * SECTION:gem_vm
+ * @short_description: Helpers for dealing with address spaces (vm/GTT)
+ * @title: GEM Virtual Memory
+ *
+ * This helper library contains functions used for handling gem address
+ * spaces.
+ */
+
+/**
+ * gem_has_vm:
+ * @i915: open i915 drm file descriptor
+ *
+ * Returns: whether VM creation is supported or not.
+ */
+bool gem_has_vm(int i915)
+{
+	uint32_t vm_id = 0;
+
+	__gem_vm_create(i915, &vm_id);
+	if (vm_id)
+		gem_vm_destroy(i915, vm_id);
+
+	return vm_id;
+}
+
+/**
+ * gem_require_vm:
+ * @i915: open i915 drm file descriptor
+ *
+ * This helper will automatically skip the test on platforms where address
+ * space creation is not available.
+ */
+void gem_require_vm(int i915)
+{
+	igt_require(gem_has_vm(i915));
+}
+
+int __gem_vm_create(int i915, uint32_t *vm_id)
+{
+	struct drm_i915_gem_vm_control ctl = {};
+	int err = 0;
+
+	if (igt_ioctl(i915, DRM_IOCTL_I915_GEM_VM_CREATE, &ctl) == 0) {
+		*vm_id = ctl.vm_id;
+	} else {
+		err = -errno;
+		igt_assume(err != 0);
+	}
+
+	errno = 0;
+	return err;
+}
+
+/**
+ * gem_vm_create:
+ * @i915: open i915 drm file descriptor
+ *
+ * This wraps the VM_CREATE ioctl, which is used to allocate a new
+ * address space for use with GEM contexts.
+ *
+ * Returns: The id of the allocated address space.
+ */
+uint32_t gem_vm_create(int i915)
+{
+	uint32_t vm_id;
+
+	igt_assert_eq(__gem_vm_create(i915, &vm_id), 0);
+	igt_assert(vm_id != 0);
+
+	return vm_id;
+}
+
+int __gem_vm_destroy(int i915, uint32_t vm_id)
+{
+	struct drm_i915_gem_vm_control ctl = { .vm_id = vm_id };
+	int err = 0;
+
+	if (igt_ioctl(i915, DRM_IOCTL_I915_GEM_VM_DESTROY, &ctl)) {
+		err = -errno;
+		igt_assume(err);
+	}
+
+	errno = 0;
+	return err;
+}
+
+/**
+ * gem_vm_destroy:
+ * @i915: open i915 drm file descriptor
+ * @vm_id: i915 VM id
+ *
+ * This wraps the VM_DESTROY ioctl, which is used to free an address space
+ * handle.
+ */
+void gem_vm_destroy(int i915, uint32_t vm_id)
+{
+	igt_assert_eq(__gem_vm_destroy(i915, vm_id), 0);
+}
diff --git a/lib/i915/gem_vm.h b/lib/i915/gem_vm.h
new file mode 100644
index 000000000..27af899d4
--- /dev/null
+++ b/lib/i915/gem_vm.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright © 2019 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef GEM_VM_H
+#define GEM_VM_H
+
+#include <stdint.h>
+
+bool gem_has_vm(int i915);
+void gem_require_vm(int i915);
+
+uint32_t gem_vm_create(int i915);
+int __gem_vm_create(int i915, uint32_t *vm_id);
+
+void gem_vm_destroy(int i915, uint32_t vm_id);
+int __gem_vm_destroy(int i915, uint32_t vm_id);
+
+#endif /* GEM_VM_H */
diff --git a/lib/meson.build b/lib/meson.build
index 786e7a264..d057cb573 100644
--- a/lib/meson.build
+++ b/lib/meson.build
@@ -5,6 +5,7 @@ lib_sources = [
 	'i915/gem_submission.c',
 	'i915/gem_ring.c',
 	'i915/gem_mman.c',
+	'i915/gem_vm.c',
 	'igt_color_encoding.c',
 	'igt_debugfs.c',
 	'igt_device.c',
diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index 7f921f6c5..1a541d206 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -21,6 +21,7 @@ TESTS_progs = \
 	drm_import_export \
 	drm_mm \
 	drm_read \
+	i915/gem_vm_create \
 	kms_3d \
 	kms_addfb_basic \
 	kms_atomic \
diff --git a/tests/i915/gem_vm_create.c b/tests/i915/gem_vm_create.c
new file mode 100644
index 000000000..cbd273d9d
--- /dev/null
+++ b/tests/i915/gem_vm_create.c
@@ -0,0 +1,412 @@
+/*
+ * Copyright © 2019 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "igt.h"
+#include "igt_dummyload.h"
+#include "i915/gem_vm.h"
+
+static int vm_create_ioctl(int i915, struct drm_i915_gem_vm_control *ctl)
+{
+	int err = 0;
+	if (igt_ioctl(i915, DRM_IOCTL_I915_GEM_VM_CREATE, ctl)) {
+		err = -errno;
+		igt_assume(err);
+	}
+	errno = 0;
+	return err;
+}
+
+static int vm_destroy_ioctl(int i915, struct drm_i915_gem_vm_control *ctl)
+{
+	int err = 0;
+	if (igt_ioctl(i915, DRM_IOCTL_I915_GEM_VM_DESTROY, ctl)) {
+		err = -errno;
+		igt_assume(err);
+	}
+	errno = 0;
+	return err;
+}
+
+static int ctx_create_ioctl(int i915,
+			    struct drm_i915_gem_context_create_ext *arg)
+{
+	int err = 0;
+	if (igt_ioctl(i915, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, arg)) {
+		err = -errno;
+		igt_assume(err);
+	}
+	errno = 0;
+	return err;
+}
+
+static bool has_vm(int i915)
+{
+	struct drm_i915_gem_vm_control ctl = {};
+	int err;
+
+	err = vm_create_ioctl(i915, &ctl);
+	switch (err) {
+	case -EINVAL: /* unknown ioctl */
+	case -ENODEV: /* !full-ppgtt */
+		return false;
+
+	case 0:
+		gem_vm_destroy(i915, ctl.vm_id);
+		return true;
+
+	default:
+		igt_fail_on_f(err, "Unknown response from VM_CREATE\n");
+		return false;
+	}
+}
+
+static void invalid_create(int i915)
+{
+	struct drm_i915_gem_vm_control ctl = {};
+	struct i915_user_extension ext = { .name = -1 };
+
+	igt_assert_eq(vm_create_ioctl(i915, &ctl), 0);
+	gem_vm_destroy(i915, ctl.vm_id);
+
+	ctl.vm_id = 0xdeadbeef;
+	igt_assert_eq(vm_create_ioctl(i915, &ctl), 0);
+	gem_vm_destroy(i915, ctl.vm_id);
+	ctl.vm_id = 0;
+
+	ctl.flags = -1;
+	igt_assert_eq(vm_create_ioctl(i915, &ctl), -EINVAL);
+	ctl.flags = 0;
+
+	ctl.extensions = -1;
+	igt_assert_eq(vm_create_ioctl(i915, &ctl), -EFAULT);
+	ctl.extensions = to_user_pointer(&ext);
+	igt_assert_eq(vm_create_ioctl(i915, &ctl), -EINVAL);
+	ctl.extensions = 0;
+}
+
+static void invalid_destroy(int i915)
+{
+	struct drm_i915_gem_vm_control ctl = {};
+
+	igt_assert_eq(vm_destroy_ioctl(i915, &ctl), -ENOENT);
+
+	igt_assert_eq(vm_create_ioctl(i915, &ctl), 0);
+	igt_assert_eq(vm_destroy_ioctl(i915, &ctl), 0);
+	igt_assert_eq(vm_destroy_ioctl(i915, &ctl), -ENOENT);
+
+	igt_assert_eq(vm_create_ioctl(i915, &ctl), 0);
+	ctl.vm_id = ctl.vm_id + 1; /* assumes no one else allocated */
+	igt_assert_eq(vm_destroy_ioctl(i915, &ctl), -ENOENT);
+	ctl.vm_id = ctl.vm_id - 1;
+	igt_assert_eq(vm_destroy_ioctl(i915, &ctl), 0);
+
+	igt_assert_eq(vm_create_ioctl(i915, &ctl), 0);
+	ctl.flags = -1;
+	igt_assert_eq(vm_destroy_ioctl(i915, &ctl), -EINVAL);
+	ctl.flags = 0;
+	igt_assert_eq(vm_destroy_ioctl(i915, &ctl), 0);
+
+	igt_assert_eq(vm_create_ioctl(i915, &ctl), 0);
+	ctl.extensions = -1;
+	igt_assert_eq(vm_destroy_ioctl(i915, &ctl), -EINVAL);
+	ctl.extensions = 0;
+	igt_assert_eq(vm_destroy_ioctl(i915, &ctl), 0);
+}
+
+static uint32_t __batch_create(int i915, uint32_t offset)
+{
+	const uint32_t bbe = MI_BATCH_BUFFER_END;
+	uint32_t handle;
+
+	handle = gem_create(i915, ALIGN(offset + 4, 4096));
+	gem_write(i915, handle, offset, &bbe, sizeof(bbe));
+
+	return handle;
+}
+
+static uint32_t batch_create(int i915)
+{
+	return __batch_create(i915, 0);
+}
+
+static void check_same_vm(int i915, uint32_t ctx_a, uint32_t ctx_b)
+{
+	struct drm_i915_gem_exec_object2 batch = {
+		.handle = batch_create(i915),
+	};
+	struct drm_i915_gem_execbuffer2 eb = {
+		.buffers_ptr = to_user_pointer(&batch),
+		.buffer_count = 1,
+	};
+
+	/* First verify that we try to use "softpinning" by default */
+	batch.offset = 48 << 20;
+	eb.rsvd1 = ctx_a;
+	gem_execbuf(i915, &eb);
+	igt_assert_eq_u64(batch.offset, 48 << 20);
+
+	/* An already active VMA will try to keep its offset */
+	batch.offset = 0;
+	eb.rsvd1 = ctx_b;
+	gem_execbuf(i915, &eb);
+	igt_assert_eq_u64(batch.offset, 48 << 20);
+
+	gem_sync(i915, batch.handle);
+	gem_close(i915, batch.handle);
+}
+
+static void create_ext(int i915)
+{
+	struct drm_i915_gem_context_create_ext_setparam ext = {
+		{ .name = I915_CONTEXT_CREATE_EXT_SETPARAM },
+		{ .param = I915_CONTEXT_PARAM_VM }
+	};
+	struct drm_i915_gem_context_create_ext create = {
+		.flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS
+	};
+	uint32_t ctx[2];
+
+	igt_require(ctx_create_ioctl(i915, &create) == 0);
+	gem_context_destroy(i915, create.ctx_id);
+
+	create.extensions = to_user_pointer(&ext);
+
+	ext.param.value = gem_vm_create(i915);
+
+	igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
+	ctx[0] = create.ctx_id;
+
+	igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
+	ctx[1] = create.ctx_id;
+
+	gem_vm_destroy(i915, ext.param.value);
+
+	check_same_vm(i915, ctx[0], ctx[1]);
+
+	gem_context_destroy(i915, ctx[1]);
+	gem_context_destroy(i915, ctx[0]);
+}
+
+static void execbuf(int i915)
+{
+	struct drm_i915_gem_exec_object2 batch = {
+		.handle = batch_create(i915),
+	};
+	struct drm_i915_gem_execbuffer2 eb = {
+		.buffers_ptr = to_user_pointer(&batch),
+		.buffer_count = 1,
+	};
+	struct drm_i915_gem_context_param arg = {
+		.param = I915_CONTEXT_PARAM_VM,
+	};
+
+	/* First verify that we try to use "softpinning" by default */
+	batch.offset = 48 << 20;
+	gem_execbuf(i915, &eb);
+	igt_assert_eq_u64(batch.offset, 48 << 20);
+
+	arg.value = gem_vm_create(i915);
+	gem_context_set_param(i915, &arg);
+	gem_execbuf(i915, &eb);
+	igt_assert_eq_u64(batch.offset, 48 << 20);
+	gem_vm_destroy(i915, arg.value);
+
+	arg.value = gem_vm_create(i915);
+	gem_context_set_param(i915, &arg);
+	batch.offset = 0;
+	gem_execbuf(i915, &eb);
+	igt_assert_eq_u64(batch.offset, 0);
+	gem_vm_destroy(i915, arg.value);
+
+	gem_sync(i915, batch.handle);
+	gem_close(i915, batch.handle);
+}
+
+static void
+write_to_address(int fd, uint32_t ctx, uint64_t addr, uint32_t value)
+{
+	const int gen = intel_gen(intel_get_drm_devid(fd));
+	struct drm_i915_gem_exec_object2 batch = {
+		.handle = gem_create(fd, 4096)
+	};
+	struct drm_i915_gem_execbuffer2 eb = {
+		.buffers_ptr = to_user_pointer(&batch),
+		.buffer_count = 1,
+		.rsvd1 = ctx,
+	};
+	uint32_t cs[16];
+	int i;
+
+	i = 0;
+	cs[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+	if (gen >= 8) {
+		cs[++i] = addr;
+		cs[++i] = addr >> 32;
+	} else if (gen >= 4) {
+		cs[++i] = 0;
+		cs[++i] = addr;
+	} else {
+		cs[i]--;
+		cs[++i] = addr;
+	}
+	cs[++i] = value;
+	cs[++i] = MI_BATCH_BUFFER_END;
+	gem_write(fd, batch.handle, 0, cs, sizeof(cs));
+
+	gem_execbuf(fd, &eb);
+	igt_assert(batch.offset != addr);
+
+	gem_sync(fd, batch.handle);
+	gem_close(fd, batch.handle);
+}
+
+static void isolation(int i915)
+{
+	struct drm_i915_gem_exec_object2 obj[2] = {
+		{
+			.handle = gem_create(i915, 4096),
+			.offset = 1 << 20
+		},
+		{ .handle = batch_create(i915), }
+	};
+	struct drm_i915_gem_execbuffer2 eb = {
+		.buffers_ptr = to_user_pointer(obj),
+		.buffer_count = 2,
+	};
+	struct drm_i915_gem_context_param arg = {
+		.param = I915_CONTEXT_PARAM_VM,
+	};
+	int other = gem_reopen_driver(i915);
+	uint32_t ctx[2], vm[2], result;
+	int loops = 4096;
+
+	/* A vm_id on one fd is not the same as on another fd */
+	igt_assert_neq(i915, other);
+
+	ctx[0] = gem_context_create(i915);
+	ctx[1] = gem_context_create(other);
+
+	vm[0] = gem_vm_create(i915);
+	do {
+		vm[1] = gem_vm_create(other);
+	} while (vm[1] != vm[0] && loops-- > 0);
+	igt_assert(loops);
+
+	arg.ctx_id = ctx[0];
+	arg.value = vm[0];
+	gem_context_set_param(i915, &arg);
+
+	arg.ctx_id = ctx[1];
+	arg.value = vm[1];
+	gem_context_set_param(other, &arg);
+
+	eb.rsvd1 = ctx[0];
+	gem_execbuf(i915, &eb); /* bind object into vm[0] */
+
+	/* Verify the trick with the assumed target address works */
+	write_to_address(i915, ctx[0], obj[0].offset, 1);
+	gem_read(i915, obj[0].handle, 0, &result, sizeof(result));
+	igt_assert_eq(result, 1);
+
+	/* Now check that we can't write to vm[0] from second fd/vm */
+	write_to_address(other, ctx[1], obj[0].offset, 2);
+	gem_read(i915, obj[0].handle, 0, &result, sizeof(result));
+	igt_assert_eq(result, 1);
+
+	close(other);
+
+	gem_close(i915, obj[1].handle);
+	gem_close(i915, obj[0].handle);
+
+	gem_context_destroy(i915, ctx[0]);
+	gem_vm_destroy(i915, vm[0]);
+}
+
+static void async_destroy(int i915)
+{
+	struct drm_i915_gem_context_param arg = {
+		.ctx_id = gem_context_create(i915),
+		.value = gem_vm_create(i915),
+		.param = I915_CONTEXT_PARAM_VM,
+	};
+	igt_spin_t *spin[2];
+
+	spin[0] = igt_spin_new(i915,
+			       .ctx = arg.ctx_id,
+			       .flags = IGT_SPIN_POLL_RUN);
+	igt_spin_busywait_until_started(spin[0]);
+
+	gem_context_set_param(i915, &arg);
+	spin[1] = __igt_spin_new(i915, .ctx = arg.ctx_id);
+
+	igt_spin_end(spin[0]);
+	gem_sync(i915, spin[0]->handle);
+
+	gem_vm_destroy(i915, arg.value);
+	gem_context_destroy(i915, arg.ctx_id);
+
+	igt_spin_end(spin[1]);
+	gem_sync(i915, spin[1]->handle);
+
+	for (int i = 0; i < ARRAY_SIZE(spin); i++)
+		igt_spin_free(i915, spin[i]);
+}
+
+igt_main
+{
+	int i915 = -1;
+
+	igt_fixture {
+		i915 = drm_open_driver(DRIVER_INTEL);
+		igt_require_gem(i915);
+		igt_require(has_vm(i915));
+	}
+
+	igt_subtest("invalid-create")
+		invalid_create(i915);
+
+	igt_subtest("invalid-destroy")
+		invalid_destroy(i915);
+
+	igt_subtest_group {
+		igt_fixture {
+			gem_context_require_param(i915, I915_CONTEXT_PARAM_VM);
+		}
+
+		igt_subtest("execbuf")
+			execbuf(i915);
+
+		igt_subtest("isolation")
+			isolation(i915);
+
+		igt_subtest("create-ext")
+			create_ext(i915);
+
+		igt_subtest("async-destroy")
+			async_destroy(i915);
+	}
+
+	igt_fixture {
+		close(i915);
+	}
+}
diff --git a/tests/meson.build b/tests/meson.build
index 711979b4a..e7dbc5756 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -210,6 +210,7 @@ i915_progs = [
 	'gem_unfence_active_buffers',
 	'gem_unref_active_buffers',
 	'gem_userptr_blits',
+	'gem_vm_create',
 	'gem_wait',
 	'gem_workarounds',
 	'gem_write_read_ring_switch',
-- 
2.20.1

* [PATCH i-g-t 04/16] i915/gem_ctx_param: Test set/get (copy) VM
From: Chris Wilson @ 2019-05-08 10:09 UTC
  To: intel-gfx; +Cc: igt-dev

Exercise reusing the GTT of one ctx in another.

v2: Test setting back to the same VM
v3: Check the VM still exists after the parent contexts are dead.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
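The set/get copy semantics under test, in outline (parent and child
are two contexts on the same fd, as in test_vm() below):

	struct drm_i915_gem_context_param arg = {
		.param = I915_CONTEXT_PARAM_VM,
		.ctx_id = parent,
	};

	gem_context_get_param(i915, &arg); /* arg.value: handle to parent's VM */

	arg.ctx_id = child;
	gem_context_set_param(i915, &arg); /* child now shares parent's ppGTT */

	/* the handle keeps the VM alive even after both contexts die */
	gem_context_destroy(i915, child);
	gem_context_destroy(i915, parent);
	gem_vm_destroy(i915, arg.value);
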
 tests/i915/gem_ctx_param.c | 107 ++++++++++++++++++++++++++++++++-----
 1 file changed, 95 insertions(+), 12 deletions(-)

diff --git a/tests/i915/gem_ctx_param.c b/tests/i915/gem_ctx_param.c
index b6f57236c..d949cef32 100644
--- a/tests/i915/gem_ctx_param.c
+++ b/tests/i915/gem_ctx_param.c
@@ -28,6 +28,7 @@
 #include <limits.h>
 
 #include "igt.h"
+#include "i915/gem_vm.h"
 
 IGT_TEST_DESCRIPTION("Basic test for context set/get param input validation.");
 
@@ -36,17 +37,6 @@ IGT_TEST_DESCRIPTION("Basic test for context set/get param input validation.");
 #define NEW_CTX	BIT(0)
 #define USER BIT(1)
 
-static int reopen_driver(int fd)
-{
-	char path[256];
-
-	snprintf(path, sizeof(path), "/proc/self/fd/%d", fd);
-	fd = open(path, O_RDWR);
-	igt_assert_lte(0, fd);
-
-	return fd;
-}
-
 static void set_priority(int i915)
 {
 	static const int64_t test_values[] = {
@@ -91,7 +81,7 @@ static void set_priority(int i915)
 	igt_permute_array(values, size, igt_exchange_int64);
 
 	igt_fork(flags, NEW_CTX | USER) {
-		int fd = reopen_driver(i915);
+		int fd = gem_reopen_driver(i915);
 		struct drm_i915_gem_context_param arg = {
 			.param = I915_CONTEXT_PARAM_PRIORITY,
 			.ctx_id = flags & NEW_CTX ? gem_context_create(fd) : 0,
@@ -143,6 +133,96 @@ static void set_priority(int i915)
 	free(values);
 }
 
+static uint32_t __batch_create(int i915, uint32_t offset)
+{
+	const uint32_t bbe = MI_BATCH_BUFFER_END;
+	uint32_t handle;
+
+	handle = gem_create(i915, ALIGN(offset + 4, 4096));
+	gem_write(i915, handle, offset, &bbe, sizeof(bbe));
+
+	return handle;
+}
+
+static uint32_t batch_create(int i915)
+{
+	return __batch_create(i915, 0);
+}
+
+static void test_vm(int i915)
+{
+	const uint64_t nonzero_offset = 48 << 20;
+	struct drm_i915_gem_exec_object2 batch = {
+		.handle = batch_create(i915),
+	};
+	struct drm_i915_gem_execbuffer2 eb = {
+		.buffers_ptr = to_user_pointer(&batch),
+		.buffer_count = 1,
+	};
+	struct drm_i915_gem_context_param arg = {
+		.param = I915_CONTEXT_PARAM_VM,
+	};
+	uint32_t parent, child;
+
+	arg.value = -1ull;
+	igt_require(__gem_context_set_param(i915, &arg) == -ENOENT);
+
+	parent = gem_context_create(i915);
+	child = gem_context_create(i915);
+
+	/* Using implicit soft-pinning */
+	eb.rsvd1 = parent;
+	batch.offset = nonzero_offset;
+	gem_execbuf(i915, &eb);
+	igt_assert_eq_u64(batch.offset, nonzero_offset);
+
+	eb.rsvd1 = child;
+	batch.offset = 0;
+	gem_execbuf(i915, &eb);
+	igt_assert_eq_u64(batch.offset, 0);
+
+	eb.rsvd1 = parent;
+	gem_execbuf(i915, &eb);
+	igt_assert_eq_u64(batch.offset, nonzero_offset);
+
+	arg.ctx_id = parent;
+	gem_context_get_param(i915, &arg);
+	gem_context_set_param(i915, &arg);
+
+	/* Still the same VM, so expect the old VMA again */
+	batch.offset = 0;
+	gem_execbuf(i915, &eb);
+	igt_assert_eq_u64(batch.offset, nonzero_offset);
+
+	arg.ctx_id = child;
+	gem_context_set_param(i915, &arg);
+
+	eb.rsvd1 = child;
+	batch.offset = 0;
+	gem_execbuf(i915, &eb);
+	igt_assert_eq_u64(batch.offset, nonzero_offset);
+
+	gem_context_destroy(i915, child);
+	gem_context_destroy(i915, parent);
+
+	/* both contexts destroyed, but we still keep hold of the vm */
+	child = gem_context_create(i915);
+
+	arg.ctx_id = child;
+	gem_context_set_param(i915, &arg);
+
+	eb.rsvd1 = child;
+	batch.offset = 0;
+	gem_execbuf(i915, &eb);
+	igt_assert_eq_u64(batch.offset, nonzero_offset);
+
+	gem_context_destroy(i915, child);
+	gem_vm_destroy(i915, arg.value);
+
+	gem_sync(i915, batch.handle);
+	gem_close(i915, batch.handle);
+}
+
 igt_main
 {
 	struct drm_i915_gem_context_param arg;
@@ -253,6 +333,9 @@ igt_main
 		gem_context_set_param(fd, &arg);
 	}
 
+	igt_subtest("vm")
+		test_vm(fd);
+
 	arg.param = I915_CONTEXT_PARAM_PRIORITY;
 
 	igt_subtest("set-priority-not-supported") {
-- 
2.20.1

* [PATCH i-g-t 05/16] i915/gem_ctx_create: Basic checks for constructor properties
From: Chris Wilson @ 2019-05-08 10:09 UTC
  To: intel-gfx; +Cc: igt-dev

Check that the extended create interface accepts setparam.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
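CONTEXT_CREATE_EXT consumes a chain of i915_user_extension nodes; the
SETPARAM extension applies a drm_i915_gem_context_param at creation
time. A minimal valid chain, as probed by basic_ext_param() below:

	struct drm_i915_gem_context_create_ext_setparam ext = {
		.base = { .name = I915_CONTEXT_CREATE_EXT_SETPARAM },
		.param = {
			.param = I915_CONTEXT_PARAM_PRIORITY,
			.value = 32,
		},
	};
	struct drm_i915_gem_context_create_ext create = {
		.flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
		.extensions = to_user_pointer(&ext),
	};

	igt_assert_eq(create_ext_ioctl(i915, &create), 0);
	/* create.ctx_id now has priority 32 already applied */
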
 tests/i915/gem_ctx_create.c | 225 ++++++++++++++++++++++++++++++++++--
 1 file changed, 213 insertions(+), 12 deletions(-)

diff --git a/tests/i915/gem_ctx_create.c b/tests/i915/gem_ctx_create.c
index a664070db..9b4fddbe7 100644
--- a/tests/i915/gem_ctx_create.c
+++ b/tests/i915/gem_ctx_create.c
@@ -33,6 +33,7 @@
 #include <time.h>
 
 #include "igt_rand.h"
+#include "sw_sync.h"
 
 #define LOCAL_I915_EXEC_BSD_SHIFT      (13)
 #define LOCAL_I915_EXEC_BSD_MASK       (3 << LOCAL_I915_EXEC_BSD_SHIFT)
@@ -45,12 +46,33 @@ static unsigned all_nengine;
 static unsigned ppgtt_engines[16];
 static unsigned ppgtt_nengine;
 
-static int __gem_context_create_local(int fd, struct drm_i915_gem_context_create *arg)
+static int create_ioctl(int fd, struct drm_i915_gem_context_create *arg)
 {
-	int ret = 0;
-	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, arg))
-		ret = -errno;
-	return ret;
+	int err;
+
+	err = 0;
+	if (igt_ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, arg)) {
+		err = -errno;
+		igt_assert(err);
+	}
+
+	errno = 0;
+	return err;
+}
+
+static int create_ext_ioctl(int i915,
+			    struct drm_i915_gem_context_create_ext *arg)
+{
+	int err;
+
+	err = 0;
+	if (igt_ioctl(i915, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, arg)) {
+		err = -errno;
+		igt_assume(err);
+	}
+
+	errno = 0;
+	return err;
 }
 
 static double elapsed(const struct timespec *start,
@@ -308,6 +330,187 @@ static void maximum(int fd, int ncpus, unsigned mode)
 	free(contexts);
 }
 
+static void basic_ext_param(int i915)
+{
+	struct drm_i915_gem_context_create_ext_setparam ext = {
+		{ .name = I915_CONTEXT_CREATE_EXT_SETPARAM },
+	};
+	struct drm_i915_gem_context_create_ext create = {
+		.flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS
+	};
+	struct drm_i915_gem_context_param get;
+
+	igt_require(create_ext_ioctl(i915, &create) == 0);
+	gem_context_destroy(i915, create.ctx_id);
+
+	create.extensions = -1ull;
+	igt_assert_eq(create_ext_ioctl(i915, &create), -EFAULT);
+
+	create.extensions = to_user_pointer(&ext);
+	igt_assert_eq(create_ext_ioctl(i915, &create), -EINVAL);
+
+	ext.param.param = I915_CONTEXT_PARAM_PRIORITY;
+	if (create_ext_ioctl(i915, &create) != -ENODEV) {
+		gem_context_destroy(i915, create.ctx_id);
+
+		ext.base.next_extension = -1ull;
+		igt_assert_eq(create_ext_ioctl(i915, &create), -EFAULT);
+		ext.base.next_extension = to_user_pointer(&ext);
+		igt_assert_eq(create_ext_ioctl(i915, &create), -E2BIG);
+		ext.base.next_extension = 0;
+
+		ext.param.value = 32;
+		igt_assert_eq(create_ext_ioctl(i915, &create), 0);
+
+		memset(&get, 0, sizeof(get));
+		get.ctx_id = create.ctx_id;
+		get.param = I915_CONTEXT_PARAM_PRIORITY;
+		gem_context_get_param(i915, &get);
+		igt_assert_eq(get.value, ext.param.value);
+
+		gem_context_destroy(i915, create.ctx_id);
+	}
+}
+
+static void check_single_timeline(int i915, uint32_t ctx, int num_engines)
+{
+#define RCS_TIMESTAMP (0x2000 + 0x358)
+	const int gen = intel_gen(intel_get_drm_devid(i915));
+	const int has_64bit_reloc = gen >= 8;
+	struct drm_i915_gem_exec_object2 results = { .handle = gem_create(i915, 4096) };
+	const uint32_t bbe = MI_BATCH_BUFFER_END;
+	int timeline = sw_sync_timeline_create();
+	uint32_t last, *map;
+
+	{
+		struct drm_i915_gem_execbuffer2 execbuf = {
+			.buffers_ptr = to_user_pointer(&results),
+			.buffer_count = 1,
+			.rsvd1 = ctx,
+		};
+		gem_write(i915, results.handle, 0, &bbe, sizeof(bbe));
+		gem_execbuf(i915, &execbuf);
+		results.flags = EXEC_OBJECT_PINNED;
+	}
+
+	for (int i = 0; i < num_engines; i++) {
+		struct drm_i915_gem_exec_object2 obj[2] = {
+			results, /* write hazard lies! */
+			{ .handle = gem_create(i915, 4096) },
+		};
+		struct drm_i915_gem_execbuffer2 execbuf = {
+			.buffers_ptr = to_user_pointer(obj),
+			.buffer_count = 2,
+			.rsvd1 = ctx,
+			.rsvd2 = sw_sync_timeline_create_fence(timeline, num_engines - i),
+			.flags = i | I915_EXEC_FENCE_IN,
+		};
+		uint64_t offset = results.offset + 4 * i;
+		uint32_t *cs;
+		int j = 0;
+
+		cs = gem_mmap__cpu(i915, obj[1].handle, 0, 4096, PROT_WRITE);
+
+		cs[j] = 0x24 << 23 | 1; /* SRM */
+		if (has_64bit_reloc)
+			cs[j]++;
+		j++;
+		cs[j++] = RCS_TIMESTAMP;
+		cs[j++] = offset;
+		if (has_64bit_reloc)
+			cs[j++] = offset >> 32;
+		cs[j++] = MI_BATCH_BUFFER_END;
+
+		munmap(cs, 4096);
+
+		gem_execbuf(i915, &execbuf);
+		gem_close(i915, obj[1].handle);
+		close(execbuf.rsvd2);
+	}
+	close(timeline);
+	gem_sync(i915, results.handle);
+
+	map = gem_mmap__cpu(i915, results.handle, 0, 4096, PROT_READ);
+	gem_set_domain(i915, results.handle, I915_GEM_DOMAIN_CPU, 0);
+	gem_close(i915, results.handle);
+
+	last = map[0];
+	for (int i = 1; i < num_engines; i++) {
+		igt_assert_f((map[i] - last) > 0,
+			     "Engine instance [%d] executed too early: this:%x, last:%x\n",
+			     i, map[i], last);
+		last = map[i];
+	}
+	munmap(map, 4096);
+}
+
+static void iris_pipeline(int i915)
+{
+#ifdef I915_DEFINE_CONTEXT_PARAM_ENGINES
+#define RCS0 {0, 0}
+	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 2) = {
+		.engines = { RCS0, RCS0 }
+	};
+	struct drm_i915_gem_context_create_ext_setparam p_engines = {
+		.base = {
+			.name = I915_CONTEXT_CREATE_EXT_SETPARAM,
+			.next_extension = 0, /* end of chain */
+		},
+		.param = {
+			.param = I915_CONTEXT_PARAM_ENGINES,
+			.value = to_user_pointer(&engines),
+			.size = sizeof(engines),
+		},
+	};
+	struct drm_i915_gem_context_create_ext_setparam p_recover = {
+		.base = {
+			.name = I915_CONTEXT_CREATE_EXT_SETPARAM,
+			.next_extension = to_user_pointer(&p_engines),
+		},
+		.param = {
+			.param = I915_CONTEXT_PARAM_RECOVERABLE,
+			.value = 0,
+		},
+	};
+	struct drm_i915_gem_context_create_ext_setparam p_prio = {
+		.base = {
+			.name = I915_CONTEXT_CREATE_EXT_SETPARAM,
+			.next_extension = to_user_pointer(&p_recover),
+		},
+		.param = {
+			.param = I915_CONTEXT_PARAM_PRIORITY,
+			.value = 768,
+		},
+	};
+	struct drm_i915_gem_context_create_ext create = {
+		.flags = (I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE |
+			  I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS),
+	};
+	struct drm_i915_gem_context_param get;
+
+	igt_require(create_ext_ioctl(i915, &create) == 0);
+
+	create.extensions = to_user_pointer(&p_prio);
+	igt_assert_eq(create_ext_ioctl(i915, &create), 0);
+
+	memset(&get, 0, sizeof(get));
+	get.ctx_id = create.ctx_id;
+	get.param = I915_CONTEXT_PARAM_PRIORITY;
+	gem_context_get_param(i915, &get);
+	igt_assert_eq(get.value, p_prio.param.value);
+
+	memset(&get, 0, sizeof(get));
+	get.ctx_id = create.ctx_id;
+	get.param = I915_CONTEXT_PARAM_RECOVERABLE;
+	gem_context_get_param(i915, &get);
+	igt_assert_eq(get.value, 0);
+
+	check_single_timeline(i915, create.ctx_id, 2);
+
+	gem_context_destroy(i915, create.ctx_id);
+#endif /* I915_DEFINE_CONTEXT_PARAM_ENGINES */
+}
+
 igt_main
 {
 	const int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
@@ -340,17 +543,15 @@ igt_main
 		memset(&create, 0, sizeof(create));
 		create.ctx_id = rand();
 		create.pad = 0;
-		igt_assert_eq(__gem_context_create_local(fd, &create), 0);
+		igt_assert_eq(create_ioctl(fd, &create), 0);
 		igt_assert(create.ctx_id != 0);
 		gem_context_destroy(fd, create.ctx_id);
 	}
 
-	igt_subtest("invalid-pad") {
-		memset(&create, 0, sizeof(create));
-		create.ctx_id = rand();
-		create.pad = 1;
-		igt_assert_eq(__gem_context_create_local(fd, &create), -EINVAL);
-	}
+	igt_subtest("ext-param")
+		basic_ext_param(fd);
+	igt_subtest("iris-pipeline")
+		iris_pipeline(fd);
 
 	igt_subtest("maximum-mem")
 		maximum(fd, ncpus, CHECK_RAM);
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 92+ messages in thread
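
The setparam extension exercised above is an instance of the generic
i915_user_extension chain: every block starts with { name, next_extension }
and the kernel walks the list until next_extension is 0, with a failing
extension failing the create as a whole (which is what the -EFAULT/-E2BIG
checks above rely on). A distilled sketch of what iris_pipeline() builds,
chaining two parameters onto a single create (create_ext_ioctl() is the helper
from the patch; the priority value is arbitrary):

    struct drm_i915_gem_context_create_ext_setparam p_recover = {
            .base = {
                    .name = I915_CONTEXT_CREATE_EXT_SETPARAM,
                    .next_extension = 0, /* end of chain */
            },
            .param = {
                    .param = I915_CONTEXT_PARAM_RECOVERABLE,
                    .value = 0,
            },
    };
    struct drm_i915_gem_context_create_ext_setparam p_prio = {
            .base = {
                    .name = I915_CONTEXT_CREATE_EXT_SETPARAM,
                    .next_extension = to_user_pointer(&p_recover),
            },
            .param = {
                    .param = I915_CONTEXT_PARAM_PRIORITY,
                    .value = 512,
            },
    };
    struct drm_i915_gem_context_create_ext create = {
            .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
            .extensions = to_user_pointer(&p_prio), /* head of the chain */
    };

    /* Create and apply both parameters in one ioctl. */
    igt_assert_eq(create_ext_ioctl(i915, &create), 0);
    gem_context_destroy(i915, create.ctx_id);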

* [PATCH i-g-t 06/16] drm-uapi: Import i915_drm.h upto 364df3d04d51
  2019-05-08 10:09 ` [igt-dev] " Chris Wilson
@ 2019-05-08 10:09   ` Chris Wilson
  -1 siblings, 0 replies; 92+ messages in thread
From: Chris Wilson @ 2019-05-08 10:09 UTC (permalink / raw)
  To: intel-gfx; +Cc: igt-dev

commit 364df3d04d51f0aad13b898f3dffca8c2d03d2b3 (HEAD)
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Fri Jun 30 13:40:53 2017 +0100

    drm/i915: Allow specification of parallel execbuf

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 include/drm-uapi/i915_drm.h | 146 +++++++++++++++++++++++++++++++++++-
 1 file changed, 145 insertions(+), 1 deletion(-)

diff --git a/include/drm-uapi/i915_drm.h b/include/drm-uapi/i915_drm.h
index 1b0488a81..72be2705e 100644
--- a/include/drm-uapi/i915_drm.h
+++ b/include/drm-uapi/i915_drm.h
@@ -136,6 +136,8 @@ enum drm_i915_gem_engine_class {
 struct i915_engine_class_instance {
 	__u16 engine_class; /* see enum drm_i915_gem_engine_class */
 	__u16 engine_instance;
+#define I915_ENGINE_CLASS_INVALID_NONE -1
+#define I915_ENGINE_CLASS_INVALID_VIRTUAL -2
 };
 
 /**
@@ -602,6 +604,12 @@ typedef struct drm_i915_irq_wait {
  */
 #define I915_PARAM_MMAP_GTT_COHERENT	52
 
+/*
+ * Query whether DRM_I915_GEM_EXECBUFFER2 supports coordination of parallel
+ * execution through use of explicit fence support.
+ * See I915_EXEC_FENCE_OUT and I915_EXEC_FENCE_SUBMIT.
+ */
+#define I915_PARAM_HAS_EXEC_SUBMIT_FENCE 53
 /* Must be kept compact -- no holes and well documented */
 
 typedef struct drm_i915_getparam {
@@ -1124,7 +1132,16 @@ struct drm_i915_gem_execbuffer2 {
  */
 #define I915_EXEC_FENCE_ARRAY   (1<<19)
 
-#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_ARRAY<<1))
+/*
+ * Setting I915_EXEC_FENCE_SUBMIT implies that lower_32_bits(rsvd2) represent
+ * a sync_file fd to wait upon (in a nonblocking manner) prior to executing
+ * the batch.
+ *
+ * Returns -EINVAL if the sync_file fd cannot be found.
+ */
+#define I915_EXEC_FENCE_SUBMIT		(1 << 20)
+
+#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_SUBMIT << 1))
 
 #define I915_EXEC_CONTEXT_ID_MASK	(0xffffffff)
 #define i915_execbuffer2_set_context_id(eb2, context) \
@@ -1523,6 +1540,30 @@ struct drm_i915_gem_context_param {
 	 * See DRM_I915_GEM_VM_CREATE and DRM_I915_GEM_VM_DESTROY.
 	 */
 #define I915_CONTEXT_PARAM_VM		0x9
+
+/*
+ * I915_CONTEXT_PARAM_ENGINES:
+ *
+ * Bind this context to operate on this subset of available engines. Henceforth,
+ * the I915_EXEC_RING selector for DRM_IOCTL_I915_GEM_EXECBUFFER2 operates as
+ * an index into this array of engines; I915_EXEC_DEFAULT selecting engine[0]
+ * and upwards. Slots 0...N are filled in using the specified (class, instance).
+ * Use
+ *	engine_class: I915_ENGINE_CLASS_INVALID,
+ *	engine_instance: I915_ENGINE_CLASS_INVALID_NONE
+ * to specify a gap in the array that can be filled in later, e.g. by a
+ * virtual engine used for load balancing.
+ *
+ * Setting the number of engines bound to the context to 0, by passing a zero
+ * sized argument, will revert back to default settings.
+ *
+ * See struct i915_context_param_engines.
+ *
+ * Extensions:
+ *   i915_context_engines_load_balance (I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE)
+ *   i915_context_engines_bond (I915_CONTEXT_ENGINES_EXT_BOND)
+ */
+#define I915_CONTEXT_PARAM_ENGINES	0xa
 /* Must be kept compact -- no holes and well documented */
 
 	__u64 value;
@@ -1586,12 +1627,115 @@ struct drm_i915_gem_context_param_sseu {
 	__u32 rsvd;
 };
 
+/*
+ * i915_context_engines_load_balance:
+ *
+ * Enable load balancing across this set of engines.
+ *
+ * Into the I915_EXEC_DEFAULT slot [0], a virtual engine is created that when
+ * used will proxy the execbuffer request onto one of the set of engines
+ * in such a way as to distribute the load evenly across the set.
+ *
+ * The set of engines must be compatible (e.g. the same HW class) as they
+ * will share the same logical GPU context and ring.
+ *
+ * To intermix rendering with the virtual engine and direct rendering onto
+ * the backing engines (bypassing the load balancing proxy), the context must
+ * be defined to use a single timeline for all engines.
+ */
+struct i915_context_engines_load_balance {
+	struct i915_user_extension base;
+
+	__u16 engine_index;
+	__u16 num_siblings;
+	__u32 flags; /* all undefined flags must be zero */
+
+	__u64 mbz64; /* reserved for future use; must be zero */
+
+	struct i915_engine_class_instance engines[0];
+} __attribute__((packed));
+
+#define I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(name__, N__) struct { \
+	struct i915_user_extension base; \
+	__u16 engine_index; \
+	__u16 num_siblings; \
+	__u32 flags; \
+	__u64 mbz64; \
+	struct i915_engine_class_instance engines[N__]; \
+} __attribute__((packed)) name__
+
+/*
+ * i915_context_engines_bond:
+ *
+ * Constructed bonded pairs for execution within a virtual engine.
+ *
+ * All engines are equal, but some are more equal than others. Given
+ * the distribution of resources in the HW, it may be preferable to run
+ * a request on a given subset of engines in parallel to a request on a
+ * specific engine. We enable this selection of engines within a virtual
+ * engine by specifying bonding pairs, for any given master engine we will
+ * only execute on one of the corresponding siblings within the virtual engine.
+ *
+ * To execute a request in parallel on the master engine and a sibling requires
+ * coordination with a I915_EXEC_FENCE_SUBMIT.
+ */
+struct i915_context_engines_bond {
+	struct i915_user_extension base;
+
+	struct i915_engine_class_instance master;
+
+	__u16 virtual_index; /* index of virtual engine in ctx->engines[] */
+	__u16 num_bonds;
+
+	__u64 flags; /* all undefined flags must be zero */
+	__u64 mbz64[4]; /* reserved for future use; must be zero */
+
+	struct i915_engine_class_instance engines[0];
+} __attribute__((packed));
+
+#define I915_DEFINE_CONTEXT_ENGINES_BOND(name__, N__) struct { \
+	struct i915_user_extension base; \
+	struct i915_engine_class_instance master; \
+	__u16 virtual_index; \
+	__u16 num_bonds; \
+	__u64 flags; \
+	__u64 mbz64[4]; \
+	struct i915_engine_class_instance engines[N__]; \
+} __attribute__((packed)) name__
+
+struct i915_context_param_engines {
+	__u64 extensions; /* linked chain of extension blocks, 0 terminates */
+#define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0 /* see i915_context_engines_load_balance */
+#define I915_CONTEXT_ENGINES_EXT_BOND 1 /* see i915_context_engines_bond */
+	struct i915_engine_class_instance engines[0];
+} __attribute__((packed));
+
+#define I915_DEFINE_CONTEXT_PARAM_ENGINES(name__, N__) struct { \
+	__u64 extensions; \
+	struct i915_engine_class_instance engines[N__]; \
+} __attribute__((packed)) name__
+
 struct drm_i915_gem_context_create_ext_setparam {
 #define I915_CONTEXT_CREATE_EXT_SETPARAM 0
 	struct i915_user_extension base;
 	struct drm_i915_gem_context_param param;
 };
 
+struct drm_i915_gem_context_create_ext_clone {
+#define I915_CONTEXT_CREATE_EXT_CLONE 1
+	struct i915_user_extension base;
+	__u32 clone_id;
+	__u32 flags;
+#define I915_CONTEXT_CLONE_ENGINES	(1u << 0)
+#define I915_CONTEXT_CLONE_FLAGS	(1u << 1)
+#define I915_CONTEXT_CLONE_SCHEDATTR	(1u << 2)
+#define I915_CONTEXT_CLONE_SSEU		(1u << 3)
+#define I915_CONTEXT_CLONE_TIMELINE	(1u << 4)
+#define I915_CONTEXT_CLONE_VM		(1u << 5)
+#define I915_CONTEXT_CLONE_UNKNOWN -(I915_CONTEXT_CLONE_VM << 1)
+	__u64 rsvd;
+};
+
 struct drm_i915_gem_context_destroy {
 	__u32 ctx_id;
 	__u32 pad;
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 92+ messages in thread
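
The I915_CONTEXT_PARAM_ENGINES comment above becomes clearer with the helper
macro the header provides. A hedged sketch of binding a context to an explicit
two-slot engine map, assuming ctx is a live context id and the device actually
exposes these engines:

    I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 2) = {
            .extensions = 0,
            .engines = {
                    { I915_ENGINE_CLASS_RENDER, 0 }, /* execbuf flags 0 -> rcs0 */
                    { I915_ENGINE_CLASS_COPY, 0 },   /* execbuf flags 1 -> bcs0 */
            },
    };
    struct drm_i915_gem_context_param p = {
            .ctx_id = ctx,
            .param = I915_CONTEXT_PARAM_ENGINES,
            .size = sizeof(engines),
            .value = to_user_pointer(&engines),
    };

    gem_context_set_param(i915, &p);
    /* Passing .size = 0 instead reverts the context to the default map. */

After this, the I915_EXEC_RING bits of the execbuffer2 flags index into the
map rather than naming fixed rings, which is the behavioural change the tests
later in the series depend on.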

* [PATCH i-g-t 07/16] i915: Add gem_ctx_clone
  2019-05-08 10:09 ` [igt-dev] " Chris Wilson
@ 2019-05-08 10:09   ` Chris Wilson
  -1 siblings, 0 replies; 92+ messages in thread
From: Chris Wilson @ 2019-05-08 10:09 UTC (permalink / raw)
  To: intel-gfx; +Cc: igt-dev

Exercise cloning a context, an extension of merely creating one.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 tests/Makefile.sources     |   1 +
 tests/i915/gem_ctx_clone.c | 460 +++++++++++++++++++++++++++++++++++++
 tests/meson.build          |   1 +
 3 files changed, 462 insertions(+)
 create mode 100644 tests/i915/gem_ctx_clone.c

diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index 1a541d206..e1b7feeb2 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -21,6 +21,7 @@ TESTS_progs = \
 	drm_import_export \
 	drm_mm \
 	drm_read \
+	i915/gem_ctx_clone \
 	i915/gem_vm_create \
 	kms_3d \
 	kms_addfb_basic \
diff --git a/tests/i915/gem_ctx_clone.c b/tests/i915/gem_ctx_clone.c
new file mode 100644
index 000000000..cdc5bf413
--- /dev/null
+++ b/tests/i915/gem_ctx_clone.c
@@ -0,0 +1,460 @@
+/*
+ * Copyright © 2019 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "igt.h"
+#include "igt_gt.h"
+#include "i915/gem_vm.h"
+#include "i915_drm.h"
+
+static int ctx_create_ioctl(int i915, struct drm_i915_gem_context_create_ext *arg)
+{
+	int err;
+
+	err = 0;
+	if (igt_ioctl(i915, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, arg)) {
+		err = -errno;
+		igt_assume(err);
+	}
+
+	errno = 0;
+	return err;
+}
+
+static bool has_ctx_clone(int i915)
+{
+	struct drm_i915_gem_context_create_ext_clone ext = {
+		{ .name = I915_CONTEXT_CREATE_EXT_CLONE },
+		.clone_id = -1,
+	};
+	struct drm_i915_gem_context_create_ext create = {
+		.flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
+		.extensions = to_user_pointer(&ext),
+	};
+	return ctx_create_ioctl(i915, &create) == -ENOENT;
+}
+
+static void invalid_clone(int i915)
+{
+	struct drm_i915_gem_context_create_ext_clone ext = {
+		{ .name = I915_CONTEXT_CREATE_EXT_CLONE },
+	};
+	struct drm_i915_gem_context_create_ext create = {
+		.flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
+		.extensions = to_user_pointer(&ext),
+	};
+
+	igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
+	gem_context_destroy(i915, create.ctx_id);
+
+	ext.flags = -1; /* Hopefully we won't run out of flags */
+	igt_assert_eq(ctx_create_ioctl(i915, &create), -EINVAL);
+	ext.flags = 0;
+
+	ext.base.next_extension = -1;
+	igt_assert_eq(ctx_create_ioctl(i915, &create), -EFAULT);
+	ext.base.next_extension = to_user_pointer(&ext);
+	igt_assert_eq(ctx_create_ioctl(i915, &create), -E2BIG);
+	ext.base.next_extension = 0;
+
+	ext.clone_id = -1;
+	igt_assert_eq(ctx_create_ioctl(i915, &create), -ENOENT);
+	ext.clone_id = 0;
+}
+
+static void clone_flags(int i915)
+{
+	struct drm_i915_gem_context_create_ext_setparam set = {
+		{ .name = I915_CONTEXT_CREATE_EXT_SETPARAM },
+		{ .param = I915_CONTEXT_PARAM_RECOVERABLE },
+	};
+	struct drm_i915_gem_context_create_ext_clone ext = {
+		{ .name = I915_CONTEXT_CREATE_EXT_CLONE },
+		.flags = I915_CONTEXT_CLONE_FLAGS,
+	};
+	struct drm_i915_gem_context_create_ext create = {
+		.flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
+		.extensions = to_user_pointer(&ext),
+	};
+	int expected;
+
+	set.param.value = 1; /* default is recoverable */
+	igt_require(__gem_context_set_param(i915, &set.param) == 0);
+
+	for (int pass = 0; pass < 2; pass++) { /* cloning default, then child */
+		igt_debug("Cloning %d\n", ext.clone_id);
+		igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
+
+		set.param.ctx_id = ext.clone_id;
+		gem_context_get_param(i915, &set.param);
+		expected = set.param.value;
+
+		set.param.ctx_id = create.ctx_id;
+		gem_context_get_param(i915, &set.param);
+
+		igt_assert_eq_u64(set.param.param,
+				  I915_CONTEXT_PARAM_RECOVERABLE);
+		igt_assert_eq((int)set.param.value, expected);
+
+		gem_context_destroy(i915, create.ctx_id);
+
+		expected = set.param.value = 0;
+		set.param.ctx_id = ext.clone_id;
+		gem_context_set_param(i915, &set.param);
+
+		igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
+
+		set.param.ctx_id = create.ctx_id;
+		gem_context_get_param(i915, &set.param);
+
+		igt_assert_eq_u64(set.param.param,
+				  I915_CONTEXT_PARAM_RECOVERABLE);
+		igt_assert_eq((int)set.param.value, expected);
+
+		gem_context_destroy(i915, create.ctx_id);
+
+		/* clone but then flip RECOVERABLE back on via chained setparam... */
+		set.param.ctx_id = 0;
+		set.param.value = 1;
+		ext.base.next_extension = to_user_pointer(&set);
+		igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
+		ext.base.next_extension = 0;
+
+		/* new context should have the chained value... */
+		set.param.ctx_id = create.ctx_id;
+		gem_context_get_param(i915, &set.param);
+		igt_assert_eq_u64(set.param.value, 1);
+
+		/* but the source context should remain non-recoverable */
+		set.param.ctx_id = ext.clone_id;
+		gem_context_get_param(i915, &set.param);
+		igt_assert_eq_u64(set.param.value, 0);
+
+		gem_context_destroy(i915, create.ctx_id);
+		ext.clone_id = gem_context_create(i915);
+	}
+
+	gem_context_destroy(i915, ext.clone_id);
+}
+
+static void clone_engines(int i915)
+{
+	struct drm_i915_gem_context_create_ext_setparam set = {
+		{ .name = I915_CONTEXT_CREATE_EXT_SETPARAM },
+		{ .param = I915_CONTEXT_PARAM_ENGINES },
+	};
+	struct drm_i915_gem_context_create_ext_clone ext = {
+		{ .name = I915_CONTEXT_CREATE_EXT_CLONE },
+		.flags = I915_CONTEXT_CLONE_ENGINES,
+	};
+	struct drm_i915_gem_context_create_ext create = {
+		.flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
+		.extensions = to_user_pointer(&ext),
+	};
+	I915_DEFINE_CONTEXT_PARAM_ENGINES(expected, 64);
+	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 64);
+	uint64_t ex_size;
+
+	memset(&expected, 0, sizeof(expected));
+	memset(&engines, 0, sizeof(engines));
+
+	igt_require(__gem_context_set_param(i915, &set.param) == 0);
+
+	for (int pass = 0; pass < 2; pass++) { /* cloning default, then child */
+		igt_debug("Cloning %d\n", ext.clone_id);
+		igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
+
+		set.param.ctx_id = ext.clone_id;
+		set.param.size = sizeof(expected);
+		set.param.value = to_user_pointer(&expected);
+		gem_context_get_param(i915, &set.param);
+		ex_size = set.param.size;
+
+		set.param.ctx_id = create.ctx_id;
+		set.param.size = sizeof(engines);
+		set.param.value = to_user_pointer(&engines);
+		gem_context_get_param(i915, &set.param);
+
+		igt_assert_eq_u64(set.param.param, I915_CONTEXT_PARAM_ENGINES);
+		igt_assert_eq_u64(set.param.size, ex_size);
+		igt_assert(!memcmp(&engines, &expected, ex_size));
+
+		gem_context_destroy(i915, create.ctx_id);
+
+		expected.engines[0].engine_class =
+			I915_ENGINE_CLASS_INVALID;
+		expected.engines[0].engine_instance =
+			I915_ENGINE_CLASS_INVALID_NONE;
+		ex_size = (sizeof(struct i915_context_param_engines) +
+			   sizeof(expected.engines[0]));
+
+		set.param.ctx_id = ext.clone_id;
+		set.param.size = ex_size;
+		set.param.value = to_user_pointer(&expected);
+		gem_context_set_param(i915, &set.param);
+
+		igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
+
+		set.param.ctx_id = create.ctx_id;
+		set.param.size = sizeof(engines);
+		set.param.value = to_user_pointer(&engines);
+		gem_context_get_param(i915, &set.param);
+
+		igt_assert_eq_u64(set.param.size, ex_size);
+		igt_assert(!memcmp(&engines, &expected, ex_size));
+
+		gem_context_destroy(i915, create.ctx_id);
+
+		/* clone but then reset engines to default */
+		set.param.ctx_id = 0;
+		set.param.size = 0;
+		set.param.value = 0;
+		ext.base.next_extension = to_user_pointer(&set);
+
+		igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
+		ext.base.next_extension = 0;
+
+		set.param.ctx_id = create.ctx_id;
+		set.param.size = sizeof(engines);
+		set.param.value = to_user_pointer(&engines);
+		gem_context_get_param(i915, &set.param);
+		igt_assert_eq_u64(set.param.size, 0);
+
+		gem_context_destroy(i915, create.ctx_id);
+
+		/* And check we ignore the flag */
+		ext.flags = 0;
+		igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
+		ext.flags = I915_CONTEXT_CLONE_ENGINES;
+
+		set.param.ctx_id = create.ctx_id;
+		set.param.size = sizeof(engines);
+		set.param.value = to_user_pointer(&engines);
+		gem_context_get_param(i915, &set.param);
+		igt_assert_eq_u64(set.param.size, 0);
+
+		ext.clone_id = gem_context_create(i915);
+	}
+
+	gem_context_destroy(i915, ext.clone_id);
+}
+
+static void clone_scheduler(int i915)
+{
+	struct drm_i915_gem_context_create_ext_setparam set = {
+		{ .name = I915_CONTEXT_CREATE_EXT_SETPARAM },
+		{ .param = I915_CONTEXT_PARAM_PRIORITY },
+	};
+	struct drm_i915_gem_context_create_ext_clone ext = {
+		{ .name = I915_CONTEXT_CREATE_EXT_CLONE },
+		.flags = I915_CONTEXT_CLONE_SCHEDATTR,
+	};
+	struct drm_i915_gem_context_create_ext create = {
+		.flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
+		.extensions = to_user_pointer(&ext),
+	};
+	int expected;
+
+	igt_require(__gem_context_set_param(i915, &set.param) == 0);
+
+	for (int pass = 0; pass < 2; pass++) { /* cloning default, then child */
+		igt_debug("Cloning %d\n", ext.clone_id);
+		igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
+
+		set.param.ctx_id = ext.clone_id;
+		gem_context_get_param(i915, &set.param);
+		expected = set.param.value;
+
+		set.param.ctx_id = create.ctx_id;
+		gem_context_get_param(i915, &set.param);
+
+		igt_assert_eq_u64(set.param.param, I915_CONTEXT_PARAM_PRIORITY);
+		igt_assert_eq((int)set.param.value, expected);
+
+		gem_context_destroy(i915, create.ctx_id);
+
+		expected = set.param.value = 1;
+		set.param.ctx_id = ext.clone_id;
+		gem_context_set_param(i915, &set.param);
+
+		igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
+
+		set.param.ctx_id = create.ctx_id;
+		gem_context_get_param(i915, &set.param);
+
+		igt_assert_eq_u64(set.param.param, I915_CONTEXT_PARAM_PRIORITY);
+		igt_assert_eq((int)set.param.value, expected);
+
+		gem_context_destroy(i915, create.ctx_id);
+
+		/* clone but then reset priority to default */
+		set.param.ctx_id = 0;
+		set.param.value = 0;
+		ext.base.next_extension = to_user_pointer(&set);
+		igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
+		ext.base.next_extension = 0;
+
+		set.param.ctx_id = create.ctx_id;
+		gem_context_get_param(i915, &set.param);
+		igt_assert_eq_u64(set.param.value, 0);
+
+		set.param.ctx_id = ext.clone_id;
+		gem_context_get_param(i915, &set.param);
+		igt_assert_eq_u64(set.param.value, 1);
+
+		gem_context_destroy(i915, create.ctx_id);
+		ext.clone_id = gem_context_create(i915);
+	}
+
+	gem_context_destroy(i915, ext.clone_id);
+}
+
+static uint32_t __batch_create(int i915, uint32_t offset)
+{
+	const uint32_t bbe = MI_BATCH_BUFFER_END;
+	uint32_t handle;
+
+	handle = gem_create(i915, ALIGN(offset + 4, 4096));
+	gem_write(i915, handle, offset, &bbe, sizeof(bbe));
+
+	return handle;
+}
+
+static uint32_t batch_create(int i915)
+{
+	return __batch_create(i915, 0);
+}
+
+static void check_same_vm(int i915, uint32_t ctx_a, uint32_t ctx_b)
+{
+	struct drm_i915_gem_exec_object2 batch = {
+		.handle = batch_create(i915),
+	};
+	struct drm_i915_gem_execbuffer2 eb = {
+		.buffers_ptr = to_user_pointer(&batch),
+		.buffer_count = 1,
+	};
+
+	/* First verify that we try to use "softpinning" by default */
+	batch.offset = 48 << 20;
+	eb.rsvd1 = ctx_a;
+	gem_execbuf(i915, &eb);
+	igt_assert_eq_u64(batch.offset, 48 << 20);
+
+	/* An already active VMA will try to keep its offset */
+	batch.offset = 0;
+	eb.rsvd1 = ctx_b;
+	gem_execbuf(i915, &eb);
+	igt_assert_eq_u64(batch.offset, 48 << 20);
+
+	gem_sync(i915, batch.handle);
+	gem_close(i915, batch.handle);
+
+	gem_quiescent_gpu(i915); /* evict the vma */
+}
+
+static void clone_vm(int i915)
+{
+	struct drm_i915_gem_context_param set = {
+		.param = I915_CONTEXT_PARAM_VM,
+	};
+	struct drm_i915_gem_context_create_ext_clone ext = {
+		{ .name = I915_CONTEXT_CREATE_EXT_CLONE },
+		.flags = I915_CONTEXT_CLONE_VM,
+	};
+	struct drm_i915_gem_context_create_ext create = {
+		.flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
+		.extensions = to_user_pointer(&ext),
+	};
+	uint32_t vm_id[2];
+
+	igt_require(__gem_context_set_param(i915, &set) == -ENOENT);
+
+	/* Scrub the VM for our tests */
+	i915 = gem_reopen_driver(i915);
+
+	set.ctx_id = gem_context_create(i915);
+	gem_context_get_param(i915, &set);
+	vm_id[0] = set.value;
+	gem_context_destroy(i915, set.ctx_id);
+
+	vm_id[1] = gem_vm_create(i915);
+
+	for (int pass = 0; pass < 2; pass++) { /* cloning default, then child */
+		igt_debug("Cloning %d\n", ext.clone_id);
+
+		igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
+		check_same_vm(i915, ext.clone_id, create.ctx_id);
+		gem_context_destroy(i915, create.ctx_id);
+
+		set.value = vm_id[pass];
+		set.ctx_id = ext.clone_id;
+		gem_context_set_param(i915, &set);
+
+		igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
+		check_same_vm(i915, ext.clone_id, create.ctx_id);
+		gem_context_destroy(i915, create.ctx_id);
+
+		ext.clone_id = gem_context_create(i915);
+	}
+
+	gem_context_destroy(i915, ext.clone_id);
+
+	for (int i = 0; i < ARRAY_SIZE(vm_id); i++)
+		gem_vm_destroy(i915, vm_id[i]);
+
+	close(i915);
+}
+
+igt_main
+{
+	int i915 = -1;
+
+	igt_fixture {
+		i915 = drm_open_driver(DRIVER_INTEL);
+		igt_require_gem(i915);
+		gem_require_contexts(i915);
+
+		igt_require(has_ctx_clone(i915));
+		igt_fork_hang_detector(i915);
+	}
+
+	igt_subtest("invalid")
+		invalid_clone(i915);
+
+	igt_subtest("engines")
+		clone_engines(i915);
+
+	igt_subtest("flags")
+		clone_flags(i915);
+
+	igt_subtest("scheduler")
+		clone_scheduler(i915);
+
+	igt_subtest("vm")
+		clone_vm(i915);
+
+	igt_fixture {
+		igt_stop_hang_detector();
+		close(i915);
+	}
+}
diff --git a/tests/meson.build b/tests/meson.build
index e7dbc5756..3810bd760 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -109,6 +109,7 @@ i915_progs = [
 	'gem_cs_prefetch',
 	'gem_cs_tlb',
 	'gem_ctx_bad_destroy',
+	'gem_ctx_clone',
 	'gem_ctx_create',
 	'gem_ctx_exec',
 	'gem_ctx_isolation',
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 92+ messages in thread
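
In uapi terms the whole test reduces to one extension block. A hedged sketch
of cloning an existing context's engine map, scheduling attributes and VM in a
single create (ctx_create_ioctl() is the helper from the patch; parent is
assumed to be a live context id):

    struct drm_i915_gem_context_create_ext_clone ext = {
            .base = { .name = I915_CONTEXT_CREATE_EXT_CLONE },
            .clone_id = parent,
            .flags = I915_CONTEXT_CLONE_ENGINES |
                     I915_CONTEXT_CLONE_SCHEDATTR |
                     I915_CONTEXT_CLONE_VM,
    };
    struct drm_i915_gem_context_create_ext create = {
            .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
            .extensions = to_user_pointer(&ext),
    };

    igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
    /* create.ctx_id shares parent's engine map, priority and ppGTT */
    gem_context_destroy(i915, create.ctx_id);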

* [igt-dev] [PATCH i-g-t 07/16] i915: Add gem_ctx_clone
@ 2019-05-08 10:09   ` Chris Wilson
  0 siblings, 0 replies; 92+ messages in thread
From: Chris Wilson @ 2019-05-08 10:09 UTC (permalink / raw)
  To: intel-gfx; +Cc: igt-dev

Exercise cloning a context, an extension of merely creating one.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 tests/Makefile.sources     |   1 +
 tests/i915/gem_ctx_clone.c | 460 +++++++++++++++++++++++++++++++++++++
 tests/meson.build          |   1 +
 3 files changed, 462 insertions(+)
 create mode 100644 tests/i915/gem_ctx_clone.c

diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index 1a541d206..e1b7feeb2 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -21,6 +21,7 @@ TESTS_progs = \
 	drm_import_export \
 	drm_mm \
 	drm_read \
+	i915/gem_ctx_clone \
 	i915/gem_vm_create \
 	kms_3d \
 	kms_addfb_basic \
diff --git a/tests/i915/gem_ctx_clone.c b/tests/i915/gem_ctx_clone.c
new file mode 100644
index 000000000..cdc5bf413
--- /dev/null
+++ b/tests/i915/gem_ctx_clone.c
@@ -0,0 +1,460 @@
+/*
+ * Copyright © 2019 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "igt.h"
+#include "igt_gt.h"
+#include "i915/gem_vm.h"
+#include "i915_drm.h"
+
+static int ctx_create_ioctl(int i915, struct drm_i915_gem_context_create_ext *arg)
+{
+	int err;
+
+	err = 0;
+	if (igt_ioctl(i915, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, arg)) {
+		err = -errno;
+		igt_assume(err);
+	}
+
+	errno = 0;
+	return err;
+}
+
+static bool has_ctx_clone(int i915)
+{
+	struct drm_i915_gem_context_create_ext_clone ext = {
+		{ .name = I915_CONTEXT_CREATE_EXT_CLONE },
+		.clone_id = -1,
+	};
+	struct drm_i915_gem_context_create_ext create = {
+		.flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
+		.extensions = to_user_pointer(&ext),
+	};
+	return ctx_create_ioctl(i915, &create) == -ENOENT;
+}
+
+static void invalid_clone(int i915)
+{
+	struct drm_i915_gem_context_create_ext_clone ext = {
+		{ .name = I915_CONTEXT_CREATE_EXT_CLONE },
+	};
+	struct drm_i915_gem_context_create_ext create = {
+		.flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
+		.extensions = to_user_pointer(&ext),
+	};
+
+	igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
+	gem_context_destroy(i915, create.ctx_id);
+
+	ext.flags = -1; /* Hopefully we won't run out of flags */
+	igt_assert_eq(ctx_create_ioctl(i915, &create), -EINVAL);
+	ext.flags = 0;
+
+	ext.base.next_extension = -1;
+	igt_assert_eq(ctx_create_ioctl(i915, &create), -EFAULT);
+	ext.base.next_extension = to_user_pointer(&ext);
+	igt_assert_eq(ctx_create_ioctl(i915, &create), -E2BIG);
+	ext.base.next_extension = 0;
+
+	ext.clone_id = -1;
+	igt_assert_eq(ctx_create_ioctl(i915, &create), -ENOENT);
+	ext.clone_id = 0;
+}
+
+static void clone_flags(int i915)
+{
+	struct drm_i915_gem_context_create_ext_setparam set = {
+		{ .name = I915_CONTEXT_CREATE_EXT_SETPARAM },
+		{ .param = I915_CONTEXT_PARAM_RECOVERABLE },
+	};
+	struct drm_i915_gem_context_create_ext_clone ext = {
+		{ .name = I915_CONTEXT_CREATE_EXT_CLONE },
+		.flags = I915_CONTEXT_CLONE_FLAGS,
+	};
+	struct drm_i915_gem_context_create_ext create = {
+		.flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
+		.extensions = to_user_pointer(&ext),
+	};
+	int expected;
+
+	set.param.value = 1; /* default is recoverable */
+	igt_require(__gem_context_set_param(i915, &set.param) == 0);
+
+	for (int pass = 0; pass < 2; pass++) { /* cloning default, then child */
+		igt_debug("Cloning %d\n", ext.clone_id);
+		igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
+
+		set.param.ctx_id = ext.clone_id;
+		gem_context_get_param(i915, &set.param);
+		expected = set.param.value;
+
+		set.param.ctx_id = create.ctx_id;
+		gem_context_get_param(i915, &set.param);
+
+		igt_assert_eq_u64(set.param.param,
+				  I915_CONTEXT_PARAM_RECOVERABLE);
+		igt_assert_eq((int)set.param.value, expected);
+
+		gem_context_destroy(i915, create.ctx_id);
+
+		expected = set.param.value = 0;
+		set.param.ctx_id = ext.clone_id;
+		gem_context_set_param(i915, &set.param);
+
+		igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
+
+		set.param.ctx_id = create.ctx_id;
+		gem_context_get_param(i915, &set.param);
+
+		igt_assert_eq_u64(set.param.param,
+				  I915_CONTEXT_PARAM_RECOVERABLE);
+		igt_assert_eq((int)set.param.value, expected);
+
+		gem_context_destroy(i915, create.ctx_id);
+
+		/* clone but then flip RECOVERABLE back on via chained setparam... */
+		set.param.ctx_id = 0;
+		set.param.value = 1;
+		ext.base.next_extension = to_user_pointer(&set);
+		igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
+		ext.base.next_extension = 0;
+
+		/* new context should have the chained value... */
+		set.param.ctx_id = create.ctx_id;
+		gem_context_get_param(i915, &set.param);
+		igt_assert_eq_u64(set.param.value, 1);
+
+		/* but the source context should remain non-recoverable */
+		set.param.ctx_id = ext.clone_id;
+		gem_context_get_param(i915, &set.param);
+		igt_assert_eq_u64(set.param.value, 0);
+
+		gem_context_destroy(i915, create.ctx_id);
+		ext.clone_id = gem_context_create(i915);
+	}
+
+	gem_context_destroy(i915, ext.clone_id);
+}
+
+static void clone_engines(int i915)
+{
+	struct drm_i915_gem_context_create_ext_setparam set = {
+		{ .name = I915_CONTEXT_CREATE_EXT_SETPARAM },
+		{ .param = I915_CONTEXT_PARAM_ENGINES },
+	};
+	struct drm_i915_gem_context_create_ext_clone ext = {
+		{ .name = I915_CONTEXT_CREATE_EXT_CLONE },
+		.flags = I915_CONTEXT_CLONE_ENGINES,
+	};
+	struct drm_i915_gem_context_create_ext create = {
+		.flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
+		.extensions = to_user_pointer(&ext),
+	};
+	I915_DEFINE_CONTEXT_PARAM_ENGINES(expected, 64);
+	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 64);
+	uint64_t ex_size;
+
+	memset(&expected, 0, sizeof(expected));
+	memset(&engines, 0, sizeof(engines));
+
+	igt_require(__gem_context_set_param(i915, &set.param) == 0);
+
+	for (int pass = 0; pass < 2; pass++) { /* cloning default, then child */
+		igt_debug("Cloning %d\n", ext.clone_id);
+		igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
+
+		set.param.ctx_id = ext.clone_id;
+		set.param.size = sizeof(expected);
+		set.param.value = to_user_pointer(&expected);
+		gem_context_get_param(i915, &set.param);
+		ex_size = set.param.size;
+
+		set.param.ctx_id = create.ctx_id;
+		set.param.size = sizeof(engines);
+		set.param.value = to_user_pointer(&engines);
+		gem_context_get_param(i915, &set.param);
+
+		igt_assert_eq_u64(set.param.param, I915_CONTEXT_PARAM_ENGINES);
+		igt_assert_eq_u64(set.param.size, ex_size);
+		igt_assert(!memcmp(&engines, &expected, ex_size));
+
+		gem_context_destroy(i915, create.ctx_id);
+
+		expected.engines[0].engine_class =
+			I915_ENGINE_CLASS_INVALID;
+		expected.engines[0].engine_instance =
+			I915_ENGINE_CLASS_INVALID_NONE;
+		ex_size = (sizeof(struct i915_context_param_engines) +
+			   sizeof(expected.engines[0]));
+
+		set.param.ctx_id = ext.clone_id;
+		set.param.size = ex_size;
+		set.param.value = to_user_pointer(&expected);
+		gem_context_set_param(i915, &set.param);
+
+		igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
+
+		set.param.ctx_id = create.ctx_id;
+		set.param.size = sizeof(engines);
+		set.param.value = to_user_pointer(&engines);
+		gem_context_get_param(i915, &set.param);
+
+		igt_assert_eq_u64(set.param.size, ex_size);
+		igt_assert(!memcmp(&engines, &expected, ex_size));
+
+		gem_context_destroy(i915, create.ctx_id);
+
+		/* clone but then reset engines to default */
+		set.param.ctx_id = 0;
+		set.param.size = 0;
+		set.param.value = 0;
+		ext.base.next_extension = to_user_pointer(&set);
+
+		igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
+		ext.base.next_extension = 0;
+
+		set.param.ctx_id = create.ctx_id;
+		set.param.size = sizeof(engines);
+		set.param.value = to_user_pointer(&engines);
+		gem_context_get_param(i915, &set.param);
+		igt_assert_eq_u64(set.param.size, 0);
+
+		gem_context_destroy(i915, create.ctx_id);
+
+		/* And check we ignore the flag */
+		ext.flags = 0;
+		igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
+		ext.flags = I915_CONTEXT_CLONE_ENGINES;
+
+		set.param.ctx_id = create.ctx_id;
+		set.param.size = sizeof(engines);
+		set.param.value = to_user_pointer(&engines);
+		gem_context_get_param(i915, &set.param);
+		igt_assert_eq_u64(set.param.size, 0);
+
+		ext.clone_id = gem_context_create(i915);
+	}
+
+	gem_context_destroy(i915, ext.clone_id);
+}
+
+static void clone_scheduler(int i915)
+{
+	struct drm_i915_gem_context_create_ext_setparam set = {
+		{ .name = I915_CONTEXT_CREATE_EXT_SETPARAM },
+		{ .param = I915_CONTEXT_PARAM_PRIORITY },
+	};
+	struct drm_i915_gem_context_create_ext_clone ext = {
+		{ .name = I915_CONTEXT_CREATE_EXT_CLONE },
+		.flags = I915_CONTEXT_CLONE_SCHEDATTR,
+	};
+	struct drm_i915_gem_context_create_ext create = {
+		.flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
+		.extensions = to_user_pointer(&ext),
+	};
+	int expected;
+
+	igt_require(__gem_context_set_param(i915, &set.param) == 0);
+
+	for (int pass = 0; pass < 2; pass++) { /* cloning default, then child */
+		igt_debug("Cloning %d\n", ext.clone_id);
+		igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
+
+		set.param.ctx_id = ext.clone_id;
+		gem_context_get_param(i915, &set.param);
+		expected = set.param.value;
+
+		set.param.ctx_id = create.ctx_id;
+		gem_context_get_param(i915, &set.param);
+
+		igt_assert_eq_u64(set.param.param, I915_CONTEXT_PARAM_PRIORITY);
+		igt_assert_eq((int)set.param.value, expected);
+
+		gem_context_destroy(i915, create.ctx_id);
+
+		expected = set.param.value = 1;
+		set.param.ctx_id = ext.clone_id;
+		gem_context_set_param(i915, &set.param);
+
+		igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
+
+		set.param.ctx_id = create.ctx_id;
+		gem_context_get_param(i915, &set.param);
+
+		igt_assert_eq_u64(set.param.param, I915_CONTEXT_PARAM_PRIORITY);
+		igt_assert_eq((int)set.param.value, expected);
+
+		gem_context_destroy(i915, create.ctx_id);
+
+		/* clone but then reset priority to default */
+		set.param.ctx_id = 0;
+		set.param.value = 0;
+		ext.base.next_extension = to_user_pointer(&set);
+		igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
+		ext.base.next_extension = 0;
+
+		set.param.ctx_id = create.ctx_id;
+		gem_context_get_param(i915, &set.param);
+		igt_assert_eq_u64(set.param.value, 0);
+
+		set.param.ctx_id = ext.clone_id;
+		gem_context_get_param(i915, &set.param);
+		igt_assert_eq_u64(set.param.value, 1);
+
+		gem_context_destroy(i915, create.ctx_id);
+		ext.clone_id = gem_context_create(i915);
+	}
+
+	gem_context_destroy(i915, ext.clone_id);
+}
+
+static uint32_t __batch_create(int i915, uint32_t offset)
+{
+	const uint32_t bbe = MI_BATCH_BUFFER_END;
+	uint32_t handle;
+
+	handle = gem_create(i915, ALIGN(offset + 4, 4096));
+	gem_write(i915, handle, offset, &bbe, sizeof(bbe));
+
+	return handle;
+}
+
+static uint32_t batch_create(int i915)
+{
+	return __batch_create(i915, 0);
+}
+
+static void check_same_vm(int i915, uint32_t ctx_a, uint32_t ctx_b)
+{
+	struct drm_i915_gem_exec_object2 batch = {
+		.handle = batch_create(i915),
+	};
+	struct drm_i915_gem_execbuffer2 eb = {
+		.buffers_ptr = to_user_pointer(&batch),
+		.buffer_count = 1,
+	};
+
+	/* First verify that we try to use "softpinning" by default */
+	batch.offset = 48 << 20;
+	eb.rsvd1 = ctx_a;
+	gem_execbuf(i915, &eb);
+	igt_assert_eq_u64(batch.offset, 48 << 20);
+
+	/* An already active VMA will try to keep its offset */
+	batch.offset = 0;
+	eb.rsvd1 = ctx_b;
+	gem_execbuf(i915, &eb);
+	igt_assert_eq_u64(batch.offset, 48 << 20);
+
+	gem_sync(i915, batch.handle);
+	gem_close(i915, batch.handle);
+
+	gem_quiescent_gpu(i915); /* evict the vma */
+}
+
+static void clone_vm(int i915)
+{
+	struct drm_i915_gem_context_param set = {
+		.param = I915_CONTEXT_PARAM_VM,
+	};
+	struct drm_i915_gem_context_create_ext_clone ext = {
+		{ .name = I915_CONTEXT_CREATE_EXT_CLONE },
+		.flags = I915_CONTEXT_CLONE_VM,
+	};
+	struct drm_i915_gem_context_create_ext create = {
+		.flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
+		.extensions = to_user_pointer(&ext),
+	};
+	uint32_t vm_id[2];
+
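+	/* vm id 0 is invalid, so -ENOENT implies the kernel knows PARAM_VM */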
+	igt_require(__gem_context_set_param(i915, &set) == -ENOENT);
+
+	/* Scrub the VM for our tests */
+	i915 = gem_reopen_driver(i915);
+
+	set.ctx_id = gem_context_create(i915);
+	gem_context_get_param(i915, &set);
+	vm_id[0] = set.value;
+	gem_context_destroy(i915, set.ctx_id);
+
+	vm_id[1] = gem_vm_create(i915);
+
+	for (int pass = 0; pass < 2; pass++) { /* cloning default, then child */
+		igt_debug("Cloning %d\n", ext.clone_id);
+
+		igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
+		check_same_vm(i915, ext.clone_id, create.ctx_id);
+		gem_context_destroy(i915, create.ctx_id);
+
+		set.value = vm_id[pass];
+		set.ctx_id = ext.clone_id;
+		gem_context_set_param(i915, &set);
+
+		igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
+		check_same_vm(i915, ext.clone_id, create.ctx_id);
+		gem_context_destroy(i915, create.ctx_id);
+
+		ext.clone_id = gem_context_create(i915);
+	}
+
+	gem_context_destroy(i915, ext.clone_id);
+
+	for (int i = 0; i < ARRAY_SIZE(vm_id); i++)
+		gem_vm_destroy(i915, vm_id[i]);
+
+	close(i915);
+}
+
+igt_main
+{
+	int i915 = -1;
+
+	igt_fixture {
+		i915 = drm_open_driver(DRIVER_INTEL);
+		igt_require_gem(i915);
+		gem_require_contexts(i915);
+
+		igt_require(has_ctx_clone(i915));
+		igt_fork_hang_detector(i915);
+	}
+
+	igt_subtest("invalid")
+		invalid_clone(i915);
+
+	igt_subtest("engines")
+		clone_engines(i915);
+
+	igt_subtest("flags")
+		clone_flags(i915);
+
+	igt_subtest("scheduler")
+		clone_scheduler(i915);
+
+	igt_subtest("vm")
+		clone_vm(i915);
+
+	igt_fixture {
+		igt_stop_hang_detector();
+		close(i915);
+	}
+}
diff --git a/tests/meson.build b/tests/meson.build
index e7dbc5756..3810bd760 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -109,6 +109,7 @@ i915_progs = [
 	'gem_cs_prefetch',
 	'gem_cs_tlb',
 	'gem_ctx_bad_destroy',
+	'gem_ctx_clone',
 	'gem_ctx_create',
 	'gem_ctx_exec',
 	'gem_ctx_isolation',
-- 
2.20.1

_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH i-g-t 08/16] i915: Exercise creating context with shared GTT
  2019-05-08 10:09 ` [igt-dev] " Chris Wilson
@ 2019-05-08 10:09   ` Chris Wilson
  -1 siblings, 0 replies; 92+ messages in thread
From: Chris Wilson @ 2019-05-08 10:09 UTC (permalink / raw)
  To: intel-gfx; +Cc: igt-dev

v2: Test that each shared context is its own timeline and allows request
reordering between shared contexts.
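
For reference, a minimal sketch of the intended usage, given an i915
drm fd and a populated execbuf (the gem_queue_create() helper below
wraps exactly this clone):

	/* a "queue" shares the VM and a single timeline with ctx0 */
	uint32_t queue = gem_context_clone(i915, 0,
					   I915_CONTEXT_CLONE_VM,
					   I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);

	execbuf.rsvd1 = queue;
	gem_execbuf(i915, &execbuf); /* ordered after all prior queue requests */

	gem_context_destroy(i915, queue);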

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
---
 lib/i915/gem_context.c        |  68 +++
 lib/i915/gem_context.h        |  13 +
 tests/Makefile.sources        |   1 +
 tests/i915/gem_ctx_shared.c   | 856 ++++++++++++++++++++++++++++++++++
 tests/i915/gem_exec_whisper.c |  32 +-
 tests/meson.build             |   1 +
 6 files changed, 962 insertions(+), 9 deletions(-)
 create mode 100644 tests/i915/gem_ctx_shared.c

diff --git a/lib/i915/gem_context.c b/lib/i915/gem_context.c
index f94d89cb4..8fb8984d1 100644
--- a/lib/i915/gem_context.c
+++ b/lib/i915/gem_context.c
@@ -272,6 +272,74 @@ void gem_context_set_priority(int fd, uint32_t ctx_id, int prio)
 	igt_assert_eq(__gem_context_set_priority(fd, ctx_id, prio), 0);
 }
 
+int
+__gem_context_clone(int i915,
+		    uint32_t src, unsigned int share,
+		    unsigned int flags,
+		    uint32_t *out)
+{
+	struct drm_i915_gem_context_create_ext_clone clone = {
+		{ .name = I915_CONTEXT_CREATE_EXT_CLONE },
+		.clone_id = src,
+		.flags = share,
+	};
+	struct drm_i915_gem_context_create_ext arg = {
+		.flags = flags | I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
+		.extensions = to_user_pointer(&clone),
+	};
+	int err = 0;
+
+	if (igt_ioctl(i915, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, &arg))
+		err = -errno;
+
+	*out = arg.ctx_id;
+
+	errno = 0;
+	return err;
+}
+
+static bool __gem_context_has(int i915, uint32_t share, unsigned int flags)
+{
+	uint32_t ctx;
+
+	__gem_context_clone(i915, 0, share, flags, &ctx);
+	if (ctx)
+		gem_context_destroy(i915, ctx);
+
+	errno = 0;
+	return ctx;
+}
+
+bool gem_contexts_has_shared_gtt(int i915)
+{
+	return __gem_context_has(i915, I915_CONTEXT_CLONE_VM, 0);
+}
+
+bool gem_has_queues(int i915)
+{
+	return __gem_context_has(i915,
+				 I915_CONTEXT_CLONE_VM,
+				 I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
+}
+
+uint32_t gem_context_clone(int i915,
+			   uint32_t src, unsigned int share,
+			   unsigned int flags)
+{
+	uint32_t ctx;
+
+	igt_assert_eq(__gem_context_clone(i915, src, share, flags, &ctx), 0);
+
+	return ctx;
+}
+
+uint32_t gem_queue_create(int i915)
+{
+	return gem_context_clone(i915, 0,
+				 I915_CONTEXT_CLONE_VM,
+				 I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
+}
+
 bool gem_context_has_engine(int fd, uint32_t ctx, uint64_t engine)
 {
 	struct drm_i915_gem_exec_object2 exec = {};
diff --git a/lib/i915/gem_context.h b/lib/i915/gem_context.h
index a052714d4..8043c3401 100644
--- a/lib/i915/gem_context.h
+++ b/lib/i915/gem_context.h
@@ -29,6 +29,19 @@ int __gem_context_create(int fd, uint32_t *ctx_id);
 void gem_context_destroy(int fd, uint32_t ctx_id);
 int __gem_context_destroy(int fd, uint32_t ctx_id);
 
+int __gem_context_clone(int i915,
+			uint32_t src, unsigned int share,
+			unsigned int flags,
+			uint32_t *out);
+uint32_t gem_context_clone(int i915,
+			   uint32_t src, unsigned int share,
+			   unsigned int flags);
+
+uint32_t gem_queue_create(int i915);
+
+bool gem_contexts_has_shared_gtt(int i915);
+bool gem_has_queues(int i915);
+
 bool gem_has_contexts(int fd);
 void gem_require_contexts(int fd);
 void gem_context_require_bannable(int fd);
diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index e1b7feeb2..3552e895b 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -22,6 +22,7 @@ TESTS_progs = \
 	drm_mm \
 	drm_read \
 	i915/gem_ctx_clone \
+	i915/gem_ctx_shared \
 	i915/gem_vm_create \
 	kms_3d \
 	kms_addfb_basic \
diff --git a/tests/i915/gem_ctx_shared.c b/tests/i915/gem_ctx_shared.c
new file mode 100644
index 000000000..0076f5e9d
--- /dev/null
+++ b/tests/i915/gem_ctx_shared.c
@@ -0,0 +1,856 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "igt.h"
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+
+#include <drm.h>
+
+#include "igt_rand.h"
+#include "igt_vgem.h"
+#include "sync_file.h"
+
+#define LO 0
+#define HI 1
+#define NOISE 2
+
+#define MAX_PRIO LOCAL_I915_CONTEXT_MAX_USER_PRIORITY
+#define MIN_PRIO LOCAL_I915_CONTEXT_MIN_USER_PRIORITY
+
+static int priorities[] = {
+	[LO] = MIN_PRIO / 2,
+	[HI] = MAX_PRIO / 2,
+};
+
+#define MAX_ELSP_QLEN 16
+
+IGT_TEST_DESCRIPTION("Test shared contexts.");
+
+static void create_shared_gtt(int i915, unsigned int flags)
+#define DETACHED 0x1
+{
+	const uint32_t bbe = MI_BATCH_BUFFER_END;
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = gem_create(i915, 4096),
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+	};
+	uint32_t parent, child;
+
+	gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
+	gem_execbuf(i915, &execbuf);
+	gem_sync(i915, obj.handle);
+
+	child = flags & DETACHED ? gem_context_create(i915) : 0;
+	igt_until_timeout(2) {
+		parent = flags & DETACHED ? child : 0;
+		child = gem_context_clone(i915,
+					  parent, I915_CONTEXT_CLONE_VM,
+					  0);
+		execbuf.rsvd1 = child;
+		gem_execbuf(i915, &execbuf);
+
+		if (flags & DETACHED) {
+			gem_context_destroy(i915, parent);
+			gem_execbuf(i915, &execbuf);
+		} else {
+			parent = child;
+			gem_context_destroy(i915, parent);
+		}
+
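+		/* The destroyed context must be unusable, and unclonable */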
+		execbuf.rsvd1 = parent;
+		igt_assert_eq(__gem_execbuf(i915, &execbuf), -ENOENT);
+		igt_assert_eq(__gem_context_clone(i915,
+						  parent, I915_CONTEXT_CLONE_VM,
+						  0, &parent), -ENOENT);
+	}
+	if (flags & DETACHED)
+		gem_context_destroy(i915, child);
+
+	gem_sync(i915, obj.handle);
+	gem_close(i915, obj.handle);
+}
+
+static void disjoint_timelines(int i915)
+{
+	IGT_CORK_HANDLE(cork);
+	igt_spin_t *spin[2];
+	uint32_t plug, child;
+
+	igt_require(gem_has_execlists(i915));
+
+	/*
+	 * Each context, although it shares a vm, is expected to be a
+	 * distinct timeline. A request queued to one context should be
+	 * independent of any shared contexts.
+	 */
+	child = gem_context_clone(i915, 0, I915_CONTEXT_CLONE_VM, 0);
+	plug = igt_cork_plug(&cork, i915);
+
+	spin[0] = __igt_spin_new(i915, .ctx = 0, .dependency = plug);
+	spin[1] = __igt_spin_new(i915, .ctx = child);
+
+	/* Wait for the second spinner, will hang if stuck behind the first */
+	igt_spin_end(spin[1]);
+	gem_sync(i915, spin[1]->handle);
+
+	igt_cork_unplug(&cork);
+
+	igt_spin_free(i915, spin[1]);
+	igt_spin_free(i915, spin[0]);
+}
+
+static void exhaust_shared_gtt(int i915, unsigned int flags)
+#define EXHAUST_LRC 0x1
+{
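+	/* Use a fresh fd so the flood of contexts is released on close */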
+	i915 = gem_reopen_driver(i915);
+
+	igt_fork(pid, 1) {
+		const uint32_t bbe = MI_BATCH_BUFFER_END;
+		struct drm_i915_gem_exec_object2 obj = {
+			.handle = gem_create(i915, 4096)
+		};
+		struct drm_i915_gem_execbuffer2 execbuf = {
+			.buffers_ptr = to_user_pointer(&obj),
+			.buffer_count = 1,
+		};
+		uint32_t parent, child;
+		unsigned long count = 0;
+		int err;
+
+		gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
+
+		child = 0;
+		for (;;) {
+			parent = child;
+			err = __gem_context_clone(i915,
+						  parent, I915_CONTEXT_CLONE_VM,
+						  0, &child);
+			if (err)
+				break;
+
+			if (flags & EXHAUST_LRC) {
+				execbuf.rsvd1 = child;
+				err = __gem_execbuf(i915, &execbuf);
+				if (err)
+					break;
+			}
+
+			count++;
+		}
+		gem_sync(i915, obj.handle);
+
+		igt_info("Created %lu shared contexts, before %d (%s)\n",
+			 count, err, strerror(-err));
+	}
+	close(i915);
+	igt_waitchildren();
+}
+
+static void exec_shared_gtt(int i915, unsigned int ring)
+{
+	const int gen = intel_gen(intel_get_drm_devid(i915));
+	const uint32_t bbe = MI_BATCH_BUFFER_END;
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = gem_create(i915, 4096)
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.flags = ring,
+	};
+	uint32_t scratch = obj.handle;
+	uint32_t batch[16];
+	int i;
+
+	gem_require_ring(i915, ring);
+	igt_require(gem_can_store_dword(i915, ring));
+
+	/* Load object into place in the GTT */
+	gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
+	gem_execbuf(i915, &execbuf);
+
+	/* Presume nothing causes an eviction in the meantime */
+
+	obj.handle = gem_create(i915, 4096);
+
+	i = 0;
+	batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+	if (gen >= 8) {
+		batch[++i] = obj.offset;
+		batch[++i] = 0;
+	} else if (gen >= 4) {
+		batch[++i] = 0;
+		batch[++i] = obj.offset;
+	} else {
+		batch[i]--;
+		batch[++i] = obj.offset;
+	}
+	batch[++i] = 0xc0ffee;
+	batch[++i] = MI_BATCH_BUFFER_END;
+	gem_write(i915, obj.handle, 0, batch, sizeof(batch));
+
+	obj.offset += 4096; /* make sure we don't cause an eviction! */
+	obj.flags |= EXEC_OBJECT_PINNED;
+	execbuf.rsvd1 = gem_context_clone(i915, 0, I915_CONTEXT_CLONE_VM, 0);
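+	/* MI_STORE_DWORD_IMM on gen4/5 requires a secure (privileged) batch */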
+	if (gen > 3 && gen < 6)
+		execbuf.flags |= I915_EXEC_SECURE;
+
+	gem_execbuf(i915, &execbuf);
+	gem_context_destroy(i915, execbuf.rsvd1);
+	gem_sync(i915, obj.handle); /* write hazard lies */
+	gem_close(i915, obj.handle);
+
+	gem_read(i915, scratch, 0, batch, sizeof(uint32_t));
+	gem_close(i915, scratch);
+
+	igt_assert_eq_u32(*batch, 0xc0ffee);
+}
+
+static int nop_sync(int i915, uint32_t ctx, unsigned int ring, int64_t timeout)
+{
+	const uint32_t bbe = MI_BATCH_BUFFER_END;
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = gem_create(i915, 4096),
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.flags = ring,
+		.rsvd1 = ctx,
+	};
+	int err;
+
+	gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
+	gem_execbuf(i915, &execbuf);
+	err = gem_wait(i915, obj.handle, &timeout);
+	gem_close(i915, obj.handle);
+
+	return err;
+}
+
+static bool has_single_timeline(int i915)
+{
+	uint32_t ctx;
+
+	__gem_context_clone(i915, 0, 0,
+			    I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE,
+			    &ctx);
+	if (ctx)
+		gem_context_destroy(i915, ctx);
+
+	return ctx != 0;
+}
+
+static bool ignore_engine(unsigned engine)
+{
+	if (engine == 0)
+		return true;
+
+	if (engine == I915_EXEC_BSD)
+		return true;
+
+	return false;
+}
+
+static void single_timeline(int i915)
+{
+	const uint32_t bbe = MI_BATCH_BUFFER_END;
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = gem_create(i915, 4096),
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+	};
+	struct sync_fence_info rings[16];
+	struct sync_file_info sync_file_info = {
+		.num_fences = 1,
+	};
+	unsigned int engine;
+	int n;
+
+	igt_require(has_single_timeline(i915));
+
+	gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
+	gem_execbuf(i915, &execbuf);
+	gem_sync(i915, obj.handle);
+
+	/*
+	 * For a "single timeline" context, each ring is on the common
+	 * timeline, unlike a normal context where each ring has an
+	 * independent timeline. That is, no matter which engine we submit
+	 * to, it reports the same timeline name and fence context. However,
+	 * the fence context is not reported through the sync_fence_info.
+	 */
+	execbuf.rsvd1 =
+		gem_context_clone(i915, 0, 0,
+				  I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
+	execbuf.flags = I915_EXEC_FENCE_OUT;
+	n = 0;
+	for_each_engine(i915, engine) {
+		gem_execbuf_wr(i915, &execbuf);
+		sync_file_info.sync_fence_info = to_user_pointer(&rings[n]);
+		do_ioctl(execbuf.rsvd2 >> 32, SYNC_IOC_FILE_INFO, &sync_file_info);
+		close(execbuf.rsvd2 >> 32);
+
+		igt_info("ring[%d] fence: %s %s\n",
+			 n, rings[n].driver_name, rings[n].obj_name);
+		n++;
+	}
+	gem_sync(i915, obj.handle);
+	gem_close(i915, obj.handle);
+
+	for (int i = 1; i < n; i++) {
+		igt_assert(!strcmp(rings[0].driver_name, rings[i].driver_name));
+		igt_assert(!strcmp(rings[0].obj_name, rings[i].obj_name));
+	}
+}
+
+static void exec_single_timeline(int i915, unsigned int ring)
+{
+	unsigned int other;
+	igt_spin_t *spin;
+	uint32_t ctx;
+
+	gem_require_ring(i915, ring);
+	igt_require(has_single_timeline(i915));
+
+	/*
+	 * On an ordinary context, a blockage on one ring doesn't prevent
+	 * execution on another.
+	 */
+	ctx = 0;
+	spin = NULL;
+	for_each_engine(i915, other) {
+		if (other == ring || ignore_engine(other))
+			continue;
+
+		if (spin == NULL) {
+			spin = __igt_spin_new(i915, .ctx = ctx, .engine = other);
+		} else {
+			struct drm_i915_gem_execbuffer2 execbuf = {
+				.buffers_ptr = spin->execbuf.buffers_ptr,
+				.buffer_count = spin->execbuf.buffer_count,
+				.flags = other,
+				.rsvd1 = ctx,
+			};
+			gem_execbuf(i915, &execbuf);
+		}
+	}
+	igt_require(spin);
+	igt_assert_eq(nop_sync(i915, ctx, ring, NSEC_PER_SEC), 0);
+	igt_spin_free(i915, spin);
+
+	/*
+	 * But if we create a context with just a single shared timeline,
+	 * then it will block waiting for the earlier requests on the
+	 * other engines.
+	 */
+	ctx = gem_context_clone(i915, 0, 0,
+				I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
+	spin = NULL;
+	for_each_engine(i915, other) {
+		if (other == ring || ignore_engine(other))
+			continue;
+
+		if (spin == NULL) {
+			spin = __igt_spin_new(i915, .ctx = ctx, .engine = other);
+		} else {
+			struct drm_i915_gem_execbuffer2 execbuf = {
+				.buffers_ptr = spin->execbuf.buffers_ptr,
+				.buffer_count = spin->execbuf.buffer_count,
+				.flags = other,
+				.rsvd1 = ctx,
+			};
+			gem_execbuf(i915, &execbuf);
+		}
+	}
+	igt_assert(spin);
+	igt_assert_eq(nop_sync(i915, ctx, ring, NSEC_PER_SEC), -ETIME);
+	igt_spin_free(i915, spin);
+}
+
+static void store_dword(int i915, uint32_t ctx, unsigned ring,
+			uint32_t target, uint32_t offset, uint32_t value,
+			uint32_t cork, unsigned write_domain)
+{
+	const int gen = intel_gen(intel_get_drm_devid(i915));
+	struct drm_i915_gem_exec_object2 obj[3];
+	struct drm_i915_gem_relocation_entry reloc;
+	struct drm_i915_gem_execbuffer2 execbuf;
+	uint32_t batch[16];
+	int i;
+
+	memset(&execbuf, 0, sizeof(execbuf));
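+	/* Without a cork, skip obj[0] and submit only the target + batch */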
+	execbuf.buffers_ptr = to_user_pointer(obj + !cork);
+	execbuf.buffer_count = 2 + !!cork;
+	execbuf.flags = ring;
+	if (gen < 6)
+		execbuf.flags |= I915_EXEC_SECURE;
+	execbuf.rsvd1 = ctx;
+
+	memset(obj, 0, sizeof(obj));
+	obj[0].handle = cork;
+	obj[1].handle = target;
+	obj[2].handle = gem_create(i915, 4096);
+
+	memset(&reloc, 0, sizeof(reloc));
+	reloc.target_handle = obj[1].handle;
+	reloc.presumed_offset = 0;
+	reloc.offset = sizeof(uint32_t);
+	reloc.delta = offset;
+	reloc.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+	reloc.write_domain = write_domain;
+	obj[2].relocs_ptr = to_user_pointer(&reloc);
+	obj[2].relocation_count = 1;
+
+	i = 0;
+	batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+	if (gen >= 8) {
+		batch[++i] = offset;
+		batch[++i] = 0;
+	} else if (gen >= 4) {
+		batch[++i] = 0;
+		batch[++i] = offset;
+		reloc.offset += sizeof(uint32_t);
+	} else {
+		batch[i]--;
+		batch[++i] = offset;
+	}
+	batch[++i] = value;
+	batch[++i] = MI_BATCH_BUFFER_END;
+	gem_write(i915, obj[2].handle, 0, batch, sizeof(batch));
+	gem_execbuf(i915, &execbuf);
+	gem_close(i915, obj[2].handle);
+}
+
+static uint32_t create_highest_priority(int i915)
+{
+	uint32_t ctx = gem_context_create(i915);
+
+	/*
+	 * If there is no priority support, all contexts will have equal
+	 * priority (and therefore the max user priority), so no context
+	 * can overtake us, and we can effectively form a plug.
+	 */
+	__gem_context_set_priority(i915, ctx, MAX_PRIO);
+
+	return ctx;
+}
+
+static void unplug_show_queue(int i915, struct igt_cork *c, unsigned int engine)
+{
+	igt_spin_t *spin[MAX_ELSP_QLEN];
+
+	for (int n = 0; n < ARRAY_SIZE(spin); n++) {
+		const struct igt_spin_factory opts = {
+			.ctx = create_highest_priority(i915),
+			.engine = engine,
+		};
+		spin[n] = __igt_spin_factory(i915, &opts);
+		gem_context_destroy(i915, opts.ctx);
+	}
+
+	igt_cork_unplug(c); /* batches will now be queued on the engine */
+	igt_debugfs_dump(i915, "i915_engine_info");
+
+	for (int n = 0; n < ARRAY_SIZE(spin); n++)
+		igt_spin_free(i915, spin[n]);
+}
+
+static uint32_t store_timestamp(int i915,
+				uint32_t ctx, unsigned ring,
+				unsigned mmio_base)
+{
+	const bool r64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = gem_create(i915, 4096),
+		.relocation_count = 1,
+	};
+	struct drm_i915_gem_relocation_entry reloc = {
+		.target_handle = obj.handle,
+		.offset = 2 * sizeof(uint32_t),
+		.delta = 4092,
+		.read_domains = I915_GEM_DOMAIN_INSTRUCTION,
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.flags = ring,
+		.rsvd1 = ctx,
+	};
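+	/* SRM of the engine timestamp (mmio_base + 0x358) into the last dword */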
+	uint32_t batch[] = {
+		0x24 << 23 | (1 + r64b), /* SRM */
+		mmio_base + 0x358,
+		4092,
+		0,
+		MI_BATCH_BUFFER_END
+	};
+
+	igt_require(intel_gen(intel_get_drm_devid(i915)) >= 7);
+
+	gem_write(i915, obj.handle, 0, batch, sizeof(batch));
+	obj.relocs_ptr = to_user_pointer(&reloc);
+
+	gem_execbuf(i915, &execbuf);
+
+	return obj.handle;
+}
+
+static void independent(int i915, unsigned ring, unsigned flags)
+{
+	uint32_t handle[ARRAY_SIZE(priorities)];
+	igt_spin_t *spin[MAX_ELSP_QLEN];
+	unsigned int mmio_base;
+
+	/* XXX i915_query()! */
+	switch (ring) {
+	case I915_EXEC_DEFAULT:
+	case I915_EXEC_RENDER:
+		mmio_base = 0x2000;
+		break;
+#if 0
+	case I915_EXEC_BSD:
+		mmio_base = 0x12000;
+		break;
+#endif
+	case I915_EXEC_BLT:
+		mmio_base = 0x22000;
+		break;
+
+	case I915_EXEC_VEBOX:
+		if (intel_gen(intel_get_drm_devid(i915)) >= 11)
+			mmio_base = 0x1d8000;
+		else
+			mmio_base = 0x1a000;
+		break;
+
+	default:
+		igt_skip("mmio base not known\n");
+	}
+
+	for (int n = 0; n < ARRAY_SIZE(spin); n++) {
+		const struct igt_spin_factory opts = {
+			.ctx = create_highest_priority(i915),
+			.engine = ring,
+		};
+		spin[n] = __igt_spin_factory(i915, &opts);
+		gem_context_destroy(i915, opts.ctx);
+	}
+
+	for (int i = 0; i < ARRAY_SIZE(priorities); i++) {
+		uint32_t ctx = gem_queue_create(i915);
+		gem_context_set_priority(i915, ctx, priorities[i]);
+		handle[i] = store_timestamp(i915, ctx, ring, mmio_base);
+		gem_context_destroy(i915, ctx);
+	}
+
+	for (int n = 0; n < ARRAY_SIZE(spin); n++)
+		igt_spin_free(i915, spin[n]);
+
+	for (int i = 0; i < ARRAY_SIZE(priorities); i++) {
+		uint32_t *ptr;
+
+		ptr = gem_mmap__gtt(i915, handle[i], 4096, PROT_READ);
+		gem_set_domain(i915, handle[i], /* no write hazard lies! */
+			       I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+		gem_close(i915, handle[i]);
+
+		handle[i] = ptr[1023];
+		munmap(ptr, 4096);
+
+		igt_debug("ctx[%d] .prio=%d, timestamp=%u\n",
+			  i, priorities[i], handle[i]);
+	}
+
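+	/* HI's timestamp must predate LO's; use a signed delta as it may wrap */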
+	igt_assert((int32_t)(handle[HI] - handle[LO]) < 0);
+}
+
+static void reorder(int i915, unsigned ring, unsigned flags)
+#define EQUAL 1
+{
+	IGT_CORK_HANDLE(cork);
+	uint32_t scratch;
+	uint32_t *ptr;
+	uint32_t ctx[2];
+	uint32_t plug;
+
+	ctx[LO] = gem_queue_create(i915);
+	gem_context_set_priority(i915, ctx[LO], MIN_PRIO);
+
+	ctx[HI] = gem_queue_create(i915);
+	gem_context_set_priority(i915, ctx[HI], flags & EQUAL ? MIN_PRIO : 0);
+
+	scratch = gem_create(i915, 4096);
+	plug = igt_cork_plug(&cork, i915);
+
+	/* We expect the high priority context to be executed first, and
+	 * so the final result will be the value from the low priority context.
+	 */
+	store_dword(i915, ctx[LO], ring, scratch, 0, ctx[LO], plug, 0);
+	store_dword(i915, ctx[HI], ring, scratch, 0, ctx[HI], plug, 0);
+
+	unplug_show_queue(i915, &cork, ring);
+	gem_close(i915, plug);
+
+	gem_context_destroy(i915, ctx[LO]);
+	gem_context_destroy(i915, ctx[HI]);
+
+	ptr = gem_mmap__gtt(i915, scratch, 4096, PROT_READ);
+	gem_set_domain(i915, scratch, /* no write hazard lies! */
+		       I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+	gem_close(i915, scratch);
+
+	if (flags & EQUAL) /* equal priority, result will be fifo */
+		igt_assert_eq_u32(ptr[0], ctx[HI]);
+	else
+		igt_assert_eq_u32(ptr[0], ctx[LO]);
+	munmap(ptr, 4096);
+}
+
+static void promotion(int i915, unsigned ring)
+{
+	IGT_CORK_HANDLE(cork);
+	uint32_t result, dep;
+	uint32_t *ptr;
+	uint32_t ctx[3];
+	uint32_t plug;
+
+	ctx[LO] = gem_queue_create(i915);
+	gem_context_set_priority(i915, ctx[LO], MIN_PRIO);
+
+	ctx[HI] = gem_queue_create(i915);
+	gem_context_set_priority(i915, ctx[HI], 0);
+
+	ctx[NOISE] = gem_queue_create(i915);
+	gem_context_set_priority(i915, ctx[NOISE], MIN_PRIO/2);
+
+	result = gem_create(i915, 4096);
+	dep = gem_create(i915, 4096);
+
+	plug = igt_cork_plug(&cork, i915);
+
+	/* Expect that HI promotes LO, so the order will be LO, HI, NOISE.
+	 *
+	 * fifo would be NOISE, LO, HI.
+	 * strict priority would be HI, NOISE, LO.
+	 */
+	store_dword(i915, ctx[NOISE], ring, result, 0, ctx[NOISE], plug, 0);
+	store_dword(i915, ctx[LO], ring, result, 0, ctx[LO], plug, 0);
+
+	/* link LO <-> HI via a dependency on another buffer */
+	store_dword(i915, ctx[LO], ring, dep, 0, ctx[LO], 0, I915_GEM_DOMAIN_INSTRUCTION);
+	store_dword(i915, ctx[HI], ring, dep, 0, ctx[HI], 0, 0);
+
+	store_dword(i915, ctx[HI], ring, result, 0, ctx[HI], 0, 0);
+
+	unplug_show_queue(i915, &cork, ring);
+	gem_close(i915, plug);
+
+	gem_context_destroy(i915, ctx[NOISE]);
+	gem_context_destroy(i915, ctx[LO]);
+	gem_context_destroy(i915, ctx[HI]);
+
+	ptr = gem_mmap__gtt(i915, dep, 4096, PROT_READ);
+	gem_set_domain(i915, dep, /* no write hazard lies! */
+			I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+	gem_close(i915, dep);
+
+	igt_assert_eq_u32(ptr[0], ctx[HI]);
+	munmap(ptr, 4096);
+
+	ptr = gem_mmap__gtt(i915, result, 4096, PROT_READ);
+	gem_set_domain(i915, result, /* no write hazard lies! */
+			I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+	gem_close(i915, result);
+
+	igt_assert_eq_u32(ptr[0], ctx[NOISE]);
+	munmap(ptr, 4096);
+}
+
+static void smoketest(int i915, unsigned ring, unsigned timeout)
+{
+	const int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+	unsigned engines[16];
+	unsigned nengine;
+	unsigned engine;
+	uint32_t scratch;
+	uint32_t *ptr;
+
+	nengine = 0;
+	for_each_engine(i915, engine) {
+		if (ignore_engine(engine))
+			continue;
+
+		engines[nengine++] = engine;
+	}
+	igt_require(nengine);
+
+	scratch = gem_create(i915, 4096);
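+	/* Each child gets two dwords in scratch: an id tag and a cycle count */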
+	igt_fork(child, ncpus) {
+		unsigned long count = 0;
+		uint32_t ctx;
+
+		hars_petruska_f54_1_random_perturb(child);
+
+		ctx = gem_queue_create(i915);
+		igt_until_timeout(timeout) {
+			int prio;
+
+			prio = hars_petruska_f54_1_random_unsafe_max(MAX_PRIO - MIN_PRIO) + MIN_PRIO;
+			gem_context_set_priority(i915, ctx, prio);
+
+			engine = engines[hars_petruska_f54_1_random_unsafe_max(nengine)];
+			store_dword(i915, ctx, engine, scratch,
+				    8*child + 0, ~child,
+				    0, 0);
+			for (unsigned int step = 0; step < 8; step++)
+				store_dword(i915, ctx, engine, scratch,
+					    8*child + 4, count++,
+					    0, 0);
+		}
+		gem_context_destroy(i915, ctx);
+	}
+	igt_waitchildren();
+
+	ptr = gem_mmap__gtt(i915, scratch, 4096, PROT_READ);
+	gem_set_domain(i915, scratch, /* no write hazard lies! */
+			I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+	gem_close(i915, scratch);
+
+	for (unsigned n = 0; n < ncpus; n++) {
+		igt_assert_eq_u32(ptr[2*n], ~n);
+		/*
+		 * Note this count is approximate due to unconstrained
+		 * ordering of the dword writes between engines.
+		 *
+		 * Take the result with a pinch of salt.
+		 */
+		igt_info("Child[%d] completed %u cycles\n", n, ptr[2*n+1]);
+	}
+	munmap(ptr, 4096);
+}
+
+igt_main
+{
+	const struct intel_execution_engine *e;
+	int i915 = -1;
+
+	igt_fixture {
+		i915 = drm_open_driver(DRIVER_INTEL);
+		igt_require_gem(i915);
+	}
+
+	igt_subtest_group {
+		igt_fixture {
+			igt_require(gem_contexts_has_shared_gtt(i915));
+			igt_fork_hang_detector(i915);
+		}
+
+		igt_subtest("create-shared-gtt")
+			create_shared_gtt(i915, 0);
+
+		igt_subtest("detached-shared-gtt")
+			create_shared_gtt(i915, DETACHED);
+
+		igt_subtest("disjoint-timelines")
+			disjoint_timelines(i915);
+
+		igt_subtest("single-timeline")
+			single_timeline(i915);
+
+		igt_subtest("exhaust-shared-gtt")
+			exhaust_shared_gtt(i915, 0);
+
+		igt_subtest("exhaust-shared-gtt-lrc")
+			exhaust_shared_gtt(i915, EXHAUST_LRC);
+
+		for (e = intel_execution_engines; e->name; e++) {
+			igt_subtest_f("exec-shared-gtt-%s", e->name)
+				exec_shared_gtt(i915, e->exec_id | e->flags);
+
+			if (!ignore_engine(e->exec_id | e->flags)) {
+				igt_subtest_f("exec-single-timeline-%s",
+					      e->name)
+					exec_single_timeline(i915,
+							     e->exec_id | e->flags);
+			}
+
+			/*
+			 * Check that the shared contexts operate independently,
+			 * that is, requests on one ("queue") can be scheduled
+			 * around another queue. We only check the basics here,
+			 * enough to reduce the queue into just another context,
+			 * and so rely on gem_exec_schedule to prove the rest.
+			 */
+			igt_subtest_group {
+				igt_fixture {
+					gem_require_ring(i915, e->exec_id | e->flags);
+					igt_require(gem_can_store_dword(i915, e->exec_id | e->flags));
+					igt_require(gem_scheduler_enabled(i915));
+					igt_require(gem_scheduler_has_ctx_priority(i915));
+				}
+
+				igt_subtest_f("Q-independent-%s", e->name)
+					independent(i915, e->exec_id | e->flags, 0);
+
+				igt_subtest_f("Q-in-order-%s", e->name)
+					reorder(i915, e->exec_id | e->flags, EQUAL);
+
+				igt_subtest_f("Q-out-order-%s", e->name)
+					reorder(i915, e->exec_id | e->flags, 0);
+
+				igt_subtest_f("Q-promotion-%s", e->name)
+					promotion(i915, e->exec_id | e->flags);
+
+				igt_subtest_f("Q-smoketest-%s", e->name)
+					smoketest(i915, e->exec_id | e->flags, 5);
+			}
+		}
+
+		igt_subtest("Q-smoketest-all") {
+			igt_require(gem_scheduler_enabled(i915));
+			igt_require(gem_scheduler_has_ctx_priority(i915));
+			smoketest(i915, -1, 30);
+		}
+
+		igt_fixture {
+			igt_stop_hang_detector();
+		}
+	}
+}
diff --git a/tests/i915/gem_exec_whisper.c b/tests/i915/gem_exec_whisper.c
index 6c3b53756..d3e0b0ba2 100644
--- a/tests/i915/gem_exec_whisper.c
+++ b/tests/i915/gem_exec_whisper.c
@@ -87,6 +87,7 @@ static void verify_reloc(int fd, uint32_t handle,
 #define HANG 0x20
 #define SYNC 0x40
 #define PRIORITY 0x80
+#define QUEUES 0x100
 
 struct hang {
 	struct drm_i915_gem_exec_object2 obj;
@@ -171,7 +172,7 @@ static void ctx_set_random_priority(int fd, uint32_t ctx)
 {
 	int prio = hars_petruska_f54_1_random_unsafe_max(1024) - 512;
 	gem_context_set_priority(fd, ctx, prio);
-};
+}
 
 static void whisper(int fd, unsigned engine, unsigned flags)
 {
@@ -226,6 +227,9 @@ static void whisper(int fd, unsigned engine, unsigned flags)
 	if (flags & CONTEXTS)
 		gem_require_contexts(fd);
 
+	if (flags & QUEUES)
+		igt_require(gem_has_queues(fd));
+
 	if (flags & HANG)
 		init_hang(&hang);
 
@@ -290,6 +294,10 @@ static void whisper(int fd, unsigned engine, unsigned flags)
 			for (n = 0; n < 64; n++)
 				contexts[n] = gem_context_create(fd);
 		}
+		if (flags & QUEUES) {
+			for (n = 0; n < 64; n++)
+				contexts[n] = gem_queue_create(fd);
+		}
 		if (flags & FDS) {
 			for (n = 0; n < 64; n++)
 				fds[n] = drm_open_driver(DRIVER_INTEL);
@@ -403,7 +411,7 @@ static void whisper(int fd, unsigned engine, unsigned flags)
 						execbuf.flags &= ~ENGINE_MASK;
 						execbuf.flags |= engines[rand() % nengine];
 					}
-					if (flags & CONTEXTS) {
+					if (flags & (CONTEXTS | QUEUES)) {
 						execbuf.rsvd1 = contexts[rand() % 64];
 						if (flags & PRIORITY)
 							ctx_set_random_priority(this_fd, execbuf.rsvd1);
@@ -486,7 +494,7 @@ static void whisper(int fd, unsigned engine, unsigned flags)
 			for (n = 0; n < 64; n++)
 				close(fds[n]);
 		}
-		if (flags & CONTEXTS) {
+		if (flags & (CONTEXTS | QUEUES)) {
 			for (n = 0; n < 64; n++)
 				gem_context_destroy(fd, contexts[n]);
 		}
@@ -522,18 +530,24 @@ igt_main
 		{ "chain-forked", CHAIN | FORKED },
 		{ "chain-interruptible", CHAIN | INTERRUPTIBLE },
 		{ "chain-sync", CHAIN | SYNC },
-		{ "contexts", CONTEXTS },
-		{ "contexts-interruptible", CONTEXTS | INTERRUPTIBLE},
-		{ "contexts-forked", CONTEXTS | FORKED},
-		{ "contexts-priority", CONTEXTS | FORKED | PRIORITY },
-		{ "contexts-chain", CONTEXTS | CHAIN },
-		{ "contexts-sync", CONTEXTS | SYNC },
 		{ "fds", FDS },
 		{ "fds-interruptible", FDS | INTERRUPTIBLE},
 		{ "fds-forked", FDS | FORKED},
 		{ "fds-priority", FDS | FORKED | PRIORITY },
 		{ "fds-chain", FDS | CHAIN},
 		{ "fds-sync", FDS | SYNC},
+		{ "contexts", CONTEXTS },
+		{ "contexts-interruptible", CONTEXTS | INTERRUPTIBLE},
+		{ "contexts-forked", CONTEXTS | FORKED},
+		{ "contexts-priority", CONTEXTS | FORKED | PRIORITY },
+		{ "contexts-chain", CONTEXTS | CHAIN },
+		{ "contexts-sync", CONTEXTS | SYNC },
+		{ "queues", QUEUES },
+		{ "queues-interruptible", QUEUES | INTERRUPTIBLE},
+		{ "queues-forked", QUEUES | FORKED},
+		{ "queues-priority", QUEUES | FORKED | PRIORITY },
+		{ "queues-chain", QUEUES | CHAIN },
+		{ "queues-sync", QUEUES | SYNC },
 		{ NULL }
 	};
 	int fd;
diff --git a/tests/meson.build b/tests/meson.build
index 3810bd760..3883ae127 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -114,6 +114,7 @@ i915_progs = [
 	'gem_ctx_exec',
 	'gem_ctx_isolation',
 	'gem_ctx_param',
+	'gem_ctx_shared',
 	'gem_ctx_switch',
 	'gem_ctx_thrash',
 	'gem_double_irq_loop',
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [igt-dev] [PATCH i-g-t 08/16] i915: Exercise creating context with shared GTT
@ 2019-05-08 10:09   ` Chris Wilson
  0 siblings, 0 replies; 92+ messages in thread
From: Chris Wilson @ 2019-05-08 10:09 UTC (permalink / raw)
  To: intel-gfx; +Cc: Tvrtko Ursulin, igt-dev

v2: Test that each shared context is its own timeline and allows request
reordering between shared contexts.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
---
 lib/i915/gem_context.c        |  68 +++
 lib/i915/gem_context.h        |  13 +
 tests/Makefile.sources        |   1 +
 tests/i915/gem_ctx_shared.c   | 856 ++++++++++++++++++++++++++++++++++
 tests/i915/gem_exec_whisper.c |  32 +-
 tests/meson.build             |   1 +
 6 files changed, 962 insertions(+), 9 deletions(-)
 create mode 100644 tests/i915/gem_ctx_shared.c

diff --git a/lib/i915/gem_context.c b/lib/i915/gem_context.c
index f94d89cb4..8fb8984d1 100644
--- a/lib/i915/gem_context.c
+++ b/lib/i915/gem_context.c
@@ -272,6 +272,74 @@ void gem_context_set_priority(int fd, uint32_t ctx_id, int prio)
 	igt_assert_eq(__gem_context_set_priority(fd, ctx_id, prio), 0);
 }
 
+int
+__gem_context_clone(int i915,
+		    uint32_t src, unsigned int share,
+		    unsigned int flags,
+		    uint32_t *out)
+{
+	struct drm_i915_gem_context_create_ext_clone clone = {
+		{ .name = I915_CONTEXT_CREATE_EXT_CLONE },
+		.clone_id = src,
+		.flags = share,
+	};
+	struct drm_i915_gem_context_create_ext arg = {
+		.flags = flags | I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
+		.extensions = to_user_pointer(&clone),
+	};
+	int err = 0;
+
+	if (igt_ioctl(i915, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, &arg))
+		err = -errno;
+
+	*out = arg.ctx_id;
+
+	errno = 0;
+	return err;
+}
+
+static bool __gem_context_has(int i915, uint32_t share, unsigned int flags)
+{
+	uint32_t ctx;
+
+	__gem_context_clone(i915, 0, share, flags, &ctx);
+	if (ctx)
+		gem_context_destroy(i915, ctx);
+
+	errno = 0;
+	return ctx;
+}
+
+bool gem_contexts_has_shared_gtt(int i915)
+{
+	return __gem_context_has(i915, I915_CONTEXT_CLONE_VM, 0);
+}
+
+bool gem_has_queues(int i915)
+{
+	return __gem_context_has(i915,
+				 I915_CONTEXT_CLONE_VM,
+				 I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
+}
+
+uint32_t gem_context_clone(int i915,
+			   uint32_t src, unsigned int share,
+			   unsigned int flags)
+{
+	uint32_t ctx;
+
+	igt_assert_eq(__gem_context_clone(i915, src, share, flags, &ctx), 0);
+
+	return ctx;
+}
+
+uint32_t gem_queue_create(int i915)
+{
+	return gem_context_clone(i915, 0,
+				 I915_CONTEXT_CLONE_VM,
+				 I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
+}
+
 bool gem_context_has_engine(int fd, uint32_t ctx, uint64_t engine)
 {
 	struct drm_i915_gem_exec_object2 exec = {};
diff --git a/lib/i915/gem_context.h b/lib/i915/gem_context.h
index a052714d4..8043c3401 100644
--- a/lib/i915/gem_context.h
+++ b/lib/i915/gem_context.h
@@ -29,6 +29,19 @@ int __gem_context_create(int fd, uint32_t *ctx_id);
 void gem_context_destroy(int fd, uint32_t ctx_id);
 int __gem_context_destroy(int fd, uint32_t ctx_id);
 
+int __gem_context_clone(int i915,
+			uint32_t src, unsigned int share,
+			unsigned int flags,
+			uint32_t *out);
+uint32_t gem_context_clone(int i915,
+			   uint32_t src, unsigned int share,
+			   unsigned int flags);
+
+uint32_t gem_queue_create(int i915);
+
+bool gem_contexts_has_shared_gtt(int i915);
+bool gem_has_queues(int i915);
+
 bool gem_has_contexts(int fd);
 void gem_require_contexts(int fd);
 void gem_context_require_bannable(int fd);
diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index e1b7feeb2..3552e895b 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -22,6 +22,7 @@ TESTS_progs = \
 	drm_mm \
 	drm_read \
 	i915/gem_ctx_clone \
+	i915/gem_ctx_shared \
 	i915/gem_vm_create \
 	kms_3d \
 	kms_addfb_basic \
diff --git a/tests/i915/gem_ctx_shared.c b/tests/i915/gem_ctx_shared.c
new file mode 100644
index 000000000..0076f5e9d
--- /dev/null
+++ b/tests/i915/gem_ctx_shared.c
@@ -0,0 +1,856 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "igt.h"
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+
+#include <drm.h>
+
+#include "igt_rand.h"
+#include "igt_vgem.h"
+#include "sync_file.h"
+
+#define LO 0
+#define HI 1
+#define NOISE 2
+
+#define MAX_PRIO LOCAL_I915_CONTEXT_MAX_USER_PRIORITY
+#define MIN_PRIO LOCAL_I915_CONTEXT_MIN_USER_PRIORITY
+
+static int priorities[] = {
+	[LO] = MIN_PRIO / 2,
+	[HI] = MAX_PRIO / 2,
+};
+
+#define MAX_ELSP_QLEN 16
+
+IGT_TEST_DESCRIPTION("Test shared contexts.");
+
+static void create_shared_gtt(int i915, unsigned int flags)
+#define DETACHED 0x1
+{
+	const uint32_t bbe = MI_BATCH_BUFFER_END;
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = gem_create(i915, 4096),
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+	};
+	uint32_t parent, child;
+
+	gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
+	gem_execbuf(i915, &execbuf);
+	gem_sync(i915, obj.handle);
+
+	child = flags & DETACHED ? gem_context_create(i915) : 0;
+	igt_until_timeout(2) {
+		parent = flags & DETACHED ? child : 0;
+		child = gem_context_clone(i915,
+					  parent, I915_CONTEXT_CLONE_VM,
+					  0);
+		execbuf.rsvd1 = child;
+		gem_execbuf(i915, &execbuf);
+
+		if (flags & DETACHED) {
+			gem_context_destroy(i915, parent);
+			gem_execbuf(i915, &execbuf);
+		} else {
+			parent = child;
+			gem_context_destroy(i915, parent);
+		}
+
+		execbuf.rsvd1 = parent;
+		igt_assert_eq(__gem_execbuf(i915, &execbuf), -ENOENT);
+		igt_assert_eq(__gem_context_clone(i915,
+						  parent, I915_CONTEXT_CLONE_VM,
+						  0, &parent), -ENOENT);
+	}
+	if (flags & DETACHED)
+		gem_context_destroy(i915, child);
+
+	gem_sync(i915, obj.handle);
+	gem_close(i915, obj.handle);
+}
+
+static void disjoint_timelines(int i915)
+{
+	IGT_CORK_HANDLE(cork);
+	igt_spin_t *spin[2];
+	uint32_t plug, child;
+
+	igt_require(gem_has_execlists(i915));
+
+	/*
+	 * Each context, although it shares a vm, is expected to be a
+	 * distinct timeline. A request queued to one context should be
+	 * independent of any shared contexts.
+	 */
+	child = gem_context_clone(i915, 0, I915_CONTEXT_CLONE_VM, 0);
+	plug = igt_cork_plug(&cork, i915);
+
+	spin[0] = __igt_spin_new(i915, .ctx = 0, .dependency = plug);
+	spin[1] = __igt_spin_new(i915, .ctx = child);
+
+	/* Wait for the second spinner, will hang if stuck behind the first */
+	igt_spin_end(spin[1]);
+	gem_sync(i915, spin[1]->handle);
+
+	igt_cork_unplug(&cork);
+
+	igt_spin_free(i915, spin[1]);
+	igt_spin_free(i915, spin[0]);
+}
+
+static void exhaust_shared_gtt(int i915, unsigned int flags)
+#define EXHAUST_LRC 0x1
+{
+	i915 = gem_reopen_driver(i915);
+
+	igt_fork(pid, 1) {
+		const uint32_t bbe = MI_BATCH_BUFFER_END;
+		struct drm_i915_gem_exec_object2 obj = {
+			.handle = gem_create(i915, 4096)
+		};
+		struct drm_i915_gem_execbuffer2 execbuf = {
+			.buffers_ptr = to_user_pointer(&obj),
+			.buffer_count = 1,
+		};
+		uint32_t parent, child;
+		unsigned long count = 0;
+		int err;
+
+		gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
+
+		child = 0;
+		for (;;) {
+			parent = child;
+			err = __gem_context_clone(i915,
+						  parent, I915_CONTEXT_CLONE_VM,
+						  0, &child);
+			if (err)
+				break;
+
+			if (flags & EXHAUST_LRC) {
+				execbuf.rsvd1 = child;
+				err = __gem_execbuf(i915, &execbuf);
+				if (err)
+					break;
+			}
+
+			count++;
+		}
+		gem_sync(i915, obj.handle);
+
+		igt_info("Created %lu shared contexts, before %d (%s)\n",
+			 count, err, strerror(-err));
+	}
+	close(i915);
+	igt_waitchildren();
+}
+
+static void exec_shared_gtt(int i915, unsigned int ring)
+{
+	const int gen = intel_gen(intel_get_drm_devid(i915));
+	const uint32_t bbe = MI_BATCH_BUFFER_END;
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = gem_create(i915, 4096)
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.flags = ring,
+	};
+	uint32_t scratch = obj.handle;
+	uint32_t batch[16];
+	int i;
+
+	gem_require_ring(i915, ring);
+	igt_require(gem_can_store_dword(i915, ring));
+
+	/* Load object into place in the GTT */
+	gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
+	gem_execbuf(i915, &execbuf);
+
+	/* Presume nothing causes an eviction in the meantime */
+
+	obj.handle = gem_create(i915, 4096);
+
+	i = 0;
+	batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+	if (gen >= 8) {
+		batch[++i] = obj.offset;
+		batch[++i] = 0;
+	} else if (gen >= 4) {
+		batch[++i] = 0;
+		batch[++i] = obj.offset;
+	} else {
+		batch[i]--;
+		batch[++i] = obj.offset;
+	}
+	batch[++i] = 0xc0ffee;
+	batch[++i] = MI_BATCH_BUFFER_END;
+	gem_write(i915, obj.handle, 0, batch, sizeof(batch));
+
+	obj.offset += 4096; /* make sure we don't cause an eviction! */
+	obj.flags |= EXEC_OBJECT_PINNED;
+	execbuf.rsvd1 = gem_context_clone(i915, 0, I915_CONTEXT_CLONE_VM, 0);
+	if (gen > 3 && gen < 6)
+		execbuf.flags |= I915_EXEC_SECURE;
+
+	gem_execbuf(i915, &execbuf);
+	gem_context_destroy(i915, execbuf.rsvd1);
+	gem_sync(i915, obj.handle); /* write hazard lies */
+	gem_close(i915, obj.handle);
+
+	gem_read(i915, scratch, 0, batch, sizeof(uint32_t));
+	gem_close(i915, scratch);
+
+	igt_assert_eq_u32(*batch, 0xc0ffee);
+}
+
+static int nop_sync(int i915, uint32_t ctx, unsigned int ring, int64_t timeout)
+{
+	const uint32_t bbe = MI_BATCH_BUFFER_END;
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = gem_create(i915, 4096),
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.flags = ring,
+		.rsvd1 = ctx,
+	};
+	int err;
+
+	gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
+	gem_execbuf(i915, &execbuf);
+	err = gem_wait(i915, obj.handle, &timeout);
+	gem_close(i915, obj.handle);
+
+	return err;
+}
+
+static bool has_single_timeline(int i915)
+{
+	uint32_t ctx;
+
+	__gem_context_clone(i915, 0, 0,
+			    I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE,
+			    &ctx);
+	if (ctx)
+		gem_context_destroy(i915, ctx);
+
+	return ctx != 0;
+}
+
+static bool ignore_engine(unsigned engine)
+{
+	if (engine == 0)
+		return true;
+
+	if (engine == I915_EXEC_BSD)
+		return true;
+
+	return false;
+}
+
+static void single_timeline(int i915)
+{
+	const uint32_t bbe = MI_BATCH_BUFFER_END;
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = gem_create(i915, 4096),
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+	};
+	struct sync_fence_info rings[16];
+	struct sync_file_info sync_file_info = {
+		.num_fences = 1,
+	};
+	unsigned int engine;
+	int n;
+
+	igt_require(has_single_timeline(i915));
+
+	gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
+	gem_execbuf(i915, &execbuf);
+	gem_sync(i915, obj.handle);
+
+	/*
+	 * For a "single timeline" context, each ring is on the common
+	 * timeline, unlike a normal context where each ring has an
+	 * independent timeline. That is, no matter which engine we submit
+	 * to, it reports the same timeline name and fence context. However,
+	 * the fence context is not reported through the sync_fence_info.
+	 */
+	execbuf.rsvd1 =
+		gem_context_clone(i915, 0, 0,
+				  I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
+	execbuf.flags = I915_EXEC_FENCE_OUT;
+	n = 0;
+	for_each_engine(i915, engine) {
+		gem_execbuf_wr(i915, &execbuf);
+		sync_file_info.sync_fence_info = to_user_pointer(&rings[n]);
+		do_ioctl(execbuf.rsvd2 >> 32, SYNC_IOC_FILE_INFO, &sync_file_info);
+		close(execbuf.rsvd2 >> 32);
+
+		igt_info("ring[%d] fence: %s %s\n",
+			 n, rings[n].driver_name, rings[n].obj_name);
+		n++;
+	}
+	gem_sync(i915, obj.handle);
+	gem_close(i915, obj.handle);
+
+	for (int i = 1; i < n; i++) {
+		igt_assert(!strcmp(rings[0].driver_name, rings[i].driver_name));
+		igt_assert(!strcmp(rings[0].obj_name, rings[i].obj_name));
+	}
+}
+
+static void exec_single_timeline(int i915, unsigned int ring)
+{
+	unsigned int other;
+	igt_spin_t *spin;
+	uint32_t ctx;
+
+	gem_require_ring(i915, ring);
+	igt_require(has_single_timeline(i915));
+
+	/*
+	 * On an ordinary context, a blockage on one ring doesn't prevent
+	 * execution on another.
+	 */
+	ctx = 0;
+	spin = NULL;
+	for_each_engine(i915, other) {
+		if (other == ring || ignore_engine(other))
+			continue;
+
+		if (spin == NULL) {
+			spin = __igt_spin_new(i915, .ctx = ctx, .engine = other);
+		} else {
+			struct drm_i915_gem_execbuffer2 execbuf = {
+				.buffers_ptr = spin->execbuf.buffers_ptr,
+				.buffer_count = spin->execbuf.buffer_count,
+				.flags = other,
+				.rsvd1 = ctx,
+			};
+			gem_execbuf(i915, &execbuf);
+		}
+	}
+	igt_require(spin);
+	igt_assert_eq(nop_sync(i915, ctx, ring, NSEC_PER_SEC), 0);
+	igt_spin_free(i915, spin);
+
+	/*
+	 * But if we create a context with just a single shared timeline,
+	 * then it will block waiting for the earlier requests on the
+	 * other engines.
+	 */
+	ctx = gem_context_clone(i915, 0, 0,
+				I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
+	spin = NULL;
+	for_each_engine(i915, other) {
+		if (other == ring || ignore_engine(other))
+			continue;
+
+		if (spin == NULL) {
+			spin = __igt_spin_new(i915, .ctx = ctx, .engine = other);
+		} else {
+			struct drm_i915_gem_execbuffer2 execbuf = {
+				.buffers_ptr = spin->execbuf.buffers_ptr,
+				.buffer_count = spin->execbuf.buffer_count,
+				.flags = other,
+				.rsvd1 = ctx,
+			};
+			gem_execbuf(i915, &execbuf);
+		}
+	}
+	igt_assert(spin);
+	igt_assert_eq(nop_sync(i915, ctx, ring, NSEC_PER_SEC), -ETIME);
+	igt_spin_free(i915, spin);
+}
+
+static void store_dword(int i915, uint32_t ctx, unsigned ring,
+			uint32_t target, uint32_t offset, uint32_t value,
+			uint32_t cork, unsigned write_domain)
+{
+	const int gen = intel_gen(intel_get_drm_devid(i915));
+	struct drm_i915_gem_exec_object2 obj[3];
+	struct drm_i915_gem_relocation_entry reloc;
+	struct drm_i915_gem_execbuffer2 execbuf;
+	uint32_t batch[16];
+	int i;
+
+	memset(&execbuf, 0, sizeof(execbuf));
+	execbuf.buffers_ptr = to_user_pointer(obj + !cork);
+	execbuf.buffer_count = 2 + !!cork;
+	execbuf.flags = ring;
+	if (gen < 6)
+		execbuf.flags |= I915_EXEC_SECURE;
+	execbuf.rsvd1 = ctx;
+
+	memset(obj, 0, sizeof(obj));
+	obj[0].handle = cork;
+	obj[1].handle = target;
+	obj[2].handle = gem_create(i915, 4096);
+
+	memset(&reloc, 0, sizeof(reloc));
+	reloc.target_handle = obj[1].handle;
+	reloc.presumed_offset = 0;
+	reloc.offset = sizeof(uint32_t);
+	reloc.delta = offset;
+	reloc.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+	reloc.write_domain = write_domain;
+	obj[2].relocs_ptr = to_user_pointer(&reloc);
+	obj[2].relocation_count = 1;
+
+	i = 0;
+	batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+	if (gen >= 8) {
+		batch[++i] = offset;
+		batch[++i] = 0;
+	} else if (gen >= 4) {
+		batch[++i] = 0;
+		batch[++i] = offset;
+		reloc.offset += sizeof(uint32_t);
+	} else {
+		batch[i]--;
+		batch[++i] = offset;
+	}
+	batch[++i] = value;
+	batch[++i] = MI_BATCH_BUFFER_END;
+	gem_write(i915, obj[2].handle, 0, batch, sizeof(batch));
+	gem_execbuf(i915, &execbuf);
+	gem_close(i915, obj[2].handle);
+}
+
+static uint32_t create_highest_priority(int i915)
+{
+	uint32_t ctx = gem_context_create(i915);
+
+	/*
+	 * If there is no priority support, all contexts will have equal
+	 * priority (and therefore the max user priority), so no context
+	 * can overtake us, and we can effectively form a plug.
+	 */
+	__gem_context_set_priority(i915, ctx, MAX_PRIO);
+
+	return ctx;
+}
+
+static void unplug_show_queue(int i915, struct igt_cork *c, unsigned int engine)
+{
+	igt_spin_t *spin[MAX_ELSP_QLEN];
+
+	for (int n = 0; n < ARRAY_SIZE(spin); n++) {
+		const struct igt_spin_factory opts = {
+			.ctx = create_highest_priority(i915),
+			.engine = engine,
+		};
+		spin[n] = __igt_spin_factory(i915, &opts);
+		gem_context_destroy(i915, opts.ctx);
+	}
+
+	igt_cork_unplug(c); /* batches will now be queued on the engine */
+	igt_debugfs_dump(i915, "i915_engine_info");
+
+	for (int n = 0; n < ARRAY_SIZE(spin); n++)
+		igt_spin_free(i915, spin[n]);
+}
+
+static uint32_t store_timestamp(int i915,
+				uint32_t ctx, unsigned ring,
+				unsigned mmio_base)
+{
+	const bool r64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = gem_create(i915, 4096),
+		.relocation_count = 1,
+	};
+	struct drm_i915_gem_relocation_entry reloc = {
+		.target_handle = obj.handle,
+		.offset = 2 * sizeof(uint32_t),
+		.delta = 4092,
+		.read_domains = I915_GEM_DOMAIN_INSTRUCTION,
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.flags = ring,
+		.rsvd1 = ctx,
+	};
+	uint32_t batch[] = {
+		0x24 << 23 | (1 + r64b), /* SRM */
+		mmio_base + 0x358,
+		4092,
+		0,
+		MI_BATCH_BUFFER_END
+	};
+
+	igt_require(intel_gen(intel_get_drm_devid(i915)) >= 7);
+
+	gem_write(i915, obj.handle, 0, batch, sizeof(batch));
+	obj.relocs_ptr = to_user_pointer(&reloc);
+
+	gem_execbuf(i915, &execbuf);
+
+	return obj.handle;
+}
+
+static void independent(int i915, unsigned ring, unsigned flags)
+{
+	uint32_t handle[ARRAY_SIZE(priorities)];
+	igt_spin_t *spin[MAX_ELSP_QLEN];
+	unsigned int mmio_base;
+
+	/* XXX i915_query()! */
+	switch (ring) {
+	case I915_EXEC_DEFAULT:
+	case I915_EXEC_RENDER:
+		mmio_base = 0x2000;
+		break;
+#if 0
+	case I915_EXEC_BSD:
+		mmio_base = 0x12000;
+		break;
+#endif
+	case I915_EXEC_BLT:
+		mmio_base = 0x22000;
+		break;
+
+	case I915_EXEC_VEBOX:
+		if (intel_gen(intel_get_drm_devid(i915)) >= 11)
+			mmio_base = 0x1d8000;
+		else
+			mmio_base = 0x1a000;
+		break;
+
+	default:
+		igt_skip("mmio base not known\n");
+	}
+
+	for (int n = 0; n < ARRAY_SIZE(spin); n++) {
+		const struct igt_spin_factory opts = {
+			.ctx = create_highest_priority(i915),
+			.engine = ring,
+		};
+		spin[n] = __igt_spin_factory(i915, &opts);
+		gem_context_destroy(i915, opts.ctx);
+	}
+
+	for (int i = 0; i < ARRAY_SIZE(priorities); i++) {
+		uint32_t ctx = gem_queue_create(i915);
+		gem_context_set_priority(i915, ctx, priorities[i]);
+		handle[i] = store_timestamp(i915, ctx, ring, mmio_base);
+		gem_context_destroy(i915, ctx);
+	}
+
+	for (int n = 0; n < ARRAY_SIZE(spin); n++)
+		igt_spin_free(i915, spin[n]);
+
+	for (int i = 0; i < ARRAY_SIZE(priorities); i++) {
+		uint32_t *ptr;
+
+		ptr = gem_mmap__gtt(i915, handle[i], 4096, PROT_READ);
+		gem_set_domain(i915, handle[i], /* no write hazard lies! */
+			       I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+		gem_close(i915, handle[i]);
+
+		handle[i] = ptr[1023];
+		munmap(ptr, 4096);
+
+		igt_debug("ctx[%d] .prio=%d, timestamp=%u\n",
+			  i, priorities[i], handle[i]);
+	}
+
+	igt_assert((int32_t)(handle[HI] - handle[LO]) < 0);
+}
+
+static void reorder(int i915, unsigned ring, unsigned flags)
+#define EQUAL 1
+{
+	IGT_CORK_HANDLE(cork);
+	uint32_t scratch;
+	uint32_t *ptr;
+	uint32_t ctx[2];
+	uint32_t plug;
+
+	ctx[LO] = gem_queue_create(i915);
+	gem_context_set_priority(i915, ctx[LO], MIN_PRIO);
+
+	ctx[HI] = gem_queue_create(i915);
+	gem_context_set_priority(i915, ctx[HI], flags & EQUAL ? MIN_PRIO : 0);
+
+	scratch = gem_create(i915, 4096);
+	plug = igt_cork_plug(&cork, i915);
+
+	/* We expect the high priority context to be executed first, and
+	 * so the final result will be the value from the low priority context.
+	 */
+	store_dword(i915, ctx[LO], ring, scratch, 0, ctx[LO], plug, 0);
+	store_dword(i915, ctx[HI], ring, scratch, 0, ctx[HI], plug, 0);
+
+	unplug_show_queue(i915, &cork, ring);
+	gem_close(i915, plug);
+
+	gem_context_destroy(i915, ctx[LO]);
+	gem_context_destroy(i915, ctx[HI]);
+
+	ptr = gem_mmap__gtt(i915, scratch, 4096, PROT_READ);
+	gem_set_domain(i915, scratch, /* no write hazard lies! */
+		       I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+	gem_close(i915, scratch);
+
+	if (flags & EQUAL) /* equal priority, result will be fifo */
+		igt_assert_eq_u32(ptr[0], ctx[HI]);
+	else
+		igt_assert_eq_u32(ptr[0], ctx[LO]);
+	munmap(ptr, 4096);
+}
+
+static void promotion(int i915, unsigned ring)
+{
+	IGT_CORK_HANDLE(cork);
+	uint32_t result, dep;
+	uint32_t *ptr;
+	uint32_t ctx[3];
+	uint32_t plug;
+
+	ctx[LO] = gem_queue_create(i915);
+	gem_context_set_priority(i915, ctx[LO], MIN_PRIO);
+
+	ctx[HI] = gem_queue_create(i915);
+	gem_context_set_priority(i915, ctx[HI], 0);
+
+	ctx[NOISE] = gem_queue_create(i915);
+	gem_context_set_priority(i915, ctx[NOISE], MIN_PRIO/2);
+
+	result = gem_create(i915, 4096);
+	dep = gem_create(i915, 4096);
+
+	plug = igt_cork_plug(&cork, i915);
+
+	/* Expect that HI promotes LO, so the order will be LO, HI, NOISE.
+	 *
+	 * fifo would be NOISE, LO, HI.
+	 * strict priority would be HI, NOISE, LO.
+	 */
+	store_dword(i915, ctx[NOISE], ring, result, 0, ctx[NOISE], plug, 0);
+	store_dword(i915, ctx[LO], ring, result, 0, ctx[LO], plug, 0);
+
+	/* link LO <-> HI via a dependency on another buffer */
+	store_dword(i915, ctx[LO], ring, dep, 0, ctx[LO], 0, I915_GEM_DOMAIN_INSTRUCTION);
+	store_dword(i915, ctx[HI], ring, dep, 0, ctx[HI], 0, 0);
+
+	store_dword(i915, ctx[HI], ring, result, 0, ctx[HI], 0, 0);
+
+	unplug_show_queue(i915, &cork, ring);
+	gem_close(i915, plug);
+
+	gem_context_destroy(i915, ctx[NOISE]);
+	gem_context_destroy(i915, ctx[LO]);
+	gem_context_destroy(i915, ctx[HI]);
+
+	ptr = gem_mmap__gtt(i915, dep, 4096, PROT_READ);
+	gem_set_domain(i915, dep, /* no write hazard lies! */
+			I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+	gem_close(i915, dep);
+
+	igt_assert_eq_u32(ptr[0], ctx[HI]);
+	munmap(ptr, 4096);
+
+	ptr = gem_mmap__gtt(i915, result, 4096, PROT_READ);
+	gem_set_domain(i915, result, /* no write hazard lies! */
+			I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+	gem_close(i915, result);
+
+	igt_assert_eq_u32(ptr[0], ctx[NOISE]);
+	munmap(ptr, 4096);
+}
+
+static void smoketest(int i915, unsigned ring, unsigned timeout)
+{
+	const int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+	unsigned engines[16];
+	unsigned nengine;
+	unsigned engine;
+	uint32_t scratch;
+	uint32_t *ptr;
+
+	nengine = 0;
+	for_each_engine(i915, engine) {
+		if (ignore_engine(engine))
+			continue;
+
+		engines[nengine++] = engine;
+	}
+	igt_require(nengine);
+
+	scratch = gem_create(i915, 4096);
+	igt_fork(child, ncpus) {
+		unsigned long count = 0;
+		uint32_t ctx;
+
+		hars_petruska_f54_1_random_perturb(child);
+
+		ctx = gem_queue_create(i915);
+		igt_until_timeout(timeout) {
+			int prio;
+
+			prio = hars_petruska_f54_1_random_unsafe_max(MAX_PRIO - MIN_PRIO) + MIN_PRIO;
+			gem_context_set_priority(i915, ctx, prio);
+
+			engine = engines[hars_petruska_f54_1_random_unsafe_max(nengine)];
+			store_dword(i915, ctx, engine, scratch,
+				    8*child + 0, ~child,
+				    0, 0);
+			for (unsigned int step = 0; step < 8; step++)
+				store_dword(i915, ctx, engine, scratch,
+					    8*child + 4, count++,
+					    0, 0);
+		}
+		gem_context_destroy(i915, ctx);
+	}
+	igt_waitchildren();
+
+	ptr = gem_mmap__gtt(i915, scratch, 4096, PROT_READ);
+	gem_set_domain(i915, scratch, /* no write hazard lies! */
+			I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+	gem_close(i915, scratch);
+
+	for (unsigned n = 0; n < ncpus; n++) {
+		igt_assert_eq_u32(ptr[2*n], ~n);
+		/*
+		 * Note this count is approximate due to unconstrained
+		 * ordering of the dword writes between engines.
+		 *
+		 * Take the result with a pinch of salt.
+		 */
+		igt_info("Child[%d] completed %u cycles\n", n, ptr[2*n+1]);
+	}
+	munmap(ptr, 4096);
+}
+
+igt_main
+{
+	const struct intel_execution_engine *e;
+	int i915 = -1;
+
+	igt_fixture {
+		i915 = drm_open_driver(DRIVER_INTEL);
+		igt_require_gem(i915);
+	}
+
+	igt_subtest_group {
+		igt_fixture {
+			igt_require(gem_contexts_has_shared_gtt(i915));
+			igt_fork_hang_detector(i915);
+		}
+
+		igt_subtest("create-shared-gtt")
+			create_shared_gtt(i915, 0);
+
+		igt_subtest("detached-shared-gtt")
+			create_shared_gtt(i915, DETACHED);
+
+		igt_subtest("disjoint-timelines")
+			disjoint_timelines(i915);
+
+		igt_subtest("single-timeline")
+			single_timeline(i915);
+
+		igt_subtest("exhaust-shared-gtt")
+			exhaust_shared_gtt(i915, 0);
+
+		igt_subtest("exhaust-shared-gtt-lrc")
+			exhaust_shared_gtt(i915, EXHAUST_LRC);
+
+		for (e = intel_execution_engines; e->name; e++) {
+			igt_subtest_f("exec-shared-gtt-%s", e->name)
+				exec_shared_gtt(i915, e->exec_id | e->flags);
+
+			if (!ignore_engine(e->exec_id | e->flags)) {
+				igt_subtest_f("exec-single-timeline-%s",
+					      e->name)
+					exec_single_timeline(i915,
+							     e->exec_id | e->flags);
+			}
+
+			/*
+			 * Check that the shared contexts operate independently,
+			 * that is, requests on one ("queue") can be scheduled
+			 * around another queue. We only check the basics here,
+			 * enough to reduce the queue into just another context,
+			 * and so rely on gem_exec_schedule to prove the rest.
+			 */
+			igt_subtest_group {
+				igt_fixture {
+					gem_require_ring(i915, e->exec_id | e->flags);
+					igt_require(gem_can_store_dword(i915, e->exec_id | e->flags));
+					igt_require(gem_scheduler_enabled(i915));
+					igt_require(gem_scheduler_has_ctx_priority(i915));
+				}
+
+				igt_subtest_f("Q-independent-%s", e->name)
+					independent(i915, e->exec_id | e->flags, 0);
+
+				igt_subtest_f("Q-in-order-%s", e->name)
+					reorder(i915, e->exec_id | e->flags, EQUAL);
+
+				igt_subtest_f("Q-out-order-%s", e->name)
+					reorder(i915, e->exec_id | e->flags, 0);
+
+				igt_subtest_f("Q-promotion-%s", e->name)
+					promotion(i915, e->exec_id | e->flags);
+
+				igt_subtest_f("Q-smoketest-%s", e->name)
+					smoketest(i915, e->exec_id | e->flags, 5);
+			}
+		}
+
+		igt_subtest("Q-smoketest-all") {
+			igt_require(gem_scheduler_enabled(i915));
+			igt_require(gem_scheduler_has_ctx_priority(i915));
+			smoketest(i915, -1, 30);
+		}
+
+		igt_fixture {
+			igt_stop_hang_detector();
+		}
+	}
+}
diff --git a/tests/i915/gem_exec_whisper.c b/tests/i915/gem_exec_whisper.c
index 6c3b53756..d3e0b0ba2 100644
--- a/tests/i915/gem_exec_whisper.c
+++ b/tests/i915/gem_exec_whisper.c
@@ -87,6 +87,7 @@ static void verify_reloc(int fd, uint32_t handle,
 #define HANG 0x20
 #define SYNC 0x40
 #define PRIORITY 0x80
+#define QUEUES 0x100
 
 struct hang {
 	struct drm_i915_gem_exec_object2 obj;
@@ -171,7 +172,7 @@ static void ctx_set_random_priority(int fd, uint32_t ctx)
 {
 	int prio = hars_petruska_f54_1_random_unsafe_max(1024) - 512;
 	gem_context_set_priority(fd, ctx, prio);
-};
+}
 
 static void whisper(int fd, unsigned engine, unsigned flags)
 {
@@ -226,6 +227,9 @@ static void whisper(int fd, unsigned engine, unsigned flags)
 	if (flags & CONTEXTS)
 		gem_require_contexts(fd);
 
+	if (flags & QUEUES)
+		igt_require(gem_has_queues(fd));
+
 	if (flags & HANG)
 		init_hang(&hang);
 
@@ -290,6 +294,10 @@ static void whisper(int fd, unsigned engine, unsigned flags)
 			for (n = 0; n < 64; n++)
 				contexts[n] = gem_context_create(fd);
 		}
+		if (flags & QUEUES) {
+			for (n = 0; n < 64; n++)
+				contexts[n] = gem_queue_create(fd);
+		}
 		if (flags & FDS) {
 			for (n = 0; n < 64; n++)
 				fds[n] = drm_open_driver(DRIVER_INTEL);
@@ -403,7 +411,7 @@ static void whisper(int fd, unsigned engine, unsigned flags)
 						execbuf.flags &= ~ENGINE_MASK;
 						execbuf.flags |= engines[rand() % nengine];
 					}
-					if (flags & CONTEXTS) {
+					if (flags & (CONTEXTS | QUEUES)) {
 						execbuf.rsvd1 = contexts[rand() % 64];
 						if (flags & PRIORITY)
 							ctx_set_random_priority(this_fd, execbuf.rsvd1);
@@ -486,7 +494,7 @@ static void whisper(int fd, unsigned engine, unsigned flags)
 			for (n = 0; n < 64; n++)
 				close(fds[n]);
 		}
-		if (flags & CONTEXTS) {
+		if (flags & (CONTEXTS | QUEUES)) {
 			for (n = 0; n < 64; n++)
 				gem_context_destroy(fd, contexts[n]);
 		}
@@ -522,18 +530,24 @@ igt_main
 		{ "chain-forked", CHAIN | FORKED },
 		{ "chain-interruptible", CHAIN | INTERRUPTIBLE },
 		{ "chain-sync", CHAIN | SYNC },
-		{ "contexts", CONTEXTS },
-		{ "contexts-interruptible", CONTEXTS | INTERRUPTIBLE},
-		{ "contexts-forked", CONTEXTS | FORKED},
-		{ "contexts-priority", CONTEXTS | FORKED | PRIORITY },
-		{ "contexts-chain", CONTEXTS | CHAIN },
-		{ "contexts-sync", CONTEXTS | SYNC },
 		{ "fds", FDS },
 		{ "fds-interruptible", FDS | INTERRUPTIBLE},
 		{ "fds-forked", FDS | FORKED},
 		{ "fds-priority", FDS | FORKED | PRIORITY },
 		{ "fds-chain", FDS | CHAIN},
 		{ "fds-sync", FDS | SYNC},
+		{ "contexts", CONTEXTS },
+		{ "contexts-interruptible", CONTEXTS | INTERRUPTIBLE},
+		{ "contexts-forked", CONTEXTS | FORKED},
+		{ "contexts-priority", CONTEXTS | FORKED | PRIORITY },
+		{ "contexts-chain", CONTEXTS | CHAIN },
+		{ "contexts-sync", CONTEXTS | SYNC },
+		{ "queues", QUEUES },
+		{ "queues-interruptible", QUEUES | INTERRUPTIBLE},
+		{ "queues-forked", QUEUES | FORKED},
+		{ "queues-priority", QUEUES | FORKED | PRIORITY },
+		{ "queues-chain", QUEUES | CHAIN },
+		{ "queues-sync", QUEUES | SYNC },
 		{ NULL }
 	};
 	int fd;
diff --git a/tests/meson.build b/tests/meson.build
index 3810bd760..3883ae127 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -114,6 +114,7 @@ i915_progs = [
 	'gem_ctx_exec',
 	'gem_ctx_isolation',
 	'gem_ctx_param',
+	'gem_ctx_shared',
 	'gem_ctx_switch',
 	'gem_ctx_thrash',
 	'gem_double_irq_loop',
-- 
2.20.1

_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH i-g-t 09/16] i915/gem_ctx_switch: Exercise queues
  2019-05-08 10:09 ` [igt-dev] " Chris Wilson
@ 2019-05-08 10:09   ` Chris Wilson
  -1 siblings, 0 replies; 92+ messages in thread
From: Chris Wilson @ 2019-05-08 10:09 UTC (permalink / raw)
  To: intel-gfx; +Cc: igt-dev

Queues are a form of contexts that share a VM and enforce a single timeline
across all engines. Test switching between them, just like ordinary
contexts.

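As a rough sketch of what the new phase does (illustrative only; fd, flags
and execbuf are as set up by the test, and gem_queue_create() is the helper
added earlier in this series):

	uint32_t contexts[64];

	for (int n = 0; n < 64; n++) {
		if (flags & QUEUE)	/* shared vm, single timeline */
			contexts[n] = gem_queue_create(fd);
		else			/* independent timelines */
			contexts[n] = gem_context_create(fd);
	}

	/* then switch between them as fast as we can */
	for (int n = 0; n < 1024; n++) {
		execbuf.rsvd1 = contexts[n % 64];	/* context for this batch */
		gem_execbuf(fd, &execbuf);
	}
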
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 tests/i915/gem_ctx_switch.c | 75 +++++++++++++++++++++++++++----------
 1 file changed, 55 insertions(+), 20 deletions(-)

diff --git a/tests/i915/gem_ctx_switch.c b/tests/i915/gem_ctx_switch.c
index 87e13b915..647911d4c 100644
--- a/tests/i915/gem_ctx_switch.c
+++ b/tests/i915/gem_ctx_switch.c
@@ -44,7 +44,8 @@
 #define LOCAL_I915_EXEC_NO_RELOC (1<<11)
 #define LOCAL_I915_EXEC_HANDLE_LUT (1<<12)
 
-#define INTERRUPTIBLE 1
+#define INTERRUPTIBLE 0x1
+#define QUEUE 0x2
 
 static double elapsed(const struct timespec *start, const struct timespec *end)
 {
@@ -126,8 +127,12 @@ static void single(int fd, uint32_t handle,
 
 	gem_require_ring(fd, e->exec_id | e->flags);
 
-	for (n = 0; n < 64; n++)
-		contexts[n] = gem_context_create(fd);
+	for (n = 0; n < 64; n++) {
+		if (flags & QUEUE)
+			contexts[n] = gem_queue_create(fd);
+		else
+			contexts[n] = gem_context_create(fd);
+	}
 
 	memset(&obj, 0, sizeof(obj));
 	obj.handle = handle;
@@ -232,8 +237,12 @@ static void all(int fd, uint32_t handle, unsigned flags, int timeout)
 	}
 	igt_require(nengine);
 
-	for (n = 0; n < ARRAY_SIZE(contexts); n++)
-		contexts[n] = gem_context_create(fd);
+	for (n = 0; n < ARRAY_SIZE(contexts); n++) {
+		if (flags & QUEUE)
+			contexts[n] = gem_queue_create(fd);
+		else
+			contexts[n] = gem_context_create(fd);
+	}
 
 	memset(obj, 0, sizeof(obj));
 	obj[1].handle = handle;
@@ -298,6 +307,17 @@ igt_main
 {
 	const int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
 	const struct intel_execution_engine *e;
+	static const struct {
+		const char *name;
+		unsigned int flags;
+		bool (*require)(int fd);
+	} phases[] = {
+		{ "", 0, NULL },
+		{ "-interruptible", INTERRUPTIBLE, NULL },
+		{ "-queue", QUEUE, gem_has_queues },
+		{ "-queue-interruptible", QUEUE | INTERRUPTIBLE, gem_has_queues },
+		{ }
+	};
 	uint32_t light = 0, heavy;
 	int fd = -1;
 
@@ -319,21 +339,26 @@ igt_main
 	}
 
 	for (e = intel_execution_engines; e->name; e++) {
-		igt_subtest_f("%s%s", e->exec_id == 0 ? "basic-" : "", e->name)
-			single(fd, light, e, 0, 1, 5);
-
-		igt_skip_on_simulation();
-
-		igt_subtest_f("%s%s-heavy", e->exec_id == 0 ? "basic-" : "", e->name)
-			single(fd, heavy, e, 0, 1, 5);
-		igt_subtest_f("%s-interruptible", e->name)
-			single(fd, light, e, INTERRUPTIBLE, 1, 150);
-		igt_subtest_f("forked-%s", e->name)
-			single(fd, light, e, 0, ncpus, 150);
-		igt_subtest_f("forked-%s-heavy", e->name)
-			single(fd, heavy, e, 0, ncpus, 150);
-		igt_subtest_f("forked-%s-interruptible", e->name)
-			single(fd, light, e, INTERRUPTIBLE, ncpus, 150);
+		for (typeof(*phases) *p = phases; p->name; p++) {
+			igt_subtest_group {
+				igt_fixture {
+					if (p->require)
+						igt_require(p->require(fd));
+				}
+
+				igt_subtest_f("%s%s%s", e->exec_id == 0 ? "basic-" : "", e->name, p->name)
+					single(fd, light, e, p->flags, 1, 5);
+
+				igt_skip_on_simulation();
+
+				igt_subtest_f("%s%s-heavy%s", e->exec_id == 0 ? "basic-" : "", e->name, p->name)
+					single(fd, heavy, e, p->flags, 1, 5);
+				igt_subtest_f("forked-%s%s", e->name, p->name)
+					single(fd, light, e, p->flags, ncpus, 150);
+				igt_subtest_f("forked-%s-heavy%s", e->name, p->name)
+					single(fd, heavy, e, p->flags, ncpus, 150);
+			}
+		}
 	}
 
 	igt_subtest("basic-all-light")
@@ -341,6 +366,16 @@ igt_main
 	igt_subtest("basic-all-heavy")
 		all(fd, heavy, 0, 5);
 
+	igt_subtest_group {
+		igt_fixture {
+			igt_require(gem_has_queues(fd));
+		}
+		igt_subtest("basic-queue-light")
+			all(fd, light, QUEUE, 5);
+		igt_subtest("basic-queue-heavy")
+			all(fd, heavy, QUEUE, 5);
+	}
+
 	igt_fixture {
 		igt_stop_hang_detector();
 		gem_close(fd, heavy);
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [igt-dev] [PATCH i-g-t 09/16] i915/gem_ctx_switch: Exercise queues
@ 2019-05-08 10:09   ` Chris Wilson
  0 siblings, 0 replies; 92+ messages in thread
From: Chris Wilson @ 2019-05-08 10:09 UTC (permalink / raw)
  To: intel-gfx; +Cc: igt-dev

Queues are a form of contexts that share a VM and enforce a single timeline
across all engines. Test switching between them, just like ordinary
contexts.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 tests/i915/gem_ctx_switch.c | 75 +++++++++++++++++++++++++++----------
 1 file changed, 55 insertions(+), 20 deletions(-)

diff --git a/tests/i915/gem_ctx_switch.c b/tests/i915/gem_ctx_switch.c
index 87e13b915..647911d4c 100644
--- a/tests/i915/gem_ctx_switch.c
+++ b/tests/i915/gem_ctx_switch.c
@@ -44,7 +44,8 @@
 #define LOCAL_I915_EXEC_NO_RELOC (1<<11)
 #define LOCAL_I915_EXEC_HANDLE_LUT (1<<12)
 
-#define INTERRUPTIBLE 1
+#define INTERRUPTIBLE 0x1
+#define QUEUE 0x2
 
 static double elapsed(const struct timespec *start, const struct timespec *end)
 {
@@ -126,8 +127,12 @@ static void single(int fd, uint32_t handle,
 
 	gem_require_ring(fd, e->exec_id | e->flags);
 
-	for (n = 0; n < 64; n++)
-		contexts[n] = gem_context_create(fd);
+	for (n = 0; n < 64; n++) {
+		if (flags & QUEUE)
+			contexts[n] = gem_queue_create(fd);
+		else
+			contexts[n] = gem_context_create(fd);
+	}
 
 	memset(&obj, 0, sizeof(obj));
 	obj.handle = handle;
@@ -232,8 +237,12 @@ static void all(int fd, uint32_t handle, unsigned flags, int timeout)
 	}
 	igt_require(nengine);
 
-	for (n = 0; n < ARRAY_SIZE(contexts); n++)
-		contexts[n] = gem_context_create(fd);
+	for (n = 0; n < ARRAY_SIZE(contexts); n++) {
+		if (flags & QUEUE)
+			contexts[n] = gem_queue_create(fd);
+		else
+			contexts[n] = gem_context_create(fd);
+	}
 
 	memset(obj, 0, sizeof(obj));
 	obj[1].handle = handle;
@@ -298,6 +307,17 @@ igt_main
 {
 	const int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
 	const struct intel_execution_engine *e;
+	static const struct {
+		const char *name;
+		unsigned int flags;
+		bool (*require)(int fd);
+	} phases[] = {
+		{ "", 0, NULL },
+		{ "-interruptible", INTERRUPTIBLE, NULL },
+		{ "-queue", QUEUE, gem_has_queues },
+		{ "-queue-interruptible", QUEUE | INTERRUPTIBLE, gem_has_queues },
+		{ }
+	};
 	uint32_t light = 0, heavy;
 	int fd = -1;
 
@@ -319,21 +339,26 @@ igt_main
 	}
 
 	for (e = intel_execution_engines; e->name; e++) {
-		igt_subtest_f("%s%s", e->exec_id == 0 ? "basic-" : "", e->name)
-			single(fd, light, e, 0, 1, 5);
-
-		igt_skip_on_simulation();
-
-		igt_subtest_f("%s%s-heavy", e->exec_id == 0 ? "basic-" : "", e->name)
-			single(fd, heavy, e, 0, 1, 5);
-		igt_subtest_f("%s-interruptible", e->name)
-			single(fd, light, e, INTERRUPTIBLE, 1, 150);
-		igt_subtest_f("forked-%s", e->name)
-			single(fd, light, e, 0, ncpus, 150);
-		igt_subtest_f("forked-%s-heavy", e->name)
-			single(fd, heavy, e, 0, ncpus, 150);
-		igt_subtest_f("forked-%s-interruptible", e->name)
-			single(fd, light, e, INTERRUPTIBLE, ncpus, 150);
+		for (typeof(*phases) *p = phases; p->name; p++) {
+			igt_subtest_group {
+				igt_fixture {
+					if (p->require)
+						igt_require(p->require(fd));
+				}
+
+				igt_subtest_f("%s%s%s", e->exec_id == 0 ? "basic-" : "", e->name, p->name)
+					single(fd, light, e, p->flags, 1, 5);
+
+				igt_skip_on_simulation();
+
+				igt_subtest_f("%s%s-heavy%s", e->exec_id == 0 ? "basic-" : "", e->name, p->name)
+					single(fd, heavy, e, p->flags, 1, 5);
+				igt_subtest_f("forked-%s%s", e->name, p->name)
+					single(fd, light, e, p->flags, ncpus, 150);
+				igt_subtest_f("forked-%s-heavy%s", e->name, p->name)
+					single(fd, heavy, e, p->flags, ncpus, 150);
+			}
+		}
 	}
 
 	igt_subtest("basic-all-light")
@@ -341,6 +366,16 @@ igt_main
 	igt_subtest("basic-all-heavy")
 		all(fd, heavy, 0, 5);
 
+	igt_subtest_group {
+		igt_fixture {
+			igt_require(gem_has_queues(fd));
+		}
+		igt_subtest("basic-queue-light")
+			all(fd, light, QUEUE, 5);
+		igt_subtest("basic-queue-heavy")
+			all(fd, heavy, QUEUE, 5);
+	}
+
 	igt_fixture {
 		igt_stop_hang_detector();
 		gem_close(fd, heavy);
-- 
2.20.1

_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH i-g-t 10/16] i915/gem_exec_whisper: Fork all-engine tests one-per-engine
  2019-05-08 10:09 ` [igt-dev] " Chris Wilson
@ 2019-05-08 10:09   ` Chris Wilson
  -1 siblings, 0 replies; 92+ messages in thread
From: Chris Wilson @ 2019-05-08 10:09 UTC (permalink / raw)
  To: intel-gfx; +Cc: igt-dev

Add a new mode for some more stress: submit the all-engines tests
simultaneously, one stream per engine.

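The gist of the new mode, sketched (engines[]/nengine are the arrays already
built by whisper(); the child body is elided):

	nchild = 1;
	if (flags & FORKED)
		nchild *= sysconf(_SC_NPROCESSORS_ONLN);
	if (flags & ALL)	/* one stream per engine */
		nchild *= nengine;

	igt_fork(child, nchild) {
		if (flags & ALL) {
			/* bind this child to a single engine */
			engines[0] = engines[child % nengine];
			nengine = 1;
		}
		/* ... the usual whisper chain on engines[0..nengine) ... */
	}
	igt_waitchildren();
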
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 tests/i915/gem_exec_whisper.c | 27 ++++++++++++++++++++++-----
 1 file changed, 22 insertions(+), 5 deletions(-)

diff --git a/tests/i915/gem_exec_whisper.c b/tests/i915/gem_exec_whisper.c
index d3e0b0ba2..d5afc8119 100644
--- a/tests/i915/gem_exec_whisper.c
+++ b/tests/i915/gem_exec_whisper.c
@@ -88,6 +88,7 @@ static void verify_reloc(int fd, uint32_t handle,
 #define SYNC 0x40
 #define PRIORITY 0x80
 #define QUEUES 0x100
+#define ALL 0x200
 
 struct hang {
 	struct drm_i915_gem_exec_object2 obj;
@@ -199,6 +200,7 @@ static void whisper(int fd, unsigned engine, unsigned flags)
 	uint64_t old_offset;
 	int i, n, loc;
 	int debugfs;
+	int nchild;
 
 	if (flags & PRIORITY) {
 		igt_require(gem_scheduler_enabled(fd));
@@ -215,6 +217,7 @@ static void whisper(int fd, unsigned engine, unsigned flags)
 				engines[nengine++] = engine;
 		}
 	} else {
+		igt_assert(!(flags & ALL));
 		igt_require(gem_has_ring(fd, engine));
 		igt_require(gem_can_store_dword(fd, engine));
 		engines[nengine++] = engine;
@@ -233,11 +236,22 @@ static void whisper(int fd, unsigned engine, unsigned flags)
 	if (flags & HANG)
 		init_hang(&hang);
 
+	nchild = 1;
+	if (flags & FORKED)
+		nchild *= sysconf(_SC_NPROCESSORS_ONLN);
+	if (flags & ALL)
+		nchild *= nengine;
+
 	intel_detect_and_clear_missed_interrupts(fd);
 	gpu_power_read(&power, &sample[0]);
-	igt_fork(child, flags & FORKED ? sysconf(_SC_NPROCESSORS_ONLN) : 1)  {
+	igt_fork(child, nchild) {
 		unsigned int pass;
 
+		if (flags & ALL) {
+			engines[0] = engines[child % nengine];
+			nengine = 1;
+		}
+
 		memset(&scratch, 0, sizeof(scratch));
 		scratch.handle = gem_create(fd, 4096);
 		scratch.flags = EXEC_OBJECT_WRITE;
@@ -341,7 +355,7 @@ static void whisper(int fd, unsigned engine, unsigned flags)
 			igt_until_timeout(150) {
 				uint64_t offset;
 
-				if (!(flags & FORKED))
+				if (nchild == 1)
 					write_seqno(debugfs, pass);
 
 				if (flags & HANG)
@@ -382,8 +396,8 @@ static void whisper(int fd, unsigned engine, unsigned flags)
 
 				gem_write(fd, batches[1023].handle, loc, &pass, sizeof(pass));
 				for (n = 1024; --n >= 1; ) {
+					uint32_t handle[2] = {};
 					int this_fd = fd;
-					uint32_t handle[2];
 
 					execbuf.buffers_ptr = to_user_pointer(&batches[n-1]);
 					reloc_migrations += batches[n-1].offset != inter[n].presumed_offset;
@@ -550,7 +564,7 @@ igt_main
 		{ "queues-sync", QUEUES | SYNC },
 		{ NULL }
 	};
-	int fd;
+	int fd = -1;
 
 	igt_fixture {
 		fd = drm_open_driver_master(DRIVER_INTEL);
@@ -561,9 +575,12 @@ igt_main
 		igt_fork_hang_detector(fd);
 	}
 
-	for (const struct mode *m = modes; m->name; m++)
+	for (const struct mode *m = modes; m->name; m++) {
 		igt_subtest_f("%s", m->name)
 			whisper(fd, ALL_ENGINES, m->flags);
+		igt_subtest_f("%s-all", m->name)
+			whisper(fd, ALL_ENGINES, m->flags | ALL);
+	}
 
 	for (const struct intel_execution_engine *e = intel_execution_engines;
 	     e->name; e++) {
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [igt-dev] [PATCH i-g-t 10/16] i915/gem_exec_whisper: Fork all-engine tests one-per-engine
@ 2019-05-08 10:09   ` Chris Wilson
  0 siblings, 0 replies; 92+ messages in thread
From: Chris Wilson @ 2019-05-08 10:09 UTC (permalink / raw)
  To: intel-gfx; +Cc: igt-dev

Add a new mode for some more stress: submit the all-engines tests
simultaneously, one stream per engine.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 tests/i915/gem_exec_whisper.c | 27 ++++++++++++++++++++++-----
 1 file changed, 22 insertions(+), 5 deletions(-)

diff --git a/tests/i915/gem_exec_whisper.c b/tests/i915/gem_exec_whisper.c
index d3e0b0ba2..d5afc8119 100644
--- a/tests/i915/gem_exec_whisper.c
+++ b/tests/i915/gem_exec_whisper.c
@@ -88,6 +88,7 @@ static void verify_reloc(int fd, uint32_t handle,
 #define SYNC 0x40
 #define PRIORITY 0x80
 #define QUEUES 0x100
+#define ALL 0x200
 
 struct hang {
 	struct drm_i915_gem_exec_object2 obj;
@@ -199,6 +200,7 @@ static void whisper(int fd, unsigned engine, unsigned flags)
 	uint64_t old_offset;
 	int i, n, loc;
 	int debugfs;
+	int nchild;
 
 	if (flags & PRIORITY) {
 		igt_require(gem_scheduler_enabled(fd));
@@ -215,6 +217,7 @@ static void whisper(int fd, unsigned engine, unsigned flags)
 				engines[nengine++] = engine;
 		}
 	} else {
+		igt_assert(!(flags & ALL));
 		igt_require(gem_has_ring(fd, engine));
 		igt_require(gem_can_store_dword(fd, engine));
 		engines[nengine++] = engine;
@@ -233,11 +236,22 @@ static void whisper(int fd, unsigned engine, unsigned flags)
 	if (flags & HANG)
 		init_hang(&hang);
 
+	nchild = 1;
+	if (flags & FORKED)
+		nchild *= sysconf(_SC_NPROCESSORS_ONLN);
+	if (flags & ALL)
+		nchild *= nengine;
+
 	intel_detect_and_clear_missed_interrupts(fd);
 	gpu_power_read(&power, &sample[0]);
-	igt_fork(child, flags & FORKED ? sysconf(_SC_NPROCESSORS_ONLN) : 1)  {
+	igt_fork(child, nchild) {
 		unsigned int pass;
 
+		if (flags & ALL) {
+			engines[0] = engines[child % nengine];
+			nengine = 1;
+		}
+
 		memset(&scratch, 0, sizeof(scratch));
 		scratch.handle = gem_create(fd, 4096);
 		scratch.flags = EXEC_OBJECT_WRITE;
@@ -341,7 +355,7 @@ static void whisper(int fd, unsigned engine, unsigned flags)
 			igt_until_timeout(150) {
 				uint64_t offset;
 
-				if (!(flags & FORKED))
+				if (nchild == 1)
 					write_seqno(debugfs, pass);
 
 				if (flags & HANG)
@@ -382,8 +396,8 @@ static void whisper(int fd, unsigned engine, unsigned flags)
 
 				gem_write(fd, batches[1023].handle, loc, &pass, sizeof(pass));
 				for (n = 1024; --n >= 1; ) {
+					uint32_t handle[2] = {};
 					int this_fd = fd;
-					uint32_t handle[2];
 
 					execbuf.buffers_ptr = to_user_pointer(&batches[n-1]);
 					reloc_migrations += batches[n-1].offset != inter[n].presumed_offset;
@@ -550,7 +564,7 @@ igt_main
 		{ "queues-sync", QUEUES | SYNC },
 		{ NULL }
 	};
-	int fd;
+	int fd = -1;
 
 	igt_fixture {
 		fd = drm_open_driver_master(DRIVER_INTEL);
@@ -561,9 +575,12 @@ igt_main
 		igt_fork_hang_detector(fd);
 	}
 
-	for (const struct mode *m = modes; m->name; m++)
+	for (const struct mode *m = modes; m->name; m++) {
 		igt_subtest_f("%s", m->name)
 			whisper(fd, ALL_ENGINES, m->flags);
+		igt_subtest_f("%s-all", m->name)
+			whisper(fd, ALL_ENGINES, m->flags | ALL);
+	}
 
 	for (const struct intel_execution_engine *e = intel_execution_engines;
 	     e->name; e++) {
-- 
2.20.1

_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH i-g-t 11/16] i915/gem_exec_whisper: debugfs/next_seqno is defunct
  2019-05-08 10:09 ` [igt-dev] " Chris Wilson
@ 2019-05-08 10:09   ` Chris Wilson
  -1 siblings, 0 replies; 92+ messages in thread
From: Chris Wilson @ 2019-05-08 10:09 UTC (permalink / raw)
  To: intel-gfx; +Cc: igt-dev

We removed next_seqno in 5.1, so time to wave goodbye.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 tests/i915/gem_exec_whisper.c | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/tests/i915/gem_exec_whisper.c b/tests/i915/gem_exec_whisper.c
index d5afc8119..61b8d6dac 100644
--- a/tests/i915/gem_exec_whisper.c
+++ b/tests/i915/gem_exec_whisper.c
@@ -44,15 +44,6 @@
 
 #define VERIFY 0
 
-static void write_seqno(int dir, unsigned offset)
-{
-	uint32_t seqno = UINT32_MAX - offset;
-
-	igt_sysfs_printf(dir, "i915_next_seqno", "0x%x", seqno);
-
-	igt_debug("next seqno set to: 0x%x\n", seqno);
-}
-
 static void check_bo(int fd, uint32_t handle, int pass)
 {
 	uint32_t *map;
@@ -355,9 +346,6 @@ static void whisper(int fd, unsigned engine, unsigned flags)
 			igt_until_timeout(150) {
 				uint64_t offset;
 
-				if (nchild == 1)
-					write_seqno(debugfs, pass);
-
 				if (flags & HANG)
 					submit_hang(&hang, engines, nengine, flags);
 
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [Intel-gfx] [PATCH i-g-t 11/16] i915/gem_exec_whisper: debugfs/next_seqno is defunct
@ 2019-05-08 10:09   ` Chris Wilson
  0 siblings, 0 replies; 92+ messages in thread
From: Chris Wilson @ 2019-05-08 10:09 UTC (permalink / raw)
  To: intel-gfx; +Cc: igt-dev

We removed next_seqno in 5.1, so time to wave goodbye.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 tests/i915/gem_exec_whisper.c | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/tests/i915/gem_exec_whisper.c b/tests/i915/gem_exec_whisper.c
index d5afc8119..61b8d6dac 100644
--- a/tests/i915/gem_exec_whisper.c
+++ b/tests/i915/gem_exec_whisper.c
@@ -44,15 +44,6 @@
 
 #define VERIFY 0
 
-static void write_seqno(int dir, unsigned offset)
-{
-	uint32_t seqno = UINT32_MAX - offset;
-
-	igt_sysfs_printf(dir, "i915_next_seqno", "0x%x", seqno);
-
-	igt_debug("next seqno set to: 0x%x\n", seqno);
-}
-
 static void check_bo(int fd, uint32_t handle, int pass)
 {
 	uint32_t *map;
@@ -355,9 +346,6 @@ static void whisper(int fd, unsigned engine, unsigned flags)
 			igt_until_timeout(150) {
 				uint64_t offset;
 
-				if (nchild == 1)
-					write_seqno(debugfs, pass);
-
 				if (flags & HANG)
 					submit_hang(&hang, engines, nengine, flags);
 
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH i-g-t 12/16] i915: Add gem_ctx_engines
  2019-05-08 10:09 ` [igt-dev] " Chris Wilson
@ 2019-05-08 10:09   ` Chris Wilson
  -1 siblings, 0 replies; 92+ messages in thread
From: Chris Wilson @ 2019-05-08 10:09 UTC (permalink / raw)
  To: intel-gfx; +Cc: igt-dev

To exercise the new I915_CONTEXT_PARAM_ENGINES and its interactions with
gem_execbuf().

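For reference, the shape of the uAPI being exercised, as a sketch (names
from the drm-uapi import earlier in this series; assumes an execbuf already
set up against the same context):

	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 2);
	struct drm_i915_gem_context_param param = {
		.ctx_id = gem_context_create(i915),
		.param = I915_CONTEXT_PARAM_ENGINES,
		.value = to_user_pointer(&engines),
		.size = sizeof(engines),
	};

	/* slot 0 -> rcs0, slot 1 -> bcs0 */
	engines.engines[0].engine_class = I915_ENGINE_CLASS_RENDER;
	engines.engines[0].engine_instance = 0;
	engines.engines[1].engine_class = I915_ENGINE_CLASS_COPY;
	engines.engines[1].engine_instance = 0;
	gem_context_set_param(i915, &param);

	/* execbuf flags now index the map: 1 selects bcs0, not I915_EXEC_BLT */
	execbuf.flags = 1;
	gem_execbuf(i915, &execbuf);
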
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: Andi Shyti <andi@etezian.org>
Reviewed-by: Andi Shyti <andi@etezian.org>
---
 tests/Makefile.sources       |   1 +
 tests/i915/gem_ctx_engines.c | 517 +++++++++++++++++++++++++++++++++++
 tests/meson.build            |   1 +
 3 files changed, 519 insertions(+)
 create mode 100644 tests/i915/gem_ctx_engines.c

diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index 3552e895b..e7ee27e81 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -22,6 +22,7 @@ TESTS_progs = \
 	drm_mm \
 	drm_read \
 	i915/gem_ctx_clone \
+	i915/gem_ctx_engines \
 	i915/gem_ctx_shared \
 	i915/gem_vm_create \
 	kms_3d \
diff --git a/tests/i915/gem_ctx_engines.c b/tests/i915/gem_ctx_engines.c
new file mode 100644
index 000000000..f83aa4772
--- /dev/null
+++ b/tests/i915/gem_ctx_engines.c
@@ -0,0 +1,517 @@
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "igt.h"
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+
+#include <drm.h>
+
+#include "i915/gem_context.h"
+#include "sw_sync.h"
+
+#define engine_class(e, n) ((e)->engines[(n)].engine_class)
+#define engine_instance(e, n) ((e)->engines[(n)].engine_instance)
+
+static bool has_context_engines(int i915)
+{
+	struct drm_i915_gem_context_param param = {
+		.ctx_id = 0,
+		.param = I915_CONTEXT_PARAM_ENGINES,
+	};
+	return __gem_context_set_param(i915, &param) == 0;
+}
+
+static void invalid_engines(int i915)
+{
+	struct i915_context_param_engines stack = {}, *engines;
+	struct drm_i915_gem_context_param param = {
+		.ctx_id = gem_context_create(i915),
+		.param = I915_CONTEXT_PARAM_ENGINES,
+		.value = to_user_pointer(&stack),
+	};
+	uint32_t handle;
+	void *ptr;
+
+	param.size = 0;
+	igt_assert_eq(__gem_context_set_param(i915, &param), 0);
+
+	param.size = 1;
+	igt_assert_eq(__gem_context_set_param(i915, &param), -EINVAL);
+
+	param.size = sizeof(stack) - 1;
+	igt_assert_eq(__gem_context_set_param(i915, &param), -EINVAL);
+
+	param.size = sizeof(stack) + 1;
+	igt_assert_eq(__gem_context_set_param(i915, &param), -EINVAL);
+
+	param.size = 0;
+	igt_assert_eq(__gem_context_set_param(i915, &param), 0);
+
+	/* Create a single page surrounded by inaccessible nothingness */
+	ptr = mmap(NULL, 3 * 4096, PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0);
+	igt_assert(ptr != MAP_FAILED);
+
+	munmap(ptr, 4096);
+	munmap(ptr + 2 * 4096, 4096);
+	munmap(ptr + 2 *4096, 4096);
+
+	param.size = sizeof(*engines) + sizeof(*engines->engines);
+	param.value = to_user_pointer(engines);
+
+	engines->engines[0].engine_class = -1;
+	igt_assert_eq(__gem_context_set_param(i915, &param), -ENOENT);
+
+	mprotect(engines, 4096, PROT_READ);
+	igt_assert_eq(__gem_context_set_param(i915, &param), -ENOENT);
+
+	mprotect(engines, 4096, PROT_WRITE);
+	engines->engines[0].engine_class = 0;
+	if (__gem_context_set_param(i915, &param)) /* XXX needs RCS */
+		goto out;
+
+	engines->extensions = to_user_pointer(ptr);
+	igt_assert_eq(__gem_context_set_param(i915, &param), -EFAULT);
+
+	engines->extensions = 0;
+	igt_assert_eq(__gem_context_set_param(i915, &param), 0);
+
+	param.value = to_user_pointer(engines - 1);
+	igt_assert_eq(__gem_context_set_param(i915, &param), -EFAULT);
+
+	param.value = to_user_pointer(engines) - 1;
+	igt_assert_eq(__gem_context_set_param(i915, &param), -EFAULT);
+
+	param.value = to_user_pointer(engines) - param.size + 1;
+	igt_assert_eq(__gem_context_set_param(i915, &param), -EFAULT);
+
+	param.value = to_user_pointer(engines) + 4096;
+	igt_assert_eq(__gem_context_set_param(i915, &param), -EFAULT);
+
+	param.value = to_user_pointer(engines) - param.size + 4096;
+	igt_assert_eq(__gem_context_set_param(i915, &param), 0);
+
+	param.value = to_user_pointer(engines) - param.size + 4096 + 1;
+	igt_assert_eq(__gem_context_set_param(i915, &param), -EFAULT);
+
+	param.value = to_user_pointer(engines) + 4096;
+	igt_assert_eq(__gem_context_set_param(i915, &param), -EFAULT);
+
+	param.value = to_user_pointer(engines) + 4096 - 1;
+	igt_assert_eq(__gem_context_set_param(i915, &param), -EFAULT);
+
+	param.value = to_user_pointer(engines) - 1;
+	igt_assert_eq(__gem_context_set_param(i915, &param), -EFAULT);
+
+	param.value = to_user_pointer(engines - 1);
+	igt_assert_eq(__gem_context_set_param(i915, &param), -EFAULT);
+
+	param.value = to_user_pointer(engines - 1) + 4096;
+	igt_assert_eq(__gem_context_set_param(i915, &param), -EFAULT);
+
+	param.value = to_user_pointer(engines - 1) + 4096 - sizeof(*engines->engines) / 2;
+	igt_assert_eq(__gem_context_set_param(i915, &param), -EFAULT);
+
+	handle = gem_create(i915, 4096 * 3);
+	ptr = gem_mmap__gtt(i915, handle, 4096 * 3, PROT_READ);
+	gem_close(i915, handle);
+
+	munmap(ptr, 4096);
+	munmap(ptr + 8192, 4096);
+
+	param.value = to_user_pointer(ptr + 4096);
+	igt_assert_eq(__gem_context_set_param(i915, &param), 0);
+
+	param.value = to_user_pointer(ptr);
+	igt_assert_eq(__gem_context_set_param(i915, &param), -EFAULT);
+
+	param.value = to_user_pointer(ptr) + 4095;
+	igt_assert_eq(__gem_context_set_param(i915, &param), -EFAULT);
+
+	param.value = to_user_pointer(ptr) + 8192;
+	igt_assert_eq(__gem_context_set_param(i915, &param), -EFAULT);
+
+	param.value = to_user_pointer(ptr) + 12287;
+	igt_assert_eq(__gem_context_set_param(i915, &param), -EFAULT);
+
+	munmap(ptr + 4096, 4096);
+
+out:
+	munmap(engines, 4096);
+	gem_context_destroy(i915, param.ctx_id);
+}
+
+static void idempotent(int i915)
+{
+	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, I915_EXEC_RING_MASK + 1);
+	I915_DEFINE_CONTEXT_PARAM_ENGINES(expected, I915_EXEC_RING_MASK + 1);
+	struct drm_i915_gem_context_param p = {
+		.ctx_id = gem_context_create(i915),
+		.param = I915_CONTEXT_PARAM_ENGINES,
+		.value = to_user_pointer(&engines),
+		.size = sizeof(engines),
+	};
+	const size_t base = sizeof(struct i915_context_param_engines);
+	const struct intel_execution_engine2 *e;
+	int idx;
+
+	/* What goes in, must come out. And what comes out, must go in */
+
+	gem_context_get_param(i915, &p);
+	igt_assert_eq(p.size, 0); /* atm default is to use legacy ring mask */
+
+	idx = 0;
+	memset(&engines, 0, sizeof(engines));
+	for_each_engine_class_instance(i915, e) {
+		engines.engines[idx].engine_class = e->class;
+		engines.engines[idx].engine_instance = e->instance;
+		idx++;
+	}
+	idx *= sizeof(*engines.engines);
+	p.size = base + idx;
+	gem_context_set_param(i915, &p);
+
+	memcpy(&expected, &engines, sizeof(expected));
+
+	gem_context_get_param(i915, &p);
+	igt_assert_eq(p.size, base + idx);
+	igt_assert(!memcmp(&expected, &engines, idx));
+
+	p.size = base;
+	gem_context_set_param(i915, &p);
+	gem_context_get_param(i915, &p);
+	igt_assert_eq(p.size, base);
+
+	/* and it should not have overwritten the previous contents */
+	igt_assert(!memcmp(&expected, &engines, idx));
+
+	memset(&engines, 0, sizeof(engines));
+	engines.engines[0].engine_class = I915_ENGINE_CLASS_INVALID;
+	engines.engines[0].engine_instance = I915_ENGINE_CLASS_INVALID_NONE;
+	idx = sizeof(*engines.engines);
+	p.size = base + idx;
+	gem_context_set_param(i915, &p);
+
+	memcpy(&expected, &engines, sizeof(expected));
+
+	gem_context_get_param(i915, &p);
+	igt_assert_eq(p.size, base + idx);
+	igt_assert(!memcmp(&expected, &engines, idx));
+
+	memset(&engines, 0, sizeof(engines));
+	p.size = sizeof(engines);
+	gem_context_set_param(i915, &p);
+
+	memcpy(&expected, &engines, sizeof(expected));
+
+	gem_context_get_param(i915, &p);
+	igt_assert_eq(p.size, sizeof(engines));
+	igt_assert(!memcmp(&expected, &engines, idx));
+
+	gem_context_destroy(i915, p.ctx_id);
+}
+
+static void execute_one(int i915)
+{
+	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, I915_EXEC_RING_MASK + 1);
+	struct drm_i915_gem_context_param param = {
+		.ctx_id = gem_context_create(i915),
+		.param = I915_CONTEXT_PARAM_ENGINES,
+		.value = to_user_pointer(&engines),
+		/* .size to be filled in later */
+	};
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = gem_create(i915, 4096),
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.rsvd1 = param.ctx_id,
+	};
+	const uint32_t bbe = MI_BATCH_BUFFER_END;
+	const struct intel_execution_engine2 *e;
+
+	gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
+
+	/* Unadulterated I915_EXEC_DEFAULT should work */
+	execbuf.flags = 0;
+	gem_execbuf(i915, &execbuf);
+	gem_sync(i915, obj.handle);
+
+	for_each_engine_class_instance(i915, e) {
+		struct drm_i915_gem_busy busy = { .handle = obj.handle };
+
+		for (int i = -1; i <= I915_EXEC_RING_MASK; i++) {
+			igt_spin_t *spin;
+
+			memset(&engines, 0, sizeof(engines));
+			engine_class(&engines, 0) = e->class;
+			engine_instance(&engines, 0) = e->instance;
+			param.size = offsetof(typeof(engines), engines[1]);
+			gem_context_set_param(i915, &param);
+
+			spin = igt_spin_new(i915,
+					    .ctx = param.ctx_id,
+					    .engine = 0);
+
+			igt_debug("Testing with map of %d engines\n", i + 1);
+			memset(&engines.engines, -1, sizeof(engines.engines));
+			if (i != -1) {
+				engine_class(&engines, i) = e->class;
+				engine_instance(&engines, i) = e->instance;
+			}
+			param.size = sizeof(uint64_t) + (i + 1) * sizeof(uint32_t);
+			gem_context_set_param(i915, &param);
+
+			for (int j = 0; j <= I915_EXEC_RING_MASK; j++) {
+				int expected = j == i ? 0 : -EINVAL;
+
+				execbuf.flags = j;
+				igt_assert_f(__gem_execbuf(i915, &execbuf) == expected,
+					     "Failed to report the %s engine for slot %d (valid at %d)\n",
+					     j == i ? "valid" : "invalid", j, i);
+			}
+
+			do_ioctl(i915, DRM_IOCTL_I915_GEM_BUSY, &busy);
+			igt_assert_eq(busy.busy, i != -1 ? 1 << (e->class + 16) : 0);
+
+			igt_spin_free(i915, spin);
+
+			gem_sync(i915, obj.handle);
+			do_ioctl(i915, DRM_IOCTL_I915_GEM_BUSY, &busy);
+			igt_assert_eq(busy.busy, 0);
+		}
+	}
+
+	/* Restore the defaults and check I915_EXEC_DEFAULT works again. */
+	param.size = 0;
+	gem_context_set_param(i915, &param);
+	execbuf.flags = 0;
+	gem_execbuf(i915, &execbuf);
+
+	gem_close(i915, obj.handle);
+	gem_context_destroy(i915, param.ctx_id);
+}
+
+static void execute_oneforall(int i915)
+{
+	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, I915_EXEC_RING_MASK + 1);
+	struct drm_i915_gem_context_param param = {
+		.ctx_id = gem_context_create(i915),
+		.param = I915_CONTEXT_PARAM_ENGINES,
+		.value = to_user_pointer(&engines),
+		.size = sizeof(engines),
+	};
+	const struct intel_execution_engine2 *e;
+
+	for_each_engine_class_instance(i915, e) {
+		memset(&engines, 0, sizeof(engines));
+		for (int i = 0; i <= I915_EXEC_RING_MASK; i++) {
+			engine_class(&engines, i) = e->class;
+			engine_instance(&engines, i) = e->instance;
+		}
+		gem_context_set_param(i915, &param);
+
+		for (int i = 0; i <= I915_EXEC_RING_MASK; i++) {
+			struct drm_i915_gem_busy busy = {};
+			igt_spin_t *spin;
+
+			spin = __igt_spin_new(i915,
+					      .ctx = param.ctx_id,
+					      .engine = i);
+
+			busy.handle = spin->handle;
+			do_ioctl(i915, DRM_IOCTL_I915_GEM_BUSY, &busy);
+			igt_assert_eq(busy.busy, 1 << (e->class + 16));
+
+			igt_spin_free(i915, spin);
+		}
+	}
+
+	gem_context_destroy(i915, param.ctx_id);
+}
+
+static void execute_allforone(int i915)
+{
+	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, I915_EXEC_RING_MASK + 1);
+	struct drm_i915_gem_context_param param = {
+		.ctx_id = gem_context_create(i915),
+		.param = I915_CONTEXT_PARAM_ENGINES,
+		.value = to_user_pointer(&engines),
+	};
+	const struct intel_execution_engine2 *e;
+	int i;
+
+	i = 0;
+	memset(&engines, 0, sizeof(engines));
+	for_each_engine_class_instance(i915, e) {
+		engine_class(&engines, i) = e->class;
+		engine_instance(&engines, i) = e->instance;
+		i++;
+	}
+	param.size = sizeof(uint64_t) + i * sizeof(uint32_t);
+	gem_context_set_param(i915, &param);
+
+	i = 0;
+	for_each_engine_class_instance(i915, e) {
+		struct drm_i915_gem_busy busy = {};
+		igt_spin_t *spin;
+
+		spin = __igt_spin_new(i915,
+				      .ctx = param.ctx_id,
+				      .engine = i++);
+
+		busy.handle = spin->handle;
+		do_ioctl(i915, DRM_IOCTL_I915_GEM_BUSY, &busy);
+		igt_assert_eq(busy.busy, 1 << (e->class + 16));
+
+		igt_spin_free(i915, spin);
+	}
+
+	gem_context_destroy(i915, param.ctx_id);
+}
+
+static void independent(int i915)
+{
+#define RCS_TIMESTAMP (0x2000 + 0x358)
+	const int gen = intel_gen(intel_get_drm_devid(i915));
+	const int has_64bit_reloc = gen >= 8;
+	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, I915_EXEC_RING_MASK + 1);
+	struct drm_i915_gem_context_param param = {
+		.ctx_id = gem_context_create(i915),
+		.param = I915_CONTEXT_PARAM_ENGINES,
+		.value = to_user_pointer(&engines),
+		.size = sizeof(engines),
+	};
+	struct drm_i915_gem_exec_object2 results = { .handle = gem_create(i915, 4096) };
+	const uint32_t bbe = MI_BATCH_BUFFER_END;
+	int timeline = sw_sync_timeline_create();
+	uint32_t last, *map;
+
+	{
+		struct drm_i915_gem_execbuffer2 execbuf = {
+			.buffers_ptr = to_user_pointer(&results),
+			.buffer_count = 1,
+			.rsvd1 = param.ctx_id,
+		};
+		gem_write(i915, results.handle, 0, &bbe, sizeof(bbe));
+		gem_execbuf(i915, &execbuf);
+		results.flags = EXEC_OBJECT_PINNED;
+	}
+
+	memset(&engines, 0, sizeof(engines)); /* All rcs0 */
+	gem_context_set_param(i915, &param);
+
+	for (int i = 0; i < I915_EXEC_RING_MASK + 1; i++) {
+		struct drm_i915_gem_exec_object2 obj[2] = {
+			results, /* write hazard lies! */
+			{ .handle = gem_create(i915, 4096) },
+		};
+		struct drm_i915_gem_execbuffer2 execbuf = {
+			.buffers_ptr = to_user_pointer(obj),
+			.buffer_count = 2,
+			.rsvd1 = param.ctx_id,
+			.rsvd2 = sw_sync_timeline_create_fence(timeline, i + 1),
+			.flags = (I915_EXEC_RING_MASK - i) | I915_EXEC_FENCE_IN,
+		};
+		uint64_t offset = results.offset + 4 * i;
+		uint32_t *cs;
+		int j = 0;
+
+		cs = gem_mmap__cpu(i915, obj[1].handle, 0, 4096, PROT_WRITE);
+
+		cs[j] = 0x24 << 23 | 1; /* SRM */
+		if (has_64bit_reloc)
+			cs[j]++;
+		j++;
+		cs[j++] = RCS_TIMESTAMP;
+		cs[j++] = offset;
+		if (has_64bit_reloc)
+			cs[j++] = offset >> 32;
+		cs[j++] = MI_BATCH_BUFFER_END;
+
+		munmap(cs, 4096);
+
+		gem_execbuf(i915, &execbuf);
+		gem_close(i915, obj[1].handle);
+		close(execbuf.rsvd2);
+	}
+	close(timeline);
+	gem_sync(i915, results.handle);
+
+	map = gem_mmap__cpu(i915, results.handle, 0, 4096, PROT_READ);
+	gem_set_domain(i915, results.handle, I915_GEM_DOMAIN_CPU, 0);
+	gem_close(i915, results.handle);
+
+	last = map[0];
+	for (int i = 1; i < I915_EXEC_RING_MASK + 1; i++) {
+		igt_assert_f((map[i] - last) > 0,
+			     "Engine instance [%d] executed too late\n", i);
+		last = map[i];
+	}
+	munmap(map, 4096);
+
+	gem_context_destroy(i915, param.ctx_id);
+}
+
+igt_main
+{
+	int i915 = -1;
+
+	igt_fixture {
+		i915 = drm_open_driver_render(DRIVER_INTEL);
+		igt_require_gem(i915);
+
+		gem_require_contexts(i915);
+		igt_require(has_context_engines(i915));
+	}
+
+	igt_subtest("invalid-engines")
+		invalid_engines(i915);
+
+	igt_subtest("idempotent")
+		idempotent(i915);
+
+	igt_subtest("execute-one")
+		execute_one(i915);
+
+	igt_subtest("execute-oneforall")
+		execute_oneforall(i915);
+
+	igt_subtest("execute-allforone")
+		execute_allforone(i915);
+
+	igt_subtest("independent")
+		independent(i915);
+}
diff --git a/tests/meson.build b/tests/meson.build
index 3883ae127..7e0089e74 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -111,6 +111,7 @@ i915_progs = [
 	'gem_ctx_bad_destroy',
 	'gem_ctx_clone',
 	'gem_ctx_create',
+	'gem_ctx_engines',
 	'gem_ctx_exec',
 	'gem_ctx_isolation',
 	'gem_ctx_param',
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [igt-dev] [PATCH i-g-t 12/16] i915: Add gem_ctx_engines
@ 2019-05-08 10:09   ` Chris Wilson
  0 siblings, 0 replies; 92+ messages in thread
From: Chris Wilson @ 2019-05-08 10:09 UTC (permalink / raw)
  To: intel-gfx; +Cc: igt-dev, Andi Shyti

To exercise the new I915_CONTEXT_PARAM_ENGINES and its interactions with
gem_execbuf().

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: Andi Shyti <andi@etezian.org>
Reviewed-by: Andi Shyti <andi@etezian.org>
---
 tests/Makefile.sources       |   1 +
 tests/i915/gem_ctx_engines.c | 517 +++++++++++++++++++++++++++++++++++
 tests/meson.build            |   1 +
 3 files changed, 519 insertions(+)
 create mode 100644 tests/i915/gem_ctx_engines.c

diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index 3552e895b..e7ee27e81 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -22,6 +22,7 @@ TESTS_progs = \
 	drm_mm \
 	drm_read \
 	i915/gem_ctx_clone \
+	i915/gem_ctx_engines \
 	i915/gem_ctx_shared \
 	i915/gem_vm_create \
 	kms_3d \
diff --git a/tests/i915/gem_ctx_engines.c b/tests/i915/gem_ctx_engines.c
new file mode 100644
index 000000000..f83aa4772
--- /dev/null
+++ b/tests/i915/gem_ctx_engines.c
@@ -0,0 +1,517 @@
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "igt.h"
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+
+#include <drm.h>
+
+#include "i915/gem_context.h"
+#include "sw_sync.h"
+
+#define engine_class(e, n) ((e)->engines[(n)].engine_class)
+#define engine_instance(e, n) ((e)->engines[(n)].engine_instance)
+
+static bool has_context_engines(int i915)
+{
+	struct drm_i915_gem_context_param param = {
+		.ctx_id = 0,
+		.param = I915_CONTEXT_PARAM_ENGINES,
+	};
+	return __gem_context_set_param(i915, &param) == 0;
+}
+
+static void invalid_engines(int i915)
+{
+	struct i915_context_param_engines stack = {}, *engines;
+	struct drm_i915_gem_context_param param = {
+		.ctx_id = gem_context_create(i915),
+		.param = I915_CONTEXT_PARAM_ENGINES,
+		.value = to_user_pointer(&stack),
+	};
+	uint32_t handle;
+	void *ptr;
+
+	param.size = 0;
+	igt_assert_eq(__gem_context_set_param(i915, &param), 0);
+
+	param.size = 1;
+	igt_assert_eq(__gem_context_set_param(i915, &param), -EINVAL);
+
+	param.size = sizeof(stack) - 1;
+	igt_assert_eq(__gem_context_set_param(i915, &param), -EINVAL);
+
+	param.size = sizeof(stack) + 1;
+	igt_assert_eq(__gem_context_set_param(i915, &param), -EINVAL);
+
+	param.size = 0;
+	igt_assert_eq(__gem_context_set_param(i915, &param), 0);
+
+	/* Create a single page surrounded by inaccessible nothingness */
+	ptr = mmap(NULL, 3 * 4096, PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0);
+	igt_assert(ptr != MAP_FAILED);
+
+	munmap(ptr, 4096);
+	engines = ptr + 4096;
+	munmap(ptr + 2 * 4096, 4096);
+
+	param.size = sizeof(*engines) + sizeof(*engines->engines);
+	param.value = to_user_pointer(engines);
+
+	engines->engines[0].engine_class = -1;
+	igt_assert_eq(__gem_context_set_param(i915, &param), -ENOENT);
+
+	mprotect(engines, 4096, PROT_READ);
+	igt_assert_eq(__gem_context_set_param(i915, &param), -ENOENT);
+
+	mprotect(engines, 4096, PROT_WRITE);
+	engines->engines[0].engine_class = 0;
+	if (__gem_context_set_param(i915, &param)) /* XXX needs RCS */
+		goto out;
+
+	engines->extensions = to_user_pointer(ptr);
+	igt_assert_eq(__gem_context_set_param(i915, &param), -EFAULT);
+
+	engines->extensions = 0;
+	igt_assert_eq(__gem_context_set_param(i915, &param), 0);
+
+	param.value = to_user_pointer(engines - 1);
+	igt_assert_eq(__gem_context_set_param(i915, &param), -EFAULT);
+
+	param.value = to_user_pointer(engines) - 1;
+	igt_assert_eq(__gem_context_set_param(i915, &param), -EFAULT);
+
+	param.value = to_user_pointer(engines) - param.size + 1;
+	igt_assert_eq(__gem_context_set_param(i915, &param), -EFAULT);
+
+	param.value = to_user_pointer(engines) + 4096;
+	igt_assert_eq(__gem_context_set_param(i915, &param), -EFAULT);
+
+	param.value = to_user_pointer(engines) - param.size + 4096;
+	igt_assert_eq(__gem_context_set_param(i915, &param), 0);
+
+	param.value = to_user_pointer(engines) - param.size + 4096 + 1;
+	igt_assert_eq(__gem_context_set_param(i915, &param), -EFAULT);
+
+	param.value = to_user_pointer(engines) + 4096;
+	igt_assert_eq(__gem_context_set_param(i915, &param), -EFAULT);
+
+	param.value = to_user_pointer(engines) + 4096 - 1;
+	igt_assert_eq(__gem_context_set_param(i915, &param), -EFAULT);
+
+	param.value = to_user_pointer(engines) - 1;
+	igt_assert_eq(__gem_context_set_param(i915, &param), -EFAULT);
+
+	param.value = to_user_pointer(engines - 1);
+	igt_assert_eq(__gem_context_set_param(i915, &param), -EFAULT);
+
+	param.value = to_user_pointer(engines - 1) + 4096;
+	igt_assert_eq(__gem_context_set_param(i915, &param), -EFAULT);
+
+	param.value = to_user_pointer(engines - 1) + 4096 - sizeof(*engines->engines) / 2;
+	igt_assert_eq(__gem_context_set_param(i915, &param), -EFAULT);
+
+	handle = gem_create(i915, 4096 * 3);
+	ptr = gem_mmap__gtt(i915, handle, 4096 * 3, PROT_READ);
+	gem_close(i915, handle);
+
+	munmap(ptr, 4096);
+	munmap(ptr + 8192, 4096);
+
+	param.value = to_user_pointer(ptr + 4096);
+	igt_assert_eq(__gem_context_set_param(i915, &param), 0);
+
+	param.value = to_user_pointer(ptr);
+	igt_assert_eq(__gem_context_set_param(i915, &param), -EFAULT);
+
+	param.value = to_user_pointer(ptr) + 4095;
+	igt_assert_eq(__gem_context_set_param(i915, &param), -EFAULT);
+
+	param.value = to_user_pointer(ptr) + 8192;
+	igt_assert_eq(__gem_context_set_param(i915, &param), -EFAULT);
+
+	param.value = to_user_pointer(ptr) + 12287;
+	igt_assert_eq(__gem_context_set_param(i915, &param), -EFAULT);
+
+	munmap(ptr + 4096, 4096);
+
+out:
+	munmap(engines, 4096);
+	gem_context_destroy(i915, param.ctx_id);
+}
+
+static void idempotent(int i915)
+{
+	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, I915_EXEC_RING_MASK + 1);
+	I915_DEFINE_CONTEXT_PARAM_ENGINES(expected, I915_EXEC_RING_MASK + 1);
+	struct drm_i915_gem_context_param p = {
+		.ctx_id = gem_context_create(i915),
+		.param = I915_CONTEXT_PARAM_ENGINES,
+		.value = to_user_pointer(&engines),
+		.size = sizeof(engines),
+	};
+	const size_t base = sizeof(struct i915_context_param_engines);
+	const struct intel_execution_engine2 *e;
+	int idx;
+
+	/* What goes in, must come out. And what comes out, must go in */
+
+	gem_context_get_param(i915, &p);
+	igt_assert_eq(p.size, 0); /* atm default is to use legacy ring mask */
+
+	idx = 0;
+	memset(&engines, 0, sizeof(engines));
+	for_each_engine_class_instance(i915, e) {
+		engines.engines[idx].engine_class = e->class;
+		engines.engines[idx].engine_instance = e->instance;
+		idx++;
+	}
+	idx *= sizeof(*engines.engines);
+	p.size = base + idx;
+	gem_context_set_param(i915, &p);
+
+	memcpy(&expected, &engines, sizeof(expected));
+
+	gem_context_get_param(i915, &p);
+	igt_assert_eq(p.size, base + idx);
+	igt_assert(!memcmp(&expected, &engines, idx));
+
+	p.size = base;
+	gem_context_set_param(i915, &p);
+	gem_context_get_param(i915, &p);
+	igt_assert_eq(p.size, base);
+
+	/* and it should not have overwritten the previous contents */
+	igt_assert(!memcmp(&expected, &engines, idx));
+
+	memset(&engines, 0, sizeof(engines));
+	engines.engines[0].engine_class = I915_ENGINE_CLASS_INVALID;
+	engines.engines[0].engine_instance = I915_ENGINE_CLASS_INVALID_NONE;
+	idx = sizeof(*engines.engines);
+	p.size = base + idx;
+	gem_context_set_param(i915, &p);
+
+	memcpy(&expected, &engines, sizeof(expected));
+
+	gem_context_get_param(i915, &p);
+	igt_assert_eq(p.size, base + idx);
+	igt_assert(!memcmp(&expected, &engines, idx));
+
+	memset(&engines, 0, sizeof(engines));
+	p.size = sizeof(engines);
+	gem_context_set_param(i915, &p);
+
+	memcpy(&expected, &engines, sizeof(expected));
+
+	gem_context_get_param(i915, &p);
+	igt_assert_eq(p.size, sizeof(engines));
+	igt_assert(!memcmp(&expected, &engines, idx));
+
+	gem_context_destroy(i915, p.ctx_id);
+}
+
+static void execute_one(int i915)
+{
+	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, I915_EXEC_RING_MASK + 1);
+	struct drm_i915_gem_context_param param = {
+		.ctx_id = gem_context_create(i915),
+		.param = I915_CONTEXT_PARAM_ENGINES,
+		.value = to_user_pointer(&engines),
+		/* .size to be filled in later */
+	};
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = gem_create(i915, 4096),
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.rsvd1 = param.ctx_id,
+	};
+	const uint32_t bbe = MI_BATCH_BUFFER_END;
+	const struct intel_execution_engine2 *e;
+
+	gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
+
+	/* Unadulterated I915_EXEC_DEFAULT should work */
+	execbuf.flags = 0;
+	gem_execbuf(i915, &execbuf);
+	gem_sync(i915, obj.handle);
+
+	for_each_engine_class_instance(i915, e) {
+		struct drm_i915_gem_busy busy = { .handle = obj.handle };
+
+		for (int i = -1; i <= I915_EXEC_RING_MASK; i++) {
+			igt_spin_t *spin;
+
+			memset(&engines, 0, sizeof(engines));
+			engine_class(&engines, 0) = e->class;
+			engine_instance(&engines, 0) = e->instance;
+			param.size = offsetof(typeof(engines), engines[1]);
+			gem_context_set_param(i915, &param);
+
+			spin = igt_spin_new(i915,
+					    .ctx = param.ctx_id,
+					    .engine = 0);
+
+			igt_debug("Testing with map of %d engines\n", i + 1);
+			memset(&engines.engines, -1, sizeof(engines.engines));
+			if (i != -1) {
+				engine_class(&engines, i) = e->class;
+				engine_instance(&engines, i) = e->instance;
+			}
+			param.size = sizeof(uint64_t) + (i + 1) * sizeof(uint32_t);
+			gem_context_set_param(i915, &param);
+
+			for (int j = 0; j <= I915_EXEC_RING_MASK; j++) {
+				int expected = j == i ? 0 : -EINVAL;
+
+				execbuf.flags = j;
+				igt_assert_f(__gem_execbuf(i915, &execbuf) == expected,
+					     "Failed to report the %s engine for slot %d (valid at %d)\n",
+					     j == i ? "valid" : "invalid", j, i);
+			}
+
+			do_ioctl(i915, DRM_IOCTL_I915_GEM_BUSY, &busy);
+			igt_assert_eq(busy.busy, i != -1 ? 1 << (e->class + 16) : 0);
+
+			igt_spin_free(i915, spin);
+
+			gem_sync(i915, obj.handle);
+			do_ioctl(i915, DRM_IOCTL_I915_GEM_BUSY, &busy);
+			igt_assert_eq(busy.busy, 0);
+		}
+	}
+
+	/* Restore the defaults and check I915_EXEC_DEFAULT works again. */
+	param.size = 0;
+	gem_context_set_param(i915, &param);
+	execbuf.flags = 0;
+	gem_execbuf(i915, &execbuf);
+
+	gem_close(i915, obj.handle);
+	gem_context_destroy(i915, param.ctx_id);
+}
+
+static void execute_oneforall(int i915)
+{
+	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, I915_EXEC_RING_MASK + 1);
+	struct drm_i915_gem_context_param param = {
+		.ctx_id = gem_context_create(i915),
+		.param = I915_CONTEXT_PARAM_ENGINES,
+		.value = to_user_pointer(&engines),
+		.size = sizeof(engines),
+	};
+	const struct intel_execution_engine2 *e;
+
+	for_each_engine_class_instance(i915, e) {
+		memset(&engines, 0, sizeof(engines));
+		for (int i = 0; i <= I915_EXEC_RING_MASK; i++) {
+			engine_class(&engines, i) = e->class;
+			engine_instance(&engines, i) = e->instance;
+		}
+		gem_context_set_param(i915, &param);
+
+		for (int i = 0; i <= I915_EXEC_RING_MASK; i++) {
+			struct drm_i915_gem_busy busy = {};
+			igt_spin_t *spin;
+
+			spin = __igt_spin_new(i915,
+					      .ctx = param.ctx_id,
+					      .engine = i);
+
+			busy.handle = spin->handle;
+			do_ioctl(i915, DRM_IOCTL_I915_GEM_BUSY, &busy);
+			igt_assert_eq(busy.busy, 1 << (e->class + 16));
+
+			igt_spin_free(i915, spin);
+		}
+	}
+
+	gem_context_destroy(i915, param.ctx_id);
+}
+
+static void execute_allforone(int i915)
+{
+	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, I915_EXEC_RING_MASK + 1);
+	struct drm_i915_gem_context_param param = {
+		.ctx_id = gem_context_create(i915),
+		.param = I915_CONTEXT_PARAM_ENGINES,
+		.value = to_user_pointer(&engines),
+	};
+	const struct intel_execution_engine2 *e;
+	int i;
+
+	i = 0;
+	memset(&engines, 0, sizeof(engines));
+	for_each_engine_class_instance(i915, e) {
+		engine_class(&engines, i) = e->class;
+		engine_instance(&engines, i) = e->instance;
+		i++;
+	}
+	param.size = sizeof(uint64_t) + i * sizeof(uint32_t);
+	gem_context_set_param(i915, &param);
+
+	i = 0;
+	for_each_engine_class_instance(i915, e) {
+		struct drm_i915_gem_busy busy = {};
+		igt_spin_t *spin;
+
+		spin = __igt_spin_new(i915,
+				      .ctx = param.ctx_id,
+				      .engine = i++);
+
+		busy.handle = spin->handle;
+		do_ioctl(i915, DRM_IOCTL_I915_GEM_BUSY, &busy);
+		igt_assert_eq(busy.busy, 1 << (e->class + 16));
+
+		igt_spin_free(i915, spin);
+	}
+
+	gem_context_destroy(i915, param.ctx_id);
+}
+
+static void independent(int i915)
+{
+#define RCS_TIMESTAMP (0x2000 + 0x358)
+	const int gen = intel_gen(intel_get_drm_devid(i915));
+	const int has_64bit_reloc = gen >= 8;
+	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, I915_EXEC_RING_MASK + 1);
+	struct drm_i915_gem_context_param param = {
+		.ctx_id = gem_context_create(i915),
+		.param = I915_CONTEXT_PARAM_ENGINES,
+		.value = to_user_pointer(&engines),
+		.size = sizeof(engines),
+	};
+	struct drm_i915_gem_exec_object2 results = { .handle = gem_create(i915, 4096) };
+	const uint32_t bbe = MI_BATCH_BUFFER_END;
+	int timeline = sw_sync_timeline_create();
+	uint32_t last, *map;
+
+	{
+		struct drm_i915_gem_execbuffer2 execbuf = {
+			.buffers_ptr = to_user_pointer(&results),
+			.buffer_count = 1,
+			.rsvd1 = param.ctx_id,
+		};
+		gem_write(i915, results.handle, 0, &bbe, sizeof(bbe));
+		gem_execbuf(i915, &execbuf);
+		results.flags = EXEC_OBJECT_PINNED;
+	}
+
+	memset(&engines, 0, sizeof(engines)); /* All rcs0 */
+	gem_context_set_param(i915, &param);
+
+	for (int i = 0; i < I915_EXEC_RING_MASK + 1; i++) {
+		struct drm_i915_gem_exec_object2 obj[2] = {
+			results, /* write hazard lies! */
+			{ .handle = gem_create(i915, 4096) },
+		};
+		struct drm_i915_gem_execbuffer2 execbuf = {
+			.buffers_ptr = to_user_pointer(obj),
+			.buffer_count = 2,
+			.rsvd1 = param.ctx_id,
+			.rsvd2 = sw_sync_timeline_create_fence(timeline, i + 1),
+			.flags = (I915_EXEC_RING_MASK - i) | I915_EXEC_FENCE_IN,
+		};
+		uint64_t offset = results.offset + 4 * i;
+		uint32_t *cs;
+		int j = 0;
+
+		cs = gem_mmap__cpu(i915, obj[1].handle, 0, 4096, PROT_WRITE);
+
+		cs[j] = 0x24 << 23 | 1; /* SRM */
+		if (has_64bit_reloc)
+			cs[j]++;
+		j++;
+		cs[j++] = RCS_TIMESTAMP;
+		cs[j++] = offset;
+		if (has_64bit_reloc)
+			cs[j++] = offset >> 32;
+		cs[j++] = MI_BATCH_BUFFER_END;
+
+		munmap(cs, 4096);
+
+		gem_execbuf(i915, &execbuf);
+		gem_close(i915, obj[1].handle);
+		close(execbuf.rsvd2);
+	}
+	close(timeline);
+	gem_sync(i915, results.handle);
+
+	map = gem_mmap__cpu(i915, results.handle, 0, 4096, PROT_READ);
+	gem_set_domain(i915, results.handle, I915_GEM_DOMAIN_CPU, 0);
+	gem_close(i915, results.handle);
+
+	last = map[0];
+	for (int i = 1; i < I915_EXEC_RING_MASK + 1; i++) {
+		igt_assert_f((map[i] - last) > 0,
+			     "Engine instance [%d] executed too late\n", i);
+		last = map[i];
+	}
+	munmap(map, 4096);
+
+	gem_context_destroy(i915, param.ctx_id);
+}
+
+igt_main
+{
+	int i915 = -1;
+
+	igt_fixture {
+		i915 = drm_open_driver_render(DRIVER_INTEL);
+		igt_require_gem(i915);
+
+		gem_require_contexts(i915);
+		igt_require(has_context_engines(i915));
+	}
+
+	igt_subtest("invalid-engines")
+		invalid_engines(i915);
+
+	igt_subtest("idempotent")
+		idempotent(i915);
+
+	igt_subtest("execute-one")
+		execute_one(i915);
+
+	igt_subtest("execute-oneforall")
+		execute_oneforall(i915);
+
+	igt_subtest("execute-allforone")
+		execute_allforone(i915);
+
+	igt_subtest("independent")
+		independent(i915);
+}
diff --git a/tests/meson.build b/tests/meson.build
index 3883ae127..7e0089e74 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -111,6 +111,7 @@ i915_progs = [
 	'gem_ctx_bad_destroy',
 	'gem_ctx_clone',
 	'gem_ctx_create',
+	'gem_ctx_engines',
 	'gem_ctx_exec',
 	'gem_ctx_isolation',
 	'gem_ctx_param',
-- 
2.20.1

_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH i-g-t 13/16] i915: Add gem_exec_balancer
  2019-05-08 10:09 ` [igt-dev] " Chris Wilson
@ 2019-05-08 10:09   ` Chris Wilson
  -1 siblings, 0 replies; 92+ messages in thread
From: Chris Wilson @ 2019-05-08 10:09 UTC (permalink / raw)
  To: intel-gfx; +Cc: igt-dev

Exercise the in-kernel load balancer, checking that we can distribute
batches across the set of ctx->engines to spread the load.
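
For reference, a minimal sketch of the uAPI sequence under test (modelled
on __set_load_balancer() below, assuming a platform with two video engines
vcs0/vcs1; error handling omitted):

	I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(balancer, 2);
	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 3);
	struct drm_i915_gem_context_param p = {
		.ctx_id = gem_context_create(i915),
		.param = I915_CONTEXT_PARAM_ENGINES,
		.size = sizeof(engines),
		.value = to_user_pointer(&engines),
	};

	memset(&balancer, 0, sizeof(balancer));
	balancer.base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE;
	balancer.num_siblings = 2;
	balancer.engines[0].engine_class = I915_ENGINE_CLASS_VIDEO;
	balancer.engines[0].engine_instance = 0;
	balancer.engines[1].engine_class = I915_ENGINE_CLASS_VIDEO;
	balancer.engines[1].engine_instance = 1;

	/* slot 0 becomes the virtual engine, slots 1-2 its siblings */
	memset(&engines, 0, sizeof(engines));
	engines.extensions = to_user_pointer(&balancer);
	engines.engines[0].engine_class = I915_ENGINE_CLASS_INVALID;
	engines.engines[0].engine_instance = I915_ENGINE_CLASS_INVALID_NONE;
	engines.engines[1] = balancer.engines[0];
	engines.engines[2] = balancer.engines[1];

	gem_context_set_param(i915, &p);
	/* an execbuf with flags = 0 now load-balances across vcs0/vcs1 */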

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 tests/Makefile.am              |    1 +
 tests/Makefile.sources         |    1 +
 tests/i915/gem_exec_balancer.c | 1076 ++++++++++++++++++++++++++++++++
 tests/meson.build              |    7 +
 4 files changed, 1085 insertions(+)
 create mode 100644 tests/i915/gem_exec_balancer.c

diff --git a/tests/Makefile.am b/tests/Makefile.am
index 5097debf6..c6af0aeaf 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -96,6 +96,7 @@ gem_close_race_LDADD = $(LDADD) -lpthread
 gem_ctx_thrash_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
 gem_ctx_thrash_LDADD = $(LDADD) -lpthread
 gem_ctx_sseu_LDADD = $(LDADD) $(top_builddir)/lib/libigt_perf.la
+i915_gem_exec_balancer_LDADD = $(LDADD) $(top_builddir)/lib/libigt_perf.la
 gem_exec_capture_LDADD = $(LDADD) -lz
 gem_exec_parallel_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
 gem_exec_parallel_LDADD = $(LDADD) -lpthread
diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index e7ee27e81..323b625aa 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -24,6 +24,7 @@ TESTS_progs = \
 	i915/gem_ctx_clone \
 	i915/gem_ctx_engines \
 	i915/gem_ctx_shared \
+	i915/gem_exec_balancer \
 	i915/gem_vm_create \
 	kms_3d \
 	kms_addfb_basic \
diff --git a/tests/i915/gem_exec_balancer.c b/tests/i915/gem_exec_balancer.c
new file mode 100644
index 000000000..25195d478
--- /dev/null
+++ b/tests/i915/gem_exec_balancer.c
@@ -0,0 +1,1076 @@
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <sched.h>
+
+#include "igt.h"
+#include "igt_perf.h"
+#include "i915/gem_ring.h"
+#include "sw_sync.h"
+
+IGT_TEST_DESCRIPTION("Exercise in-kernel load-balancing");
+
+#define INSTANCE_COUNT (1 << I915_PMU_SAMPLE_INSTANCE_BITS)
+
+static bool has_class_instance(int i915, uint16_t class, uint16_t instance)
+{
+	int fd;
+
+	fd = perf_i915_open(I915_PMU_ENGINE_BUSY(class, instance));
+	if (fd != -1) {
+		close(fd);
+		return true;
+	}
+
+	return false;
+}
+
+static struct i915_engine_class_instance *
+list_engines(int i915, uint32_t class_mask, unsigned int *out)
+{
+	unsigned int count = 0, size = 64;
+	struct i915_engine_class_instance *engines;
+
+	engines = malloc(size * sizeof(*engines));
+	if (!engines) {
+		*out = 0;
+		return NULL;
+	}
+
+	for (enum drm_i915_gem_engine_class class = I915_ENGINE_CLASS_RENDER;
+	     class_mask;
+	     class++, class_mask >>= 1) {
+		if (!(class_mask & 1))
+			continue;
+
+		for (unsigned int instance = 0;
+		     instance < INSTANCE_COUNT;
+		     instance++) {
+			if (!has_class_instance(i915, class, instance))
+				continue;
+
+			if (count == size) {
+				struct i915_engine_class_instance *e;
+
+				size *= 2;
+				e = realloc(engines, size * sizeof(*engines));
+				if (!e) {
+					*out = count;
+					return engines;
+				}
+
+				engines = e;
+			}
+
+			engines[count++] = (struct i915_engine_class_instance){
+				.engine_class = class,
+				.engine_instance = instance,
+			};
+		}
+	}
+
+	if (!count) {
+		free(engines);
+		engines = NULL;
+	}
+
+	*out = count;
+	return engines;
+}
+
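+/*
+ * The engine map is laid out with slot 0 holding the invalid class/instance
+ * pair; the load-balance extension (engine_index 0 by default) then turns
+ * that slot into a virtual engine backed by the supplied siblings, while
+ * slots 1..count expose the same siblings for direct submission.
+ */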
+static int __set_load_balancer(int i915, uint32_t ctx,
+			       const struct i915_engine_class_instance *ci,
+			       unsigned int count)
+{
+	I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(balancer, count);
+	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 1 + count);
+	struct drm_i915_gem_context_param p = {
+		.ctx_id = ctx,
+		.param = I915_CONTEXT_PARAM_ENGINES,
+		.size = sizeof(engines),
+		.value = to_user_pointer(&engines)
+	};
+
+	memset(&balancer, 0, sizeof(balancer));
+	balancer.base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE;
+
+	igt_assert(count);
+	balancer.num_siblings = count;
+	memcpy(balancer.engines, ci, count * sizeof(*ci));
+
+	memset(&engines, 0, sizeof(engines));
+	engines.extensions = to_user_pointer(&balancer);
+	engines.engines[0].engine_class =
+		I915_ENGINE_CLASS_INVALID;
+	engines.engines[0].engine_instance =
+		I915_ENGINE_CLASS_INVALID_NONE;
+	memcpy(engines.engines + 1, ci, count * sizeof(*ci));
+
+	return __gem_context_set_param(i915, &p);
+}
+
+static void set_load_balancer(int i915, uint32_t ctx,
+			      const struct i915_engine_class_instance *ci,
+			      unsigned int count)
+{
+	igt_assert_eq(__set_load_balancer(i915, ctx, ci, count), 0);
+}
+
+static uint32_t load_balancer_create(int i915,
+				     const struct i915_engine_class_instance *ci,
+				     unsigned int count)
+{
+	uint32_t ctx;
+
+	ctx = gem_context_create(i915);
+	set_load_balancer(i915, ctx, ci, count);
+
+	return ctx;
+}
+
+static uint32_t __batch_create(int i915, uint32_t offset)
+{
+	const uint32_t bbe = MI_BATCH_BUFFER_END;
+	uint32_t handle;
+
+	handle = gem_create(i915, ALIGN(offset + 4, 4096));
+	gem_write(i915, handle, offset, &bbe, sizeof(bbe));
+
+	return handle;
+}
+
+static uint32_t batch_create(int i915)
+{
+	return __batch_create(i915, 0);
+}
+
+static void invalid_balancer(int i915)
+{
+	I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(balancer, 64);
+	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 64);
+	struct drm_i915_gem_context_param p = {
+		.param = I915_CONTEXT_PARAM_ENGINES,
+		.value = to_user_pointer(&engines)
+	};
+	uint32_t handle;
+	void *ptr;
+
+	/*
+	 * Assume that I915_CONTEXT_PARAM_ENGINES validates the array
+	 * of engines[]; our job is to determine whether the load_balancer
+	 * extension explodes.
+	 */
+
+	for (int class = 0; class < 32; class++) {
+		struct i915_engine_class_instance *ci;
+		unsigned int count;
+
+		ci = list_engines(i915, 1 << class, &count);
+		if (!ci)
+			continue;
+
+		igt_debug("Found %d engines\n", count);
+		igt_assert_lte(count + 2, 64);
+
+		p.ctx_id = gem_context_create(i915);
+		p.size = (sizeof(struct i915_context_param_engines) +
+				(count + 1) * sizeof(*engines.engines));
+
+		memset(&engines, 0, sizeof(engines));
+		engines.engines[0].engine_class = I915_ENGINE_CLASS_INVALID;
+		engines.engines[0].engine_instance = I915_ENGINE_CLASS_INVALID_NONE;
+		memcpy(engines.engines + 1, ci, count * sizeof(*ci));
+		gem_context_set_param(i915, &p);
+
+		engines.extensions = -1ull;
+		igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
+
+		engines.extensions = 1ull;
+		igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
+
+		memset(&balancer, 0, sizeof(balancer));
+		balancer.base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE;
+		balancer.num_siblings = count;
+		memcpy(balancer.engines, ci, count * sizeof(*ci));
+
+		engines.extensions = to_user_pointer(&balancer);
+		gem_context_set_param(i915, &p);
+
+		balancer.engine_index = 1;
+		igt_assert_eq(__gem_context_set_param(i915, &p), -EEXIST);
+
+		balancer.engine_index = count;
+		igt_assert_eq(__gem_context_set_param(i915, &p), -EEXIST);
+
+		balancer.engine_index = count + 1;
+		igt_assert_eq(__gem_context_set_param(i915, &p), -EINVAL);
+
+		balancer.engine_index = 0;
+		gem_context_set_param(i915, &p);
+
+		balancer.base.next_extension = to_user_pointer(&balancer);
+		igt_assert_eq(__gem_context_set_param(i915, &p), -EEXIST);
+
+		balancer.base.next_extension = -1ull;
+		igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
+
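+		/*
+		 * Place extension blocks so that they straddle page
+		 * boundaries inside a 3-page mmap, then unmap individual
+		 * pages: a chain that touches unmapped memory must fail
+		 * with -EFAULT rather than be silently truncated.
+		 */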
+		handle = gem_create(i915, 4096 * 3);
+		ptr = gem_mmap__gtt(i915, handle, 4096 * 3, PROT_WRITE);
+		gem_close(i915, handle);
+
+		memset(&engines, 0, sizeof(engines));
+		engines.engines[0].engine_class = I915_ENGINE_CLASS_INVALID;
+		engines.engines[0].engine_instance = I915_ENGINE_CLASS_INVALID_NONE;
+		engines.engines[1].engine_class = I915_ENGINE_CLASS_INVALID;
+		engines.engines[1].engine_instance = I915_ENGINE_CLASS_INVALID_NONE;
+		memcpy(engines.engines + 2, ci, count * sizeof(*ci));
+		p.size = (sizeof(struct i915_context_param_engines) +
+				(count + 2) * sizeof(*engines.engines));
+		gem_context_set_param(i915, &p);
+
+		balancer.base.next_extension = 0;
+		balancer.engine_index = 1;
+		engines.extensions = to_user_pointer(&balancer);
+		gem_context_set_param(i915, &p);
+
+		memcpy(ptr + 4096 - 8, &balancer, sizeof(balancer));
+		memcpy(ptr + 8192 - 8, &balancer, sizeof(balancer));
+		balancer.engine_index = 0;
+
+		engines.extensions = to_user_pointer(ptr) + 4096 - 8;
+		gem_context_set_param(i915, &p);
+
+		balancer.base.next_extension = engines.extensions;
+		engines.extensions = to_user_pointer(&balancer);
+		gem_context_set_param(i915, &p);
+
+		munmap(ptr, 4096);
+		igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
+		engines.extensions = to_user_pointer(ptr) + 4096 - 8;
+		igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
+
+		engines.extensions = to_user_pointer(ptr) + 8192 - 8;
+		gem_context_set_param(i915, &p);
+
+		balancer.base.next_extension = engines.extensions;
+		engines.extensions = to_user_pointer(&balancer);
+		gem_context_set_param(i915, &p);
+
+		munmap(ptr + 8192, 4096);
+		igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
+		engines.extensions = to_user_pointer(ptr) + 8192 - 8;
+		igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
+
+		munmap(ptr + 4096, 4096);
+
+		gem_context_destroy(i915, p.ctx_id);
+		free(ci);
+	}
+}
+
+static void kick_kthreads(int period_us)
+{
+	sched_yield();
+	usleep(period_us);
+}
+
+static double measure_load(int pmu, int period_us)
+{
+	uint64_t data[2];
+	uint64_t d_t, d_v;
+
+	kick_kthreads(period_us);
+
+	igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
+	d_v = -data[0];
+	d_t = -data[1];
+
+	usleep(period_us);
+
+	igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
+	d_v += data[0];
+	d_t += data[1];
+
+	return d_v / (double)d_t;
+}
+
+static double measure_min_load(int pmu, unsigned int num, int period_us)
+{
+	uint64_t data[2 + num];
+	uint64_t d_t, d_v[num];
+	uint64_t min = -1, max = 0;
+
+	kick_kthreads(period_us);
+
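+	/*
+	 * Assuming igt_perf opens the group with PERF_FORMAT_GROUP |
+	 * PERF_FORMAT_TOTAL_TIME_ENABLED, each read returns
+	 * { nr, time_enabled, value[0..num-1] }: data[1] is the elapsed
+	 * time and data[2 + n] the busy time for engine n.
+	 */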
+	igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
+	for (unsigned int n = 0; n < num; n++)
+		d_v[n] = -data[2 + n];
+	d_t = -data[1];
+
+	usleep(period_us);
+
+	igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
+
+	d_t += data[1];
+	for (unsigned int n = 0; n < num; n++) {
+		d_v[n] += data[2 + n];
+		igt_debug("engine[%d]: %.1f%%\n",
+			  n, d_v[n] / (double)d_t * 100);
+		if (d_v[n] < min)
+			min = d_v[n];
+		if (d_v[n] > max)
+			max = d_v[n];
+	}
+
+	igt_debug("elapsed: %"PRIu64"ns, load [%.1f, %.1f]%%\n",
+		  d_t, min / (double)d_t * 100,  max / (double)d_t * 100);
+
+	return min / (double)d_t;
+}
+
+static void check_individual_engine(int i915,
+				    uint32_t ctx,
+				    const struct i915_engine_class_instance *ci,
+				    int idx)
+{
+	igt_spin_t *spin;
+	double load;
+	int pmu;
+
+	pmu = perf_i915_open(I915_PMU_ENGINE_BUSY(ci[idx].engine_class,
+						  ci[idx].engine_instance));
+
+	spin = igt_spin_new(i915, .ctx = ctx, .engine = idx + 1);
+	load = measure_load(pmu, 10000);
+	igt_spin_free(i915, spin);
+
+	close(pmu);
+
+	igt_assert_f(load > 0.90,
+		     "engine %d (class:instance %d:%d) was found to be only %.1f%% busy\n",
+		     idx, ci[idx].engine_class, ci[idx].engine_instance, load*100);
+}
+
+static void individual(int i915)
+{
+	uint32_t ctx;
+
+	/*
+	 * I915_CONTEXT_PARAM_ENGINES allows us to index into the
+	 * user-supplied array from gem_execbuf(). Our check is to build
+	 * ctx->engine[] with various engine classes, feed in a spinner
+	 * and then ask the PMU to confirm that the expected engine
+	 * was busy.
+	 */
+
+	ctx = gem_context_create(i915);
+
+	for (int mask = 0; mask < 32; mask++) {
+		struct i915_engine_class_instance *ci;
+		unsigned int count;
+
+		ci = list_engines(i915, 1u << mask, &count);
+		if (!ci)
+			continue;
+
+		igt_debug("Found %d engines of class %d\n", count, mask);
+
+		for (int pass = 0; pass < count; pass++) { /* approx. count! */
+			igt_permute_array(ci, count, igt_exchange_int64);
+			set_load_balancer(i915, ctx, ci, count);
+			for (unsigned int n = 0; n < count; n++)
+				check_individual_engine(i915, ctx, ci, n);
+		}
+
+		free(ci);
+	}
+
+	gem_context_destroy(i915, ctx);
+	gem_quiescent_gpu(i915);
+}
+
+static void indices(int i915)
+{
+	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, I915_EXEC_RING_MASK + 1);
+	struct drm_i915_gem_context_param p = {
+		.ctx_id = gem_context_create(i915),
+		.param = I915_CONTEXT_PARAM_ENGINES,
+		.value = to_user_pointer(&engines)
+	};
+
+	struct drm_i915_gem_exec_object2 batch = {
+		.handle = batch_create(i915),
+	};
+
+	unsigned int nengines = 0;
+	void *balancers = NULL;
+
+	/*
+	 * We can populate our engine map with multiple virtual engines.
+	 * Do so.
+	 */
+
+	for (int class = 0; class < 32; class++) {
+		struct i915_engine_class_instance *ci;
+		unsigned int count;
+
+		ci = list_engines(i915, 1u << class, &count);
+		if (!ci)
+			continue;
+
+		igt_debug("Found %d engines of class %d\n", count, class);
+
+		for (int n = 0; n < count; n++) {
+			I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(*balancer,
+								 count);
+
+			engines.engines[nengines].engine_class =
+				I915_ENGINE_CLASS_INVALID;
+			engines.engines[nengines].engine_instance =
+				I915_ENGINE_CLASS_INVALID_NONE;
+
+			balancer = calloc(1, sizeof(*balancer));
+			igt_assert(balancer);
+
+			balancer->base.name =
+				I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE;
+			balancer->base.next_extension =
+				to_user_pointer(balancers);
+			balancers = balancer;
+
+			balancer->engine_index = nengines++;
+			balancer->num_siblings = count;
+
+			memcpy(balancer->engines,
+			       ci, count * sizeof(*ci));
+		}
+		free(ci);
+	}
+
+	igt_require(balancers);
+	engines.extensions = to_user_pointer(balancers);
+	p.size = (sizeof(struct i915_engine_class_instance) * nengines +
+		  sizeof(struct i915_context_param_engines));
+	gem_context_set_param(i915, &p);
+
+	for (unsigned int n = 0; n < nengines; n++) {
+		struct drm_i915_gem_execbuffer2 eb = {
+			.buffers_ptr = to_user_pointer(&batch),
+			.buffer_count = 1,
+			.flags = n,
+			.rsvd1 = p.ctx_id,
+		};
+		igt_debug("Executing on index=%d\n", n);
+		gem_execbuf(i915, &eb);
+	}
+	gem_context_destroy(i915, p.ctx_id);
+
+	gem_sync(i915, batch.handle);
+	gem_close(i915, batch.handle);
+
+	while (balancers) {
+		struct i915_context_engines_load_balance *b, *n;
+
+		b = balancers;
+		n = from_user_pointer(b->base.next_extension);
+		free(b);
+
+		balancers = n;
+	}
+
+	gem_quiescent_gpu(i915);
+}
+
+static void busy(int i915)
+{
+	uint32_t scratch = gem_create(i915, 4096);
+
+	/*
+	 * Check that virtual engines are reported via GEM_BUSY.
+	 *
+	 * When running, the batch will be on the real engine and report
+	 * the actual class.
+	 *
+	 * Prior to running, if the load-balancer is across multiple
+	 * classes we don't know which engine the batch will
+	 * execute on, so we report them all!
+	 *
+	 * However, as we only support (and test) creating a load-balancer
+	 * from engines of a single class, the class can be propagated
+	 * accurately through to GEM_BUSY.
+	 */
+
+	for (int class = 0; class < 16; class++) {
+		struct drm_i915_gem_busy busy;
+		struct i915_engine_class_instance *ci;
+		unsigned int count;
+		igt_spin_t *spin[2];
+		uint32_t ctx;
+
+		ci = list_engines(i915, 1u << class, &count);
+		if (!ci)
+			continue;
+
+		igt_debug("Found %d engines of class %d\n", count, class);
+		ctx = load_balancer_create(i915, ci, count);
+		free(ci);
+
+		spin[0] = __igt_spin_new(i915,
+					 .ctx = ctx,
+					 .flags = IGT_SPIN_POLL_RUN);
+		spin[1] = __igt_spin_new(i915,
+					 .ctx = ctx,
+					 .dependency = scratch);
+
+		igt_spin_busywait_until_started(spin[0]);
+
+		/* Running: actual class */
+		busy.handle = spin[0]->handle;
+		do_ioctl(i915, DRM_IOCTL_I915_GEM_BUSY, &busy);
+		igt_assert_eq_u32(busy.busy, 1u << (class + 16));
+
+		/* Queued(read): expected class */
+		busy.handle = spin[1]->handle;
+		do_ioctl(i915, DRM_IOCTL_I915_GEM_BUSY, &busy);
+		igt_assert_eq_u32(busy.busy, 1u << (class + 16));
+
+		/* Queued(write): expected class */
+		busy.handle = scratch;
+		do_ioctl(i915, DRM_IOCTL_I915_GEM_BUSY, &busy);
+		igt_assert_eq_u32(busy.busy,
+				  (1u << (class + 16)) | (class + 1));
+
+		igt_spin_free(i915, spin[1]);
+		igt_spin_free(i915, spin[0]);
+
+		gem_context_destroy(i915, ctx);
+	}
+
+	gem_close(i915, scratch);
+	gem_quiescent_gpu(i915);
+}
+
+static int add_pmu(int pmu, const struct i915_engine_class_instance *ci)
+{
+	return perf_i915_open_group(I915_PMU_ENGINE_BUSY(ci->engine_class,
+							 ci->engine_instance),
+				    pmu);
+}
+
+static void full(int i915, unsigned int flags)
+#define PULSE 0x1
+#define LATE 0x2
+{
+	struct drm_i915_gem_exec_object2 batch = {
+		.handle = batch_create(i915),
+	};
+
+	if (flags & LATE)
+		igt_require_sw_sync();
+
+	/*
+	 * I915_CONTEXT_PARAM_ENGINES changes the meaning of I915_EXEC_DEFAULT
+	 * to provide an automatic selection from the ctx->engine[]. It
+	 * employs load-balancing to evenly distribute the workload across
+	 * the array. If we submit N spinners, we expect them to run
+	 * simultaneously across N engines and use the PMU to confirm that
+	 * the entire set of engines is busy.
+	 *
+	 * We complicate matters by interspersing short-lived tasks to
+	 * challenge the kernel to search for space in which to insert
+	 * new batches.
+
+	for (int mask = 0; mask < 32; mask++) {
+		struct i915_engine_class_instance *ci;
+		igt_spin_t *spin = NULL;
+		IGT_CORK_FENCE(cork);
+		unsigned int count;
+		double load;
+		int fence = -1;
+		int *pmu;
+
+		ci = list_engines(i915, 1u << mask, &count);
+		if (!ci)
+			continue;
+
+		igt_debug("Found %d engines of class %d\n", count, mask);
+
+		pmu = malloc(sizeof(*pmu) * count);
+		igt_assert(pmu);
+
+		if (flags & LATE)
+			fence = igt_cork_plug(&cork, i915);
+
+		pmu[0] = -1;
+		for (unsigned int n = 0; n < count; n++) {
+			uint32_t ctx;
+
+			pmu[n] = add_pmu(pmu[0], &ci[n]);
+
+			if (flags & PULSE) {
+				struct drm_i915_gem_execbuffer2 eb = {
+					.buffers_ptr = to_user_pointer(&batch),
+					.buffer_count = 1,
+					.rsvd2 = fence,
+					.flags = flags & LATE ? I915_EXEC_FENCE_IN : 0,
+				};
+				gem_execbuf(i915, &eb);
+			}
+
+			/*
+			 * Each spinner needs to be on a new timeline,
+			 * otherwise they will just sit in the single queue
+			 * and not run concurrently.
+			 */
+			ctx = load_balancer_create(i915, ci, count);
+
+			if (spin == NULL) {
+				spin = __igt_spin_new(i915, .ctx = ctx);
+			} else {
+				struct drm_i915_gem_execbuffer2 eb = {
+					.buffers_ptr = spin->execbuf.buffers_ptr,
+					.buffer_count = spin->execbuf.buffer_count,
+					.rsvd1 = ctx,
+					.rsvd2 = fence,
+					.flags = flags & LATE ? I915_EXEC_FENCE_IN : 0,
+				};
+				gem_execbuf(i915, &eb);
+			}
+
+			gem_context_destroy(i915, ctx);
+		}
+
+		if (flags & LATE) {
+			igt_cork_unplug(&cork);
+			close(fence);
+		}
+
+		load = measure_min_load(pmu[0], count, 10000);
+		igt_spin_free(i915, spin);
+
+		close(pmu[0]);
+		free(pmu);
+
+		free(ci);
+
+		igt_assert_f(load > 0.90,
+			     "minimum load for %d x class:%d was found to be only %.1f%% busy\n",
+			     count, mask, load*100);
+		gem_quiescent_gpu(i915);
+	}
+
+	gem_close(i915, batch.handle);
+	gem_quiescent_gpu(i915);
+}
+
+static void nop(int i915)
+{
+	struct drm_i915_gem_exec_object2 batch = {
+		.handle = batch_create(i915),
+	};
+
+	for (int mask = 0; mask < 32; mask++) {
+		struct i915_engine_class_instance *ci;
+		unsigned int count;
+		uint32_t ctx;
+
+		ci = list_engines(i915, 1u << mask, &count);
+		if (!ci)
+			continue;
+
+		if (count < 2) {
+			free(ci);
+			continue;
+		}
+
+		igt_debug("Found %d engines of class %d\n", count, mask);
+		ctx = load_balancer_create(i915, ci, count);
+
+		for (int n = 0; n < count; n++) {
+			struct drm_i915_gem_execbuffer2 execbuf = {
+				.buffers_ptr = to_user_pointer(&batch),
+				.buffer_count = 1,
+				.flags = n + 1,
+				.rsvd1 = ctx,
+			};
+			struct timespec tv = {};
+			unsigned long nops;
+			double t;
+
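+			/*
+			 * Submit nop batches in blocks of 1024 for ~2s and
+			 * report the mean time per batch in microseconds.
+			 */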
+			igt_nsec_elapsed(&tv);
+			nops = 0;
+			do {
+				for (int r = 0; r < 1024; r++)
+					gem_execbuf(i915, &execbuf);
+				nops += 1024;
+			} while (igt_seconds_elapsed(&tv) < 2);
+			gem_sync(i915, batch.handle);
+
+			t = igt_nsec_elapsed(&tv) * 1e-3 / nops;
+			igt_info("%x:%d %.3fus\n", mask, n, t);
+		}
+
+		{
+			struct drm_i915_gem_execbuffer2 execbuf = {
+				.buffers_ptr = to_user_pointer(&batch),
+				.buffer_count = 1,
+				.rsvd1 = ctx,
+			};
+			struct timespec tv = {};
+			unsigned long nops;
+			double t;
+
+			igt_nsec_elapsed(&tv);
+			nops = 0;
+			do {
+				for (int r = 0; r < 1024; r++)
+					gem_execbuf(i915, &execbuf);
+				nops += 1024;
+			} while (igt_seconds_elapsed(&tv) < 2);
+			gem_sync(i915, batch.handle);
+
+			t = igt_nsec_elapsed(&tv) * 1e-3 / nops;
+			igt_info("%x:* %.3fus\n", mask, t);
+		}
+
+		igt_fork(child, count) {
+			struct drm_i915_gem_execbuffer2 execbuf = {
+				.buffers_ptr = to_user_pointer(&batch),
+				.buffer_count = 1,
+				.flags = child + 1,
+				.rsvd1 = gem_context_clone(i915, ctx,
+							   I915_CONTEXT_CLONE_ENGINES, 0),
+			};
+			struct timespec tv = {};
+			unsigned long nops;
+			double t;
+
+			igt_nsec_elapsed(&tv);
+			nops = 0;
+			do {
+				for (int r = 0; r < 1024; r++)
+					gem_execbuf(i915, &execbuf);
+				nops += 1024;
+			} while (igt_seconds_elapsed(&tv) < 2);
+			gem_sync(i915, batch.handle);
+
+			t = igt_nsec_elapsed(&tv) * 1e-3 / nops;
+			igt_info("[%d] %x:%d %.3fus\n", child, mask, child, t);
+
+			memset(&tv, 0, sizeof(tv));
+			execbuf.flags = 0;
+
+			igt_nsec_elapsed(&tv);
+			nops = 0;
+			do {
+				for (int r = 0; r < 1024; r++)
+					gem_execbuf(i915, &execbuf);
+				nops += 1024;
+			} while (igt_seconds_elapsed(&tv) < 2);
+			gem_sync(i915, batch.handle);
+
+			t = igt_nsec_elapsed(&tv) * 1e-3 / nops;
+			igt_info("[%d] %x:* %.3fus\n", child, mask, t);
+
+			gem_context_destroy(i915, execbuf.rsvd1);
+		}
+
+		igt_waitchildren();
+
+		gem_context_destroy(i915, ctx);
+		free(ci);
+	}
+
+	gem_close(i915, batch.handle);
+	gem_quiescent_gpu(i915);
+}
+
+static void ping(int i915, uint32_t ctx, unsigned int engine)
+{
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = batch_create(i915),
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.flags = engine,
+		.rsvd1 = ctx,
+	};
+	gem_execbuf(i915, &execbuf);
+	gem_sync(i915, obj.handle);
+	gem_close(i915, obj.handle);
+}
+
+static void semaphore(int i915)
+{
+	uint32_t block[2], scratch;
+	igt_spin_t *spin[3];
+
+	/*
+	 * If we are using HW semaphores to launch serialised requests
+	 * on different engines concurrently, we want to verify that real
+	 * work is unimpeded.
+	 */
+	igt_require(gem_scheduler_has_preemption(i915));
+
+	block[0] = gem_context_create(i915);
+	block[1] = gem_context_create(i915);
+
+	scratch = gem_create(i915, 4096);
+	spin[2] = igt_spin_new(i915, .dependency = scratch);
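+	/*
+	 * spin[2] keeps scratch busy, so each spinner below that also
+	 * depends on scratch must queue up behind it.
+	 */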
+	for (int mask = 1; mask < 32; mask++) {
+		struct i915_engine_class_instance *ci;
+		unsigned int count;
+		uint32_t vip;
+
+		ci = list_engines(i915, 1u << mask, &count);
+		if (!ci)
+			continue;
+
+		if (count < ARRAY_SIZE(block)) {
+			free(ci);
+			continue;
+		}
+
+		/* Ensure that we completely occupy all engines in this group */
+		count = ARRAY_SIZE(block);
+
+		for (int i = 0; i < count; i++) {
+			set_load_balancer(i915, block[i], ci, count);
+			spin[i] = __igt_spin_new(i915,
+						 .ctx = block[i],
+						 .dependency = scratch);
+		}
+
+		/*
+		 * Either we haven't blocked both engines with semaphores,
+		 * or we let the vip through. If not, we hang.
+		 */
+		vip = gem_context_create(i915);
+		set_load_balancer(i915, vip, ci, count);
+		ping(i915, vip, 0);
+		gem_context_destroy(i915, vip);
+
+		for (int i = 0; i < count; i++)
+			igt_spin_free(i915, spin[i]);
+
+		free(ci);
+	}
+	igt_spin_free(i915, spin[2]);
+	gem_close(i915, scratch);
+
+	gem_context_destroy(i915, block[1]);
+	gem_context_destroy(i915, block[0]);
+
+	gem_quiescent_gpu(i915);
+}
+
+static void smoketest(int i915, int timeout)
+{
+	struct drm_i915_gem_exec_object2 batch[2] = {
+		{ .handle = __batch_create(i915, 16380) }
+	};
+	unsigned int ncontext = 0;
+	uint32_t *contexts = NULL;
+	uint32_t *handles = NULL;
+
+	igt_require_sw_sync();
+
+	for (int mask = 0; mask < 32; mask++) {
+		struct i915_engine_class_instance *ci;
+		unsigned int count = 0;
+
+		ci = list_engines(i915, 1u << mask, &count);
+		if (!ci || count < 2) {
+			free(ci);
+			continue;
+		}
+
+		igt_debug("Found %d engines of class %d\n", count, mask);
+
+		ncontext += 128;
+		contexts = realloc(contexts, sizeof(*contexts) * ncontext);
+		igt_assert(contexts);
+
+		for (unsigned int n = ncontext - 128; n < ncontext; n++) {
+			contexts[n] = load_balancer_create(i915, ci, count);
+			igt_assert(contexts[n]);
+		}
+
+		free(ci);
+	}
+	igt_debug("Created %d virtual engines (one per context)\n", ncontext);
+	igt_require(ncontext);
+
+	contexts = realloc(contexts, sizeof(*contexts) * ncontext * 4);
+	igt_assert(contexts);
+	memcpy(contexts + ncontext, contexts, ncontext * sizeof(*contexts));
+	ncontext *= 2;
+	memcpy(contexts + ncontext, contexts, ncontext * sizeof(*contexts));
+	ncontext *= 2;
+
+	handles = malloc(sizeof(*handles) * ncontext);
+	igt_assert(handles);
+	for (unsigned int n = 0; n < ncontext; n++)
+		handles[n] = gem_create(i915, 4096);
+
+	igt_until_timeout(timeout) {
+		unsigned int count = 1 + (rand() % (ncontext - 1));
+		IGT_CORK_FENCE(cork);
+		int fence = igt_cork_plug(&cork, i915);
+
+		for (unsigned int n = 0; n < count; n++) {
+			struct drm_i915_gem_execbuffer2 eb = {
+				.buffers_ptr = to_user_pointer(batch),
+				.buffer_count = ARRAY_SIZE(batch),
+				.rsvd1 = contexts[n],
+				.rsvd2 = fence,
+				.flags = I915_EXEC_BATCH_FIRST | I915_EXEC_FENCE_IN,
+			};
+			batch[1].handle = handles[n];
+			gem_execbuf(i915, &eb);
+		}
+		igt_permute_array(handles, count, igt_exchange_int);
+
+		igt_cork_unplug(&cork);
+		for (unsigned int n = 0; n < count; n++)
+			gem_sync(i915, handles[n]);
+
+		close(fence);
+	}
+
+	for (unsigned int n = 0; n < ncontext; n++) {
+		gem_close(i915, handles[n]);
+		__gem_context_destroy(i915, contexts[n]);
+	}
+	free(handles);
+	free(contexts);
+	gem_close(i915, batch[0].handle);
+}
+
+static bool has_context_engines(int i915)
+{
+	struct drm_i915_gem_context_param p = {
+		.param = I915_CONTEXT_PARAM_ENGINES,
+	};
+
+	return __gem_context_set_param(i915, &p) == 0;
+}
+
+static bool has_load_balancer(int i915)
+{
+	struct i915_engine_class_instance ci = {};
+	uint32_t ctx;
+	int err;
+
+	ctx = gem_context_create(i915);
+	err = __set_load_balancer(i915, ctx, &ci, 1);
+	gem_context_destroy(i915, ctx);
+
+	return err == 0;
+}
+
+igt_main
+{
+	int i915 = -1;
+
+	igt_skip_on_simulation();
+
+	igt_fixture {
+		i915 = drm_open_driver(DRIVER_INTEL);
+		igt_require_gem(i915);
+
+		gem_require_contexts(i915);
+		igt_require(has_context_engines(i915));
+		igt_require(has_load_balancer(i915));
+
+		igt_fork_hang_detector(i915);
+	}
+
+	igt_subtest("invalid-balancer")
+		invalid_balancer(i915);
+
+	igt_subtest("individual")
+		individual(i915);
+
+	igt_subtest("indicies")
+		indicies(i915);
+
+	igt_subtest("busy")
+		busy(i915);
+
+	igt_subtest_group {
+		static const struct {
+			const char *name;
+			unsigned int flags;
+		} phases[] = {
+			{ "", 0 },
+			{ "-pulse", PULSE },
+			{ "-late", LATE },
+			{ "-late-pulse", PULSE | LATE },
+			{ }
+		};
+		for (typeof(*phases) *p = phases; p->name; p++)
+			igt_subtest_f("full%s", p->name)
+				full(i915, p->flags);
+	}
+
+	igt_subtest("nop")
+		nop(i915);
+
+	igt_subtest("semaphore")
+		semaphore(i915);
+
+	igt_subtest("smoke")
+		smoketest(i915, 20);
+
+	igt_fixture {
+		igt_stop_hang_detector();
+	}
+}
diff --git a/tests/meson.build b/tests/meson.build
index 7e0089e74..eeea3611d 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -288,6 +288,13 @@ test_executables += executable('gem_eio',
 	   install : true)
 test_list += 'gem_eio'
 
+test_executables += executable('gem_exec_balancer', 'i915/gem_exec_balancer.c',
+	   dependencies : test_deps + [ lib_igt_perf ],
+	   install_dir : libexecdir,
+	   install_rpath : libexecdir_rpathdir,
+	   install : true)
+test_progs += 'gem_exec_balancer'
+
 test_executables += executable('gem_mocs_settings',
 	   join_paths('i915', 'gem_mocs_settings.c'),
 	   dependencies : test_deps + [ lib_igt_perf ],
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [igt-dev] [PATCH i-g-t 13/16] i915: Add gem_exec_balancer
@ 2019-05-08 10:09   ` Chris Wilson
  0 siblings, 0 replies; 92+ messages in thread
From: Chris Wilson @ 2019-05-08 10:09 UTC (permalink / raw)
  To: intel-gfx; +Cc: igt-dev

Exercise the in-kernel load balancer checking that we can distribute
batches across the set of ctx->engines to avoid load.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 tests/Makefile.am              |    1 +
 tests/Makefile.sources         |    1 +
 tests/i915/gem_exec_balancer.c | 1050 ++++++++++++++++++++++++++++++++
 tests/meson.build              |    7 +
 4 files changed, 1059 insertions(+)
 create mode 100644 tests/i915/gem_exec_balancer.c

diff --git a/tests/Makefile.am b/tests/Makefile.am
index 5097debf6..c6af0aeaf 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -96,6 +96,7 @@ gem_close_race_LDADD = $(LDADD) -lpthread
 gem_ctx_thrash_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
 gem_ctx_thrash_LDADD = $(LDADD) -lpthread
 gem_ctx_sseu_LDADD = $(LDADD) $(top_builddir)/lib/libigt_perf.la
+i915_gem_exec_balancer_LDADD = $(LDADD) $(top_builddir)/lib/libigt_perf.la
 gem_exec_capture_LDADD = $(LDADD) -lz
 gem_exec_parallel_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
 gem_exec_parallel_LDADD = $(LDADD) -lpthread
diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index e7ee27e81..323b625aa 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -24,6 +24,7 @@ TESTS_progs = \
 	i915/gem_ctx_clone \
 	i915/gem_ctx_engines \
 	i915/gem_ctx_shared \
+	i915/gem_exec_balancer \
 	i915/gem_vm_create \
 	kms_3d \
 	kms_addfb_basic \
diff --git a/tests/i915/gem_exec_balancer.c b/tests/i915/gem_exec_balancer.c
new file mode 100644
index 000000000..25195d478
--- /dev/null
+++ b/tests/i915/gem_exec_balancer.c
@@ -0,0 +1,1050 @@
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <sched.h>
+
+#include "igt.h"
+#include "igt_perf.h"
+#include "i915/gem_ring.h"
+#include "sw_sync.h"
+
+IGT_TEST_DESCRIPTION("Exercise in-kernel load-balancing");
+
+#define INSTANCE_COUNT (1 << I915_PMU_SAMPLE_INSTANCE_BITS)
+
+static bool has_class_instance(int i915, uint16_t class, uint16_t instance)
+{
+	int fd;
+
+	fd = perf_i915_open(I915_PMU_ENGINE_BUSY(class, instance));
+	if (fd != -1) {
+		close(fd);
+		return true;
+	}
+
+	return false;
+}
+
+static struct i915_engine_class_instance *
+list_engines(int i915, uint32_t class_mask, unsigned int *out)
+{
+	unsigned int count = 0, size = 64;
+	struct i915_engine_class_instance *engines;
+
+	engines = malloc(size * sizeof(*engines));
+	if (!engines) {
+		*out = 0;
+		return NULL;
+	}
+
+	for (enum drm_i915_gem_engine_class class = I915_ENGINE_CLASS_RENDER;
+	     class_mask;
+	     class++, class_mask >>= 1) {
+		if (!(class_mask & 1))
+			continue;
+
+		for (unsigned int instance = 0;
+		     instance < INSTANCE_COUNT;
+		     instance++) {
+		     if (!has_class_instance(i915, class, instance))
+			     continue;
+
+			if (count == size) {
+				struct i915_engine_class_instance *e;
+
+				size *= 2;
+				e = realloc(engines, size*sizeof(*engines));
+				if (!e) {
+					*out = count;
+					return engines;
+				}
+
+				engines = e;
+			}
+
+			engines[count++] = (struct i915_engine_class_instance){
+				.engine_class = class,
+				.engine_instance = instance,
+			};
+		}
+	}
+
+	if (!count) {
+		free(engines);
+		engines = NULL;
+	}
+
+	*out = count;
+	return engines;
+}
+
+static int __set_load_balancer(int i915, uint32_t ctx,
+			       const struct i915_engine_class_instance *ci,
+			       unsigned int count)
+{
+	I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(balancer, count);
+	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 1 + count);
+	struct drm_i915_gem_context_param p = {
+		.ctx_id = ctx,
+		.param = I915_CONTEXT_PARAM_ENGINES,
+		.size = sizeof(engines),
+		.value = to_user_pointer(&engines)
+	};
+
+	memset(&balancer, 0, sizeof(balancer));
+	balancer.base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE;
+
+	igt_assert(count);
+	balancer.num_siblings = count;
+	memcpy(balancer.engines, ci, count * sizeof(*ci));
+
+	memset(&engines, 0, sizeof(engines));
+	engines.extensions = to_user_pointer(&balancer);
+	engines.engines[0].engine_class =
+		I915_ENGINE_CLASS_INVALID;
+	engines.engines[0].engine_instance =
+		I915_ENGINE_CLASS_INVALID_NONE;
+	memcpy(engines.engines + 1, ci, count * sizeof(*ci));
+
+	return __gem_context_set_param(i915, &p);
+}
+
+static void set_load_balancer(int i915, uint32_t ctx,
+			      const struct i915_engine_class_instance *ci,
+			      unsigned int count)
+{
+	igt_assert_eq(__set_load_balancer(i915, ctx, ci, count), 0);
+}
+
+static uint32_t load_balancer_create(int i915,
+				     const struct i915_engine_class_instance *ci,
+				     unsigned int count)
+{
+	uint32_t ctx;
+
+	ctx = gem_context_create(i915);
+	set_load_balancer(i915, ctx, ci, count);
+
+	return ctx;
+}
+
+static uint32_t __batch_create(int i915, uint32_t offset)
+{
+	const uint32_t bbe = MI_BATCH_BUFFER_END;
+	uint32_t handle;
+
+	handle = gem_create(i915, ALIGN(offset + 4, 4096));
+	gem_write(i915, handle, offset, &bbe, sizeof(bbe));
+
+	return handle;
+}
+
+static uint32_t batch_create(int i915)
+{
+	return __batch_create(i915, 0);
+}
+
+static void invalid_balancer(int i915)
+{
+	I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(balancer, 64);
+	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 64);
+	struct drm_i915_gem_context_param p = {
+		.param = I915_CONTEXT_PARAM_ENGINES,
+		.value = to_user_pointer(&engines)
+	};
+	uint32_t handle;
+	void *ptr;
+
+	/*
+	 * Assume that I915_CONTEXT_PARAM_ENGINE validates the array
+	 * of engines[], our job is to determine if the load_balancer
+	 * extension explodes.
+	 */
+
+	for (int class = 0; class < 32; class++) {
+		struct i915_engine_class_instance *ci;
+		unsigned int count;
+
+		ci = list_engines(i915, 1 << class, &count);
+		if (!ci)
+			continue;
+
+		igt_debug("Found %d engines\n", count);
+		igt_assert_lte(count, 64);
+
+		p.ctx_id = gem_context_create(i915);
+		p.size = (sizeof(struct i915_context_param_engines) +
+				(count + 1) * sizeof(*engines.engines));
+
+		memset(&engines, 0, sizeof(engines));
+		engines.engines[0].engine_class = I915_ENGINE_CLASS_INVALID;
+		engines.engines[0].engine_instance = I915_ENGINE_CLASS_INVALID_NONE;
+		memcpy(engines.engines + 1, ci, count * sizeof(*ci));
+		gem_context_set_param(i915, &p);
+
+		engines.extensions = -1ull;
+		igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
+
+		engines.extensions = 1ull;
+		igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
+
+		memset(&balancer, 0, sizeof(balancer));
+		balancer.base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE;
+		balancer.num_siblings = count;
+		memcpy(balancer.engines, ci, count * sizeof(*ci));
+
+		engines.extensions = to_user_pointer(&balancer);
+		gem_context_set_param(i915, &p);
+
+		balancer.engine_index = 1;
+		igt_assert_eq(__gem_context_set_param(i915, &p), -EEXIST);
+
+		balancer.engine_index = count;
+		igt_assert_eq(__gem_context_set_param(i915, &p), -EEXIST);
+
+		balancer.engine_index = count + 1;
+		igt_assert_eq(__gem_context_set_param(i915, &p), -EINVAL);
+
+		balancer.engine_index = 0;
+		gem_context_set_param(i915, &p);
+
+		balancer.base.next_extension = to_user_pointer(&balancer);
+		igt_assert_eq(__gem_context_set_param(i915, &p), -EEXIST);
+
+		balancer.base.next_extension = -1ull;
+		igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
+
+		handle = gem_create(i915, 4096 * 3);
+		ptr = gem_mmap__gtt(i915, handle, 4096 * 3, PROT_WRITE);
+		gem_close(i915, handle);
+
+		memset(&engines, 0, sizeof(engines));
+		engines.engines[0].engine_class = I915_ENGINE_CLASS_INVALID;
+		engines.engines[0].engine_instance = I915_ENGINE_CLASS_INVALID_NONE;
+		engines.engines[1].engine_class = I915_ENGINE_CLASS_INVALID;
+		engines.engines[1].engine_instance = I915_ENGINE_CLASS_INVALID_NONE;
+		memcpy(engines.engines + 2, ci, count * sizeof(ci));
+		p.size = (sizeof(struct i915_context_param_engines) +
+				(count + 2) * sizeof(*engines.engines));
+		gem_context_set_param(i915, &p);
+
+		balancer.base.next_extension = 0;
+		balancer.engine_index = 1;
+		engines.extensions = to_user_pointer(&balancer);
+		gem_context_set_param(i915, &p);
+
+		memcpy(ptr + 4096 - 8, &balancer, sizeof(balancer));
+		memcpy(ptr + 8192 - 8, &balancer, sizeof(balancer));
+		balancer.engine_index = 0;
+
+		engines.extensions = to_user_pointer(ptr) + 4096 - 8;
+		gem_context_set_param(i915, &p);
+
+		balancer.base.next_extension = engines.extensions;
+		engines.extensions = to_user_pointer(&balancer);
+		gem_context_set_param(i915, &p);
+
+		munmap(ptr, 4096);
+		igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
+		engines.extensions = to_user_pointer(ptr) + 4096 - 8;
+		igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
+
+		engines.extensions = to_user_pointer(ptr) + 8192 - 8;
+		gem_context_set_param(i915, &p);
+
+		balancer.base.next_extension = engines.extensions;
+		engines.extensions = to_user_pointer(&balancer);
+		gem_context_set_param(i915, &p);
+
+		munmap(ptr + 8192, 4096);
+		igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
+		engines.extensions = to_user_pointer(ptr) + 8192 - 8;
+		igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
+
+		munmap(ptr + 4096, 4096);
+
+		gem_context_destroy(i915, p.ctx_id);
+		free(ci);
+	}
+}
+
+static void kick_kthreads(int period_us)
+{
+	sched_yield();
+	usleep(period_us);
+}
+
+static double measure_load(int pmu, int period_us)
+{
+	uint64_t data[2];
+	uint64_t d_t, d_v;
+
+	kick_kthreads(period_us);
+
+	igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
+	d_v = -data[0];
+	d_t = -data[1];
+
+	usleep(period_us);
+
+	igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
+	d_v += data[0];
+	d_t += data[1];
+
+	return d_v / (double)d_t;
+}
+
+static double measure_min_load(int pmu, unsigned int num, int period_us)
+{
+	uint64_t data[2 + num];
+	uint64_t d_t, d_v[num];
+	uint64_t min = -1, max = 0;
+
+	kick_kthreads(period_us);
+
+	igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
+	for (unsigned int n = 0; n < num; n++)
+		d_v[n] = -data[2 + n];
+	d_t = -data[1];
+
+	usleep(period_us);
+
+	igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
+
+	d_t += data[1];
+	for (unsigned int n = 0; n < num; n++) {
+		d_v[n] += data[2 + n];
+		igt_debug("engine[%d]: %.1f%%\n",
+			  n, d_v[n] / (double)d_t * 100);
+		if (d_v[n] < min)
+			min = d_v[n];
+		if (d_v[n] > max)
+			max = d_v[n];
+	}
+
+	igt_debug("elapsed: %"PRIu64"ns, load [%.1f, %.1f]%%\n",
+		  d_t, min / (double)d_t * 100,  max / (double)d_t * 100);
+
+	return min / (double)d_t;
+}
+
+static void check_individual_engine(int i915,
+				    uint32_t ctx,
+				    const struct i915_engine_class_instance *ci,
+				    int idx)
+{
+	igt_spin_t *spin;
+	double load;
+	int pmu;
+
+	pmu = perf_i915_open(I915_PMU_ENGINE_BUSY(ci[idx].engine_class,
+						  ci[idx].engine_instance));
+
+	spin = igt_spin_new(i915, .ctx = ctx, .engine = idx + 1);
+	load = measure_load(pmu, 10000);
+	igt_spin_free(i915, spin);
+
+	close(pmu);
+
+	igt_assert_f(load > 0.90,
+		     "engine %d (class:instance %d:%d) was found to be only %.1f%% busy\n",
+		     idx, ci[idx].engine_class, ci[idx].engine_instance, load*100);
+}
+
+static void individual(int i915)
+{
+	uint32_t ctx;
+
+	/*
+	 * I915_CONTEXT_PARAM_ENGINE allows us to index into the user
+	 * supplied array from gem_execbuf(). Our check is to build the
+	 * ctx->engine[] with various different engine classes, feed in
+	 * a spinner and then ask pmu to confirm it the expected engine
+	 * was busy.
+	 */
+
+	ctx = gem_context_create(i915);
+
+	for (int mask = 0; mask < 32; mask++) {
+		struct i915_engine_class_instance *ci;
+		unsigned int count;
+
+		ci = list_engines(i915, 1u << mask, &count);
+		if (!ci)
+			continue;
+
+		igt_debug("Found %d engines of class %d\n", count, mask);
+
+		for (int pass = 0; pass < count; pass++) { /* approx. count! */
+			igt_permute_array(ci, count, igt_exchange_int64);
+			set_load_balancer(i915, ctx, ci, count);
+			for (unsigned int n = 0; n < count; n++)
+				check_individual_engine(i915, ctx, ci, n);
+		}
+
+		free(ci);
+	}
+
+	gem_context_destroy(i915, ctx);
+	gem_quiescent_gpu(i915);
+}
+
+static void indicies(int i915)
+{
+	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, I915_EXEC_RING_MASK + 1);
+	struct drm_i915_gem_context_param p = {
+		.ctx_id = gem_context_create(i915),
+		.param = I915_CONTEXT_PARAM_ENGINES,
+		.value = to_user_pointer(&engines)
+	};
+
+	struct drm_i915_gem_exec_object2 batch = {
+		.handle = batch_create(i915),
+	};
+
+	unsigned int nengines = 0;
+	void *balancers = NULL;
+
+	/*
+	 * We can populate our engine map with multiple virtual engines.
+	 * Do so.
+	 */
+
+	for (int class = 0; class < 32; class++) {
+		struct i915_engine_class_instance *ci;
+		unsigned int count;
+
+		ci = list_engines(i915, 1u << class, &count);
+		if (!ci)
+			continue;
+
+		igt_debug("Found %d engines of class %d\n", count, class);
+
+		for (int n = 0; n < count; n++) {
+			I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(*balancer,
+								 count);
+
+			engines.engines[nengines].engine_class =
+				I915_ENGINE_CLASS_INVALID;
+			engines.engines[nengines].engine_instance =
+				I915_ENGINE_CLASS_INVALID_NONE;
+
+			balancer = calloc(sizeof(*balancer), 1);
+			igt_assert(balancer);
+
+			balancer->base.name =
+				I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE;
+			balancer->base.next_extension =
+				to_user_pointer(balancers);
+			balancers = balancer;
+
+			balancer->engine_index = nengines++;
+			balancer->num_siblings = count;
+
+			memcpy(balancer->engines,
+			       ci, count * sizeof(*ci));
+		}
+		free(ci);
+	}
+
+	igt_require(balancers);
+	engines.extensions = to_user_pointer(balancers);
+	p.size = (sizeof(struct i915_engine_class_instance) * nengines +
+		  sizeof(struct i915_context_param_engines));
+	gem_context_set_param(i915, &p);
+
+	for (unsigned int n = 0; n < nengines; n++) {
+		struct drm_i915_gem_execbuffer2 eb = {
+			.buffers_ptr = to_user_pointer(&batch),
+			.buffer_count = 1,
+			.flags = n,
+			.rsvd1 = p.ctx_id,
+		};
+		igt_debug("Executing on index=%d\n", n);
+		gem_execbuf(i915, &eb);
+	}
+	gem_context_destroy(i915, p.ctx_id);
+
+	gem_sync(i915, batch.handle);
+	gem_close(i915, batch.handle);
+
+	while (balancers) {
+		struct i915_context_engines_load_balance *b, *n;
+
+		b = balancers;
+		n = from_user_pointer(b->base.next_extension);
+		free(b);
+
+		balancers = n;
+	}
+
+	gem_quiescent_gpu(i915);
+}
+
+static void busy(int i915)
+{
+	uint32_t scratch = gem_create(i915, 4096);
+
+	/*
+	 * Check that virtual engines are reported via GEM_BUSY.
+	 *
+	 * When running, the batch will be on the real engine and report
+	 * the actual class.
+	 *
+	 * Prior to running, if the load-balancer is across multiple
+	 * classes we don't know which engine the batch will
+	 * execute on, so we report them all!
+	 *
+	 * However, as we only support (and test) creating a load-balancer
+	 * from engines of only one class, that can be propagated accurately
+	 * through to GEM_BUSY.
+	 */
+
+	for (int class = 0; class < 16; class++) {
+		struct drm_i915_gem_busy busy;
+		struct i915_engine_class_instance *ci;
+		unsigned int count;
+		igt_spin_t *spin[2];
+		uint32_t ctx;
+
+		ci = list_engines(i915, 1u << class, &count);
+		if (!ci)
+			continue;
+
+		igt_debug("Found %d engines of class %d\n", count, class);
+		ctx = load_balancer_create(i915, ci, count);
+		free(ci);
+
+		spin[0] = __igt_spin_new(i915,
+					 .ctx = ctx,
+					 .flags = IGT_SPIN_POLL_RUN);
+		spin[1] = __igt_spin_new(i915,
+					 .ctx = ctx,
+					 .dependency = scratch);
+
+		igt_spin_busywait_until_started(spin[0]);
+
+		/* Running: actual class */
+		busy.handle = spin[0]->handle;
+		do_ioctl(i915, DRM_IOCTL_I915_GEM_BUSY, &busy);
+		igt_assert_eq_u32(busy.busy, 1u << (class + 16));
+
+		/* Queued(read): expected class */
+		busy.handle = spin[1]->handle;
+		do_ioctl(i915, DRM_IOCTL_I915_GEM_BUSY, &busy);
+		igt_assert_eq_u32(busy.busy, 1u << (class + 16));
+
+		/* Queued(write): expected class */
+		busy.handle = scratch;
+		do_ioctl(i915, DRM_IOCTL_I915_GEM_BUSY, &busy);
+		igt_assert_eq_u32(busy.busy,
+				  (1u << (class + 16)) | (class + 1));
+
+		igt_spin_free(i915, spin[1]);
+		igt_spin_free(i915, spin[0]);
+
+		gem_context_destroy(i915, ctx);
+	}
+
+	gem_close(i915, scratch);
+	gem_quiescent_gpu(i915);
+}
+
+static int add_pmu(int pmu, const struct i915_engine_class_instance *ci)
+{
+	return perf_i915_open_group(I915_PMU_ENGINE_BUSY(ci->engine_class,
+							 ci->engine_instance),
+				    pmu);
+}
+
+static void full(int i915, unsigned int flags)
+#define PULSE 0x1
+#define LATE 0x2
+{
+	struct drm_i915_gem_exec_object2 batch = {
+		.handle = batch_create(i915),
+	};
+
+	if (flags & LATE)
+		igt_require_sw_sync();
+
+	/*
+	 * I915_CONTEXT_PARAM_ENGINE changes the meaning of I915_EXEC_DEFAULT
+	 * to provide an automatic selection from the ctx->engine[]. It
+	 * employs load-balancing to evenly distribute the workload the
+	 * array. If we submit N spinners, we expect them to be simultaneously
+	 * running across N engines and use PMU to confirm that the entire
+	 * set of engines are busy.
+	 *
+	 * We complicate matters by interpersing shortlived tasks to challenge
+	 * the kernel to search for space in which to insert new batches.
+	 */
+
+
+	for (int mask = 0; mask < 32; mask++) {
+		struct i915_engine_class_instance *ci;
+		igt_spin_t *spin = NULL;
+		IGT_CORK_FENCE(cork);
+		unsigned int count;
+		double load;
+		int fence = -1;
+		int *pmu;
+
+		ci = list_engines(i915, 1u << mask, &count);
+		if (!ci)
+			continue;
+
+		igt_debug("Found %d engines of class %d\n", count, mask);
+
+		pmu = malloc(sizeof(*pmu) * count);
+		igt_assert(pmu);
+
+		if (flags & LATE)
+			fence = igt_cork_plug(&cork, i915);
+
+		pmu[0] = -1;
+		for (unsigned int n = 0; n < count; n++) {
+			uint32_t ctx;
+
+			pmu[n] = add_pmu(pmu[0], &ci[n]);
+
+			if (flags & PULSE) {
+				struct drm_i915_gem_execbuffer2 eb = {
+					.buffers_ptr = to_user_pointer(&batch),
+					.buffer_count = 1,
+					.rsvd2 = fence,
+					.flags = flags & LATE ? I915_EXEC_FENCE_IN : 0,
+				};
+				gem_execbuf(i915, &eb);
+			}
+
+			/*
+			 * Each spinner needs to be one a new timeline,
+			 * otherwise they will just sit in the single queue
+			 * and not run concurrently.
+			 */
+			ctx = load_balancer_create(i915, ci, count);
+
+			if (spin == NULL) {
+				spin = __igt_spin_new(i915, .ctx = ctx);
+			} else {
+				struct drm_i915_gem_execbuffer2 eb = {
+					.buffers_ptr = spin->execbuf.buffers_ptr,
+					.buffer_count = spin->execbuf.buffer_count,
+					.rsvd1 = ctx,
+					.rsvd2 = fence,
+					.flags = flags & LATE ? I915_EXEC_FENCE_IN : 0,
+				};
+				gem_execbuf(i915, &eb);
+			}
+
+			gem_context_destroy(i915, ctx);
+		}
+
+		if (flags & LATE) {
+			igt_cork_unplug(&cork);
+			close(fence);
+		}
+
+		load = measure_min_load(pmu[0], count, 10000);
+		igt_spin_free(i915, spin);
+
+		close(pmu[0]);
+		free(pmu);
+
+		free(ci);
+
+		igt_assert_f(load > 0.90,
+			     "minimum load for %d x class:%d was found to be only %.1f%% busy\n",
+			     count, mask, load*100);
+		gem_quiescent_gpu(i915);
+	}
+
+	gem_close(i915, batch.handle);
+	gem_quiescent_gpu(i915);
+}
+
+static void nop(int i915)
+{
+	struct drm_i915_gem_exec_object2 batch = {
+		.handle = batch_create(i915),
+	};
+
+	for (int mask = 0; mask < 32; mask++) {
+		struct i915_engine_class_instance *ci;
+		unsigned int count;
+		uint32_t ctx;
+
+		ci = list_engines(i915, 1u << mask, &count);
+		if (!ci)
+			continue;
+
+		if (count < 2) {
+			free(ci);
+			continue;
+		}
+
+		igt_debug("Found %d engines of class %d\n", count, mask);
+		ctx = load_balancer_create(i915, ci, count);
+
+		for (int n = 0; n < count; n++) {
+			struct drm_i915_gem_execbuffer2 execbuf = {
+				.buffers_ptr = to_user_pointer(&batch),
+				.buffer_count = 1,
+				.flags = n + 1,
+				.rsvd1 = ctx,
+			};
+			struct timespec tv = {};
+			unsigned long nops;
+			double t;
+
+			igt_nsec_elapsed(&tv);
+			nops = 0;
+			do {
+				for (int r = 0; r < 1024; r++)
+					gem_execbuf(i915, &execbuf);
+				nops += 1024;
+			} while (igt_seconds_elapsed(&tv) < 2);
+			gem_sync(i915, batch.handle);
+
+			t = igt_nsec_elapsed(&tv) * 1e-3 / nops;
+			igt_info("%x:%d %.3fus\n", mask, n, t);
+		}
+
+		{
+			struct drm_i915_gem_execbuffer2 execbuf = {
+				.buffers_ptr = to_user_pointer(&batch),
+				.buffer_count = 1,
+				.rsvd1 = ctx,
+			};
+			struct timespec tv = {};
+			unsigned long nops;
+			double t;
+
+			igt_nsec_elapsed(&tv);
+			nops = 0;
+			do {
+				for (int r = 0; r < 1024; r++)
+					gem_execbuf(i915, &execbuf);
+				nops += 1024;
+			} while (igt_seconds_elapsed(&tv) < 2);
+			gem_sync(i915, batch.handle);
+
+			t = igt_nsec_elapsed(&tv) * 1e-3 / nops;
+			igt_info("%x:* %.3fus\n", mask, t);
+		}
+
+		igt_fork(child, count) {
+			struct drm_i915_gem_execbuffer2 execbuf = {
+				.buffers_ptr = to_user_pointer(&batch),
+				.buffer_count = 1,
+				.flags = child + 1,
+				.rsvd1 = gem_context_clone(i915, ctx,
+							   I915_CONTEXT_CLONE_ENGINES, 0),
+			};
+			struct timespec tv = {};
+			unsigned long nops;
+			double t;
+
+			igt_nsec_elapsed(&tv);
+			nops = 0;
+			do {
+				for (int r = 0; r < 1024; r++)
+					gem_execbuf(i915, &execbuf);
+				nops += 1024;
+			} while (igt_seconds_elapsed(&tv) < 2);
+			gem_sync(i915, batch.handle);
+
+			t = igt_nsec_elapsed(&tv) * 1e-3 / nops;
+			igt_info("[%d] %x:%d %.3fus\n", child, mask, child, t);
+
+			memset(&tv, 0, sizeof(tv));
+			execbuf.flags = 0;
+
+			igt_nsec_elapsed(&tv);
+			nops = 0;
+			do {
+				for (int r = 0; r < 1024; r++)
+					gem_execbuf(i915, &execbuf);
+				nops += 1024;
+			} while (igt_seconds_elapsed(&tv) < 2);
+			gem_sync(i915, batch.handle);
+
+			t = igt_nsec_elapsed(&tv) * 1e-3 / nops;
+			igt_info("[%d] %x:* %.3fus\n", child, mask, t);
+
+			gem_context_destroy(i915, execbuf.rsvd1);
+		}
+
+		igt_waitchildren();
+
+		gem_context_destroy(i915, ctx);
+		free(ci);
+	}
+
+	gem_close(i915, batch.handle);
+	gem_quiescent_gpu(i915);
+}
+
+static void ping(int i915, uint32_t ctx, unsigned int engine)
+{
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = batch_create(i915),
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.flags = engine,
+		.rsvd1 = ctx,
+	};
+	gem_execbuf(i915, &execbuf);
+	gem_sync(i915, obj.handle);
+	gem_close(i915, obj.handle);
+}
+
+static void semaphore(int i915)
+{
+	uint32_t block[2], scratch;
+	igt_spin_t *spin[3];
+
+	/*
+	 * If we are using HW semaphores to launch serialised requests
+	 * on different engines concurrently, we want to verify that real
+	 * work is unimpeded.
+	 */
+	igt_require(gem_scheduler_has_preemption(i915));
+
+	block[0] = gem_context_create(i915);
+	block[1] = gem_context_create(i915);
+
+	scratch = gem_create(i915, 4096);
+	spin[2] = igt_spin_new(i915, .dependency = scratch);
+	for (int mask = 1; mask < 32; mask++) {
+		struct i915_engine_class_instance *ci;
+		unsigned int count;
+		uint32_t vip;
+
+		ci = list_engines(i915, 1u << mask, &count);
+		if (!ci)
+			continue;
+
+		if (count < ARRAY_SIZE(block)) {
+			free(ci);
+			continue;
+		}
+
+		/* Ensure that we completely occupy all engines in this group */
+		count = ARRAY_SIZE(block);
+
+		for (int i = 0; i < count; i++) {
+			set_load_balancer(i915, block[i], ci, count);
+			spin[i] = __igt_spin_new(i915,
+						       .ctx = block[i],
+						       .dependency = scratch);
+		}
+
+		/*
+		 * Either we haven't blocked both engines with semaphores,
+		 * or we let the vip through. If not, we hang.
+		 */
+		vip = gem_context_create(i915);
+		set_load_balancer(i915, vip, ci, count);
+		ping(i915, vip, 0);
+		gem_context_destroy(i915, vip);
+
+		for (int i = 0; i < count; i++)
+			igt_spin_free(i915, spin[i]);
+
+		free(ci);
+	}
+	igt_spin_free(i915, spin[2]);
+	gem_close(i915, scratch);
+
+	gem_context_destroy(i915, block[1]);
+	gem_context_destroy(i915, block[0]);
+
+	gem_quiescent_gpu(i915);
+}
+
+static void smoketest(int i915, int timeout)
+{
+	struct drm_i915_gem_exec_object2 batch[2] = {
+		{ .handle = __batch_create(i915, 16380) }
+	};
+	unsigned int ncontext = 0;
+	uint32_t *contexts = NULL;
+	uint32_t *handles = NULL;
+
+	igt_require_sw_sync();
+
+	for (int mask = 0; mask < 32; mask++) {
+		struct i915_engine_class_instance *ci;
+		unsigned int count = 0;
+
+		ci = list_engines(i915, 1u << mask, &count);
+		if (!ci || count < 2) {
+			free(ci);
+			continue;
+		}
+
+		igt_debug("Found %d engines of class %d\n", count, mask);
+
+		ncontext += 128;
+		contexts = realloc(contexts, sizeof(*contexts) * ncontext);
+		igt_assert(contexts);
+
+		for (unsigned int n = ncontext - 128; n < ncontext; n++) {
+			contexts[n] = load_balancer_create(i915, ci, count);
+			igt_assert(contexts[n]);
+		}
+
+		free(ci);
+	}
+	igt_debug("Created %d virtual engines (one per context)\n", ncontext);
+	igt_require(ncontext);
+
+	contexts = realloc(contexts, sizeof(*contexts) * ncontext * 4);
+	igt_assert(contexts);
+	memcpy(contexts + ncontext, contexts, ncontext * sizeof(*contexts));
+	ncontext *= 2;
+	memcpy(contexts + ncontext, contexts, ncontext * sizeof(*contexts));
+	ncontext *= 2;
+
+	handles = malloc(sizeof(*handles) * ncontext);
+	igt_assert(handles);
+	for (unsigned int n = 0; n < ncontext; n++)
+		handles[n] = gem_create(i915, 4096);
+
+	igt_until_timeout(timeout) {
+		unsigned int count = 1 + (rand() % (ncontext - 1));
+		IGT_CORK_FENCE(cork);
+		int fence = igt_cork_plug(&cork, i915);
+
+		for (unsigned int n = 0; n < count; n++) {
+			struct drm_i915_gem_execbuffer2 eb = {
+				.buffers_ptr = to_user_pointer(batch),
+				.buffer_count = ARRAY_SIZE(batch),
+				.rsvd1 = contexts[n],
+				.rsvd2 = fence,
+				.flags = I915_EXEC_BATCH_FIRST | I915_EXEC_FENCE_IN,
+			};
+			batch[1].handle = handles[n];
+			gem_execbuf(i915, &eb);
+		}
+		igt_permute_array(handles, count, igt_exchange_int);
+
+		igt_cork_unplug(&cork);
+		for (unsigned int n = 0; n < count; n++)
+			gem_sync(i915, handles[n]);
+
+		close(fence);
+	}
+
+	for (unsigned int n = 0; n < ncontext; n++) {
+		gem_close(i915, handles[n]);
+		__gem_context_destroy(i915, contexts[n]);
+	}
+	free(handles);
+	free(contexts);
+	gem_close(i915, batch[0].handle);
+}
+
+static bool has_context_engines(int i915)
+{
+	struct drm_i915_gem_context_param p = {
+		.param = I915_CONTEXT_PARAM_ENGINES,
+	};
+
+	return __gem_context_set_param(i915, &p) == 0;
+}
+
+static bool has_load_balancer(int i915)
+{
+	struct i915_engine_class_instance ci = {};
+	uint32_t ctx;
+	int err;
+
+	ctx = gem_context_create(i915);
+	err = __set_load_balancer(i915, ctx, &ci, 1);
+	gem_context_destroy(i915, ctx);
+
+	return err == 0;
+}
+
+igt_main
+{
+	int i915 = -1;
+
+	igt_skip_on_simulation();
+
+	igt_fixture {
+		i915 = drm_open_driver(DRIVER_INTEL);
+		igt_require_gem(i915);
+
+		gem_require_contexts(i915);
+		igt_require(has_context_engines(i915));
+		igt_require(has_load_balancer(i915));
+
+		igt_fork_hang_detector(i915);
+	}
+
+	igt_subtest("invalid-balancer")
+		invalid_balancer(i915);
+
+	igt_subtest("individual")
+		individual(i915);
+
+	igt_subtest("indicies")
+		indicies(i915);
+
+	igt_subtest("busy")
+		busy(i915);
+
+	igt_subtest_group {
+		static const struct {
+			const char *name;
+			unsigned int flags;
+		} phases[] = {
+			{ "", 0 },
+			{ "-pulse", PULSE },
+			{ "-late", LATE },
+			{ "-late-pulse", PULSE | LATE },
+			{ }
+		};
+		for (typeof(*phases) *p = phases; p->name; p++)
+			igt_subtest_f("full%s", p->name)
+				full(i915, p->flags);
+	}
+
+	igt_subtest("nop")
+		nop(i915);
+
+	igt_subtest("semaphore")
+		semaphore(i915);
+
+	igt_subtest("smoke")
+		smoketest(i915, 20);
+
+	igt_fixture {
+		igt_stop_hang_detector();
+	}
+}
diff --git a/tests/meson.build b/tests/meson.build
index 7e0089e74..eeea3611d 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -288,6 +288,13 @@ test_executables += executable('gem_eio',
 	   install : true)
 test_list += 'gem_eio'
 
+test_executables += executable('gem_exec_balancer', 'i915/gem_exec_balancer.c',
+	   dependencies : test_deps + [ lib_igt_perf ],
+	   install_dir : libexecdir,
+	   install_rpath : libexecdir_rpathdir,
+	   install : true)
+test_progs += 'gem_exec_balancer'
+
 test_executables += executable('gem_mocs_settings',
 	   join_paths('i915', 'gem_mocs_settings.c'),
 	   dependencies : test_deps + [ lib_igt_perf ],
-- 
2.20.1

_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH i-g-t 14/16] i915/gem_exec_balancer: Exercise bonded pairs
  2019-05-08 10:09 ` [igt-dev] " Chris Wilson
@ 2019-05-08 10:09   ` Chris Wilson
  -1 siblings, 0 replies; 92+ messages in thread
From: Chris Wilson @ 2019-05-08 10:09 UTC (permalink / raw)
  To: intel-gfx; +Cc: igt-dev

The submit-fence + load-balancing APIs allow us to execute a named
pair of engines in parallel; we do this by submitting a request to one
engine, then using the generated submit-fence to submit a second
request to another engine so that both execute at the same time.
Furthermore, by specifying bonded pairs, we can direct the virtual
engine to use a particular engine in parallel to the first request.
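
As a minimal sketch of the mechanism (condensed from the bonded() loop
in the diff below, with engine enumeration, the PMU check and error
handling omitted; master, ctx and n are as set up there):

	igt_spin_t *spin;
	struct drm_i915_gem_execbuffer2 eb;

	/* First request: engine n of the master context, exporting a
	 * submit-fence alongside it.
	 */
	spin = __igt_spin_new(i915,
			      .ctx = master,
			      .engine = n,
			      .flags = IGT_SPIN_FENCE_OUT);

	/* Second request: submitted to the bonded virtual engine. The
	 * submit-fence queues it to start alongside the first request,
	 * and the bond steers it onto the paired sibling engine.
	 */
	eb = spin->execbuf;
	eb.rsvd1 = ctx;             /* the load-balanced context */
	eb.rsvd2 = spin->out_fence; /* the submit-fence from above */
	eb.flags = I915_EXEC_FENCE_SUBMIT;
	gem_execbuf(i915, &eb);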

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 tests/i915/gem_exec_balancer.c | 234 +++++++++++++++++++++++++++++++--
 1 file changed, 224 insertions(+), 10 deletions(-)

diff --git a/tests/i915/gem_exec_balancer.c b/tests/i915/gem_exec_balancer.c
index 25195d478..20ad66727 100644
--- a/tests/i915/gem_exec_balancer.c
+++ b/tests/i915/gem_exec_balancer.c
@@ -98,9 +98,35 @@ list_engines(int i915, uint32_t class_mask, unsigned int *out)
 	return engines;
 }
 
+static int __set_engines(int i915, uint32_t ctx,
+			 const struct i915_engine_class_instance *ci,
+			 unsigned int count)
+{
+	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, count);
+	struct drm_i915_gem_context_param p = {
+		.ctx_id = ctx,
+		.param = I915_CONTEXT_PARAM_ENGINES,
+		.size = sizeof(engines),
+		.value = to_user_pointer(&engines)
+	};
+
+	engines.extensions = 0;
+	memcpy(engines.engines, ci, sizeof(engines.engines));
+
+	return __gem_context_set_param(i915, &p);
+}
+
+static void set_engines(int i915, uint32_t ctx,
+			const struct i915_engine_class_instance *ci,
+			unsigned int count)
+{
+	igt_assert_eq(__set_engines(i915, ctx, ci, count), 0);
+}
+
 static int __set_load_balancer(int i915, uint32_t ctx,
 			       const struct i915_engine_class_instance *ci,
-			       unsigned int count)
+			       unsigned int count,
+			       void *ext)
 {
 	I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(balancer, count);
 	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 1 + count);
@@ -113,6 +139,7 @@ static int __set_load_balancer(int i915, uint32_t ctx,
 
 	memset(&balancer, 0, sizeof(balancer));
 	balancer.base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE;
+	balancer.base.next_extension = to_user_pointer(ext);
 
 	igt_assert(count);
 	balancer.num_siblings = count;
@@ -131,9 +158,10 @@ static int __set_load_balancer(int i915, uint32_t ctx,
 
 static void set_load_balancer(int i915, uint32_t ctx,
 			      const struct i915_engine_class_instance *ci,
-			      unsigned int count)
+			      unsigned int count,
+			      void *ext)
 {
-	igt_assert_eq(__set_load_balancer(i915, ctx, ci, count), 0);
+	igt_assert_eq(__set_load_balancer(i915, ctx, ci, count, ext), 0);
 }
 
 static uint32_t load_balancer_create(int i915,
@@ -143,7 +171,7 @@ static uint32_t load_balancer_create(int i915,
 	uint32_t ctx;
 
 	ctx = gem_context_create(i915);
-	set_load_balancer(i915, ctx, ci, count);
+	set_load_balancer(i915, ctx, ci, count, NULL);
 
 	return ctx;
 }
@@ -288,6 +316,74 @@ static void invalid_balancer(int i915)
 	}
 }
 
+static void invalid_bonds(int i915)
+{
+	I915_DEFINE_CONTEXT_ENGINES_BOND(bonds[16], 1);
+	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 1);
+	struct drm_i915_gem_context_param p = {
+		.ctx_id = gem_context_create(i915),
+		.param = I915_CONTEXT_PARAM_ENGINES,
+		.value = to_user_pointer(&engines),
+		.size = sizeof(engines),
+	};
+	uint32_t handle;
+	void *ptr;
+
+	memset(&engines, 0, sizeof(engines));
+	gem_context_set_param(i915, &p);
+
+	memset(bonds, 0, sizeof(bonds));
+	for (int n = 0; n < ARRAY_SIZE(bonds); n++) {
+		bonds[n].base.name = I915_CONTEXT_ENGINES_EXT_BOND;
+		bonds[n].base.next_extension =
+			n ? to_user_pointer(&bonds[n - 1]) : 0;
+		bonds[n].num_bonds = 1;
+	}
+	engines.extensions = to_user_pointer(&bonds);
+	gem_context_set_param(i915, &p);
+
+	bonds[0].base.next_extension = -1ull;
+	igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
+
+	bonds[0].base.next_extension = to_user_pointer(&bonds[0]);
+	igt_assert_eq(__gem_context_set_param(i915, &p), -E2BIG);
+
+	engines.extensions = to_user_pointer(&bonds[1]);
+	igt_assert_eq(__gem_context_set_param(i915, &p), -E2BIG);
+	bonds[0].base.next_extension = 0;
+	gem_context_set_param(i915, &p);
+
+	handle = gem_create(i915, 4096 * 3);
+	ptr = gem_mmap__gtt(i915, handle, 4096 * 3, PROT_WRITE);
+	gem_close(i915, handle);
+
+	memcpy(ptr + 4096, &bonds[0], sizeof(bonds[0]));
+	engines.extensions = to_user_pointer(ptr) + 4096;
+	gem_context_set_param(i915, &p);
+
+	memcpy(ptr, &bonds[0], sizeof(bonds[0]));
+	bonds[0].base.next_extension = to_user_pointer(ptr);
+	memcpy(ptr + 4096, &bonds[0], sizeof(bonds[0]));
+	gem_context_set_param(i915, &p);
+
+	munmap(ptr, 4096);
+	igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
+
+	bonds[0].base.next_extension = 0;
+	memcpy(ptr + 8192, &bonds[0], sizeof(bonds[0]));
+	bonds[0].base.next_extension = to_user_pointer(ptr) + 8192;
+	memcpy(ptr + 4096, &bonds[0], sizeof(bonds[0]));
+	gem_context_set_param(i915, &p);
+
+	munmap(ptr + 8192, 4096);
+	igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
+
+	munmap(ptr + 4096, 4096);
+	igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
+
+	gem_context_destroy(i915, p.ctx_id);
+}
+
 static void kick_kthreads(int period_us)
 {
 	sched_yield();
@@ -397,7 +493,7 @@ static void individual(int i915)
 
 		for (int pass = 0; pass < count; pass++) { /* approx. count! */
 			igt_permute_array(ci, count, igt_exchange_int64);
-			set_load_balancer(i915, ctx, ci, count);
+			set_load_balancer(i915, ctx, ci, count, NULL);
 			for (unsigned int n = 0; n < count; n++)
 				check_individual_engine(i915, ctx, ci, n);
 		}
@@ -409,6 +505,115 @@ static void individual(int i915)
 	gem_quiescent_gpu(i915);
 }
 
+static void bonded(int i915, unsigned int flags)
+#define CORK 0x1
+{
+	I915_DEFINE_CONTEXT_ENGINES_BOND(bonds[16], 1);
+	struct i915_engine_class_instance *master_engines;
+	uint32_t master;
+
+	/*
+	 * I915_CONTEXT_PARAM_ENGINES provides an extension that allows us
+	 * to specify which engine(s) to pair with a parallel
+	 * (I915_EXEC_FENCE_SUBMIT) request submitted to another engine.
+	 */
+
+	master = gem_queue_create(i915);
+
+	memset(bonds, 0, sizeof(bonds));
+	for (int n = 0; n < ARRAY_SIZE(bonds); n++) {
+		bonds[n].base.name = I915_CONTEXT_ENGINES_EXT_BOND;
+		bonds[n].base.next_extension =
+			n ? to_user_pointer(&bonds[n - 1]) : 0;
+		bonds[n].num_bonds = 1;
+	}
+
+	for (int mask = 0; mask < 32; mask++) {
+		unsigned int count, limit;
+		struct i915_engine_class_instance *siblings;
+		uint32_t ctx;
+		int n;
+
+		siblings = list_engines(i915, 1u << mask, &count);
+		if (!siblings)
+			continue;
+
+		if (count < 2) {
+			free(siblings);
+			continue;
+		}
+
+		igt_debug("Found %d engines of class %d\n", count, mask);
+
+		master_engines = list_engines(i915, ~(1u << mask), &limit);
+		set_engines(i915, master, master_engines, limit);
+
+		limit = min(count, limit);
+		for (n = 0; n < limit; n++) {
+			bonds[n].master = master_engines[n];
+			bonds[n].engines[0] = siblings[n];
+		}
+
+		ctx = gem_context_clone(i915,
+					master, I915_CONTEXT_CLONE_VM,
+					I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
+		set_load_balancer(i915, ctx, siblings, count, &bonds[limit - 1]);
+
+		for (n = 0; n < limit; n++) {
+			struct drm_i915_gem_execbuffer2 eb;
+			IGT_CORK_HANDLE(cork);
+			igt_spin_t *spin, *plug;
+			double load;
+			int pmu;
+
+			igt_assert(siblings[n].engine_class != master_engines[n].engine_class);
+
+			pmu = perf_i915_open(I915_PMU_ENGINE_BUSY(siblings[n].engine_class,
+								  siblings[n].engine_instance));
+
+			plug = NULL;
+			if (flags & CORK) {
+				plug = __igt_spin_new(i915,
+						      .ctx = master,
+						      .engine = n,
+						      .dependency = igt_cork_plug(&cork, i915));
+			}
+
+			spin = __igt_spin_new(i915,
+					      .ctx = master,
+					      .engine = n,
+					      .flags = IGT_SPIN_FENCE_OUT);
+
+			eb = spin->execbuf;
+			eb.rsvd1 = ctx;
+			eb.rsvd2 = spin->out_fence;
+			eb.flags = I915_EXEC_FENCE_SUBMIT;
+			gem_execbuf(i915, &eb);
+
+			if (plug) {
+				igt_cork_unplug(&cork);
+				igt_spin_free(i915, plug);
+			}
+
+			load = measure_load(pmu, 10000);
+			igt_spin_free(i915, spin);
+
+			close(pmu);
+
+			igt_assert_f(load > 0.90,
+				     "engine %d (class:instance %d:%d) was found to be only %.1f%% busy\n",
+				     n, siblings[n].engine_class, siblings[n].engine_instance,
+				     load*100);
+		}
+
+		gem_context_destroy(i915, ctx);
+		free(master_engines);
+		free(siblings);
+	}
+
+	gem_context_destroy(i915, master);
+}
+
 static void indicies(int i915)
 {
 	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, I915_EXEC_RING_MASK + 1);
@@ -856,10 +1061,10 @@ static void semaphore(int i915)
 		count = ARRAY_SIZE(block);
 
 		for (int i = 0; i < count; i++) {
-			set_load_balancer(i915, block[i], ci, count);
+			set_load_balancer(i915, block[i], ci, count, NULL);
 			spin[i] = __igt_spin_new(i915,
-						       .ctx = block[i],
-						       .dependency = scratch);
+						 .ctx = block[i],
+						 .dependency = scratch);
 		}
 
 		/*
@@ -867,7 +1072,7 @@ static void semaphore(int i915)
 		 * or we let the vip through. If not, we hang.
 		 */
 		vip = gem_context_create(i915);
-		set_load_balancer(i915, vip, ci, count);
+		set_load_balancer(i915, vip, ci, count, NULL);
 		ping(i915, vip, 0);
 		gem_context_destroy(i915, vip);
 
@@ -984,7 +1189,7 @@ static bool has_load_balancer(int i915)
 	int err;
 
 	ctx = gem_context_create(i915);
-	err = __set_load_balancer(i915, ctx, &ci, 1);
+	err = __set_load_balancer(i915, ctx, &ci, 1, NULL);
 	gem_context_destroy(i915, ctx);
 
 	return err == 0;
@@ -1010,6 +1215,9 @@ igt_main
 	igt_subtest("invalid-balancer")
 		invalid_balancer(i915);
 
+	igt_subtest("invalid-bonds")
+		invalid_bonds(i915);
+
 	igt_subtest("individual")
 		individual(i915);
 
@@ -1044,6 +1252,12 @@ igt_main
 	igt_subtest("smoke")
 		smoketest(i915, 20);
 
+	igt_subtest("bonded-imm")
+		bonded(i915, 0);
+
+	igt_subtest("bonded-cork")
+		bonded(i915, CORK);
+
 	igt_fixture {
 		igt_stop_hang_detector();
 	}
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH i-g-t 15/16] i915/gem_exec_latency: Measure the latency of context switching
  2019-05-08 10:09 ` [igt-dev] " Chris Wilson
@ 2019-05-08 10:09   ` Chris Wilson
  -1 siblings, 0 replies; 92+ messages in thread
From: Chris Wilson @ 2019-05-08 10:09 UTC (permalink / raw)
  To: intel-gfx; +Cc: igt-dev

Measure the baseline latency between contexts in order to directly
compare that with the additional cost of preemption.
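
The latencies are taken from the engine's own timestamp counter: each
batch stores RING_TIMESTAMP into a pinned bo with MI_STORE_REGISTER_MEM
(SRM), and the deltas are scaled to nanoseconds. A rough sketch of one
probe, using the same opcodes as the diff below (batch and
result_offset are stand-ins for the mapped bo and chosen destination;
rcs_clock, as in the test, converts command-streamer ticks to
nanoseconds):

	uint32_t *cs = batch;

	*cs++ = 0x24 << 23 | 2;    /* MI_STORE_REGISTER_MEM */
	*cs++ = mmio_base + 0x358; /* RING_TIMESTAMP */
	*cs++ = result_offset;     /* lower address dword, in the pinned bo */
	*cs++ = 0;                 /* upper address dword */
	*cs++ = 0xa << 23;         /* MI_BATCH_BUFFER_END */

	/* latency = (stamp_after - stamp_before) * rcs_clock */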

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 tests/i915/gem_exec_latency.c | 230 ++++++++++++++++++++++++++++++++++
 1 file changed, 230 insertions(+)

diff --git a/tests/i915/gem_exec_latency.c b/tests/i915/gem_exec_latency.c
index e56d62780..e88fbbc6a 100644
--- a/tests/i915/gem_exec_latency.c
+++ b/tests/i915/gem_exec_latency.c
@@ -410,6 +410,86 @@ static void latency_from_ring(int fd,
 	}
 }
 
+static void execution_latency(int i915, unsigned int ring, const char *name)
+{
+	struct drm_i915_gem_exec_object2 obj = {
+		.flags = EXEC_OBJECT_PINNED, /* bo created below */
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.flags = ring | LOCAL_I915_EXEC_NO_RELOC | LOCAL_I915_EXEC_HANDLE_LUT,
+	};
+	const unsigned int mmio_base = 0x2000;
+	const unsigned int cs_timestamp = mmio_base + 0x358;
+	volatile uint32_t *timestamp;
+	uint32_t *cs, *result;
+
+	timestamp =
+		(volatile uint32_t *)((volatile char *)igt_global_mmio + cs_timestamp);
+
+	obj.handle = gem_create(i915, 4096);
+	result = gem_mmap__wc(i915, obj.handle, 0, 4096, PROT_WRITE);
+
+	for (int i = 0; i < 16; i++) {
+		cs = result + 16 * i;
+		*cs++ = 0x24 << 23 | 2; /* SRM */
+		*cs++ = cs_timestamp;
+		*cs++ = 4096 - 16 * 4 + i * 4;
+		*cs++ = 0;
+		*cs++ = 0xa << 23; /* MI_BATCH_BUFFER_END */
+	}
+
+	cs = result + 1024 - 16;
+
+	for (int length = 2; length <= 16; length <<= 1) {
+		struct igt_mean submit, batch, total;
+		int last = length - 1;
+
+		igt_mean_init(&submit);
+		igt_mean_init(&batch);
+		igt_mean_init(&total);
+
+		igt_until_timeout(2) {
+			uint32_t now, end;
+
+			cs[last] = 0;
+
+			now = *timestamp;
+			for (int i = 0; i < length; i++) {
+				execbuf.batch_start_offset = 64 * i;
+				gem_execbuf(i915, &execbuf);
+			}
+			while (!((volatile uint32_t *)cs)[last])
+				;
+			end = *timestamp;
+
+			igt_mean_add(&submit, (cs[0] - now) * rcs_clock);
+			igt_mean_add(&batch, (cs[last] - cs[0]) * rcs_clock / last);
+			igt_mean_add(&total, (end - now) * rcs_clock);
+		}
+
+		igt_info("%sx%d Submission latency: %.2f±%.2fus\n",
+			 name, length,
+			 1e-3 * igt_mean_get(&submit),
+			 1e-3 * sqrt(igt_mean_get_variance(&submit)));
+
+		igt_info("%sx%d Inter-batch latency: %.2f±%.2fus\n",
+			 name, length,
+			 1e-3 * igt_mean_get(&batch),
+			 1e-3 * sqrt(igt_mean_get_variance(&batch)));
+
+		igt_info("%sx%d End-to-end latency: %.2f±%.2fus\n",
+			 name, length,
+			 1e-3 * igt_mean_get(&total),
+			 1e-3 * sqrt(igt_mean_get_variance(&total)));
+	}
+
+	munmap(result, 4096);
+	gem_close(i915, obj.handle);
+}
+
 static void
 __submit_spin(int fd, igt_spin_t *spin, unsigned int flags)
 {
@@ -616,6 +696,142 @@ rthog_latency_on_ring(int fd, unsigned int engine, const char *name, unsigned in
 	munmap(results, MMAP_SZ);
 }
 
+static void context_switch(int i915,
+			   unsigned int engine, const char *name,
+			   unsigned int flags)
+{
+	struct drm_i915_gem_exec_object2 obj[2];
+	struct drm_i915_gem_relocation_entry reloc[5];
+	struct drm_i915_gem_execbuffer2 eb;
+	uint32_t *cs, *bbe, *results, v;
+	unsigned int mmio_base;
+	struct igt_mean mean;
+	uint32_t ctx[2];
+
+	/* XXX i915_query()! */
+	switch (engine) {
+	case I915_EXEC_DEFAULT:
+	case I915_EXEC_RENDER:
+		mmio_base = 0x2000;
+		break;
+#if 0
+	case I915_EXEC_BSD:
+		mmio_base = 0x12000;
+		break;
+#endif
+	case I915_EXEC_BLT:
+		mmio_base = 0x22000;
+		break;
+
+	case I915_EXEC_VEBOX:
+		if (intel_gen(intel_get_drm_devid(i915)) >= 11)
+			mmio_base = 0x1d8000;
+		else
+			mmio_base = 0x1a000;
+		break;
+
+	default:
+		igt_skip("mmio base not known\n");
+	}
+
+	for (int i = 0; i < ARRAY_SIZE(ctx); i++)
+		ctx[i] = gem_context_create(i915);
+
+	if (flags & PREEMPT) {
+		gem_context_set_priority(i915, ctx[0], -1023);
+		gem_context_set_priority(i915, ctx[1], +1023);
+	}
+
+	memset(obj, 0, sizeof(obj));
+	obj[0].handle = gem_create(i915, 4096);
+	gem_set_caching(i915, obj[0].handle, 1);
+	results = gem_mmap__cpu(i915, obj[0].handle, 0, 4096, PROT_READ);
+	gem_set_domain(i915, obj[0].handle, I915_GEM_DOMAIN_CPU, 0);
+
+	obj[1].handle = gem_create(i915, 4096);
+	memset(reloc, 0, sizeof(reloc));
+	obj[1].relocation_count = ARRAY_SIZE(reloc);
+	obj[1].relocs_ptr = to_user_pointer(reloc);
+	bbe = gem_mmap__wc(i915, obj[1].handle, 0, 4096, PROT_WRITE);
+	gem_set_domain(i915, obj[1].handle,
+		       I915_GEM_DOMAIN_WC, I915_GEM_DOMAIN_WC);
+
+	cs = bbe;
+	*cs++ = 0x5 << 23; /* MI_ARB_CHECK */
+	*cs++ = 0x24 << 23 | 2; /* SRM */
+	*cs++ = mmio_base + 0x358; /* TIMESTAMP */
+	reloc[0].target_handle = obj[0].handle;
+	reloc[0].offset = (cs - bbe) * sizeof(*cs);
+	*cs++ = 0;
+	*cs++ = 0;
+	*cs++ = MI_BATCH_BUFFER_START | 1;
+	reloc[1].target_handle = obj[1].handle;
+	reloc[1].offset = (cs - bbe) * sizeof(*cs);
+	*cs++ = 0;
+	*cs++ = 0;
+
+	cs = bbe + 64;
+	*cs++ = 0x24 << 23 | 2; /* SRM */
+	*cs++ = mmio_base + 0x358; /* TIMESTAMP */
+	reloc[2].target_handle = obj[0].handle;
+	reloc[2].offset = (cs - bbe) * sizeof(*cs);
+	*cs++ = reloc[2].delta = 4;
+	*cs++ = 0;
+	*cs++ = 0x29 << 23 | 2; /* LRM */
+	*cs++ = mmio_base + 0x600; /* GPR0 */
+	reloc[3].target_handle = obj[0].handle;
+	reloc[3].offset = (cs - bbe) * sizeof(*cs);
+	*cs++ = 0;
+	*cs++ = 0;
+	*cs++ = 0x24 << 23 | 2; /* SRM */
+	*cs++ = mmio_base + 0x600; /* GPR0 */
+	reloc[4].target_handle = obj[0].handle;
+	reloc[4].offset = (cs - bbe) * sizeof(*cs);
+	*cs++ = reloc[4].delta = 8;
+	*cs++ = 0;
+	*cs++ = 0xa << 23; /* MI_BATCH_BUFFER_END */
+
+	memset(&eb, 0, sizeof(eb));
+	eb.buffers_ptr = to_user_pointer(obj);
+	eb.buffer_count = ARRAY_SIZE(obj);
+	eb.flags = engine;
+	eb.flags |= LOCAL_I915_EXEC_NO_RELOC;
+
+	v = 0;
+	igt_mean_init(&mean);
+	igt_until_timeout(5) {
+		eb.rsvd1 = ctx[0];
+		eb.batch_start_offset = 0;
+		gem_execbuf(i915, &eb);
+
+		while (results[0] == v)
+			igt_assert(gem_bo_busy(i915, obj[1].handle));
+
+		eb.rsvd1 = ctx[1];
+		eb.batch_start_offset = 64 * sizeof(*cs);
+		gem_execbuf(i915, &eb);
+
+		*bbe = 0xa << 23; /* MI_BATCH_BUFFER_END: stop the first batch looping */
+		gem_sync(i915, obj[1].handle);
+		*bbe = 0x5 << 23; /* restore MI_ARB_CHECK for the next iteration */
+
+		v = results[0];
+		igt_mean_add(&mean, (results[1] - results[2]) * rcs_clock);
+	}
+	igt_info("%s context switch latency%s: %.2f±%.2fus\n",
+		 name, flags & PREEMPT ? " (preempt)" : "",
+		 1e-3 * igt_mean_get(&mean),
+		 1e-3 * sqrt(igt_mean_get_variance(&mean)));
+	munmap(results, 4096);
+	munmap(bbe, 4096);
+
+	for (int i = 0; i < ARRAY_SIZE(obj); i++)
+		gem_close(i915, obj[i].handle);
+
+	for (int i = 0; i < ARRAY_SIZE(ctx); i++)
+		gem_context_destroy(i915, ctx[i]);
+}
+
 static double clockrate(int i915, int reg)
 {
 	volatile uint32_t *mmio;
@@ -722,6 +938,11 @@ igt_main
 							      e->name,
 							      0);
 
+				igt_subtest_f("%s-execution-latency", e->name)
+					execution_latency(device,
+							  e->exec_id | e->flags,
+							  e->name);
+
 				igt_subtest_f("%s-live-dispatch-queued", e->name)
 					latency_on_ring(device,
 							e->exec_id | e->flags,
@@ -741,12 +962,21 @@ igt_main
 							  e->exec_id | e->flags,
 							  e->name, CORK);
 
+				igt_subtest_f("%s-cs", e->name)
+					context_switch(device,
+						       e->exec_id | e->flags,
+						       e->name, 0);
 				igt_subtest_group {
 					igt_fixture {
 						gem_require_contexts(device);
 						igt_require(gem_scheduler_has_preemption(device));
 					}
 
+					igt_subtest_f("%s-cs-preempt", e->name)
+						context_switch(device,
+								e->exec_id | e->flags,
+								e->name, PREEMPT);
+
 					igt_subtest_f("%s-preemption", e->name)
 						latency_from_ring(device,
 								  e->exec_id | e->flags,
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [PATCH i-g-t 16/16] i915/gem_exec_latency: Add another variant of waiter latency
  2019-05-08 10:09 ` [igt-dev] " Chris Wilson
@ 2019-05-08 10:09   ` Chris Wilson
  -1 siblings, 0 replies; 92+ messages in thread
From: Chris Wilson @ 2019-05-08 10:09 UTC (permalink / raw)
  To: intel-gfx; +Cc: igt-dev
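
The new variant measures how long it takes a waiter to observe the end
of a batch: the batch stores RING_TIMESTAMP just before it completes,
the waiter samples the register again on return from gem_sync(), and
the difference is the wakeup latency. The gate, condensed from the
diff below (the igt_fork() child/parent split omitted; result and cs
are as set up there):

	result[4] = MI_BATCH_BUFFER_START | 1; /* loop back to batch start */
	gem_execbuf(i915, &execbuf);
	while (!cs[0])  /* first timestamp: the batch is running */
		;
	result[4] = 0;  /* MI_NOOP: fall through to the final SRM + end */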

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 tests/i915/gem_exec_latency.c | 81 +++++++++++++++++++++++++++++++++++
 1 file changed, 81 insertions(+)

diff --git a/tests/i915/gem_exec_latency.c b/tests/i915/gem_exec_latency.c
index e88fbbc6a..fd4ceb4d6 100644
--- a/tests/i915/gem_exec_latency.c
+++ b/tests/i915/gem_exec_latency.c
@@ -490,6 +490,83 @@ static void execution_latency(int i915, unsigned int ring, const char *name)
 	gem_close(i915, obj.handle);
 }
 
+static void wakeup_latency(int i915, unsigned int ring, const char *name)
+{
+	struct drm_i915_gem_exec_object2 obj = {
+		.flags = EXEC_OBJECT_PINNED, /* bo created below */
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.flags = ring | LOCAL_I915_EXEC_NO_RELOC | LOCAL_I915_EXEC_HANDLE_LUT,
+	};
+	const unsigned int mmio_base = 0x2000;
+	const unsigned int cs_timestamp = mmio_base + 0x358;
+	volatile uint32_t *timestamp;
+	struct igt_mean wakeup;
+	uint32_t *cs, *result;
+
+	timestamp =
+		(volatile uint32_t *)((volatile char *)igt_global_mmio + cs_timestamp);
+
+	obj.handle = gem_create(i915, 4096);
+	result = gem_mmap__wc(i915, obj.handle, 0, 4096, PROT_WRITE);
+
+	cs = result;
+
+	*cs++ = 0x24 << 23 | 2; /* SRM */
+	*cs++ = cs_timestamp;
+	*cs++ = 4096 - 16 * 4;
+	*cs++ = 0;
+
+	*cs++ = MI_BATCH_BUFFER_START | 1; /* result[4]: gate toggled at runtime */
+	*cs++ = 0;
+	*cs++ = 0;
+
+	*cs++ = 0x24 << 23 | 2; /* SRM */
+	*cs++ = cs_timestamp;
+	*cs++ = 4096 - 16 * 4 + 4;
+	*cs++ = 0;
+	*cs++ = 0xa << 23; /* MI_BATCH_BUFFER_END */
+
+	cs = result + 1024 - 16;
+
+	{
+		struct sched_param p = { .sched_priority = 99 };
+		sched_setscheduler(0, SCHED_FIFO | SCHED_RESET_ON_FORK, &p);
+	}
+
+	igt_mean_init(&wakeup);
+	igt_until_timeout(2) {
+		uint32_t end;
+
+		igt_fork(child, 1) {
+			result[4] = MI_BATCH_BUFFER_START | 1; /* loop to batch start */
+			cs[0] = 0;
+
+			gem_execbuf(i915, &execbuf);
+
+			while (!cs[0])
+				;
+			result[4] = 0; /* MI_NOOP: let the batch complete */
+			__sync_synchronize();
+		}
+		gem_sync(i915, obj.handle);
+		end = *timestamp;
+
+		igt_mean_add(&wakeup, (end - cs[1]) * rcs_clock);
+		igt_waitchildren();
+	}
+	igt_info("%s Wakeup latency: %.2f±%.2fms [%.2f, %.2f]\n", name,
+		 1e-6 * igt_mean_get(&wakeup),
+		 1e-6 * sqrt(igt_mean_get_variance(&wakeup)),
+		 1e-6 * wakeup.min, 1e-6 * wakeup.max);
+
+	munmap(result, 4096);
+	gem_close(i915, obj.handle);
+}
+
 static void
 __submit_spin(int fd, igt_spin_t *spin, unsigned int flags)
 {
@@ -942,6 +1019,10 @@ igt_main
 					execution_latency(device,
 							  e->exec_id | e->flags,
 							  e->name);
+				igt_subtest_f("%s-wakeup-latency", e->name)
+					wakeup_latency(device,
+							e->exec_id | e->flags,
+							e->name);
 
 				igt_subtest_f("%s-live-dispatch-queued", e->name)
 					latency_on_ring(device,
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 92+ messages in thread

* [igt-dev] ✓ Fi.CI.BAT: success for series starting with [i-g-t,01/16] i915/gem_exec_schedule: Semaphore priority fixups
  2019-05-08 10:09 ` [igt-dev] " Chris Wilson
                   ` (15 preceding siblings ...)
  (?)
@ 2019-05-08 10:41 ` Patchwork
  -1 siblings, 0 replies; 92+ messages in thread
From: Patchwork @ 2019-05-08 10:41 UTC (permalink / raw)
  To: Chris Wilson; +Cc: igt-dev

== Series Details ==

Series: series starting with [i-g-t,01/16] i915/gem_exec_schedule: Semaphore priority fixups
URL   : https://patchwork.freedesktop.org/series/60411/
State : success

== Summary ==

CI Bug Log - changes from IGT_4973 -> IGTPW_2951
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/60411/revisions/1/mbox/

Known issues
------------

  Here are the changes found in IGTPW_2951 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@i915_selftest@live_contexts:
    - fi-bdw-gvtdvm:      [PASS][1] -> [DMESG-FAIL][2] ([fdo#110235])
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4973/fi-bdw-gvtdvm/igt@i915_selftest@live_contexts.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2951/fi-bdw-gvtdvm/igt@i915_selftest@live_contexts.html

  
  [fdo#110235]: https://bugs.freedesktop.org/show_bug.cgi?id=110235


Participating hosts (51 -> 44)
------------------------------

  Additional (1): fi-icl-u2 
  Missing    (8): fi-kbl-soraka fi-ilk-m540 fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-ctg-p8600 fi-byt-clapper fi-bdw-samus 


Build changes
-------------

  * IGT: IGT_4973 -> IGTPW_2951

  CI_DRM_6063: 44ae4003d35743cbc7883825c5fe777d136b5247 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGTPW_2951: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2951/
  IGT_4973: 3e3ff0e48989abd25fce4916e85e8fef20a3c63a @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools



== Testlist changes ==

+++ 243 lines
--- 2 lines

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2951/
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 92+ messages in thread

* [igt-dev] ✗ Fi.CI.IGT: failure for series starting with [i-g-t,01/16] i915/gem_exec_schedule: Semaphore priority fixups
  2019-05-08 10:09 ` [igt-dev] " Chris Wilson
                   ` (16 preceding siblings ...)
  (?)
@ 2019-05-08 12:38 ` Patchwork
  -1 siblings, 0 replies; 92+ messages in thread
From: Patchwork @ 2019-05-08 12:38 UTC (permalink / raw)
  To: Chris Wilson; +Cc: igt-dev

== Series Details ==

Series: series starting with [i-g-t,01/16] i915/gem_exec_schedule: Semaphore priority fixups
URL   : https://patchwork.freedesktop.org/series/60411/
State : failure

== Summary ==

CI Bug Log - changes from IGT_4973_full -> IGTPW_2951_full
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with IGTPW_2951_full absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in IGTPW_2951_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/60411/revisions/1/mbox/

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in IGTPW_2951_full:

### IGT changes ###

#### Possible regressions ####

  * {igt@gem_ctx_shared@exhaust-shared-gtt-lrc} (NEW):
    - shard-iclb:         NOTRUN -> [SKIP][1] +63 similar issues
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2951/shard-iclb5/igt@gem_ctx_shared@exhaust-shared-gtt-lrc.html

  
New tests
---------

  New tests have been introduced between IGT_4973_full and IGTPW_2951_full:

### New IGT tests (80) ###

  * igt@gem_ctx_clone@engines:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_clone@flags:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_clone@invalid:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_clone@scheduler:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_clone@vm:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_engines@execute-allforone:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_engines@execute-one:
    - Statuses : 5 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_engines@execute-oneforall:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_engines@idempotent:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_engines@independent:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_engines@invalid-engines:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_param@vm:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_shared@create-shared-gtt:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_shared@detached-shared-gtt:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_shared@disjoint-timelines:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_shared@exec-shared-gtt-blt:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_shared@exec-shared-gtt-bsd:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_shared@exec-shared-gtt-bsd1:
    - Statuses : 5 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_shared@exec-shared-gtt-bsd2:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_shared@exec-shared-gtt-default:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_shared@exec-shared-gtt-render:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_shared@exec-shared-gtt-vebox:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_shared@exec-single-timeline-blt:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_shared@exec-single-timeline-bsd1:
    - Statuses : 5 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_shared@exec-single-timeline-bsd2:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_shared@exec-single-timeline-render:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_shared@exec-single-timeline-vebox:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_shared@exhaust-shared-gtt:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_shared@exhaust-shared-gtt-lrc:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_shared@q-in-order-blt:
    - Statuses : 5 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_shared@q-in-order-bsd:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_shared@q-in-order-bsd1:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_shared@q-in-order-bsd2:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_shared@q-in-order-default:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_shared@q-in-order-render:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_shared@q-in-order-vebox:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_shared@q-independent-blt:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_shared@q-independent-bsd:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_shared@q-independent-bsd1:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_shared@q-independent-bsd2:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_shared@q-independent-default:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_shared@q-independent-render:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_shared@q-independent-vebox:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_shared@q-out-order-blt:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_shared@q-out-order-bsd:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_shared@q-out-order-bsd1:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_shared@q-out-order-bsd2:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_shared@q-out-order-default:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_shared@q-out-order-render:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_shared@q-out-order-vebox:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_shared@q-promotion-blt:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_shared@q-promotion-bsd:
    - Statuses : 5 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_shared@q-promotion-bsd1:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_shared@q-promotion-bsd2:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_shared@q-promotion-default:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_shared@q-promotion-render:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_shared@q-promotion-vebox:
    - Statuses : 5 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_shared@q-smoketest-all:
    - Statuses : 5 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_shared@q-smoketest-blt:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_shared@q-smoketest-bsd:
    - Statuses : 5 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_shared@q-smoketest-bsd1:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_shared@q-smoketest-bsd2:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_shared@q-smoketest-default:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_shared@q-smoketest-render:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_shared@q-smoketest-vebox:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_shared@single-timeline:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_switch@basic-default-heavy-interruptible:
    - Statuses : 6 pass(s)
    - Exec time: [5.39, 18.63] s

  * igt@gem_ctx_switch@basic-default-heavy-queue:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_switch@basic-default-heavy-queue-interruptible:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_switch@basic-default-interruptible:
    - Statuses : 6 pass(s)
    - Exec time: [5.03, 5.09] s

  * igt@gem_ctx_switch@basic-default-queue:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_switch@basic-default-queue-interruptible:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_switch@basic-queue-heavy:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_ctx_switch@basic-queue-light:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_vm_create@async-destroy:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_vm_create@create-ext:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_vm_create@execbuf:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_vm_create@invalid-create:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_vm_create@invalid-destroy:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@gem_vm_create@isolation:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  

Known issues
------------

  Here are the changes found in IGTPW_2951_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@i915_pm_rps@waitboost:
    - shard-hsw:          [PASS][2] -> [FAIL][3] ([fdo#102250])
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4973/shard-hsw2/igt@i915_pm_rps@waitboost.html
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2951/shard-hsw6/igt@i915_pm_rps@waitboost.html

  * igt@i915_suspend@fence-restore-untiled:
    - shard-apl:          [PASS][4] -> [DMESG-WARN][5] ([fdo#108566]) +5 similar issues
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4973/shard-apl2/igt@i915_suspend@fence-restore-untiled.html
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2951/shard-apl5/igt@i915_suspend@fence-restore-untiled.html

  * igt@kms_cursor_legacy@2x-nonblocking-modeset-vs-cursor-atomic:
    - shard-glk:          [PASS][6] -> [FAIL][7] ([fdo#106509] / [fdo#107409])
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4973/shard-glk7/igt@kms_cursor_legacy@2x-nonblocking-modeset-vs-cursor-atomic.html
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2951/shard-glk1/igt@kms_cursor_legacy@2x-nonblocking-modeset-vs-cursor-atomic.html

  * igt@kms_flip@2x-flip-vs-suspend-interruptible:
    - shard-hsw:          [PASS][8] -> [INCOMPLETE][9] ([fdo#103540])
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4973/shard-hsw4/igt@kms_flip@2x-flip-vs-suspend-interruptible.html
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2951/shard-hsw2/igt@kms_flip@2x-flip-vs-suspend-interruptible.html

  * igt@kms_flip@flip-vs-expired-vblank-interruptible:
    - shard-glk:          [PASS][10] -> [FAIL][11] ([fdo#102887] / [fdo#105363])
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4973/shard-glk7/igt@kms_flip@flip-vs-expired-vblank-interruptible.html
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2951/shard-glk5/igt@kms_flip@flip-vs-expired-vblank-interruptible.html

  * igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-cur-indfb-draw-render:
    - shard-iclb:         [PASS][12] -> [FAIL][13] ([fdo#103167]) +3 similar issues
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4973/shard-iclb6/igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-cur-indfb-draw-render.html
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2951/shard-iclb8/igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-cur-indfb-draw-render.html

  * igt@kms_plane_scaling@pipe-a-scaler-with-clipping-clamping:
    - shard-glk:          [PASS][14] -> [SKIP][15] ([fdo#109271] / [fdo#109278])
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4973/shard-glk9/igt@kms_plane_scaling@pipe-a-scaler-with-clipping-clamping.html
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2951/shard-glk3/igt@kms_plane_scaling@pipe-a-scaler-with-clipping-clamping.html

  * igt@kms_psr@psr2_primary_mmap_cpu:
    - shard-iclb:         [PASS][16] -> [SKIP][17] ([fdo#109441]) +1 similar issue
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4973/shard-iclb2/igt@kms_psr@psr2_primary_mmap_cpu.html
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2951/shard-iclb1/igt@kms_psr@psr2_primary_mmap_cpu.html

  * igt@kms_rotation_crc@multiplane-rotation-cropping-bottom:
    - shard-kbl:          [PASS][18] -> [DMESG-FAIL][19] ([fdo#105763])
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4973/shard-kbl6/igt@kms_rotation_crc@multiplane-rotation-cropping-bottom.html
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2951/shard-kbl1/igt@kms_rotation_crc@multiplane-rotation-cropping-bottom.html

  
#### Possible fixes ####

  * igt@gem_tiled_swapping@non-threaded:
    - shard-glk:          [DMESG-WARN][20] ([fdo#108686]) -> [PASS][21]
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4973/shard-glk7/igt@gem_tiled_swapping@non-threaded.html
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2951/shard-glk6/igt@gem_tiled_swapping@non-threaded.html

  * igt@i915_pm_rc6_residency@rc6-accuracy:
    - shard-snb:          [SKIP][22] ([fdo#109271]) -> [PASS][23]
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4973/shard-snb6/igt@i915_pm_rc6_residency@rc6-accuracy.html
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2951/shard-snb4/igt@i915_pm_rc6_residency@rc6-accuracy.html

  * igt@i915_suspend@debugfs-reader:
    - shard-apl:          [DMESG-WARN][24] ([fdo#108566]) -> [PASS][25] +3 similar issues
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4973/shard-apl4/igt@i915_suspend@debugfs-reader.html
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2951/shard-apl7/igt@i915_suspend@debugfs-reader.html

  * igt@kms_cursor_crc@cursor-64x21-sliding:
    - shard-kbl:          [FAIL][26] ([fdo#103232]) -> [PASS][27]
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4973/shard-kbl3/igt@kms_cursor_crc@cursor-64x21-sliding.html
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2951/shard-kbl2/igt@kms_cursor_crc@cursor-64x21-sliding.html

  * igt@kms_flip@2x-flip-vs-expired-vblank-interruptible:
    - shard-glk:          [FAIL][28] ([fdo#105363]) -> [PASS][29]
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4973/shard-glk4/igt@kms_flip@2x-flip-vs-expired-vblank-interruptible.html
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2951/shard-glk8/igt@kms_flip@2x-flip-vs-expired-vblank-interruptible.html

  * igt@kms_flip@flip-vs-suspend-interruptible:
    - shard-hsw:          [INCOMPLETE][30] ([fdo#103540]) -> [PASS][31]
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4973/shard-hsw1/igt@kms_flip@flip-vs-suspend-interruptible.html
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2951/shard-hsw1/igt@kms_flip@flip-vs-suspend-interruptible.html

  * igt@kms_flip@plain-flip-fb-recreate:
    - shard-kbl:          [FAIL][32] ([fdo#100368]) -> [PASS][33]
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4973/shard-kbl4/igt@kms_flip@plain-flip-fb-recreate.html
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2951/shard-kbl6/igt@kms_flip@plain-flip-fb-recreate.html

  * igt@kms_frontbuffer_tracking@fbc-1p-primscrn-cur-indfb-draw-blt:
    - shard-iclb:         [FAIL][34] ([fdo#103167]) -> [PASS][35] +6 similar issues
   [34]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4973/shard-iclb2/igt@kms_frontbuffer_tracking@fbc-1p-primscrn-cur-indfb-draw-blt.html
   [35]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2951/shard-iclb1/igt@kms_frontbuffer_tracking@fbc-1p-primscrn-cur-indfb-draw-blt.html

  * igt@kms_plane@pixel-format-pipe-c-planes-source-clamping:
    - shard-glk:          [SKIP][36] ([fdo#109271]) -> [PASS][37]
   [36]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4973/shard-glk1/igt@kms_plane@pixel-format-pipe-c-planes-source-clamping.html
   [37]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2951/shard-glk9/igt@kms_plane@pixel-format-pipe-c-planes-source-clamping.html

  * igt@kms_plane_lowres@pipe-a-tiling-x:
    - shard-iclb:         [FAIL][38] ([fdo#103166]) -> [PASS][39]
   [38]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4973/shard-iclb5/igt@kms_plane_lowres@pipe-a-tiling-x.html
   [39]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2951/shard-iclb6/igt@kms_plane_lowres@pipe-a-tiling-x.html

  * igt@kms_psr@psr2_no_drrs:
    - shard-iclb:         [SKIP][40] ([fdo#109441]) -> [PASS][41] +1 similar issue
   [40]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4973/shard-iclb5/igt@kms_psr@psr2_no_drrs.html
   [41]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2951/shard-iclb2/igt@kms_psr@psr2_no_drrs.html

  * igt@kms_vblank@pipe-a-ts-continuation-suspend:
    - shard-kbl:          [INCOMPLETE][42] ([fdo#103665]) -> [PASS][43]
   [42]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_4973/shard-kbl1/igt@kms_vblank@pipe-a-ts-continuation-suspend.html
   [43]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2951/shard-kbl3/igt@kms_vblank@pipe-a-ts-continuation-suspend.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#100368]: https://bugs.freedesktop.org/show_bug.cgi?id=100368
  [fdo#102250]: https://bugs.freedesktop.org/show_bug.cgi?id=102250
  [fdo#102887]: https://bugs.freedesktop.org/show_bug.cgi?id=102887
  [fdo#103166]: https://bugs.freedesktop.org/show_bug.cgi?id=103166
  [fdo#103167]: https://bugs.freedesktop.org/show_bug.cgi?id=103167
  [fdo#103232]: https://bugs.freedesktop.org/show_bug.cgi?id=103232
  [fdo#103540]: https://bugs.freedesktop.org/show_bug.cgi?id=103540
  [fdo#103665]: https://bugs.freedesktop.org/show_bug.cgi?id=103665
  [fdo#105363]: https://bugs.freedesktop.org/show_bug.cgi?id=105363
  [fdo#105763]: https://bugs.freedesktop.org/show_bug.cgi?id=105763
  [fdo#106509]: https://bugs.freedesktop.org/show_bug.cgi?id=106509
  [fdo#107409]: https://bugs.freedesktop.org/show_bug.cgi?id=107409
  [fdo#108566]: https://bugs.freedesktop.org/show_bug.cgi?id=108566
  [fdo#108686]: https://bugs.freedesktop.org/show_bug.cgi?id=108686
  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [fdo#109278]: https://bugs.freedesktop.org/show_bug.cgi?id=109278
  [fdo#109441]: https://bugs.freedesktop.org/show_bug.cgi?id=109441


Participating hosts (7 -> 6)
------------------------------

  Missing    (1): shard-skl 


Build changes
-------------

  * IGT: IGT_4973 -> IGTPW_2951

  CI_DRM_6063: 44ae4003d35743cbc7883825c5fe777d136b5247 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGTPW_2951: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2951/
  IGT_4973: 3e3ff0e48989abd25fce4916e85e8fef20a3c63a @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_2951/
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH i-g-t 01/16] i915/gem_exec_schedule: Semaphore priority fixups
  2019-05-08 10:09 ` [igt-dev] " Chris Wilson
@ 2019-05-14  9:39   ` Tvrtko Ursulin
  -1 siblings, 0 replies; 92+ messages in thread
From: Tvrtko Ursulin @ 2019-05-14  9:39 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: igt-dev


On 08/05/2019 11:09, Chris Wilson wrote:
> A stray git add from my test boxen -- we were being careful enough to
> preserve priority and ordering to match the implicit policies.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   tests/i915/gem_exec_schedule.c | 2 ++
>   1 file changed, 2 insertions(+)
> 
> diff --git a/tests/i915/gem_exec_schedule.c b/tests/i915/gem_exec_schedule.c
> index 330e8a54e..77a264a6a 100644
> --- a/tests/i915/gem_exec_schedule.c
> +++ b/tests/i915/gem_exec_schedule.c
> @@ -507,6 +507,7 @@ static void semaphore_resolve(int i915)
>   		uint32_t handle, cancel;
>   		uint32_t *cs, *map;
>   		igt_spin_t *spin;
> +		int64_t poke = 1;
>   
>   		if (!gem_can_store_dword(i915, engine))
>   			continue;
> @@ -587,6 +588,7 @@ static void semaphore_resolve(int i915)
>   		eb.buffer_count = 2;
>   		eb.rsvd1 = inner;
>   		gem_execbuf(i915, &eb);
> +		gem_wait(i915, cancel, &poke);
>   		gem_close(i915, cancel);
>   
>   		gem_sync(i915, handle); /* To hang unless cancel runs! */
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [igt-dev] [PATCH i-g-t 01/16] i915/gem_exec_schedule: Semaphore priority fixups
@ 2019-05-14  9:39   ` Tvrtko Ursulin
  0 siblings, 0 replies; 92+ messages in thread
From: Tvrtko Ursulin @ 2019-05-14  9:39 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: igt-dev


On 08/05/2019 11:09, Chris Wilson wrote:
> A stray git add from my test boxen -- we were being careful enough to
> preserve priority and ordering to match the implicit policies.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   tests/i915/gem_exec_schedule.c | 2 ++
>   1 file changed, 2 insertions(+)
> 
> diff --git a/tests/i915/gem_exec_schedule.c b/tests/i915/gem_exec_schedule.c
> index 330e8a54e..77a264a6a 100644
> --- a/tests/i915/gem_exec_schedule.c
> +++ b/tests/i915/gem_exec_schedule.c
> @@ -507,6 +507,7 @@ static void semaphore_resolve(int i915)
>   		uint32_t handle, cancel;
>   		uint32_t *cs, *map;
>   		igt_spin_t *spin;
> +		int64_t poke = 1;
>   
>   		if (!gem_can_store_dword(i915, engine))
>   			continue;
> @@ -587,6 +588,7 @@ static void semaphore_resolve(int i915)
>   		eb.buffer_count = 2;
>   		eb.rsvd1 = inner;
>   		gem_execbuf(i915, &eb);
> +		gem_wait(i915, cancel, &poke);
>   		gem_close(i915, cancel);
>   
>   		gem_sync(i915, handle); /* To hang unless cancel runs! */
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH i-g-t 04/16] i915/gem_ctx_param: Test set/get (copy) VM
  2019-05-08 10:09   ` [igt-dev] " Chris Wilson
@ 2019-05-14  9:47     ` Tvrtko Ursulin
  -1 siblings, 0 replies; 92+ messages in thread
From: Tvrtko Ursulin @ 2019-05-14  9:47 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: igt-dev


On 08/05/2019 11:09, Chris Wilson wrote:
> Exercise reusing the GTT of one ctx in another.
> 
> v2: Test setting back to the same VM
> v3: Check the VM still exists after the parent ctx are dead.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   tests/i915/gem_ctx_param.c | 107 ++++++++++++++++++++++++++++++++-----
>   1 file changed, 95 insertions(+), 12 deletions(-)
> 
> diff --git a/tests/i915/gem_ctx_param.c b/tests/i915/gem_ctx_param.c
> index b6f57236c..d949cef32 100644
> --- a/tests/i915/gem_ctx_param.c
> +++ b/tests/i915/gem_ctx_param.c
> @@ -28,6 +28,7 @@
>   #include <limits.h>
>   
>   #include "igt.h"
> +#include "i915/gem_vm.h"
>   
>   IGT_TEST_DESCRIPTION("Basic test for context set/get param input validation.");
>   
> @@ -36,17 +37,6 @@ IGT_TEST_DESCRIPTION("Basic test for context set/get param input validation.");
>   #define NEW_CTX	BIT(0)
>   #define USER BIT(1)
>   
> -static int reopen_driver(int fd)
> -{
> -	char path[256];
> -
> -	snprintf(path, sizeof(path), "/proc/self/fd/%d", fd);
> -	fd = open(path, O_RDWR);
> -	igt_assert_lte(0, fd);
> -
> -	return fd;
> -}
> -
>   static void set_priority(int i915)
>   {
>   	static const int64_t test_values[] = {
> @@ -91,7 +81,7 @@ static void set_priority(int i915)
>   	igt_permute_array(values, size, igt_exchange_int64);
>   
>   	igt_fork(flags, NEW_CTX | USER) {
> -		int fd = reopen_driver(i915);
> +		int fd = gem_reopen_driver(i915);
>   		struct drm_i915_gem_context_param arg = {
>   			.param = I915_CONTEXT_PARAM_PRIORITY,
>   			.ctx_id = flags & NEW_CTX ? gem_context_create(fd) : 0,
> @@ -143,6 +133,96 @@ static void set_priority(int i915)
>   	free(values);
>   }
>   
> +static uint32_t __batch_create(int i915, uint32_t offset)
> +{
> +	const uint32_t bbe = MI_BATCH_BUFFER_END;
> +	uint32_t handle;
> +
> +	handle = gem_create(i915, ALIGN(offset + 4, 4096));
> +	gem_write(i915, handle, offset, &bbe, sizeof(bbe));
> +
> +	return handle;
> +}
> +
> +static uint32_t batch_create(int i915)
> +{
> +	return __batch_create(i915, 0);
> +}
> +
> +static void test_vm(int i915)
> +{
> +	const uint64_t nonzero_offset = 48 << 20;
> +	struct drm_i915_gem_exec_object2 batch = {
> +		.handle = batch_create(i915),
> +	};
> +	struct drm_i915_gem_execbuffer2 eb = {
> +		.buffers_ptr = to_user_pointer(&batch),
> +		.buffer_count = 1,
> +	};
> +	struct drm_i915_gem_context_param arg = {
> +		.param = I915_CONTEXT_PARAM_VM,
> +	};
> +	uint32_t parent, child;
> +
> +	arg.value = -1ull;
> +	igt_require(__gem_context_set_param(i915, &arg) == -ENOENT);
> +
> +	parent = gem_context_create(i915);
> +	child = gem_context_create(i915);
> +
> +	/* Using implicit soft-pinning */
> +	eb.rsvd1 = parent;
> +	batch.offset = nonzero_offset;
> +	gem_execbuf(i915, &eb);
> +	igt_assert_eq_u64(batch.offset, nonzero_offset);
> +
> +	eb.rsvd1 = child;
> +	batch.offset = 0;
> +	gem_execbuf(i915, &eb);
> +	igt_assert_eq_u64(batch.offset, 0);
> +
> +	eb.rsvd1 = parent;
> +	gem_execbuf(i915, &eb);
> +	igt_assert_eq_u64(batch.offset, nonzero_offset);
> +
> +	arg.ctx_id = parent;
> +	gem_context_get_param(i915, &arg);
> +	gem_context_set_param(i915, &arg);
> +
> +	/* Still the same VM, so expect the old VMA again */
> +	batch.offset = 0;
> +	gem_execbuf(i915, &eb);
> +	igt_assert_eq_u64(batch.offset, nonzero_offset);
> +
> +	arg.ctx_id = child;
> +	gem_context_set_param(i915, &arg);
> +
> +	eb.rsvd1 = child;
> +	batch.offset = 0;
> +	gem_execbuf(i915, &eb);
> +	igt_assert_eq_u64(batch.offset, nonzero_offset);
> +
> +	gem_context_destroy(i915, child);
> +	gem_context_destroy(i915, parent);

High-level commentary is lacking, so my condolences to future readers.

I'd also add a get_vm after set_vm, just to check that it reads back
the same VM.
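
Something along these lines after the set_param would do (a rough
sketch, just reading the param back with the same struct layout as
used above):

	/* read the VM param back and check it is the one we just set */
	struct drm_i915_gem_context_param check = {
		.ctx_id = arg.ctx_id,
		.param = I915_CONTEXT_PARAM_VM,
	};

	gem_context_get_param(i915, &check);
	igt_assert_eq_u64(check.value, arg.value);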

Otherwise:

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

> +
> +	/* both contexts destroyed, but we still keep hold of the vm */
> +	child = gem_context_create(i915);
> +
> +	arg.ctx_id = child;
> +	gem_context_set_param(i915, &arg);
> +
> +	eb.rsvd1 = child;
> +	batch.offset = 0;
> +	gem_execbuf(i915, &eb);
> +	igt_assert_eq_u64(batch.offset, nonzero_offset);
> +
> +	gem_context_destroy(i915, child);
> +	gem_vm_destroy(i915, arg.value);
> +
> +	gem_sync(i915, batch.handle);
> +	gem_close(i915, batch.handle);
> +}
> +
>   igt_main
>   {
>   	struct drm_i915_gem_context_param arg;
> @@ -253,6 +333,9 @@ igt_main
>   		gem_context_set_param(fd, &arg);
>   	}
>   
> +	igt_subtest("vm")
> +		test_vm(fd);
> +
>   	arg.param = I915_CONTEXT_PARAM_PRIORITY;
>   
>   	igt_subtest("set-priority-not-supported") {
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [igt-dev] [PATCH i-g-t 04/16] i915/gem_ctx_param: Test set/get (copy) VM
@ 2019-05-14  9:47     ` Tvrtko Ursulin
  0 siblings, 0 replies; 92+ messages in thread
From: Tvrtko Ursulin @ 2019-05-14  9:47 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: igt-dev, Tvrtko Ursulin


On 08/05/2019 11:09, Chris Wilson wrote:
> Exercise reusing the GTT of one ctx in another.
> 
> v2: Test setting back to the same VM
> v3: Check the VM still exists after the parent ctx are dead.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   tests/i915/gem_ctx_param.c | 107 ++++++++++++++++++++++++++++++++-----
>   1 file changed, 95 insertions(+), 12 deletions(-)
> 
> diff --git a/tests/i915/gem_ctx_param.c b/tests/i915/gem_ctx_param.c
> index b6f57236c..d949cef32 100644
> --- a/tests/i915/gem_ctx_param.c
> +++ b/tests/i915/gem_ctx_param.c
> @@ -28,6 +28,7 @@
>   #include <limits.h>
>   
>   #include "igt.h"
> +#include "i915/gem_vm.h"
>   
>   IGT_TEST_DESCRIPTION("Basic test for context set/get param input validation.");
>   
> @@ -36,17 +37,6 @@ IGT_TEST_DESCRIPTION("Basic test for context set/get param input validation.");
>   #define NEW_CTX	BIT(0)
>   #define USER BIT(1)
>   
> -static int reopen_driver(int fd)
> -{
> -	char path[256];
> -
> -	snprintf(path, sizeof(path), "/proc/self/fd/%d", fd);
> -	fd = open(path, O_RDWR);
> -	igt_assert_lte(0, fd);
> -
> -	return fd;
> -}
> -
>   static void set_priority(int i915)
>   {
>   	static const int64_t test_values[] = {
> @@ -91,7 +81,7 @@ static void set_priority(int i915)
>   	igt_permute_array(values, size, igt_exchange_int64);
>   
>   	igt_fork(flags, NEW_CTX | USER) {
> -		int fd = reopen_driver(i915);
> +		int fd = gem_reopen_driver(i915);
>   		struct drm_i915_gem_context_param arg = {
>   			.param = I915_CONTEXT_PARAM_PRIORITY,
>   			.ctx_id = flags & NEW_CTX ? gem_context_create(fd) : 0,
> @@ -143,6 +133,96 @@ static void set_priority(int i915)
>   	free(values);
>   }
>   
> +static uint32_t __batch_create(int i915, uint32_t offset)
> +{
> +	const uint32_t bbe = MI_BATCH_BUFFER_END;
> +	uint32_t handle;
> +
> +	handle = gem_create(i915, ALIGN(offset + 4, 4096));
> +	gem_write(i915, handle, offset, &bbe, sizeof(bbe));
> +
> +	return handle;
> +}
> +
> +static uint32_t batch_create(int i915)
> +{
> +	return __batch_create(i915, 0);
> +}
> +
> +static void test_vm(int i915)
> +{
> +	const uint64_t nonzero_offset = 48 << 20;
> +	struct drm_i915_gem_exec_object2 batch = {
> +		.handle = batch_create(i915),
> +	};
> +	struct drm_i915_gem_execbuffer2 eb = {
> +		.buffers_ptr = to_user_pointer(&batch),
> +		.buffer_count = 1,
> +	};
> +	struct drm_i915_gem_context_param arg = {
> +		.param = I915_CONTEXT_PARAM_VM,
> +	};
> +	uint32_t parent, child;
> +
> +	arg.value = -1ull;
> +	igt_require(__gem_context_set_param(i915, &arg) == -ENOENT);
> +
> +	parent = gem_context_create(i915);
> +	child = gem_context_create(i915);
> +
> +	/* Using implicit soft-pinning */
> +	eb.rsvd1 = parent;
> +	batch.offset = nonzero_offset;
> +	gem_execbuf(i915, &eb);
> +	igt_assert_eq_u64(batch.offset, nonzero_offset);
> +
> +	eb.rsvd1 = child;
> +	batch.offset = 0;
> +	gem_execbuf(i915, &eb);
> +	igt_assert_eq_u64(batch.offset, 0);
> +
> +	eb.rsvd1 = parent;
> +	gem_execbuf(i915, &eb);
> +	igt_assert_eq_u64(batch.offset, nonzero_offset);
> +
> +	arg.ctx_id = parent;
> +	gem_context_get_param(i915, &arg);
> +	gem_context_set_param(i915, &arg);
> +
> +	/* Still the same VM, so expect the old VMA again */
> +	batch.offset = 0;
> +	gem_execbuf(i915, &eb);
> +	igt_assert_eq_u64(batch.offset, nonzero_offset);
> +
> +	arg.ctx_id = child;
> +	gem_context_set_param(i915, &arg);
> +
> +	eb.rsvd1 = child;
> +	batch.offset = 0;
> +	gem_execbuf(i915, &eb);
> +	igt_assert_eq_u64(batch.offset, nonzero_offset);
> +
> +	gem_context_destroy(i915, child);
> +	gem_context_destroy(i915, parent);

High-level commentary is lacking, so my condolences to future readers.

I'd also add a get_vm after set_vm, just to check that it reads back
the same VM.
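
Something along these lines after the set_param would do (a rough
sketch, just reading the param back with the same struct layout as
used above):

	/* read the VM param back and check it is the one we just set */
	struct drm_i915_gem_context_param check = {
		.ctx_id = arg.ctx_id,
		.param = I915_CONTEXT_PARAM_VM,
	};

	gem_context_get_param(i915, &check);
	igt_assert_eq_u64(check.value, arg.value);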

Otherwise:

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

> +
> +	/* both contexts destroyed, but we still keep hold of the vm */
> +	child = gem_context_create(i915);
> +
> +	arg.ctx_id = child;
> +	gem_context_set_param(i915, &arg);
> +
> +	eb.rsvd1 = child;
> +	batch.offset = 0;
> +	gem_execbuf(i915, &eb);
> +	igt_assert_eq_u64(batch.offset, nonzero_offset);
> +
> +	gem_context_destroy(i915, child);
> +	gem_vm_destroy(i915, arg.value);
> +
> +	gem_sync(i915, batch.handle);
> +	gem_close(i915, batch.handle);
> +}
> +
>   igt_main
>   {
>   	struct drm_i915_gem_context_param arg;
> @@ -253,6 +333,9 @@ igt_main
>   		gem_context_set_param(fd, &arg);
>   	}
>   
> +	igt_subtest("vm")
> +		test_vm(fd);
> +
>   	arg.param = I915_CONTEXT_PARAM_PRIORITY;
>   
>   	igt_subtest("set-priority-not-supported") {
> 
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH i-g-t 05/16] i915/gem_ctx_create: Basic checks for constructor properties
  2019-05-08 10:09   ` [igt-dev] " Chris Wilson
@ 2019-05-14 10:15     ` Tvrtko Ursulin
  -1 siblings, 0 replies; 92+ messages in thread
From: Tvrtko Ursulin @ 2019-05-14 10:15 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: igt-dev


On 08/05/2019 11:09, Chris Wilson wrote:
> Check that the extended create interface accepts setparam.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   tests/i915/gem_ctx_create.c | 225 ++++++++++++++++++++++++++++++++++--
>   1 file changed, 213 insertions(+), 12 deletions(-)
> 
> diff --git a/tests/i915/gem_ctx_create.c b/tests/i915/gem_ctx_create.c
> index a664070db..9b4fddbe7 100644
> --- a/tests/i915/gem_ctx_create.c
> +++ b/tests/i915/gem_ctx_create.c
> @@ -33,6 +33,7 @@
>   #include <time.h>
>   
>   #include "igt_rand.h"
> +#include "sw_sync.h"
>   
>   #define LOCAL_I915_EXEC_BSD_SHIFT      (13)
>   #define LOCAL_I915_EXEC_BSD_MASK       (3 << LOCAL_I915_EXEC_BSD_SHIFT)
> @@ -45,12 +46,33 @@ static unsigned all_nengine;
>   static unsigned ppgtt_engines[16];
>   static unsigned ppgtt_nengine;
>   
> -static int __gem_context_create_local(int fd, struct drm_i915_gem_context_create *arg)
> +static int create_ioctl(int fd, struct drm_i915_gem_context_create *arg)
>   {
> -	int ret = 0;
> -	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, arg))
> -		ret = -errno;
> -	return ret;
> +	int err;
> +
> +	err = 0;
> +	if (igt_ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, arg)) {
> +		err = -errno;
> +		igt_assert(err);
> +	}
> +
> +	errno = 0;
> +	return err;
> +}
> +
> +static int create_ext_ioctl(int i915,
> +			    struct drm_i915_gem_context_create_ext *arg)
> +{
> +	int err;
> +
> +	err = 0;
> +	if (igt_ioctl(i915, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, arg)) {
> +		err = -errno;
> +		igt_assume(err);
> +	}
> +
> +	errno = 0;
> +	return err;
>   }
>   
>   static double elapsed(const struct timespec *start,
> @@ -308,6 +330,187 @@ static void maximum(int fd, int ncpus, unsigned mode)
>   	free(contexts);
>   }
>   
> +static void basic_ext_param(int i915)
> +{
> +	struct drm_i915_gem_context_create_ext_setparam ext = {
> +		{ .name = I915_CONTEXT_CREATE_EXT_SETPARAM },
> +	};
> +	struct drm_i915_gem_context_create_ext create = {
> +		.flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS
> +	};
> +	struct drm_i915_gem_context_param get;
> +
> +	igt_require(create_ext_ioctl(i915, &create) == 0);
> +	gem_context_destroy(i915, create.ctx_id);
> +
> +	create.extensions = -1ull;
> +	igt_assert_eq(create_ext_ioctl(i915, &create), -EFAULT);
> +
> +	create.extensions = to_user_pointer(&ext);
> +	igt_assert_eq(create_ext_ioctl(i915, &create), -EINVAL);
> +
> +	ext.param.param = I915_CONTEXT_PARAM_PRIORITY;
> +	if (create_ext_ioctl(i915, &create) != -ENODEV) {
> +		gem_context_destroy(i915, create.ctx_id);
> +
> +		ext.base.next_extension = -1ull;
> +		igt_assert_eq(create_ext_ioctl(i915, &create), -EFAULT);
> +		ext.base.next_extension = to_user_pointer(&ext);
> +		igt_assert_eq(create_ext_ioctl(i915, &create), -E2BIG);
> +		ext.base.next_extension = 0;
> +
> +		ext.param.value = 32;
> +		igt_assert_eq(create_ext_ioctl(i915, &create), 0);
> +
> +		memset(&get, 0, sizeof(get));
> +		get.ctx_id = create.ctx_id;
> +		get.param = I915_CONTEXT_PARAM_PRIORITY;
> +		gem_context_get_param(i915, &get);
> +		igt_assert_eq(get.value, ext.param.value);
> +
> +		gem_context_destroy(i915, create.ctx_id);
> +	}
> +}
> +
> +static void check_single_timeline(int i915, uint32_t ctx, int num_engines)
> +{
> +#define RCS_TIMESTAMP (0x2000 + 0x358)
> +	const int gen = intel_gen(intel_get_drm_devid(i915));
> +	const int has_64bit_reloc = gen >= 8;
> +	struct drm_i915_gem_exec_object2 results = { .handle = gem_create(i915, 4096) };
> +	const uint32_t bbe = MI_BATCH_BUFFER_END;
> +	int timeline = sw_sync_timeline_create();
> +	uint32_t last, *map;
> +
> +	{
> +		struct drm_i915_gem_execbuffer2 execbuf = {
> +			.buffers_ptr = to_user_pointer(&results),
> +			.buffer_count = 1,
> +			.rsvd1 = ctx,
> +		};
> +		gem_write(i915, results.handle, 0, &bbe, sizeof(bbe));
> +		gem_execbuf(i915, &execbuf);
> +		results.flags = EXEC_OBJECT_PINNED;
> +	}
> +
> +	for (int i = 0; i < num_engines; i++) {
> +		struct drm_i915_gem_exec_object2 obj[2] = {
> +			results, /* write hazard lies! */
> +			{ .handle = gem_create(i915, 4096) },
> +		};
> +		struct drm_i915_gem_execbuffer2 execbuf = {
> +			.buffers_ptr = to_user_pointer(obj),
> +			.buffer_count = 2,
> +			.rsvd1 = ctx,
> +			.rsvd2 = sw_sync_timeline_create_fence(timeline, num_engines - i),
> +			.flags = i | I915_EXEC_FENCE_IN,
> +		};
> +		uint64_t offset = results.offset + 4 * i;
> +		uint32_t *cs;
> +		int j = 0;
> +
> +		cs = gem_mmap__cpu(i915, obj[1].handle, 0, 4096, PROT_WRITE);
> +
> +		cs[j] = 0x24 << 23 | 1; /* SRM */
> +		if (has_64bit_reloc)
> +			cs[j]++;
> +		j++;
> +		cs[j++] = RCS_TIMESTAMP;
> +		cs[j++] = offset;
> +		if (has_64bit_reloc)
> +			cs[j++] = offset >> 32;
> +		cs[j++] = MI_BATCH_BUFFER_END;
> +
> +		munmap(cs, 4096);
> +
> +		gem_execbuf(i915, &execbuf);
> +		gem_close(i915, obj[1].handle);
> +		close(execbuf.rsvd2);
> +	}
> +	close(timeline);
> +	gem_sync(i915, results.handle);
> +
> +	map = gem_mmap__cpu(i915, results.handle, 0, 4096, PROT_READ);
> +	gem_set_domain(i915, results.handle, I915_GEM_DOMAIN_CPU, 0);
> +	gem_close(i915, results.handle);
> +
> +	last = map[0];
> +	for (int i = 1; i < num_engines; i++) {
> +		igt_assert_f((map[i] - last) > 0,
> +			     "Engine instance [%d] executed too early: this:%x, last:%x\n",
> +			     i, map[i], last);
> +		last = map[i];
> +	}

Hm.. aren't two sw fences (two seqnos) just a needless complication,
since the execution order in a single timeline is controlled by
submission order? The statement only holds when compounded with the
fact that you signal both fences at the same time. Consider what would
happen if it wasn't a single-timeline context: the fences would be
signaled in order, but execution would not have to happen in order.
That it does is a property of the single timeline, not of fence
ordering. So two input fences with two seqnos is misleading; a single
plug would do, I think.

Or are you thinking of nudging the driver to do the right thing? In
that case I think you'd need to manually advance the first seqno (2nd
batch) first and wait a bit to check that it hasn't been executed, then
signal the second seqno (first batch) and run the above check to see
that they have been executed in order.
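
For the single-plug variant I mean something like this (a rough,
untested sketch against the loop in the patch; the obj[] and batch
setup stay exactly as they are and are elided here):

	/* One fence at seqno 1 plugs every batch; when the timeline
	 * advances they all become runnable at the same instant, so any
	 * ordering we then observe comes from submission order alone.
	 */
	int plug = sw_sync_timeline_create_fence(timeline, 1);

	for (int i = 0; i < num_engines; i++) {
		struct drm_i915_gem_execbuffer2 execbuf = {
			.buffers_ptr = to_user_pointer(obj),
			.buffer_count = 2,
			.rsvd1 = ctx,
			.rsvd2 = dup(plug), /* same fence for everyone */
			.flags = i | I915_EXEC_FENCE_IN,
		};

		gem_execbuf(i915, &execbuf);
		close(execbuf.rsvd2);
	}

	close(plug);
	sw_sync_timeline_inc(timeline, 1); /* unplug all at once */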

> +	munmap(map, 4096);
> +}
> +
> +static void iris_pipeline(int i915)
> +{
> +#ifdef I915_DEFINE_CONTEXT_PARAM_ENGINES

Remove this, I expect?

> +#define RCS0 {0, 0}
> +	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 2) = {
> +		.engines = { RCS0, RCS0 }
> +	};
> +	struct drm_i915_gem_context_create_ext_setparam p_engines = {
> +		.base = {
> +			.name = I915_CONTEXT_CREATE_EXT_SETPARAM,
> +			.next_extension = 0, /* end of chain */
> +		},
> +		.param = {
> +			.param = I915_CONTEXT_PARAM_ENGINES,
> +			.value = to_user_pointer(&engines),
> +			.size = sizeof(engines),
> +		},
> +	};
> +	struct drm_i915_gem_context_create_ext_setparam p_recover = {
> +		.base = {
> +			.name = I915_CONTEXT_CREATE_EXT_SETPARAM,
> +			.next_extension = to_user_pointer(&p_engines),
> +		},
> +		.param = {
> +			.param = I915_CONTEXT_PARAM_RECOVERABLE,
> +			.value = 0,
> +		},
> +	};
> +	struct drm_i915_gem_context_create_ext_setparam p_prio = {
> +		.base = {
> +			.name = I915_CONTEXT_CREATE_EXT_SETPARAM,
> +			.next_extension = to_user_pointer(&p_recover),
> +		},
> +		.param = {
> +			.param = I915_CONTEXT_PARAM_PRIORITY,
> +			.value = 768,
> +		},
> +	};
> +	struct drm_i915_gem_context_create_ext create = {
> +		.flags = (I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE |
> +			  I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS),
> +	};
> +	struct drm_i915_gem_context_param get;
> +
> +	igt_require(create_ext_ioctl(i915, &create) == 0);

A context destroy is needed here, I think.
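
I.e. something like (sketch):

	igt_require(create_ext_ioctl(i915, &create) == 0);
	/* don't leak the context created by the feature probe */
	gem_context_destroy(i915, create.ctx_id);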

> +
> +	create.extensions = to_user_pointer(&p_prio);
> +	igt_assert_eq(create_ext_ioctl(i915, &create), 0);
> +
> +	memset(&get, 0, sizeof(get));
> +	get.ctx_id = create.ctx_id;
> +	get.param = I915_CONTEXT_PARAM_PRIORITY;
> +	gem_context_get_param(i915, &get);
> +	igt_assert_eq(get.value, p_prio.param.value);
> +
> +	memset(&get, 0, sizeof(get));
> +	get.ctx_id = create.ctx_id;
> +	get.param = I915_CONTEXT_PARAM_RECOVERABLE;
> +	gem_context_get_param(i915, &get);
> +	igt_assert_eq(get.value, 0);
> +
> +	check_single_timeline(i915, create.ctx_id, 2);
> +
> +	gem_context_destroy(i915, create.ctx_id);
> +#endif /* I915_DEFINE_CONTEXT_PARAM_ENGINES */
> +}
> +
>   igt_main
>   {
>   	const int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
> @@ -340,17 +543,15 @@ igt_main
>   		memset(&create, 0, sizeof(create));
>   		create.ctx_id = rand();
>   		create.pad = 0;
> -		igt_assert_eq(__gem_context_create_local(fd, &create), 0);
> +		igt_assert_eq(create_ioctl(fd, &create), 0);
>   		igt_assert(create.ctx_id != 0);
>   		gem_context_destroy(fd, create.ctx_id);
>   	}
>   
> -	igt_subtest("invalid-pad") {
> -		memset(&create, 0, sizeof(create));
> -		create.ctx_id = rand();
> -		create.pad = 1;
> -		igt_assert_eq(__gem_context_create_local(fd, &create), -EINVAL);
> -	}
> +	igt_subtest("ext-param")
> +		basic_ext_param(fd);

basic-ext-param? Do we even rely on the basic prefix these days?

> +	igt_subtest("iris-pipeline")
> +		iris_pipeline(fd);
>   
>   	igt_subtest("maximum-mem")
>   		maximum(fd, ncpus, CHECK_RAM);
> 

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [igt-dev] [PATCH i-g-t 05/16] i915/gem_ctx_create: Basic checks for constructor properties
@ 2019-05-14 10:15     ` Tvrtko Ursulin
  0 siblings, 0 replies; 92+ messages in thread
From: Tvrtko Ursulin @ 2019-05-14 10:15 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: igt-dev


On 08/05/2019 11:09, Chris Wilson wrote:
> Check that the extended create interface accepts setparam.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   tests/i915/gem_ctx_create.c | 225 ++++++++++++++++++++++++++++++++++--
>   1 file changed, 213 insertions(+), 12 deletions(-)
> 
> diff --git a/tests/i915/gem_ctx_create.c b/tests/i915/gem_ctx_create.c
> index a664070db..9b4fddbe7 100644
> --- a/tests/i915/gem_ctx_create.c
> +++ b/tests/i915/gem_ctx_create.c
> @@ -33,6 +33,7 @@
>   #include <time.h>
>   
>   #include "igt_rand.h"
> +#include "sw_sync.h"
>   
>   #define LOCAL_I915_EXEC_BSD_SHIFT      (13)
>   #define LOCAL_I915_EXEC_BSD_MASK       (3 << LOCAL_I915_EXEC_BSD_SHIFT)
> @@ -45,12 +46,33 @@ static unsigned all_nengine;
>   static unsigned ppgtt_engines[16];
>   static unsigned ppgtt_nengine;
>   
> -static int __gem_context_create_local(int fd, struct drm_i915_gem_context_create *arg)
> +static int create_ioctl(int fd, struct drm_i915_gem_context_create *arg)
>   {
> -	int ret = 0;
> -	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, arg))
> -		ret = -errno;
> -	return ret;
> +	int err;
> +
> +	err = 0;
> +	if (igt_ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, arg)) {
> +		err = -errno;
> +		igt_assert(err);
> +	}
> +
> +	errno = 0;
> +	return err;
> +}
> +
> +static int create_ext_ioctl(int i915,
> +			    struct drm_i915_gem_context_create_ext *arg)
> +{
> +	int err;
> +
> +	err = 0;
> +	if (igt_ioctl(i915, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, arg)) {
> +		err = -errno;
> +		igt_assume(err);
> +	}
> +
> +	errno = 0;
> +	return err;
>   }
>   
>   static double elapsed(const struct timespec *start,
> @@ -308,6 +330,187 @@ static void maximum(int fd, int ncpus, unsigned mode)
>   	free(contexts);
>   }
>   
> +static void basic_ext_param(int i915)
> +{
> +	struct drm_i915_gem_context_create_ext_setparam ext = {
> +		{ .name = I915_CONTEXT_CREATE_EXT_SETPARAM },
> +	};
> +	struct drm_i915_gem_context_create_ext create = {
> +		.flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS
> +	};
> +	struct drm_i915_gem_context_param get;
> +
> +	igt_require(create_ext_ioctl(i915, &create) == 0);
> +	gem_context_destroy(i915, create.ctx_id);
> +
> +	create.extensions = -1ull;
> +	igt_assert_eq(create_ext_ioctl(i915, &create), -EFAULT);
> +
> +	create.extensions = to_user_pointer(&ext);
> +	igt_assert_eq(create_ext_ioctl(i915, &create), -EINVAL);
> +
> +	ext.param.param = I915_CONTEXT_PARAM_PRIORITY;
> +	if (create_ext_ioctl(i915, &create) != -ENODEV) {
> +		gem_context_destroy(i915, create.ctx_id);
> +
> +		ext.base.next_extension = -1ull;
> +		igt_assert_eq(create_ext_ioctl(i915, &create), -EFAULT);
> +		ext.base.next_extension = to_user_pointer(&ext);
> +		igt_assert_eq(create_ext_ioctl(i915, &create), -E2BIG);
> +		ext.base.next_extension = 0;
> +
> +		ext.param.value = 32;
> +		igt_assert_eq(create_ext_ioctl(i915, &create), 0);
> +
> +		memset(&get, 0, sizeof(get));
> +		get.ctx_id = create.ctx_id;
> +		get.param = I915_CONTEXT_PARAM_PRIORITY;
> +		gem_context_get_param(i915, &get);
> +		igt_assert_eq(get.value, ext.param.value);
> +
> +		gem_context_destroy(i915, create.ctx_id);
> +	}
> +}
> +
> +static void check_single_timeline(int i915, uint32_t ctx, int num_engines)
> +{
> +#define RCS_TIMESTAMP (0x2000 + 0x358)
> +	const int gen = intel_gen(intel_get_drm_devid(i915));
> +	const int has_64bit_reloc = gen >= 8;
> +	struct drm_i915_gem_exec_object2 results = { .handle = gem_create(i915, 4096) };
> +	const uint32_t bbe = MI_BATCH_BUFFER_END;
> +	int timeline = sw_sync_timeline_create();
> +	uint32_t last, *map;
> +
> +	{
> +		struct drm_i915_gem_execbuffer2 execbuf = {
> +			.buffers_ptr = to_user_pointer(&results),
> +			.buffer_count = 1,
> +			.rsvd1 = ctx,
> +		};
> +		gem_write(i915, results.handle, 0, &bbe, sizeof(bbe));
> +		gem_execbuf(i915, &execbuf);
> +		results.flags = EXEC_OBJECT_PINNED;
> +	}
> +
> +	for (int i = 0; i < num_engines; i++) {
> +		struct drm_i915_gem_exec_object2 obj[2] = {
> +			results, /* write hazard lies! */
> +			{ .handle = gem_create(i915, 4096) },
> +		};
> +		struct drm_i915_gem_execbuffer2 execbuf = {
> +			.buffers_ptr = to_user_pointer(obj),
> +			.buffer_count = 2,
> +			.rsvd1 = ctx,
> +			.rsvd2 = sw_sync_timeline_create_fence(timeline, num_engines - i),
> +			.flags = i | I915_EXEC_FENCE_IN,
> +		};
> +		uint64_t offset = results.offset + 4 * i;
> +		uint32_t *cs;
> +		int j = 0;
> +
> +		cs = gem_mmap__cpu(i915, obj[1].handle, 0, 4096, PROT_WRITE);
> +
> +		cs[j] = 0x24 << 23 | 1; /* SRM */
> +		if (has_64bit_reloc)
> +			cs[j]++;
> +		j++;
> +		cs[j++] = RCS_TIMESTAMP;
> +		cs[j++] = offset;
> +		if (has_64bit_reloc)
> +			cs[j++] = offset >> 32;
> +		cs[j++] = MI_BATCH_BUFFER_END;
> +
> +		munmap(cs, 4096);
> +
> +		gem_execbuf(i915, &execbuf);
> +		gem_close(i915, obj[1].handle);
> +		close(execbuf.rsvd2);
> +	}
> +	close(timeline);
> +	gem_sync(i915, results.handle);
> +
> +	map = gem_mmap__cpu(i915, results.handle, 0, 4096, PROT_READ);
> +	gem_set_domain(i915, results.handle, I915_GEM_DOMAIN_CPU, 0);
> +	gem_close(i915, results.handle);
> +
> +	last = map[0];
> +	for (int i = 1; i < num_engines; i++) {
> +		igt_assert_f((map[i] - last) > 0,
> +			     "Engine instance [%d] executed too early: this:%x, last:%x\n",
> +			     i, map[i], last);
> +		last = map[i];
> +	}

Hm.. aren't two sw fences (two seqnos) just a needless complication,
since the execution order in a single timeline is controlled by
submission order? The statement only holds when compounded with the
fact that you signal both fences at the same time. Consider what would
happen if it wasn't a single-timeline context: the fences would be
signaled in order, but execution would not have to happen in order.
That it does is a property of the single timeline, not of fence
ordering. So two input fences with two seqnos is misleading; a single
plug would do, I think.

Or are you thinking of nudging the driver to do the right thing? In
that case I think you'd need to manually advance the first seqno (2nd
batch) first and wait a bit to check that it hasn't been executed, then
signal the second seqno (first batch) and run the above check to see
that they have been executed in order.
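
For the single-plug variant I mean something like this (a rough,
untested sketch against the loop in the patch; the obj[] and batch
setup stay exactly as they are and are elided here):

	/* One fence at seqno 1 plugs every batch; when the timeline
	 * advances they all become runnable at the same instant, so any
	 * ordering we then observe comes from submission order alone.
	 */
	int plug = sw_sync_timeline_create_fence(timeline, 1);

	for (int i = 0; i < num_engines; i++) {
		struct drm_i915_gem_execbuffer2 execbuf = {
			.buffers_ptr = to_user_pointer(obj),
			.buffer_count = 2,
			.rsvd1 = ctx,
			.rsvd2 = dup(plug), /* same fence for everyone */
			.flags = i | I915_EXEC_FENCE_IN,
		};

		gem_execbuf(i915, &execbuf);
		close(execbuf.rsvd2);
	}

	close(plug);
	sw_sync_timeline_inc(timeline, 1); /* unplug all at once */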

> +	munmap(map, 4096);
> +}
> +
> +static void iris_pipeline(int i915)
> +{
> +#ifdef I915_DEFINE_CONTEXT_PARAM_ENGINES

Remove this, I expect?

> +#define RCS0 {0, 0}
> +	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 2) = {
> +		.engines = { RCS0, RCS0 }
> +	};
> +	struct drm_i915_gem_context_create_ext_setparam p_engines = {
> +		.base = {
> +			.name = I915_CONTEXT_CREATE_EXT_SETPARAM,
> +			.next_extension = 0, /* end of chain */
> +		},
> +		.param = {
> +			.param = I915_CONTEXT_PARAM_ENGINES,
> +			.value = to_user_pointer(&engines),
> +			.size = sizeof(engines),
> +		},
> +	};
> +	struct drm_i915_gem_context_create_ext_setparam p_recover = {
> +		.base = {
> +			.name = I915_CONTEXT_CREATE_EXT_SETPARAM,
> +			.next_extension = to_user_pointer(&p_engines),
> +		},
> +		.param = {
> +			.param = I915_CONTEXT_PARAM_RECOVERABLE,
> +			.value = 0,
> +		},
> +	};
> +	struct drm_i915_gem_context_create_ext_setparam p_prio = {
> +		.base = {
> +			.name = I915_CONTEXT_CREATE_EXT_SETPARAM,
> +			.next_extension = to_user_pointer(&p_recover),
> +		},
> +		.param = {
> +			.param = I915_CONTEXT_PARAM_PRIORITY,
> +			.value = 768,
> +		},
> +	};
> +	struct drm_i915_gem_context_create_ext create = {
> +		.flags = (I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE |
> +			  I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS),
> +	};
> +	struct drm_i915_gem_context_param get;
> +
> +	igt_require(create_ext_ioctl(i915, &create) == 0);

A context destroy is needed here, I think.
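
I.e. something like (sketch):

	igt_require(create_ext_ioctl(i915, &create) == 0);
	/* don't leak the context created by the feature probe */
	gem_context_destroy(i915, create.ctx_id);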

> +
> +	create.extensions = to_user_pointer(&p_prio);
> +	igt_assert_eq(create_ext_ioctl(i915, &create), 0);
> +
> +	memset(&get, 0, sizeof(get));
> +	get.ctx_id = create.ctx_id;
> +	get.param = I915_CONTEXT_PARAM_PRIORITY;
> +	gem_context_get_param(i915, &get);
> +	igt_assert_eq(get.value, p_prio.param.value);
> +
> +	memset(&get, 0, sizeof(get));
> +	get.ctx_id = create.ctx_id;
> +	get.param = I915_CONTEXT_PARAM_RECOVERABLE;
> +	gem_context_get_param(i915, &get);
> +	igt_assert_eq(get.value, 0);
> +
> +	check_single_timeline(i915, create.ctx_id, 2);
> +
> +	gem_context_destroy(i915, create.ctx_id);
> +#endif /* I915_DEFINE_CONTEXT_PARAM_ENGINES */
> +}
> +
>   igt_main
>   {
>   	const int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
> @@ -340,17 +543,15 @@ igt_main
>   		memset(&create, 0, sizeof(create));
>   		create.ctx_id = rand();
>   		create.pad = 0;
> -		igt_assert_eq(__gem_context_create_local(fd, &create), 0);
> +		igt_assert_eq(create_ioctl(fd, &create), 0);
>   		igt_assert(create.ctx_id != 0);
>   		gem_context_destroy(fd, create.ctx_id);
>   	}
>   
> -	igt_subtest("invalid-pad") {
> -		memset(&create, 0, sizeof(create));
> -		create.ctx_id = rand();
> -		create.pad = 1;
> -		igt_assert_eq(__gem_context_create_local(fd, &create), -EINVAL);
> -	}
> +	igt_subtest("ext-param")
> +		basic_ext_param(fd);

basic-ext-param? Do we even rely on the basic prefix these days?

> +	igt_subtest("iris-pipeline")
> +		iris_pipeline(fd);
>   
>   	igt_subtest("maximum-mem")
>   		maximum(fd, ncpus, CHECK_RAM);
> 

Regards,

Tvrtko
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH i-g-t 05/16] i915/gem_ctx_create: Basic checks for constructor properties
  2019-05-08 10:09   ` [igt-dev] " Chris Wilson
@ 2019-05-14 12:27     ` Tvrtko Ursulin
  -1 siblings, 0 replies; 92+ messages in thread
From: Tvrtko Ursulin @ 2019-05-14 12:27 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: igt-dev


On 08/05/2019 11:09, Chris Wilson wrote:
> Check that the extended create interface accepts setparam.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   tests/i915/gem_ctx_create.c | 225 ++++++++++++++++++++++++++++++++++--
>   1 file changed, 213 insertions(+), 12 deletions(-)
> 
> diff --git a/tests/i915/gem_ctx_create.c b/tests/i915/gem_ctx_create.c
> index a664070db..9b4fddbe7 100644
> --- a/tests/i915/gem_ctx_create.c
> +++ b/tests/i915/gem_ctx_create.c
> @@ -33,6 +33,7 @@
>   #include <time.h>
>   
>   #include "igt_rand.h"
> +#include "sw_sync.h"
>   
>   #define LOCAL_I915_EXEC_BSD_SHIFT      (13)
>   #define LOCAL_I915_EXEC_BSD_MASK       (3 << LOCAL_I915_EXEC_BSD_SHIFT)
> @@ -45,12 +46,33 @@ static unsigned all_nengine;
>   static unsigned ppgtt_engines[16];
>   static unsigned ppgtt_nengine;
>   
> -static int __gem_context_create_local(int fd, struct drm_i915_gem_context_create *arg)
> +static int create_ioctl(int fd, struct drm_i915_gem_context_create *arg)
>   {
> -	int ret = 0;
> -	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, arg))
> -		ret = -errno;
> -	return ret;
> +	int err;
> +
> +	err = 0;
> +	if (igt_ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, arg)) {
> +		err = -errno;
> +		igt_assert(err);
> +	}
> +
> +	errno = 0;
> +	return err;
> +}
> +
> +static int create_ext_ioctl(int i915,
> +			    struct drm_i915_gem_context_create_ext *arg)
> +{
> +	int err;
> +
> +	err = 0;
> +	if (igt_ioctl(i915, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, arg)) {
> +		err = -errno;
> +		igt_assume(err);
> +	}
> +
> +	errno = 0;
> +	return err;
>   }
>   
>   static double elapsed(const struct timespec *start,
> @@ -308,6 +330,187 @@ static void maximum(int fd, int ncpus, unsigned mode)
>   	free(contexts);
>   }
>   
> +static void basic_ext_param(int i915)
> +{
> +	struct drm_i915_gem_context_create_ext_setparam ext = {
> +		{ .name = I915_CONTEXT_CREATE_EXT_SETPARAM },
> +	};
> +	struct drm_i915_gem_context_create_ext create = {
> +		.flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS
> +	};
> +	struct drm_i915_gem_context_param get;
> +
> +	igt_require(create_ext_ioctl(i915, &create) == 0);
> +	gem_context_destroy(i915, create.ctx_id);
> +
> +	create.extensions = -1ull;
> +	igt_assert_eq(create_ext_ioctl(i915, &create), -EFAULT);
> +
> +	create.extensions = to_user_pointer(&ext);
> +	igt_assert_eq(create_ext_ioctl(i915, &create), -EINVAL);

I think this is the unknown-param case, right?

We also need another -EINVAL test for a non-zero ext.ctx_id on input.
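
Along the lines of this (rough sketch; assuming the kernel treats
ctx_id strictly as an output here):

	/* ctx_id is an output parameter; junk on input must be rejected */
	memset(&create, 0, sizeof(create));
	create.flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS;
	create.ctx_id = 1;
	igt_assert_eq(create_ext_ioctl(i915, &create), -EINVAL);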

Regards,

Tvrtko

> +
> +	ext.param.param = I915_CONTEXT_PARAM_PRIORITY;
> +	if (create_ext_ioctl(i915, &create) != -ENODEV) {
> +		gem_context_destroy(i915, create.ctx_id);
> +
> +		ext.base.next_extension = -1ull;
> +		igt_assert_eq(create_ext_ioctl(i915, &create), -EFAULT);
> +		ext.base.next_extension = to_user_pointer(&ext);
> +		igt_assert_eq(create_ext_ioctl(i915, &create), -E2BIG);
> +		ext.base.next_extension = 0;
> +
> +		ext.param.value = 32;
> +		igt_assert_eq(create_ext_ioctl(i915, &create), 0);
> +
> +		memset(&get, 0, sizeof(get));
> +		get.ctx_id = create.ctx_id;
> +		get.param = I915_CONTEXT_PARAM_PRIORITY;
> +		gem_context_get_param(i915, &get);
> +		igt_assert_eq(get.value, ext.param.value);
> +
> +		gem_context_destroy(i915, create.ctx_id);
> +	}
> +}
> +
> +static void check_single_timeline(int i915, uint32_t ctx, int num_engines)
> +{
> +#define RCS_TIMESTAMP (0x2000 + 0x358)
> +	const int gen = intel_gen(intel_get_drm_devid(i915));
> +	const int has_64bit_reloc = gen >= 8;
> +	struct drm_i915_gem_exec_object2 results = { .handle = gem_create(i915, 4096) };
> +	const uint32_t bbe = MI_BATCH_BUFFER_END;
> +	int timeline = sw_sync_timeline_create();
> +	uint32_t last, *map;
> +
> +	{
> +		struct drm_i915_gem_execbuffer2 execbuf = {
> +			.buffers_ptr = to_user_pointer(&results),
> +			.buffer_count = 1,
> +			.rsvd1 = ctx,
> +		};
> +		gem_write(i915, results.handle, 0, &bbe, sizeof(bbe));
> +		gem_execbuf(i915, &execbuf);
> +		results.flags = EXEC_OBJECT_PINNED;
> +	}
> +
> +	for (int i = 0; i < num_engines; i++) {
> +		struct drm_i915_gem_exec_object2 obj[2] = {
> +			results, /* write hazard lies! */
> +			{ .handle = gem_create(i915, 4096) },
> +		};
> +		struct drm_i915_gem_execbuffer2 execbuf = {
> +			.buffers_ptr = to_user_pointer(obj),
> +			.buffer_count = 2,
> +			.rsvd1 = ctx,
> +			.rsvd2 = sw_sync_timeline_create_fence(timeline, num_engines - i),
> +			.flags = i | I915_EXEC_FENCE_IN,
> +		};
> +		uint64_t offset = results.offset + 4 * i;
> +		uint32_t *cs;
> +		int j = 0;
> +
> +		cs = gem_mmap__cpu(i915, obj[1].handle, 0, 4096, PROT_WRITE);
> +
> +		cs[j] = 0x24 << 23 | 1; /* SRM */
> +		if (has_64bit_reloc)
> +			cs[j]++;
> +		j++;
> +		cs[j++] = RCS_TIMESTAMP;
> +		cs[j++] = offset;
> +		if (has_64bit_reloc)
> +			cs[j++] = offset >> 32;
> +		cs[j++] = MI_BATCH_BUFFER_END;
> +
> +		munmap(cs, 4096);
> +
> +		gem_execbuf(i915, &execbuf);
> +		gem_close(i915, obj[1].handle);
> +		close(execbuf.rsvd2);
> +	}
> +	close(timeline);
> +	gem_sync(i915, results.handle);
> +
> +	map = gem_mmap__cpu(i915, results.handle, 0, 4096, PROT_READ);
> +	gem_set_domain(i915, results.handle, I915_GEM_DOMAIN_CPU, 0);
> +	gem_close(i915, results.handle);
> +
> +	last = map[0];
> +	for (int i = 1; i < num_engines; i++) {
> +		igt_assert_f((map[i] - last) > 0,
> +			     "Engine instance [%d] executed too early: this:%x, last:%x\n",
> +			     i, map[i], last);
> +		last = map[i];
> +	}
> +	munmap(map, 4096);
> +}
> +
> +static void iris_pipeline(int i915)
> +{
> +#ifdef I915_DEFINE_CONTEXT_PARAM_ENGINES
> +#define RCS0 {0, 0}
> +	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 2) = {
> +		.engines = { RCS0, RCS0 }
> +	};
> +	struct drm_i915_gem_context_create_ext_setparam p_engines = {
> +		.base = {
> +			.name = I915_CONTEXT_CREATE_EXT_SETPARAM,
> +			.next_extension = 0, /* end of chain */
> +		},
> +		.param = {
> +			.param = I915_CONTEXT_PARAM_ENGINES,
> +			.value = to_user_pointer(&engines),
> +			.size = sizeof(engines),
> +		},
> +	};
> +	struct drm_i915_gem_context_create_ext_setparam p_recover = {
> +		.base = {
> +			.name = I915_CONTEXT_CREATE_EXT_SETPARAM,
> +			.next_extension = to_user_pointer(&p_engines),
> +		},
> +		.param = {
> +			.param = I915_CONTEXT_PARAM_RECOVERABLE,
> +			.value = 0,
> +		},
> +	};
> +	struct drm_i915_gem_context_create_ext_setparam p_prio = {
> +		.base = {
> +			.name = I915_CONTEXT_CREATE_EXT_SETPARAM,
> +			.next_extension = to_user_pointer(&p_recover),
> +		},
> +		.param = {
> +			.param = I915_CONTEXT_PARAM_PRIORITY,
> +			.value = 768,
> +		},
> +	};
> +	struct drm_i915_gem_context_create_ext create = {
> +		.flags = (I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE |
> +			  I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS),
> +	};
> +	struct drm_i915_gem_context_param get;
> +
> +	igt_require(create_ext_ioctl(i915, &create) == 0);
> +
> +	create.extensions = to_user_pointer(&p_prio);
> +	igt_assert_eq(create_ext_ioctl(i915, &create), 0);
> +
> +	memset(&get, 0, sizeof(get));
> +	get.ctx_id = create.ctx_id;
> +	get.param = I915_CONTEXT_PARAM_PRIORITY;
> +	gem_context_get_param(i915, &get);
> +	igt_assert_eq(get.value, p_prio.param.value);
> +
> +	memset(&get, 0, sizeof(get));
> +	get.ctx_id = create.ctx_id;
> +	get.param = I915_CONTEXT_PARAM_RECOVERABLE;
> +	gem_context_get_param(i915, &get);
> +	igt_assert_eq(get.value, 0);
> +
> +	check_single_timeline(i915, create.ctx_id, 2);
> +
> +	gem_context_destroy(i915, create.ctx_id);
> +#endif /* I915_DEFINE_CONTEXT_PARAM_ENGINES */
> +}
> +
>   igt_main
>   {
>   	const int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
> @@ -340,17 +543,15 @@ igt_main
>   		memset(&create, 0, sizeof(create));
>   		create.ctx_id = rand();
>   		create.pad = 0;
> -		igt_assert_eq(__gem_context_create_local(fd, &create), 0);
> +		igt_assert_eq(create_ioctl(fd, &create), 0);
>   		igt_assert(create.ctx_id != 0);
>   		gem_context_destroy(fd, create.ctx_id);
>   	}
>   
> -	igt_subtest("invalid-pad") {
> -		memset(&create, 0, sizeof(create));
> -		create.ctx_id = rand();
> -		create.pad = 1;
> -		igt_assert_eq(__gem_context_create_local(fd, &create), -EINVAL);
> -	}
> +	igt_subtest("ext-param")
> +		basic_ext_param(fd);
> +	igt_subtest("iris-pipeline")
> +		iris_pipeline(fd);
>   
>   	igt_subtest("maximum-mem")
>   		maximum(fd, ncpus, CHECK_RAM);
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [igt-dev] [PATCH i-g-t 05/16] i915/gem_ctx_create: Basic checks for constructor properties
@ 2019-05-14 12:27     ` Tvrtko Ursulin
  0 siblings, 0 replies; 92+ messages in thread
From: Tvrtko Ursulin @ 2019-05-14 12:27 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: igt-dev


On 08/05/2019 11:09, Chris Wilson wrote:
> Check that the extended create interface accepts setparam.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   tests/i915/gem_ctx_create.c | 225 ++++++++++++++++++++++++++++++++++--
>   1 file changed, 213 insertions(+), 12 deletions(-)
> 
> diff --git a/tests/i915/gem_ctx_create.c b/tests/i915/gem_ctx_create.c
> index a664070db..9b4fddbe7 100644
> --- a/tests/i915/gem_ctx_create.c
> +++ b/tests/i915/gem_ctx_create.c
> @@ -33,6 +33,7 @@
>   #include <time.h>
>   
>   #include "igt_rand.h"
> +#include "sw_sync.h"
>   
>   #define LOCAL_I915_EXEC_BSD_SHIFT      (13)
>   #define LOCAL_I915_EXEC_BSD_MASK       (3 << LOCAL_I915_EXEC_BSD_SHIFT)
> @@ -45,12 +46,33 @@ static unsigned all_nengine;
>   static unsigned ppgtt_engines[16];
>   static unsigned ppgtt_nengine;
>   
> -static int __gem_context_create_local(int fd, struct drm_i915_gem_context_create *arg)
> +static int create_ioctl(int fd, struct drm_i915_gem_context_create *arg)
>   {
> -	int ret = 0;
> -	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, arg))
> -		ret = -errno;
> -	return ret;
> +	int err;
> +
> +	err = 0;
> +	if (igt_ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, arg)) {
> +		err = -errno;
> +		igt_assert(err);
> +	}
> +
> +	errno = 0;
> +	return err;
> +}
> +
> +static int create_ext_ioctl(int i915,
> +			    struct drm_i915_gem_context_create_ext *arg)
> +{
> +	int err;
> +
> +	err = 0;
> +	if (igt_ioctl(i915, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, arg)) {
> +		err = -errno;
> +		igt_assume(err);
> +	}
> +
> +	errno = 0;
> +	return err;
>   }
>   
>   static double elapsed(const struct timespec *start,
> @@ -308,6 +330,187 @@ static void maximum(int fd, int ncpus, unsigned mode)
>   	free(contexts);
>   }
>   
> +static void basic_ext_param(int i915)
> +{
> +	struct drm_i915_gem_context_create_ext_setparam ext = {
> +		{ .name = I915_CONTEXT_CREATE_EXT_SETPARAM },
> +	};
> +	struct drm_i915_gem_context_create_ext create = {
> +		.flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS
> +	};
> +	struct drm_i915_gem_context_param get;
> +
> +	igt_require(create_ext_ioctl(i915, &create) == 0);
> +	gem_context_destroy(i915, create.ctx_id);
> +
> +	create.extensions = -1ull;
> +	igt_assert_eq(create_ext_ioctl(i915, &create), -EFAULT);
> +
> +	create.extensions = to_user_pointer(&ext);
> +	igt_assert_eq(create_ext_ioctl(i915, &create), -EINVAL);

I think this is the unknown param, right?

Need another -EINVAL test for non-zero ext.ctx_id.
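
Something along these lines, perhaps (only a rough sketch, reusing the
create/create_ext_ioctl locals from basic_ext_param above, and
assuming the kernel indeed rejects a caller-supplied ctx_id on input):

	/* ctx_id is an output; a non-zero value on input should fail */
	create.extensions = 0;
	create.ctx_id = 1;
	igt_assert_eq(create_ext_ioctl(i915, &create), -EINVAL);
	create.ctx_id = 0;
	create.extensions = to_user_pointer(&ext);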

Regards,

Tvrtko

> +
> +	ext.param.param = I915_CONTEXT_PARAM_PRIORITY;
> +	if (create_ext_ioctl(i915, &create) != -ENODEV) {
> +		gem_context_destroy(i915, create.ctx_id);
> +
> +		ext.base.next_extension = -1ull;
> +		igt_assert_eq(create_ext_ioctl(i915, &create), -EFAULT);
> +		ext.base.next_extension = to_user_pointer(&ext);
> +		igt_assert_eq(create_ext_ioctl(i915, &create), -E2BIG);
> +		ext.base.next_extension = 0;
> +
> +		ext.param.value = 32;
> +		igt_assert_eq(create_ext_ioctl(i915, &create), 0);
> +
> +		memset(&get, 0, sizeof(get));
> +		get.ctx_id = create.ctx_id;
> +		get.param = I915_CONTEXT_PARAM_PRIORITY;
> +		gem_context_get_param(i915, &get);
> +		igt_assert_eq(get.value, ext.param.value);
> +
> +		gem_context_destroy(i915, create.ctx_id);
> +	}
> +}
> +
> +static void check_single_timeline(int i915, uint32_t ctx, int num_engines)
> +{
> +#define RCS_TIMESTAMP (0x2000 + 0x358)
> +	const int gen = intel_gen(intel_get_drm_devid(i915));
> +	const int has_64bit_reloc = gen >= 8;
> +	struct drm_i915_gem_exec_object2 results = { .handle = gem_create(i915, 4096) };
> +	const uint32_t bbe = MI_BATCH_BUFFER_END;
> +	int timeline = sw_sync_timeline_create();
> +	uint32_t last, *map;
> +
> +	{
> +		struct drm_i915_gem_execbuffer2 execbuf = {
> +			.buffers_ptr = to_user_pointer(&results),
> +			.buffer_count = 1,
> +			.rsvd1 = ctx,
> +		};
> +		gem_write(i915, results.handle, 0, &bbe, sizeof(bbe));
> +		gem_execbuf(i915, &execbuf);
> +		results.flags = EXEC_OBJECT_PINNED;
> +	}
> +
> +	for (int i = 0; i < num_engines; i++) {
> +		struct drm_i915_gem_exec_object2 obj[2] = {
> +			results, /* write hazard lies! */
> +			{ .handle = gem_create(i915, 4096) },
> +		};
> +		struct drm_i915_gem_execbuffer2 execbuf = {
> +			.buffers_ptr = to_user_pointer(obj),
> +			.buffer_count = 2,
> +			.rsvd1 = ctx,
> +			.rsvd2 = sw_sync_timeline_create_fence(timeline, num_engines - i),
> +			.flags = i | I915_EXEC_FENCE_IN,
> +		};
> +		uint64_t offset = results.offset + 4 * i;
> +		uint32_t *cs;
> +		int j = 0;
> +
> +		cs = gem_mmap__cpu(i915, obj[1].handle, 0, 4096, PROT_WRITE);
> +
> +		cs[j] = 0x24 << 23 | 1; /* SRM */
> +		if (has_64bit_reloc)
> +			cs[j]++;
> +		j++;
> +		cs[j++] = RCS_TIMESTAMP;
> +		cs[j++] = offset;
> +		if (has_64bit_reloc)
> +			cs[j++] = offset >> 32;
> +		cs[j++] = MI_BATCH_BUFFER_END;
> +
> +		munmap(cs, 4096);
> +
> +		gem_execbuf(i915, &execbuf);
> +		gem_close(i915, obj[1].handle);
> +		close(execbuf.rsvd2);
> +	}
> +	close(timeline);
> +	gem_sync(i915, results.handle);
> +
> +	map = gem_mmap__cpu(i915, results.handle, 0, 4096, PROT_READ);
> +	gem_set_domain(i915, results.handle, I915_GEM_DOMAIN_CPU, 0);
> +	gem_close(i915, results.handle);
> +
> +	last = map[0];
> +	for (int i = 1; i < num_engines; i++) {
> +		igt_assert_f((map[i] - last) > 0,
> +			     "Engine instance [%d] executed too early: this:%x, last:%x\n",
> +			     i, map[i], last);
> +		last = map[i];
> +	}
> +	munmap(map, 4096);
> +}
> +
> +static void iris_pipeline(int i915)
> +{
> +#ifdef I915_DEFINE_CONTEXT_PARAM_ENGINES
> +#define RCS0 {0, 0}
> +	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 2) = {
> +		.engines = { RCS0, RCS0 }
> +	};
> +	struct drm_i915_gem_context_create_ext_setparam p_engines = {
> +		.base = {
> +			.name = I915_CONTEXT_CREATE_EXT_SETPARAM,
> +			.next_extension = 0, /* end of chain */
> +		},
> +		.param = {
> +			.param = I915_CONTEXT_PARAM_ENGINES,
> +			.value = to_user_pointer(&engines),
> +			.size = sizeof(engines),
> +		},
> +	};
> +	struct drm_i915_gem_context_create_ext_setparam p_recover = {
> +		.base = {
> +			.name = I915_CONTEXT_CREATE_EXT_SETPARAM,
> +			.next_extension = to_user_pointer(&p_engines),
> +		},
> +		.param = {
> +			.param = I915_CONTEXT_PARAM_RECOVERABLE,
> +			.value = 0,
> +		},
> +	};
> +	struct drm_i915_gem_context_create_ext_setparam p_prio = {
> +		.base = {
> +			.name = I915_CONTEXT_CREATE_EXT_SETPARAM,
> +			.next_extension = to_user_pointer(&p_recover),
> +		},
> +		.param = {
> +			.param = I915_CONTEXT_PARAM_PRIORITY,
> +			.value = 768,
> +		},
> +	};
> +	struct drm_i915_gem_context_create_ext create = {
> +		.flags = (I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE |
> +			  I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS),
> +	};
> +	struct drm_i915_gem_context_param get;
> +
> +	igt_require(create_ext_ioctl(i915, &create) == 0);
> +
> +	create.extensions = to_user_pointer(&p_prio);
> +	igt_assert_eq(create_ext_ioctl(i915, &create), 0);
> +
> +	memset(&get, 0, sizeof(get));
> +	get.ctx_id = create.ctx_id;
> +	get.param = I915_CONTEXT_PARAM_PRIORITY;
> +	gem_context_get_param(i915, &get);
> +	igt_assert_eq(get.value, p_prio.param.value);
> +
> +	memset(&get, 0, sizeof(get));
> +	get.ctx_id = create.ctx_id;
> +	get.param = I915_CONTEXT_PARAM_RECOVERABLE;
> +	gem_context_get_param(i915, &get);
> +	igt_assert_eq(get.value, 0);
> +
> +	check_single_timeline(i915, create.ctx_id, 2);
> +
> +	gem_context_destroy(i915, create.ctx_id);
> +#endif /* I915_DEFINE_CONTEXT_PARAM_ENGINES */
> +}
> +
>   igt_main
>   {
>   	const int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
> @@ -340,17 +543,15 @@ igt_main
>   		memset(&create, 0, sizeof(create));
>   		create.ctx_id = rand();
>   		create.pad = 0;
> -		igt_assert_eq(__gem_context_create_local(fd, &create), 0);
> +		igt_assert_eq(create_ioctl(fd, &create), 0);
>   		igt_assert(create.ctx_id != 0);
>   		gem_context_destroy(fd, create.ctx_id);
>   	}
>   
> -	igt_subtest("invalid-pad") {
> -		memset(&create, 0, sizeof(create));
> -		create.ctx_id = rand();
> -		create.pad = 1;
> -		igt_assert_eq(__gem_context_create_local(fd, &create), -EINVAL);
> -	}
> +	igt_subtest("ext-param")
> +		basic_ext_param(fd);
> +	igt_subtest("iris-pipeline")
> +		iris_pipeline(fd);
>   
>   	igt_subtest("maximum-mem")
>   		maximum(fd, ncpus, CHECK_RAM);
> 
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH i-g-t 07/16] i915: Add gem_ctx_clone
  2019-05-08 10:09   ` [igt-dev] " Chris Wilson
@ 2019-05-14 12:41     ` Tvrtko Ursulin
  -1 siblings, 0 replies; 92+ messages in thread
From: Tvrtko Ursulin @ 2019-05-14 12:41 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: igt-dev


On 08/05/2019 11:09, Chris Wilson wrote:
> Exercise cloning contexts, an extension of merely creating one.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   tests/Makefile.sources     |   1 +
>   tests/i915/gem_ctx_clone.c | 460 +++++++++++++++++++++++++++++++++++++
>   tests/meson.build          |   1 +
>   3 files changed, 462 insertions(+)
>   create mode 100644 tests/i915/gem_ctx_clone.c
> 
> diff --git a/tests/Makefile.sources b/tests/Makefile.sources
> index 1a541d206..e1b7feeb2 100644
> --- a/tests/Makefile.sources
> +++ b/tests/Makefile.sources
> @@ -21,6 +21,7 @@ TESTS_progs = \
>   	drm_import_export \
>   	drm_mm \
>   	drm_read \
> +	i915/gem_ctx_clone \
>   	i915/gem_vm_create \
>   	kms_3d \
>   	kms_addfb_basic \
> diff --git a/tests/i915/gem_ctx_clone.c b/tests/i915/gem_ctx_clone.c
> new file mode 100644
> index 000000000..cdc5bf413
> --- /dev/null
> +++ b/tests/i915/gem_ctx_clone.c
> @@ -0,0 +1,460 @@
> +/*
> + * Copyright © 2019 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +#include "igt.h"
> +#include "igt_gt.h"
> +#include "i915/gem_vm.h"
> +#include "i915_drm.h"
> +
> +static int ctx_create_ioctl(int i915, struct drm_i915_gem_context_create_ext *arg)
> +{
> +	int err;
> +
> +	err = 0;
> +	if (igt_ioctl(i915, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, arg)) {
> +		err = -errno;
> +		igt_assume(err);
> +	}
> +
> +	errno = 0;
> +	return err;
> +}
> +
> +static bool has_ctx_clone(int i915)
> +{
> +	struct drm_i915_gem_context_create_ext_clone ext = {
> +		{ .name = I915_CONTEXT_CREATE_EXT_CLONE },
> +		.clone_id = -1,
> +	};
> +	struct drm_i915_gem_context_create_ext create = {
> +		.flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
> +		.extensions = to_user_pointer(&ext),
> +	};
> +	return ctx_create_ioctl(i915, &create) == -ENOENT;
> +}
> +
> +static void invalid_clone(int i915)
> +{
> +	struct drm_i915_gem_context_create_ext_clone ext = {
> +		{ .name = I915_CONTEXT_CREATE_EXT_CLONE },
> +	};
> +	struct drm_i915_gem_context_create_ext create = {
> +		.flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
> +		.extensions = to_user_pointer(&ext),
> +	};
> +
> +	igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
> +	gem_context_destroy(i915, create.ctx_id);
> +
> +	ext.flags = -1; /* Hopefully we won't run out of flags */
> +	igt_assert_eq(ctx_create_ioctl(i915, &create), -EINVAL);
> +	ext.flags = 0;
> +
> +	ext.base.next_extension = -1;
> +	igt_assert_eq(ctx_create_ioctl(i915, &create), -EFAULT);
> +	ext.base.next_extension = to_user_pointer(&ext);
> +	igt_assert_eq(ctx_create_ioctl(i915, &create), -E2BIG);
> +	ext.base.next_extension = 0;
> +
> +	ext.clone_id = -1;
> +	igt_assert_eq(ctx_create_ioctl(i915, &create), -ENOENT);
> +	ext.clone_id = 0;
> +}
> +
> +static void clone_flags(int i915)
> +{
> +	struct drm_i915_gem_context_create_ext_setparam set = {
> +		{ .name = I915_CONTEXT_CREATE_EXT_SETPARAM },
> +		{ .param = I915_CONTEXT_PARAM_RECOVERABLE },
> +	};
> +	struct drm_i915_gem_context_create_ext_clone ext = {
> +		{ .name = I915_CONTEXT_CREATE_EXT_CLONE },
> +		.flags = I915_CONTEXT_CLONE_FLAGS,
> +	};
> +	struct drm_i915_gem_context_create_ext create = {
> +		.flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
> +		.extensions = to_user_pointer(&ext),
> +	};
> +	int expected;
> +
> +	set.param.value = 1; /* default is recoverable */
> +	igt_require(__gem_context_set_param(i915, &set.param) == 0);
> +
> +	for (int pass = 0; pass < 2; pass++) { /* cloning default, then child */
> +		igt_debug("Cloning %d\n", ext.clone_id);
> +		igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
> +
> +		set.param.ctx_id = ext.clone_id;
> +		gem_context_get_param(i915, &set.param);
> +		expected = set.param.value;
> +
> +		set.param.ctx_id = create.ctx_id;
> +		gem_context_get_param(i915, &set.param);
> +
> +		igt_assert_eq_u64(set.param.param,
> +				  I915_CONTEXT_PARAM_RECOVERABLE);
> +		igt_assert_eq((int)set.param.value, expected);
> +
> +		gem_context_destroy(i915, create.ctx_id);
> +
> +		expected = set.param.value = 0;
> +		set.param.ctx_id = ext.clone_id;
> +		gem_context_set_param(i915, &set.param);
> +
> +		igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
> +
> +		set.param.ctx_id = create.ctx_id;
> +		gem_context_get_param(i915, &set.param);
> +
> +		igt_assert_eq_u64(set.param.param,
> +				  I915_CONTEXT_PARAM_RECOVERABLE);
> +		igt_assert_eq((int)set.param.value, expected);
> +
> +		gem_context_destroy(i915, create.ctx_id);
> +
> +		/* clone but then reset priority to default... */

Just correct the priority/prio comments here and below (this param is
recoverable).

> +		set.param.ctx_id = 0;
> +		set.param.value = 1;
> +		ext.base.next_extension = to_user_pointer(&set);
> +		igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
> +		ext.base.next_extension = 0;
> +
> +		/* new context should have updated prio... */
> +		set.param.ctx_id = create.ctx_id;
> +		gem_context_get_param(i915, &set.param);
> +		igt_assert_eq_u64(set.param.value, 1);
> +
> +		/* but original context should have default prio */
> +		set.param.ctx_id = ext.clone_id;
> +		gem_context_get_param(i915, &set.param);
> +		igt_assert_eq_u64(set.param.value, 0);
> +
> +		gem_context_destroy(i915, create.ctx_id);
> +		ext.clone_id = gem_context_create(i915);
> +	}
> +
> +	gem_context_destroy(i915, ext.clone_id);
> +}
> +
> +static void clone_engines(int i915)
> +{
> +	struct drm_i915_gem_context_create_ext_setparam set = {
> +		{ .name = I915_CONTEXT_CREATE_EXT_SETPARAM },
> +		{ .param = I915_CONTEXT_PARAM_ENGINES },
> +	};
> +	struct drm_i915_gem_context_create_ext_clone ext = {
> +		{ .name = I915_CONTEXT_CREATE_EXT_CLONE },
> +		.flags = I915_CONTEXT_CLONE_ENGINES,
> +	};
> +	struct drm_i915_gem_context_create_ext create = {
> +		.flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
> +		.extensions = to_user_pointer(&ext),
> +	};
> +	I915_DEFINE_CONTEXT_PARAM_ENGINES(expected, 64);
> +	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 64);
> +	uint64_t ex_size;
> +
> +	memset(&expected, 0, sizeof(expected));
> +	memset(&engines, 0, sizeof(engines));
> +
> +	igt_require(__gem_context_set_param(i915, &set.param) == 0);
> +
> +	for (int pass = 0; pass < 2; pass++) { /* cloning default, then child */
> +		igt_debug("Cloning %d\n", ext.clone_id);
> +		igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
> +
> +		set.param.ctx_id = ext.clone_id;
> +		set.param.size = sizeof(expected);
> +		set.param.value = to_user_pointer(&expected);
> +		gem_context_get_param(i915, &set.param);
> +		ex_size = set.param.size;
> +
> +		set.param.ctx_id = create.ctx_id;
> +		set.param.size = sizeof(engines);
> +		set.param.value = to_user_pointer(&engines);
> +		gem_context_get_param(i915, &set.param);
> +
> +		igt_assert_eq_u64(set.param.param, I915_CONTEXT_PARAM_ENGINES);
> +		igt_assert_eq_u64(set.param.size, ex_size);
> +		igt_assert(!memcmp(&engines, &expected, ex_size));
> +
> +		gem_context_destroy(i915, create.ctx_id);
> +
> +		expected.engines[0].engine_class =
> +			I915_ENGINE_CLASS_INVALID;
> +		expected.engines[0].engine_instance =
> +			I915_ENGINE_CLASS_INVALID_NONE;
> +		ex_size = (sizeof(struct i915_context_param_engines) +
> +			   sizeof(expected.engines[0]));
> +
> +		set.param.ctx_id = ext.clone_id;
> +		set.param.size = ex_size;
> +		set.param.value = to_user_pointer(&expected);
> +		gem_context_set_param(i915, &set.param);
> +
> +		igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
> +
> +		set.param.ctx_id = create.ctx_id;
> +		set.param.size = sizeof(engines);
> +		set.param.value = to_user_pointer(&engines);
> +		gem_context_get_param(i915, &set.param);
> +
> +		igt_assert_eq_u64(set.param.size, ex_size);
> +		igt_assert(!memcmp(&engines, &expected, ex_size));
> +
> +		gem_context_destroy(i915, create.ctx_id);
> +
> +		/* clone but then reset engines to default */
> +		set.param.ctx_id = 0;
> +		set.param.size = 0;
> +		set.param.value = 0;
> +		ext.base.next_extension = to_user_pointer(&set);
> +
> +		igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
> +		ext.base.next_extension = 0;
> +
> +		set.param.ctx_id = create.ctx_id;
> +		set.param.size = sizeof(engines);
> +		set.param.value = to_user_pointer(&engines);
> +		gem_context_get_param(i915, &set.param);
> +		igt_assert_eq_u64(set.param.size, 0);
> +
> +		gem_context_destroy(i915, create.ctx_id);
> +
> +		/* And check we ignore the flag */
> +		ext.flags = 0;
> +		igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
> +		ext.flags = I915_CONTEXT_CLONE_ENGINES;
> +
> +		set.param.ctx_id = create.ctx_id;
> +		set.param.size = sizeof(engines);
> +		set.param.value = to_user_pointer(&engines);
> +		gem_context_get_param(i915, &set.param);
> +		igt_assert_eq_u64(set.param.size, 0);

It is quite hard to review/follow all these tests (and so to gauge the
coverage). The flow is very stateful, and at each step one has to
remember or back-reference the currently active chain of extensions,
the state of each context, and which context ids are in use.

Annoyingly I don't have any good ideas on how to express this easily
and reasonably. Perhaps less reuse of the same stack objects, in favour
of dedicated helpers for querying, would reduce the mess? Hard to say
without trying it out.

But I think something needs to be done, since people will struggle to
follow this if there is a bug one day.
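
For example, a small query helper (just a rough sketch, built on the
existing gem_context_get_param wrapper, and only suitable for scalar
params) could avoid mutating the shared set.param between steps:

	static uint64_t get_ctx_param(int i915, uint32_t ctx, uint64_t param)
	{
		struct drm_i915_gem_context_param p = {
			.ctx_id = ctx,
			.param = param,
		};

		/* Read back a single scalar context parameter */
		gem_context_get_param(i915, &p);

		return p.value;
	}

Each check then reads as a one-liner, e.g.

	igt_assert_eq_u64(get_ctx_param(i915, create.ctx_id,
					I915_CONTEXT_PARAM_RECOVERABLE), 0);

and the reader no longer has to track which fields of the reused stack
objects are live at each step.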

> +
> +		ext.clone_id = gem_context_create(i915);
> +	}
> +
> +	gem_context_destroy(i915, ext.clone_id);
> +}
> +
> +static void clone_scheduler(int i915)
> +{
> +	struct drm_i915_gem_context_create_ext_setparam set = {
> +		{ .name = I915_CONTEXT_CREATE_EXT_SETPARAM },
> +		{ .param = I915_CONTEXT_PARAM_PRIORITY },
> +	};
> +	struct drm_i915_gem_context_create_ext_clone ext = {
> +		{ .name = I915_CONTEXT_CREATE_EXT_CLONE },
> +		.flags = I915_CONTEXT_CLONE_SCHEDATTR,
> +	};
> +	struct drm_i915_gem_context_create_ext create = {
> +		.flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
> +		.extensions = to_user_pointer(&ext),
> +	};
> +	int expected;
> +
> +	igt_require(__gem_context_set_param(i915, &set.param) == 0);
> +
> +	for (int pass = 0; pass < 2; pass++) { /* cloning default, then child */
> +		igt_debug("Cloning %d\n", ext.clone_id);
> +		igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
> +
> +		set.param.ctx_id = ext.clone_id;
> +		gem_context_get_param(i915, &set.param);
> +		expected = set.param.value;
> +
> +		set.param.ctx_id = create.ctx_id;
> +		gem_context_get_param(i915, &set.param);
> +
> +		igt_assert_eq_u64(set.param.param, I915_CONTEXT_PARAM_PRIORITY);
> +		igt_assert_eq((int)set.param.value, expected);
> +
> +		gem_context_destroy(i915, create.ctx_id);
> +
> +		expected = set.param.value = 1;
> +		set.param.ctx_id = ext.clone_id;
> +		gem_context_set_param(i915, &set.param);
> +
> +		igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
> +
> +		set.param.ctx_id = create.ctx_id;
> +		gem_context_get_param(i915, &set.param);
> +
> +		igt_assert_eq_u64(set.param.param, I915_CONTEXT_PARAM_PRIORITY);
> +		igt_assert_eq((int)set.param.value, expected);
> +
> +		gem_context_destroy(i915, create.ctx_id);
> +
> +		/* clone but then reset priority to default */
> +		set.param.ctx_id = 0;
> +		set.param.value = 0;
> +		ext.base.next_extension = to_user_pointer(&set);
> +		igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
> +		ext.base.next_extension = 0;
> +
> +		set.param.ctx_id = create.ctx_id;
> +		gem_context_get_param(i915, &set.param);
> +		igt_assert_eq_u64(set.param.value, 0);
> +
> +		set.param.ctx_id = ext.clone_id;
> +		gem_context_get_param(i915, &set.param);
> +		igt_assert_eq_u64(set.param.value, 1);
> +
> +		gem_context_destroy(i915, create.ctx_id);
> +		ext.clone_id = gem_context_create(i915);
> +	}
> +
> +	gem_context_destroy(i915, ext.clone_id);
> +}
> +
> +static uint32_t __batch_create(int i915, uint32_t offset)
> +{
> +	const uint32_t bbe = MI_BATCH_BUFFER_END;
> +	uint32_t handle;
> +
> +	handle = gem_create(i915, ALIGN(offset + 4, 4096));
> +	gem_write(i915, handle, offset, &bbe, sizeof(bbe));
> +
> +	return handle;
> +}
> +
> +static uint32_t batch_create(int i915)
> +{
> +	return __batch_create(i915, 0);
> +}
> +
> +static void check_same_vm(int i915, uint32_t ctx_a, uint32_t ctx_b)
> +{
> +	struct drm_i915_gem_exec_object2 batch = {
> +		.handle = batch_create(i915),
> +	};
> +	struct drm_i915_gem_execbuffer2 eb = {
> +		.buffers_ptr = to_user_pointer(&batch),
> +		.buffer_count = 1,
> +	};
> +
> +	/* First verify that we try to use "softpinning" by default */
> +	batch.offset = 48 << 20;
> +	eb.rsvd1 = ctx_a;
> +	gem_execbuf(i915, &eb);
> +	igt_assert_eq_u64(batch.offset, 48 << 20);
> +
> +	/* An already active VMA will try to keep its offset */
> +	batch.offset = 0;
> +	eb.rsvd1 = ctx_b;
> +	gem_execbuf(i915, &eb);
> +	igt_assert_eq_u64(batch.offset, 48 << 20);
> +
> +	gem_sync(i915, batch.handle);
> +	gem_close(i915, batch.handle);
> +
> +	gem_quiescent_gpu(i915); /* evict the vma */
> +}
> +
> +static void clone_vm(int i915)
> +{
> +	struct drm_i915_gem_context_param set = {
> +		.param = I915_CONTEXT_PARAM_VM,
> +	};
> +	struct drm_i915_gem_context_create_ext_clone ext = {
> +		{ .name = I915_CONTEXT_CREATE_EXT_CLONE },
> +		.flags = I915_CONTEXT_CLONE_VM,
> +	};
> +	struct drm_i915_gem_context_create_ext create = {
> +		.flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
> +		.extensions = to_user_pointer(&ext),
> +	};
> +	uint32_t vm_id[2];
> +
> +	igt_require(__gem_context_set_param(i915, &set) == -ENOENT);
> +
> +	/* Scrub the VM for our tests */
> +	i915 = gem_reopen_driver(i915);
> +
> +	set.ctx_id = gem_context_create(i915);
> +	gem_context_get_param(i915, &set);
> +	vm_id[0] = set.value;
> +	gem_context_destroy(i915, set.ctx_id);
> +
> +	vm_id[1] = gem_vm_create(i915);
> +
> +	for (int pass = 0; pass < 2; pass++) { /* cloning default, then child */
> +		igt_debug("Cloning %d\n", ext.clone_id);
> +
> +		igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
> +		check_same_vm(i915, ext.clone_id, create.ctx_id);
> +		gem_context_destroy(i915, create.ctx_id);
> +
> +		set.value = vm_id[pass];
> +		set.ctx_id = ext.clone_id;
> +		gem_context_set_param(i915, &set);
> +
> +		igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
> +		check_same_vm(i915, ext.clone_id, create.ctx_id);
> +		gem_context_destroy(i915, create.ctx_id);
> +
> +		ext.clone_id = gem_context_create(i915);
> +	}
> +
> +	gem_context_destroy(i915, ext.clone_id);
> +
> +	for (int i = 0; i < ARRAY_SIZE(vm_id); i++)
> +		gem_vm_destroy(i915, vm_id[i]);
> +
> +	close(i915);
> +}
> +
> +igt_main
> +{
> +	int i915 = -1;
> +
> +	igt_fixture {
> +		i915 = drm_open_driver(DRIVER_INTEL);
> +		igt_require_gem(i915);
> +		gem_require_contexts(i915);
> +
> +		igt_require(has_ctx_clone(i915));
> +		igt_fork_hang_detector(i915);
> +	}
> +
> +	igt_subtest("invalid")
> +		invalid_clone(i915);
> +
> +	igt_subtest("engines")
> +		clone_engines(i915);
> +
> +	igt_subtest("flags")
> +		clone_flags(i915);
> +
> +	igt_subtest("scheduler")
> +		clone_scheduler(i915);
> +
> +	igt_subtest("vm")
> +		clone_vm(i915);
> +
> +	igt_fixture {
> +		igt_stop_hang_detector();
> +		close(i915);
> +	}
> +}
> diff --git a/tests/meson.build b/tests/meson.build
> index e7dbc5756..3810bd760 100644
> --- a/tests/meson.build
> +++ b/tests/meson.build
> @@ -109,6 +109,7 @@ i915_progs = [
>   	'gem_cs_prefetch',
>   	'gem_cs_tlb',
>   	'gem_ctx_bad_destroy',
> +	'gem_ctx_clone',
>   	'gem_ctx_create',
>   	'gem_ctx_exec',
>   	'gem_ctx_isolation',
> 

It looks fine in principle, so I leave it to your conscience whether
you try to improve the readability. :) With the priority comments
corrected to recoverable:

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [igt-dev] [PATCH i-g-t 07/16] i915: Add gem_ctx_clone
@ 2019-05-14 12:41     ` Tvrtko Ursulin
  0 siblings, 0 replies; 92+ messages in thread
From: Tvrtko Ursulin @ 2019-05-14 12:41 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: igt-dev


On 08/05/2019 11:09, Chris Wilson wrote:
> Exercise cloning contexts, an extension of merely creating one.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   tests/Makefile.sources     |   1 +
>   tests/i915/gem_ctx_clone.c | 460 +++++++++++++++++++++++++++++++++++++
>   tests/meson.build          |   1 +
>   3 files changed, 462 insertions(+)
>   create mode 100644 tests/i915/gem_ctx_clone.c
> 
> diff --git a/tests/Makefile.sources b/tests/Makefile.sources
> index 1a541d206..e1b7feeb2 100644
> --- a/tests/Makefile.sources
> +++ b/tests/Makefile.sources
> @@ -21,6 +21,7 @@ TESTS_progs = \
>   	drm_import_export \
>   	drm_mm \
>   	drm_read \
> +	i915/gem_ctx_clone \
>   	i915/gem_vm_create \
>   	kms_3d \
>   	kms_addfb_basic \
> diff --git a/tests/i915/gem_ctx_clone.c b/tests/i915/gem_ctx_clone.c
> new file mode 100644
> index 000000000..cdc5bf413
> --- /dev/null
> +++ b/tests/i915/gem_ctx_clone.c
> @@ -0,0 +1,460 @@
> +/*
> + * Copyright © 2019 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +#include "igt.h"
> +#include "igt_gt.h"
> +#include "i915/gem_vm.h"
> +#include "i915_drm.h"
> +
> +static int ctx_create_ioctl(int i915, struct drm_i915_gem_context_create_ext *arg)
> +{
> +	int err;
> +
> +	err = 0;
> +	if (igt_ioctl(i915, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, arg)) {
> +		err = -errno;
> +		igt_assume(err);
> +	}
> +
> +	errno = 0;
> +	return err;
> +}
> +
> +static bool has_ctx_clone(int i915)
> +{
> +	struct drm_i915_gem_context_create_ext_clone ext = {
> +		{ .name = I915_CONTEXT_CREATE_EXT_CLONE },
> +		.clone_id = -1,
> +	};
> +	struct drm_i915_gem_context_create_ext create = {
> +		.flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
> +		.extensions = to_user_pointer(&ext),
> +	};
> +	return ctx_create_ioctl(i915, &create) == -ENOENT;
> +}
> +
> +static void invalid_clone(int i915)
> +{
> +	struct drm_i915_gem_context_create_ext_clone ext = {
> +		{ .name = I915_CONTEXT_CREATE_EXT_CLONE },
> +	};
> +	struct drm_i915_gem_context_create_ext create = {
> +		.flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
> +		.extensions = to_user_pointer(&ext),
> +	};
> +
> +	igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
> +	gem_context_destroy(i915, create.ctx_id);
> +
> +	ext.flags = -1; /* Hopefully we won't run out of flags */
> +	igt_assert_eq(ctx_create_ioctl(i915, &create), -EINVAL);
> +	ext.flags = 0;
> +
> +	ext.base.next_extension = -1;
> +	igt_assert_eq(ctx_create_ioctl(i915, &create), -EFAULT);
> +	ext.base.next_extension = to_user_pointer(&ext);
> +	igt_assert_eq(ctx_create_ioctl(i915, &create), -E2BIG);
> +	ext.base.next_extension = 0;
> +
> +	ext.clone_id = -1;
> +	igt_assert_eq(ctx_create_ioctl(i915, &create), -ENOENT);
> +	ext.clone_id = 0;
> +}
> +
> +static void clone_flags(int i915)
> +{
> +	struct drm_i915_gem_context_create_ext_setparam set = {
> +		{ .name = I915_CONTEXT_CREATE_EXT_SETPARAM },
> +		{ .param = I915_CONTEXT_PARAM_RECOVERABLE },
> +	};
> +	struct drm_i915_gem_context_create_ext_clone ext = {
> +		{ .name = I915_CONTEXT_CREATE_EXT_CLONE },
> +		.flags = I915_CONTEXT_CLONE_FLAGS,
> +	};
> +	struct drm_i915_gem_context_create_ext create = {
> +		.flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
> +		.extensions = to_user_pointer(&ext),
> +	};
> +	int expected;
> +
> +	set.param.value = 1; /* default is recoverable */
> +	igt_require(__gem_context_set_param(i915, &set.param) == 0);
> +
> +	for (int pass = 0; pass < 2; pass++) { /* cloning default, then child */
> +		igt_debug("Cloning %d\n", ext.clone_id);
> +		igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
> +
> +		set.param.ctx_id = ext.clone_id;
> +		gem_context_get_param(i915, &set.param);
> +		expected = set.param.value;
> +
> +		set.param.ctx_id = create.ctx_id;
> +		gem_context_get_param(i915, &set.param);
> +
> +		igt_assert_eq_u64(set.param.param,
> +				  I915_CONTEXT_PARAM_RECOVERABLE);
> +		igt_assert_eq((int)set.param.value, expected);
> +
> +		gem_context_destroy(i915, create.ctx_id);
> +
> +		expected = set.param.value = 0;
> +		set.param.ctx_id = ext.clone_id;
> +		gem_context_set_param(i915, &set.param);
> +
> +		igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
> +
> +		set.param.ctx_id = create.ctx_id;
> +		gem_context_get_param(i915, &set.param);
> +
> +		igt_assert_eq_u64(set.param.param,
> +				  I915_CONTEXT_PARAM_RECOVERABLE);
> +		igt_assert_eq((int)set.param.value, expected);
> +
> +		gem_context_destroy(i915, create.ctx_id);
> +
> +		/* clone but then reset priority to default... */

Just correct the priority/prio comments here and below (this param is
recoverable).

> +		set.param.ctx_id = 0;
> +		set.param.value = 1;
> +		ext.base.next_extension = to_user_pointer(&set);
> +		igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
> +		ext.base.next_extension = 0;
> +
> +		/* new context should have updated prio... */
> +		set.param.ctx_id = create.ctx_id;
> +		gem_context_get_param(i915, &set.param);
> +		igt_assert_eq_u64(set.param.value, 1);
> +
> +		/* but original context should have default prio */
> +		set.param.ctx_id = ext.clone_id;
> +		gem_context_get_param(i915, &set.param);
> +		igt_assert_eq_u64(set.param.value, 0);
> +
> +		gem_context_destroy(i915, create.ctx_id);
> +		ext.clone_id = gem_context_create(i915);
> +	}
> +
> +	gem_context_destroy(i915, ext.clone_id);
> +}
> +
> +static void clone_engines(int i915)
> +{
> +	struct drm_i915_gem_context_create_ext_setparam set = {
> +		{ .name = I915_CONTEXT_CREATE_EXT_SETPARAM },
> +		{ .param = I915_CONTEXT_PARAM_ENGINES },
> +	};
> +	struct drm_i915_gem_context_create_ext_clone ext = {
> +		{ .name = I915_CONTEXT_CREATE_EXT_CLONE },
> +		.flags = I915_CONTEXT_CLONE_ENGINES,
> +	};
> +	struct drm_i915_gem_context_create_ext create = {
> +		.flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
> +		.extensions = to_user_pointer(&ext),
> +	};
> +	I915_DEFINE_CONTEXT_PARAM_ENGINES(expected, 64);
> +	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 64);
> +	uint64_t ex_size;
> +
> +	memset(&expected, 0, sizeof(expected));
> +	memset(&engines, 0, sizeof(engines));
> +
> +	igt_require(__gem_context_set_param(i915, &set.param) == 0);
> +
> +	for (int pass = 0; pass < 2; pass++) { /* cloning default, then child */
> +		igt_debug("Cloning %d\n", ext.clone_id);
> +		igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
> +
> +		set.param.ctx_id = ext.clone_id;
> +		set.param.size = sizeof(expected);
> +		set.param.value = to_user_pointer(&expected);
> +		gem_context_get_param(i915, &set.param);
> +		ex_size = set.param.size;
> +
> +		set.param.ctx_id = create.ctx_id;
> +		set.param.size = sizeof(engines);
> +		set.param.value = to_user_pointer(&engines);
> +		gem_context_get_param(i915, &set.param);
> +
> +		igt_assert_eq_u64(set.param.param, I915_CONTEXT_PARAM_ENGINES);
> +		igt_assert_eq_u64(set.param.size, ex_size);
> +		igt_assert(!memcmp(&engines, &expected, ex_size));
> +
> +		gem_context_destroy(i915, create.ctx_id);
> +
> +		expected.engines[0].engine_class =
> +			I915_ENGINE_CLASS_INVALID;
> +		expected.engines[0].engine_instance =
> +			I915_ENGINE_CLASS_INVALID_NONE;
> +		ex_size = (sizeof(struct i915_context_param_engines) +
> +			   sizeof(expected.engines[0]));
> +
> +		set.param.ctx_id = ext.clone_id;
> +		set.param.size = ex_size;
> +		set.param.value = to_user_pointer(&expected);
> +		gem_context_set_param(i915, &set.param);
> +
> +		igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
> +
> +		set.param.ctx_id = create.ctx_id;
> +		set.param.size = sizeof(engines);
> +		set.param.value = to_user_pointer(&engines);
> +		gem_context_get_param(i915, &set.param);
> +
> +		igt_assert_eq_u64(set.param.size, ex_size);
> +		igt_assert(!memcmp(&engines, &expected, ex_size));
> +
> +		gem_context_destroy(i915, create.ctx_id);
> +
> +		/* clone but then reset engines to default */
> +		set.param.ctx_id = 0;
> +		set.param.size = 0;
> +		set.param.value = 0;
> +		ext.base.next_extension = to_user_pointer(&set);
> +
> +		igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
> +		ext.base.next_extension = 0;
> +
> +		set.param.ctx_id = create.ctx_id;
> +		set.param.size = sizeof(engines);
> +		set.param.value = to_user_pointer(&engines);
> +		gem_context_get_param(i915, &set.param);
> +		igt_assert_eq_u64(set.param.size, 0);
> +
> +		gem_context_destroy(i915, create.ctx_id);
> +
> +		/* And check we ignore the flag */
> +		ext.flags = 0;
> +		igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
> +		ext.flags = I915_CONTEXT_CLONE_ENGINES;
> +
> +		set.param.ctx_id = create.ctx_id;
> +		set.param.size = sizeof(engines);
> +		set.param.value = to_user_pointer(&engines);
> +		gem_context_get_param(i915, &set.param);
> +		igt_assert_eq_u64(set.param.size, 0);

It is quite hard to review/follow all these tests (and so to gauge the
coverage). The flow is very stateful, and at each step one has to
remember or back-reference the currently active chain of extensions,
the state of each context, and which context ids are in use.

Annoyingly I don't have any good ideas on how to express this easily
and reasonably. Perhaps less reuse of the same stack objects, in favour
of dedicated helpers for querying, would reduce the mess? Hard to say
without trying it out.

But I think something needs to be done, since people will struggle to
follow this if there is a bug one day.

> +
> +		ext.clone_id = gem_context_create(i915);
> +	}
> +
> +	gem_context_destroy(i915, ext.clone_id);
> +}
> +
> +static void clone_scheduler(int i915)
> +{
> +	struct drm_i915_gem_context_create_ext_setparam set = {
> +		{ .name = I915_CONTEXT_CREATE_EXT_SETPARAM },
> +		{ .param = I915_CONTEXT_PARAM_PRIORITY },
> +	};
> +	struct drm_i915_gem_context_create_ext_clone ext = {
> +		{ .name = I915_CONTEXT_CREATE_EXT_CLONE },
> +		.flags = I915_CONTEXT_CLONE_SCHEDATTR,
> +	};
> +	struct drm_i915_gem_context_create_ext create = {
> +		.flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
> +		.extensions = to_user_pointer(&ext),
> +	};
> +	int expected;
> +
> +	igt_require(__gem_context_set_param(i915, &set.param) == 0);
> +
> +	for (int pass = 0; pass < 2; pass++) { /* cloning default, then child */
> +		igt_debug("Cloning %d\n", ext.clone_id);
> +		igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
> +
> +		set.param.ctx_id = ext.clone_id;
> +		gem_context_get_param(i915, &set.param);
> +		expected = set.param.value;
> +
> +		set.param.ctx_id = create.ctx_id;
> +		gem_context_get_param(i915, &set.param);
> +
> +		igt_assert_eq_u64(set.param.param, I915_CONTEXT_PARAM_PRIORITY);
> +		igt_assert_eq((int)set.param.value, expected);
> +
> +		gem_context_destroy(i915, create.ctx_id);
> +
> +		expected = set.param.value = 1;
> +		set.param.ctx_id = ext.clone_id;
> +		gem_context_set_param(i915, &set.param);
> +
> +		igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
> +
> +		set.param.ctx_id = create.ctx_id;
> +		gem_context_get_param(i915, &set.param);
> +
> +		igt_assert_eq_u64(set.param.param, I915_CONTEXT_PARAM_PRIORITY);
> +		igt_assert_eq((int)set.param.value, expected);
> +
> +		gem_context_destroy(i915, create.ctx_id);
> +
> +		/* clone but then reset priority to default */
> +		set.param.ctx_id = 0;
> +		set.param.value = 0;
> +		ext.base.next_extension = to_user_pointer(&set);
> +		igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
> +		ext.base.next_extension = 0;
> +
> +		set.param.ctx_id = create.ctx_id;
> +		gem_context_get_param(i915, &set.param);
> +		igt_assert_eq_u64(set.param.value, 0);
> +
> +		set.param.ctx_id = ext.clone_id;
> +		gem_context_get_param(i915, &set.param);
> +		igt_assert_eq_u64(set.param.value, 1);
> +
> +		gem_context_destroy(i915, create.ctx_id);
> +		ext.clone_id = gem_context_create(i915);
> +	}
> +
> +	gem_context_destroy(i915, ext.clone_id);
> +}
> +
> +static uint32_t __batch_create(int i915, uint32_t offset)
> +{
> +	const uint32_t bbe = MI_BATCH_BUFFER_END;
> +	uint32_t handle;
> +
> +	handle = gem_create(i915, ALIGN(offset + 4, 4096));
> +	gem_write(i915, handle, offset, &bbe, sizeof(bbe));
> +
> +	return handle;
> +}
> +
> +static uint32_t batch_create(int i915)
> +{
> +	return __batch_create(i915, 0);
> +}
> +
> +static void check_same_vm(int i915, uint32_t ctx_a, uint32_t ctx_b)
> +{
> +	struct drm_i915_gem_exec_object2 batch = {
> +		.handle = batch_create(i915),
> +	};
> +	struct drm_i915_gem_execbuffer2 eb = {
> +		.buffers_ptr = to_user_pointer(&batch),
> +		.buffer_count = 1,
> +	};
> +
> +	/* First verify that we try to use "softpinning" by default */
> +	batch.offset = 48 << 20;
> +	eb.rsvd1 = ctx_a;
> +	gem_execbuf(i915, &eb);
> +	igt_assert_eq_u64(batch.offset, 48 << 20);
> +
> +	/* An already active VMA will try to keep its offset */
> +	batch.offset = 0;
> +	eb.rsvd1 = ctx_b;
> +	gem_execbuf(i915, &eb);
> +	igt_assert_eq_u64(batch.offset, 48 << 20);
> +
> +	gem_sync(i915, batch.handle);
> +	gem_close(i915, batch.handle);
> +
> +	gem_quiescent_gpu(i915); /* evict the vma */
> +}
> +
> +static void clone_vm(int i915)
> +{
> +	struct drm_i915_gem_context_param set = {
> +		.param = I915_CONTEXT_PARAM_VM,
> +	};
> +	struct drm_i915_gem_context_create_ext_clone ext = {
> +		{ .name = I915_CONTEXT_CREATE_EXT_CLONE },
> +		.flags = I915_CONTEXT_CLONE_VM,
> +	};
> +	struct drm_i915_gem_context_create_ext create = {
> +		.flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
> +		.extensions = to_user_pointer(&ext),
> +	};
> +	uint32_t vm_id[2];
> +
> +	igt_require(__gem_context_set_param(i915, &set) == -ENOENT);
> +
> +	/* Scrub the VM for our tests */
> +	i915 = gem_reopen_driver(i915);
> +
> +	set.ctx_id = gem_context_create(i915);
> +	gem_context_get_param(i915, &set);
> +	vm_id[0] = set.value;
> +	gem_context_destroy(i915, set.ctx_id);
> +
> +	vm_id[1] = gem_vm_create(i915);
> +
> +	for (int pass = 0; pass < 2; pass++) { /* cloning default, then child */
> +		igt_debug("Cloning %d\n", ext.clone_id);
> +
> +		igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
> +		check_same_vm(i915, ext.clone_id, create.ctx_id);
> +		gem_context_destroy(i915, create.ctx_id);
> +
> +		set.value = vm_id[pass];
> +		set.ctx_id = ext.clone_id;
> +		gem_context_set_param(i915, &set);
> +
> +		igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
> +		check_same_vm(i915, ext.clone_id, create.ctx_id);
> +		gem_context_destroy(i915, create.ctx_id);
> +
> +		ext.clone_id = gem_context_create(i915);
> +	}
> +
> +	gem_context_destroy(i915, ext.clone_id);
> +
> +	for (int i = 0; i < ARRAY_SIZE(vm_id); i++)
> +		gem_vm_destroy(i915, vm_id[i]);
> +
> +	close(i915);
> +}
> +
> +igt_main
> +{
> +	int i915 = -1;
> +
> +	igt_fixture {
> +		i915 = drm_open_driver(DRIVER_INTEL);
> +		igt_require_gem(i915);
> +		gem_require_contexts(i915);
> +
> +		igt_require(has_ctx_clone(i915));
> +		igt_fork_hang_detector(i915);
> +	}
> +
> +	igt_subtest("invalid")
> +		invalid_clone(i915);
> +
> +	igt_subtest("engines")
> +		clone_engines(i915);
> +
> +	igt_subtest("flags")
> +		clone_flags(i915);
> +
> +	igt_subtest("scheduler")
> +		clone_scheduler(i915);
> +
> +	igt_subtest("vm")
> +		clone_vm(i915);
> +
> +	igt_fixture {
> +		igt_stop_hang_detector();
> +		close(i915);
> +	}
> +}
> diff --git a/tests/meson.build b/tests/meson.build
> index e7dbc5756..3810bd760 100644
> --- a/tests/meson.build
> +++ b/tests/meson.build
> @@ -109,6 +109,7 @@ i915_progs = [
>   	'gem_cs_prefetch',
>   	'gem_cs_tlb',
>   	'gem_ctx_bad_destroy',
> +	'gem_ctx_clone',
>   	'gem_ctx_create',
>   	'gem_ctx_exec',
>   	'gem_ctx_isolation',
> 

It looks fine in principle, so I leave it to your conscience whether
you try to improve the readability. :) With the priority comments
corrected to recoverable:

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH i-g-t 09/16] i915/gem_ctx_switch: Exercise queues
  2019-05-08 10:09   ` [igt-dev] " Chris Wilson
@ 2019-05-14 12:47     ` Tvrtko Ursulin
  -1 siblings, 0 replies; 92+ messages in thread
From: Tvrtko Ursulin @ 2019-05-14 12:47 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: igt-dev


On 08/05/2019 11:09, Chris Wilson wrote:
> Queues are a form of contexts that share a vm and enforce a single timeline
> across all engines. Test switching between them, just like ordinary
> contexts.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   tests/i915/gem_ctx_switch.c | 75 +++++++++++++++++++++++++++----------
>   1 file changed, 55 insertions(+), 20 deletions(-)
> 
> diff --git a/tests/i915/gem_ctx_switch.c b/tests/i915/gem_ctx_switch.c
> index 87e13b915..647911d4c 100644
> --- a/tests/i915/gem_ctx_switch.c
> +++ b/tests/i915/gem_ctx_switch.c
> @@ -44,7 +44,8 @@
>   #define LOCAL_I915_EXEC_NO_RELOC (1<<11)
>   #define LOCAL_I915_EXEC_HANDLE_LUT (1<<12)
>   
> -#define INTERRUPTIBLE 1
> +#define INTERRUPTIBLE 0x1
> +#define QUEUE 0x2
>   
>   static double elapsed(const struct timespec *start, const struct timespec *end)
>   {
> @@ -126,8 +127,12 @@ static void single(int fd, uint32_t handle,
>   
>   	gem_require_ring(fd, e->exec_id | e->flags);
>   
> -	for (n = 0; n < 64; n++)
> -		contexts[n] = gem_context_create(fd);
> +	for (n = 0; n < 64; n++) {
> +		if (flags & QUEUE)
> +			contexts[n] = gem_queue_create(fd);
> +		else
> +			contexts[n] = gem_context_create(fd);
> +	}
>   
>   	memset(&obj, 0, sizeof(obj));
>   	obj.handle = handle;
> @@ -232,8 +237,12 @@ static void all(int fd, uint32_t handle, unsigned flags, int timeout)
>   	}
>   	igt_require(nengine);
>   
> -	for (n = 0; n < ARRAY_SIZE(contexts); n++)
> -		contexts[n] = gem_context_create(fd);
> +	for (n = 0; n < ARRAY_SIZE(contexts); n++) {
> +		if (flags & QUEUE)
> +			contexts[n] = gem_queue_create(fd);
> +		else
> +			contexts[n] = gem_context_create(fd);
> +	}
>   
>   	memset(obj, 0, sizeof(obj));
>   	obj[1].handle = handle;
> @@ -298,6 +307,17 @@ igt_main
>   {
>   	const int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
>   	const struct intel_execution_engine *e;
> +	static const struct {
> +		const char *name;
> +		unsigned int flags;
> +		bool (*require)(int fd);
> +	} phases[] = {
> +		{ "", 0, NULL },
> +		{ "-interruptible", INTERRUPTIBLE, NULL },
> +		{ "-queue", QUEUE, gem_has_queues },
> +		{ "-queue-interruptible", QUEUE | INTERRUPTIBLE, gem_has_queues },
> +		{ }
> +	};
>   	uint32_t light = 0, heavy;
>   	int fd = -1;
>   
> @@ -319,21 +339,26 @@ igt_main
>   	}
>   
>   	for (e = intel_execution_engines; e->name; e++) {
> -		igt_subtest_f("%s%s", e->exec_id == 0 ? "basic-" : "", e->name)
> -			single(fd, light, e, 0, 1, 5);
> -
> -		igt_skip_on_simulation();
> -
> -		igt_subtest_f("%s%s-heavy", e->exec_id == 0 ? "basic-" : "", e->name)
> -			single(fd, heavy, e, 0, 1, 5);
> -		igt_subtest_f("%s-interruptible", e->name)
> -			single(fd, light, e, INTERRUPTIBLE, 1, 150);
> -		igt_subtest_f("forked-%s", e->name)
> -			single(fd, light, e, 0, ncpus, 150);
> -		igt_subtest_f("forked-%s-heavy", e->name)
> -			single(fd, heavy, e, 0, ncpus, 150);
> -		igt_subtest_f("forked-%s-interruptible", e->name)
> -			single(fd, light, e, INTERRUPTIBLE, ncpus, 150);
> +		for (typeof(*phases) *p = phases; p->name; p++) {
> +			igt_subtest_group {
> +				igt_fixture {
> +					if (p->require)
> +						igt_require(p->require(fd));
> +				}
> +
> +				igt_subtest_f("%s%s%s", e->exec_id == 0 ? "basic-" : "", e->name, p->name)
> +					single(fd, light, e, p->flags, 1, 5);
> +
> +				igt_skip_on_simulation();
> +
> +				igt_subtest_f("%s%s-heavy%s", e->exec_id == 0 ? "basic-" : "", e->name, p->name)
> +					single(fd, heavy, e, p->flags, 1, 5);
> +				igt_subtest_f("forked-%s%s", e->name, p->name)
> +					single(fd, light, e, p->flags, ncpus, 150);
> +				igt_subtest_f("forked-%s-heavy%s", e->name, p->name)
> +					single(fd, heavy, e, p->flags, ncpus, 150);
> +			}
> +		}
>   	}
>   
>   	igt_subtest("basic-all-light")
> @@ -341,6 +366,16 @@ igt_main
>   	igt_subtest("basic-all-heavy")
>   		all(fd, heavy, 0, 5);
>   
> +	igt_subtest_group {
> +		igt_fixture {
> +			igt_require(gem_has_queues(fd));
> +		}
> +		igt_subtest("basic-queue-light")
> +			all(fd, light, QUEUE, 5);
> +		igt_subtest("basic-queue-heavy")
> +			all(fd, heavy, QUEUE, 5);
> +	}
> +
>   	igt_fixture {
>   		igt_stop_hang_detector();
>   		gem_close(fd, heavy);
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [igt-dev] [PATCH i-g-t 09/16] i915/gem_ctx_switch: Exercise queues
@ 2019-05-14 12:47     ` Tvrtko Ursulin
  0 siblings, 0 replies; 92+ messages in thread
From: Tvrtko Ursulin @ 2019-05-14 12:47 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: igt-dev


On 08/05/2019 11:09, Chris Wilson wrote:
> Queues are a form of contexts that share a vm and enforce a single timeline
> across all engines. Test switching between them, just like ordinary
> contexts.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   tests/i915/gem_ctx_switch.c | 75 +++++++++++++++++++++++++++----------
>   1 file changed, 55 insertions(+), 20 deletions(-)
> 
> diff --git a/tests/i915/gem_ctx_switch.c b/tests/i915/gem_ctx_switch.c
> index 87e13b915..647911d4c 100644
> --- a/tests/i915/gem_ctx_switch.c
> +++ b/tests/i915/gem_ctx_switch.c
> @@ -44,7 +44,8 @@
>   #define LOCAL_I915_EXEC_NO_RELOC (1<<11)
>   #define LOCAL_I915_EXEC_HANDLE_LUT (1<<12)
>   
> -#define INTERRUPTIBLE 1
> +#define INTERRUPTIBLE 0x1
> +#define QUEUE 0x2
>   
>   static double elapsed(const struct timespec *start, const struct timespec *end)
>   {
> @@ -126,8 +127,12 @@ static void single(int fd, uint32_t handle,
>   
>   	gem_require_ring(fd, e->exec_id | e->flags);
>   
> -	for (n = 0; n < 64; n++)
> -		contexts[n] = gem_context_create(fd);
> +	for (n = 0; n < 64; n++) {
> +		if (flags & QUEUE)
> +			contexts[n] = gem_queue_create(fd);
> +		else
> +			contexts[n] = gem_context_create(fd);
> +	}
>   
>   	memset(&obj, 0, sizeof(obj));
>   	obj.handle = handle;
> @@ -232,8 +237,12 @@ static void all(int fd, uint32_t handle, unsigned flags, int timeout)
>   	}
>   	igt_require(nengine);
>   
> -	for (n = 0; n < ARRAY_SIZE(contexts); n++)
> -		contexts[n] = gem_context_create(fd);
> +	for (n = 0; n < ARRAY_SIZE(contexts); n++) {
> +		if (flags & QUEUE)
> +			contexts[n] = gem_queue_create(fd);
> +		else
> +			contexts[n] = gem_context_create(fd);
> +	}
>   
>   	memset(obj, 0, sizeof(obj));
>   	obj[1].handle = handle;
> @@ -298,6 +307,17 @@ igt_main
>   {
>   	const int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
>   	const struct intel_execution_engine *e;
> +	static const struct {
> +		const char *name;
> +		unsigned int flags;
> +		bool (*require)(int fd);
> +	} phases[] = {
> +		{ "", 0, NULL },
> +		{ "-interruptible", INTERRUPTIBLE, NULL },
> +		{ "-queue", QUEUE, gem_has_queues },
> +		{ "-queue-interruptible", QUEUE | INTERRUPTIBLE, gem_has_queues },
> +		{ }
> +	};
>   	uint32_t light = 0, heavy;
>   	int fd = -1;
>   
> @@ -319,21 +339,26 @@ igt_main
>   	}
>   
>   	for (e = intel_execution_engines; e->name; e++) {
> -		igt_subtest_f("%s%s", e->exec_id == 0 ? "basic-" : "", e->name)
> -			single(fd, light, e, 0, 1, 5);
> -
> -		igt_skip_on_simulation();
> -
> -		igt_subtest_f("%s%s-heavy", e->exec_id == 0 ? "basic-" : "", e->name)
> -			single(fd, heavy, e, 0, 1, 5);
> -		igt_subtest_f("%s-interruptible", e->name)
> -			single(fd, light, e, INTERRUPTIBLE, 1, 150);
> -		igt_subtest_f("forked-%s", e->name)
> -			single(fd, light, e, 0, ncpus, 150);
> -		igt_subtest_f("forked-%s-heavy", e->name)
> -			single(fd, heavy, e, 0, ncpus, 150);
> -		igt_subtest_f("forked-%s-interruptible", e->name)
> -			single(fd, light, e, INTERRUPTIBLE, ncpus, 150);
> +		for (typeof(*phases) *p = phases; p->name; p++) {
> +			igt_subtest_group {
> +				igt_fixture {
> +					if (p->require)
> +						igt_require(p->require(fd));
> +				}
> +
> +				igt_subtest_f("%s%s%s", e->exec_id == 0 ? "basic-" : "", e->name, p->name)
> +					single(fd, light, e, p->flags, 1, 5);
> +
> +				igt_skip_on_simulation();
> +
> +				igt_subtest_f("%s%s-heavy%s", e->exec_id == 0 ? "basic-" : "", e->name, p->name)
> +					single(fd, heavy, e, p->flags, 1, 5);
> +				igt_subtest_f("forked-%s%s", e->name, p->name)
> +					single(fd, light, e, p->flags, ncpus, 150);
> +				igt_subtest_f("forked-%s-heavy%s", e->name, p->name)
> +					single(fd, heavy, e, p->flags, ncpus, 150);
> +			}
> +		}
>   	}
>   
>   	igt_subtest("basic-all-light")
> @@ -341,6 +366,16 @@ igt_main
>   	igt_subtest("basic-all-heavy")
>   		all(fd, heavy, 0, 5);
>   
> +	igt_subtest_group {
> +		igt_fixture {
> +			igt_require(gem_has_queues(fd));
> +		}
> +		igt_subtest("basic-queue-light")
> +			all(fd, light, QUEUE, 5);
> +		igt_subtest("basic-queue-heavy")
> +			all(fd, heavy, QUEUE, 5);
> +	}
> +
>   	igt_fixture {
>   		igt_stop_hang_detector();
>   		gem_close(fd, heavy);
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH i-g-t 11/16] i915/gem_exec_whisper: debugfs/next_seqno is defunct
  2019-05-08 10:09   ` [Intel-gfx] " Chris Wilson
@ 2019-05-14 12:48     ` Tvrtko Ursulin
  -1 siblings, 0 replies; 92+ messages in thread
From: Tvrtko Ursulin @ 2019-05-14 12:48 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: igt-dev


On 08/05/2019 11:09, Chris Wilson wrote:
> We removed next_seqno in 5.1, so time to wave goodbye.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   tests/i915/gem_exec_whisper.c | 12 ------------
>   1 file changed, 12 deletions(-)
> 
> diff --git a/tests/i915/gem_exec_whisper.c b/tests/i915/gem_exec_whisper.c
> index d5afc8119..61b8d6dac 100644
> --- a/tests/i915/gem_exec_whisper.c
> +++ b/tests/i915/gem_exec_whisper.c
> @@ -44,15 +44,6 @@
>   
>   #define VERIFY 0
>   
> -static void write_seqno(int dir, unsigned offset)
> -{
> -	uint32_t seqno = UINT32_MAX - offset;
> -
> -	igt_sysfs_printf(dir, "i915_next_seqno", "0x%x", seqno);
> -
> -	igt_debug("next seqno set to: 0x%x\n", seqno);
> -}
> -
>   static void check_bo(int fd, uint32_t handle, int pass)
>   {
>   	uint32_t *map;
> @@ -355,9 +346,6 @@ static void whisper(int fd, unsigned engine, unsigned flags)
>   			igt_until_timeout(150) {
>   				uint64_t offset;
>   
> -				if (nchild == 1)
> -					write_seqno(debugfs, pass);
> -
>   				if (flags & HANG)
>   					submit_hang(&hang, engines, nengine, flags);
>   
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH i-g-t 10/16] i915/gem_exec_whisper: Fork all-engine tests one-per-engine
  2019-05-08 10:09   ` [igt-dev] " Chris Wilson
@ 2019-05-14 12:57     ` Tvrtko Ursulin
  -1 siblings, 0 replies; 92+ messages in thread
From: Tvrtko Ursulin @ 2019-05-14 12:57 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: igt-dev


On 08/05/2019 11:09, Chris Wilson wrote:
> Add a new mode for some more stress: submit the all-engines tests
> simultaneously, a stream per engine.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   tests/i915/gem_exec_whisper.c | 27 ++++++++++++++++++++++-----
>   1 file changed, 22 insertions(+), 5 deletions(-)
> 
> diff --git a/tests/i915/gem_exec_whisper.c b/tests/i915/gem_exec_whisper.c
> index d3e0b0ba2..d5afc8119 100644
> --- a/tests/i915/gem_exec_whisper.c
> +++ b/tests/i915/gem_exec_whisper.c
> @@ -88,6 +88,7 @@ static void verify_reloc(int fd, uint32_t handle,
>   #define SYNC 0x40
>   #define PRIORITY 0x80
>   #define QUEUES 0x100
> +#define ALL 0x200
>   
>   struct hang {
>   	struct drm_i915_gem_exec_object2 obj;
> @@ -199,6 +200,7 @@ static void whisper(int fd, unsigned engine, unsigned flags)
>   	uint64_t old_offset;
>   	int i, n, loc;
>   	int debugfs;
> +	int nchild;
>   
>   	if (flags & PRIORITY) {
>   		igt_require(gem_scheduler_enabled(fd));
> @@ -215,6 +217,7 @@ static void whisper(int fd, unsigned engine, unsigned flags)
>   				engines[nengine++] = engine;
>   		}
>   	} else {
> +		igt_assert(!(flags & ALL));
>   		igt_require(gem_has_ring(fd, engine));
>   		igt_require(gem_can_store_dword(fd, engine));
>   		engines[nengine++] = engine;
> @@ -233,11 +236,22 @@ static void whisper(int fd, unsigned engine, unsigned flags)
>   	if (flags & HANG)
>   		init_hang(&hang);
>   
> +	nchild = 1;
> +	if (flags & FORKED)
> +		nchild *= sysconf(_SC_NPROCESSORS_ONLN);
> +	if (flags & ALL)
> +		nchild *= nengine;
> +
>   	intel_detect_and_clear_missed_interrupts(fd);
>   	gpu_power_read(&power, &sample[0]);
> -	igt_fork(child, flags & FORKED ? sysconf(_SC_NPROCESSORS_ONLN) : 1)  {
> +	igt_fork(child, nchild) {
>   		unsigned int pass;
>   
> +		if (flags & ALL) {
> +			engines[0] = engines[child % nengine];

Relying on PIDs being sequential feels fragile but suggesting pipes or 
shared memory would be overkill. How about another loop:

if (flags & ALL) {
	for (i = 0; i < nchild; i++) {
		engines_copy = engines;
		nengines_copy = nengine;
		nengines_child = 1;
		engines[0] = engines[i];
		igt_fork(child, 1) {
			...
		}
		
		if (in_parent) {
			engines = engines_copy;
			nengine = nengines_copy;
		} else {
			break;
		}
	}
}

?
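
A compilable version of that, as I read it (untested sketch;
run_stream() is a stand-in for the body of the current igt_fork
block, not a real helper). Since each igt_fork child inherits the
parent's locals at fork time, the child can index engines[] with the
loop counter directly and the copy/restore of the array goes away:

static void fork_one_per_engine(int fd, const unsigned int *engines,
				unsigned int nengine)
{
	for (unsigned int i = 0; i < nengine; i++) {
		igt_fork(child, 1) {
			/* 'i' was inherited from the parent at fork time */
			unsigned int mine = engines[i];

			run_stream(fd, &mine, 1); /* hypothetical helper */
		}
	}

	igt_waitchildren();
}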

Regards,

Tvrtko

> +			nengine = 1;
> +		}
> +
>   		memset(&scratch, 0, sizeof(scratch));
>   		scratch.handle = gem_create(fd, 4096);
>   		scratch.flags = EXEC_OBJECT_WRITE;
> @@ -341,7 +355,7 @@ static void whisper(int fd, unsigned engine, unsigned flags)
>   			igt_until_timeout(150) {
>   				uint64_t offset;
>   
> -				if (!(flags & FORKED))
> +				if (nchild == 1)
>   					write_seqno(debugfs, pass);
>   
>   				if (flags & HANG)
> @@ -382,8 +396,8 @@ static void whisper(int fd, unsigned engine, unsigned flags)
>   
>   				gem_write(fd, batches[1023].handle, loc, &pass, sizeof(pass));
>   				for (n = 1024; --n >= 1; ) {
> +					uint32_t handle[2] = {};
>   					int this_fd = fd;
> -					uint32_t handle[2];
>   
>   					execbuf.buffers_ptr = to_user_pointer(&batches[n-1]);
>   					reloc_migrations += batches[n-1].offset != inter[n].presumed_offset;
> @@ -550,7 +564,7 @@ igt_main
>   		{ "queues-sync", QUEUES | SYNC },
>   		{ NULL }
>   	};
> -	int fd;
> +	int fd = -1;
>   
>   	igt_fixture {
>   		fd = drm_open_driver_master(DRIVER_INTEL);
> @@ -561,9 +575,12 @@ igt_main
>   		igt_fork_hang_detector(fd);
>   	}
>   
> -	for (const struct mode *m = modes; m->name; m++)
> +	for (const struct mode *m = modes; m->name; m++) {
>   		igt_subtest_f("%s", m->name)
>   			whisper(fd, ALL_ENGINES, m->flags);
> +		igt_subtest_f("%s-all", m->name)
> +			whisper(fd, ALL_ENGINES, m->flags | ALL);
> +	}
>   
>   	for (const struct intel_execution_engine *e = intel_execution_engines;
>   	     e->name; e++) {
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH i-g-t 08/16] i915: Exercise creating context with shared GTT
  2019-05-08 10:09   ` [igt-dev] " Chris Wilson
@ 2019-05-15  6:37     ` Tvrtko Ursulin
  -1 siblings, 0 replies; 92+ messages in thread
From: Tvrtko Ursulin @ 2019-05-15  6:37 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: igt-dev


On 08/05/2019 11:09, Chris Wilson wrote:
> v2: Test each shared context is its own timeline and allows request
> reordering between shared contexts.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
> ---
>   lib/i915/gem_context.c        |  68 +++
>   lib/i915/gem_context.h        |  13 +
>   tests/Makefile.sources        |   1 +
>   tests/i915/gem_ctx_shared.c   | 856 ++++++++++++++++++++++++++++++++++
>   tests/i915/gem_exec_whisper.c |  32 +-
>   tests/meson.build             |   1 +
>   6 files changed, 962 insertions(+), 9 deletions(-)
>   create mode 100644 tests/i915/gem_ctx_shared.c
> 
> diff --git a/lib/i915/gem_context.c b/lib/i915/gem_context.c
> index f94d89cb4..8fb8984d1 100644
> --- a/lib/i915/gem_context.c
> +++ b/lib/i915/gem_context.c
> @@ -272,6 +272,74 @@ void gem_context_set_priority(int fd, uint32_t ctx_id, int prio)
>   	igt_assert_eq(__gem_context_set_priority(fd, ctx_id, prio), 0);
>   }
>   
> +int
> +__gem_context_clone(int i915,
> +		    uint32_t src, unsigned int share,
> +		    unsigned int flags,
> +		    uint32_t *out)
> +{
> +	struct drm_i915_gem_context_create_ext_clone clone = {
> +		{ .name = I915_CONTEXT_CREATE_EXT_CLONE },
> +		.clone_id = src,
> +		.flags = share,
> +	};
> +	struct drm_i915_gem_context_create_ext arg = {
> +		.flags = flags | I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
> +		.extensions = to_user_pointer(&clone),
> +	};
> +	int err = 0;
> +
> +	if (igt_ioctl(i915, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, &arg))
> +		err = -errno;
> +
> +	*out = arg.ctx_id;
> +
> +	errno = 0;
> +	return err;
> +}
> +
> +static bool __gem_context_has(int i915, uint32_t share, unsigned int flags)
> +{
> +	uint32_t ctx;
> +
> +	__gem_context_clone(i915, 0, share, flags, &ctx);
> +	if (ctx)
> +		gem_context_destroy(i915, ctx);
> +
> +	errno = 0;
> +	return ctx;
> +}
> +
> +bool gem_contexts_has_shared_gtt(int i915)
> +{
> +	return __gem_context_has(i915, I915_CONTEXT_CLONE_VM, 0);
> +}
> +
> +bool gem_has_queues(int i915)
> +{
> +	return __gem_context_has(i915,
> +				 I915_CONTEXT_CLONE_VM,
> +				 I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
> +}
> +
> +uint32_t gem_context_clone(int i915,
> +			   uint32_t src, unsigned int share,
> +			   unsigned int flags)
> +{
> +	uint32_t ctx;
> +
> +	igt_assert_eq(__gem_context_clone(i915, src, share, flags, &ctx), 0);
> +
> +	return ctx;
> +}
> +
> +uint32_t gem_queue_create(int i915)
> +{
> +	return gem_context_clone(i915, 0,
> +				 I915_CONTEXT_CLONE_VM,
> +				 I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
> +}
> +
>   bool gem_context_has_engine(int fd, uint32_t ctx, uint64_t engine)
>   {
>   	struct drm_i915_gem_exec_object2 exec = {};
> diff --git a/lib/i915/gem_context.h b/lib/i915/gem_context.h
> index a052714d4..8043c3401 100644
> --- a/lib/i915/gem_context.h
> +++ b/lib/i915/gem_context.h
> @@ -29,6 +29,19 @@ int __gem_context_create(int fd, uint32_t *ctx_id);
>   void gem_context_destroy(int fd, uint32_t ctx_id);
>   int __gem_context_destroy(int fd, uint32_t ctx_id);
>   
> +int __gem_context_clone(int i915,
> +			uint32_t src, unsigned int share,
> +			unsigned int flags,
> +			uint32_t *out);
> +uint32_t gem_context_clone(int i915,
> +			   uint32_t src, unsigned int share,
> +			   unsigned int flags);
> +
> +uint32_t gem_queue_create(int i915);
> +
> +bool gem_contexts_has_shared_gtt(int i915);
> +bool gem_has_queues(int i915);
> +
>   bool gem_has_contexts(int fd);
>   void gem_require_contexts(int fd);
>   void gem_context_require_bannable(int fd);
> diff --git a/tests/Makefile.sources b/tests/Makefile.sources
> index e1b7feeb2..3552e895b 100644
> --- a/tests/Makefile.sources
> +++ b/tests/Makefile.sources
> @@ -22,6 +22,7 @@ TESTS_progs = \
>   	drm_mm \
>   	drm_read \
>   	i915/gem_ctx_clone \
> +	i915/gem_ctx_shared \
>   	i915/gem_vm_create \
>   	kms_3d \
>   	kms_addfb_basic \
> diff --git a/tests/i915/gem_ctx_shared.c b/tests/i915/gem_ctx_shared.c
> new file mode 100644
> index 000000000..0076f5e9d
> --- /dev/null
> +++ b/tests/i915/gem_ctx_shared.c
> @@ -0,0 +1,856 @@
> +/*
> + * Copyright © 2017 Intel Corporation

2019

> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + */
> +
> +#include "igt.h"
> +
> +#include <unistd.h>
> +#include <stdlib.h>
> +#include <stdint.h>
> +#include <stdio.h>
> +#include <string.h>
> +#include <fcntl.h>
> +#include <inttypes.h>
> +#include <errno.h>
> +#include <sys/stat.h>
> +#include <sys/ioctl.h>
> +#include <sys/time.h>
> +
> +#include <drm.h>
> +
> +#include "igt_rand.h"
> +#include "igt_vgem.h"
> +#include "sync_file.h"
> +
> +#define LO 0
> +#define HI 1
> +#define NOISE 2
> +
> +#define MAX_PRIO LOCAL_I915_CONTEXT_MAX_USER_PRIORITY
> +#define MIN_PRIO LOCAL_I915_CONTEXT_MIN_USER_PRIORITY
> +
> +static int priorities[] = {
> +	[LO] = MIN_PRIO / 2,
> +	[HI] = MAX_PRIO / 2,
> +};
> +
> +#define MAX_ELSP_QLEN 16
> +
> +IGT_TEST_DESCRIPTION("Test shared contexts.");
> +
> +static void create_shared_gtt(int i915, unsigned int flags)
> +#define DETACHED 0x1
> +{
> +	const uint32_t bbe = MI_BATCH_BUFFER_END;
> +	struct drm_i915_gem_exec_object2 obj = {
> +		.handle = gem_create(i915, 4096),
> +	};
> +	struct drm_i915_gem_execbuffer2 execbuf = {
> +		.buffers_ptr = to_user_pointer(&obj),
> +		.buffer_count = 1,
> +	};
> +	uint32_t parent, child;
> +
> +	gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
> +	gem_execbuf(i915, &execbuf);
> +	gem_sync(i915, obj.handle);
> +
> +	child = flags & DETACHED ? gem_context_create(i915) : 0;
> +	igt_until_timeout(2) {
> +		parent = flags & DETACHED ? child : 0;
> +		child = gem_context_clone(i915,
> +					  parent, I915_CONTEXT_CLONE_VM,
> +					  0);
> +		execbuf.rsvd1 = child;
> +		gem_execbuf(i915, &execbuf);
> +
> +		if (flags & DETACHED) {
> +			gem_context_destroy(i915, parent);
> +			gem_execbuf(i915, &execbuf);
> +		} else {
> +			parent = child;
> +			gem_context_destroy(i915, parent);
> +		}
> +
> +		execbuf.rsvd1 = parent;
> +		igt_assert_eq(__gem_execbuf(i915, &execbuf), -ENOENT);
> +		igt_assert_eq(__gem_context_clone(i915,
> +						  parent, I915_CONTEXT_CLONE_VM,
> +						  0, &parent), -ENOENT);
> +	}
> +	if (flags & DETACHED)
> +		gem_context_destroy(i915, child);
> +
> +	gem_sync(i915, obj.handle);
> +	gem_close(i915, obj.handle);
> +}
> +
> +static void disjoint_timelines(int i915)
> +{
> +	IGT_CORK_HANDLE(cork);
> +	igt_spin_t *spin[2];
> +	uint32_t plug, child;
> +
> +	igt_require(gem_has_execlists(i915));
> +
> +	/*
> +	 * Each context, although they share a vm, are expected to be
> +	 * distinct timelines. A request queued to one context should be
> +	 * independent of any shared contexts.
> +	 */
> +	child = gem_context_clone(i915, 0, I915_CONTEXT_CLONE_VM, 0);
> +	plug = igt_cork_plug(&cork, i915);
> +
> +	spin[0] = __igt_spin_new(i915, .ctx = 0, .dependency = plug);
> +	spin[1] = __igt_spin_new(i915, .ctx = child);
> +
> +	/* Wait for the second spinner, will hang if stuck behind the first */
> +	igt_spin_end(spin[1]);
> +	gem_sync(i915, spin[1]->handle);
> +
> +	igt_cork_unplug(&cork);
> +
> +	igt_spin_free(i915, spin[1]);
> +	igt_spin_free(i915, spin[0]);
> +}
> +
> +static void exhaust_shared_gtt(int i915, unsigned int flags)
> +#define EXHAUST_LRC 0x1
> +{
> +	i915 = gem_reopen_driver(i915);
> +
> +	igt_fork(pid, 1) {
> +		const uint32_t bbe = MI_BATCH_BUFFER_END;
> +		struct drm_i915_gem_exec_object2 obj = {
> +			.handle = gem_create(i915, 4096)
> +		};
> +		struct drm_i915_gem_execbuffer2 execbuf = {
> +			.buffers_ptr = to_user_pointer(&obj),
> +			.buffer_count = 1,
> +		};
> +		uint32_t parent, child;
> +		unsigned long count = 0;
> +		int err;
> +
> +		gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
> +
> +		child = 0;
> +		for (;;) {
> +			parent = child;
> +			err = __gem_context_clone(i915,
> +						  parent, I915_CONTEXT_CLONE_VM,
> +						  0, &child);
> +			if (err)
> +				break;
> +
> +			if (flags & EXHAUST_LRC) {
> +				execbuf.rsvd1 = child;
> +				err = __gem_execbuf(i915, &execbuf);
> +				if (err)
> +					break;
> +			}

What are the stop conditions in this test, with and without the 
EXHAUST_LRC flag? It would be good to put that in a comment.

Especially since AFAIR this one was causing OOM for me, so it might
need to be tweaked.
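
For reference, as I read it the loop today stops only on the first
failure: __gem_context_clone() erroring out once the kernel cannot
allocate another context/ppGTT, or, with EXHAUST_LRC, the first
execbuf failing once an LRC cannot be allocated. A sketch of an
explicit cap on top of that, with MAX_CLONES an arbitrary bound
invented here to keep small machines out of OOM:

#define MAX_CLONES (256 * 1024) /* arbitrary; tune to taste */

		for (count = 0; count < MAX_CLONES; count++) {
			err = __gem_context_clone(i915,
						  child, I915_CONTEXT_CLONE_VM,
						  0, &child);
			if (err)
				break;

			if (flags & EXHAUST_LRC) {
				execbuf.rsvd1 = child;
				err = __gem_execbuf(i915, &execbuf);
				if (err)
					break;
			}
		}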

> +
> +			count++;
> +		}
> +		gem_sync(i915, obj.handle);
> +
> +		igt_info("Created %lu shared contexts, before %d (%s)\n",
> +			 count, err, strerror(-err));
> +	}
> +	close(i915);
> +	igt_waitchildren();
> +}
> +
> +static void exec_shared_gtt(int i915, unsigned int ring)
> +{
> +	const int gen = intel_gen(intel_get_drm_devid(i915));
> +	const uint32_t bbe = MI_BATCH_BUFFER_END;
> +	struct drm_i915_gem_exec_object2 obj = {
> +		.handle = gem_create(i915, 4096)
> +	};
> +	struct drm_i915_gem_execbuffer2 execbuf = {
> +		.buffers_ptr = to_user_pointer(&obj),
> +		.buffer_count = 1,
> +		.flags = ring,
> +	};
> +	uint32_t scratch = obj.handle;
> +	uint32_t batch[16];
> +	int i;
> +
> +	gem_require_ring(i915, ring);
> +	igt_require(gem_can_store_dword(i915, ring));
> +
> +	/* Load object into place in the GTT */
> +	gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
> +	gem_execbuf(i915, &execbuf);
> +
> +	/* Presume nothing causes an eviction in the meantime */
> +
> +	obj.handle = gem_create(i915, 4096);
> +
> +	i = 0;
> +	batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
> +	if (gen >= 8) {
> +		batch[++i] = obj.offset;
> +		batch[++i] = 0;
> +	} else if (gen >= 4) {
> +		batch[++i] = 0;
> +		batch[++i] = obj.offset;
> +	} else {
> +		batch[i]--;
> +		batch[++i] = obj.offset;
> +	}
> +	batch[++i] = 0xc0ffee;
> +	batch[++i] = MI_BATCH_BUFFER_END;
> +	gem_write(i915, obj.handle, 0, batch, sizeof(batch));
> +
> +	obj.offset += 4096; /* make sure we don't cause an eviction! */

Is 4k apart safe?

A short comment on how this test works would be good.
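
My reading of it, for what it's worth (the comment below is my
wording, not the author's):

	/*
	 * The first execbuf bound 'scratch' somewhere in the GTT.
	 * Soft-pin this batch one page above it so that binding the
	 * batch cannot evict scratch; the MI_STORE_DWORD_IMM into
	 * scratch's old offset then proves the cloned context shares
	 * the same address space.
	 */
	obj.offset += 4096; /* one page above scratch, no overlap */
	obj.flags |= EXEC_OBJECT_PINNED;

And 4k apart looks safe to me exactly because of EXEC_OBJECT_PINNED:
both objects are a single page, so the pinned batch cannot overlap
scratch.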

> +	obj.flags |= EXEC_OBJECT_PINNED;
> +	execbuf.rsvd1 = gem_context_clone(i915, 0, I915_CONTEXT_CLONE_VM, 0);
> +	if (gen > 3 && gen < 6)
> +		execbuf.flags |= I915_EXEC_SECURE;
> +
> +	gem_execbuf(i915, &execbuf);
> +	gem_context_destroy(i915, execbuf.rsvd1);
> +	gem_sync(i915, obj.handle); /* write hazard lies */
> +	gem_close(i915, obj.handle);
> +
> +	gem_read(i915, scratch, 0, batch, sizeof(uint32_t));
> +	gem_close(i915, scratch);
> +
> +	igt_assert_eq_u32(*batch, 0xc0ffee);
> +}
> +
> +static int nop_sync(int i915, uint32_t ctx, unsigned int ring, int64_t timeout)
> +{
> +	const uint32_t bbe = MI_BATCH_BUFFER_END;
> +	struct drm_i915_gem_exec_object2 obj = {
> +		.handle = gem_create(i915, 4096),
> +	};
> +	struct drm_i915_gem_execbuffer2 execbuf = {
> +		.buffers_ptr = to_user_pointer(&obj),
> +		.buffer_count = 1,
> +		.flags = ring,
> +		.rsvd1 = ctx,
> +	};
> +	int err;
> +
> +	gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
> +	gem_execbuf(i915, &execbuf);
> +	err = gem_wait(i915, obj.handle, &timeout);
> +	gem_close(i915, obj.handle);
> +
> +	return err;
> +}
> +
> +static bool has_single_timeline(int i915)
> +{
> +	uint32_t ctx;
> +
> +	__gem_context_clone(i915, 0, 0,
> +			    I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE,
> +			    &ctx);
> +	if (ctx)
> +		gem_context_destroy(i915, ctx);
> +
> +	return ctx != 0;
> +}
> +
> +static bool ignore_engine(unsigned engine)
> +{
> +	if (engine == 0)
> +		return true;
> +
> +	if (engine == I915_EXEC_BSD)
> +		return true;
> +
> +	return false;
> +}
> +
> +static void single_timeline(int i915)
> +{
> +	const uint32_t bbe = MI_BATCH_BUFFER_END;
> +	struct drm_i915_gem_exec_object2 obj = {
> +		.handle = gem_create(i915, 4096),
> +	};
> +	struct drm_i915_gem_execbuffer2 execbuf = {
> +		.buffers_ptr = to_user_pointer(&obj),
> +		.buffer_count = 1,
> +	};
> +	struct sync_fence_info rings[16];

Could use for_each_physical_engine to count the engines. But we probably 
have plenty of this around the code base.
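
A sketch of the counting pass (written with the existing
for_each_engine here; for_each_physical_engine would slot in the
same way):

	unsigned int engine, nengine = 0;
	struct sync_fence_info *rings;

	for_each_engine(i915, engine)
		nengine++;

	rings = calloc(nengine, sizeof(*rings));
	igt_assert(rings);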

> +	struct sync_file_info sync_file_info = {
> +		.num_fences = 1,
> +	};
> +	unsigned int engine;
> +	int n;
> +
> +	igt_require(has_single_timeline(i915));
> +
> +	gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
> +	gem_execbuf(i915, &execbuf);
> +	gem_sync(i915, obj.handle);
> +
> +	/*
> +	 * For a "single timeline" context, each ring is on the common
> +	 * timeline, unlike a normal context where each ring has an
> +	 * independent timeline. That is no matter which engine we submit
> +	 * to, it reports the same timeline name and fence context. However,
> +	 * the fence context is not reported through the sync_fence_info.

Is the test useful then? There was one I reviewed earlier in this series 
which tested for execution ordering, which sounds like what's needed.

> +	 */
> +	execbuf.rsvd1 =
> +		gem_context_clone(i915, 0, 0,
> +				  I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
> +	execbuf.flags = I915_EXEC_FENCE_OUT;
> +	n = 0;
> +	for_each_engine(i915, engine) {

for_each_physical_engine to align with Andi's work?

> +		gem_execbuf_wr(i915, &execbuf);
> +		sync_file_info.sync_fence_info = to_user_pointer(&rings[n]);
> +		do_ioctl(execbuf.rsvd2 >> 32, SYNC_IOC_FILE_INFO, &sync_file_info);
> +		close(execbuf.rsvd2 >> 32);
> +
> +		igt_info("ring[%d] fence: %s %s\n",
> +			 n, rings[n].driver_name, rings[n].obj_name);
> +		n++;
> +	}
> +	gem_sync(i915, obj.handle);
> +	gem_close(i915, obj.handle);
> +
> +	for (int i = 1; i < n; i++) {
> +		igt_assert(!strcmp(rings[0].driver_name, rings[i].driver_name));
> +		igt_assert(!strcmp(rings[0].obj_name, rings[i].obj_name));

What is in obj_name?

> +	}
> +}
> +
> +static void exec_single_timeline(int i915, unsigned int ring)
> +{
> +	unsigned int other;
> +	igt_spin_t *spin;
> +	uint32_t ctx;
> +
> +	gem_require_ring(i915, ring);
> +	igt_require(has_single_timeline(i915));
> +
> +	/*
> +	 * On an ordinary context, a blockage on one ring doesn't prevent
> +	 * execution on an other.
> +	 */
> +	ctx = 0;
> +	spin = NULL;
> +	for_each_engine(i915, other) {

for_each_physical

> +		if (other == ring || ignore_engine(other))
> +			continue;
> +
> +		if (spin == NULL) {
> +			spin = __igt_spin_new(i915, .ctx = ctx, .engine = other);
> +		} else {
> +			struct drm_i915_gem_execbuffer2 execbuf = {
> +				.buffers_ptr = spin->execbuf.buffers_ptr,
> +				.buffer_count = spin->execbuf.buffer_count,
> +				.flags = other,
> +				.rsvd1 = ctx,
> +			};
> +			gem_execbuf(i915, &execbuf);
> +		}
> +	}
> +	igt_require(spin);
> +	igt_assert_eq(nop_sync(i915, ctx, ring, NSEC_PER_SEC), 0);
> +	igt_spin_free(i915, spin);
> +
> +	/*
> +	 * But if we create a context with just a single shared timeline,
> +	 * then it will block waiting for the earlier requests on the
> +	 * other engines.
> +	 */
> +	ctx = gem_context_clone(i915, 0, 0,
> +				I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
> +	spin = NULL;
> +	for_each_engine(i915, other) {

Ditto.

> +		if (other == ring || ignore_engine(other))
> +			continue;
> +
> +		if (spin == NULL) {
> +			spin = __igt_spin_new(i915, .ctx = ctx, .engine = other);
> +		} else {
> +			struct drm_i915_gem_execbuffer2 execbuf = {
> +				.buffers_ptr = spin->execbuf.buffers_ptr,
> +				.buffer_count = spin->execbuf.buffer_count,
> +				.flags = other,
> +				.rsvd1 = ctx,
> +			};
> +			gem_execbuf(i915, &execbuf);
> +		}
> +	}
> +	igt_assert(spin);
> +	igt_assert_eq(nop_sync(i915, ctx, ring, NSEC_PER_SEC), -ETIME);
> +	igt_spin_free(i915, spin);
> +}
> +
> +static void store_dword(int i915, uint32_t ctx, unsigned ring,
> +			uint32_t target, uint32_t offset, uint32_t value,
> +			uint32_t cork, unsigned write_domain)
> +{
> +	const int gen = intel_gen(intel_get_drm_devid(i915));
> +	struct drm_i915_gem_exec_object2 obj[3];
> +	struct drm_i915_gem_relocation_entry reloc;
> +	struct drm_i915_gem_execbuffer2 execbuf;
> +	uint32_t batch[16];
> +	int i;
> +
> +	memset(&execbuf, 0, sizeof(execbuf));
> +	execbuf.buffers_ptr = to_user_pointer(obj + !cork);
> +	execbuf.buffer_count = 2 + !!cork;
> +	execbuf.flags = ring;
> +	if (gen < 6)
> +		execbuf.flags |= I915_EXEC_SECURE;
> +	execbuf.rsvd1 = ctx;
> +
> +	memset(obj, 0, sizeof(obj));
> +	obj[0].handle = cork;
> +	obj[1].handle = target;
> +	obj[2].handle = gem_create(i915, 4096);
> +
> +	memset(&reloc, 0, sizeof(reloc));
> +	reloc.target_handle = obj[1].handle;
> +	reloc.presumed_offset = 0;
> +	reloc.offset = sizeof(uint32_t);
> +	reloc.delta = offset;
> +	reloc.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
> +	reloc.write_domain = write_domain;
> +	obj[2].relocs_ptr = to_user_pointer(&reloc);
> +	obj[2].relocation_count = 1;
> +
> +	i = 0;
> +	batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
> +	if (gen >= 8) {
> +		batch[++i] = offset;
> +		batch[++i] = 0;
> +	} else if (gen >= 4) {
> +		batch[++i] = 0;
> +		batch[++i] = offset;
> +		reloc.offset += sizeof(uint32_t);
> +	} else {
> +		batch[i]--;
> +		batch[++i] = offset;
> +	}
> +	batch[++i] = value;
> +	batch[++i] = MI_BATCH_BUFFER_END;
> +	gem_write(i915, obj[2].handle, 0, batch, sizeof(batch));
> +	gem_execbuf(i915, &execbuf);
> +	gem_close(i915, obj[2].handle);
> +}
> +
> +static uint32_t create_highest_priority(int i915)
> +{
> +	uint32_t ctx = gem_context_create(i915);
> +
> +	/*
> +	 * If there is no priority support, all contexts will have equal
> +	 * priority (and therefore the max user priority), so no context
> +	 * can overtake us, and we effectively can form a plug.
> +	 */
> +	__gem_context_set_priority(i915, ctx, MAX_PRIO);
> +
> +	return ctx;
> +}
> +
> +static void unplug_show_queue(int i915, struct igt_cork *c, unsigned int engine)
> +{
> +	igt_spin_t *spin[MAX_ELSP_QLEN];

Why is this 16?

> +
> +	for (int n = 0; n < ARRAY_SIZE(spin); n++) {
> +		const struct igt_spin_factory opts = {
> +			.ctx = create_highest_priority(i915),
> +			.engine = engine,
> +		};
> +		spin[n] = __igt_spin_factory(i915, &opts);
> +		gem_context_destroy(i915, opts.ctx);
> +	}
> +
> +	igt_cork_unplug(c); /* batches will now be queued on the engine */
> +	igt_debugfs_dump(i915, "i915_engine_info");
> +
> +	for (int n = 0; n < ARRAY_SIZE(spin); n++)
> +		igt_spin_free(i915, spin[n]);
> +}
> +
> +static uint32_t store_timestamp(int i915,
> +				uint32_t ctx, unsigned ring,
> +				unsigned mmio_base)
> +{
> +	const bool r64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
> +	struct drm_i915_gem_exec_object2 obj = {
> +		.handle = gem_create(i915, 4096),
> +		.relocation_count = 1,
> +	};
> +	struct drm_i915_gem_relocation_entry reloc = {
> +		.target_handle = obj.handle,
> +		.offset = 2 * sizeof(uint32_t),
> +		.delta = 4092,
> +		.read_domains = I915_GEM_DOMAIN_INSTRUCTION,
> +	};
> +	struct drm_i915_gem_execbuffer2 execbuf = {
> +		.buffers_ptr = to_user_pointer(&obj),
> +		.buffer_count = 1,
> +		.flags = ring,
> +		.rsvd1 = ctx,
> +	};
> +	uint32_t batch[] = {
> +		0x24 << 23 | (1 + r64b), /* SRM */
> +		mmio_base + 0x358,
> +		4092,
> +		0,
> +		MI_BATCH_BUFFER_END
> +	};
> +
> +	igt_require(intel_gen(intel_get_drm_devid(i915)) >= 7);
> +
> +	gem_write(i915, obj.handle, 0, batch, sizeof(batch));
> +	obj.relocs_ptr = to_user_pointer(&reloc);
> +
> +	gem_execbuf(i915, &execbuf);
> +
> +	return obj.handle;
> +}
> +
> +static void independent(int i915, unsigned ring, unsigned flags)
> +{
> +	uint32_t handle[ARRAY_SIZE(priorities)];
> +	igt_spin_t *spin[MAX_ELSP_QLEN];
> +	unsigned int mmio_base;
> +
> +	/* XXX i915_query()! */
> +	switch (ring) {
> +	case I915_EXEC_DEFAULT:
> +	case I915_EXEC_RENDER:
> +		mmio_base = 0x2000;
> +		break;
> +#if 0
> +	case I915_EXEC_BSD:
> +		mmio_base = 0x12000;
> +		break;
> +#endif
> +	case I915_EXEC_BLT:
> +		mmio_base = 0x22000;
> +		break;
> +
> +	case I915_EXEC_VEBOX:
> +		if (intel_gen(intel_get_drm_devid(i915)) >= 11)
> +			mmio_base = 0x1d8000;
> +		else
> +			mmio_base = 0x1a000;
> +		break;
> +
> +	default:
> +		igt_skip("mmio base not known\n");
> +	}

Ufff this is quite questionable. Should we rather have this subtest in 
selftests only?
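
For context (my annotation): the 0x358 offset that store_timestamp()
adds is RING_TIMESTAMP relative to the engine's mmio base, matching
the kernel's definition (modulo the _MMIO() wrapper):

/* from the kernel's i915_reg.h, for reference */
#define RING_TIMESTAMP(base) ((base) + 0x358)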

> +
> +	for (int n = 0; n < ARRAY_SIZE(spin); n++) {
> +		const struct igt_spin_factory opts = {
> +			.ctx = create_highest_priority(i915),
> +			.engine = ring,
> +		};
> +		spin[n] = __igt_spin_factory(i915, &opts);
> +		gem_context_destroy(i915, opts.ctx);
> +	}
> +
> +	for (int i = 0; i < ARRAY_SIZE(priorities); i++) {
> +		uint32_t ctx = gem_queue_create(i915);
> +		gem_context_set_priority(i915, ctx, priorities[i]);
> +		handle[i] = store_timestamp(i915, ctx, ring, mmio_base);
> +		gem_context_destroy(i915, ctx);
> +	}
> +
> +	for (int n = 0; n < ARRAY_SIZE(spin); n++)
> +		igt_spin_free(i915, spin[n]);
> +
> +	for (int i = 0; i < ARRAY_SIZE(priorities); i++) {
> +		uint32_t *ptr;
> +
> +		ptr = gem_mmap__gtt(i915, handle[i], 4096, PROT_READ);
> +		gem_set_domain(i915, handle[i], /* no write hazard lies! */
> +			       I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
> +		gem_close(i915, handle[i]);
> +
> +		handle[i] = ptr[1023];

1023 relates to 4092 from store_timestamp I gather. The two need to be 
defined closer together.
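
Agreed; a sketch of tying them together (TIMESTAMP_OFFSET is a name
made up here):

#define TIMESTAMP_OFFSET 4092 /* last dword of the 4KiB object */

store_timestamp() would then use .delta = TIMESTAMP_OFFSET for the
reloc and as the SRM target, and the readback above becomes:

	handle[i] = ptr[TIMESTAMP_OFFSET / sizeof(uint32_t)]; /* ptr[1023] */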

> +		munmap(ptr, 4096);
> +
> +		igt_debug("ctx[%d] .prio=%d, timestamp=%u\n",
> +			  i, priorities[i], handle[i]);
> +	}
> +
> +	igt_assert((int32_t)(handle[HI] - handle[LO]) < 0);
> +}
> +
> +static void reorder(int i915, unsigned ring, unsigned flags)
> +#define EQUAL 1
> +{
> +	IGT_CORK_HANDLE(cork);
> +	uint32_t scratch;
> +	uint32_t *ptr;
> +	uint32_t ctx[2];
> +	uint32_t plug;
> +
> +	ctx[LO] = gem_queue_create(i915);
> +	gem_context_set_priority(i915, ctx[LO], MIN_PRIO);
> +
> +	ctx[HI] = gem_queue_create(i915);
> +	gem_context_set_priority(i915, ctx[HI], flags & EQUAL ? MIN_PRIO : 0);
> +
> +	scratch = gem_create(i915, 4096);
> +	plug = igt_cork_plug(&cork, i915);
> +
> +	/* We expect the high priority context to be executed first, and
> +	 * so the final result will be value from the low priority context.
> +	 */
> +	store_dword(i915, ctx[LO], ring, scratch, 0, ctx[LO], plug, 0);
> +	store_dword(i915, ctx[HI], ring, scratch, 0, ctx[HI], plug, 0);
> +
> +	unplug_show_queue(i915, &cork, ring);
> +	gem_close(i915, plug);
> +
> +	gem_context_destroy(i915, ctx[LO]);
> +	gem_context_destroy(i915, ctx[HI]);
> +
> +	ptr = gem_mmap__gtt(i915, scratch, 4096, PROT_READ);
> +	gem_set_domain(i915, scratch, /* no write hazard lies! */
> +		       I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
> +	gem_close(i915, scratch);
> +
> +	if (flags & EQUAL) /* equal priority, result will be fifo */
> +		igt_assert_eq_u32(ptr[0], ctx[HI]);
> +	else
> +		igt_assert_eq_u32(ptr[0], ctx[LO]);
> +	munmap(ptr, 4096);
> +}
> +
> +static void promotion(int i915, unsigned ring)
> +{
> +	IGT_CORK_HANDLE(cork);
> +	uint32_t result, dep;
> +	uint32_t *ptr;
> +	uint32_t ctx[3];
> +	uint32_t plug;
> +
> +	ctx[LO] = gem_queue_create(i915);
> +	gem_context_set_priority(i915, ctx[LO], MIN_PRIO);
> +
> +	ctx[HI] = gem_queue_create(i915);
> +	gem_context_set_priority(i915, ctx[HI], 0);
> +
> +	ctx[NOISE] = gem_queue_create(i915);
> +	gem_context_set_priority(i915, ctx[NOISE], MIN_PRIO/2);
> +
> +	result = gem_create(i915, 4096);
> +	dep = gem_create(i915, 4096);
> +
> +	plug = igt_cork_plug(&cork, i915);
> +
> +	/* Expect that HI promotes LO, so the order will be LO, HI, NOISE.
> +	 *
> +	 * fifo would be NOISE, LO, HI.
> +	 * strict priority would be  HI, NOISE, LO
> +	 */
> +	store_dword(i915, ctx[NOISE], ring, result, 0, ctx[NOISE], plug, 0);
> +	store_dword(i915, ctx[LO], ring, result, 0, ctx[LO], plug, 0);
> +
> +	/* link LO <-> HI via a dependency on another buffer */
> +	store_dword(i915, ctx[LO], ring, dep, 0, ctx[LO], 0, I915_GEM_DOMAIN_INSTRUCTION);
> +	store_dword(i915, ctx[HI], ring, dep, 0, ctx[HI], 0, 0);
> +
> +	store_dword(i915, ctx[HI], ring, result, 0, ctx[HI], 0, 0);
> +
> +	unplug_show_queue(i915, &cork, ring);
> +	gem_close(i915, plug);
> +
> +	gem_context_destroy(i915, ctx[NOISE]);
> +	gem_context_destroy(i915, ctx[LO]);
> +	gem_context_destroy(i915, ctx[HI]);
> +
> +	ptr = gem_mmap__gtt(i915, dep, 4096, PROT_READ);
> +	gem_set_domain(i915, dep, /* no write hazard lies! */
> +			I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
> +	gem_close(i915, dep);
> +
> +	igt_assert_eq_u32(ptr[0], ctx[HI]);
> +	munmap(ptr, 4096);
> +
> +	ptr = gem_mmap__gtt(i915, result, 4096, PROT_READ);
> +	gem_set_domain(i915, result, /* no write hazard lies! */
> +			I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
> +	gem_close(i915, result);
> +
> +	igt_assert_eq_u32(ptr[0], ctx[NOISE]);
> +	munmap(ptr, 4096);
> +}
> +
> +static void smoketest(int i915, unsigned ring, unsigned timeout)
> +{
> +	const int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
> +	unsigned engines[16];
> +	unsigned nengine;
> +	unsigned engine;
> +	uint32_t scratch;
> +	uint32_t *ptr;
> +
> +	nengine = 0;
> +	for_each_engine(i915, engine) {
> +		if (ignore_engine(engine))
> +			continue;
> +
> +		engines[nengine++] = engine;
> +	}
> +	igt_require(nengine);

for_each_physical and counting the engines for the engines array would 
be better I think.

> +
> +	scratch = gem_create(i915, 4096);
> +	igt_fork(child, ncpus) {
> +		unsigned long count = 0;
> +		uint32_t ctx;
> +
> +		hars_petruska_f54_1_random_perturb(child);
> +
> +		ctx = gem_queue_create(i915);
> +		igt_until_timeout(timeout) {
> +			int prio;
> +
> +			prio = hars_petruska_f54_1_random_unsafe_max(MAX_PRIO - MIN_PRIO) + MIN_PRIO;
> +			gem_context_set_priority(i915, ctx, prio);
> +
> +			engine = engines[hars_petruska_f54_1_random_unsafe_max(nengine)];
> +			store_dword(i915, ctx, engine, scratch,
> +				    8*child + 0, ~child,
> +				    0, 0);
> +			for (unsigned int step = 0; step < 8; step++)
> +				store_dword(i915, ctx, engine, scratch,
> +					    8*child + 4, count++,
> +					    0, 0);
> +		}
> +		gem_context_destroy(i915, ctx);
> +	}
> +	igt_waitchildren();
> +
> +	ptr = gem_mmap__gtt(i915, scratch, 4096, PROT_READ);
> +	gem_set_domain(i915, scratch, /* no write hazard lies! */
> +			I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
> +	gem_close(i915, scratch);
> +
> +	for (unsigned n = 0; n < ncpus; n++) {
> +		igt_assert_eq_u32(ptr[2*n], ~n);
> +		/*
> +		 * Note this count is approximate due to unconstrained
> +		 * ordering of the dword writes between engines.
> +		 *
> +		 * Take the result with a pinch of salt.
> +		 */
> +		igt_info("Child[%d] completed %u cycles\n",  n, ptr[2*n+1]);
> +	}
> +	munmap(ptr, 4096);
> +}
> +
> +igt_main
> +{
> +	const struct intel_execution_engine *e;
> +	int i915 = -1;
> +
> +	igt_fixture {
> +		i915 = drm_open_driver(DRIVER_INTEL);
> +		igt_require_gem(i915);
> +	}
> +
> +	igt_subtest_group {
> +		igt_fixture {
> +			igt_require(gem_contexts_has_shared_gtt(i915));
> +			igt_fork_hang_detector(i915);
> +		}
> +
> +		igt_subtest("create-shared-gtt")
> +			create_shared_gtt(i915, 0);
> +
> +		igt_subtest("detached-shared-gtt")
> +			create_shared_gtt(i915, DETACHED);
> +
> +		igt_subtest("disjoint-timelines")
> +			disjoint_timelines(i915);
> +
> +		igt_subtest("single-timeline")
> +			single_timeline(i915);
> +
> +		igt_subtest("exhaust-shared-gtt")
> +			exhaust_shared_gtt(i915, 0);
> +
> +		igt_subtest("exhaust-shared-gtt-lrc")
> +			exhaust_shared_gtt(i915, EXHAUST_LRC);
> +
> +		for (e = intel_execution_engines; e->name; e++) {
> +			igt_subtest_f("exec-shared-gtt-%s", e->name)
> +				exec_shared_gtt(i915, e->exec_id | e->flags);

The same previously raised question: should it iterate the legacy 
execbuf engines or the physical engines? Maybe you want different 
subtests to do both?

> +
> +			if (!ignore_engine(e->exec_id | e->flags)) {
> +				igt_subtest_f("exec-single-timeline-%s",
> +					      e->name)
> +					exec_single_timeline(i915,
> +							     e->exec_id | e->flags);
> +			}
> +
> +			/*
> +			 * Check that the shared contexts operate independently,
> +			 * that is requests on one ("queue") can be scheduled
> +			 * around another queue. We only check the basics here,
> +			 * enough to reduce the queue into just another context,
> +			 * and so rely on gem_exec_schedule to prove the rest.
> +			 */
> +			igt_subtest_group {
> +				igt_fixture {
> +					gem_require_ring(i915, e->exec_id | e->flags);
> +					igt_require(gem_can_store_dword(i915, e->exec_id | e->flags));
> +					igt_require(gem_scheduler_enabled(i915));
> +					igt_require(gem_scheduler_has_ctx_priority(i915));
> +				}
> +
> +				igt_subtest_f("Q-independent-%s", e->name)
> +					independent(i915, e->exec_id | e->flags, 0);
> +
> +				igt_subtest_f("Q-in-order-%s", e->name)
> +					reorder(i915, e->exec_id | e->flags, EQUAL);
> +
> +				igt_subtest_f("Q-out-order-%s", e->name)
> +					reorder(i915, e->exec_id | e->flags, 0);
> +
> +				igt_subtest_f("Q-promotion-%s", e->name)
> +					promotion(i915, e->exec_id | e->flags);
> +
> +				igt_subtest_f("Q-smoketest-%s", e->name)
> +					smoketest(i915, e->exec_id | e->flags, 5);
> +			}
> +		}
> +
> +		igt_subtest("Q-smoketest-all") {
> +			igt_require(gem_scheduler_enabled(i915));
> +			igt_require(gem_scheduler_has_ctx_priority(i915));
> +			smoketest(i915, -1, 30);
> +		}
> +
> +		igt_fixture {
> +			igt_stop_hang_detector();
> +		}
> +	}
> +}
> diff --git a/tests/i915/gem_exec_whisper.c b/tests/i915/gem_exec_whisper.c
> index 6c3b53756..d3e0b0ba2 100644
> --- a/tests/i915/gem_exec_whisper.c
> +++ b/tests/i915/gem_exec_whisper.c
> @@ -87,6 +87,7 @@ static void verify_reloc(int fd, uint32_t handle,
>   #define HANG 0x20
>   #define SYNC 0x40
>   #define PRIORITY 0x80
> +#define QUEUES 0x100
>   
>   struct hang {
>   	struct drm_i915_gem_exec_object2 obj;
> @@ -171,7 +172,7 @@ static void ctx_set_random_priority(int fd, uint32_t ctx)
>   {
>   	int prio = hars_petruska_f54_1_random_unsafe_max(1024) - 512;
>   	gem_context_set_priority(fd, ctx, prio);
> -};
> +}
>   
>   static void whisper(int fd, unsigned engine, unsigned flags)
>   {
> @@ -226,6 +227,9 @@ static void whisper(int fd, unsigned engine, unsigned flags)
>   	if (flags & CONTEXTS)
>   		gem_require_contexts(fd);
>   
> +	if (flags & QUEUES)
> +		igt_require(gem_has_queues(fd));
> +
>   	if (flags & HANG)
>   		init_hang(&hang);
>   
> @@ -290,6 +294,10 @@ static void whisper(int fd, unsigned engine, unsigned flags)
>   			for (n = 0; n < 64; n++)
>   				contexts[n] = gem_context_create(fd);
>   		}
> +		if (flags & QUEUES) {
> +			for (n = 0; n < 64; n++)
> +				contexts[n] = gem_queue_create(fd);
> +		}
>   		if (flags & FDS) {
>   			for (n = 0; n < 64; n++)
>   				fds[n] = drm_open_driver(DRIVER_INTEL);
> @@ -403,7 +411,7 @@ static void whisper(int fd, unsigned engine, unsigned flags)
>   						execbuf.flags &= ~ENGINE_MASK;
>   						execbuf.flags |= engines[rand() % nengine];
>   					}
> -					if (flags & CONTEXTS) {
> +					if (flags & (CONTEXTS | QUEUES)) {
>   						execbuf.rsvd1 = contexts[rand() % 64];
>   						if (flags & PRIORITY)
>   							ctx_set_random_priority(this_fd, execbuf.rsvd1);
> @@ -486,7 +494,7 @@ static void whisper(int fd, unsigned engine, unsigned flags)
>   			for (n = 0; n < 64; n++)
>   				close(fds[n]);
>   		}
> -		if (flags & CONTEXTS) {
> +		if (flags & (CONTEXTS | QUEUES)) {
>   			for (n = 0; n < 64; n++)
>   				gem_context_destroy(fd, contexts[n]);
>   		}
> @@ -522,18 +530,24 @@ igt_main
>   		{ "chain-forked", CHAIN | FORKED },
>   		{ "chain-interruptible", CHAIN | INTERRUPTIBLE },
>   		{ "chain-sync", CHAIN | SYNC },
> -		{ "contexts", CONTEXTS },
> -		{ "contexts-interruptible", CONTEXTS | INTERRUPTIBLE},
> -		{ "contexts-forked", CONTEXTS | FORKED},
> -		{ "contexts-priority", CONTEXTS | FORKED | PRIORITY },
> -		{ "contexts-chain", CONTEXTS | CHAIN },
> -		{ "contexts-sync", CONTEXTS | SYNC },
>   		{ "fds", FDS },
>   		{ "fds-interruptible", FDS | INTERRUPTIBLE},
>   		{ "fds-forked", FDS | FORKED},
>   		{ "fds-priority", FDS | FORKED | PRIORITY },
>   		{ "fds-chain", FDS | CHAIN},
>   		{ "fds-sync", FDS | SYNC},
> +		{ "contexts", CONTEXTS },
> +		{ "contexts-interruptible", CONTEXTS | INTERRUPTIBLE},
> +		{ "contexts-forked", CONTEXTS | FORKED},
> +		{ "contexts-priority", CONTEXTS | FORKED | PRIORITY },
> +		{ "contexts-chain", CONTEXTS | CHAIN },
> +		{ "contexts-sync", CONTEXTS | SYNC },
> +		{ "queues", QUEUES },
> +		{ "queues-interruptible", QUEUES | INTERRUPTIBLE},
> +		{ "queues-forked", QUEUES | FORKED},
> +		{ "queues-priority", QUEUES | FORKED | PRIORITY },
> +		{ "queues-chain", QUEUES | CHAIN },
> +		{ "queues-sync", QUEUES | SYNC },
>   		{ NULL }
>   	};
>   	int fd;
> diff --git a/tests/meson.build b/tests/meson.build
> index 3810bd760..3883ae127 100644
> --- a/tests/meson.build
> +++ b/tests/meson.build
> @@ -114,6 +114,7 @@ i915_progs = [
>   	'gem_ctx_exec',
>   	'gem_ctx_isolation',
>   	'gem_ctx_param',
> +	'gem_ctx_shared',
>   	'gem_ctx_switch',
>   	'gem_ctx_thrash',
>   	'gem_double_irq_loop',
> 

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [igt-dev] [PATCH i-g-t 08/16] i915: Exercise creating context with shared GTT
@ 2019-05-15  6:37     ` Tvrtko Ursulin
  0 siblings, 0 replies; 92+ messages in thread
From: Tvrtko Ursulin @ 2019-05-15  6:37 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: igt-dev, Tvrtko Ursulin


On 08/05/2019 11:09, Chris Wilson wrote:
> v2: Test each shared context is its own timeline and allows request
> reordering between shared contexts.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
> ---
>   lib/i915/gem_context.c        |  68 +++
>   lib/i915/gem_context.h        |  13 +
>   tests/Makefile.sources        |   1 +
>   tests/i915/gem_ctx_shared.c   | 856 ++++++++++++++++++++++++++++++++++
>   tests/i915/gem_exec_whisper.c |  32 +-
>   tests/meson.build             |   1 +
>   6 files changed, 962 insertions(+), 9 deletions(-)
>   create mode 100644 tests/i915/gem_ctx_shared.c
> 
> diff --git a/lib/i915/gem_context.c b/lib/i915/gem_context.c
> index f94d89cb4..8fb8984d1 100644
> --- a/lib/i915/gem_context.c
> +++ b/lib/i915/gem_context.c
> @@ -272,6 +272,74 @@ void gem_context_set_priority(int fd, uint32_t ctx_id, int prio)
>   	igt_assert_eq(__gem_context_set_priority(fd, ctx_id, prio), 0);
>   }
>   
> +int
> +__gem_context_clone(int i915,
> +		    uint32_t src, unsigned int share,
> +		    unsigned int flags,
> +		    uint32_t *out)
> +{
> +	struct drm_i915_gem_context_create_ext_clone clone = {
> +		{ .name = I915_CONTEXT_CREATE_EXT_CLONE },
> +		.clone_id = src,
> +		.flags = share,
> +	};
> +	struct drm_i915_gem_context_create_ext arg = {
> +		.flags = flags | I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
> +		.extensions = to_user_pointer(&clone),
> +	};
> +	int err = 0;
> +
> +	if (igt_ioctl(i915, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, &arg))
> +		err = -errno;
> +
> +	*out = arg.ctx_id;
> +
> +	errno = 0;
> +	return err;
> +}
> +
> +static bool __gem_context_has(int i915, uint32_t share, unsigned int flags)
> +{
> +	uint32_t ctx;
> +
> +	__gem_context_clone(i915, 0, share, flags, &ctx);
> +	if (ctx)
> +		gem_context_destroy(i915, ctx);
> +
> +	errno = 0;
> +	return ctx;
> +}
> +
> +bool gem_contexts_has_shared_gtt(int i915)
> +{
> +	return __gem_context_has(i915, I915_CONTEXT_CLONE_VM, 0);
> +}
> +
> +bool gem_has_queues(int i915)
> +{
> +	return __gem_context_has(i915,
> +				 I915_CONTEXT_CLONE_VM,
> +				 I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
> +}
> +
> +uint32_t gem_context_clone(int i915,
> +			   uint32_t src, unsigned int share,
> +			   unsigned int flags)
> +{
> +	uint32_t ctx;
> +
> +	igt_assert_eq(__gem_context_clone(i915, src, share, flags, &ctx), 0);
> +
> +	return ctx;
> +}
> +
> +uint32_t gem_queue_create(int i915)
> +{
> +	return gem_context_clone(i915, 0,
> +				 I915_CONTEXT_CLONE_VM,
> +				 I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
> +}
> +
>   bool gem_context_has_engine(int fd, uint32_t ctx, uint64_t engine)
>   {
>   	struct drm_i915_gem_exec_object2 exec = {};
> diff --git a/lib/i915/gem_context.h b/lib/i915/gem_context.h
> index a052714d4..8043c3401 100644
> --- a/lib/i915/gem_context.h
> +++ b/lib/i915/gem_context.h
> @@ -29,6 +29,19 @@ int __gem_context_create(int fd, uint32_t *ctx_id);
>   void gem_context_destroy(int fd, uint32_t ctx_id);
>   int __gem_context_destroy(int fd, uint32_t ctx_id);
>   
> +int __gem_context_clone(int i915,
> +			uint32_t src, unsigned int share,
> +			unsigned int flags,
> +			uint32_t *out);
> +uint32_t gem_context_clone(int i915,
> +			   uint32_t src, unsigned int share,
> +			   unsigned int flags);
> +
> +uint32_t gem_queue_create(int i915);
> +
> +bool gem_contexts_has_shared_gtt(int i915);
> +bool gem_has_queues(int i915);
> +
>   bool gem_has_contexts(int fd);
>   void gem_require_contexts(int fd);
>   void gem_context_require_bannable(int fd);
> diff --git a/tests/Makefile.sources b/tests/Makefile.sources
> index e1b7feeb2..3552e895b 100644
> --- a/tests/Makefile.sources
> +++ b/tests/Makefile.sources
> @@ -22,6 +22,7 @@ TESTS_progs = \
>   	drm_mm \
>   	drm_read \
>   	i915/gem_ctx_clone \
> +	i915/gem_ctx_shared \
>   	i915/gem_vm_create \
>   	kms_3d \
>   	kms_addfb_basic \
> diff --git a/tests/i915/gem_ctx_shared.c b/tests/i915/gem_ctx_shared.c
> new file mode 100644
> index 000000000..0076f5e9d
> --- /dev/null
> +++ b/tests/i915/gem_ctx_shared.c
> @@ -0,0 +1,856 @@
> +/*
> + * Copyright © 2017 Intel Corporation

2019

> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + */
> +
> +#include "igt.h"
> +
> +#include <unistd.h>
> +#include <stdlib.h>
> +#include <stdint.h>
> +#include <stdio.h>
> +#include <string.h>
> +#include <fcntl.h>
> +#include <inttypes.h>
> +#include <errno.h>
> +#include <sys/stat.h>
> +#include <sys/ioctl.h>
> +#include <sys/time.h>
> +
> +#include <drm.h>
> +
> +#include "igt_rand.h"
> +#include "igt_vgem.h"
> +#include "sync_file.h"
> +
> +#define LO 0
> +#define HI 1
> +#define NOISE 2
> +
> +#define MAX_PRIO LOCAL_I915_CONTEXT_MAX_USER_PRIORITY
> +#define MIN_PRIO LOCAL_I915_CONTEXT_MIN_USER_PRIORITY
> +
> +static int priorities[] = {
> +	[LO] = MIN_PRIO / 2,
> +	[HI] = MAX_PRIO / 2,
> +};
> +
> +#define MAX_ELSP_QLEN 16
> +
> +IGT_TEST_DESCRIPTION("Test shared contexts.");
> +
> +static void create_shared_gtt(int i915, unsigned int flags)
> +#define DETACHED 0x1
> +{
> +	const uint32_t bbe = MI_BATCH_BUFFER_END;
> +	struct drm_i915_gem_exec_object2 obj = {
> +		.handle = gem_create(i915, 4096),
> +	};
> +	struct drm_i915_gem_execbuffer2 execbuf = {
> +		.buffers_ptr = to_user_pointer(&obj),
> +		.buffer_count = 1,
> +	};
> +	uint32_t parent, child;
> +
> +	gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
> +	gem_execbuf(i915, &execbuf);
> +	gem_sync(i915, obj.handle);
> +
> +	child = flags & DETACHED ? gem_context_create(i915) : 0;
> +	igt_until_timeout(2) {
> +		parent = flags & DETACHED ? child : 0;
> +		child = gem_context_clone(i915,
> +					  parent, I915_CONTEXT_CLONE_VM,
> +					  0);
> +		execbuf.rsvd1 = child;
> +		gem_execbuf(i915, &execbuf);
> +
> +		if (flags & DETACHED) {
> +			gem_context_destroy(i915, parent);
> +			gem_execbuf(i915, &execbuf);
> +		} else {
> +			parent = child;
> +			gem_context_destroy(i915, parent);
> +		}
> +
> +		execbuf.rsvd1 = parent;
> +		igt_assert_eq(__gem_execbuf(i915, &execbuf), -ENOENT);
> +		igt_assert_eq(__gem_context_clone(i915,
> +						  parent, I915_CONTEXT_CLONE_VM,
> +						  0, &parent), -ENOENT);
> +	}
> +	if (flags & DETACHED)
> +		gem_context_destroy(i915, child);
> +
> +	gem_sync(i915, obj.handle);
> +	gem_close(i915, obj.handle);
> +}
> +
> +static void disjoint_timelines(int i915)
> +{
> +	IGT_CORK_HANDLE(cork);
> +	igt_spin_t *spin[2];
> +	uint32_t plug, child;
> +
> +	igt_require(gem_has_execlists(i915));
> +
> +	/*
> +	 * Each context, although they share a vm, are expected to be
> +	 * distinct timelines. A request queued to one context should be
> +	 * independent of any shared contexts.
> +	 */
> +	child = gem_context_clone(i915, 0, I915_CONTEXT_CLONE_VM, 0);
> +	plug = igt_cork_plug(&cork, i915);
> +
> +	spin[0] = __igt_spin_new(i915, .ctx = 0, .dependency = plug);
> +	spin[1] = __igt_spin_new(i915, .ctx = child);
> +
> +	/* Wait for the second spinner, will hang if stuck behind the first */
> +	igt_spin_end(spin[1]);
> +	gem_sync(i915, spin[1]->handle);
> +
> +	igt_cork_unplug(&cork);
> +
> +	igt_spin_free(i915, spin[1]);
> +	igt_spin_free(i915, spin[0]);
> +}
> +
> +static void exhaust_shared_gtt(int i915, unsigned int flags)
> +#define EXHAUST_LRC 0x1
> +{
> +	i915 = gem_reopen_driver(i915);
> +
> +	igt_fork(pid, 1) {
> +		const uint32_t bbe = MI_BATCH_BUFFER_END;
> +		struct drm_i915_gem_exec_object2 obj = {
> +			.handle = gem_create(i915, 4096)
> +		};
> +		struct drm_i915_gem_execbuffer2 execbuf = {
> +			.buffers_ptr = to_user_pointer(&obj),
> +			.buffer_count = 1,
> +		};
> +		uint32_t parent, child;
> +		unsigned long count = 0;
> +		int err;
> +
> +		gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
> +
> +		child = 0;
> +		for (;;) {
> +			parent = child;
> +			err = __gem_context_clone(i915,
> +						  parent, I915_CONTEXT_CLONE_VM,
> +						  0, &child);
> +			if (err)
> +				break;
> +
> +			if (flags & EXHAUST_LRC) {
> +				execbuf.rsvd1 = child;
> +				err = __gem_execbuf(i915, &execbuf);
> +				if (err)
> +					break;
> +			}

What are the stop conditions in this test, with and without the 
EXHAUST_LRC flag? It would be good to put that in a comment.

Especially since AFAIR this one was causing OOM for me so might need to 
be tweaked.

> +
> +			count++;
> +		}
> +		gem_sync(i915, obj.handle);
> +
> +		igt_info("Created %lu shared contexts, before %d (%s)\n",
> +			 count, err, strerror(-err));
> +	}
> +	close(i915);
> +	igt_waitchildren();
> +}
> +
> +static void exec_shared_gtt(int i915, unsigned int ring)
> +{
> +	const int gen = intel_gen(intel_get_drm_devid(i915));
> +	const uint32_t bbe = MI_BATCH_BUFFER_END;
> +	struct drm_i915_gem_exec_object2 obj = {
> +		.handle = gem_create(i915, 4096)
> +	};
> +	struct drm_i915_gem_execbuffer2 execbuf = {
> +		.buffers_ptr = to_user_pointer(&obj),
> +		.buffer_count = 1,
> +		.flags = ring,
> +	};
> +	uint32_t scratch = obj.handle;
> +	uint32_t batch[16];
> +	int i;
> +
> +	gem_require_ring(i915, ring);
> +	igt_require(gem_can_store_dword(i915, ring));
> +
> +	/* Load object into place in the GTT */
> +	gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
> +	gem_execbuf(i915, &execbuf);
> +
> +	/* Presume nothing causes an eviction in the meantime */
> +
> +	obj.handle = gem_create(i915, 4096);
> +
> +	i = 0;
> +	batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
> +	if (gen >= 8) {
> +		batch[++i] = obj.offset;
> +		batch[++i] = 0;
> +	} else if (gen >= 4) {
> +		batch[++i] = 0;
> +		batch[++i] = obj.offset;
> +	} else {
> +		batch[i]--;
> +		batch[++i] = obj.offset;
> +	}
> +	batch[++i] = 0xc0ffee;
> +	batch[++i] = MI_BATCH_BUFFER_END;
> +	gem_write(i915, obj.handle, 0, batch, sizeof(batch));
> +
> +	obj.offset += 4096; /* make sure we don't cause an eviction! */

Is 4k apart safe?

A short comment on how does this test work would be good.

> +	obj.flags |= EXEC_OBJECT_PINNED;
> +	execbuf.rsvd1 = gem_context_clone(i915, 0, I915_CONTEXT_CLONE_VM, 0);
> +	if (gen > 3 && gen < 6)
> +		execbuf.flags |= I915_EXEC_SECURE;
> +
> +	gem_execbuf(i915, &execbuf);
> +	gem_context_destroy(i915, execbuf.rsvd1);
> +	gem_sync(i915, obj.handle); /* write hazard lies */
> +	gem_close(i915, obj.handle);
> +
> +	gem_read(i915, scratch, 0, batch, sizeof(uint32_t));
> +	gem_close(i915, scratch);
> +
> +	igt_assert_eq_u32(*batch, 0xc0ffee);
> +}
> +
> +static int nop_sync(int i915, uint32_t ctx, unsigned int ring, int64_t timeout)
> +{
> +	const uint32_t bbe = MI_BATCH_BUFFER_END;
> +	struct drm_i915_gem_exec_object2 obj = {
> +		.handle = gem_create(i915, 4096),
> +	};
> +	struct drm_i915_gem_execbuffer2 execbuf = {
> +		.buffers_ptr = to_user_pointer(&obj),
> +		.buffer_count = 1,
> +		.flags = ring,
> +		.rsvd1 = ctx,
> +	};
> +	int err;
> +
> +	gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
> +	gem_execbuf(i915, &execbuf);
> +	err = gem_wait(i915, obj.handle, &timeout);
> +	gem_close(i915, obj.handle);
> +
> +	return err;
> +}
> +
> +static bool has_single_timeline(int i915)
> +{
> +	uint32_t ctx;
> +
> +	__gem_context_clone(i915, 0, 0,
> +			    I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE,
> +			    &ctx);
> +	if (ctx)
> +		gem_context_destroy(i915, ctx);
> +
> +	return ctx != 0;
> +}
> +
> +static bool ignore_engine(unsigned engine)
> +{
> +	if (engine == 0)
> +		return true;
> +
> +	if (engine == I915_EXEC_BSD)
> +		return true;
> +
> +	return false;
> +}
> +
> +static void single_timeline(int i915)
> +{
> +	const uint32_t bbe = MI_BATCH_BUFFER_END;
> +	struct drm_i915_gem_exec_object2 obj = {
> +		.handle = gem_create(i915, 4096),
> +	};
> +	struct drm_i915_gem_execbuffer2 execbuf = {
> +		.buffers_ptr = to_user_pointer(&obj),
> +		.buffer_count = 1,
> +	};
> +	struct sync_fence_info rings[16];

Could use for_each_physical_engine to count the engines. But we probably 
have plenty of this around the code base.
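
Untested sketch, assuming Andi's iterator lands with its current shape
(walking a const struct intel_execution_engine2 pointer):

	const struct intel_execution_engine2 *e;
	unsigned int nengines = 0;

	for_each_physical_engine(i915, e)
		nengines++;

	struct sync_fence_info *rings = calloc(nengines, sizeof(*rings));
	igt_assert(rings);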

> +	struct sync_file_info sync_file_info = {
> +		.num_fences = 1,
> +	};
> +	unsigned int engine;
> +	int n;
> +
> +	igt_require(has_single_timeline(i915));
> +
> +	gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
> +	gem_execbuf(i915, &execbuf);
> +	gem_sync(i915, obj.handle);
> +
> +	/*
> +	 * For a "single timeline" context, each ring is on the common
> +	 * timeline, unlike a normal context where each ring has an
> +	 * independent timeline. That is no matter which engine we submit
> +	 * to, it reports the same timeline name and fence context. However,
> +	 * the fence context is not reported through the sync_fence_info.

Is the test useful then? There was one I reviewed earlier in this series
which tested for execution ordering, which sounds like what's needed.

> +	 */
> +	execbuf.rsvd1 =
> +		gem_context_clone(i915, 0, 0,
> +				  I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
> +	execbuf.flags = I915_EXEC_FENCE_OUT;
> +	n = 0;
> +	for_each_engine(i915, engine) {

for_each_physical_engine to align with Andi's work?
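
i.e. something like (same assumption about the iterator as above, plus
that gem_class_instance_to_eb_flags() remains the way to map back to
legacy execbuf flags):

	const struct intel_execution_engine2 *e;

	for_each_physical_engine(i915, e) {
		execbuf.flags = I915_EXEC_FENCE_OUT |
			gem_class_instance_to_eb_flags(i915, e->class,
						       e->instance);
		gem_execbuf_wr(i915, &execbuf);
		...
	}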

> +		gem_execbuf_wr(i915, &execbuf);
> +		sync_file_info.sync_fence_info = to_user_pointer(&rings[n]);
> +		do_ioctl(execbuf.rsvd2 >> 32, SYNC_IOC_FILE_INFO, &sync_file_info);
> +		close(execbuf.rsvd2 >> 32);
> +
> +		igt_info("ring[%d] fence: %s %s\n",
> +			 n, rings[n].driver_name, rings[n].obj_name);
> +		n++;
> +	}
> +	gem_sync(i915, obj.handle);
> +	gem_close(i915, obj.handle);
> +
> +	for (int i = 1; i < n; i++) {
> +		igt_assert(!strcmp(rings[0].driver_name, rings[i].driver_name));
> +		igt_assert(!strcmp(rings[0].obj_name, rings[i].obj_name));

What is in obj_name?

> +	}
> +}
> +
> +static void exec_single_timeline(int i915, unsigned int ring)
> +{
> +	unsigned int other;
> +	igt_spin_t *spin;
> +	uint32_t ctx;
> +
> +	gem_require_ring(i915, ring);
> +	igt_require(has_single_timeline(i915));
> +
> +	/*
> +	 * On an ordinary context, a blockage on one ring doesn't prevent
> +	 * execution on an other.
> +	 */
> +	ctx = 0;
> +	spin = NULL;
> +	for_each_engine(i915, other) {

for_each_physical

> +		if (other == ring || ignore_engine(other))
> +			continue;
> +
> +		if (spin == NULL) {
> +			spin = __igt_spin_new(i915, .ctx = ctx, .engine = other);
> +		} else {
> +			struct drm_i915_gem_execbuffer2 execbuf = {
> +				.buffers_ptr = spin->execbuf.buffers_ptr,
> +				.buffer_count = spin->execbuf.buffer_count,
> +				.flags = other,
> +				.rsvd1 = ctx,
> +			};
> +			gem_execbuf(i915, &execbuf);
> +		}
> +	}
> +	igt_require(spin);
> +	igt_assert_eq(nop_sync(i915, ctx, ring, NSEC_PER_SEC), 0);
> +	igt_spin_free(i915, spin);
> +
> +	/*
> +	 * But if we create a context with just a single shared timeline,
> +	 * then it will block waiting for the earlier requests on the
> +	 * other engines.
> +	 */
> +	ctx = gem_context_clone(i915, 0, 0,
> +				I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
> +	spin = NULL;
> +	for_each_engine(i915, other) {

Ditto.

> +		if (other == ring || ignore_engine(other))
> +			continue;
> +
> +		if (spin == NULL) {
> +			spin = __igt_spin_new(i915, .ctx = ctx, .engine = other);
> +		} else {
> +			struct drm_i915_gem_execbuffer2 execbuf = {
> +				.buffers_ptr = spin->execbuf.buffers_ptr,
> +				.buffer_count = spin->execbuf.buffer_count,
> +				.flags = other,
> +				.rsvd1 = ctx,
> +			};
> +			gem_execbuf(i915, &execbuf);
> +		}
> +	}
> +	igt_assert(spin);
> +	igt_assert_eq(nop_sync(i915, ctx, ring, NSEC_PER_SEC), -ETIME);
> +	igt_spin_free(i915, spin);
> +}
> +
> +static void store_dword(int i915, uint32_t ctx, unsigned ring,
> +			uint32_t target, uint32_t offset, uint32_t value,
> +			uint32_t cork, unsigned write_domain)
> +{
> +	const int gen = intel_gen(intel_get_drm_devid(i915));
> +	struct drm_i915_gem_exec_object2 obj[3];
> +	struct drm_i915_gem_relocation_entry reloc;
> +	struct drm_i915_gem_execbuffer2 execbuf;
> +	uint32_t batch[16];
> +	int i;
> +
> +	memset(&execbuf, 0, sizeof(execbuf));
> +	execbuf.buffers_ptr = to_user_pointer(obj + !cork);
> +	execbuf.buffer_count = 2 + !!cork;
> +	execbuf.flags = ring;
> +	if (gen < 6)
> +		execbuf.flags |= I915_EXEC_SECURE;
> +	execbuf.rsvd1 = ctx;
> +
> +	memset(obj, 0, sizeof(obj));
> +	obj[0].handle = cork;
> +	obj[1].handle = target;
> +	obj[2].handle = gem_create(i915, 4096);
> +
> +	memset(&reloc, 0, sizeof(reloc));
> +	reloc.target_handle = obj[1].handle;
> +	reloc.presumed_offset = 0;
> +	reloc.offset = sizeof(uint32_t);
> +	reloc.delta = offset;
> +	reloc.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
> +	reloc.write_domain = write_domain;
> +	obj[2].relocs_ptr = to_user_pointer(&reloc);
> +	obj[2].relocation_count = 1;
> +
> +	i = 0;
> +	batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
> +	if (gen >= 8) {
> +		batch[++i] = offset;
> +		batch[++i] = 0;
> +	} else if (gen >= 4) {
> +		batch[++i] = 0;
> +		batch[++i] = offset;
> +		reloc.offset += sizeof(uint32_t);
> +	} else {
> +		batch[i]--;
> +		batch[++i] = offset;
> +	}
> +	batch[++i] = value;
> +	batch[++i] = MI_BATCH_BUFFER_END;
> +	gem_write(i915, obj[2].handle, 0, batch, sizeof(batch));
> +	gem_execbuf(i915, &execbuf);
> +	gem_close(i915, obj[2].handle);
> +}
> +
> +static uint32_t create_highest_priority(int i915)
> +{
> +	uint32_t ctx = gem_context_create(i915);
> +
> +	/*
> +	 * If there is no priority support, all contexts will have equal
> +	 * priority (and therefore the max user priority), so no context
> +	 * can overtake us, and we effectively can form a plug.
> +	 */
> +	__gem_context_set_priority(i915, ctx, MAX_PRIO);
> +
> +	return ctx;
> +}
> +
> +static void unplug_show_queue(int i915, struct igt_cork *c, unsigned int engine)
> +{
> +	igt_spin_t *spin[MAX_ELSP_QLEN];

Why is this 16?

> +
> +	for (int n = 0; n < ARRAY_SIZE(spin); n++) {
> +		const struct igt_spin_factory opts = {
> +			.ctx = create_highest_priority(i915),
> +			.engine = engine,
> +		};
> +		spin[n] = __igt_spin_factory(i915, &opts);
> +		gem_context_destroy(i915, opts.ctx);
> +	}
> +
> +	igt_cork_unplug(c); /* batches will now be queued on the engine */
> +	igt_debugfs_dump(i915, "i915_engine_info");
> +
> +	for (int n = 0; n < ARRAY_SIZE(spin); n++)
> +		igt_spin_free(i915, spin[n]);
> +}
> +
> +static uint32_t store_timestamp(int i915,
> +				uint32_t ctx, unsigned ring,
> +				unsigned mmio_base)
> +{
> +	const bool r64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
> +	struct drm_i915_gem_exec_object2 obj = {
> +		.handle = gem_create(i915, 4096),
> +		.relocation_count = 1,
> +	};
> +	struct drm_i915_gem_relocation_entry reloc = {
> +		.target_handle = obj.handle,
> +		.offset = 2 * sizeof(uint32_t),
> +		.delta = 4092,
> +		.read_domains = I915_GEM_DOMAIN_INSTRUCTION,
> +	};
> +	struct drm_i915_gem_execbuffer2 execbuf = {
> +		.buffers_ptr = to_user_pointer(&obj),
> +		.buffer_count = 1,
> +		.flags = ring,
> +		.rsvd1 = ctx,
> +	};
> +	uint32_t batch[] = {
> +		0x24 << 23 | (1 + r64b), /* SRM */
> +		mmio_base + 0x358,
> +		4092,
> +		0,
> +		MI_BATCH_BUFFER_END
> +	};
> +
> +	igt_require(intel_gen(intel_get_drm_devid(i915)) >= 7);
> +
> +	gem_write(i915, obj.handle, 0, batch, sizeof(batch));
> +	obj.relocs_ptr = to_user_pointer(&reloc);
> +
> +	gem_execbuf(i915, &execbuf);
> +
> +	return obj.handle;
> +}
> +
> +static void independent(int i915, unsigned ring, unsigned flags)
> +{
> +	uint32_t handle[ARRAY_SIZE(priorities)];
> +	igt_spin_t *spin[MAX_ELSP_QLEN];
> +	unsigned int mmio_base;
> +
> +	/* XXX i915_query()! */
> +	switch (ring) {
> +	case I915_EXEC_DEFAULT:
> +	case I915_EXEC_RENDER:
> +		mmio_base = 0x2000;
> +		break;
> +#if 0
> +	case I915_EXEC_BSD:
> +		mmio_base = 0x12000;
> +		break;
> +#endif
> +	case I915_EXEC_BLT:
> +		mmio_base = 0x22000;
> +		break;
> +
> +	case I915_EXEC_VEBOX:
> +		if (intel_gen(intel_get_drm_devid(i915)) >= 11)
> +			mmio_base = 0x1d8000;
> +		else
> +			mmio_base = 0x1a000;
> +		break;
> +
> +	default:
> +		igt_skip("mmio base not known\n");
> +	}

Ufff this is quite questionable. Should we rather have this subtest in 
selftests only?

> +
> +	for (int n = 0; n < ARRAY_SIZE(spin); n++) {
> +		const struct igt_spin_factory opts = {
> +			.ctx = create_highest_priority(i915),
> +			.engine = ring,
> +		};
> +		spin[n] = __igt_spin_factory(i915, &opts);
> +		gem_context_destroy(i915, opts.ctx);
> +	}
> +
> +	for (int i = 0; i < ARRAY_SIZE(priorities); i++) {
> +		uint32_t ctx = gem_queue_create(i915);
> +		gem_context_set_priority(i915, ctx, priorities[i]);
> +		handle[i] = store_timestamp(i915, ctx, ring, mmio_base);
> +		gem_context_destroy(i915, ctx);
> +	}
> +
> +	for (int n = 0; n < ARRAY_SIZE(spin); n++)
> +		igt_spin_free(i915, spin[n]);
> +
> +	for (int i = 0; i < ARRAY_SIZE(priorities); i++) {
> +		uint32_t *ptr;
> +
> +		ptr = gem_mmap__gtt(i915, handle[i], 4096, PROT_READ);
> +		gem_set_domain(i915, handle[i], /* no write hazard lies! */
> +			       I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
> +		gem_close(i915, handle[i]);
> +
> +		handle[i] = ptr[1023];

1023 relates to 4092 from store_timestamp I gather. The two need to be 
defined closer together.
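
e.g. (name is mine):

	#define TIMESTAMP_OFFSET 4092 /* last dword of the 4KiB object */

used for both reloc.delta/batch[2] in store_timestamp() and
ptr[TIMESTAMP_OFFSET / sizeof(uint32_t)] here, so the 1023 cannot drift
apart from the 4092.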

> +		munmap(ptr, 4096);
> +
> +		igt_debug("ctx[%d] .prio=%d, timestamp=%u\n",
> +			  i, priorities[i], handle[i]);
> +	}
> +
> +	igt_assert((int32_t)(handle[HI] - handle[LO]) < 0);
> +}
> +
> +static void reorder(int i915, unsigned ring, unsigned flags)
> +#define EQUAL 1
> +{
> +	IGT_CORK_HANDLE(cork);
> +	uint32_t scratch;
> +	uint32_t *ptr;
> +	uint32_t ctx[2];
> +	uint32_t plug;
> +
> +	ctx[LO] = gem_queue_create(i915);
> +	gem_context_set_priority(i915, ctx[LO], MIN_PRIO);
> +
> +	ctx[HI] = gem_queue_create(i915);
> +	gem_context_set_priority(i915, ctx[HI], flags & EQUAL ? MIN_PRIO : 0);
> +
> +	scratch = gem_create(i915, 4096);
> +	plug = igt_cork_plug(&cork, i915);
> +
> +	/* We expect the high priority context to be executed first, and
> +	 * so the final result will be value from the low priority context.
> +	 */
> +	store_dword(i915, ctx[LO], ring, scratch, 0, ctx[LO], plug, 0);
> +	store_dword(i915, ctx[HI], ring, scratch, 0, ctx[HI], plug, 0);
> +
> +	unplug_show_queue(i915, &cork, ring);
> +	gem_close(i915, plug);
> +
> +	gem_context_destroy(i915, ctx[LO]);
> +	gem_context_destroy(i915, ctx[HI]);
> +
> +	ptr = gem_mmap__gtt(i915, scratch, 4096, PROT_READ);
> +	gem_set_domain(i915, scratch, /* no write hazard lies! */
> +		       I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
> +	gem_close(i915, scratch);
> +
> +	if (flags & EQUAL) /* equal priority, result will be fifo */
> +		igt_assert_eq_u32(ptr[0], ctx[HI]);
> +	else
> +		igt_assert_eq_u32(ptr[0], ctx[LO]);
> +	munmap(ptr, 4096);
> +}
> +
> +static void promotion(int i915, unsigned ring)
> +{
> +	IGT_CORK_HANDLE(cork);
> +	uint32_t result, dep;
> +	uint32_t *ptr;
> +	uint32_t ctx[3];
> +	uint32_t plug;
> +
> +	ctx[LO] = gem_queue_create(i915);
> +	gem_context_set_priority(i915, ctx[LO], MIN_PRIO);
> +
> +	ctx[HI] = gem_queue_create(i915);
> +	gem_context_set_priority(i915, ctx[HI], 0);
> +
> +	ctx[NOISE] = gem_queue_create(i915);
> +	gem_context_set_priority(i915, ctx[NOISE], MIN_PRIO/2);
> +
> +	result = gem_create(i915, 4096);
> +	dep = gem_create(i915, 4096);
> +
> +	plug = igt_cork_plug(&cork, i915);
> +
> +	/* Expect that HI promotes LO, so the order will be LO, HI, NOISE.
> +	 *
> +	 * fifo would be NOISE, LO, HI.
> +	 * strict priority would be  HI, NOISE, LO
> +	 */
> +	store_dword(i915, ctx[NOISE], ring, result, 0, ctx[NOISE], plug, 0);
> +	store_dword(i915, ctx[LO], ring, result, 0, ctx[LO], plug, 0);
> +
> +	/* link LO <-> HI via a dependency on another buffer */
> +	store_dword(i915, ctx[LO], ring, dep, 0, ctx[LO], 0, I915_GEM_DOMAIN_INSTRUCTION);
> +	store_dword(i915, ctx[HI], ring, dep, 0, ctx[HI], 0, 0);
> +
> +	store_dword(i915, ctx[HI], ring, result, 0, ctx[HI], 0, 0);
> +
> +	unplug_show_queue(i915, &cork, ring);
> +	gem_close(i915, plug);
> +
> +	gem_context_destroy(i915, ctx[NOISE]);
> +	gem_context_destroy(i915, ctx[LO]);
> +	gem_context_destroy(i915, ctx[HI]);
> +
> +	ptr = gem_mmap__gtt(i915, dep, 4096, PROT_READ);
> +	gem_set_domain(i915, dep, /* no write hazard lies! */
> +			I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
> +	gem_close(i915, dep);
> +
> +	igt_assert_eq_u32(ptr[0], ctx[HI]);
> +	munmap(ptr, 4096);
> +
> +	ptr = gem_mmap__gtt(i915, result, 4096, PROT_READ);
> +	gem_set_domain(i915, result, /* no write hazard lies! */
> +			I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
> +	gem_close(i915, result);
> +
> +	igt_assert_eq_u32(ptr[0], ctx[NOISE]);
> +	munmap(ptr, 4096);
> +}
> +
> +static void smoketest(int i915, unsigned ring, unsigned timeout)
> +{
> +	const int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
> +	unsigned engines[16];
> +	unsigned nengine;
> +	unsigned engine;
> +	uint32_t scratch;
> +	uint32_t *ptr;
> +
> +	nengine = 0;
> +	for_each_engine(i915, engine) {
> +		if (ignore_engine(engine))
> +			continue;
> +
> +		engines[nengine++] = engine;
> +	}
> +	igt_require(nengine);

for_each_physical and counting the engines for the engines array would
be better I think.
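
Rough idea (again assuming the physical-engine iterator and the
class/instance to execbuf-flags helper):

	const struct intel_execution_engine2 *e;

	nengine = 0;
	for_each_physical_engine(i915, e) {
		igt_assert(nengine < ARRAY_SIZE(engines));
		engines[nengine++] =
			gem_class_instance_to_eb_flags(i915, e->class,
						       e->instance);
	}
	igt_require(nengine);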

> +
> +	scratch = gem_create(i915, 4096);
> +	igt_fork(child, ncpus) {
> +		unsigned long count = 0;
> +		uint32_t ctx;
> +
> +		hars_petruska_f54_1_random_perturb(child);
> +
> +		ctx = gem_queue_create(i915);
> +		igt_until_timeout(timeout) {
> +			int prio;
> +
> +			prio = hars_petruska_f54_1_random_unsafe_max(MAX_PRIO - MIN_PRIO) + MIN_PRIO;
> +			gem_context_set_priority(i915, ctx, prio);
> +
> +			engine = engines[hars_petruska_f54_1_random_unsafe_max(nengine)];
> +			store_dword(i915, ctx, engine, scratch,
> +				    8*child + 0, ~child,
> +				    0, 0);
> +			for (unsigned int step = 0; step < 8; step++)
> +				store_dword(i915, ctx, engine, scratch,
> +					    8*child + 4, count++,
> +					    0, 0);
> +		}
> +		gem_context_destroy(i915, ctx);
> +	}
> +	igt_waitchildren();
> +
> +	ptr = gem_mmap__gtt(i915, scratch, 4096, PROT_READ);
> +	gem_set_domain(i915, scratch, /* no write hazard lies! */
> +			I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
> +	gem_close(i915, scratch);
> +
> +	for (unsigned n = 0; n < ncpus; n++) {
> +		igt_assert_eq_u32(ptr[2*n], ~n);
> +		/*
> +		 * Note this count is approximate due to unconstrained
> +		 * ordering of the dword writes between engines.
> +		 *
> +		 * Take the result with a pinch of salt.
> +		 */
> +		igt_info("Child[%d] completed %u cycles\n",  n, ptr[2*n+1]);
> +	}
> +	munmap(ptr, 4096);
> +}
> +
> +igt_main
> +{
> +	const struct intel_execution_engine *e;
> +	int i915 = -1;
> +
> +	igt_fixture {
> +		i915 = drm_open_driver(DRIVER_INTEL);
> +		igt_require_gem(i915);
> +	}
> +
> +	igt_subtest_group {
> +		igt_fixture {
> +			igt_require(gem_contexts_has_shared_gtt(i915));
> +			igt_fork_hang_detector(i915);
> +		}
> +
> +		igt_subtest("create-shared-gtt")
> +			create_shared_gtt(i915, 0);
> +
> +		igt_subtest("detached-shared-gtt")
> +			create_shared_gtt(i915, DETACHED);
> +
> +		igt_subtest("disjoint-timelines")
> +			disjoint_timelines(i915);
> +
> +		igt_subtest("single-timeline")
> +			single_timeline(i915);
> +
> +		igt_subtest("exhaust-shared-gtt")
> +			exhaust_shared_gtt(i915, 0);
> +
> +		igt_subtest("exhaust-shared-gtt-lrc")
> +			exhaust_shared_gtt(i915, EXHAUST_LRC);
> +
> +		for (e = intel_execution_engines; e->name; e++) {
> +			igt_subtest_f("exec-shared-gtt-%s", e->name)
> +				exec_shared_gtt(i915, e->exec_id | e->flags);

The same previously raised question: should it iterate the legacy
execbuf engines or the physical engines? Maybe you want different
subtests to do both?

> +
> +			if (!ignore_engine(e->exec_id | e->flags)) {
> +				igt_subtest_f("exec-single-timeline-%s",
> +					      e->name)
> +					exec_single_timeline(i915,
> +							     e->exec_id | e->flags);
> +			}
> +
> +			/*
> +			 * Check that the shared contexts operate independently,
> +			 * that is requests on one ("queue") can be scheduled
> +			 * around another queue. We only check the basics here,
> +			 * enough to reduce the queue into just another context,
> +			 * and so rely on gem_exec_schedule to prove the rest.
> +			 */
> +			igt_subtest_group {
> +				igt_fixture {
> +					gem_require_ring(i915, e->exec_id | e->flags);
> +					igt_require(gem_can_store_dword(i915, e->exec_id) | e->flags);
> +					igt_require(gem_scheduler_enabled(i915));
> +					igt_require(gem_scheduler_has_ctx_priority(i915));
> +				}
> +
> +				igt_subtest_f("Q-independent-%s", e->name)
> +					independent(i915, e->exec_id | e->flags, 0);
> +
> +				igt_subtest_f("Q-in-order-%s", e->name)
> +					reorder(i915, e->exec_id | e->flags, EQUAL);
> +
> +				igt_subtest_f("Q-out-order-%s", e->name)
> +					reorder(i915, e->exec_id | e->flags, 0);
> +
> +				igt_subtest_f("Q-promotion-%s", e->name)
> +					promotion(i915, e->exec_id | e->flags);
> +
> +				igt_subtest_f("Q-smoketest-%s", e->name)
> +					smoketest(i915, e->exec_id | e->flags, 5);
> +			}
> +		}
> +
> +		igt_subtest("Q-smoketest-all") {
> +			igt_require(gem_scheduler_enabled(i915));
> +			igt_require(gem_scheduler_has_ctx_priority(i915));
> +			smoketest(i915, -1, 30);
> +		}
> +
> +		igt_fixture {
> +			igt_stop_hang_detector();
> +		}
> +	}
> +}
> diff --git a/tests/i915/gem_exec_whisper.c b/tests/i915/gem_exec_whisper.c
> index 6c3b53756..d3e0b0ba2 100644
> --- a/tests/i915/gem_exec_whisper.c
> +++ b/tests/i915/gem_exec_whisper.c
> @@ -87,6 +87,7 @@ static void verify_reloc(int fd, uint32_t handle,
>   #define HANG 0x20
>   #define SYNC 0x40
>   #define PRIORITY 0x80
> +#define QUEUES 0x100
>   
>   struct hang {
>   	struct drm_i915_gem_exec_object2 obj;
> @@ -171,7 +172,7 @@ static void ctx_set_random_priority(int fd, uint32_t ctx)
>   {
>   	int prio = hars_petruska_f54_1_random_unsafe_max(1024) - 512;
>   	gem_context_set_priority(fd, ctx, prio);
> -};
> +}
>   
>   static void whisper(int fd, unsigned engine, unsigned flags)
>   {
> @@ -226,6 +227,9 @@ static void whisper(int fd, unsigned engine, unsigned flags)
>   	if (flags & CONTEXTS)
>   		gem_require_contexts(fd);
>   
> +	if (flags & QUEUES)
> +		igt_require(gem_has_queues(fd));
> +
>   	if (flags & HANG)
>   		init_hang(&hang);
>   
> @@ -290,6 +294,10 @@ static void whisper(int fd, unsigned engine, unsigned flags)
>   			for (n = 0; n < 64; n++)
>   				contexts[n] = gem_context_create(fd);
>   		}
> +		if (flags & QUEUES) {
> +			for (n = 0; n < 64; n++)
> +				contexts[n] = gem_queue_create(fd);
> +		}
>   		if (flags & FDS) {
>   			for (n = 0; n < 64; n++)
>   				fds[n] = drm_open_driver(DRIVER_INTEL);
> @@ -403,7 +411,7 @@ static void whisper(int fd, unsigned engine, unsigned flags)
>   						execbuf.flags &= ~ENGINE_MASK;
>   						execbuf.flags |= engines[rand() % nengine];
>   					}
> -					if (flags & CONTEXTS) {
> +					if (flags & (CONTEXTS | QUEUES)) {
>   						execbuf.rsvd1 = contexts[rand() % 64];
>   						if (flags & PRIORITY)
>   							ctx_set_random_priority(this_fd, execbuf.rsvd1);
> @@ -486,7 +494,7 @@ static void whisper(int fd, unsigned engine, unsigned flags)
>   			for (n = 0; n < 64; n++)
>   				close(fds[n]);
>   		}
> -		if (flags & CONTEXTS) {
> +		if (flags & (CONTEXTS | QUEUES)) {
>   			for (n = 0; n < 64; n++)
>   				gem_context_destroy(fd, contexts[n]);
>   		}
> @@ -522,18 +530,24 @@ igt_main
>   		{ "chain-forked", CHAIN | FORKED },
>   		{ "chain-interruptible", CHAIN | INTERRUPTIBLE },
>   		{ "chain-sync", CHAIN | SYNC },
> -		{ "contexts", CONTEXTS },
> -		{ "contexts-interruptible", CONTEXTS | INTERRUPTIBLE},
> -		{ "contexts-forked", CONTEXTS | FORKED},
> -		{ "contexts-priority", CONTEXTS | FORKED | PRIORITY },
> -		{ "contexts-chain", CONTEXTS | CHAIN },
> -		{ "contexts-sync", CONTEXTS | SYNC },
>   		{ "fds", FDS },
>   		{ "fds-interruptible", FDS | INTERRUPTIBLE},
>   		{ "fds-forked", FDS | FORKED},
>   		{ "fds-priority", FDS | FORKED | PRIORITY },
>   		{ "fds-chain", FDS | CHAIN},
>   		{ "fds-sync", FDS | SYNC},
> +		{ "contexts", CONTEXTS },
> +		{ "contexts-interruptible", CONTEXTS | INTERRUPTIBLE},
> +		{ "contexts-forked", CONTEXTS | FORKED},
> +		{ "contexts-priority", CONTEXTS | FORKED | PRIORITY },
> +		{ "contexts-chain", CONTEXTS | CHAIN },
> +		{ "contexts-sync", CONTEXTS | SYNC },
> +		{ "queues", QUEUES },
> +		{ "queues-interruptible", QUEUES | INTERRUPTIBLE},
> +		{ "queues-forked", QUEUES | FORKED},
> +		{ "queues-priority", QUEUES | FORKED | PRIORITY },
> +		{ "queues-chain", QUEUES | CHAIN },
> +		{ "queues-sync", QUEUES | SYNC },
>   		{ NULL }
>   	};
>   	int fd;
> diff --git a/tests/meson.build b/tests/meson.build
> index 3810bd760..3883ae127 100644
> --- a/tests/meson.build
> +++ b/tests/meson.build
> @@ -114,6 +114,7 @@ i915_progs = [
>   	'gem_ctx_exec',
>   	'gem_ctx_isolation',
>   	'gem_ctx_param',
> +	'gem_ctx_shared',
>   	'gem_ctx_switch',
>   	'gem_ctx_thrash',
>   	'gem_double_irq_loop',
> 

Regards,

Tvrtko
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH i-g-t 13/16] i915: Add gem_exec_balancer
  2019-05-08 10:09   ` [igt-dev] " Chris Wilson
@ 2019-05-15 10:49     ` Tvrtko Ursulin
  -1 siblings, 0 replies; 92+ messages in thread
From: Tvrtko Ursulin @ 2019-05-15 10:49 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: igt-dev


On 08/05/2019 11:09, Chris Wilson wrote:
> Exercise the in-kernel load balancer checking that we can distribute
> batches across the set of ctx->engines to avoid load.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   tests/Makefile.am              |    1 +
>   tests/Makefile.sources         |    1 +
>   tests/i915/gem_exec_balancer.c | 1050 ++++++++++++++++++++++++++++++++
>   tests/meson.build              |    7 +
>   4 files changed, 1059 insertions(+)
>   create mode 100644 tests/i915/gem_exec_balancer.c
> 
> diff --git a/tests/Makefile.am b/tests/Makefile.am
> index 5097debf6..c6af0aeaf 100644
> --- a/tests/Makefile.am
> +++ b/tests/Makefile.am
> @@ -96,6 +96,7 @@ gem_close_race_LDADD = $(LDADD) -lpthread
>   gem_ctx_thrash_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
>   gem_ctx_thrash_LDADD = $(LDADD) -lpthread
>   gem_ctx_sseu_LDADD = $(LDADD) $(top_builddir)/lib/libigt_perf.la
> +i915_gem_exec_balancer_LDADD = $(LDADD) $(top_builddir)/lib/libigt_perf.la
>   gem_exec_capture_LDADD = $(LDADD) -lz
>   gem_exec_parallel_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
>   gem_exec_parallel_LDADD = $(LDADD) -lpthread
> diff --git a/tests/Makefile.sources b/tests/Makefile.sources
> index e7ee27e81..323b625aa 100644
> --- a/tests/Makefile.sources
> +++ b/tests/Makefile.sources
> @@ -24,6 +24,7 @@ TESTS_progs = \
>   	i915/gem_ctx_clone \
>   	i915/gem_ctx_engines \
>   	i915/gem_ctx_shared \
> +	i915/gem_exec_balancer \
>   	i915/gem_vm_create \
>   	kms_3d \
>   	kms_addfb_basic \
> diff --git a/tests/i915/gem_exec_balancer.c b/tests/i915/gem_exec_balancer.c
> new file mode 100644
> index 000000000..25195d478
> --- /dev/null
> +++ b/tests/i915/gem_exec_balancer.c
> @@ -0,0 +1,1050 @@
> +/*
> + * Copyright © 2018 Intel Corporation

2019 I guess, even though work was started in 2018?

> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +#include <sched.h>
> +
> +#include "igt.h"
> +#include "igt_perf.h"
> +#include "i915/gem_ring.h"
> +#include "sw_sync.h"
> +
> +IGT_TEST_DESCRIPTION("Exercise in-kernel load-balancing");
> +
> +#define INSTANCE_COUNT (1 << I915_PMU_SAMPLE_INSTANCE_BITS)

Hmm.. this is a strange surrogate but I guess it works.

> +
> +static bool has_class_instance(int i915, uint16_t class, uint16_t instance)
> +{
> +	int fd;
> +
> +	fd = perf_i915_open(I915_PMU_ENGINE_BUSY(class, instance));

More work for Andi to replace with real engine discovery. :)

> +	if (fd != -1) {
> +		close(fd);
> +		return true;
> +	}
> +
> +	return false;
> +}
> +
> +static struct i915_engine_class_instance *
> +list_engines(int i915, uint32_t class_mask, unsigned int *out)
> +{
> +	unsigned int count = 0, size = 64;
> +	struct i915_engine_class_instance *engines;
> +
> +	engines = malloc(size * sizeof(*engines));
> +	if (!engines) {
> +		*out = 0;
> +		return NULL;
> +	}
> +
> +	for (enum drm_i915_gem_engine_class class = I915_ENGINE_CLASS_RENDER;
> +	     class_mask;
> +	     class++, class_mask >>= 1) {
> +		if (!(class_mask & 1))
> +			continue;
> +
> +		for (unsigned int instance = 0;
> +		     instance < INSTANCE_COUNT;
> +		     instance++) {
> +		     if (!has_class_instance(i915, class, instance))
> +			     continue;
> +
> +			if (count == size) {
> +				struct i915_engine_class_instance *e;
> +
> +				size *= 2;
> +				e = realloc(engines, size*sizeof(*engines));
> +				if (!e) {

I'd just assert. On malloc as well.
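
i.e.

	engines = malloc(size * sizeof(*engines));
	igt_assert(engines);
	...
	e = realloc(engines, size * sizeof(*engines));
	igt_assert(e);
	engines = e;

and drop the partial-result fallbacks.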

> +					*out = count;
> +					return engines;
> +				}
> +
> +				engines = e;
> +			}
> +
> +			engines[count++] = (struct i915_engine_class_instance){
> +				.engine_class = class,
> +				.engine_instance = instance,
> +			};
> +		}
> +	}
> +
> +	if (!count) {
> +		free(engines);
> +		engines = NULL;
> +	}
> +
> +	*out = count;
> +	return engines;
> +}
> +
> +static int __set_load_balancer(int i915, uint32_t ctx,
> +			       const struct i915_engine_class_instance *ci,
> +			       unsigned int count)
> +{
> +	I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(balancer, count);
> +	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 1 + count);
> +	struct drm_i915_gem_context_param p = {
> +		.ctx_id = ctx,
> +		.param = I915_CONTEXT_PARAM_ENGINES,
> +		.size = sizeof(engines),
> +		.value = to_user_pointer(&engines)
> +	};
> +
> +	memset(&balancer, 0, sizeof(balancer));
> +	balancer.base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE;
> +
> +	igt_assert(count);
> +	balancer.num_siblings = count;
> +	memcpy(balancer.engines, ci, count * sizeof(*ci));
> +
> +	memset(&engines, 0, sizeof(engines));
> +	engines.extensions = to_user_pointer(&balancer);
> +	engines.engines[0].engine_class =
> +		I915_ENGINE_CLASS_INVALID;
> +	engines.engines[0].engine_instance =
> +		I915_ENGINE_CLASS_INVALID_NONE;
> +	memcpy(engines.engines + 1, ci, count * sizeof(*ci));
> +
> +	return __gem_context_set_param(i915, &p);
> +}
> +
> +static void set_load_balancer(int i915, uint32_t ctx,
> +			      const struct i915_engine_class_instance *ci,
> +			      unsigned int count)
> +{
> +	igt_assert_eq(__set_load_balancer(i915, ctx, ci, count), 0);
> +}
> +
> +static uint32_t load_balancer_create(int i915,
> +				     const struct i915_engine_class_instance *ci,
> +				     unsigned int count)
> +{
> +	uint32_t ctx;
> +
> +	ctx = gem_context_create(i915);
> +	set_load_balancer(i915, ctx, ci, count);
> +
> +	return ctx;
> +}
> +
> +static uint32_t __batch_create(int i915, uint32_t offset)
> +{
> +	const uint32_t bbe = MI_BATCH_BUFFER_END;
> +	uint32_t handle;
> +
> +	handle = gem_create(i915, ALIGN(offset + 4, 4096));
> +	gem_write(i915, handle, offset, &bbe, sizeof(bbe));
> +
> +	return handle;
> +}
> +
> +static uint32_t batch_create(int i915)
> +{
> +	return __batch_create(i915, 0);
> +}
> +
> +static void invalid_balancer(int i915)
> +{
> +	I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(balancer, 64);
> +	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 64);
> +	struct drm_i915_gem_context_param p = {
> +		.param = I915_CONTEXT_PARAM_ENGINES,
> +		.value = to_user_pointer(&engines)
> +	};
> +	uint32_t handle;
> +	void *ptr;
> +
> +	/*
> +	 * Assume that I915_CONTEXT_PARAM_ENGINE validates the array
> +	 * of engines[], our job is to determine if the load_balancer
> +	 * extension explodes.
> +	 */
> +
> +	for (int class = 0; class < 32; class++) {
> +		struct i915_engine_class_instance *ci;
> +		unsigned int count;
> +
> +		ci = list_engines(i915, 1 << class, &count);
> +		if (!ci)
> +			continue;
> +
> +		igt_debug("Found %d engines\n", count);
> +		igt_assert_lte(count, 64);

Hey.. you always say trust the kernel! ;)

> +
> +		p.ctx_id = gem_context_create(i915);
> +		p.size = (sizeof(struct i915_context_param_engines) +
> +				(count + 1) * sizeof(*engines.engines));

Alignment looks off.

> +
> +		memset(&engines, 0, sizeof(engines));
> +		engines.engines[0].engine_class = I915_ENGINE_CLASS_INVALID;
> +		engines.engines[0].engine_instance = I915_ENGINE_CLASS_INVALID_NONE;
> +		memcpy(engines.engines + 1, ci, count * sizeof(*ci));
> +		gem_context_set_param(i915, &p);
> +
> +		engines.extensions = -1ull;
> +		igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
> +
> +		engines.extensions = 1ull;
> +		igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
> +
> +		memset(&balancer, 0, sizeof(balancer));
> +		balancer.base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE;
> +		balancer.num_siblings = count;
> +		memcpy(balancer.engines, ci, count * sizeof(*ci));
> +
> +		engines.extensions = to_user_pointer(&balancer);
> +		gem_context_set_param(i915, &p);
> +
> +		balancer.engine_index = 1;
> +		igt_assert_eq(__gem_context_set_param(i915, &p), -EEXIST);
> +
> +		balancer.engine_index = count;
> +		igt_assert_eq(__gem_context_set_param(i915, &p), -EEXIST);
> +
> +		balancer.engine_index = count + 1;
> +		igt_assert_eq(__gem_context_set_param(i915, &p), -EINVAL);
> +
> +		balancer.engine_index = 0;
> +		gem_context_set_param(i915, &p);
> +
> +		balancer.base.next_extension = to_user_pointer(&balancer);
> +		igt_assert_eq(__gem_context_set_param(i915, &p), -EEXIST);
> +
> +		balancer.base.next_extension = -1ull;
> +		igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
> +
> +		handle = gem_create(i915, 4096 * 3);
> +		ptr = gem_mmap__gtt(i915, handle, 4096 * 3, PROT_WRITE);
> +		gem_close(i915, handle);
> +
> +		memset(&engines, 0, sizeof(engines));
> +		engines.engines[0].engine_class = I915_ENGINE_CLASS_INVALID;
> +		engines.engines[0].engine_instance = I915_ENGINE_CLASS_INVALID_NONE;
> +		engines.engines[1].engine_class = I915_ENGINE_CLASS_INVALID;
> +		engines.engines[1].engine_instance = I915_ENGINE_CLASS_INVALID_NONE;
> +		memcpy(engines.engines + 2, ci, count * sizeof(ci));
> +		p.size = (sizeof(struct i915_context_param_engines) +
> +				(count + 2) * sizeof(*engines.engines));

Alignment again.

> +		gem_context_set_param(i915, &p);
> +
> +		balancer.base.next_extension = 0;
> +		balancer.engine_index = 1;
> +		engines.extensions = to_user_pointer(&balancer);
> +		gem_context_set_param(i915, &p);
> +
> +		memcpy(ptr + 4096 - 8, &balancer, sizeof(balancer));
> +		memcpy(ptr + 8192 - 8, &balancer, sizeof(balancer));
> +		balancer.engine_index = 0;
> +
> +		engines.extensions = to_user_pointer(ptr) + 4096 - 8;
> +		gem_context_set_param(i915, &p);
> +
> +		balancer.base.next_extension = engines.extensions;
> +		engines.extensions = to_user_pointer(&balancer);
> +		gem_context_set_param(i915, &p);

mmap_gtt and unmapped area testing in one?

> +		munmap(ptr, 4096);
> +		igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
> +		engines.extensions = to_user_pointer(ptr) + 4096 - 8;
> +		igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
> +
> +		engines.extensions = to_user_pointer(ptr) + 8192 - 8;
> +		gem_context_set_param(i915, &p);
> +
> +		balancer.base.next_extension = engines.extensions;
> +		engines.extensions = to_user_pointer(&balancer);
> +		gem_context_set_param(i915, &p);
> +
> +		munmap(ptr + 8192, 4096);
> +		igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
> +		engines.extensions = to_user_pointer(ptr) + 8192 - 8;
> +		igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
> +
> +		munmap(ptr + 4096, 4096);
> +
> +		gem_context_destroy(i915, p.ctx_id);
> +		free(ci);
> +	}
> +}
> +
> +static void kick_kthreads(int period_us)
> +{
> +	sched_yield();
> +	usleep(period_us);

Yield and sleep, hm.. is it ever called with a zero period_us? Doesn't
seem like it. So what is the yield for?

> +}
> +
> +static double measure_load(int pmu, int period_us)
> +{
> +	uint64_t data[2];
> +	uint64_t d_t, d_v;
> +
> +	kick_kthreads(period_us);
> +
> +	igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
> +	d_v = -data[0];
> +	d_t = -data[1];
> +
> +	usleep(period_us);
> +
> +	igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
> +	d_v += data[0];
> +	d_t += data[1];

This -val + val trick with uint64_t works?
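
Answering myself: I believe it does, courtesy of modular arithmetic,

	uint64_t v = -(uint64_t)100;	/* == 2^64 - 100 */
	v += 130;			/* wraps around to 30, the delta */

so the intermediate "negative" value is harmless as long as only the
final difference is used.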

> +
> +	return d_v / (double)d_t;
> +}
> +
> +static double measure_min_load(int pmu, unsigned int num, int period_us)
> +{
> +	uint64_t data[2 + num];
> +	uint64_t d_t, d_v[num];
> +	uint64_t min = -1, max = 0;
> +
> +	kick_kthreads(period_us);
> +
> +	igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
> +	for (unsigned int n = 0; n < num; n++)
> +		d_v[n] = -data[2 + n];
> +	d_t = -data[1];
> +
> +	usleep(period_us);
> +
> +	igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
> +
> +	d_t += data[1];
> +	for (unsigned int n = 0; n < num; n++) {
> +		d_v[n] += data[2 + n];
> +		igt_debug("engine[%d]: %.1f%%\n",
> +			  n, d_v[n] / (double)d_t * 100);
> +		if (d_v[n] < min)
> +			min = d_v[n];
> +		if (d_v[n] > max)
> +			max = d_v[n];
> +	}
> +
> +	igt_debug("elapsed: %"PRIu64"ns, load [%.1f, %.1f]%%\n",
> +		  d_t, min / (double)d_t * 100,  max / (double)d_t * 100);
> +
> +	return min / (double)d_t;
> +}
> +
> +static void check_individual_engine(int i915,
> +				    uint32_t ctx,
> +				    const struct i915_engine_class_instance *ci,
> +				    int idx)
> +{
> +	igt_spin_t *spin;
> +	double load;
> +	int pmu;
> +
> +	pmu = perf_i915_open(I915_PMU_ENGINE_BUSY(ci[idx].engine_class,
> +						  ci[idx].engine_instance));
> +
> +	spin = igt_spin_new(i915, .ctx = ctx, .engine = idx + 1);
> +	load = measure_load(pmu, 10000);

Hm, the usleep before measuring starts and the one between the two
samples are the same length. The one before should be fixed I think, no?

> +	igt_spin_free(i915, spin);
> +
> +	close(pmu);
> +
> +	igt_assert_f(load > 0.90,
> +		     "engine %d (class:instance %d:%d) was found to be only %.1f%% busy\n",
> +		     idx, ci[idx].engine_class, ci[idx].engine_instance, load*100);
> +}
> +
> +static void individual(int i915)
> +{
> +	uint32_t ctx;
> +
> +	/*
> +	 * I915_CONTEXT_PARAM_ENGINE allows us to index into the user
> +	 * supplied array from gem_execbuf(). Our check is to build the
> +	 * ctx->engine[] with various different engine classes, feed in
> +	 * a spinner and then ask pmu to confirm it the expected engine
> +	 * was busy.
> +	 */
> +
> +	ctx = gem_context_create(i915);
> +
> +	for (int mask = 0; mask < 32; mask++) {
> +		struct i915_engine_class_instance *ci;
> +		unsigned int count;
> +
> +		ci = list_engines(i915, 1u << mask, &count);
> +		if (!ci)
> +			continue;
> +
> +		igt_debug("Found %d engines of class %d\n", count, mask);
> +
> +		for (int pass = 0; pass < count; pass++) { /* approx. count! */
> +			igt_permute_array(ci, count, igt_exchange_int64);

struct i915_engine_class_instance is four bytes long, so the swap func
looks wrong. Unless for some reason you want to swap in blocks of two.
Don't know. The last index would reach into random memory though. I must
be missing something or it wouldn't have worked..

> +			set_load_balancer(i915, ctx, ci, count);
> +			for (unsigned int n = 0; n < count; n++)
> +				check_individual_engine(i915, ctx, ci, n);
> +		}
> +
> +		free(ci);
> +	}
> +
> +	gem_context_destroy(i915, ctx);
> +	gem_quiescent_gpu(i915);
> +}
> +
> +static void indicies(int i915)

indices?

> +{
> +	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, I915_EXEC_RING_MASK + 1);
> +	struct drm_i915_gem_context_param p = {
> +		.ctx_id = gem_context_create(i915),
> +		.param = I915_CONTEXT_PARAM_ENGINES,
> +		.value = to_user_pointer(&engines)
> +	};
> +
> +	struct drm_i915_gem_exec_object2 batch = {
> +		.handle = batch_create(i915),
> +	};
> +
> +	unsigned int nengines = 0;
> +	void *balancers = NULL;
> +
> +	/*
> +	 * We can populate our engine map with multiple virtual engines.
> +	 * Do so.
> +	 */
> +
> +	for (int class = 0; class < 32; class++) {
> +		struct i915_engine_class_instance *ci;
> +		unsigned int count;
> +
> +		ci = list_engines(i915, 1u << class, &count);
> +		if (!ci)
> +			continue;
> +
> +		igt_debug("Found %d engines of class %d\n", count, class);

Maybe this debug message should go into list_engines, since it seems 
repeated a few times already.

> +
> +		for (int n = 0; n < count; n++) {
> +			I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(*balancer,
> +								 count);
> +
> +			engines.engines[nengines].engine_class =
> +				I915_ENGINE_CLASS_INVALID;
> +			engines.engines[nengines].engine_instance =
> +				I915_ENGINE_CLASS_INVALID_NONE;
> +
> +			balancer = calloc(sizeof(*balancer), 1);
> +			igt_assert(balancer);
> +
> +			balancer->base.name =
> +				I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE;
> +			balancer->base.next_extension =
> +				to_user_pointer(balancers);
> +			balancers = balancer;
> +
> +			balancer->engine_index = nengines++;
> +			balancer->num_siblings = count;
> +
> +			memcpy(balancer->engines,
> +			       ci, count * sizeof(*ci));
> +		}
> +		free(ci);
> +	}
> +
> +	igt_require(balancers);
> +	engines.extensions = to_user_pointer(balancers);
> +	p.size = (sizeof(struct i915_engine_class_instance) * nengines +
> +		  sizeof(struct i915_context_param_engines));
> +	gem_context_set_param(i915, &p);
> +
> +	for (unsigned int n = 0; n < nengines; n++) {
> +		struct drm_i915_gem_execbuffer2 eb = {
> +			.buffers_ptr = to_user_pointer(&batch),
> +			.buffer_count = 1,
> +			.flags = n,
> +			.rsvd1 = p.ctx_id,
> +		};
> +		igt_debug("Executing on index=%d\n", n);
> +		gem_execbuf(i915, &eb);
> +	}
> +	gem_context_destroy(i915, p.ctx_id);
> +
> +	gem_sync(i915, batch.handle);
> +	gem_close(i915, batch.handle);
> +
> +	while (balancers) {
> +		struct i915_context_engines_load_balance *b, *n;
> +
> +		b = balancers;
> +		n = from_user_pointer(b->base.next_extension);
> +		free(b);
> +
> +		balancers = n;
> +	}
> +
> +	gem_quiescent_gpu(i915);
> +}
> +
> +static void busy(int i915)
> +{
> +	uint32_t scratch = gem_create(i915, 4096);
> +
> +	/*
> +	 * Check that virtual engines are reported via GEM_BUSY.
> +	 *
> +	 * When running, the batch will be on the real engine and report
> +	 * the actual class.
> +	 *
> +	 * Prior to running, if the load-balancer is across multiple
> +	 * classes we don't know which engine the batch will
> +	 * execute on, so we report them all!
> +	 *
> +	 * However, as we only support (and test) creating a load-balancer
> +	 * from engines of only one class, that can be propagated accurately
> +	 * through to GEM_BUSY.
> +	 */
> +
> +	for (int class = 0; class < 16; class++) {
> +		struct drm_i915_gem_busy busy;
> +		struct i915_engine_class_instance *ci;
> +		unsigned int count;
> +		igt_spin_t *spin[2];
> +		uint32_t ctx;
> +
> +		ci = list_engines(i915, 1u << class, &count);
> +		if (!ci)
> +			continue;
> +
> +		igt_debug("Found %d engines of class %d\n", count, class);
> +		ctx = load_balancer_create(i915, ci, count);
> +		free(ci);
> +
> +		spin[0] = __igt_spin_new(i915,
> +					 .ctx = ctx,
> +					 .flags = IGT_SPIN_POLL_RUN);
> +		spin[1] = __igt_spin_new(i915,
> +					 .ctx = ctx,
> +					 .dependency = scratch);
> +
> +		igt_spin_busywait_until_started(spin[0]);
> +
> +		/* Running: actual class */
> +		busy.handle = spin[0]->handle;
> +		do_ioctl(i915, DRM_IOCTL_I915_GEM_BUSY, &busy);
> +		igt_assert_eq_u32(busy.busy, 1u << (class + 16));
> +
> +		/* Queued(read): expected class */
> +		busy.handle = spin[1]->handle;
> +		do_ioctl(i915, DRM_IOCTL_I915_GEM_BUSY, &busy);
> +		igt_assert_eq_u32(busy.busy, 1u << (class + 16));
> +
> +		/* Queued(write): expected class */
> +		busy.handle = scratch;
> +		do_ioctl(i915, DRM_IOCTL_I915_GEM_BUSY, &busy);
> +		igt_assert_eq_u32(busy.busy,
> +				  (1u << (class + 16)) | (class + 1));
> +
> +		igt_spin_free(i915, spin[1]);
> +		igt_spin_free(i915, spin[0]);
> +
> +		gem_context_destroy(i915, ctx);
> +	}
> +
> +	gem_close(i915, scratch);
> +	gem_quiescent_gpu(i915);
> +}
> +
> +static int add_pmu(int pmu, const struct i915_engine_class_instance *ci)
> +{
> +	return perf_i915_open_group(I915_PMU_ENGINE_BUSY(ci->engine_class,
> +							 ci->engine_instance),
> +				    pmu);
> +}
> +
> +static void full(int i915, unsigned int flags)
> +#define PULSE 0x1
> +#define LATE 0x2
> +{
> +	struct drm_i915_gem_exec_object2 batch = {
> +		.handle = batch_create(i915),
> +	};
> +
> +	if (flags & LATE)
> +		igt_require_sw_sync();
> +
> +	/*
> +	 * I915_CONTEXT_PARAM_ENGINE changes the meaning of I915_EXEC_DEFAULT
> +	 * to provide an automatic selection from the ctx->engine[]. It
> +	 * employs load-balancing to evenly distribute the workload the

The leading section needs rewriting for truth. It is the load balance
extension which _can_ redefine the meaning of I915_EXEC_DEFAULT etc..
I'm sure I didn't need to explain, but just to make it clear which part
I am complaining about. :)
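
Maybe something like (my wording, feel free to improve):

	/*
	 * The load-balancing extension to I915_CONTEXT_PARAM_ENGINES can
	 * redefine I915_EXEC_DEFAULT to mean "pick an idle engine from
	 * ctx->engines[]", distributing the workload across the array.
	 */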

> +	 * array. If we submit N spinners, we expect them to be simultaneously
> +	 * running across N engines and use PMU to confirm that the entire
> +	 * set of engines are busy.

Clarify that this only holds when using N contexts.

> +	 *
> +	 * We complicate matters by interpersing shortlived tasks to challenge
> +	 * the kernel to search for space in which to insert new batches.
> +	 */
> +
> +
> +	for (int mask = 0; mask < 32; mask++) {
> +		struct i915_engine_class_instance *ci;
> +		igt_spin_t *spin = NULL;
> +		IGT_CORK_FENCE(cork);
> +		unsigned int count;
> +		double load;
> +		int fence = -1;
> +		int *pmu;
> +
> +		ci = list_engines(i915, 1u << mask, &count);
> +		if (!ci)
> +			continue;
> +
> +		igt_debug("Found %d engines of class %d\n", count, mask);
> +
> +		pmu = malloc(sizeof(*pmu) * count);
> +		igt_assert(pmu);
> +
> +		if (flags & LATE)
> +			fence = igt_cork_plug(&cork, i915);
> +
> +		pmu[0] = -1;
> +		for (unsigned int n = 0; n < count; n++) {
> +			uint32_t ctx;
> +
> +			pmu[n] = add_pmu(pmu[0], &ci[n]);
> +
> +			if (flags & PULSE) {
> +				struct drm_i915_gem_execbuffer2 eb = {
> +					.buffers_ptr = to_user_pointer(&batch),
> +					.buffer_count = 1,
> +					.rsvd2 = fence,
> +					.flags = flags & LATE ? I915_EXEC_FENCE_IN : 0,
> +				};
> +				gem_execbuf(i915, &eb);
> +			}
> +
> +			/*
> +			 * Each spinner needs to be one a new timeline,
> +			 * otherwise they will just sit in the single queue
> +			 * and not run concurrently.
> +			 */
> +			ctx = load_balancer_create(i915, ci, count);
> +
> +			if (spin == NULL) {
> +				spin = __igt_spin_new(i915, .ctx = ctx);
> +			} else {
> +				struct drm_i915_gem_execbuffer2 eb = {
> +					.buffers_ptr = spin->execbuf.buffers_ptr,
> +					.buffer_count = spin->execbuf.buffer_count,
> +					.rsvd1 = ctx,
> +					.rsvd2 = fence,
> +					.flags = flags & LATE ? I915_EXEC_FENCE_IN : 0,
> +				};
> +				gem_execbuf(i915, &eb);
> +			}
> +
> +			gem_context_destroy(i915, ctx);
> +		}
> +
> +		if (flags & LATE) {
> +			igt_cork_unplug(&cork);
> +			close(fence);
> +		}
> +
> +		load = measure_min_load(pmu[0], count, 10000);
> +		igt_spin_free(i915, spin);
> +
> +		close(pmu[0]);
> +		free(pmu);
> +
> +		free(ci);
> +
> +		igt_assert_f(load > 0.90,
> +			     "minimum load for %d x class:%d was found to be only %.1f%% busy\n",
> +			     count, mask, load*100);
> +		gem_quiescent_gpu(i915);
> +	}
> +
> +	gem_close(i915, batch.handle);
> +	gem_quiescent_gpu(i915);
> +}
> +
> +static void nop(int i915)
> +{
> +	struct drm_i915_gem_exec_object2 batch = {
> +		.handle = batch_create(i915),
> +	};
> +
> +	for (int mask = 0; mask < 32; mask++) {

s/mask/class/

> +		struct i915_engine_class_instance *ci;
> +		unsigned int count;
> +		uint32_t ctx;
> +
> +		ci = list_engines(i915, 1u << mask, &count);
> +		if (!ci)
> +			continue;
> +
> +		if (count < 2) {
> +			free(ci);
> +			continue;

Benchmark-only subtest for real veng?

> +		}
> +
> +		igt_debug("Found %d engines of class %d\n", count, mask);
> +		ctx = load_balancer_create(i915, ci, count);
> +
> +		for (int n = 0; n < count; n++) {
> +			struct drm_i915_gem_execbuffer2 execbuf = {
> +				.buffers_ptr = to_user_pointer(&batch),
> +				.buffer_count = 1,
> +				.flags = n + 1,
> +				.rsvd1 = ctx,
> +			};
> +			struct timespec tv = {};
> +			unsigned long nops;
> +			double t;
> +
> +			igt_nsec_elapsed(&tv);
> +			nops = 0;
> +			do {
> +				for (int r = 0; r < 1024; r++)
> +					gem_execbuf(i915, &execbuf);
> +				nops += 1024;
> +			} while (igt_seconds_elapsed(&tv) < 2);
> +			gem_sync(i915, batch.handle);
> +
> +			t = igt_nsec_elapsed(&tv) * 1e-3 / nops;
> +			igt_info("%x:%d %.3fus\n", mask, n, t);

Class in decimal is better I think.

And some descriptive labels to info messages would be good. Like 
"individual engines", "virtual engine" etc.

> +		}
> +
> +		{
> +			struct drm_i915_gem_execbuffer2 execbuf = {
> +				.buffers_ptr = to_user_pointer(&batch),
> +				.buffer_count = 1,
> +				.rsvd1 = ctx,
> +			};
> +			struct timespec tv = {};
> +			unsigned long nops;
> +			double t;
> +
> +			igt_nsec_elapsed(&tv);
> +			nops = 0;
> +			do {
> +				for (int r = 0; r < 1024; r++)
> +					gem_execbuf(i915, &execbuf);
> +				nops += 1024;
> +			} while (igt_seconds_elapsed(&tv) < 2);
> +			gem_sync(i915, batch.handle);
> +
> +			t = igt_nsec_elapsed(&tv) * 1e-3 / nops;
> +			igt_info("%x:* %.3fus\n", mask, t);
> +		}
> +
> +
> +		igt_fork(child, count) {
> +			struct drm_i915_gem_execbuffer2 execbuf = {
> +				.buffers_ptr = to_user_pointer(&batch),
> +				.buffer_count = 1,
> +				.flags = child + 1,
> +				.rsvd1 = gem_context_clone(i915, ctx,
> +							   I915_CONTEXT_CLONE_ENGINES, 0),
> +			};
> +			struct timespec tv = {};
> +			unsigned long nops;
> +			double t;
> +
> +			igt_nsec_elapsed(&tv);
> +			nops = 0;
> +			do {
> +				for (int r = 0; r < 1024; r++)
> +					gem_execbuf(i915, &execbuf);
> +				nops += 1024;
> +			} while (igt_seconds_elapsed(&tv) < 2);
> +			gem_sync(i915, batch.handle);
> +
> +			t = igt_nsec_elapsed(&tv) * 1e-3 / nops;
> +			igt_info("[%d] %x:%d %.3fus\n", child, mask, child, t);
> +
> +			memset(&tv, 0, sizeof(tv));
> +			execbuf.flags = 0;
> +
> +			igt_nsec_elapsed(&tv);
> +			nops = 0;
> +			do {
> +				for (int r = 0; r < 1024; r++)
> +					gem_execbuf(i915, &execbuf);
> +				nops += 1024;
> +			} while (igt_seconds_elapsed(&tv) < 2);
> +			gem_sync(i915, batch.handle);
> +
> +			t = igt_nsec_elapsed(&tv) * 1e-3 / nops;
> +			igt_info("[%d] %x:* %.3fus\n", child, mask, t);
> +
> +			gem_context_destroy(i915, execbuf.rsvd1);
> +		}
> +
> +		igt_waitchildren();
> +
> +		gem_context_destroy(i915, ctx);
> +		free(ci);
> +	}
> +
> +	gem_close(i915, batch.handle);
> +	gem_quiescent_gpu(i915);
> +}
> +
> +static void ping(int i915, uint32_t ctx, unsigned int engine)
> +{
> +	struct drm_i915_gem_exec_object2 obj = {
> +		.handle = batch_create(i915),
> +	};
> +	struct drm_i915_gem_execbuffer2 execbuf = {
> +		.buffers_ptr = to_user_pointer(&obj),
> +		.buffer_count = 1,
> +		.flags = engine,
> +		.rsvd1 = ctx,
> +	};
> +	gem_execbuf(i915, &execbuf);
> +	gem_sync(i915, obj.handle);
> +	gem_close(i915, obj.handle);
> +}
> +
> +static void semaphore(int i915)
> +{
> +	uint32_t block[2], scratch;
> +	igt_spin_t *spin[3];
> +
> +	/*
> +	 * If we are using HW semaphores to launch serialised requests
> +	 * on different engine concurrently, we want to verify that real
> +	 * work is unimpeded.
> +	 */
> +	igt_require(gem_scheduler_has_preemption(i915));
> +
> +	block[0] = gem_context_create(i915);
> +	block[1] = gem_context_create(i915);
> +
> +	scratch = gem_create(i915, 4096);
> +	spin[2] = igt_spin_new(i915, .dependency = scratch);
> +	for (int mask = 1; mask < 32; mask++) {

s/mask/class/ throughout.

> +		struct i915_engine_class_instance *ci;
> +		unsigned int count;
> +		uint32_t vip;
> +
> +		ci = list_engines(i915, 1u << mask, &count);
> +		if (!ci)
> +			continue;
> +
> +		if (count < ARRAY_SIZE(block))
> +			continue;
> +
> +		/* Ensure that we completely occupy all engines in this group */
> +		count = ARRAY_SIZE(block);
> +
> +		for (int i = 0; i < count; i++) {
> +			set_load_balancer(i915, block[i], ci, count);
> +			spin[i] = __igt_spin_new(i915,
> +						       .ctx = block[i],
> +						       .dependency = scratch);

Alignment.

> +		}
> +
> +		/*
> +		 * Either we haven't blocked both engines with semaphores,
> +		 * or we let the vip through. If not, we hang.
> +		 */
> +		vip = gem_context_create(i915);
> +		set_load_balancer(i915, vip, ci, count);
> +		ping(i915, vip, 0);
> +		gem_context_destroy(i915, vip);
> +
> +		for (int i = 0; i < count; i++)
> +			igt_spin_free(i915, spin[i]);
> +
> +		free(ci);
> +	}
> +	igt_spin_free(i915, spin[2]);
> +	gem_close(i915, scratch);
> +
> +	gem_context_destroy(i915, block[1]);
> +	gem_context_destroy(i915, block[0]);
> +
> +	gem_quiescent_gpu(i915);
> +}
> +
> +static void smoketest(int i915, int timeout)
> +{
> +	struct drm_i915_gem_exec_object2 batch[2] = {
> +		{ .handle = __batch_create(i915, 16380) }
> +	};
> +	unsigned int ncontext = 0;
> +	uint32_t *contexts = NULL;
> +	uint32_t *handles = NULL;
> +
> +	igt_require_sw_sync();
> +
> +	for (int mask = 0; mask < 32; mask++) {
> +		struct i915_engine_class_instance *ci;
> +		unsigned int count = 0;
> +
> +		ci = list_engines(i915, 1u << mask, &count);
> +		if (!ci || count < 2) {
> +			free(ci);
> +			continue;
> +		}
> +
> +		igt_debug("Found %d engines of class %d\n", count, mask);
> +
> +		ncontext += 128;
> +		contexts = realloc(contexts, sizeof(*contexts) * ncontext);
> +		igt_assert(contexts);
> +
> +		for (unsigned int n = ncontext - 128; n < ncontext; n++) {
> +			contexts[n] = load_balancer_create(i915, ci, count);
> +			igt_assert(contexts[n]);
> +		}
> +
> +		free(ci);
> +	}
> +	igt_debug("Created %d virtual engines (one per context)\n", ncontext);
> +	igt_require(ncontext);
> +
> +	contexts = realloc(contexts, sizeof(*contexts) * ncontext * 4);
> +	igt_assert(contexts);
> +	memcpy(contexts + ncontext, contexts, ncontext * sizeof(*contexts));
> +	ncontext *= 2;
> +	memcpy(contexts + ncontext, contexts, ncontext * sizeof(*contexts));
> +	ncontext *= 2;
> +
> +	handles = malloc(sizeof(*handles) * ncontext);
> +	igt_assert(handles);
> +	for (unsigned int n = 0; n < ncontext; n++)
> +		handles[n] = gem_create(i915, 4096);
> +
> +	igt_until_timeout(timeout) {
> +		unsigned int count = 1 + (rand() % (ncontext - 1));
> +		IGT_CORK_FENCE(cork);
> +		int fence = igt_cork_plug(&cork, i915);
> +
> +		for (unsigned int n = 0; n < count; n++) {
> +			struct drm_i915_gem_execbuffer2 eb = {
> +				.buffers_ptr = to_user_pointer(batch),
> +				.buffer_count = ARRAY_SIZE(batch),
> +				.rsvd1 = contexts[n],
> +				.rsvd2 = fence,
> +				.flags = I915_EXEC_BATCH_FIRST | I915_EXEC_FENCE_IN,
> +			};
> +			batch[1].handle = handles[n];
> +			gem_execbuf(i915, &eb);
> +		}
> +		igt_permute_array(handles, count, igt_exchange_int);
> +
> +		igt_cork_unplug(&cork);
> +		for (unsigned int n = 0; n < count; n++)
> +			gem_sync(i915, handles[n]);
> +
> +		close(fence);
> +	}
> +
> +	for (unsigned int n = 0; n < ncontext; n++) {
> +		gem_close(i915, handles[n]);
> +		__gem_context_destroy(i915, contexts[n]);
> +	}
> +	free(handles);
> +	free(contexts);
> +	gem_close(i915, batch[0].handle);
> +}
> +
> +static bool has_context_engines(int i915)
> +{
> +	struct drm_i915_gem_context_param p = {
> +		.param = I915_CONTEXT_PARAM_ENGINES,
> +	};
> +
> +	return __gem_context_set_param(i915, &p) == 0;
> +}
> +
> +static bool has_load_balancer(int i915)
> +{
> +	struct i915_engine_class_instance ci = {};
> +	uint32_t ctx;
> +	int err;
> +
> +	ctx = gem_context_create(i915);
> +	err = __set_load_balancer(i915, ctx, &ci, 1);
> +	gem_context_destroy(i915, ctx);
> +
> +	return err == 0;
> +}
> +
> +igt_main
> +{
> +	int i915 = -1;
> +
> +	igt_skip_on_simulation();
> +
> +	igt_fixture {
> +		i915 = drm_open_driver(DRIVER_INTEL);
> +		igt_require_gem(i915);
> +
> +		gem_require_contexts(i915);
> +		igt_require(has_context_engines(i915));
> +		igt_require(has_load_balancer(i915));
> +
> +		igt_fork_hang_detector(i915);
> +	}
> +
> +	igt_subtest("invalid-balancer")
> +		invalid_balancer(i915);
> +
> +	igt_subtest("individual")
> +		individual(i915);
> +
> +	igt_subtest("indicies")
> +		indicies(i915);
> +
> +	igt_subtest("busy")
> +		busy(i915);
> +
> +	igt_subtest_group {
> +		static const struct {
> +			const char *name;
> +			unsigned int flags;
> +		} phases[] = {
> +			{ "", 0 },
> +			{ "-pulse", PULSE },
> +			{ "-late", LATE },
> +			{ "-late-pulse", PULSE | LATE },
> +			{ }
> +		};
> +		for (typeof(*phases) *p = phases; p->name; p++)
> +			igt_subtest_f("full%s", p->name)
> +				full(i915, p->flags);
> +	}
> +
> +	igt_subtest("nop")
> +		nop(i915);
> +
> +	igt_subtest("semaphore")
> +		semaphore(i915);
> +
> +	igt_subtest("smoke")
> +		smoketest(i915, 20);
> +
> +	igt_fixture {
> +		igt_stop_hang_detector();
> +	}
> +}
> diff --git a/tests/meson.build b/tests/meson.build
> index 7e0089e74..eeea3611d 100644
> --- a/tests/meson.build
> +++ b/tests/meson.build
> @@ -288,6 +288,13 @@ test_executables += executable('gem_eio',
>   	   install : true)
>   test_list += 'gem_eio'
>   
> +test_executables += executable('gem_exec_balancer', 'i915/gem_exec_balancer.c',
> +	   dependencies : test_deps + [ lib_igt_perf ],
> +	   install_dir : libexecdir,
> +	   install_rpath : libexecdir_rpathdir,
> +	   install : true)
> +test_progs += 'gem_exec_balancer'
> +
>   test_executables += executable('gem_mocs_settings',
>   	   join_paths('i915', 'gem_mocs_settings.c'),
>   	   dependencies : test_deps + [ lib_igt_perf ],
> 

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [igt-dev] [PATCH i-g-t 13/16] i915: Add gem_exec_balancer
@ 2019-05-15 10:49     ` Tvrtko Ursulin
  0 siblings, 0 replies; 92+ messages in thread
From: Tvrtko Ursulin @ 2019-05-15 10:49 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: igt-dev


On 08/05/2019 11:09, Chris Wilson wrote:
> Exercise the in-kernel load balancer checking that we can distribute
> batches across the set of ctx->engines to avoid load.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   tests/Makefile.am              |    1 +
>   tests/Makefile.sources         |    1 +
>   tests/i915/gem_exec_balancer.c | 1050 ++++++++++++++++++++++++++++++++
>   tests/meson.build              |    7 +
>   4 files changed, 1059 insertions(+)
>   create mode 100644 tests/i915/gem_exec_balancer.c
> 
> diff --git a/tests/Makefile.am b/tests/Makefile.am
> index 5097debf6..c6af0aeaf 100644
> --- a/tests/Makefile.am
> +++ b/tests/Makefile.am
> @@ -96,6 +96,7 @@ gem_close_race_LDADD = $(LDADD) -lpthread
>   gem_ctx_thrash_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
>   gem_ctx_thrash_LDADD = $(LDADD) -lpthread
>   gem_ctx_sseu_LDADD = $(LDADD) $(top_builddir)/lib/libigt_perf.la
> +i915_gem_exec_balancer_LDADD = $(LDADD) $(top_builddir)/lib/libigt_perf.la
>   gem_exec_capture_LDADD = $(LDADD) -lz
>   gem_exec_parallel_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
>   gem_exec_parallel_LDADD = $(LDADD) -lpthread
> diff --git a/tests/Makefile.sources b/tests/Makefile.sources
> index e7ee27e81..323b625aa 100644
> --- a/tests/Makefile.sources
> +++ b/tests/Makefile.sources
> @@ -24,6 +24,7 @@ TESTS_progs = \
>   	i915/gem_ctx_clone \
>   	i915/gem_ctx_engines \
>   	i915/gem_ctx_shared \
> +	i915/gem_exec_balancer \
>   	i915/gem_vm_create \
>   	kms_3d \
>   	kms_addfb_basic \
> diff --git a/tests/i915/gem_exec_balancer.c b/tests/i915/gem_exec_balancer.c
> new file mode 100644
> index 000000000..25195d478
> --- /dev/null
> +++ b/tests/i915/gem_exec_balancer.c
> @@ -0,0 +1,1050 @@
> +/*
> + * Copyright © 2018 Intel Corporation

2019 I guess, even though work was started in 2018?

> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +#include <sched.h>
> +
> +#include "igt.h"
> +#include "igt_perf.h"
> +#include "i915/gem_ring.h"
> +#include "sw_sync.h"
> +
> +IGT_TEST_DESCRIPTION("Exercise in-kernel load-balancing");
> +
> +#define INSTANCE_COUNT (1 << I915_PMU_SAMPLE_INSTANCE_BITS)

Hmm.. this is a strange surrogate but I guess it works.

> +
> +static bool has_class_instance(int i915, uint16_t class, uint16_t instance)
> +{
> +	int fd;
> +
> +	fd = perf_i915_open(I915_PMU_ENGINE_BUSY(class, instance));

More work for Andi to replace with real engine discovery. :)

> +	if (fd != -1) {
> +		close(fd);
> +		return true;
> +	}
> +
> +	return false;
> +}
> +
> +static struct i915_engine_class_instance *
> +list_engines(int i915, uint32_t class_mask, unsigned int *out)
> +{
> +	unsigned int count = 0, size = 64;
> +	struct i915_engine_class_instance *engines;
> +
> +	engines = malloc(size * sizeof(*engines));
> +	if (!engines) {
> +		*out = 0;
> +		return NULL;
> +	}
> +
> +	for (enum drm_i915_gem_engine_class class = I915_ENGINE_CLASS_RENDER;
> +	     class_mask;
> +	     class++, class_mask >>= 1) {
> +		if (!(class_mask & 1))
> +			continue;
> +
> +		for (unsigned int instance = 0;
> +		     instance < INSTANCE_COUNT;
> +		     instance++) {
> +		     if (!has_class_instance(i915, class, instance))
> +			     continue;
> +
> +			if (count == size) {
> +				struct i915_engine_class_instance *e;
> +
> +				size *= 2;
> +				e = realloc(engines, size*sizeof(*engines));
> +				if (!e) {

I'd just assert. On malloc as well.
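
I.e. something like (untested):

	engines = malloc(size * sizeof(*engines));
	igt_assert(engines);
	...
	e = realloc(engines, size * sizeof(*engines));
	igt_assert(e);
	engines = e;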

> +					*out = count;
> +					return engines;
> +				}
> +
> +				engines = e;
> +			}
> +
> +			engines[count++] = (struct i915_engine_class_instance){
> +				.engine_class = class,
> +				.engine_instance = instance,
> +			};
> +		}
> +	}
> +
> +	if (!count) {
> +		free(engines);
> +		engines = NULL;
> +	}
> +
> +	*out = count;
> +	return engines;
> +}
> +
> +static int __set_load_balancer(int i915, uint32_t ctx,
> +			       const struct i915_engine_class_instance *ci,
> +			       unsigned int count)
> +{
> +	I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(balancer, count);
> +	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 1 + count);
> +	struct drm_i915_gem_context_param p = {
> +		.ctx_id = ctx,
> +		.param = I915_CONTEXT_PARAM_ENGINES,
> +		.size = sizeof(engines),
> +		.value = to_user_pointer(&engines)
> +	};
> +
> +	memset(&balancer, 0, sizeof(balancer));
> +	balancer.base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE;
> +
> +	igt_assert(count);
> +	balancer.num_siblings = count;
> +	memcpy(balancer.engines, ci, count * sizeof(*ci));
> +
> +	memset(&engines, 0, sizeof(engines));
> +	engines.extensions = to_user_pointer(&balancer);
> +	engines.engines[0].engine_class =
> +		I915_ENGINE_CLASS_INVALID;
> +	engines.engines[0].engine_instance =
> +		I915_ENGINE_CLASS_INVALID_NONE;
> +	memcpy(engines.engines + 1, ci, count * sizeof(*ci));
> +
> +	return __gem_context_set_param(i915, &p);
> +}
> +
> +static void set_load_balancer(int i915, uint32_t ctx,
> +			      const struct i915_engine_class_instance *ci,
> +			      unsigned int count)
> +{
> +	igt_assert_eq(__set_load_balancer(i915, ctx, ci, count), 0);
> +}
> +
> +static uint32_t load_balancer_create(int i915,
> +				     const struct i915_engine_class_instance *ci,
> +				     unsigned int count)
> +{
> +	uint32_t ctx;
> +
> +	ctx = gem_context_create(i915);
> +	set_load_balancer(i915, ctx, ci, count);
> +
> +	return ctx;
> +}
> +
> +static uint32_t __batch_create(int i915, uint32_t offset)
> +{
> +	const uint32_t bbe = MI_BATCH_BUFFER_END;
> +	uint32_t handle;
> +
> +	handle = gem_create(i915, ALIGN(offset + 4, 4096));
> +	gem_write(i915, handle, offset, &bbe, sizeof(bbe));
> +
> +	return handle;
> +}
> +
> +static uint32_t batch_create(int i915)
> +{
> +	return __batch_create(i915, 0);
> +}
> +
> +static void invalid_balancer(int i915)
> +{
> +	I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(balancer, 64);
> +	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 64);
> +	struct drm_i915_gem_context_param p = {
> +		.param = I915_CONTEXT_PARAM_ENGINES,
> +		.value = to_user_pointer(&engines)
> +	};
> +	uint32_t handle;
> +	void *ptr;
> +
> +	/*
> +	 * Assume that I915_CONTEXT_PARAM_ENGINE validates the array
> +	 * of engines[], our job is to determine if the load_balancer
> +	 * extension explodes.
> +	 */
> +
> +	for (int class = 0; class < 32; class++) {
> +		struct i915_engine_class_instance *ci;
> +		unsigned int count;
> +
> +		ci = list_engines(i915, 1 << class, &count);
> +		if (!ci)
> +			continue;
> +
> +		igt_debug("Found %d engines\n", count);
> +		igt_assert_lte(count, 64);

Hey.. you always say trust the kernel! ;)

> +
> +		p.ctx_id = gem_context_create(i915);
> +		p.size = (sizeof(struct i915_context_param_engines) +
> +				(count + 1) * sizeof(*engines.engines));

Alignment looks off.
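
I.e. the continuation lined up under the opening parenthesis:

	p.size = (sizeof(struct i915_context_param_engines) +
		  (count + 1) * sizeof(*engines.engines));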

> +
> +		memset(&engines, 0, sizeof(engines));
> +		engines.engines[0].engine_class = I915_ENGINE_CLASS_INVALID;
> +		engines.engines[0].engine_instance = I915_ENGINE_CLASS_INVALID_NONE;
> +		memcpy(engines.engines + 1, ci, count * sizeof(*ci));
> +		gem_context_set_param(i915, &p);
> +
> +		engines.extensions = -1ull;
> +		igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
> +
> +		engines.extensions = 1ull;
> +		igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
> +
> +		memset(&balancer, 0, sizeof(balancer));
> +		balancer.base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE;
> +		balancer.num_siblings = count;
> +		memcpy(balancer.engines, ci, count * sizeof(*ci));
> +
> +		engines.extensions = to_user_pointer(&balancer);
> +		gem_context_set_param(i915, &p);
> +
> +		balancer.engine_index = 1;
> +		igt_assert_eq(__gem_context_set_param(i915, &p), -EEXIST);
> +
> +		balancer.engine_index = count;
> +		igt_assert_eq(__gem_context_set_param(i915, &p), -EEXIST);
> +
> +		balancer.engine_index = count + 1;
> +		igt_assert_eq(__gem_context_set_param(i915, &p), -EINVAL);
> +
> +		balancer.engine_index = 0;
> +		gem_context_set_param(i915, &p);
> +
> +		balancer.base.next_extension = to_user_pointer(&balancer);
> +		igt_assert_eq(__gem_context_set_param(i915, &p), -EEXIST);
> +
> +		balancer.base.next_extension = -1ull;
> +		igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
> +
> +		handle = gem_create(i915, 4096 * 3);
> +		ptr = gem_mmap__gtt(i915, handle, 4096 * 3, PROT_WRITE);
> +		gem_close(i915, handle);
> +
> +		memset(&engines, 0, sizeof(engines));
> +		engines.engines[0].engine_class = I915_ENGINE_CLASS_INVALID;
> +		engines.engines[0].engine_instance = I915_ENGINE_CLASS_INVALID_NONE;
> +		engines.engines[1].engine_class = I915_ENGINE_CLASS_INVALID;
> +		engines.engines[1].engine_instance = I915_ENGINE_CLASS_INVALID_NONE;
> +		memcpy(engines.engines + 2, ci, count * sizeof(ci));
> +		p.size = (sizeof(struct i915_context_param_engines) +
> +				(count + 2) * sizeof(*engines.engines));

Alignment again.

> +		gem_context_set_param(i915, &p);
> +
> +		balancer.base.next_extension = 0;
> +		balancer.engine_index = 1;
> +		engines.extensions = to_user_pointer(&balancer);
> +		gem_context_set_param(i915, &p);
> +
> +		memcpy(ptr + 4096 - 8, &balancer, sizeof(balancer));
> +		memcpy(ptr + 8192 - 8, &balancer, sizeof(balancer));
> +		balancer.engine_index = 0;
> +
> +		engines.extensions = to_user_pointer(ptr) + 4096 - 8;
> +		gem_context_set_param(i915, &p);
> +
> +		balancer.base.next_extension = engines.extensions;
> +		engines.extensions = to_user_pointer(&balancer);
> +		gem_context_set_param(i915, &p);

mmap_gtt and unmapped area testing in one?

> +		munmap(ptr, 4096);
> +		igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
> +		engines.extensions = to_user_pointer(ptr) + 4096 - 8;
> +		igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
> +
> +		engines.extensions = to_user_pointer(ptr) + 8192 - 8;
> +		gem_context_set_param(i915, &p);
> +
> +		balancer.base.next_extension = engines.extensions;
> +		engines.extensions = to_user_pointer(&balancer);
> +		gem_context_set_param(i915, &p);
> +
> +		munmap(ptr + 8192, 4096);
> +		igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
> +		engines.extensions = to_user_pointer(ptr) + 8192 - 8;
> +		igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
> +
> +		munmap(ptr + 4096, 4096);
> +
> +		gem_context_destroy(i915, p.ctx_id);
> +		free(ci);
> +	}
> +}
> +
> +static void kick_kthreads(int period_us)
> +{
> +	sched_yield();
> +	usleep(period_us);

Yield and sleep, hm... is it ever called with zero period_us? Doesn't
seem like it. So what is it about?

> +}
> +
> +static double measure_load(int pmu, int period_us)
> +{
> +	uint64_t data[2];
> +	uint64_t d_t, d_v;
> +
> +	kick_kthreads(period_us);
> +
> +	igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
> +	d_v = -data[0];
> +	d_t = -data[1];
> +
> +	usleep(period_us);
> +
> +	igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
> +	d_v += data[0];
> +	d_t += data[1];

This -val + val trick with uint64_t works?
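
(For what it's worth, I believe it does: unsigned arithmetic wraps
modulo 2^64, so

	uint64_t d_v = -data[0];	/* == 2^64 - data[0] */
	/* ... second read refills data[] ... */
	d_v += data[0];			/* == new - old, modulo 2^64 */

and the delta is exact as long as the counter advances by less than
2^64 between the two reads.)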

> +
> +	return d_v / (double)d_t;
> +}
> +
> +static double measure_min_load(int pmu, unsigned int num, int period_us)
> +{
> +	uint64_t data[2 + num];
> +	uint64_t d_t, d_v[num];
> +	uint64_t min = -1, max = 0;
> +
> +	kick_kthreads(period_us);
> +
> +	igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
> +	for (unsigned int n = 0; n < num; n++)
> +		d_v[n] = -data[2 + n];
> +	d_t = -data[1];
> +
> +	usleep(period_us);
> +
> +	igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
> +
> +	d_t += data[1];
> +	for (unsigned int n = 0; n < num; n++) {
> +		d_v[n] += data[2 + n];
> +		igt_debug("engine[%d]: %.1f%%\n",
> +			  n, d_v[n] / (double)d_t * 100);
> +		if (d_v[n] < min)
> +			min = d_v[n];
> +		if (d_v[n] > max)
> +			max = d_v[n];
> +	}
> +
> +	igt_debug("elapsed: %"PRIu64"ns, load [%.1f, %.1f]%%\n",
> +		  d_t, min / (double)d_t * 100,  max / (double)d_t * 100);
> +
> +	return min / (double)d_t;
> +}
> +
> +static void check_individual_engine(int i915,
> +				    uint32_t ctx,
> +				    const struct i915_engine_class_instance *ci,
> +				    int idx)
> +{
> +	igt_spin_t *spin;
> +	double load;
> +	int pmu;
> +
> +	pmu = perf_i915_open(I915_PMU_ENGINE_BUSY(ci[idx].engine_class,
> +						  ci[idx].engine_instance));
> +
> +	spin = igt_spin_new(i915, .ctx = ctx, .engine = idx + 1);
> +	load = measure_load(pmu, 10000);

Hm, the usleep before the start of measuring and the one between the two
samples use the same period. The one before should be a fixed duration I
think, no?

> +	igt_spin_free(i915, spin);
> +
> +	close(pmu);
> +
> +	igt_assert_f(load > 0.90,
> +		     "engine %d (class:instance %d:%d) was found to be only %.1f%% busy\n",
> +		     idx, ci[idx].engine_class, ci[idx].engine_instance, load*100);
> +}
> +
> +static void individual(int i915)
> +{
> +	uint32_t ctx;
> +
> +	/*
> +	 * I915_CONTEXT_PARAM_ENGINE allows us to index into the user
> +	 * supplied array from gem_execbuf(). Our check is to build the
> +	 * ctx->engine[] with various different engine classes, feed in
> +	 * a spinner and then ask pmu to confirm it the expected engine
> +	 * was busy.
> +	 */
> +
> +	ctx = gem_context_create(i915);
> +
> +	for (int mask = 0; mask < 32; mask++) {
> +		struct i915_engine_class_instance *ci;
> +		unsigned int count;
> +
> +		ci = list_engines(i915, 1u << mask, &count);
> +		if (!ci)
> +			continue;
> +
> +		igt_debug("Found %d engines of class %d\n", count, mask);
> +
> +		for (int pass = 0; pass < count; pass++) { /* approx. count! */
> +			igt_permute_array(ci, count, igt_exchange_int64);

struct i915_engine_class_instance is four bytes long, so the swap func
looks wrong. Unless for some reason you want to swap in blocks of two.
Don't know. The last index would grab into random memory though. I must
be missing something or it wouldn't have worked...
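
Presumably the 4-byte helper is what's wanted (untested):

	/* struct i915_engine_class_instance is 4 bytes, i.e. sizeof(int) */
	igt_permute_array(ci, count, igt_exchange_int);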

> +			set_load_balancer(i915, ctx, ci, count);
> +			for (unsigned int n = 0; n < count; n++)
> +				check_individual_engine(i915, ctx, ci, n);
> +		}
> +
> +		free(ci);
> +	}
> +
> +	gem_context_destroy(i915, ctx);
> +	gem_quiescent_gpu(i915);
> +}
> +
> +static void indicies(int i915)

indices?

> +{
> +	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, I915_EXEC_RING_MASK + 1);
> +	struct drm_i915_gem_context_param p = {
> +		.ctx_id = gem_context_create(i915),
> +		.param = I915_CONTEXT_PARAM_ENGINES,
> +		.value = to_user_pointer(&engines)
> +	};
> +
> +	struct drm_i915_gem_exec_object2 batch = {
> +		.handle = batch_create(i915),
> +	};
> +
> +	unsigned int nengines = 0;
> +	void *balancers = NULL;
> +
> +	/*
> +	 * We can populate our engine map with multiple virtual engines.
> +	 * Do so.
> +	 */
> +
> +	for (int class = 0; class < 32; class++) {
> +		struct i915_engine_class_instance *ci;
> +		unsigned int count;
> +
> +		ci = list_engines(i915, 1u << class, &count);
> +		if (!ci)
> +			continue;
> +
> +		igt_debug("Found %d engines of class %d\n", count, class);

Maybe this debug message should go into list_engines, since it seems 
repeated a few times already.
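
Untested, at the tail of list_engines():

	igt_debug("Found %d engines for class_mask %x\n", count, class_mask);
	*out = count;
	return engines;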

> +
> +		for (int n = 0; n < count; n++) {
> +			I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(*balancer,
> +								 count);
> +
> +			engines.engines[nengines].engine_class =
> +				I915_ENGINE_CLASS_INVALID;
> +			engines.engines[nengines].engine_instance =
> +				I915_ENGINE_CLASS_INVALID_NONE;
> +
> +			balancer = calloc(sizeof(*balancer), 1);
> +			igt_assert(balancer);
> +
> +			balancer->base.name =
> +				I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE;
> +			balancer->base.next_extension =
> +				to_user_pointer(balancers);
> +			balancers = balancer;
> +
> +			balancer->engine_index = nengines++;
> +			balancer->num_siblings = count;
> +
> +			memcpy(balancer->engines,
> +			       ci, count * sizeof(*ci));
> +		}
> +		free(ci);
> +	}
> +
> +	igt_require(balancers);
> +	engines.extensions = to_user_pointer(balancers);
> +	p.size = (sizeof(struct i915_engine_class_instance) * nengines +
> +		  sizeof(struct i915_context_param_engines));
> +	gem_context_set_param(i915, &p);
> +
> +	for (unsigned int n = 0; n < nengines; n++) {
> +		struct drm_i915_gem_execbuffer2 eb = {
> +			.buffers_ptr = to_user_pointer(&batch),
> +			.buffer_count = 1,
> +			.flags = n,
> +			.rsvd1 = p.ctx_id,
> +		};
> +		igt_debug("Executing on index=%d\n", n);
> +		gem_execbuf(i915, &eb);
> +	}
> +	gem_context_destroy(i915, p.ctx_id);
> +
> +	gem_sync(i915, batch.handle);
> +	gem_close(i915, batch.handle);
> +
> +	while (balancers) {
> +		struct i915_context_engines_load_balance *b, *n;
> +
> +		b = balancers;
> +		n = from_user_pointer(b->base.next_extension);
> +		free(b);
> +
> +		balancers = n;
> +	}
> +
> +	gem_quiescent_gpu(i915);
> +}
> +
> +static void busy(int i915)
> +{
> +	uint32_t scratch = gem_create(i915, 4096);
> +
> +	/*
> +	 * Check that virtual engines are reported via GEM_BUSY.
> +	 *
> +	 * When running, the batch will be on the real engine and report
> +	 * the actual class.
> +	 *
> +	 * Prior to running, if the load-balancer is across multiple
> +	 * classes we don't know which engine the batch will
> +	 * execute on, so we report them all!
> +	 *
> +	 * However, as we only support (and test) creating a load-balancer
> +	 * from engines of only one class, that can be propagated accurately
> +	 * through to GEM_BUSY.
> +	 */
> +
> +	for (int class = 0; class < 16; class++) {
> +		struct drm_i915_gem_busy busy;
> +		struct i915_engine_class_instance *ci;
> +		unsigned int count;
> +		igt_spin_t *spin[2];
> +		uint32_t ctx;
> +
> +		ci = list_engines(i915, 1u << class, &count);
> +		if (!ci)
> +			continue;
> +
> +		igt_debug("Found %d engines of class %d\n", count, class);
> +		ctx = load_balancer_create(i915, ci, count);
> +		free(ci);
> +
> +		spin[0] = __igt_spin_new(i915,
> +					 .ctx = ctx,
> +					 .flags = IGT_SPIN_POLL_RUN);
> +		spin[1] = __igt_spin_new(i915,
> +					 .ctx = ctx,
> +					 .dependency = scratch);
> +
> +		igt_spin_busywait_until_started(spin[0]);
> +
> +		/* Running: actual class */
> +		busy.handle = spin[0]->handle;
> +		do_ioctl(i915, DRM_IOCTL_I915_GEM_BUSY, &busy);
> +		igt_assert_eq_u32(busy.busy, 1u << (class + 16));
> +
> +		/* Queued(read): expected class */
> +		busy.handle = spin[1]->handle;
> +		do_ioctl(i915, DRM_IOCTL_I915_GEM_BUSY, &busy);
> +		igt_assert_eq_u32(busy.busy, 1u << (class + 16));
> +
> +		/* Queued(write): expected class */
> +		busy.handle = scratch;
> +		do_ioctl(i915, DRM_IOCTL_I915_GEM_BUSY, &busy);
> +		igt_assert_eq_u32(busy.busy,
> +				  (1u << (class + 16)) | (class + 1));
> +
> +		igt_spin_free(i915, spin[1]);
> +		igt_spin_free(i915, spin[0]);
> +
> +		gem_context_destroy(i915, ctx);
> +	}
> +
> +	gem_close(i915, scratch);
> +	gem_quiescent_gpu(i915);
> +}
> +
> +static int add_pmu(int pmu, const struct i915_engine_class_instance *ci)
> +{
> +	return perf_i915_open_group(I915_PMU_ENGINE_BUSY(ci->engine_class,
> +							 ci->engine_instance),
> +				    pmu);
> +}
> +
> +static void full(int i915, unsigned int flags)
> +#define PULSE 0x1
> +#define LATE 0x2
> +{
> +	struct drm_i915_gem_exec_object2 batch = {
> +		.handle = batch_create(i915),
> +	};
> +
> +	if (flags & LATE)
> +		igt_require_sw_sync();
> +
> +	/*
> +	 * I915_CONTEXT_PARAM_ENGINE changes the meaning of I915_EXEC_DEFAULT
> +	 * to provide an automatic selection from the ctx->engine[]. It
> +	 * employs load-balancing to evenly distribute the workload the

The leading comment needs rewriting for accuracy. It is the load-balance
extension which _can_ redefine the meaning of I915_EXEC_DEFAULT etc. I'm
sure I didn't need to explain, but I have, just to make it clear which
part I am complaining about. :)

> +	 * array. If we submit N spinners, we expect them to be simultaneously
> +	 * running across N engines and use PMU to confirm that the entire
> +	 * set of engines are busy.

Clarify that this only holds when using N contexts.

> +	 *
> +	 * We complicate matters by interpersing shortlived tasks to challenge
> +	 * the kernel to search for space in which to insert new batches.
> +	 */
> +
> +
> +	for (int mask = 0; mask < 32; mask++) {
> +		struct i915_engine_class_instance *ci;
> +		igt_spin_t *spin = NULL;
> +		IGT_CORK_FENCE(cork);
> +		unsigned int count;
> +		double load;
> +		int fence = -1;
> +		int *pmu;
> +
> +		ci = list_engines(i915, 1u << mask, &count);
> +		if (!ci)
> +			continue;
> +
> +		igt_debug("Found %d engines of class %d\n", count, mask);
> +
> +		pmu = malloc(sizeof(*pmu) * count);
> +		igt_assert(pmu);
> +
> +		if (flags & LATE)
> +			fence = igt_cork_plug(&cork, i915);
> +
> +		pmu[0] = -1;
> +		for (unsigned int n = 0; n < count; n++) {
> +			uint32_t ctx;
> +
> +			pmu[n] = add_pmu(pmu[0], &ci[n]);
> +
> +			if (flags & PULSE) {
> +				struct drm_i915_gem_execbuffer2 eb = {
> +					.buffers_ptr = to_user_pointer(&batch),
> +					.buffer_count = 1,
> +					.rsvd2 = fence,
> +					.flags = flags & LATE ? I915_EXEC_FENCE_IN : 0,
> +				};
> +				gem_execbuf(i915, &eb);
> +			}
> +
> +			/*
> +			 * Each spinner needs to be one a new timeline,
> +			 * otherwise they will just sit in the single queue
> +			 * and not run concurrently.
> +			 */
> +			ctx = load_balancer_create(i915, ci, count);
> +
> +			if (spin == NULL) {
> +				spin = __igt_spin_new(i915, .ctx = ctx);
> +			} else {
> +				struct drm_i915_gem_execbuffer2 eb = {
> +					.buffers_ptr = spin->execbuf.buffers_ptr,
> +					.buffer_count = spin->execbuf.buffer_count,
> +					.rsvd1 = ctx,
> +					.rsvd2 = fence,
> +					.flags = flags & LATE ? I915_EXEC_FENCE_IN : 0,
> +				};
> +				gem_execbuf(i915, &eb);
> +			}
> +
> +			gem_context_destroy(i915, ctx);
> +		}
> +
> +		if (flags & LATE) {
> +			igt_cork_unplug(&cork);
> +			close(fence);
> +		}
> +
> +		load = measure_min_load(pmu[0], count, 10000);
> +		igt_spin_free(i915, spin);
> +
> +		close(pmu[0]);
> +		free(pmu);
> +
> +		free(ci);
> +
> +		igt_assert_f(load > 0.90,
> +			     "minimum load for %d x class:%d was found to be only %.1f%% busy\n",
> +			     count, mask, load*100);
> +		gem_quiescent_gpu(i915);
> +	}
> +
> +	gem_close(i915, batch.handle);
> +	gem_quiescent_gpu(i915);
> +}
> +
> +static void nop(int i915)
> +{
> +	struct drm_i915_gem_exec_object2 batch = {
> +		.handle = batch_create(i915),
> +	};
> +
> +	for (int mask = 0; mask < 32; mask++) {

s/mask/class/

> +		struct i915_engine_class_instance *ci;
> +		unsigned int count;
> +		uint32_t ctx;
> +
> +		ci = list_engines(i915, 1u << mask, &count);
> +		if (!ci)
> +			continue;
> +
> +		if (count < 2) {
> +			free(ci);
> +			continue;

Benchmark-only subtest for a real virtual engine?

> +		}
> +
> +		igt_debug("Found %d engines of class %d\n", count, mask);
> +		ctx = load_balancer_create(i915, ci, count);
> +
> +		for (int n = 0; n < count; n++) {
> +			struct drm_i915_gem_execbuffer2 execbuf = {
> +				.buffers_ptr = to_user_pointer(&batch),
> +				.buffer_count = 1,
> +				.flags = n + 1,
> +				.rsvd1 = ctx,
> +			};
> +			struct timespec tv = {};
> +			unsigned long nops;
> +			double t;
> +
> +			igt_nsec_elapsed(&tv);
> +			nops = 0;
> +			do {
> +				for (int r = 0; r < 1024; r++)
> +					gem_execbuf(i915, &execbuf);
> +				nops += 1024;
> +			} while (igt_seconds_elapsed(&tv) < 2);
> +			gem_sync(i915, batch.handle);
> +
> +			t = igt_nsec_elapsed(&tv) * 1e-3 / nops;
> +			igt_info("%x:%d %.3fus\n", mask, n, t);

Class in decimal is better I think.

And some descriptive labels on the info messages would be good, like
"individual engine", "virtual engine" etc.

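For instance (exact strings up to you, untested):

	igt_info("individual engine %d:%d: %.3fus\n", mask, n, t);
	...
	igt_info("virtual engine %d:*: %.3fus\n", mask, t);
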
> +		}
> +
> +		{
> +			struct drm_i915_gem_execbuffer2 execbuf = {
> +				.buffers_ptr = to_user_pointer(&batch),
> +				.buffer_count = 1,
> +				.rsvd1 = ctx,
> +			};
> +			struct timespec tv = {};
> +			unsigned long nops;
> +			double t;
> +
> +			igt_nsec_elapsed(&tv);
> +			nops = 0;
> +			do {
> +				for (int r = 0; r < 1024; r++)
> +					gem_execbuf(i915, &execbuf);
> +				nops += 1024;
> +			} while (igt_seconds_elapsed(&tv) < 2);
> +			gem_sync(i915, batch.handle);
> +
> +			t = igt_nsec_elapsed(&tv) * 1e-3 / nops;
> +			igt_info("%x:* %.3fus\n", mask, t);
> +		}
> +
> +
> +		igt_fork(child, count) {
> +			struct drm_i915_gem_execbuffer2 execbuf = {
> +				.buffers_ptr = to_user_pointer(&batch),
> +				.buffer_count = 1,
> +				.flags = child + 1,
> +				.rsvd1 = gem_context_clone(i915, ctx,
> +							   I915_CONTEXT_CLONE_ENGINES, 0),
> +			};
> +			struct timespec tv = {};
> +			unsigned long nops;
> +			double t;
> +
> +			igt_nsec_elapsed(&tv);
> +			nops = 0;
> +			do {
> +				for (int r = 0; r < 1024; r++)
> +					gem_execbuf(i915, &execbuf);
> +				nops += 1024;
> +			} while (igt_seconds_elapsed(&tv) < 2);
> +			gem_sync(i915, batch.handle);
> +
> +			t = igt_nsec_elapsed(&tv) * 1e-3 / nops;
> +			igt_info("[%d] %x:%d %.3fus\n", child, mask, child, t);
> +
> +			memset(&tv, 0, sizeof(tv));
> +			execbuf.flags = 0;
> +
> +			igt_nsec_elapsed(&tv);
> +			nops = 0;
> +			do {
> +				for (int r = 0; r < 1024; r++)
> +					gem_execbuf(i915, &execbuf);
> +				nops += 1024;
> +			} while (igt_seconds_elapsed(&tv) < 2);
> +			gem_sync(i915, batch.handle);
> +
> +			t = igt_nsec_elapsed(&tv) * 1e-3 / nops;
> +			igt_info("[%d] %x:* %.3fus\n", child, mask, t);
> +
> +			gem_context_destroy(i915, execbuf.rsvd1);
> +		}
> +
> +		igt_waitchildren();
> +
> +		gem_context_destroy(i915, ctx);
> +		free(ci);
> +	}
> +
> +	gem_close(i915, batch.handle);
> +	gem_quiescent_gpu(i915);
> +}
> +
> +static void ping(int i915, uint32_t ctx, unsigned int engine)
> +{
> +	struct drm_i915_gem_exec_object2 obj = {
> +		.handle = batch_create(i915),
> +	};
> +	struct drm_i915_gem_execbuffer2 execbuf = {
> +		.buffers_ptr = to_user_pointer(&obj),
> +		.buffer_count = 1,
> +		.flags = engine,
> +		.rsvd1 = ctx,
> +	};
> +	gem_execbuf(i915, &execbuf);
> +	gem_sync(i915, obj.handle);
> +	gem_close(i915, obj.handle);
> +}
> +
> +static void semaphore(int i915)
> +{
> +	uint32_t block[2], scratch;
> +	igt_spin_t *spin[3];
> +
> +	/*
> +	 * If we are using HW semaphores to launch serialised requests
> +	 * on different engine concurrently, we want to verify that real
> +	 * work is unimpeded.
> +	 */
> +	igt_require(gem_scheduler_has_preemption(i915));
> +
> +	block[0] = gem_context_create(i915);
> +	block[1] = gem_context_create(i915);
> +
> +	scratch = gem_create(i915, 4096);
> +	spin[2] = igt_spin_new(i915, .dependency = scratch);
> +	for (int mask = 1; mask < 32; mask++) {

s/mask/class/ throughout.

> +		struct i915_engine_class_instance *ci;
> +		unsigned int count;
> +		uint32_t vip;
> +
> +		ci = list_engines(i915, 1u << mask, &count);
> +		if (!ci)
> +			continue;
> +
> +		if (count < ARRAY_SIZE(block))
> +			continue;
> +
> +		/* Ensure that we completely occupy all engines in this group */
> +		count = ARRAY_SIZE(block);
> +
> +		for (int i = 0; i < count; i++) {
> +			set_load_balancer(i915, block[i], ci, count);
> +			spin[i] = __igt_spin_new(i915,
> +						       .ctx = block[i],
> +						       .dependency = scratch);

Alignment.

> +		}
> +
> +		/*
> +		 * Either we haven't blocked both engines with semaphores,
> +		 * or we let the vip through. If not, we hang.
> +		 */
> +		vip = gem_context_create(i915);
> +		set_load_balancer(i915, vip, ci, count);
> +		ping(i915, vip, 0);
> +		gem_context_destroy(i915, vip);
> +
> +		for (int i = 0; i < count; i++)
> +			igt_spin_free(i915, spin[i]);
> +
> +		free(ci);
> +	}
> +	igt_spin_free(i915, spin[2]);
> +	gem_close(i915, scratch);
> +
> +	gem_context_destroy(i915, block[1]);
> +	gem_context_destroy(i915, block[0]);
> +
> +	gem_quiescent_gpu(i915);
> +}
> +
> +static void smoketest(int i915, int timeout)
> +{
> +	struct drm_i915_gem_exec_object2 batch[2] = {
> +		{ .handle = __batch_create(i915, 16380) }
> +	};
> +	unsigned int ncontext = 0;
> +	uint32_t *contexts = NULL;
> +	uint32_t *handles = NULL;
> +
> +	igt_require_sw_sync();
> +
> +	for (int mask = 0; mask < 32; mask++) {
> +		struct i915_engine_class_instance *ci;
> +		unsigned int count = 0;
> +
> +		ci = list_engines(i915, 1u << mask, &count);
> +		if (!ci || count < 2) {
> +			free(ci);
> +			continue;
> +		}
> +
> +		igt_debug("Found %d engines of class %d\n", count, mask);
> +
> +		ncontext += 128;
> +		contexts = realloc(contexts, sizeof(*contexts) * ncontext);
> +		igt_assert(contexts);
> +
> +		for (unsigned int n = ncontext - 128; n < ncontext; n++) {
> +			contexts[n] = load_balancer_create(i915, ci, count);
> +			igt_assert(contexts[n]);
> +		}
> +
> +		free(ci);
> +	}
> +	igt_debug("Created %d virtual engines (one per context)\n", ncontext);
> +	igt_require(ncontext);
> +
> +	contexts = realloc(contexts, sizeof(*contexts) * ncontext * 4);
> +	igt_assert(contexts);
> +	memcpy(contexts + ncontext, contexts, ncontext * sizeof(*contexts));
> +	ncontext *= 2;
> +	memcpy(contexts + ncontext, contexts, ncontext * sizeof(*contexts));
> +	ncontext *= 2;
> +
> +	handles = malloc(sizeof(*handles) * ncontext);
> +	igt_assert(handles);
> +	for (unsigned int n = 0; n < ncontext; n++)
> +		handles[n] = gem_create(i915, 4096);
> +
> +	igt_until_timeout(timeout) {
> +		unsigned int count = 1 + (rand() % (ncontext - 1));
> +		IGT_CORK_FENCE(cork);
> +		int fence = igt_cork_plug(&cork, i915);
> +
> +		for (unsigned int n = 0; n < count; n++) {
> +			struct drm_i915_gem_execbuffer2 eb = {
> +				.buffers_ptr = to_user_pointer(batch),
> +				.buffer_count = ARRAY_SIZE(batch),
> +				.rsvd1 = contexts[n],
> +				.rsvd2 = fence,
> +				.flags = I915_EXEC_BATCH_FIRST | I915_EXEC_FENCE_IN,
> +			};
> +			batch[1].handle = handles[n];
> +			gem_execbuf(i915, &eb);
> +		}
> +		igt_permute_array(handles, count, igt_exchange_int);
> +
> +		igt_cork_unplug(&cork);
> +		for (unsigned int n = 0; n < count; n++)
> +			gem_sync(i915, handles[n]);
> +
> +		close(fence);
> +	}
> +
> +	for (unsigned int n = 0; n < ncontext; n++) {
> +		gem_close(i915, handles[n]);
> +		__gem_context_destroy(i915, contexts[n]);
> +	}
> +	free(handles);
> +	free(contexts);
> +	gem_close(i915, batch[0].handle);
> +}
> +
> +static bool has_context_engines(int i915)
> +{
> +	struct drm_i915_gem_context_param p = {
> +		.param = I915_CONTEXT_PARAM_ENGINES,
> +	};
> +
> +	return __gem_context_set_param(i915, &p) == 0;
> +}
> +
> +static bool has_load_balancer(int i915)
> +{
> +	struct i915_engine_class_instance ci = {};
> +	uint32_t ctx;
> +	int err;
> +
> +	ctx = gem_context_create(i915);
> +	err = __set_load_balancer(i915, ctx, &ci, 1);
> +	gem_context_destroy(i915, ctx);
> +
> +	return err == 0;
> +}
> +
> +igt_main
> +{
> +	int i915 = -1;
> +
> +	igt_skip_on_simulation();
> +
> +	igt_fixture {
> +		i915 = drm_open_driver(DRIVER_INTEL);
> +		igt_require_gem(i915);
> +
> +		gem_require_contexts(i915);
> +		igt_require(has_context_engines(i915));
> +		igt_require(has_load_balancer(i915));
> +
> +		igt_fork_hang_detector(i915);
> +	}
> +
> +	igt_subtest("invalid-balancer")
> +		invalid_balancer(i915);
> +
> +	igt_subtest("individual")
> +		individual(i915);
> +
> +	igt_subtest("indicies")
> +		indicies(i915);
> +
> +	igt_subtest("busy")
> +		busy(i915);
> +
> +	igt_subtest_group {
> +		static const struct {
> +			const char *name;
> +			unsigned int flags;
> +		} phases[] = {
> +			{ "", 0 },
> +			{ "-pulse", PULSE },
> +			{ "-late", LATE },
> +			{ "-late-pulse", PULSE | LATE },
> +			{ }
> +		};
> +		for (typeof(*phases) *p = phases; p->name; p++)
> +			igt_subtest_f("full%s", p->name)
> +				full(i915, p->flags);
> +	}
> +
> +	igt_subtest("nop")
> +		nop(i915);
> +
> +	igt_subtest("semaphore")
> +		semaphore(i915);
> +
> +	igt_subtest("smoke")
> +		smoketest(i915, 20);
> +
> +	igt_fixture {
> +		igt_stop_hang_detector();
> +	}
> +}
> diff --git a/tests/meson.build b/tests/meson.build
> index 7e0089e74..eeea3611d 100644
> --- a/tests/meson.build
> +++ b/tests/meson.build
> @@ -288,6 +288,13 @@ test_executables += executable('gem_eio',
>   	   install : true)
>   test_list += 'gem_eio'
>   
> +test_executables += executable('gem_exec_balancer', 'i915/gem_exec_balancer.c',
> +	   dependencies : test_deps + [ lib_igt_perf ],
> +	   install_dir : libexecdir,
> +	   install_rpath : libexecdir_rpathdir,
> +	   install : true)
> +test_progs += 'gem_exec_balancer'
> +
>   test_executables += executable('gem_mocs_settings',
>   	   join_paths('i915', 'gem_mocs_settings.c'),
>   	   dependencies : test_deps + [ lib_igt_perf ],
> 

Regards,

Tvrtko
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH i-g-t 14/16] i915/gem_exec_balancer: Exercise bonded pairs
  2019-05-08 10:09   ` [igt-dev] " Chris Wilson
@ 2019-05-15 10:58     ` Tvrtko Ursulin
  -1 siblings, 0 replies; 92+ messages in thread
From: Tvrtko Ursulin @ 2019-05-15 10:58 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: igt-dev


On 08/05/2019 11:09, Chris Wilson wrote:
> The submit-fence + load_balancing APIs allow us to execute a named
> pair of engines in parallel: by submitting a request to one
> engine, we can then use the generated submit-fence to submit a second
> request to another engine and have it execute at the same time.
> Furthermore, by specifying bonded pairs, we can direct the virtual
> engine to use a particular engine in parallel to the first request.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   tests/i915/gem_exec_balancer.c | 234 +++++++++++++++++++++++++++++++--
>   1 file changed, 224 insertions(+), 10 deletions(-)
> 
> diff --git a/tests/i915/gem_exec_balancer.c b/tests/i915/gem_exec_balancer.c
> index 25195d478..20ad66727 100644
> --- a/tests/i915/gem_exec_balancer.c
> +++ b/tests/i915/gem_exec_balancer.c
> @@ -98,9 +98,35 @@ list_engines(int i915, uint32_t class_mask, unsigned int *out)
>   	return engines;
>   }
>   
> +static int __set_engines(int i915, uint32_t ctx,
> +			 const struct i915_engine_class_instance *ci,
> +			 unsigned int count)
> +{
> +	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, count);
> +	struct drm_i915_gem_context_param p = {
> +		.ctx_id = ctx,
> +		.param = I915_CONTEXT_PARAM_ENGINES,
> +		.size = sizeof(engines),
> +		.value = to_user_pointer(&engines)
> +	};
> +
> +	engines.extensions = 0;
> +	memcpy(engines.engines, ci, sizeof(engines.engines));
> +
> +	return __gem_context_set_param(i915, &p);
> +}
> +
> +static void set_engines(int i915, uint32_t ctx,
> +			const struct i915_engine_class_instance *ci,
> +			unsigned int count)
> +{
> +	igt_assert_eq(__set_engines(i915, ctx, ci, count), 0);
> +}
> +
>   static int __set_load_balancer(int i915, uint32_t ctx,
>   			       const struct i915_engine_class_instance *ci,
> -			       unsigned int count)
> +			       unsigned int count,
> +			       void *ext)
>   {
>   	I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(balancer, count);
>   	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 1 + count);
> @@ -113,6 +139,7 @@ static int __set_load_balancer(int i915, uint32_t ctx,
>   
>   	memset(&balancer, 0, sizeof(balancer));
>   	balancer.base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE;
> +	balancer.base.next_extension = to_user_pointer(ext);
>   
>   	igt_assert(count);
>   	balancer.num_siblings = count;
> @@ -131,9 +158,10 @@ static int __set_load_balancer(int i915, uint32_t ctx,
>   
>   static void set_load_balancer(int i915, uint32_t ctx,
>   			      const struct i915_engine_class_instance *ci,
> -			      unsigned int count)
> +			      unsigned int count,
> +			      void *ext)
>   {
> -	igt_assert_eq(__set_load_balancer(i915, ctx, ci, count), 0);
> +	igt_assert_eq(__set_load_balancer(i915, ctx, ci, count, ext), 0);
>   }
>   
>   static uint32_t load_balancer_create(int i915,
> @@ -143,7 +171,7 @@ static uint32_t load_balancer_create(int i915,
>   	uint32_t ctx;
>   
>   	ctx = gem_context_create(i915);
> -	set_load_balancer(i915, ctx, ci, count);
> +	set_load_balancer(i915, ctx, ci, count, NULL);
>   
>   	return ctx;
>   }
> @@ -288,6 +316,74 @@ static void invalid_balancer(int i915)
>   	}
>   }
>   
> +static void invalid_bonds(int i915)
> +{
> +	I915_DEFINE_CONTEXT_ENGINES_BOND(bonds[16], 1);
> +	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 1);
> +	struct drm_i915_gem_context_param p = {
> +		.ctx_id = gem_context_create(i915),
> +		.param = I915_CONTEXT_PARAM_ENGINES,
> +		.value = to_user_pointer(&engines),
> +		.size = sizeof(engines),
> +	};
> +	uint32_t handle;
> +	void *ptr;
> +
> +	memset(&engines, 0, sizeof(engines));
> +	gem_context_set_param(i915, &p);
> +
> +	memset(bonds, 0, sizeof(bonds));
> +	for (int n = 0; n < ARRAY_SIZE(bonds); n++) {
> +		bonds[n].base.name = I915_CONTEXT_ENGINES_EXT_BOND;
> +		bonds[n].base.next_extension =
> +			n ? to_user_pointer(&bonds[n - 1]) : 0;
> +		bonds[n].num_bonds = 1;
> +	}
> +	engines.extensions = to_user_pointer(&bonds);
> +	gem_context_set_param(i915, &p);
> +
> +	bonds[0].base.next_extension = -1ull;
> +	igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
> +
> +	bonds[0].base.next_extension = to_user_pointer(&bonds[0]);
> +	igt_assert_eq(__gem_context_set_param(i915, &p), -E2BIG);
> +
> +	engines.extensions = to_user_pointer(&bonds[1]);
> +	igt_assert_eq(__gem_context_set_param(i915, &p), -E2BIG);
> +	bonds[0].base.next_extension = 0;
> +	gem_context_set_param(i915, &p);
> +
> +	handle = gem_create(i915, 4096 * 3);
> +	ptr = gem_mmap__gtt(i915, handle, 4096 * 3, PROT_WRITE);
> +	gem_close(i915, handle);
> +
> +	memcpy(ptr + 4096, &bonds[0], sizeof(bonds[0]));
> +	engines.extensions = to_user_pointer(ptr) + 4096;
> +	gem_context_set_param(i915, &p);
> +
> +	memcpy(ptr, &bonds[0], sizeof(bonds[0]));
> +	bonds[0].base.next_extension = to_user_pointer(ptr);
> +	memcpy(ptr + 4096, &bonds[0], sizeof(bonds[0]));
> +	gem_context_set_param(i915, &p);
> +
> +	munmap(ptr, 4096);
> +	igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
> +
> +	bonds[0].base.next_extension = 0;
> +	memcpy(ptr + 8192, &bonds[0], sizeof(bonds[0]));
> +	bonds[0].base.next_extension = to_user_pointer(ptr) + 8192;
> +	memcpy(ptr + 4096, &bonds[0], sizeof(bonds[0]));
> +	gem_context_set_param(i915, &p);
> +
> +	munmap(ptr + 8192, 4096);
> +	igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
> +
> +	munmap(ptr + 4096, 4096);
> +	igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
> +
> +	gem_context_destroy(i915, p.ctx_id);
> +}
> +
>   static void kick_kthreads(int period_us)
>   {
>   	sched_yield();
> @@ -397,7 +493,7 @@ static void individual(int i915)
>   
>   		for (int pass = 0; pass < count; pass++) { /* approx. count! */
>   			igt_permute_array(ci, count, igt_exchange_int64);
> -			set_load_balancer(i915, ctx, ci, count);
> +			set_load_balancer(i915, ctx, ci, count, NULL);
>   			for (unsigned int n = 0; n < count; n++)
>   				check_individual_engine(i915, ctx, ci, n);
>   		}
> @@ -409,6 +505,115 @@ static void individual(int i915)
>   	gem_quiescent_gpu(i915);
>   }
>   
> +static void bonded(int i915, unsigned int flags)
> +#define CORK 0x1
> +{
> +	I915_DEFINE_CONTEXT_ENGINES_BOND(bonds[16], 1);
> +	struct i915_engine_class_instance *master_engines;
> +	uint32_t master;
> +
> +	/*
> +	 * I915_CONTEXT_PARAM_ENGINE provides an extension that allows us
> +	 * to specify which engine(s) to pair with a parallel (EXEC_SUBMIT)
> +	 * request submitted to another engine.
> +	 */
> +
> +	master = gem_queue_create(i915);
> +
> +	memset(bonds, 0, sizeof(bonds));
> +	for (int n = 0; n < ARRAY_SIZE(bonds); n++) {
> +		bonds[n].base.name = I915_CONTEXT_ENGINES_EXT_BOND;
> +		bonds[n].base.next_extension =
> +			n ? to_user_pointer(&bonds[n - 1]) : 0;
> +		bonds[n].num_bonds = 1;
> +	}
> +
> +	for (int mask = 0; mask < 32; mask++) {

s/mask/class/

> +		unsigned int count, limit;
> +		struct i915_engine_class_instance *siblings;
> +		uint32_t ctx;
> +		int n;
> +
> +		siblings = list_engines(i915, 1u << mask, &count);
> +		if (!siblings)
> +			continue;
> +
> +		if (count < 2) {
> +			free(siblings);
> +			continue;
> +		}
> +
> +		igt_debug("Found %d engines of class %d\n", count, mask);
> +
> +		master_engines = list_engines(i915, ~(1u << mask), &limit);
> +		set_engines(i915, master, master_engines, limit);
> +
> +		limit = min(count, limit);

igt_assert(limit <= ARRAY_SIZE(bonds));

> +		for (n = 0; n < limit; n++) {
> +			bonds[n].master = master_engines[n];
> +			bonds[n].engines[0] = siblings[n];
> +		}
> +
> +		ctx = gem_context_clone(i915,
> +				       	master, I915_CONTEXT_CLONE_VM,
> +					I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
> +		set_load_balancer(i915, ctx, siblings, count, &bonds[limit - 1]);
> +
> +		for (n = 0; n < limit; n++) {
> +			struct drm_i915_gem_execbuffer2 eb;
> +			IGT_CORK_HANDLE(cork);
> +			igt_spin_t *spin, *plug;
> +			double load;
> +			int pmu;
> +
> +			igt_assert(siblings[n].engine_class != master_engines[n].engine_class);
> +
> +			pmu = perf_i915_open(I915_PMU_ENGINE_BUSY(siblings[n].engine_class,
> +								  siblings[n].engine_instance));
> +
> +			plug = NULL;
> +			if (flags & CORK) {
> +				plug = __igt_spin_new(i915,
> +						      .ctx = master,
> +						      .engine = n,
> +						      .dependency = igt_cork_plug(&cork, i915));
> +			}
> +
> +			spin = __igt_spin_new(i915,
> +					      .ctx = master,
> +					      .engine = n,
> +					      .flags = IGT_SPIN_FENCE_OUT);
> +
> +			eb = spin->execbuf;
> +			eb.rsvd1 = ctx;
> +			eb.rsvd2 = spin->out_fence;
> +			eb.flags = I915_EXEC_FENCE_SUBMIT;
> +			gem_execbuf(i915, &eb);
> +
> +			if (plug) {
> +				igt_cork_unplug(&cork);
> +				igt_spin_free(i915, plug);
> +			}
> +
> +			load = measure_load(pmu, 10000);
> +			igt_spin_free(i915, spin);
> +
> +			close(pmu);
> +
> +			igt_assert_f(load > 0.90,
> +				     "engine %d (class:instance %d:%d) was found to be only %.1f%% busy\n",
> +				     n, siblings[n].engine_class, siblings[n].engine_instance,
> +				     load*100);

Master also needs to be checked I think. You have the infrastructure to 
open two pmus in the previous patch so should be easy.
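
Untested sketch, reusing add_pmu()/measure_min_load() from the previous
patch:

	int pmu = -1;

	pmu = add_pmu(pmu, &master_engines[n]);	/* group leader */
	add_pmu(pmu, &siblings[n]);		/* bonded sibling joins */

	/* ... submit the bonded pair as above ... */

	load = measure_min_load(pmu, 2, 10000);
	igt_assert_f(load > 0.90,
		     "master and bonded sibling not both busy\n");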

> +		}
> +
> +		gem_context_destroy(i915, ctx);
> +		free(master_engines);
> +		free(siblings);
> +	}
> +
> +	gem_context_destroy(i915, master);
> +}
> +
>   static void indicies(int i915)
>   {
>   	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, I915_EXEC_RING_MASK + 1);
> @@ -856,10 +1061,10 @@ static void semaphore(int i915)
>   		count = ARRAY_SIZE(block);
>   
>   		for (int i = 0; i < count; i++) {
> -			set_load_balancer(i915, block[i], ci, count);
> +			set_load_balancer(i915, block[i], ci, count, NULL);
>   			spin[i] = __igt_spin_new(i915,
> -						       .ctx = block[i],
> -						       .dependency = scratch);
> +						 .ctx = block[i],
> +						 .dependency = scratch);
>   		}
>   
>   		/*
> @@ -867,7 +1072,7 @@ static void semaphore(int i915)
>   		 * or we let the vip through. If not, we hang.
>   		 */
>   		vip = gem_context_create(i915);
> -		set_load_balancer(i915, vip, ci, count);
> +		set_load_balancer(i915, vip, ci, count, NULL);
>   		ping(i915, vip, 0);
>   		gem_context_destroy(i915, vip);
>   
> @@ -984,7 +1189,7 @@ static bool has_load_balancer(int i915)
>   	int err;
>   
>   	ctx = gem_context_create(i915);
> -	err = __set_load_balancer(i915, ctx, &ci, 1);
> +	err = __set_load_balancer(i915, ctx, &ci, 1, NULL);
>   	gem_context_destroy(i915, ctx);
>   
>   	return err == 0;
> @@ -1010,6 +1215,9 @@ igt_main
>   	igt_subtest("invalid-balancer")
>   		invalid_balancer(i915);
>   
> +	igt_subtest("invalid-bonds")
> +		invalid_bonds(i915);
> +
>   	igt_subtest("individual")
>   		individual(i915);
>   
> @@ -1044,6 +1252,12 @@ igt_main
>   	igt_subtest("smoke")
>   		smoketest(i915, 20);
>   
> +	igt_subtest("bonded-imm")
> +		bonded(i915, 0);
> +
> +	igt_subtest("bonded-cork")
> +		bonded(i915, CORK);
> +
>   	igt_fixture {
>   		igt_stop_hang_detector();
>   	}
> 

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH i-g-t 05/16] i915/gem_ctx_create: Basic checks for constructor properties
  2019-05-14 10:15     ` [igt-dev] " Tvrtko Ursulin
@ 2019-05-15 19:05       ` Chris Wilson
  -1 siblings, 0 replies; 92+ messages in thread
From: Chris Wilson @ 2019-05-15 19:05 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx; +Cc: igt-dev

Quoting Tvrtko Ursulin (2019-05-14 11:15:12)
> 
> On 08/05/2019 11:09, Chris Wilson wrote:
> > Check that the extended create interface accepts setparam.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >   tests/i915/gem_ctx_create.c | 225 ++++++++++++++++++++++++++++++++++--
> >   1 file changed, 213 insertions(+), 12 deletions(-)
> > 
> > diff --git a/tests/i915/gem_ctx_create.c b/tests/i915/gem_ctx_create.c
> > index a664070db..9b4fddbe7 100644
> > --- a/tests/i915/gem_ctx_create.c
> > +++ b/tests/i915/gem_ctx_create.c
> > @@ -33,6 +33,7 @@
> >   #include <time.h>
> >   
> >   #include "igt_rand.h"
> > +#include "sw_sync.h"
> >   
> >   #define LOCAL_I915_EXEC_BSD_SHIFT      (13)
> >   #define LOCAL_I915_EXEC_BSD_MASK       (3 << LOCAL_I915_EXEC_BSD_SHIFT)
> > @@ -45,12 +46,33 @@ static unsigned all_nengine;
> >   static unsigned ppgtt_engines[16];
> >   static unsigned ppgtt_nengine;
> >   
> > -static int __gem_context_create_local(int fd, struct drm_i915_gem_context_create *arg)
> > +static int create_ioctl(int fd, struct drm_i915_gem_context_create *arg)
> >   {
> > -     int ret = 0;
> > -     if (drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, arg))
> > -             ret = -errno;
> > -     return ret;
> > +     int err;
> > +
> > +     err = 0;
> > +     if (igt_ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, arg)) {
> > +             err = -errno;
> > +             igt_assert(err);
> > +     }
> > +
> > +     errno = 0;
> > +     return err;
> > +}
> > +
> > +static int create_ext_ioctl(int i915,
> > +                         struct drm_i915_gem_context_create_ext *arg)
> > +{
> > +     int err;
> > +
> > +     err = 0;
> > +     if (igt_ioctl(i915, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, arg)) {
> > +             err = -errno;
> > +             igt_assume(err);
> > +     }
> > +
> > +     errno = 0;
> > +     return err;
> >   }
> >   
> >   static double elapsed(const struct timespec *start,
> > @@ -308,6 +330,187 @@ static void maximum(int fd, int ncpus, unsigned mode)
> >       free(contexts);
> >   }
> >   
> > +static void basic_ext_param(int i915)
> > +{
> > +     struct drm_i915_gem_context_create_ext_setparam ext = {
> > +             { .name = I915_CONTEXT_CREATE_EXT_SETPARAM },
> > +     };
> > +     struct drm_i915_gem_context_create_ext create = {
> > +             .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS
> > +     };
> > +     struct drm_i915_gem_context_param get;
> > +
> > +     igt_require(create_ext_ioctl(i915, &create) == 0);
> > +     gem_context_destroy(i915, create.ctx_id);
> > +
> > +     create.extensions = -1ull;
> > +     igt_assert_eq(create_ext_ioctl(i915, &create), -EFAULT);
> > +
> > +     create.extensions = to_user_pointer(&ext);
> > +     igt_assert_eq(create_ext_ioctl(i915, &create), -EINVAL);
> > +
> > +     ext.param.param = I915_CONTEXT_PARAM_PRIORITY;
> > +     if (create_ext_ioctl(i915, &create) != -ENODEV) {
> > +             gem_context_destroy(i915, create.ctx_id);
> > +
> > +             ext.base.next_extension = -1ull;
> > +             igt_assert_eq(create_ext_ioctl(i915, &create), -EFAULT);
> > +             ext.base.next_extension = to_user_pointer(&ext);
> > +             igt_assert_eq(create_ext_ioctl(i915, &create), -E2BIG);
> > +             ext.base.next_extension = 0;
> > +
> > +             ext.param.value = 32;
> > +             igt_assert_eq(create_ext_ioctl(i915, &create), 0);
> > +
> > +             memset(&get, 0, sizeof(get));
> > +             get.ctx_id = create.ctx_id;
> > +             get.param = I915_CONTEXT_PARAM_PRIORITY;
> > +             gem_context_get_param(i915, &get);
> > +             igt_assert_eq(get.value, ext.param.value);
> > +
> > +             gem_context_destroy(i915, create.ctx_id);
> > +     }
> > +}
> > +
> > +static void check_single_timeline(int i915, uint32_t ctx, int num_engines)
> > +{
> > +#define RCS_TIMESTAMP (0x2000 + 0x358)
> > +     const int gen = intel_gen(intel_get_drm_devid(i915));
> > +     const int has_64bit_reloc = gen >= 8;
> > +     struct drm_i915_gem_exec_object2 results = { .handle = gem_create(i915, 4096) };
> > +     const uint32_t bbe = MI_BATCH_BUFFER_END;
> > +     int timeline = sw_sync_timeline_create();
> > +     uint32_t last, *map;
> > +
> > +     {
> > +             struct drm_i915_gem_execbuffer2 execbuf = {
> > +                     .buffers_ptr = to_user_pointer(&results),
> > +                     .buffer_count = 1,
> > +                     .rsvd1 = ctx,
> > +             };
> > +             gem_write(i915, results.handle, 0, &bbe, sizeof(bbe));
> > +             gem_execbuf(i915, &execbuf);
> > +             results.flags = EXEC_OBJECT_PINNED;
> > +     }
> > +
> > +     for (int i = 0; i < num_engines; i++) {
> > +             struct drm_i915_gem_exec_object2 obj[2] = {
> > +                     results, /* write hazard lies! */
> > +                     { .handle = gem_create(i915, 4096) },
> > +             };
> > +             struct drm_i915_gem_execbuffer2 execbuf = {
> > +                     .buffers_ptr = to_user_pointer(obj),
> > +                     .buffer_count = 2,
> > +                     .rsvd1 = ctx,
> > +                     .rsvd2 = sw_sync_timeline_create_fence(timeline, num_engines - i),
> > +                     .flags = i | I915_EXEC_FENCE_IN,
> > +             };
> > +             uint64_t offset = results.offset + 4 * i;
> > +             uint32_t *cs;
> > +             int j = 0;
> > +
> > +             cs = gem_mmap__cpu(i915, obj[1].handle, 0, 4096, PROT_WRITE);
> > +
> > +             cs[j] = 0x24 << 23 | 1; /* SRM */
> > +             if (has_64bit_reloc)
> > +                     cs[j]++;
> > +             j++;
> > +             cs[j++] = RCS_TIMESTAMP;
> > +             cs[j++] = offset;
> > +             if (has_64bit_reloc)
> > +                     cs[j++] = offset >> 32;
> > +             cs[j++] = MI_BATCH_BUFFER_END;
> > +
> > +             munmap(cs, 4096);
> > +
> > +             gem_execbuf(i915, &execbuf);
> > +             gem_close(i915, obj[1].handle);
> > +             close(execbuf.rsvd2);
> > +     }
> > +     close(timeline);
> > +     gem_sync(i915, results.handle);
> > +
> > +     map = gem_mmap__cpu(i915, results.handle, 0, 4096, PROT_READ);
> > +     gem_set_domain(i915, results.handle, I915_GEM_DOMAIN_CPU, 0);
> > +     gem_close(i915, results.handle);
> > +
> > +     last = map[0];
> > +     for (int i = 1; i < num_engines; i++) {
> > +             igt_assert_f((map[i] - last) > 0,
> > +                          "Engine instance [%d] executed too early: this:%x, last:%x\n",
> > +                          i, map[i], last);
> > +             last = map[i];
> > +     }
> 
> Hm.. aren't two sw fences (two seqnos) just a needless complication, 
> since the execution order in a single timeline is controlled by 
> submission order? The statement is true only when compounded with the 
> fact that you signal both fences at the same time. I am thinking of 
> what would happen if it wasn't a single-timeline context: the fences 
> would be signalled in order, but execution would not have to happen in 
> order. That it does is a property of the single timeline, not of fence 
> ordering. So two input fences with two seqnos is misleading; a single 
> plug would do, I think.

But that would not detect the case when it was multiple timelines...
 
> Or are you thinking to nudge the driver into doing the right thing? But 
> in that case I think you'd need to manually advance the first seqno 
> (2nd batch) first and wait a bit to check it hasn't been executed. Then 
> signal the second seqno (first batch) and run the above check to see 
> that they have been executed in order.

The challenge is to detect whether the driver uses two timelines instead
of one. So that is what we set up to detect.
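
For reference, a minimal sketch of the stepped signalling suggested 
above, using the sw_sync helpers (the wait duration is only illustrative):

	int timeline = sw_sync_timeline_create();
	int late = sw_sync_timeline_create_fence(timeline, 2); /* 1st batch */
	int early = sw_sync_timeline_create_fence(timeline, 1); /* 2nd batch */

	sw_sync_timeline_inc(timeline, 1); /* releases only 'early' */
	usleep(50000); /* check the 2nd batch has not executed yet */
	sw_sync_timeline_inc(timeline, 1); /* now release 'late' */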

> > +     munmap(map, 4096);
> > +}
> > +
> > +static void iris_pipeline(int i915)
> > +{
> > +#ifdef I915_DEFINE_CONTEXT_PARAM_ENGINES
> 
> Remove this I expect?

It depends on a later header. The early plan was to have the bits and
pieces added piecemeal, but then I decided to add a full feature test.

> > +#define RCS0 {0, 0}
> > +     I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 2) = {
> > +             .engines = { RCS0, RCS0 }
> > +     };
> > +     struct drm_i915_gem_context_create_ext_setparam p_engines = {
> > +             .base = {
> > +                     .name = I915_CONTEXT_CREATE_EXT_SETPARAM,
> > +                     .next_extension = 0, /* end of chain */
> > +             },
> > +             .param = {
> > +                     .param = I915_CONTEXT_PARAM_ENGINES,
> > +                     .value = to_user_pointer(&engines),
> > +                     .size = sizeof(engines),
> > +             },
> > +     };
> > +     struct drm_i915_gem_context_create_ext_setparam p_recover = {
> > +             .base = {
> > +                     .name = I915_CONTEXT_CREATE_EXT_SETPARAM,
> > +                     .next_extension = to_user_pointer(&p_engines),
> > +             },
> > +             .param = {
> > +                     .param = I915_CONTEXT_PARAM_RECOVERABLE,
> > +                     .value = 0,
> > +             },
> > +     };
> > +     struct drm_i915_gem_context_create_ext_setparam p_prio = {
> > +             .base = {
> > +                     .name = I915_CONTEXT_CREATE_EXT_SETPARAM,
> > +                     .next_extension = to_user_pointer(&p_recover),
> > +             },
> > +             .param = {
> > +                     .param = I915_CONTEXT_PARAM_PRIORITY,
> > +                     .value = 768,
> > +             },
> > +     };
> > +     struct drm_i915_gem_context_create_ext create = {
> > +             .flags = (I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE |
> > +                       I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS),
> > +     };
> > +     struct drm_i915_gem_context_param get;
> > +
> > +     igt_require(create_ext_ioctl(i915, &create) == 0);
> 
> Context destroy here I think.
> 
> > +
> > +     create.extensions = to_user_pointer(&p_prio);
> > +     igt_assert_eq(create_ext_ioctl(i915, &create), 0);
> > +
> > +     memset(&get, 0, sizeof(get));
> > +     get.ctx_id = create.ctx_id;
> > +     get.param = I915_CONTEXT_PARAM_PRIORITY;
> > +     gem_context_get_param(i915, &get);
> > +     igt_assert_eq(get.value, p_prio.param.value);
> > +
> > +     memset(&get, 0, sizeof(get));
> > +     get.ctx_id = create.ctx_id;
> > +     get.param = I915_CONTEXT_PARAM_RECOVERABLE;
> > +     gem_context_get_param(i915, &get);
> > +     igt_assert_eq(get.value, 0);
> > +
> > +     check_single_timeline(i915, create.ctx_id, 2);
> > +
> > +     gem_context_destroy(i915, create.ctx_id);
> > +#endif /* I915_DEFINE_CONTEXT_PARAM_ENGINES */
> > +}
> > +
> >   igt_main
> >   {
> >       const int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
> > @@ -340,17 +543,15 @@ igt_main
> >               memset(&create, 0, sizeof(create));
> >               create.ctx_id = rand();
> >               create.pad = 0;
> > -             igt_assert_eq(__gem_context_create_local(fd, &create), 0);
> > +             igt_assert_eq(create_ioctl(fd, &create), 0);
> >               igt_assert(create.ctx_id != 0);
> >               gem_context_destroy(fd, create.ctx_id);
> >       }
> >   
> > -     igt_subtest("invalid-pad") {
> > -             memset(&create, 0, sizeof(create));
> > -             create.ctx_id = rand();
> > -             create.pad = 1;
> > -             igt_assert_eq(__gem_context_create_local(fd, &create), -EINVAL);
> > -     }
> > +     igt_subtest("ext-param")
> > +             basic_ext_param(fd);
> 
> basic-ext-param? Do we even rely on basic prefix these days?

basic test prefix is dead.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH i-g-t 05/16] i915/gem_ctx_create: Basic checks for constructor properties
  2019-05-14 12:27     ` [igt-dev] " Tvrtko Ursulin
@ 2019-05-15 19:06       ` Chris Wilson
  -1 siblings, 0 replies; 92+ messages in thread
From: Chris Wilson @ 2019-05-15 19:06 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx; +Cc: igt-dev

Quoting Tvrtko Ursulin (2019-05-14 13:27:38)
> 
> On 08/05/2019 11:09, Chris Wilson wrote:
> > Check that the extended create interface accepts setparam.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >   tests/i915/gem_ctx_create.c | 225 ++++++++++++++++++++++++++++++++++--
> >   1 file changed, 213 insertions(+), 12 deletions(-)
> > 
> > diff --git a/tests/i915/gem_ctx_create.c b/tests/i915/gem_ctx_create.c
> > index a664070db..9b4fddbe7 100644
> > --- a/tests/i915/gem_ctx_create.c
> > +++ b/tests/i915/gem_ctx_create.c
> > @@ -33,6 +33,7 @@
> >   #include <time.h>
> >   
> >   #include "igt_rand.h"
> > +#include "sw_sync.h"
> >   
> >   #define LOCAL_I915_EXEC_BSD_SHIFT      (13)
> >   #define LOCAL_I915_EXEC_BSD_MASK       (3 << LOCAL_I915_EXEC_BSD_SHIFT)
> > @@ -45,12 +46,33 @@ static unsigned all_nengine;
> >   static unsigned ppgtt_engines[16];
> >   static unsigned ppgtt_nengine;
> >   
> > -static int __gem_context_create_local(int fd, struct drm_i915_gem_context_create *arg)
> > +static int create_ioctl(int fd, struct drm_i915_gem_context_create *arg)
> >   {
> > -     int ret = 0;
> > -     if (drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, arg))
> > -             ret = -errno;
> > -     return ret;
> > +     int err;
> > +
> > +     err = 0;
> > +     if (igt_ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, arg)) {
> > +             err = -errno;
> > +             igt_assert(err);
> > +     }
> > +
> > +     errno = 0;
> > +     return err;
> > +}
> > +
> > +static int create_ext_ioctl(int i915,
> > +                         struct drm_i915_gem_context_create_ext *arg)
> > +{
> > +     int err;
> > +
> > +     err = 0;
> > +     if (igt_ioctl(i915, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, arg)) {
> > +             err = -errno;
> > +             igt_assume(err);
> > +     }
> > +
> > +     errno = 0;
> > +     return err;
> >   }
> >   
> >   static double elapsed(const struct timespec *start,
> > @@ -308,6 +330,187 @@ static void maximum(int fd, int ncpus, unsigned mode)
> >       free(contexts);
> >   }
> >   
> > +static void basic_ext_param(int i915)
> > +{
> > +     struct drm_i915_gem_context_create_ext_setparam ext = {
> > +             { .name = I915_CONTEXT_CREATE_EXT_SETPARAM },
> > +     };
> > +     struct drm_i915_gem_context_create_ext create = {
> > +             .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS
> > +     };
> > +     struct drm_i915_gem_context_param get;
> > +
> > +     igt_require(create_ext_ioctl(i915, &create) == 0);
> > +     gem_context_destroy(i915, create.ctx_id);
> > +
> > +     create.extensions = -1ull;
> > +     igt_assert_eq(create_ext_ioctl(i915, &create), -EFAULT);
> > +
> > +     create.extensions = to_user_pointer(&ext);
> > +     igt_assert_eq(create_ext_ioctl(i915, &create), -EINVAL);
> 
> I think this is the unknown param, right?
> 
> Need another -EINVAL test for non-zero ext.ctx_id.

No, this is non-zero ctx_id.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH i-g-t 05/16] i915/gem_ctx_create: Basic checks for constructor properties
  2019-05-15 19:06       ` [igt-dev] " Chris Wilson
@ 2019-05-15 19:09         ` Chris Wilson
  -1 siblings, 0 replies; 92+ messages in thread
From: Chris Wilson @ 2019-05-15 19:09 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx; +Cc: igt-dev

Quoting Chris Wilson (2019-05-15 20:06:45)
> Quoting Tvrtko Ursulin (2019-05-14 13:27:38)
> > 
> > On 08/05/2019 11:09, Chris Wilson wrote:
> > > Check that the extended create interface accepts setparam.
> > > 
> > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > ---
> > >   tests/i915/gem_ctx_create.c | 225 ++++++++++++++++++++++++++++++++++--
> > >   1 file changed, 213 insertions(+), 12 deletions(-)
> > > 
> > > diff --git a/tests/i915/gem_ctx_create.c b/tests/i915/gem_ctx_create.c
> > > index a664070db..9b4fddbe7 100644
> > > --- a/tests/i915/gem_ctx_create.c
> > > +++ b/tests/i915/gem_ctx_create.c
> > > @@ -33,6 +33,7 @@
> > >   #include <time.h>
> > >   
> > >   #include "igt_rand.h"
> > > +#include "sw_sync.h"
> > >   
> > >   #define LOCAL_I915_EXEC_BSD_SHIFT      (13)
> > >   #define LOCAL_I915_EXEC_BSD_MASK       (3 << LOCAL_I915_EXEC_BSD_SHIFT)
> > > @@ -45,12 +46,33 @@ static unsigned all_nengine;
> > >   static unsigned ppgtt_engines[16];
> > >   static unsigned ppgtt_nengine;
> > >   
> > > -static int __gem_context_create_local(int fd, struct drm_i915_gem_context_create *arg)
> > > +static int create_ioctl(int fd, struct drm_i915_gem_context_create *arg)
> > >   {
> > > -     int ret = 0;
> > > -     if (drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, arg))
> > > -             ret = -errno;
> > > -     return ret;
> > > +     int err;
> > > +
> > > +     err = 0;
> > > +     if (igt_ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, arg)) {
> > > +             err = -errno;
> > > +             igt_assert(err);
> > > +     }
> > > +
> > > +     errno = 0;
> > > +     return err;
> > > +}
> > > +
> > > +static int create_ext_ioctl(int i915,
> > > +                         struct drm_i915_gem_context_create_ext *arg)
> > > +{
> > > +     int err;
> > > +
> > > +     err = 0;
> > > +     if (igt_ioctl(i915, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, arg)) {
> > > +             err = -errno;
> > > +             igt_assume(err);
> > > +     }
> > > +
> > > +     errno = 0;
> > > +     return err;
> > >   }
> > >   
> > >   static double elapsed(const struct timespec *start,
> > > @@ -308,6 +330,187 @@ static void maximum(int fd, int ncpus, unsigned mode)
> > >       free(contexts);
> > >   }
> > >   
> > > +static void basic_ext_param(int i915)
> > > +{
> > > +     struct drm_i915_gem_context_create_ext_setparam ext = {
> > > +             { .name = I915_CONTEXT_CREATE_EXT_SETPARAM },
> > > +     };
> > > +     struct drm_i915_gem_context_create_ext create = {
> > > +             .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS
> > > +     };
> > > +     struct drm_i915_gem_context_param get;
> > > +
> > > +     igt_require(create_ext_ioctl(i915, &create) == 0);
> > > +     gem_context_destroy(i915, create.ctx_id);
> > > +
> > > +     create.extensions = -1ull;
> > > +     igt_assert_eq(create_ext_ioctl(i915, &create), -EFAULT);
> > > +
> > > +     create.extensions = to_user_pointer(&ext);
> > > +     igt_assert_eq(create_ext_ioctl(i915, &create), -EINVAL);
> > 
> > I think this is the unknown param, right?
> > 
> > Need another -EINVAL test for non-zero ext.ctx_id.
> 
> No, this is non-zero ctx_id.

No, I read the wrong ctx_id.
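
For completeness, a minimal sketch of the extra negative test, on the 
assumption that the extension's embedded ctx_id must be zero at create 
time:

	ext.param.param = I915_CONTEXT_PARAM_PRIORITY;
	ext.param.ctx_id = 1; /* invalid: the context does not exist yet */
	igt_assert_eq(create_ext_ioctl(i915, &create), -EINVAL);
	ext.param.ctx_id = 0;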
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH i-g-t 07/16] i915: Add gem_ctx_clone
  2019-05-14 12:41     ` [igt-dev] " Tvrtko Ursulin
@ 2019-05-15 19:14       ` Chris Wilson
  -1 siblings, 0 replies; 92+ messages in thread
From: Chris Wilson @ 2019-05-15 19:14 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx; +Cc: igt-dev

Quoting Tvrtko Ursulin (2019-05-14 13:41:13)
> 
> On 08/05/2019 11:09, Chris Wilson wrote:
> > Exercise cloning contexts, an extension of merely creating one.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >   tests/Makefile.sources     |   1 +
> >   tests/i915/gem_ctx_clone.c | 460 +++++++++++++++++++++++++++++++++++++
> >   tests/meson.build          |   1 +
> >   3 files changed, 462 insertions(+)
> >   create mode 100644 tests/i915/gem_ctx_clone.c
> > 
> > diff --git a/tests/Makefile.sources b/tests/Makefile.sources
> > index 1a541d206..e1b7feeb2 100644
> > --- a/tests/Makefile.sources
> > +++ b/tests/Makefile.sources
> > @@ -21,6 +21,7 @@ TESTS_progs = \
> >       drm_import_export \
> >       drm_mm \
> >       drm_read \
> > +     i915/gem_ctx_clone \
> >       i915/gem_vm_create \
> >       kms_3d \
> >       kms_addfb_basic \
> > diff --git a/tests/i915/gem_ctx_clone.c b/tests/i915/gem_ctx_clone.c
> > new file mode 100644
> > index 000000000..cdc5bf413
> > --- /dev/null
> > +++ b/tests/i915/gem_ctx_clone.c
> > @@ -0,0 +1,460 @@
> > +/*
> > + * Copyright © 2019 Intel Corporation
> > + *
> > + * Permission is hereby granted, free of charge, to any person obtaining a
> > + * copy of this software and associated documentation files (the "Software"),
> > + * to deal in the Software without restriction, including without limitation
> > + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> > + * and/or sell copies of the Software, and to permit persons to whom the
> > + * Software is furnished to do so, subject to the following conditions:
> > + *
> > + * The above copyright notice and this permission notice (including the next
> > + * paragraph) shall be included in all copies or substantial portions of the
> > + * Software.
> > + *
> > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> > + * IN THE SOFTWARE.
> > + */
> > +
> > +#include "igt.h"
> > +#include "igt_gt.h"
> > +#include "i915/gem_vm.h"
> > +#include "i915_drm.h"
> > +
> > +static int ctx_create_ioctl(int i915, struct drm_i915_gem_context_create_ext *arg)
> > +{
> > +     int err;
> > +
> > +     err = 0;
> > +     if (igt_ioctl(i915, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, arg)) {
> > +             err = -errno;
> > +             igt_assume(err);
> > +     }
> > +
> > +     errno = 0;
> > +     return err;
> > +}
> > +
> > +static bool has_ctx_clone(int i915)
> > +{
> > +     struct drm_i915_gem_context_create_ext_clone ext = {
> > +             { .name = I915_CONTEXT_CREATE_EXT_CLONE },
> > +             .clone_id = -1,
> > +     };
> > +     struct drm_i915_gem_context_create_ext create = {
> > +             .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
> > +             .extensions = to_user_pointer(&ext),
> > +     };
> > +     return ctx_create_ioctl(i915, &create) == -ENOENT;
> > +}
> > +
> > +static void invalid_clone(int i915)
> > +{
> > +     struct drm_i915_gem_context_create_ext_clone ext = {
> > +             { .name = I915_CONTEXT_CREATE_EXT_CLONE },
> > +     };
> > +     struct drm_i915_gem_context_create_ext create = {
> > +             .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
> > +             .extensions = to_user_pointer(&ext),
> > +     };
> > +
> > +     igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
> > +     gem_context_destroy(i915, create.ctx_id);
> > +
> > +     ext.flags = -1; /* Hopefully we won't run out of flags */
> > +     igt_assert_eq(ctx_create_ioctl(i915, &create), -EINVAL);
> > +     ext.flags = 0;
> > +
> > +     ext.base.next_extension = -1;
> > +     igt_assert_eq(ctx_create_ioctl(i915, &create), -EFAULT);
> > +     ext.base.next_extension = to_user_pointer(&ext);
> > +     igt_assert_eq(ctx_create_ioctl(i915, &create), -E2BIG);
> > +     ext.base.next_extension = 0;
> > +
> > +     ext.clone_id = -1;
> > +     igt_assert_eq(ctx_create_ioctl(i915, &create), -ENOENT);
> > +     ext.clone_id = 0;
> > +}
> > +
> > +static void clone_flags(int i915)
> > +{
> > +     struct drm_i915_gem_context_create_ext_setparam set = {
> > +             { .name = I915_CONTEXT_CREATE_EXT_SETPARAM },
> > +             { .param = I915_CONTEXT_PARAM_RECOVERABLE },
> > +     };
> > +     struct drm_i915_gem_context_create_ext_clone ext = {
> > +             { .name = I915_CONTEXT_CREATE_EXT_CLONE },
> > +             .flags = I915_CONTEXT_CLONE_FLAGS,
> > +     };
> > +     struct drm_i915_gem_context_create_ext create = {
> > +             .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
> > +             .extensions = to_user_pointer(&ext),
> > +     };
> > +     int expected;
> > +
> > +     set.param.value = 1; /* default is recoverable */
> > +     igt_require(__gem_context_set_param(i915, &set.param) == 0);
> > +
> > +     for (int pass = 0; pass < 2; pass++) { /* cloning default, then child */
> > +             igt_debug("Cloning %d\n", ext.clone_id);
> > +             igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
> > +
> > +             set.param.ctx_id = ext.clone_id;
> > +             gem_context_get_param(i915, &set.param);
> > +             expected = set.param.value;
> > +
> > +             set.param.ctx_id = create.ctx_id;
> > +             gem_context_get_param(i915, &set.param);
> > +
> > +             igt_assert_eq_u64(set.param.param,
> > +                               I915_CONTEXT_PARAM_RECOVERABLE);
> > +             igt_assert_eq((int)set.param.value, expected);
> > +
> > +             gem_context_destroy(i915, create.ctx_id);
> > +
> > +             expected = set.param.value = 0;
> > +             set.param.ctx_id = ext.clone_id;
> > +             gem_context_set_param(i915, &set.param);
> > +
> > +             igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
> > +
> > +             set.param.ctx_id = create.ctx_id;
> > +             gem_context_get_param(i915, &set.param);
> > +
> > +             igt_assert_eq_u64(set.param.param,
> > +                               I915_CONTEXT_PARAM_RECOVERABLE);
> > +             igt_assert_eq((int)set.param.value, expected);
> > +
> > +             gem_context_destroy(i915, create.ctx_id);
> > +
> > +             /* clone but then reset priority to default... */
> 
> Just correct priority/prio here and below.
> 
> > +             set.param.ctx_id = 0;
> > +             set.param.value = 1;
> > +             ext.base.next_extension = to_user_pointer(&set);
> > +             igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
> > +             ext.base.next_extension = 0;
> > +
> > +             /* new context should have updated prio... */
> > +             set.param.ctx_id = create.ctx_id;
> > +             gem_context_get_param(i915, &set.param);
> > +             igt_assert_eq_u64(set.param.value, 1);
> > +
> > +             /* but original context should have default prio */
> > +             set.param.ctx_id = ext.clone_id;
> > +             gem_context_get_param(i915, &set.param);
> > +             igt_assert_eq_u64(set.param.value, 0);
> > +
> > +             gem_context_destroy(i915, create.ctx_id);
> > +             ext.clone_id = gem_context_create(i915);
> > +     }
> > +
> > +     gem_context_destroy(i915, ext.clone_id);
> > +}
> > +
> > +static void clone_engines(int i915)
> > +{
> > +     struct drm_i915_gem_context_create_ext_setparam set = {
> > +             { .name = I915_CONTEXT_CREATE_EXT_SETPARAM },
> > +             { .param = I915_CONTEXT_PARAM_ENGINES },
> > +     };
> > +     struct drm_i915_gem_context_create_ext_clone ext = {
> > +             { .name = I915_CONTEXT_CREATE_EXT_CLONE },
> > +             .flags = I915_CONTEXT_CLONE_ENGINES,
> > +     };
> > +     struct drm_i915_gem_context_create_ext create = {
> > +             .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
> > +             .extensions = to_user_pointer(&ext),
> > +     };
> > +     I915_DEFINE_CONTEXT_PARAM_ENGINES(expected, 64);
> > +     I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 64);
> > +     uint64_t ex_size;
> > +
> > +     memset(&expected, 0, sizeof(expected));
> > +     memset(&engines, 0, sizeof(engines));
> > +
> > +     igt_require(__gem_context_set_param(i915, &set.param) == 0);
> > +
> > +     for (int pass = 0; pass < 2; pass++) { /* cloning default, then child */
> > +             igt_debug("Cloning %d\n", ext.clone_id);
> > +             igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
> > +
> > +             set.param.ctx_id = ext.clone_id;
> > +             set.param.size = sizeof(expected);
> > +             set.param.value = to_user_pointer(&expected);
> > +             gem_context_get_param(i915, &set.param);
> > +             ex_size = set.param.size;
> > +
> > +             set.param.ctx_id = create.ctx_id;
> > +             set.param.size = sizeof(engines);
> > +             set.param.value = to_user_pointer(&engines);
> > +             gem_context_get_param(i915, &set.param);
> > +
> > +             igt_assert_eq_u64(set.param.param, I915_CONTEXT_PARAM_ENGINES);
> > +             igt_assert_eq_u64(set.param.size, ex_size);
> > +             igt_assert(!memcmp(&engines, &expected, ex_size));
> > +
> > +             gem_context_destroy(i915, create.ctx_id);
> > +
> > +             expected.engines[0].engine_class =
> > +                     I915_ENGINE_CLASS_INVALID;
> > +             expected.engines[0].engine_instance =
> > +                     I915_ENGINE_CLASS_INVALID_NONE;
> > +             ex_size = (sizeof(struct i915_context_param_engines) +
> > +                        sizeof(expected.engines[0]));
> > +
> > +             set.param.ctx_id = ext.clone_id;
> > +             set.param.size = ex_size;
> > +             set.param.value = to_user_pointer(&expected);
> > +             gem_context_set_param(i915, &set.param);
> > +
> > +             igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
> > +
> > +             set.param.ctx_id = create.ctx_id;
> > +             set.param.size = sizeof(engines);
> > +             set.param.value = to_user_pointer(&engines);
> > +             gem_context_get_param(i915, &set.param);
> > +
> > +             igt_assert_eq_u64(set.param.size, ex_size);
> > +             igt_assert(!memcmp(&engines, &expected, ex_size));
> > +
> > +             gem_context_destroy(i915, create.ctx_id);
> > +
> > +             /* clone but then reset engines to default */
> > +             set.param.ctx_id = 0;
> > +             set.param.size = 0;
> > +             set.param.value = 0;
> > +             ext.base.next_extension = to_user_pointer(&set);
> > +
> > +             igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
> > +             ext.base.next_extension = 0;
> > +
> > +             set.param.ctx_id = create.ctx_id;
> > +             set.param.size = sizeof(engines);
> > +             set.param.value = to_user_pointer(&engines);
> > +             gem_context_get_param(i915, &set.param);
> > +             igt_assert_eq_u64(set.param.size, 0);
> > +
> > +             gem_context_destroy(i915, create.ctx_id);
> > +
> > +             /* And check we ignore the flag */
> > +             ext.flags = 0;
> > +             igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
> > +             ext.flags = I915_CONTEXT_CLONE_ENGINES;
> > +
> > +             set.param.ctx_id = create.ctx_id;
> > +             set.param.size = sizeof(engines);
> > +             set.param.value = to_user_pointer(&engines);
> > +             gem_context_get_param(i915, &set.param);
> > +             igt_assert_eq_u64(set.param.size, 0);
> 
> It is quite hard to review/follow all these tests (and so to gauge the 
> coverage). It is a very stateful flow: for each step one has to 
> remember/back-reference the currently active chain of extensions, the 
> active state of the contexts, and the context ids in use.
> 
> Annoyingly, I don't have any good ideas on how to express this easily 
> and reasonably. Perhaps less reuse of the same stack objects, in favour 
> of dedicated helpers for querying, would reduce the mess? Hard to say 
> without trying it out.
> 
> But I think something needs to be done, since people will struggle to 
> follow this if there is a bug one day.
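> 
> Something like this, perhaps (a hypothetical, untested helper):
> 
> 	static uint64_t get_ctx_param(int i915, uint32_t ctx, uint64_t param)
> 	{
> 		struct drm_i915_gem_context_param p = {
> 			.ctx_id = ctx,
> 			.param = param,
> 		};
> 
> 		gem_context_get_param(i915, &p);
> 		return p.value;
> 	}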

[snip]

> It looks fine in principle so I leave to your conscience if you'll try 
> to improve the readability. :) With the priority renamed to recoverable:

You know I care very little for negative testing that is better left to
fuzzing. The coverage, utility and versatility of these handwritten tests
barely justify the effort: a lot of work that only scratches the surface
and fails to probe anything interesting.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [igt-dev] [PATCH i-g-t 07/16] i915: Add gem_ctx_clone
@ 2019-05-15 19:14       ` Chris Wilson
  0 siblings, 0 replies; 92+ messages in thread
From: Chris Wilson @ 2019-05-15 19:14 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx; +Cc: igt-dev

Quoting Tvrtko Ursulin (2019-05-14 13:41:13)
> 
> On 08/05/2019 11:09, Chris Wilson wrote:
> > Exercise cloning contexts, an extension of merely creating one.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >   tests/Makefile.sources     |   1 +
> >   tests/i915/gem_ctx_clone.c | 460 +++++++++++++++++++++++++++++++++++++
> >   tests/meson.build          |   1 +
> >   3 files changed, 462 insertions(+)
> >   create mode 100644 tests/i915/gem_ctx_clone.c
> > 
> > diff --git a/tests/Makefile.sources b/tests/Makefile.sources
> > index 1a541d206..e1b7feeb2 100644
> > --- a/tests/Makefile.sources
> > +++ b/tests/Makefile.sources
> > @@ -21,6 +21,7 @@ TESTS_progs = \
> >       drm_import_export \
> >       drm_mm \
> >       drm_read \
> > +     i915/gem_ctx_clone \
> >       i915/gem_vm_create \
> >       kms_3d \
> >       kms_addfb_basic \
> > diff --git a/tests/i915/gem_ctx_clone.c b/tests/i915/gem_ctx_clone.c
> > new file mode 100644
> > index 000000000..cdc5bf413
> > --- /dev/null
> > +++ b/tests/i915/gem_ctx_clone.c
> > @@ -0,0 +1,460 @@
> > +/*
> > + * Copyright © 2019 Intel Corporation
> > + *
> > + * Permission is hereby granted, free of charge, to any person obtaining a
> > + * copy of this software and associated documentation files (the "Software"),
> > + * to deal in the Software without restriction, including without limitation
> > + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> > + * and/or sell copies of the Software, and to permit persons to whom the
> > + * Software is furnished to do so, subject to the following conditions:
> > + *
> > + * The above copyright notice and this permission notice (including the next
> > + * paragraph) shall be included in all copies or substantial portions of the
> > + * Software.
> > + *
> > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> > + * IN THE SOFTWARE.
> > + */
> > +
> > +#include "igt.h"
> > +#include "igt_gt.h"
> > +#include "i915/gem_vm.h"
> > +#include "i915_drm.h"
> > +
> > +static int ctx_create_ioctl(int i915, struct drm_i915_gem_context_create_ext *arg)
> > +{
> > +     int err;
> > +
> > +     err = 0;
> > +     if (igt_ioctl(i915, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, arg)) {
> > +             err = -errno;
> > +             igt_assume(err);
> > +     }
> > +
> > +     errno = 0;
> > +     return err;
> > +}
> > +
> > +static bool has_ctx_clone(int i915)
> > +{
> > +     struct drm_i915_gem_context_create_ext_clone ext = {
> > +             { .name = I915_CONTEXT_CREATE_EXT_CLONE },
> > +             .clone_id = -1,
> > +     };
> > +     struct drm_i915_gem_context_create_ext create = {
> > +             .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
> > +             .extensions = to_user_pointer(&ext),
> > +     };
> > +     return ctx_create_ioctl(i915, &create) == -ENOENT;
> > +}
> > +
> > +static void invalid_clone(int i915)
> > +{
> > +     struct drm_i915_gem_context_create_ext_clone ext = {
> > +             { .name = I915_CONTEXT_CREATE_EXT_CLONE },
> > +     };
> > +     struct drm_i915_gem_context_create_ext create = {
> > +             .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
> > +             .extensions = to_user_pointer(&ext),
> > +     };
> > +
> > +     igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
> > +     gem_context_destroy(i915, create.ctx_id);
> > +
> > +     ext.flags = -1; /* Hopefully we won't run out of flags */
> > +     igt_assert_eq(ctx_create_ioctl(i915, &create), -EINVAL);
> > +     ext.flags = 0;
> > +
> > +     ext.base.next_extension = -1;
> > +     igt_assert_eq(ctx_create_ioctl(i915, &create), -EFAULT);
> > +     ext.base.next_extension = to_user_pointer(&ext);
> > +     igt_assert_eq(ctx_create_ioctl(i915, &create), -E2BIG);
> > +     ext.base.next_extension = 0;
> > +
> > +     ext.clone_id = -1;
> > +     igt_assert_eq(ctx_create_ioctl(i915, &create), -ENOENT);
> > +     ext.clone_id = 0;
> > +}
> > +
> > +static void clone_flags(int i915)
> > +{
> > +     struct drm_i915_gem_context_create_ext_setparam set = {
> > +             { .name = I915_CONTEXT_CREATE_EXT_SETPARAM },
> > +             { .param = I915_CONTEXT_PARAM_RECOVERABLE },
> > +     };
> > +     struct drm_i915_gem_context_create_ext_clone ext = {
> > +             { .name = I915_CONTEXT_CREATE_EXT_CLONE },
> > +             .flags = I915_CONTEXT_CLONE_FLAGS,
> > +     };
> > +     struct drm_i915_gem_context_create_ext create = {
> > +             .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
> > +             .extensions = to_user_pointer(&ext),
> > +     };
> > +     int expected;
> > +
> > +     set.param.value = 1; /* default is recoverable */
> > +     igt_require(__gem_context_set_param(i915, &set.param) == 0);
> > +
> > +     for (int pass = 0; pass < 2; pass++) { /* cloning default, then child */
> > +             igt_debug("Cloning %d\n", ext.clone_id);
> > +             igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
> > +
> > +             set.param.ctx_id = ext.clone_id;
> > +             gem_context_get_param(i915, &set.param);
> > +             expected = set.param.value;
> > +
> > +             set.param.ctx_id = create.ctx_id;
> > +             gem_context_get_param(i915, &set.param);
> > +
> > +             igt_assert_eq_u64(set.param.param,
> > +                               I915_CONTEXT_PARAM_RECOVERABLE);
> > +             igt_assert_eq((int)set.param.value, expected);
> > +
> > +             gem_context_destroy(i915, create.ctx_id);
> > +
> > +             expected = set.param.value = 0;
> > +             set.param.ctx_id = ext.clone_id;
> > +             gem_context_set_param(i915, &set.param);
> > +
> > +             igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
> > +
> > +             set.param.ctx_id = create.ctx_id;
> > +             gem_context_get_param(i915, &set.param);
> > +
> > +             igt_assert_eq_u64(set.param.param,
> > +                               I915_CONTEXT_PARAM_RECOVERABLE);
> > +             igt_assert_eq((int)set.param.value, expected);
> > +
> > +             gem_context_destroy(i915, create.ctx_id);
> > +
> > +             /* clone but then reset priority to default... */
> 
> Just correct priority/prio here and below.
> 
> > +             set.param.ctx_id = 0;
> > +             set.param.value = 1;
> > +             ext.base.next_extension = to_user_pointer(&set);
> > +             igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
> > +             ext.base.next_extension = 0;
> > +
> > +             /* new context should have updated prio... */
> > +             set.param.ctx_id = create.ctx_id;
> > +             gem_context_get_param(i915, &set.param);
> > +             igt_assert_eq_u64(set.param.value, 1);
> > +
> > +             /* but original context should have default prio */
> > +             set.param.ctx_id = ext.clone_id;
> > +             gem_context_get_param(i915, &set.param);
> > +             igt_assert_eq_u64(set.param.value, 0);
> > +
> > +             gem_context_destroy(i915, create.ctx_id);
> > +             ext.clone_id = gem_context_create(i915);
> > +     }
> > +
> > +     gem_context_destroy(i915, ext.clone_id);
> > +}
> > +
> > +static void clone_engines(int i915)
> > +{
> > +     struct drm_i915_gem_context_create_ext_setparam set = {
> > +             { .name = I915_CONTEXT_CREATE_EXT_SETPARAM },
> > +             { .param = I915_CONTEXT_PARAM_ENGINES },
> > +     };
> > +     struct drm_i915_gem_context_create_ext_clone ext = {
> > +             { .name = I915_CONTEXT_CREATE_EXT_CLONE },
> > +             .flags = I915_CONTEXT_CLONE_ENGINES,
> > +     };
> > +     struct drm_i915_gem_context_create_ext create = {
> > +             .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
> > +             .extensions = to_user_pointer(&ext),
> > +     };
> > +     I915_DEFINE_CONTEXT_PARAM_ENGINES(expected, 64);
> > +     I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 64);
> > +     uint64_t ex_size;
> > +
> > +     memset(&expected, 0, sizeof(expected));
> > +     memset(&engines, 0, sizeof(engines));
> > +
> > +     igt_require(__gem_context_set_param(i915, &set.param) == 0);
> > +
> > +     for (int pass = 0; pass < 2; pass++) { /* cloning default, then child */
> > +             igt_debug("Cloning %d\n", ext.clone_id);
> > +             igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
> > +
> > +             set.param.ctx_id = ext.clone_id;
> > +             set.param.size = sizeof(expected);
> > +             set.param.value = to_user_pointer(&expected);
> > +             gem_context_get_param(i915, &set.param);
> > +             ex_size = set.param.size;
> > +
> > +             set.param.ctx_id = create.ctx_id;
> > +             set.param.size = sizeof(engines);
> > +             set.param.value = to_user_pointer(&engines);
> > +             gem_context_get_param(i915, &set.param);
> > +
> > +             igt_assert_eq_u64(set.param.param, I915_CONTEXT_PARAM_ENGINES);
> > +             igt_assert_eq_u64(set.param.size, ex_size);
> > +             igt_assert(!memcmp(&engines, &expected, ex_size));
> > +
> > +             gem_context_destroy(i915, create.ctx_id);
> > +
> > +             expected.engines[0].engine_class =
> > +                     I915_ENGINE_CLASS_INVALID;
> > +             expected.engines[0].engine_instance =
> > +                     I915_ENGINE_CLASS_INVALID_NONE;
> > +             ex_size = (sizeof(struct i915_context_param_engines) +
> > +                        sizeof(expected.engines[0]));
> > +
> > +             set.param.ctx_id = ext.clone_id;
> > +             set.param.size = ex_size;
> > +             set.param.value = to_user_pointer(&expected);
> > +             gem_context_set_param(i915, &set.param);
> > +
> > +             igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
> > +
> > +             set.param.ctx_id = create.ctx_id;
> > +             set.param.size = sizeof(engines);
> > +             set.param.value = to_user_pointer(&engines);
> > +             gem_context_get_param(i915, &set.param);
> > +
> > +             igt_assert_eq_u64(set.param.size, ex_size);
> > +             igt_assert(!memcmp(&engines, &expected, ex_size));
> > +
> > +             gem_context_destroy(i915, create.ctx_id);
> > +
> > +             /* clone but then reset engines to default */
> > +             set.param.ctx_id = 0;
> > +             set.param.size = 0;
> > +             set.param.value = 0;
> > +             ext.base.next_extension = to_user_pointer(&set);
> > +
> > +             igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
> > +             ext.base.next_extension = 0;
> > +
> > +             set.param.ctx_id = create.ctx_id;
> > +             set.param.size = sizeof(engines);
> > +             set.param.value = to_user_pointer(&engines);
> > +             gem_context_get_param(i915, &set.param);
> > +             igt_assert_eq_u64(set.param.size, 0);
> > +
> > +             gem_context_destroy(i915, create.ctx_id);
> > +
> > +             /* And check we ignore the flag */
> > +             ext.flags = 0;
> > +             igt_assert_eq(ctx_create_ioctl(i915, &create), 0);
> > +             ext.flags = I915_CONTEXT_CLONE_ENGINES;
> > +
> > +             set.param.ctx_id = create.ctx_id;
> > +             set.param.size = sizeof(engines);
> > +             set.param.value = to_user_pointer(&engines);
> > +             gem_context_get_param(i915, &set.param);
> > +             igt_assert_eq_u64(set.param.size, 0);
> 
> It is quite hard to review/follow all these tests (and so gauge the 
> coverage). It is a very stateful flow and for each step one has to 
> remember/back-reference what is the currently active chain of 
> extensions, and what is the active state of contexts and used context ids.
> 
> Annoyingly I don't have any good ideas on how to easily and reasonably 
> express this. Perhaps less reuse of the same stack objects in favour of 
> dedicated helpers for querying would reduce the mess? Hard to say 
> without trying it out.
> 
> But I think something needs to be done since people will struggle to 
> follow this if there is a bug one day.
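
A minimal sketch of the kind of dedicated query helper suggested here
(the helper name is invented for illustration; it is not part of the
patch):

	static uint64_t get_recoverable(int i915, uint32_t ctx_id)
	{
		struct drm_i915_gem_context_param p = {
			.ctx_id = ctx_id,
			.param = I915_CONTEXT_PARAM_RECOVERABLE,
		};

		gem_context_get_param(i915, &p);
		return p.value;
	}

Each check could then be a single igt_assert_eq(get_recoverable(i915,
create.ctx_id), get_recoverable(i915, ext.clone_id)) instead of
threading state through the shared set.param object.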

[snip]

> It looks fine in principle so I leave it to your conscience whether you'll
> try to improve the readability. :) With the priority renamed to recoverable:

You know I care very little for negative testing that is better left to
fuzzing. The coverage, utility and versatility of these handwritten tests
barely justify the effort: a lot of work to scratch the surface that
fails to probe anything interesting.
-Chris
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH i-g-t 08/16] i915: Exercise creating context with shared GTT
  2019-05-15  6:37     ` [igt-dev] " Tvrtko Ursulin
@ 2019-05-15 19:33       ` Chris Wilson
  -1 siblings, 0 replies; 92+ messages in thread
From: Chris Wilson @ 2019-05-15 19:33 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx; +Cc: igt-dev

Quoting Tvrtko Ursulin (2019-05-15 07:37:18)
> 
> On 08/05/2019 11:09, Chris Wilson wrote:
> > v2: Test each shared context is its own timeline and allows request
> > reordering between shared contexts.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> > Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
> > ---
> >   lib/i915/gem_context.c        |  68 +++
> >   lib/i915/gem_context.h        |  13 +
> >   tests/Makefile.sources        |   1 +
> >   tests/i915/gem_ctx_shared.c   | 856 ++++++++++++++++++++++++++++++++++
> >   tests/i915/gem_exec_whisper.c |  32 +-
> >   tests/meson.build             |   1 +
> >   6 files changed, 962 insertions(+), 9 deletions(-)
> >   create mode 100644 tests/i915/gem_ctx_shared.c
> > 
> > diff --git a/lib/i915/gem_context.c b/lib/i915/gem_context.c
> > index f94d89cb4..8fb8984d1 100644
> > --- a/lib/i915/gem_context.c
> > +++ b/lib/i915/gem_context.c
> > @@ -272,6 +272,74 @@ void gem_context_set_priority(int fd, uint32_t ctx_id, int prio)
> >       igt_assert_eq(__gem_context_set_priority(fd, ctx_id, prio), 0);
> >   }
> >   
> > +int
> > +__gem_context_clone(int i915,
> > +                 uint32_t src, unsigned int share,
> > +                 unsigned int flags,
> > +                 uint32_t *out)
> > +{
> > +     struct drm_i915_gem_context_create_ext_clone clone = {
> > +             { .name = I915_CONTEXT_CREATE_EXT_CLONE },
> > +             .clone_id = src,
> > +             .flags = share,
> > +     };
> > +     struct drm_i915_gem_context_create_ext arg = {
> > +             .flags = flags | I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
> > +             .extensions = to_user_pointer(&clone),
> > +     };
> > +     int err = 0;
> > +
> > +     if (igt_ioctl(i915, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, &arg))
> > +             err = -errno;
> > +
> > +     *out = arg.ctx_id;
> > +
> > +     errno = 0;
> > +     return err;
> > +}
> > +
> > +static bool __gem_context_has(int i915, uint32_t share, unsigned int flags)
> > +{
> > +     uint32_t ctx;
> > +
> > +     __gem_context_clone(i915, 0, share, flags, &ctx);
> > +     if (ctx)
> > +             gem_context_destroy(i915, ctx);
> > +
> > +     errno = 0;
> > +     return ctx;
> > +}
> > +
> > +bool gem_contexts_has_shared_gtt(int i915)
> > +{
> > +     return __gem_context_has(i915, I915_CONTEXT_CLONE_VM, 0);
> > +}
> > +
> > +bool gem_has_queues(int i915)
> > +{
> > +     return __gem_context_has(i915,
> > +                              I915_CONTEXT_CLONE_VM,
> > +                              I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
> > +}
> > +
> > +uint32_t gem_context_clone(int i915,
> > +                        uint32_t src, unsigned int share,
> > +                        unsigned int flags)
> > +{
> > +     uint32_t ctx;
> > +
> > +     igt_assert_eq(__gem_context_clone(i915, src, share, flags, &ctx), 0);
> > +
> > +     return ctx;
> > +}
> > +
> > +uint32_t gem_queue_create(int i915)
> > +{
> > +     return gem_context_clone(i915, 0,
> > +                              I915_CONTEXT_CLONE_VM,
> > +                              I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
> > +}
> > +
> >   bool gem_context_has_engine(int fd, uint32_t ctx, uint64_t engine)
> >   {
> >       struct drm_i915_gem_exec_object2 exec = {};
> > diff --git a/lib/i915/gem_context.h b/lib/i915/gem_context.h
> > index a052714d4..8043c3401 100644
> > --- a/lib/i915/gem_context.h
> > +++ b/lib/i915/gem_context.h
> > @@ -29,6 +29,19 @@ int __gem_context_create(int fd, uint32_t *ctx_id);
> >   void gem_context_destroy(int fd, uint32_t ctx_id);
> >   int __gem_context_destroy(int fd, uint32_t ctx_id);
> >   
> > +int __gem_context_clone(int i915,
> > +                     uint32_t src, unsigned int share,
> > +                     unsigned int flags,
> > +                     uint32_t *out);
> > +uint32_t gem_context_clone(int i915,
> > +                        uint32_t src, unsigned int share,
> > +                        unsigned int flags);
> > +
> > +uint32_t gem_queue_create(int i915);
> > +
> > +bool gem_contexts_has_shared_gtt(int i915);
> > +bool gem_has_queues(int i915);
> > +
> >   bool gem_has_contexts(int fd);
> >   void gem_require_contexts(int fd);
> >   void gem_context_require_bannable(int fd);
> > diff --git a/tests/Makefile.sources b/tests/Makefile.sources
> > index e1b7feeb2..3552e895b 100644
> > --- a/tests/Makefile.sources
> > +++ b/tests/Makefile.sources
> > @@ -22,6 +22,7 @@ TESTS_progs = \
> >       drm_mm \
> >       drm_read \
> >       i915/gem_ctx_clone \
> > +     i915/gem_ctx_shared \
> >       i915/gem_vm_create \
> >       kms_3d \
> >       kms_addfb_basic \
> > diff --git a/tests/i915/gem_ctx_shared.c b/tests/i915/gem_ctx_shared.c
> > new file mode 100644
> > index 000000000..0076f5e9d
> > --- /dev/null
> > +++ b/tests/i915/gem_ctx_shared.c
> > @@ -0,0 +1,856 @@
> > +/*
> > + * Copyright © 2017 Intel Corporation
> 
> 2019

Nah, that would imply I put any thought into touching it since.

> > +static void exhaust_shared_gtt(int i915, unsigned int flags)
> > +#define EXHAUST_LRC 0x1
> > +{
> > +     i915 = gem_reopen_driver(i915);
> > +
> > +     igt_fork(pid, 1) {
> > +             const uint32_t bbe = MI_BATCH_BUFFER_END;
> > +             struct drm_i915_gem_exec_object2 obj = {
> > +                     .handle = gem_create(i915, 4096)
> > +             };
> > +             struct drm_i915_gem_execbuffer2 execbuf = {
> > +                     .buffers_ptr = to_user_pointer(&obj),
> > +                     .buffer_count = 1,
> > +             };
> > +             uint32_t parent, child;
> > +             unsigned long count = 0;
> > +             int err;
> > +
> > +             gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
> > +
> > +             child = 0;
> > +             for (;;) {
> > +                     parent = child;
> > +                     err = __gem_context_clone(i915,
> > +                                               parent, I915_CONTEXT_CLONE_VM,
> > +                                               0, &child);
> > +                     if (err)
> > +                             break;
> > +
> > +                     if (flags & EXHAUST_LRC) {
> > +                             execbuf.rsvd1 = child;
> > +                             err = __gem_execbuf(i915, &execbuf);
> > +                             if (err)
> > +                                     break;
> > +                     }
> 
> What are the stop conditions in this test, with and without the 
> EXHAUST_LRC flag? It would be good to put that in a comment.

It runs until the kernel dies. The giveaway is meant to be the test name.
 
> Especially since AFAIR this one was causing OOM for me so might need to 
> be tweaked.

It runs until the kernel dies.
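
Concretely there is no explicit bound; a comment along these lines in
the loop would capture the intent (wording illustrative only):

	/*
	 * No stop condition by design: keep cloning contexts (and, with
	 * EXHAUST_LRC, also pinning a logical ring context via execbuf)
	 * until the kernel fails the ioctl, i.e. until it runs out of
	 * memory or context ids.
	 */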

> > +
> > +                     count++;
> > +             }
> > +             gem_sync(i915, obj.handle);
> > +
> > +             igt_info("Created %lu shared contexts, before %d (%s)\n",
> > +                      count, err, strerror(-err));
> > +     }
> > +     close(i915);
> > +     igt_waitchildren();
> > +}
> > +
> > +static void exec_shared_gtt(int i915, unsigned int ring)
> > +{
> > +     const int gen = intel_gen(intel_get_drm_devid(i915));
> > +     const uint32_t bbe = MI_BATCH_BUFFER_END;
> > +     struct drm_i915_gem_exec_object2 obj = {
> > +             .handle = gem_create(i915, 4096)
> > +     };
> > +     struct drm_i915_gem_execbuffer2 execbuf = {
> > +             .buffers_ptr = to_user_pointer(&obj),
> > +             .buffer_count = 1,
> > +             .flags = ring,
> > +     };
> > +     uint32_t scratch = obj.handle;
> > +     uint32_t batch[16];
> > +     int i;
> > +
> > +     gem_require_ring(i915, ring);
> > +     igt_require(gem_can_store_dword(i915, ring));
> > +
> > +     /* Load object into place in the GTT */
> > +     gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
> > +     gem_execbuf(i915, &execbuf);
> > +
> > +     /* Presume nothing causes an eviction in the meantime */
> > +
> > +     obj.handle = gem_create(i915, 4096);
> > +
> > +     i = 0;
> > +     batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
> > +     if (gen >= 8) {
> > +             batch[++i] = obj.offset;
> > +             batch[++i] = 0;
> > +     } else if (gen >= 4) {
> > +             batch[++i] = 0;
> > +             batch[++i] = obj.offset;
> > +     } else {
> > +             batch[i]--;
> > +             batch[++i] = obj.offset;
> > +     }
> > +     batch[++i] = 0xc0ffee;
> > +     batch[++i] = MI_BATCH_BUFFER_END;
> > +     gem_write(i915, obj.handle, 0, batch, sizeof(batch));
> > +
> > +     obj.offset += 4096; /* make sure we don't cause an eviction! */
> 
> Is 4k apart safe?

It must be, since to change it would imply an ABI break; I see no param
indicating an ABI change, and Joonas keeps on refusing to add such
information.
 
> A short comment on how does this test work would be good.
> 
> > +     obj.flags |= EXEC_OBJECT_PINNED;
> > +     execbuf.rsvd1 = gem_context_clone(i915, 0, I915_CONTEXT_CLONE_VM, 0);
> > +     if (gen > 3 && gen < 6)
> > +             execbuf.flags |= I915_EXEC_SECURE;
> > +
> > +     gem_execbuf(i915, &execbuf);
> > +     gem_context_destroy(i915, execbuf.rsvd1);
> > +     gem_sync(i915, obj.handle); /* write hazard lies */
> > +     gem_close(i915, obj.handle);
> > +
> > +     gem_read(i915, scratch, 0, batch, sizeof(uint32_t));
> > +     gem_close(i915, scratch);
> > +
> > +     igt_assert_eq_u32(*batch, 0xc0ffee);
> > +}
> > +
> > +static int nop_sync(int i915, uint32_t ctx, unsigned int ring, int64_t timeout)
> > +{
> > +     const uint32_t bbe = MI_BATCH_BUFFER_END;
> > +     struct drm_i915_gem_exec_object2 obj = {
> > +             .handle = gem_create(i915, 4096),
> > +     };
> > +     struct drm_i915_gem_execbuffer2 execbuf = {
> > +             .buffers_ptr = to_user_pointer(&obj),
> > +             .buffer_count = 1,
> > +             .flags = ring,
> > +             .rsvd1 = ctx,
> > +     };
> > +     int err;
> > +
> > +     gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
> > +     gem_execbuf(i915, &execbuf);
> > +     err = gem_wait(i915, obj.handle, &timeout);
> > +     gem_close(i915, obj.handle);
> > +
> > +     return err;
> > +}
> > +
> > +static bool has_single_timeline(int i915)
> > +{
> > +     uint32_t ctx;
> > +
> > +     __gem_context_clone(i915, 0, 0,
> > +                         I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE,
> > +                         &ctx);
> > +     if (ctx)
> > +             gem_context_destroy(i915, ctx);
> > +
> > +     return ctx != 0;
> > +}
> > +
> > +static bool ignore_engine(unsigned engine)
> > +{
> > +     if (engine == 0)
> > +             return true;
> > +
> > +     if (engine == I915_EXEC_BSD)
> > +             return true;
> > +
> > +     return false;
> > +}
> > +
> > +static void single_timeline(int i915)
> > +{
> > +     const uint32_t bbe = MI_BATCH_BUFFER_END;
> > +     struct drm_i915_gem_exec_object2 obj = {
> > +             .handle = gem_create(i915, 4096),
> > +     };
> > +     struct drm_i915_gem_execbuffer2 execbuf = {
> > +             .buffers_ptr = to_user_pointer(&obj),
> > +             .buffer_count = 1,
> > +     };
> > +     struct sync_fence_info rings[16];
> 
> Could use for_each_physical_engine to count the engines. But we probably 
> have plenty of this around the code base.
> 
> > +     struct sync_file_info sync_file_info = {
> > +             .num_fences = 1,
> > +     };
> > +     unsigned int engine;
> > +     int n;
> > +
> > +     igt_require(has_single_timeline(i915));
> > +
> > +     gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
> > +     gem_execbuf(i915, &execbuf);
> > +     gem_sync(i915, obj.handle);
> > +
> > +     /*
> > +      * For a "single timeline" context, each ring is on the common
> > +      * timeline, unlike a normal context where each ring has an
> > +      * independent timeline. That is no matter which engine we submit
> > +      * to, it reports the same timeline name and fence context. However,
> > +      * the fence context is not reported through the sync_fence_info.
> 
> Is the test useful then? There was one I reviewed earlier in this series 
> which tested for execution ordering, which sounds like is what's needed.

It is a variant. This one is a couple of years older. Both accomplish
similar things through very different means, which is all the more
serendipitous.

> 
> > +      */
> > +     execbuf.rsvd1 =
> > +             gem_context_clone(i915, 0, 0,
> > +                               I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
> > +     execbuf.flags = I915_EXEC_FENCE_OUT;
> > +     n = 0;
> > +     for_each_engine(i915, engine) {
> 
> for_each_physical_engine to align with Andi's work?

No, this would be an ABI iterator, not a physical iterator.
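
That is, the loop intentionally walks the legacy execbuf ring flags
(I915_EXEC_RENDER, I915_EXEC_BSD, ...) rather than the engines
physically present, since it is the execbuf uABI being exercised.
Roughly (iterator signatures as assumed from IGT at the time):

	for_each_engine(i915, engine)		/* legacy I915_EXEC_* ABI */
vs
	for_each_physical_engine(i915, e)	/* discovered hardware engines */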
 
> > +             gem_execbuf_wr(i915, &execbuf);
> > +             sync_file_info.sync_fence_info = to_user_pointer(&rings[n]);
> > +             do_ioctl(execbuf.rsvd2 >> 32, SYNC_IOC_FILE_INFO, &sync_file_info);
> > +             close(execbuf.rsvd2 >> 32);
> > +
> > +             igt_info("ring[%d] fence: %s %s\n",
> > +                      n, rings[n].driver_name, rings[n].obj_name);
> > +             n++;
> > +     }
> > +     gem_sync(i915, obj.handle);
> > +     gem_close(i915, obj.handle);
> > +
> > +     for (int i = 1; i < n; i++) {
> > +             igt_assert(!strcmp(rings[0].driver_name, rings[i].driver_name));
> > +             igt_assert(!strcmp(rings[0].obj_name, rings[i].obj_name));
> 
> What is in obj_name?

The timeline name. sync_file is plain old useless. The asserts here are
drivel.

> > +     }
> > +}
> > +
> > +static void exec_single_timeline(int i915, unsigned int ring)
> > +{
> > +     unsigned int other;
> > +     igt_spin_t *spin;
> > +     uint32_t ctx;
> > +
> > +     gem_require_ring(i915, ring);
> > +     igt_require(has_single_timeline(i915));
> > +
> > +     /*
> > +      * On an ordinary context, a blockage on one ring doesn't prevent
> > +      * execution on an other.
> > +      */
> > +     ctx = 0;
> > +     spin = NULL;
> > +     for_each_engine(i915, other) {
> 
> for_each_physical

Modern inventions.
 
> > +             if (other == ring || ignore_engine(other))
> > +                     continue;
> > +
> > +             if (spin == NULL) {
> > +                     spin = __igt_spin_new(i915, .ctx = ctx, .engine = other);
> > +             } else {
> > +                     struct drm_i915_gem_execbuffer2 execbuf = {
> > +                             .buffers_ptr = spin->execbuf.buffers_ptr,
> > +                             .buffer_count = spin->execbuf.buffer_count,
> > +                             .flags = other,
> > +                             .rsvd1 = ctx,
> > +                     };
> > +                     gem_execbuf(i915, &execbuf);
> > +             }
> > +     }
> > +     igt_require(spin);
> > +     igt_assert_eq(nop_sync(i915, ctx, ring, NSEC_PER_SEC), 0);
> > +     igt_spin_free(i915, spin);
> > +
> > +     /*
> > +      * But if we create a context with just a single shared timeline,
> > +      * then it will block waiting for the earlier requests on the
> > +      * other engines.
> > +      */
> > +     ctx = gem_context_clone(i915, 0, 0,
> > +                             I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
> > +     spin = NULL;
> > +     for_each_engine(i915, other) {
> 
> Ditto.

Hey! Where was that when this code was written^W copied :-p
 
> > +             if (other == ring || ignore_engine(other))
> > +                     continue;
> > +
> > +             if (spin == NULL) {
> > +                     spin = __igt_spin_new(i915, .ctx = ctx, .engine = other);
> > +             } else {
> > +                     struct drm_i915_gem_execbuffer2 execbuf = {
> > +                             .buffers_ptr = spin->execbuf.buffers_ptr,
> > +                             .buffer_count = spin->execbuf.buffer_count,
> > +                             .flags = other,
> > +                             .rsvd1 = ctx,
> > +                     };
> > +                     gem_execbuf(i915, &execbuf);
> > +             }
> > +     }
> > +     igt_assert(spin);
> > +     igt_assert_eq(nop_sync(i915, ctx, ring, NSEC_PER_SEC), -ETIME);
> > +     igt_spin_free(i915, spin);
> > +}
> > +
> > +static void store_dword(int i915, uint32_t ctx, unsigned ring,
> > +                     uint32_t target, uint32_t offset, uint32_t value,
> > +                     uint32_t cork, unsigned write_domain)
> > +{
> > +     const int gen = intel_gen(intel_get_drm_devid(i915));
> > +     struct drm_i915_gem_exec_object2 obj[3];
> > +     struct drm_i915_gem_relocation_entry reloc;
> > +     struct drm_i915_gem_execbuffer2 execbuf;
> > +     uint32_t batch[16];
> > +     int i;
> > +
> > +     memset(&execbuf, 0, sizeof(execbuf));
> > +     execbuf.buffers_ptr = to_user_pointer(obj + !cork);
> > +     execbuf.buffer_count = 2 + !!cork;
> > +     execbuf.flags = ring;
> > +     if (gen < 6)
> > +             execbuf.flags |= I915_EXEC_SECURE;
> > +     execbuf.rsvd1 = ctx;
> > +
> > +     memset(obj, 0, sizeof(obj));
> > +     obj[0].handle = cork;
> > +     obj[1].handle = target;
> > +     obj[2].handle = gem_create(i915, 4096);
> > +
> > +     memset(&reloc, 0, sizeof(reloc));
> > +     reloc.target_handle = obj[1].handle;
> > +     reloc.presumed_offset = 0;
> > +     reloc.offset = sizeof(uint32_t);
> > +     reloc.delta = offset;
> > +     reloc.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
> > +     reloc.write_domain = write_domain;
> > +     obj[2].relocs_ptr = to_user_pointer(&reloc);
> > +     obj[2].relocation_count = 1;
> > +
> > +     i = 0;
> > +     batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
> > +     if (gen >= 8) {
> > +             batch[++i] = offset;
> > +             batch[++i] = 0;
> > +     } else if (gen >= 4) {
> > +             batch[++i] = 0;
> > +             batch[++i] = offset;
> > +             reloc.offset += sizeof(uint32_t);
> > +     } else {
> > +             batch[i]--;
> > +             batch[++i] = offset;
> > +     }
> > +     batch[++i] = value;
> > +     batch[++i] = MI_BATCH_BUFFER_END;
> > +     gem_write(i915, obj[2].handle, 0, batch, sizeof(batch));
> > +     gem_execbuf(i915, &execbuf);
> > +     gem_close(i915, obj[2].handle);
> > +}
> > +
> > +static uint32_t create_highest_priority(int i915)
> > +{
> > +     uint32_t ctx = gem_context_create(i915);
> > +
> > +     /*
> > +      * If there is no priority support, all contexts will have equal
> > +      * priority (and therefore the max user priority), so no context
> > +      * can overtake us, and we effectively can form a plug.
> > +      */
> > +     __gem_context_set_priority(i915, ctx, MAX_PRIO);
> > +
> > +     return ctx;
> > +}
> > +
> > +static void unplug_show_queue(int i915, struct igt_cork *c, unsigned int engine)
> > +{
> > +     igt_spin_t *spin[MAX_ELSP_QLEN];
> 
> Why is this 16?

2x as big as the deepest known qlen. And 16 is the number that crops up
everywhere as a "just big enough" number.
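
i.e. (value as used by the test; the comment is illustrative):

	#define MAX_ELSP_QLEN 16 /* 2x the deepest known ELSP queue depth */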
 
> > +
> > +     for (int n = 0; n < ARRAY_SIZE(spin); n++) {
> > +             const struct igt_spin_factory opts = {
> > +                     .ctx = create_highest_priority(i915),
> > +                     .engine = engine,
> > +             };
> > +             spin[n] = __igt_spin_factory(i915, &opts);
> > +             gem_context_destroy(i915, opts.ctx);
> > +     }
> > +
> > +     igt_cork_unplug(c); /* batches will now be queued on the engine */
> > +     igt_debugfs_dump(i915, "i915_engine_info");
> > +
> > +     for (int n = 0; n < ARRAY_SIZE(spin); n++)
> > +             igt_spin_free(i915, spin[n]);
> > +}
> > +
> > +static uint32_t store_timestamp(int i915,
> > +                             uint32_t ctx, unsigned ring,
> > +                             unsigned mmio_base)
> > +{
> > +     const bool r64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
> > +     struct drm_i915_gem_exec_object2 obj = {
> > +             .handle = gem_create(i915, 4096),
> > +             .relocation_count = 1,
> > +     };
> > +     struct drm_i915_gem_relocation_entry reloc = {
> > +             .target_handle = obj.handle,
> > +             .offset = 2 * sizeof(uint32_t),
> > +             .delta = 4092,
> > +             .read_domains = I915_GEM_DOMAIN_INSTRUCTION,
> > +     };
> > +     struct drm_i915_gem_execbuffer2 execbuf = {
> > +             .buffers_ptr = to_user_pointer(&obj),
> > +             .buffer_count = 1,
> > +             .flags = ring,
> > +             .rsvd1 = ctx,
> > +     };
> > +     uint32_t batch[] = {
> > +             0x24 << 23 | (1 + r64b), /* SRM */
> > +             mmio_base + 0x358,
> > +             4092,
> > +             0,
> > +             MI_BATCH_BUFFER_END
> > +     };
> > +
> > +     igt_require(intel_gen(intel_get_drm_devid(i915)) >= 7);
> > +
> > +     gem_write(i915, obj.handle, 0, batch, sizeof(batch));
> > +     obj.relocs_ptr = to_user_pointer(&reloc);
> > +
> > +     gem_execbuf(i915, &execbuf);
> > +
> > +     return obj.handle;
> > +}
> > +
> > +static void independent(int i915, unsigned ring, unsigned flags)
> > +{
> > +     uint32_t handle[ARRAY_SIZE(priorities)];
> > +     igt_spin_t *spin[MAX_ELSP_QLEN];
> > +     unsigned int mmio_base;
> > +
> > +     /* XXX i915_query()! */
> > +     switch (ring) {
> > +     case I915_EXEC_DEFAULT:
> > +     case I915_EXEC_RENDER:
> > +             mmio_base = 0x2000;
> > +             break;
> > +#if 0
> > +     case I915_EXEC_BSD:
> > +             mmio_base = 0x12000;
> > +             break;
> > +#endif
> > +     case I915_EXEC_BLT:
> > +             mmio_base = 0x22000;
> > +             break;
> > +
> > +     case I915_EXEC_VEBOX:
> > +             if (intel_gen(intel_get_drm_devid(i915)) >= 11)
> > +                     mmio_base = 0x1d8000;
> > +             else
> > +                     mmio_base = 0x1a000;
> > +             break;
> > +
> > +     default:
> > +             igt_skip("mmio base not known\n");
> > +     }
> 
> Ufff this is quite questionable. Should we rather have this subtest in 
> selftests only?

We should be exporting this information. It is a non-privileged register
that is used by normal clients to measure elapsed time.
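
For reference, a sketch of how a client might turn two raw samples of
that register into elapsed time (the getparam below does exist in
i915_drm.h; the helper name is invented):

	static uint64_t ticks_to_ns(int i915, uint32_t start, uint32_t end)
	{
		int freq = 0; /* command streamer timestamp ticks per second */
		drm_i915_getparam_t gp = {
			.param = I915_PARAM_CS_TIMESTAMP_FREQUENCY,
			.value = &freq,
		};

		do_ioctl(i915, DRM_IOCTL_I915_GETPARAM, &gp);

		return (end - start) * (uint64_t)NSEC_PER_SEC / freq;
	}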

> > +
> > +     for (int n = 0; n < ARRAY_SIZE(spin); n++) {
> > +             const struct igt_spin_factory opts = {
> > +                     .ctx = create_highest_priority(i915),
> > +                     .engine = ring,
> > +             };
> > +             spin[n] = __igt_spin_factory(i915, &opts);
> > +             gem_context_destroy(i915, opts.ctx);
> > +     }
> > +
> > +     for (int i = 0; i < ARRAY_SIZE(priorities); i++) {
> > +             uint32_t ctx = gem_queue_create(i915);
> > +             gem_context_set_priority(i915, ctx, priorities[i]);
> > +             handle[i] = store_timestamp(i915, ctx, ring, mmio_base);
> > +             gem_context_destroy(i915, ctx);
> > +     }
> > +
> > +     for (int n = 0; n < ARRAY_SIZE(spin); n++)
> > +             igt_spin_free(i915, spin[n]);
> > +
> > +     for (int i = 0; i < ARRAY_SIZE(priorities); i++) {
> > +             uint32_t *ptr;
> > +
> > +             ptr = gem_mmap__gtt(i915, handle[i], 4096, PROT_READ);
> > +             gem_set_domain(i915, handle[i], /* no write hazard lies! */
> > +                            I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
> > +             gem_close(i915, handle[i]);
> > +
> > +             handle[i] = ptr[1023];
> 
> 1023 relates to 4092 from store_timestamp I gather. The two need to be 
> defined closer together.
> 
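For instance (a sketch only; the macro name is invented):

	#define TIMESTAMP_DWORD 1023 /* == 4092 / sizeof(uint32_t) */

with store_timestamp() deriving its 4092 from TIMESTAMP_DWORD and
independent() reading ptr[TIMESTAMP_DWORD].
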
> > +             munmap(ptr, 4096);
> > +
> > +             igt_debug("ctx[%d] .prio=%d, timestamp=%u\n",
> > +                       i, priorities[i], handle[i]);
> > +     }
> > +
> > +     igt_assert((int32_t)(handle[HI] - handle[LO]) < 0);
> > +}
> > +
> > +static void reorder(int i915, unsigned ring, unsigned flags)
> > +#define EQUAL 1
> > +{
> > +     IGT_CORK_HANDLE(cork);
> > +     uint32_t scratch;
> > +     uint32_t *ptr;
> > +     uint32_t ctx[2];
> > +     uint32_t plug;
> > +
> > +     ctx[LO] = gem_queue_create(i915);
> > +     gem_context_set_priority(i915, ctx[LO], MIN_PRIO);
> > +
> > +     ctx[HI] = gem_queue_create(i915);
> > +     gem_context_set_priority(i915, ctx[HI], flags & EQUAL ? MIN_PRIO : 0);
> > +
> > +     scratch = gem_create(i915, 4096);
> > +     plug = igt_cork_plug(&cork, i915);
> > +
> > +     /* We expect the high priority context to be executed first, and
> > +      * so the final result will be value from the low priority context.
> > +      */
> > +     store_dword(i915, ctx[LO], ring, scratch, 0, ctx[LO], plug, 0);
> > +     store_dword(i915, ctx[HI], ring, scratch, 0, ctx[HI], plug, 0);
> > +
> > +     unplug_show_queue(i915, &cork, ring);
> > +     gem_close(i915, plug);
> > +
> > +     gem_context_destroy(i915, ctx[LO]);
> > +     gem_context_destroy(i915, ctx[HI]);
> > +
> > +     ptr = gem_mmap__gtt(i915, scratch, 4096, PROT_READ);
> > +     gem_set_domain(i915, scratch, /* no write hazard lies! */
> > +                    I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
> > +     gem_close(i915, scratch);
> > +
> > +     if (flags & EQUAL) /* equal priority, result will be fifo */
> > +             igt_assert_eq_u32(ptr[0], ctx[HI]);
> > +     else
> > +             igt_assert_eq_u32(ptr[0], ctx[LO]);
> > +     munmap(ptr, 4096);
> > +}
> > +
> > +static void promotion(int i915, unsigned ring)
> > +{
> > +     IGT_CORK_HANDLE(cork);
> > +     uint32_t result, dep;
> > +     uint32_t *ptr;
> > +     uint32_t ctx[3];
> > +     uint32_t plug;
> > +
> > +     ctx[LO] = gem_queue_create(i915);
> > +     gem_context_set_priority(i915, ctx[LO], MIN_PRIO);
> > +
> > +     ctx[HI] = gem_queue_create(i915);
> > +     gem_context_set_priority(i915, ctx[HI], 0);
> > +
> > +     ctx[NOISE] = gem_queue_create(i915);
> > +     gem_context_set_priority(i915, ctx[NOISE], MIN_PRIO/2);
> > +
> > +     result = gem_create(i915, 4096);
> > +     dep = gem_create(i915, 4096);
> > +
> > +     plug = igt_cork_plug(&cork, i915);
> > +
> > +     /* Expect that HI promotes LO, so the order will be LO, HI, NOISE.
> > +      *
> > +      * fifo would be NOISE, LO, HI.
> > +      * strict priority would be  HI, NOISE, LO
> > +      */
> > +     store_dword(i915, ctx[NOISE], ring, result, 0, ctx[NOISE], plug, 0);
> > +     store_dword(i915, ctx[LO], ring, result, 0, ctx[LO], plug, 0);
> > +
> > +     /* link LO <-> HI via a dependency on another buffer */
> > +     store_dword(i915, ctx[LO], ring, dep, 0, ctx[LO], 0, I915_GEM_DOMAIN_INSTRUCTION);
> > +     store_dword(i915, ctx[HI], ring, dep, 0, ctx[HI], 0, 0);
> > +
> > +     store_dword(i915, ctx[HI], ring, result, 0, ctx[HI], 0, 0);
> > +
> > +     unplug_show_queue(i915, &cork, ring);
> > +     gem_close(i915, plug);
> > +
> > +     gem_context_destroy(i915, ctx[NOISE]);
> > +     gem_context_destroy(i915, ctx[LO]);
> > +     gem_context_destroy(i915, ctx[HI]);
> > +
> > +     ptr = gem_mmap__gtt(i915, dep, 4096, PROT_READ);
> > +     gem_set_domain(i915, dep, /* no write hazard lies! */
> > +                     I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
> > +     gem_close(i915, dep);
> > +
> > +     igt_assert_eq_u32(ptr[0], ctx[HI]);
> > +     munmap(ptr, 4096);
> > +
> > +     ptr = gem_mmap__gtt(i915, result, 4096, PROT_READ);
> > +     gem_set_domain(i915, result, /* no write hazard lies! */
> > +                     I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
> > +     gem_close(i915, result);
> > +
> > +     igt_assert_eq_u32(ptr[0], ctx[NOISE]);
> > +     munmap(ptr, 4096);
> > +}
> > +
> > +static void smoketest(int i915, unsigned ring, unsigned timeout)
> > +{
> > +     const int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
> > +     unsigned engines[16];
> > +     unsigned nengine;
> > +     unsigned engine;
> > +     uint32_t scratch;
> > +     uint32_t *ptr;
> > +
> > +     nengine = 0;
> > +     for_each_engine(i915, engine) {
> > +             if (ignore_engine(engine))
> > +                     continue;
> > +
> > +             engines[nengine++] = engine;
> > +     }
> > +     igt_require(nengine);
> 
> for_each_physical and counting the engines for the engines array would be
> better I think.
> 
> > +
> > +     scratch = gem_create(i915, 4096);
> > +     igt_fork(child, ncpus) {
> > +             unsigned long count = 0;
> > +             uint32_t ctx;
> > +
> > +             hars_petruska_f54_1_random_perturb(child);
> > +
> > +             ctx = gem_queue_create(i915);
> > +             igt_until_timeout(timeout) {
> > +                     int prio;
> > +
> > +                     prio = hars_petruska_f54_1_random_unsafe_max(MAX_PRIO - MIN_PRIO) + MIN_PRIO;
> > +                     gem_context_set_priority(i915, ctx, prio);
> > +
> > +                     engine = engines[hars_petruska_f54_1_random_unsafe_max(nengine)];
> > +                     store_dword(i915, ctx, engine, scratch,
> > +                                 8*child + 0, ~child,
> > +                                 0, 0);
> > +                     for (unsigned int step = 0; step < 8; step++)
> > +                             store_dword(i915, ctx, engine, scratch,
> > +                                         8*child + 4, count++,
> > +                                         0, 0);
> > +             }
> > +             gem_context_destroy(i915, ctx);
> > +     }
> > +     igt_waitchildren();
> > +
> > +     ptr = gem_mmap__gtt(i915, scratch, 4096, PROT_READ);
> > +     gem_set_domain(i915, scratch, /* no write hazard lies! */
> > +                     I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
> > +     gem_close(i915, scratch);
> > +
> > +     for (unsigned n = 0; n < ncpus; n++) {
> > +             igt_assert_eq_u32(ptr[2*n], ~n);
> > +             /*
> > +              * Note this count is approximate due to unconstrained
> > +              * ordering of the dword writes between engines.
> > +              *
> > +              * Take the result with a pinch of salt.
> > +              */
> > +             igt_info("Child[%d] completed %u cycles\n",  n, ptr[2*n+1]);
> > +     }
> > +     munmap(ptr, 4096);
> > +}
> > +
> > +igt_main
> > +{
> > +     const struct intel_execution_engine *e;
> > +     int i915 = -1;
> > +
> > +     igt_fixture {
> > +             i915 = drm_open_driver(DRIVER_INTEL);
> > +             igt_require_gem(i915);
> > +     }
> > +
> > +     igt_subtest_group {
> > +             igt_fixture {
> > +                     igt_require(gem_contexts_has_shared_gtt(i915));
> > +                     igt_fork_hang_detector(i915);
> > +             }
> > +
> > +             igt_subtest("create-shared-gtt")
> > +                     create_shared_gtt(i915, 0);
> > +
> > +             igt_subtest("detached-shared-gtt")
> > +                     create_shared_gtt(i915, DETACHED);
> > +
> > +             igt_subtest("disjoint-timelines")
> > +                     disjoint_timelines(i915);
> > +
> > +             igt_subtest("single-timeline")
> > +                     single_timeline(i915);
> > +
> > +             igt_subtest("exhaust-shared-gtt")
> > +                     exhaust_shared_gtt(i915, 0);
> > +
> > +             igt_subtest("exhaust-shared-gtt-lrc")
> > +                     exhaust_shared_gtt(i915, EXHAUST_LRC);
> > +
> > +             for (e = intel_execution_engines; e->name; e++) {
> > +                     igt_subtest_f("exec-shared-gtt-%s", e->name)
> > +                             exec_shared_gtt(i915, e->exec_id | e->flags);
> 
> The same previously raised question on whether it should iterate the legacy
> execbuf engines or the physical engines. Maybe you want different subtests
> to do both?

It should be testing the cross between the context and execbuf uABI, not
the physical engines.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [igt-dev] [PATCH i-g-t 08/16] i915: Exercise creating context with shared GTT
@ 2019-05-15 19:33       ` Chris Wilson
  0 siblings, 0 replies; 92+ messages in thread
From: Chris Wilson @ 2019-05-15 19:33 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx; +Cc: igt-dev, Tvrtko Ursulin

Quoting Tvrtko Ursulin (2019-05-15 07:37:18)
> 
> On 08/05/2019 11:09, Chris Wilson wrote:
> > v2: Test each shared context is its own timeline and allows request
> > reordering between shared contexts.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> > Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
> > ---
> >   lib/i915/gem_context.c        |  68 +++
> >   lib/i915/gem_context.h        |  13 +
> >   tests/Makefile.sources        |   1 +
> >   tests/i915/gem_ctx_shared.c   | 856 ++++++++++++++++++++++++++++++++++
> >   tests/i915/gem_exec_whisper.c |  32 +-
> >   tests/meson.build             |   1 +
> >   6 files changed, 962 insertions(+), 9 deletions(-)
> >   create mode 100644 tests/i915/gem_ctx_shared.c
> > 
> > diff --git a/lib/i915/gem_context.c b/lib/i915/gem_context.c
> > index f94d89cb4..8fb8984d1 100644
> > --- a/lib/i915/gem_context.c
> > +++ b/lib/i915/gem_context.c
> > @@ -272,6 +272,74 @@ void gem_context_set_priority(int fd, uint32_t ctx_id, int prio)
> >       igt_assert_eq(__gem_context_set_priority(fd, ctx_id, prio), 0);
> >   }
> >   
> > +int
> > +__gem_context_clone(int i915,
> > +                 uint32_t src, unsigned int share,
> > +                 unsigned int flags,
> > +                 uint32_t *out)
> > +{
> > +     struct drm_i915_gem_context_create_ext_clone clone = {
> > +             { .name = I915_CONTEXT_CREATE_EXT_CLONE },
> > +             .clone_id = src,
> > +             .flags = share,
> > +     };
> > +     struct drm_i915_gem_context_create_ext arg = {
> > +             .flags = flags | I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
> > +             .extensions = to_user_pointer(&clone),
> > +     };
> > +     int err = 0;
> > +
> > +     if (igt_ioctl(i915, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, &arg))
> > +             err = -errno;
> > +
> > +     *out = arg.ctx_id;
> > +
> > +     errno = 0;
> > +     return err;
> > +}
> > +
> > +static bool __gem_context_has(int i915, uint32_t share, unsigned int flags)
> > +{
> > +     uint32_t ctx;
> > +
> > +     __gem_context_clone(i915, 0, share, flags, &ctx);
> > +     if (ctx)
> > +             gem_context_destroy(i915, ctx);
> > +
> > +     errno = 0;
> > +     return ctx;
> > +}
> > +
> > +bool gem_contexts_has_shared_gtt(int i915)
> > +{
> > +     return __gem_context_has(i915, I915_CONTEXT_CLONE_VM, 0);
> > +}
> > +
> > +bool gem_has_queues(int i915)
> > +{
> > +     return __gem_context_has(i915,
> > +                              I915_CONTEXT_CLONE_VM,
> > +                              I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
> > +}
> > +
> > +uint32_t gem_context_clone(int i915,
> > +                        uint32_t src, unsigned int share,
> > +                        unsigned int flags)
> > +{
> > +     uint32_t ctx;
> > +
> > +     igt_assert_eq(__gem_context_clone(i915, src, share, flags, &ctx), 0);
> > +
> > +     return ctx;
> > +}
> > +
> > +uint32_t gem_queue_create(int i915)
> > +{
> > +     return gem_context_clone(i915, 0,
> > +                              I915_CONTEXT_CLONE_VM,
> > +                              I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
> > +}
> > +
> >   bool gem_context_has_engine(int fd, uint32_t ctx, uint64_t engine)
> >   {
> >       struct drm_i915_gem_exec_object2 exec = {};
> > diff --git a/lib/i915/gem_context.h b/lib/i915/gem_context.h
> > index a052714d4..8043c3401 100644
> > --- a/lib/i915/gem_context.h
> > +++ b/lib/i915/gem_context.h
> > @@ -29,6 +29,19 @@ int __gem_context_create(int fd, uint32_t *ctx_id);
> >   void gem_context_destroy(int fd, uint32_t ctx_id);
> >   int __gem_context_destroy(int fd, uint32_t ctx_id);
> >   
> > +int __gem_context_clone(int i915,
> > +                     uint32_t src, unsigned int share,
> > +                     unsigned int flags,
> > +                     uint32_t *out);
> > +uint32_t gem_context_clone(int i915,
> > +                        uint32_t src, unsigned int share,
> > +                        unsigned int flags);
> > +
> > +uint32_t gem_queue_create(int i915);
> > +
> > +bool gem_contexts_has_shared_gtt(int i915);
> > +bool gem_has_queues(int i915);
> > +
> >   bool gem_has_contexts(int fd);
> >   void gem_require_contexts(int fd);
> >   void gem_context_require_bannable(int fd);
> > diff --git a/tests/Makefile.sources b/tests/Makefile.sources
> > index e1b7feeb2..3552e895b 100644
> > --- a/tests/Makefile.sources
> > +++ b/tests/Makefile.sources
> > @@ -22,6 +22,7 @@ TESTS_progs = \
> >       drm_mm \
> >       drm_read \
> >       i915/gem_ctx_clone \
> > +     i915/gem_ctx_shared \
> >       i915/gem_vm_create \
> >       kms_3d \
> >       kms_addfb_basic \
> > diff --git a/tests/i915/gem_ctx_shared.c b/tests/i915/gem_ctx_shared.c
> > new file mode 100644
> > index 000000000..0076f5e9d
> > --- /dev/null
> > +++ b/tests/i915/gem_ctx_shared.c
> > @@ -0,0 +1,856 @@
> > +/*
> > + * Copyright © 2017 Intel Corporation
> 
> 2019

Nah, that would imply I put any thought into touching it since.

> > +static void exhaust_shared_gtt(int i915, unsigned int flags)
> > +#define EXHAUST_LRC 0x1
> > +{
> > +     i915 = gem_reopen_driver(i915);
> > +
> > +     igt_fork(pid, 1) {
> > +             const uint32_t bbe = MI_BATCH_BUFFER_END;
> > +             struct drm_i915_gem_exec_object2 obj = {
> > +                     .handle = gem_create(i915, 4096)
> > +             };
> > +             struct drm_i915_gem_execbuffer2 execbuf = {
> > +                     .buffers_ptr = to_user_pointer(&obj),
> > +                     .buffer_count = 1,
> > +             };
> > +             uint32_t parent, child;
> > +             unsigned long count = 0;
> > +             int err;
> > +
> > +             gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
> > +
> > +             child = 0;
> > +             for (;;) {
> > +                     parent = child;
> > +                     err = __gem_context_clone(i915,
> > +                                               parent, I915_CONTEXT_CLONE_VM,
> > +                                               0, &child);
> > +                     if (err)
> > +                             break;
> > +
> > +                     if (flags & EXHAUST_LRC) {
> > +                             execbuf.rsvd1 = child;
> > +                             err = __gem_execbuf(i915, &execbuf);
> > +                             if (err)
> > +                                     break;
> > +                     }
> 
> What are the stop conditions in this test, with and without the 
> EXHAUST_LRC flag? It would be good to put that in a comment.

It runs until the kernel dies. The giveaway is meant to be the test name.
 
> Especially since AFAIR this one was causing OOM for me so might need to 
> be tweaked.

It runs until the kernel dies.

> > +
> > +                     count++;
> > +             }
> > +             gem_sync(i915, obj.handle);
> > +
> > +             igt_info("Created %lu shared contexts, before %d (%s)\n",
> > +                      count, err, strerror(-err));
> > +     }
> > +     close(i915);
> > +     igt_waitchildren();
> > +}
> > +
> > +static void exec_shared_gtt(int i915, unsigned int ring)
> > +{
> > +     const int gen = intel_gen(intel_get_drm_devid(i915));
> > +     const uint32_t bbe = MI_BATCH_BUFFER_END;
> > +     struct drm_i915_gem_exec_object2 obj = {
> > +             .handle = gem_create(i915, 4096)
> > +     };
> > +     struct drm_i915_gem_execbuffer2 execbuf = {
> > +             .buffers_ptr = to_user_pointer(&obj),
> > +             .buffer_count = 1,
> > +             .flags = ring,
> > +     };
> > +     uint32_t scratch = obj.handle;
> > +     uint32_t batch[16];
> > +     int i;
> > +
> > +     gem_require_ring(i915, ring);
> > +     igt_require(gem_can_store_dword(i915, ring));
> > +
> > +     /* Load object into place in the GTT */
> > +     gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
> > +     gem_execbuf(i915, &execbuf);
> > +
> > +     /* Presume nothing causes an eviction in the meantime */
> > +
> > +     obj.handle = gem_create(i915, 4096);
> > +
> > +     i = 0;
> > +     batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
> > +     if (gen >= 8) {
> > +             batch[++i] = obj.offset;
> > +             batch[++i] = 0;
> > +     } else if (gen >= 4) {
> > +             batch[++i] = 0;
> > +             batch[++i] = obj.offset;
> > +     } else {
> > +             batch[i]--;
> > +             batch[++i] = obj.offset;
> > +     }
> > +     batch[++i] = 0xc0ffee;
> > +     batch[++i] = MI_BATCH_BUFFER_END;
> > +     gem_write(i915, obj.handle, 0, batch, sizeof(batch));
> > +
> > +     obj.offset += 4096; /* make sure we don't cause an eviction! */
> 
> Is 4k apart safe?

Since to change would imply an ABI break and I see no param indicating
an ABI change, and Joonas keeps on refusing to add such information.
 
> A short comment on how does this test work would be good.
> 
> > +     obj.flags |= EXEC_OBJECT_PINNED;
> > +     execbuf.rsvd1 = gem_context_clone(i915, 0, I915_CONTEXT_CLONE_VM, 0);
> > +     if (gen > 3 && gen < 6)
> > +             execbuf.flags |= I915_EXEC_SECURE;
> > +
> > +     gem_execbuf(i915, &execbuf);
> > +     gem_context_destroy(i915, execbuf.rsvd1);
> > +     gem_sync(i915, obj.handle); /* write hazard lies */
> > +     gem_close(i915, obj.handle);
> > +
> > +     gem_read(i915, scratch, 0, batch, sizeof(uint32_t));
> > +     gem_close(i915, scratch);
> > +
> > +     igt_assert_eq_u32(*batch, 0xc0ffee);
> > +}
> > +
> > +static int nop_sync(int i915, uint32_t ctx, unsigned int ring, int64_t timeout)
> > +{
> > +     const uint32_t bbe = MI_BATCH_BUFFER_END;
> > +     struct drm_i915_gem_exec_object2 obj = {
> > +             .handle = gem_create(i915, 4096),
> > +     };
> > +     struct drm_i915_gem_execbuffer2 execbuf = {
> > +             .buffers_ptr = to_user_pointer(&obj),
> > +             .buffer_count = 1,
> > +             .flags = ring,
> > +             .rsvd1 = ctx,
> > +     };
> > +     int err;
> > +
> > +     gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
> > +     gem_execbuf(i915, &execbuf);
> > +     err = gem_wait(i915, obj.handle, &timeout);
> > +     gem_close(i915, obj.handle);
> > +
> > +     return err;
> > +}
> > +
> > +static bool has_single_timeline(int i915)
> > +{
> > +     uint32_t ctx;
> > +
> > +     __gem_context_clone(i915, 0, 0,
> > +                         I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE,
> > +                         &ctx);
> > +     if (ctx)
> > +             gem_context_destroy(i915, ctx);
> > +
> > +     return ctx != 0;
> > +}
> > +
> > +static bool ignore_engine(unsigned engine)
> > +{
> > +     if (engine == 0)
> > +             return true;
> > +
> > +     if (engine == I915_EXEC_BSD)
> > +             return true;
> > +
> > +     return false;
> > +}
> > +
> > +static void single_timeline(int i915)
> > +{
> > +     const uint32_t bbe = MI_BATCH_BUFFER_END;
> > +     struct drm_i915_gem_exec_object2 obj = {
> > +             .handle = gem_create(i915, 4096),
> > +     };
> > +     struct drm_i915_gem_execbuffer2 execbuf = {
> > +             .buffers_ptr = to_user_pointer(&obj),
> > +             .buffer_count = 1,
> > +     };
> > +     struct sync_fence_info rings[16];
> 
> Could use for_each_physical_engine to count the engines. But we probably 
> have plenty of this around the code base.
> 
> > +     struct sync_file_info sync_file_info = {
> > +             .num_fences = 1,
> > +     };
> > +     unsigned int engine;
> > +     int n;
> > +
> > +     igt_require(has_single_timeline(i915));
> > +
> > +     gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
> > +     gem_execbuf(i915, &execbuf);
> > +     gem_sync(i915, obj.handle);
> > +
> > +     /*
> > +      * For a "single timeline" context, each ring is on the common
> > +      * timeline, unlike a normal context where each ring has an
> > +      * independent timeline. That is no matter which engine we submit
> > +      * to, it reports the same timeline name and fence context. However,
> > +      * the fence context is not reported through the sync_fence_info.
> 
> Is the test useful then? There was one I reviewed earlier in this series 
> which tested for execution ordering, which sounds like is what's needed.

It is a variant. This one is a couple of years older. Both accomplish
similar things through very different means, the more the serendipitous.

> 
> > +      */
> > +     execbuf.rsvd1 =
> > +             gem_context_clone(i915, 0, 0,
> > +                               I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
> > +     execbuf.flags = I915_EXEC_FENCE_OUT;
> > +     n = 0;
> > +     for_each_engine(i915, engine) {
> 
> for_each_physical_engine to align with Andi's work?

No, this would be an ABI iterator not a physical iterator.
 
> > +             gem_execbuf_wr(i915, &execbuf);
> > +             sync_file_info.sync_fence_info = to_user_pointer(&rings[n]);
> > +             do_ioctl(execbuf.rsvd2 >> 32, SYNC_IOC_FILE_INFO, &sync_file_info);
> > +             close(execbuf.rsvd2 >> 32);
> > +
> > +             igt_info("ring[%d] fence: %s %s\n",
> > +                      n, rings[n].driver_name, rings[n].obj_name);
> > +             n++;
> > +     }
> > +     gem_sync(i915, obj.handle);
> > +     gem_close(i915, obj.handle);
> > +
> > +     for (int i = 1; i < n; i++) {
> > +             igt_assert(!strcmp(rings[0].driver_name, rings[i].driver_name));
> > +             igt_assert(!strcmp(rings[0].obj_name, rings[i].obj_name));
> 
> What is in obj_name?

The timeline name. sync_file is plain old useless. The asserts here are
drivel.

> > +     }
> > +}
> > +
> > +static void exec_single_timeline(int i915, unsigned int ring)
> > +{
> > +     unsigned int other;
> > +     igt_spin_t *spin;
> > +     uint32_t ctx;
> > +
> > +     gem_require_ring(i915, ring);
> > +     igt_require(has_single_timeline(i915));
> > +
> > +     /*
> > +      * On an ordinary context, a blockage on one ring doesn't prevent
> > +      * execution on an other.
> > +      */
> > +     ctx = 0;
> > +     spin = NULL;
> > +     for_each_engine(i915, other) {
> 
> for_each_physical

Modern inventions.
 
> > +             if (other == ring || ignore_engine(other))
> > +                     continue;
> > +
> > +             if (spin == NULL) {
> > +                     spin = __igt_spin_new(i915, .ctx = ctx, .engine = other);
> > +             } else {
> > +                     struct drm_i915_gem_execbuffer2 execbuf = {
> > +                             .buffers_ptr = spin->execbuf.buffers_ptr,
> > +                             .buffer_count = spin->execbuf.buffer_count,
> > +                             .flags = other,
> > +                             .rsvd1 = ctx,
> > +                     };
> > +                     gem_execbuf(i915, &execbuf);
> > +             }
> > +     }
> > +     igt_require(spin);
> > +     igt_assert_eq(nop_sync(i915, ctx, ring, NSEC_PER_SEC), 0);
> > +     igt_spin_free(i915, spin);
> > +
> > +     /*
> > +      * But if we create a context with just a single shared timeline,
> > +      * then it will block waiting for the earlier requests on the
> > +      * other engines.
> > +      */
> > +     ctx = gem_context_clone(i915, 0, 0,
> > +                             I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
> > +     spin = NULL;
> > +     for_each_engine(i915, other) {
> 
> Ditto.

Hey! Where was that when this code was written^W copied :-p
 
> > +             if (other == ring || ignore_engine(other))
> > +                     continue;
> > +
> > +             if (spin == NULL) {
> > +                     spin = __igt_spin_new(i915, .ctx = ctx, .engine = other);
> > +             } else {
> > +                     struct drm_i915_gem_execbuffer2 execbuf = {
> > +                             .buffers_ptr = spin->execbuf.buffers_ptr,
> > +                             .buffer_count = spin->execbuf.buffer_count,
> > +                             .flags = other,
> > +                             .rsvd1 = ctx,
> > +                     };
> > +                     gem_execbuf(i915, &execbuf);
> > +             }
> > +     }
> > +     igt_assert(spin);
> > +     igt_assert_eq(nop_sync(i915, ctx, ring, NSEC_PER_SEC), -ETIME);
> > +     igt_spin_free(i915, spin);
> > +}
> > +
> > +static void store_dword(int i915, uint32_t ctx, unsigned ring,
> > +                     uint32_t target, uint32_t offset, uint32_t value,
> > +                     uint32_t cork, unsigned write_domain)
> > +{
> > +     const int gen = intel_gen(intel_get_drm_devid(i915));
> > +     struct drm_i915_gem_exec_object2 obj[3];
> > +     struct drm_i915_gem_relocation_entry reloc;
> > +     struct drm_i915_gem_execbuffer2 execbuf;
> > +     uint32_t batch[16];
> > +     int i;
> > +
> > +     memset(&execbuf, 0, sizeof(execbuf));
> > +     execbuf.buffers_ptr = to_user_pointer(obj + !cork);
> > +     execbuf.buffer_count = 2 + !!cork;
> > +     execbuf.flags = ring;
> > +     if (gen < 6)
> > +             execbuf.flags |= I915_EXEC_SECURE;
> > +     execbuf.rsvd1 = ctx;
> > +
> > +     memset(obj, 0, sizeof(obj));
> > +     obj[0].handle = cork;
> > +     obj[1].handle = target;
> > +     obj[2].handle = gem_create(i915, 4096);
> > +
> > +     memset(&reloc, 0, sizeof(reloc));
> > +     reloc.target_handle = obj[1].handle;
> > +     reloc.presumed_offset = 0;
> > +     reloc.offset = sizeof(uint32_t);
> > +     reloc.delta = offset;
> > +     reloc.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
> > +     reloc.write_domain = write_domain;
> > +     obj[2].relocs_ptr = to_user_pointer(&reloc);
> > +     obj[2].relocation_count = 1;
> > +
> > +     i = 0;
> > +     batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
> > +     if (gen >= 8) {
> > +             batch[++i] = offset;
> > +             batch[++i] = 0;
> > +     } else if (gen >= 4) {
> > +             batch[++i] = 0;
> > +             batch[++i] = offset;
> > +             reloc.offset += sizeof(uint32_t);
> > +     } else {
> > +             batch[i]--;
> > +             batch[++i] = offset;
> > +     }
> > +     batch[++i] = value;
> > +     batch[++i] = MI_BATCH_BUFFER_END;
> > +     gem_write(i915, obj[2].handle, 0, batch, sizeof(batch));
> > +     gem_execbuf(i915, &execbuf);
> > +     gem_close(i915, obj[2].handle);
> > +}
> > +
> > +static uint32_t create_highest_priority(int i915)
> > +{
> > +     uint32_t ctx = gem_context_create(i915);
> > +
> > +     /*
> > +      * If there is no priority support, all contexts will have equal
> > +      * priority (and therefore the max user priority), so no context
> > +      * can overtake us, and we effectively can form a plug.
> > +      */
> > +     __gem_context_set_priority(i915, ctx, MAX_PRIO);
> > +
> > +     return ctx;
> > +}
> > +
> > +static void unplug_show_queue(int i915, struct igt_cork *c, unsigned int engine)
> > +{
> > +     igt_spin_t *spin[MAX_ELSP_QLEN];
> 
> Why is this 16?

2x as big as the deepest known qlen. And 16 is the number that crops up
everywhere as a "just big enough" number.
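
i.e. something like this sketch (the value is as used by the test, the
rationale in the comment):

	#define MAX_ELSP_QLEN 16 /* 2x the deepest known ELSP submission queue */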
 
> > +
> > +     for (int n = 0; n < ARRAY_SIZE(spin); n++) {
> > +             const struct igt_spin_factory opts = {
> > +                     .ctx = create_highest_priority(i915),
> > +                     .engine = engine,
> > +             };
> > +             spin[n] = __igt_spin_factory(i915, &opts);
> > +             gem_context_destroy(i915, opts.ctx);
> > +     }
> > +
> > +     igt_cork_unplug(c); /* batches will now be queued on the engine */
> > +     igt_debugfs_dump(i915, "i915_engine_info");
> > +
> > +     for (int n = 0; n < ARRAY_SIZE(spin); n++)
> > +             igt_spin_free(i915, spin[n]);
> > +}
> > +
> > +static uint32_t store_timestamp(int i915,
> > +                             uint32_t ctx, unsigned ring,
> > +                             unsigned mmio_base)
> > +{
> > +     const bool r64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
> > +     struct drm_i915_gem_exec_object2 obj = {
> > +             .handle = gem_create(i915, 4096),
> > +             .relocation_count = 1,
> > +     };
> > +     struct drm_i915_gem_relocation_entry reloc = {
> > +             .target_handle = obj.handle,
> > +             .offset = 2 * sizeof(uint32_t),
> > +             .delta = 4092,
> > +             .read_domains = I915_GEM_DOMAIN_INSTRUCTION,
> > +     };
> > +     struct drm_i915_gem_execbuffer2 execbuf = {
> > +             .buffers_ptr = to_user_pointer(&obj),
> > +             .buffer_count = 1,
> > +             .flags = ring,
> > +             .rsvd1 = ctx,
> > +     };
> > +     uint32_t batch[] = {
> > +             0x24 << 23 | (1 + r64b), /* SRM */
> > +             mmio_base + 0x358,
> > +             4092,
> > +             0,
> > +             MI_BATCH_BUFFER_END
> > +     };
> > +
> > +     igt_require(intel_gen(intel_get_drm_devid(i915)) >= 7);
> > +
> > +     gem_write(i915, obj.handle, 0, batch, sizeof(batch));
> > +     obj.relocs_ptr = to_user_pointer(&reloc);
> > +
> > +     gem_execbuf(i915, &execbuf);
> > +
> > +     return obj.handle;
> > +}
> > +
> > +static void independent(int i915, unsigned ring, unsigned flags)
> > +{
> > +     uint32_t handle[ARRAY_SIZE(priorities)];
> > +     igt_spin_t *spin[MAX_ELSP_QLEN];
> > +     unsigned int mmio_base;
> > +
> > +     /* XXX i915_query()! */
> > +     switch (ring) {
> > +     case I915_EXEC_DEFAULT:
> > +     case I915_EXEC_RENDER:
> > +             mmio_base = 0x2000;
> > +             break;
> > +#if 0
> > +     case I915_EXEC_BSD:
> > +             mmio_base = 0x12000;
> > +             break;
> > +#endif
> > +     case I915_EXEC_BLT:
> > +             mmio_base = 0x22000;
> > +             break;
> > +
> > +     case I915_EXEC_VEBOX:
> > +             if (intel_gen(intel_get_drm_devid(i915)) >= 11)
> > +                     mmio_base = 0x1d8000;
> > +             else
> > +                     mmio_base = 0x1a000;
> > +             break;
> > +
> > +     default:
> > +             igt_skip("mmio base not known\n");
> > +     }
> 
> Ufff this is quite questionable. Should we rather have this subtest in 
> selftests only?

We should be exporting this information. It is a non-privileged register
that is used by normal clients to measure elapsed time.
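
For reference, this is what the SRM below targets - a sketch, with the
register name taken from the kernel's i915_reg.h:

	/* free-running command streamer timestamp, unprivileged read */
	#define RING_TIMESTAMP(base) ((base) + 0x358)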

> > +
> > +     for (int n = 0; n < ARRAY_SIZE(spin); n++) {
> > +             const struct igt_spin_factory opts = {
> > +                     .ctx = create_highest_priority(i915),
> > +                     .engine = ring,
> > +             };
> > +             spin[n] = __igt_spin_factory(i915, &opts);
> > +             gem_context_destroy(i915, opts.ctx);
> > +     }
> > +
> > +     for (int i = 0; i < ARRAY_SIZE(priorities); i++) {
> > +             uint32_t ctx = gem_queue_create(i915);
> > +             gem_context_set_priority(i915, ctx, priorities[i]);
> > +             handle[i] = store_timestamp(i915, ctx, ring, mmio_base);
> > +             gem_context_destroy(i915, ctx);
> > +     }
> > +
> > +     for (int n = 0; n < ARRAY_SIZE(spin); n++)
> > +             igt_spin_free(i915, spin[n]);
> > +
> > +     for (int i = 0; i < ARRAY_SIZE(priorities); i++) {
> > +             uint32_t *ptr;
> > +
> > +             ptr = gem_mmap__gtt(i915, handle[i], 4096, PROT_READ);
> > +             gem_set_domain(i915, handle[i], /* no write hazard lies! */
> > +                            I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
> > +             gem_close(i915, handle[i]);
> > +
> > +             handle[i] = ptr[1023];
> 
> 1023 relates to 4092 from store_timestamp I gather. The two need to be 
> defined closer together.
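
(The two could share a define - a sketch, name hypothetical:

	#define TIMESTAMP_OFFSET 4092 /* last dword of the scratch page */

with the SRM storing to TIMESTAMP_OFFSET and the readback using
ptr[TIMESTAMP_OFFSET / sizeof(uint32_t)], i.e. ptr[1023].)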
> 
> > +             munmap(ptr, 4096);
> > +
> > +             igt_debug("ctx[%d] .prio=%d, timestamp=%u\n",
> > +                       i, priorities[i], handle[i]);
> > +     }
> > +
> > +     igt_assert((int32_t)(handle[HI] - handle[LO]) < 0);
> > +}
> > +
> > +static void reorder(int i915, unsigned ring, unsigned flags)
> > +#define EQUAL 1
> > +{
> > +     IGT_CORK_HANDLE(cork);
> > +     uint32_t scratch;
> > +     uint32_t *ptr;
> > +     uint32_t ctx[2];
> > +     uint32_t plug;
> > +
> > +     ctx[LO] = gem_queue_create(i915);
> > +     gem_context_set_priority(i915, ctx[LO], MIN_PRIO);
> > +
> > +     ctx[HI] = gem_queue_create(i915);
> > +     gem_context_set_priority(i915, ctx[HI], flags & EQUAL ? MIN_PRIO : 0);
> > +
> > +     scratch = gem_create(i915, 4096);
> > +     plug = igt_cork_plug(&cork, i915);
> > +
> > +     /* We expect the high priority context to be executed first, and
> > +      * so the final result will be the value from the low priority context.
> > +      */
> > +     store_dword(i915, ctx[LO], ring, scratch, 0, ctx[LO], plug, 0);
> > +     store_dword(i915, ctx[HI], ring, scratch, 0, ctx[HI], plug, 0);
> > +
> > +     unplug_show_queue(i915, &cork, ring);
> > +     gem_close(i915, plug);
> > +
> > +     gem_context_destroy(i915, ctx[LO]);
> > +     gem_context_destroy(i915, ctx[HI]);
> > +
> > +     ptr = gem_mmap__gtt(i915, scratch, 4096, PROT_READ);
> > +     gem_set_domain(i915, scratch, /* no write hazard lies! */
> > +                    I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
> > +     gem_close(i915, scratch);
> > +
> > +     if (flags & EQUAL) /* equal priority, result will be fifo */
> > +             igt_assert_eq_u32(ptr[0], ctx[HI]);
> > +     else
> > +             igt_assert_eq_u32(ptr[0], ctx[LO]);
> > +     munmap(ptr, 4096);
> > +}
> > +
> > +static void promotion(int i915, unsigned ring)
> > +{
> > +     IGT_CORK_HANDLE(cork);
> > +     uint32_t result, dep;
> > +     uint32_t *ptr;
> > +     uint32_t ctx[3];
> > +     uint32_t plug;
> > +
> > +     ctx[LO] = gem_queue_create(i915);
> > +     gem_context_set_priority(i915, ctx[LO], MIN_PRIO);
> > +
> > +     ctx[HI] = gem_queue_create(i915);
> > +     gem_context_set_priority(i915, ctx[HI], 0);
> > +
> > +     ctx[NOISE] = gem_queue_create(i915);
> > +     gem_context_set_priority(i915, ctx[NOISE], MIN_PRIO/2);
> > +
> > +     result = gem_create(i915, 4096);
> > +     dep = gem_create(i915, 4096);
> > +
> > +     plug = igt_cork_plug(&cork, i915);
> > +
> > +     /* Expect that HI promotes LO, so the order will be LO, HI, NOISE.
> > +      *
> > +      * fifo would be NOISE, LO, HI.
> > +      * strict priority would be  HI, NOISE, LO
> > +      */
> > +     store_dword(i915, ctx[NOISE], ring, result, 0, ctx[NOISE], plug, 0);
> > +     store_dword(i915, ctx[LO], ring, result, 0, ctx[LO], plug, 0);
> > +
> > +     /* link LO <-> HI via a dependency on another buffer */
> > +     store_dword(i915, ctx[LO], ring, dep, 0, ctx[LO], 0, I915_GEM_DOMAIN_INSTRUCTION);
> > +     store_dword(i915, ctx[HI], ring, dep, 0, ctx[HI], 0, 0);
> > +
> > +     store_dword(i915, ctx[HI], ring, result, 0, ctx[HI], 0, 0);
> > +
> > +     unplug_show_queue(i915, &cork, ring);
> > +     gem_close(i915, plug);
> > +
> > +     gem_context_destroy(i915, ctx[NOISE]);
> > +     gem_context_destroy(i915, ctx[LO]);
> > +     gem_context_destroy(i915, ctx[HI]);
> > +
> > +     ptr = gem_mmap__gtt(i915, dep, 4096, PROT_READ);
> > +     gem_set_domain(i915, dep, /* no write hazard lies! */
> > +                     I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
> > +     gem_close(i915, dep);
> > +
> > +     igt_assert_eq_u32(ptr[0], ctx[HI]);
> > +     munmap(ptr, 4096);
> > +
> > +     ptr = gem_mmap__gtt(i915, result, 4096, PROT_READ);
> > +     gem_set_domain(i915, result, /* no write hazard lies! */
> > +                     I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
> > +     gem_close(i915, result);
> > +
> > +     igt_assert_eq_u32(ptr[0], ctx[NOISE]);
> > +     munmap(ptr, 4096);
> > +}
> > +
> > +static void smoketest(int i915, unsigned ring, unsigned timeout)
> > +{
> > +     const int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
> > +     unsigned engines[16];
> > +     unsigned nengine;
> > +     unsigned engine;
> > +     uint32_t scratch;
> > +     uint32_t *ptr;
> > +
> > +     nengine = 0;
> > +     for_each_engine(i915, engine) {
> > +             if (ignore_engine(engine))
> > +                     continue;
> > +
> > +             engines[nengine++] = engine;
> > +     }
> > +     igt_require(nengine);
> 
> for_each_physical and counting the engines for the engines array would 
> be better I think.
> 
> > +
> > +     scratch = gem_create(i915, 4096);
> > +     igt_fork(child, ncpus) {
> > +             unsigned long count = 0;
> > +             uint32_t ctx;
> > +
> > +             hars_petruska_f54_1_random_perturb(child);
> > +
> > +             ctx = gem_queue_create(i915);
> > +             igt_until_timeout(timeout) {
> > +                     int prio;
> > +
> > +                     prio = hars_petruska_f54_1_random_unsafe_max(MAX_PRIO - MIN_PRIO) + MIN_PRIO;
> > +                     gem_context_set_priority(i915, ctx, prio);
> > +
> > +                     engine = engines[hars_petruska_f54_1_random_unsafe_max(nengine)];
> > +                     store_dword(i915, ctx, engine, scratch,
> > +                                 8*child + 0, ~child,
> > +                                 0, 0);
> > +                     for (unsigned int step = 0; step < 8; step++)
> > +                             store_dword(i915, ctx, engine, scratch,
> > +                                         8*child + 4, count++,
> > +                                         0, 0);
> > +             }
> > +             gem_context_destroy(i915, ctx);
> > +     }
> > +     igt_waitchildren();
> > +
> > +     ptr = gem_mmap__gtt(i915, scratch, 4096, PROT_READ);
> > +     gem_set_domain(i915, scratch, /* no write hazard lies! */
> > +                     I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
> > +     gem_close(i915, scratch);
> > +
> > +     for (unsigned n = 0; n < ncpus; n++) {
> > +             igt_assert_eq_u32(ptr[2*n], ~n);
> > +             /*
> > +              * Note this count is approximate due to unconstrained
> > +              * ordering of the dword writes between engines.
> > +              *
> > +              * Take the result with a pinch of salt.
> > +              */
> > +             igt_info("Child[%d] completed %u cycles\n",  n, ptr[2*n+1]);
> > +     }
> > +     munmap(ptr, 4096);
> > +}
> > +
> > +igt_main
> > +{
> > +     const struct intel_execution_engine *e;
> > +     int i915 = -1;
> > +
> > +     igt_fixture {
> > +             i915 = drm_open_driver(DRIVER_INTEL);
> > +             igt_require_gem(i915);
> > +     }
> > +
> > +     igt_subtest_group {
> > +             igt_fixture {
> > +                     igt_require(gem_contexts_has_shared_gtt(i915));
> > +                     igt_fork_hang_detector(i915);
> > +             }
> > +
> > +             igt_subtest("create-shared-gtt")
> > +                     create_shared_gtt(i915, 0);
> > +
> > +             igt_subtest("detached-shared-gtt")
> > +                     create_shared_gtt(i915, DETACHED);
> > +
> > +             igt_subtest("disjoint-timelines")
> > +                     disjoint_timelines(i915);
> > +
> > +             igt_subtest("single-timeline")
> > +                     single_timeline(i915);
> > +
> > +             igt_subtest("exhaust-shared-gtt")
> > +                     exhaust_shared_gtt(i915, 0);
> > +
> > +             igt_subtest("exhaust-shared-gtt-lrc")
> > +                     exhaust_shared_gtt(i915, EXHAUST_LRC);
> > +
> > +             for (e = intel_execution_engines; e->name; e++) {
> > +                     igt_subtest_f("exec-shared-gtt-%s", e->name)
> > +                             exec_shared_gtt(i915, e->exec_id | e->flags);
> 
> The same previously raised question on whether it should iterate the 
> legacy execbuf engines or the physical engines. Maybe you want 
> different subtests to do both?

It should be testing the cross between the context and execbuf uABI, not
physical.
-Chris
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH i-g-t 10/16] i915/gem_exec_whisper: Fork all-engine tests one-per-engine
  2019-05-14 12:57     ` [igt-dev] " Tvrtko Ursulin
@ 2019-05-15 19:35       ` Chris Wilson
  -1 siblings, 0 replies; 92+ messages in thread
From: Chris Wilson @ 2019-05-15 19:35 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx; +Cc: igt-dev

Quoting Tvrtko Ursulin (2019-05-14 13:57:26)
> 
> On 08/05/2019 11:09, Chris Wilson wrote:
> > Add a new mode for some more stress, submit the all-engines tests
> > simultaneously, a stream per engine.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >   tests/i915/gem_exec_whisper.c | 27 ++++++++++++++++++++++-----
> >   1 file changed, 22 insertions(+), 5 deletions(-)
> > 
> > diff --git a/tests/i915/gem_exec_whisper.c b/tests/i915/gem_exec_whisper.c
> > index d3e0b0ba2..d5afc8119 100644
> > --- a/tests/i915/gem_exec_whisper.c
> > +++ b/tests/i915/gem_exec_whisper.c
> > @@ -88,6 +88,7 @@ static void verify_reloc(int fd, uint32_t handle,
> >   #define SYNC 0x40
> >   #define PRIORITY 0x80
> >   #define QUEUES 0x100
> > +#define ALL 0x200
> >   
> >   struct hang {
> >       struct drm_i915_gem_exec_object2 obj;
> > @@ -199,6 +200,7 @@ static void whisper(int fd, unsigned engine, unsigned flags)
> >       uint64_t old_offset;
> >       int i, n, loc;
> >       int debugfs;
> > +     int nchild;
> >   
> >       if (flags & PRIORITY) {
> >               igt_require(gem_scheduler_enabled(fd));
> > @@ -215,6 +217,7 @@ static void whisper(int fd, unsigned engine, unsigned flags)
> >                               engines[nengine++] = engine;
> >               }
> >       } else {
> > +             igt_assert(!(flags & ALL));
> >               igt_require(gem_has_ring(fd, engine));
> >               igt_require(gem_can_store_dword(fd, engine));
> >               engines[nengine++] = engine;
> > @@ -233,11 +236,22 @@ static void whisper(int fd, unsigned engine, unsigned flags)
> >       if (flags & HANG)
> >               init_hang(&hang);
> >   
> > +     nchild = 1;
> > +     if (flags & FORKED)
> > +             nchild *= sysconf(_SC_NPROCESSORS_ONLN);
> > +     if (flags & ALL)
> > +             nchild *= nengine;
> > +
> >       intel_detect_and_clear_missed_interrupts(fd);
> >       gpu_power_read(&power, &sample[0]);
> > -     igt_fork(child, flags & FORKED ? sysconf(_SC_NPROCESSORS_ONLN) : 1)  {
> > +     igt_fork(child, nchild) {
> >               unsigned int pass;
> >   
> > +             if (flags & ALL) {
> > +                     engines[0] = engines[child % nengine];
> 
> Relying on PIDs being sequential feels fragile but suggesting pipes or 
> shared memory would be overkill. How about another loop:

Where are you getting pid_t from? child is an integer [0, nchild).
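
A sketch of the contract, which is all igt_fork() already provides:

	igt_fork(child, nchild) { /* 'child' is the index, not a pid */
		unsigned int engine = engines[child % nengine];

		/* ... per-child workload on 'engine' ... */
	}
	igt_waitchildren();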
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [igt-dev] [PATCH i-g-t 10/16] i915/gem_exec_whisper: Fork all-engine tests one-per-engine
@ 2019-05-15 19:35       ` Chris Wilson
  0 siblings, 0 replies; 92+ messages in thread
From: Chris Wilson @ 2019-05-15 19:35 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx; +Cc: igt-dev

Quoting Tvrtko Ursulin (2019-05-14 13:57:26)
> 
> On 08/05/2019 11:09, Chris Wilson wrote:
> > Add a new mode for some more stress, submit the all-engines tests
> > simultaneously, a stream per engine.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >   tests/i915/gem_exec_whisper.c | 27 ++++++++++++++++++++++-----
> >   1 file changed, 22 insertions(+), 5 deletions(-)
> > 
> > diff --git a/tests/i915/gem_exec_whisper.c b/tests/i915/gem_exec_whisper.c
> > index d3e0b0ba2..d5afc8119 100644
> > --- a/tests/i915/gem_exec_whisper.c
> > +++ b/tests/i915/gem_exec_whisper.c
> > @@ -88,6 +88,7 @@ static void verify_reloc(int fd, uint32_t handle,
> >   #define SYNC 0x40
> >   #define PRIORITY 0x80
> >   #define QUEUES 0x100
> > +#define ALL 0x200
> >   
> >   struct hang {
> >       struct drm_i915_gem_exec_object2 obj;
> > @@ -199,6 +200,7 @@ static void whisper(int fd, unsigned engine, unsigned flags)
> >       uint64_t old_offset;
> >       int i, n, loc;
> >       int debugfs;
> > +     int nchild;
> >   
> >       if (flags & PRIORITY) {
> >               igt_require(gem_scheduler_enabled(fd));
> > @@ -215,6 +217,7 @@ static void whisper(int fd, unsigned engine, unsigned flags)
> >                               engines[nengine++] = engine;
> >               }
> >       } else {
> > +             igt_assert(!(flags & ALL));
> >               igt_require(gem_has_ring(fd, engine));
> >               igt_require(gem_can_store_dword(fd, engine));
> >               engines[nengine++] = engine;
> > @@ -233,11 +236,22 @@ static void whisper(int fd, unsigned engine, unsigned flags)
> >       if (flags & HANG)
> >               init_hang(&hang);
> >   
> > +     nchild = 1;
> > +     if (flags & FORKED)
> > +             nchild *= sysconf(_SC_NPROCESSORS_ONLN);
> > +     if (flags & ALL)
> > +             nchild *= nengine;
> > +
> >       intel_detect_and_clear_missed_interrupts(fd);
> >       gpu_power_read(&power, &sample[0]);
> > -     igt_fork(child, flags & FORKED ? sysconf(_SC_NPROCESSORS_ONLN) : 1)  {
> > +     igt_fork(child, nchild) {
> >               unsigned int pass;
> >   
> > +             if (flags & ALL) {
> > +                     engines[0] = engines[child % nengine];
> 
> Relying on PIDs being sequential feels fragile but suggesting pipes or 
> shared memory would be overkill. How about another loop:

Where are you getting pid_t from? child is an integer [0, nchild).
-Chris
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH i-g-t 13/16] i915: Add gem_exec_balancer
  2019-05-15 10:49     ` [igt-dev] " Tvrtko Ursulin
@ 2019-05-15 19:50       ` Chris Wilson
  -1 siblings, 0 replies; 92+ messages in thread
From: Chris Wilson @ 2019-05-15 19:50 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx; +Cc: igt-dev

Quoting Tvrtko Ursulin (2019-05-15 11:49:45)
> 
> On 08/05/2019 11:09, Chris Wilson wrote:
> > Exercise the in-kernel load balancer checking that we can distribute
> > batches across the set of ctx->engines to avoid load.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >   tests/Makefile.am              |    1 +
> >   tests/Makefile.sources         |    1 +
> >   tests/i915/gem_exec_balancer.c | 1050 ++++++++++++++++++++++++++++++++
> >   tests/meson.build              |    7 +
> >   4 files changed, 1059 insertions(+)
> >   create mode 100644 tests/i915/gem_exec_balancer.c
> > 
> > diff --git a/tests/Makefile.am b/tests/Makefile.am
> > index 5097debf6..c6af0aeaf 100644
> > --- a/tests/Makefile.am
> > +++ b/tests/Makefile.am
> > @@ -96,6 +96,7 @@ gem_close_race_LDADD = $(LDADD) -lpthread
> >   gem_ctx_thrash_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
> >   gem_ctx_thrash_LDADD = $(LDADD) -lpthread
> >   gem_ctx_sseu_LDADD = $(LDADD) $(top_builddir)/lib/libigt_perf.la
> > +i915_gem_exec_balancer_LDADD = $(LDADD) $(top_builddir)/lib/libigt_perf.la
> >   gem_exec_capture_LDADD = $(LDADD) -lz
> >   gem_exec_parallel_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
> >   gem_exec_parallel_LDADD = $(LDADD) -lpthread
> > diff --git a/tests/Makefile.sources b/tests/Makefile.sources
> > index e7ee27e81..323b625aa 100644
> > --- a/tests/Makefile.sources
> > +++ b/tests/Makefile.sources
> > @@ -24,6 +24,7 @@ TESTS_progs = \
> >       i915/gem_ctx_clone \
> >       i915/gem_ctx_engines \
> >       i915/gem_ctx_shared \
> > +     i915/gem_exec_balancer \
> >       i915/gem_vm_create \
> >       kms_3d \
> >       kms_addfb_basic \
> > diff --git a/tests/i915/gem_exec_balancer.c b/tests/i915/gem_exec_balancer.c
> > new file mode 100644
> > index 000000000..25195d478
> > --- /dev/null
> > +++ b/tests/i915/gem_exec_balancer.c
> > @@ -0,0 +1,1050 @@
> > +/*
> > + * Copyright © 2018 Intel Corporation
> 
> 2019 I guess, even though work was started in 2018?
> 
> > + *
> > + * Permission is hereby granted, free of charge, to any person obtaining a
> > + * copy of this software and associated documentation files (the "Software"),
> > + * to deal in the Software without restriction, including without limitation
> > + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> > + * and/or sell copies of the Software, and to permit persons to whom the
> > + * Software is furnished to do so, subject to the following conditions:
> > + *
> > + * The above copyright notice and this permission notice (including the next
> > + * paragraph) shall be included in all copies or substantial portions of the
> > + * Software.
> > + *
> > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> > + * IN THE SOFTWARE.
> > + */
> > +
> > +#include <sched.h>
> > +
> > +#include "igt.h"
> > +#include "igt_perf.h"
> > +#include "i915/gem_ring.h"
> > +#include "sw_sync.h"
> > +
> > +IGT_TEST_DESCRIPTION("Exercise in-kernel load-balancing");
> > +
> > +#define INSTANCE_COUNT (1 << I915_PMU_SAMPLE_INSTANCE_BITS)
> 
> Hmm.. this is a strange surrogate but I guess it works.
> 
> > +
> > +static bool has_class_instance(int i915, uint16_t class, uint16_t instance)
> > +{
> > +     int fd;
> > +
> > +     fd = perf_i915_open(I915_PMU_ENGINE_BUSY(class, instance));
> 
> More work for Andi to replace with real engine discovery. :)
> 
> > +     if (fd != -1) {
> > +             close(fd);
> > +             return true;
> > +     }
> > +
> > +     return false;
> > +}
> > +
> > +static struct i915_engine_class_instance *
> > +list_engines(int i915, uint32_t class_mask, unsigned int *out)
> > +{
> > +     unsigned int count = 0, size = 64;
> > +     struct i915_engine_class_instance *engines;
> > +
> > +     engines = malloc(size * sizeof(*engines));
> > +     if (!engines) {
> > +             *out = 0;
> > +             return NULL;
> > +     }
> > +
> > +     for (enum drm_i915_gem_engine_class class = I915_ENGINE_CLASS_RENDER;
> > +          class_mask;
> > +          class++, class_mask >>= 1) {
> > +             if (!(class_mask & 1))
> > +                     continue;
> > +
> > +             for (unsigned int instance = 0;
> > +                  instance < INSTANCE_COUNT;
> > +                  instance++) {
> > +                  if (!has_class_instance(i915, class, instance))
> > +                          continue;
> > +
> > +                     if (count == size) {
> > +                             struct i915_engine_class_instance *e;
> > +
> > +                             size *= 2;
> > +                             e = realloc(engines, size*sizeof(*engines));
> > +                             if (!e) {
> 
> I'd just assert. On malloc as well.
> 
> > +                                     *out = count;
> > +                                     return engines;
> > +                             }
> > +
> > +                             engines = e;
> > +                     }
> > +
> > +                     engines[count++] = (struct i915_engine_class_instance){
> > +                             .engine_class = class,
> > +                             .engine_instance = instance,
> > +                     };
> > +             }
> > +     }
> > +
> > +     if (!count) {
> > +             free(engines);
> > +             engines = NULL;
> > +     }
> > +
> > +     *out = count;
> > +     return engines;
> > +}
> > +
> > +static int __set_load_balancer(int i915, uint32_t ctx,
> > +                            const struct i915_engine_class_instance *ci,
> > +                            unsigned int count)
> > +{
> > +     I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(balancer, count);
> > +     I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 1 + count);
> > +     struct drm_i915_gem_context_param p = {
> > +             .ctx_id = ctx,
> > +             .param = I915_CONTEXT_PARAM_ENGINES,
> > +             .size = sizeof(engines),
> > +             .value = to_user_pointer(&engines)
> > +     };
> > +
> > +     memset(&balancer, 0, sizeof(balancer));
> > +     balancer.base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE;
> > +
> > +     igt_assert(count);
> > +     balancer.num_siblings = count;
> > +     memcpy(balancer.engines, ci, count * sizeof(*ci));
> > +
> > +     memset(&engines, 0, sizeof(engines));
> > +     engines.extensions = to_user_pointer(&balancer);
> > +     engines.engines[0].engine_class =
> > +             I915_ENGINE_CLASS_INVALID;
> > +     engines.engines[0].engine_instance =
> > +             I915_ENGINE_CLASS_INVALID_NONE;
> > +     memcpy(engines.engines + 1, ci, count * sizeof(*ci));
> > +
> > +     return __gem_context_set_param(i915, &p);
> > +}
> > +
> > +static void set_load_balancer(int i915, uint32_t ctx,
> > +                           const struct i915_engine_class_instance *ci,
> > +                           unsigned int count)
> > +{
> > +     igt_assert_eq(__set_load_balancer(i915, ctx, ci, count), 0);
> > +}
> > +
> > +static uint32_t load_balancer_create(int i915,
> > +                                  const struct i915_engine_class_instance *ci,
> > +                                  unsigned int count)
> > +{
> > +     uint32_t ctx;
> > +
> > +     ctx = gem_context_create(i915);
> > +     set_load_balancer(i915, ctx, ci, count);
> > +
> > +     return ctx;
> > +}
> > +
> > +static uint32_t __batch_create(int i915, uint32_t offset)
> > +{
> > +     const uint32_t bbe = MI_BATCH_BUFFER_END;
> > +     uint32_t handle;
> > +
> > +     handle = gem_create(i915, ALIGN(offset + 4, 4096));
> > +     gem_write(i915, handle, offset, &bbe, sizeof(bbe));
> > +
> > +     return handle;
> > +}
> > +
> > +static uint32_t batch_create(int i915)
> > +{
> > +     return __batch_create(i915, 0);
> > +}
> > +
> > +static void invalid_balancer(int i915)
> > +{
> > +     I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(balancer, 64);
> > +     I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 64);
> > +     struct drm_i915_gem_context_param p = {
> > +             .param = I915_CONTEXT_PARAM_ENGINES,
> > +             .value = to_user_pointer(&engines)
> > +     };
> > +     uint32_t handle;
> > +     void *ptr;
> > +
> > +     /*
> > +      * Assume that I915_CONTEXT_PARAM_ENGINE validates the array
> > +      * of engines[], our job is to determine if the load_balancer
> > +      * extension explodes.
> > +      */
> > +
> > +     for (int class = 0; class < 32; class++) {
> > +             struct i915_engine_class_instance *ci;
> > +             unsigned int count;
> > +
> > +             ci = list_engines(i915, 1 << class, &count);
> > +             if (!ci)
> > +                     continue;
> > +
> > +             igt_debug("Found %d engines\n", count);
> > +             igt_assert_lte(count, 64);
> 
> Hey.. you always say trust the kernel! ;)

This code was a placeholder that you said you would replace with a
proper query API...

> 
> > +
> > +             p.ctx_id = gem_context_create(i915);
> > +             p.size = (sizeof(struct i915_context_param_engines) +
> > +                             (count + 1) * sizeof(*engines.engines));
> 
> Alignment looks off.
> 
> > +
> > +             memset(&engines, 0, sizeof(engines));
> > +             engines.engines[0].engine_class = I915_ENGINE_CLASS_INVALID;
> > +             engines.engines[0].engine_instance = I915_ENGINE_CLASS_INVALID_NONE;
> > +             memcpy(engines.engines + 1, ci, count * sizeof(*ci));
> > +             gem_context_set_param(i915, &p);
> > +
> > +             engines.extensions = -1ull;
> > +             igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
> > +
> > +             engines.extensions = 1ull;
> > +             igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
> > +
> > +             memset(&balancer, 0, sizeof(balancer));
> > +             balancer.base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE;
> > +             balancer.num_siblings = count;
> > +             memcpy(balancer.engines, ci, count * sizeof(*ci));
> > +
> > +             engines.extensions = to_user_pointer(&balancer);
> > +             gem_context_set_param(i915, &p);
> > +
> > +             balancer.engine_index = 1;
> > +             igt_assert_eq(__gem_context_set_param(i915, &p), -EEXIST);
> > +
> > +             balancer.engine_index = count;
> > +             igt_assert_eq(__gem_context_set_param(i915, &p), -EEXIST);
> > +
> > +             balancer.engine_index = count + 1;
> > +             igt_assert_eq(__gem_context_set_param(i915, &p), -EINVAL);
> > +
> > +             balancer.engine_index = 0;
> > +             gem_context_set_param(i915, &p);
> > +
> > +             balancer.base.next_extension = to_user_pointer(&balancer);
> > +             igt_assert_eq(__gem_context_set_param(i915, &p), -EEXIST);
> > +
> > +             balancer.base.next_extension = -1ull;
> > +             igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
> > +
> > +             handle = gem_create(i915, 4096 * 3);
> > +             ptr = gem_mmap__gtt(i915, handle, 4096 * 3, PROT_WRITE);
> > +             gem_close(i915, handle);
> > +
> > +             memset(&engines, 0, sizeof(engines));
> > +             engines.engines[0].engine_class = I915_ENGINE_CLASS_INVALID;
> > +             engines.engines[0].engine_instance = I915_ENGINE_CLASS_INVALID_NONE;
> > +             engines.engines[1].engine_class = I915_ENGINE_CLASS_INVALID;
> > +             engines.engines[1].engine_instance = I915_ENGINE_CLASS_INVALID_NONE;
> > +             memcpy(engines.engines + 2, ci, count * sizeof(ci));
> > +             p.size = (sizeof(struct i915_context_param_engines) +
> > +                             (count + 2) * sizeof(*engines.engines));
> 
> Alignment again.
> 
> > +             gem_context_set_param(i915, &p);
> > +
> > +             balancer.base.next_extension = 0;
> > +             balancer.engine_index = 1;
> > +             engines.extensions = to_user_pointer(&balancer);
> > +             gem_context_set_param(i915, &p);
> > +
> > +             memcpy(ptr + 4096 - 8, &balancer, sizeof(balancer));
> > +             memcpy(ptr + 8192 - 8, &balancer, sizeof(balancer));
> > +             balancer.engine_index = 0;
> > +
> > +             engines.extensions = to_user_pointer(ptr) + 4096 - 8;
> > +             gem_context_set_param(i915, &p);
> > +
> > +             balancer.base.next_extension = engines.extensions;
> > +             engines.extensions = to_user_pointer(&balancer);
> > +             gem_context_set_param(i915, &p);
> 
> mmap_gtt and unmapped area testing in one?

Neighbouring.

> > +             munmap(ptr, 4096);
> > +             igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
> > +             engines.extensions = to_user_pointer(ptr) + 4096 - 8;
> > +             igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
> > +
> > +             engines.extensions = to_user_pointer(ptr) + 8192 - 8;
> > +             gem_context_set_param(i915, &p);
> > +
> > +             balancer.base.next_extension = engines.extensions;
> > +             engines.extensions = to_user_pointer(&balancer);
> > +             gem_context_set_param(i915, &p);
> > +
> > +             munmap(ptr + 8192, 4096);
> > +             igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
> > +             engines.extensions = to_user_pointer(ptr) + 8192 - 8;
> > +             igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
> > +
> > +             munmap(ptr + 4096, 4096);
> > +
> > +             gem_context_destroy(i915, p.ctx_id);
> > +             free(ci);
> > +     }
> > +}
> > +
> > +static void kick_kthreads(int period_us)
> > +{
> > +     sched_yield();
> > +     usleep(period_us);
> 
> yield and sleep, hm.. is it ever called with zero period_us? Doesn't 
> seem like it. So what's it about?

Historically yield may have been a no-op, but sleep(0) actually yielded.

> > +}
> > +
> > +static double measure_load(int pmu, int period_us)
> > +{
> > +     uint64_t data[2];
> > +     uint64_t d_t, d_v;
> > +
> > +     kick_kthreads(period_us);
> > +
> > +     igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
> > +     d_v = -data[0];
> > +     d_t = -data[1];
> > +
> > +     usleep(period_us);
> > +
> > +     igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
> > +     d_v += data[0];
> > +     d_t += data[1];
> 
> This -val + val trick with uint64_t works?

Yes, unsigned overflow is defined.
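
A minimal illustration with made-up numbers:

	uint64_t before = 100, after = 150;
	uint64_t delta = -before;	/* wraps to 2^64 - 100 */

	delta += after;			/* wraps again: delta == 50 */
	igt_assert_eq_u64(delta, after - before);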

> 
> > +
> > +     return d_v / (double)d_t;
> > +}
> > +
> > +static double measure_min_load(int pmu, unsigned int num, int period_us)
> > +{
> > +     uint64_t data[2 + num];
> > +     uint64_t d_t, d_v[num];
> > +     uint64_t min = -1, max = 0;
> > +
> > +     kick_kthreads(period_us);
> > +
> > +     igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
> > +     for (unsigned int n = 0; n < num; n++)
> > +             d_v[n] = -data[2 + n];
> > +     d_t = -data[1];
> > +
> > +     usleep(period_us);
> > +
> > +     igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
> > +
> > +     d_t += data[1];
> > +     for (unsigned int n = 0; n < num; n++) {
> > +             d_v[n] += data[2 + n];
> > +             igt_debug("engine[%d]: %.1f%%\n",
> > +                       n, d_v[n] / (double)d_t * 100);
> > +             if (d_v[n] < min)
> > +                     min = d_v[n];
> > +             if (d_v[n] > max)
> > +                     max = d_v[n];
> > +     }
> > +
> > +     igt_debug("elapsed: %"PRIu64"ns, load [%.1f, %.1f]%%\n",
> > +               d_t, min / (double)d_t * 100,  max / (double)d_t * 100);
> > +
> > +     return min / (double)d_t;
> > +}
> > +
> > +static void check_individual_engine(int i915,
> > +                                 uint32_t ctx,
> > +                                 const struct i915_engine_class_instance *ci,
> > +                                 int idx)
> > +{
> > +     igt_spin_t *spin;
> > +     double load;
> > +     int pmu;
> > +
> > +     pmu = perf_i915_open(I915_PMU_ENGINE_BUSY(ci[idx].engine_class,
> > +                                               ci[idx].engine_instance));
> > +
> > +     spin = igt_spin_new(i915, .ctx = ctx, .engine = idx + 1);
> > +     load = measure_load(pmu, 10000);
> 
> Hm, the usleep before the start of measuring and between the two 
> samples is the same. The one before should be fixed I think, no?

Could be, that would require thought as to what the appropriate period
for kicking should be. Yay for ksoftirqd.

> > +     igt_spin_free(i915, spin);
> > +
> > +     close(pmu);
> > +
> > +     igt_assert_f(load > 0.90,
> > +                  "engine %d (class:instance %d:%d) was found to be only %.1f%% busy\n",
> > +                  idx, ci[idx].engine_class, ci[idx].engine_instance, load*100);
> > +}
> > +
> > +static void individual(int i915)
> > +{
> > +     uint32_t ctx;
> > +
> > +     /*
> > +      * I915_CONTEXT_PARAM_ENGINE allows us to index into the user
> > +      * supplied array from gem_execbuf(). Our check is to build the
> > +      * ctx->engine[] with various different engine classes, feed in
> > +      * a spinner and then ask pmu to confirm it the expected engine
> > +      * was busy.
> > +      */
> > +
> > +     ctx = gem_context_create(i915);
> > +
> > +     for (int mask = 0; mask < 32; mask++) {
> > +             struct i915_engine_class_instance *ci;
> > +             unsigned int count;
> > +
> > +             ci = list_engines(i915, 1u << mask, &count);
> > +             if (!ci)
> > +                     continue;
> > +
> > +             igt_debug("Found %d engines of class %d\n", count, mask);
> > +
> > +             for (int pass = 0; pass < count; pass++) { /* approx. count! */
> > +                     igt_permute_array(ci, count, igt_exchange_int64);
> 
> struct i915_engine_class_instance is four bytes long, so the swap func 
> looks wrong. Unless for some reason you want to swap in blocks of two. 
> Don't know. The last index would grab into random memory though. I must 
> be missing something or it wouldn't have worked..

Once upon a time class_instance was 2xu32.
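
With today's 2 x u16 layout the element is 4 bytes, so the matching
helper would presumably be the existing 4-byte swapper:

	igt_permute_array(ci, count, igt_exchange_int);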

> 
> > +                     set_load_balancer(i915, ctx, ci, count);
> > +                     for (unsigned int n = 0; n < count; n++)
> > +                             check_individual_engine(i915, ctx, ci, n);
> > +             }
> > +
> > +             free(ci);
> > +     }
> > +
> > +     gem_context_destroy(i915, ctx);
> > +     gem_quiescent_gpu(i915);
> > +}
> > +
> > +static void indicies(int i915)
> 
> indices?
> 
> > +{
> > +     I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, I915_EXEC_RING_MASK + 1);
> > +     struct drm_i915_gem_context_param p = {
> > +             .ctx_id = gem_context_create(i915),
> > +             .param = I915_CONTEXT_PARAM_ENGINES,
> > +             .value = to_user_pointer(&engines)
> > +     };
> > +
> > +     struct drm_i915_gem_exec_object2 batch = {
> > +             .handle = batch_create(i915),
> > +     };
> > +
> > +     unsigned int nengines = 0;
> > +     void *balancers = NULL;
> > +
> > +     /*
> > +      * We can populate our engine map with multiple virtual engines.
> > +      * Do so.
> > +      */
> > +
> > +     for (int class = 0; class < 32; class++) {
> > +             struct i915_engine_class_instance *ci;
> > +             unsigned int count;
> > +
> > +             ci = list_engines(i915, 1u << class, &count);
> > +             if (!ci)
> > +                     continue;
> > +
> > +             igt_debug("Found %d engines of class %d\n", count, class);
> 
> Maybe this debug message should go into list_engines, since it seems 
> repeated a few times already.

Or remove the debug, I hear you.
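
If it stays, folding it into the helper would look something like this
(wrapper name hypothetical):

	static struct i915_engine_class_instance *
	list_engines_verbose(int i915, uint32_t class_mask, unsigned int *out)
	{
		struct i915_engine_class_instance *ci =
			list_engines(i915, class_mask, out);

		if (ci)
			igt_debug("Found %d engines for mask %x\n",
				  *out, class_mask);

		return ci;
	}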

> > +
> > +             for (int n = 0; n < count; n++) {
> > +                     I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(*balancer,
> > +                                                              count);
> > +
> > +                     engines.engines[nengines].engine_class =
> > +                             I915_ENGINE_CLASS_INVALID;
> > +                     engines.engines[nengines].engine_instance =
> > +                             I915_ENGINE_CLASS_INVALID_NONE;
> > +
> > +                     balancer = calloc(sizeof(*balancer), 1);
> > +                     igt_assert(balancer);
> > +
> > +                     balancer->base.name =
> > +                             I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE;
> > +                     balancer->base.next_extension =
> > +                             to_user_pointer(balancers);
> > +                     balancers = balancer;
> > +
> > +                     balancer->engine_index = nengines++;
> > +                     balancer->num_siblings = count;
> > +
> > +                     memcpy(balancer->engines,
> > +                            ci, count * sizeof(*ci));
> > +             }
> > +             free(ci);
> > +     }
> > +
> > +     igt_require(balancers);
> > +     engines.extensions = to_user_pointer(balancers);
> > +     p.size = (sizeof(struct i915_engine_class_instance) * nengines +
> > +               sizeof(struct i915_context_param_engines));
> > +     gem_context_set_param(i915, &p);
> > +
> > +     for (unsigned int n = 0; n < nengines; n++) {
> > +             struct drm_i915_gem_execbuffer2 eb = {
> > +                     .buffers_ptr = to_user_pointer(&batch),
> > +                     .buffer_count = 1,
> > +                     .flags = n,
> > +                     .rsvd1 = p.ctx_id,
> > +             };
> > +             igt_debug("Executing on index=%d\n", n);
> > +             gem_execbuf(i915, &eb);
> > +     }
> > +     gem_context_destroy(i915, p.ctx_id);
> > +
> > +     gem_sync(i915, batch.handle);
> > +     gem_close(i915, batch.handle);
> > +
> > +     while (balancers) {
> > +             struct i915_context_engines_load_balance *b, *n;
> > +
> > +             b = balancers;
> > +             n = from_user_pointer(b->base.next_extension);
> > +             free(b);
> > +
> > +             balancers = n;
> > +     }
> > +
> > +     gem_quiescent_gpu(i915);
> > +}
> > +
> > +static void busy(int i915)
> > +{
> > +     uint32_t scratch = gem_create(i915, 4096);
> > +
> > +     /*
> > +      * Check that virtual engines are reported via GEM_BUSY.
> > +      *
> > +      * When running, the batch will be on the real engine and report
> > +      * the actual class.
> > +      *
> > +      * Prior to running, if the load-balancer is across multiple
> > +      * classes we don't know which engine the batch will
> > +      * execute on, so we report them all!
> > +      *
> > +      * However, as we only support (and test) creating a load-balancer
> > +      * from engines of only one class, that can be propagated accurately
> > +      * through to GEM_BUSY.
> > +      */
> > +
> > +     for (int class = 0; class < 16; class++) {
> > +             struct drm_i915_gem_busy busy;
> > +             struct i915_engine_class_instance *ci;
> > +             unsigned int count;
> > +             igt_spin_t *spin[2];
> > +             uint32_t ctx;
> > +
> > +             ci = list_engines(i915, 1u << class, &count);
> > +             if (!ci)
> > +                     continue;
> > +
> > +             igt_debug("Found %d engines of class %d\n", count, class);
> > +             ctx = load_balancer_create(i915, ci, count);
> > +             free(ci);
> > +
> > +             spin[0] = __igt_spin_new(i915,
> > +                                      .ctx = ctx,
> > +                                      .flags = IGT_SPIN_POLL_RUN);
> > +             spin[1] = __igt_spin_new(i915,
> > +                                      .ctx = ctx,
> > +                                      .dependency = scratch);
> > +
> > +             igt_spin_busywait_until_started(spin[0]);
> > +
> > +             /* Running: actual class */
> > +             busy.handle = spin[0]->handle;
> > +             do_ioctl(i915, DRM_IOCTL_I915_GEM_BUSY, &busy);
> > +             igt_assert_eq_u32(busy.busy, 1u << (class + 16));
> > +
> > +             /* Queued(read): expected class */
> > +             busy.handle = spin[1]->handle;
> > +             do_ioctl(i915, DRM_IOCTL_I915_GEM_BUSY, &busy);
> > +             igt_assert_eq_u32(busy.busy, 1u << (class + 16));
> > +
> > +             /* Queued(write): expected class */
> > +             busy.handle = scratch;
> > +             do_ioctl(i915, DRM_IOCTL_I915_GEM_BUSY, &busy);
> > +             igt_assert_eq_u32(busy.busy,
> > +                               (1u << (class + 16)) | (class + 1));
> > +
> > +             igt_spin_free(i915, spin[1]);
> > +             igt_spin_free(i915, spin[0]);
> > +
> > +             gem_context_destroy(i915, ctx);
> > +     }
> > +
> > +     gem_close(i915, scratch);
> > +     gem_quiescent_gpu(i915);
> > +}
> > +
> > +static int add_pmu(int pmu, const struct i915_engine_class_instance *ci)
> > +{
> > +     return perf_i915_open_group(I915_PMU_ENGINE_BUSY(ci->engine_class,
> > +                                                      ci->engine_instance),
> > +                                 pmu);
> > +}
> > +
> > +static void full(int i915, unsigned int flags)
> > +#define PULSE 0x1
> > +#define LATE 0x2
> > +{
> > +     struct drm_i915_gem_exec_object2 batch = {
> > +             .handle = batch_create(i915),
> > +     };
> > +
> > +     if (flags & LATE)
> > +             igt_require_sw_sync();
> > +
> > +     /*
> > +      * I915_CONTEXT_PARAM_ENGINE changes the meaning of I915_EXEC_DEFAULT
> > +      * to provide an automatic selection from the ctx->engine[]. It
> > +      * employs load-balancing to evenly distribute the workload the
> 
> The leading section needs rewriting for truth. It is the load balance 
> extensions which _can_ redefine the meaning of I915_EXEC_DEFAULT etc.. 
> I'm sure I didn't need to explain, but I have, just to make it clear 
> which part I am complaining about. :)

Hey, remember this is 2018!

> > +      * array. If we submit N spinners, we expect them to be simultaneously
> > +      * running across N engines and use PMU to confirm that the entire
> > +      * set of engines are busy.
> 
> Clarify it is only if using N contexts.
> 
> > +      *
> > +      * We complicate matters by interspersing short-lived tasks to challenge
> > +      * the kernel to search for space in which to insert new batches.
> > +      */
> > +
> > +
> > +     for (int mask = 0; mask < 32; mask++) {
> > +             struct i915_engine_class_instance *ci;
> > +             igt_spin_t *spin = NULL;
> > +             IGT_CORK_FENCE(cork);
> > +             unsigned int count;
> > +             double load;
> > +             int fence = -1;
> > +             int *pmu;
> > +
> > +             ci = list_engines(i915, 1u << mask, &count);
> > +             if (!ci)
> > +                     continue;
> > +
> > +             igt_debug("Found %d engines of class %d\n", count, mask);
> > +
> > +             pmu = malloc(sizeof(*pmu) * count);
> > +             igt_assert(pmu);
> > +
> > +             if (flags & LATE)
> > +                     fence = igt_cork_plug(&cork, i915);
> > +
> > +             pmu[0] = -1;
> > +             for (unsigned int n = 0; n < count; n++) {
> > +                     uint32_t ctx;
> > +
> > +                     pmu[n] = add_pmu(pmu[0], &ci[n]);
> > +
> > +                     if (flags & PULSE) {
> > +                             struct drm_i915_gem_execbuffer2 eb = {
> > +                                     .buffers_ptr = to_user_pointer(&batch),
> > +                                     .buffer_count = 1,
> > +                                     .rsvd2 = fence,
> > +                                     .flags = flags & LATE ? I915_EXEC_FENCE_IN : 0,
> > +                             };
> > +                             gem_execbuf(i915, &eb);
> > +                     }
> > +
> > +                     /*
> > +                      * Each spinner needs to be on a new timeline,
> > +                      * otherwise they will just sit in the single queue
> > +                      * and not run concurrently.
> > +                      */
> > +                     ctx = load_balancer_create(i915, ci, count);
> > +
> > +                     if (spin == NULL) {
> > +                             spin = __igt_spin_new(i915, .ctx = ctx);
> > +                     } else {
> > +                             struct drm_i915_gem_execbuffer2 eb = {
> > +                                     .buffers_ptr = spin->execbuf.buffers_ptr,
> > +                                     .buffer_count = spin->execbuf.buffer_count,
> > +                                     .rsvd1 = ctx,
> > +                                     .rsvd2 = fence,
> > +                                     .flags = flags & LATE ? I915_EXEC_FENCE_IN : 0,
> > +                             };
> > +                             gem_execbuf(i915, &eb);
> > +                     }
> > +
> > +                     gem_context_destroy(i915, ctx);
> > +             }
> > +
> > +             if (flags & LATE) {
> > +                     igt_cork_unplug(&cork);
> > +                     close(fence);
> > +             }
> > +
> > +             load = measure_min_load(pmu[0], count, 10000);
> > +             igt_spin_free(i915, spin);
> > +
> > +             close(pmu[0]);
> > +             free(pmu);
> > +
> > +             free(ci);
> > +
> > +             igt_assert_f(load > 0.90,
> > +                          "minimum load for %d x class:%d was found to be only %.1f%% busy\n",
> > +                          count, mask, load*100);
> > +             gem_quiescent_gpu(i915);
> > +     }
> > +
> > +     gem_close(i915, batch.handle);
> > +     gem_quiescent_gpu(i915);
> > +}
> > +
> > +static void nop(int i915)
> > +{
> > +     struct drm_i915_gem_exec_object2 batch = {
> > +             .handle = batch_create(i915),
> > +     };
> > +
> > +     for (int mask = 0; mask < 32; mask++) {
> 
> s/mask/class/
> 
> > +             struct i915_engine_class_instance *ci;
> > +             unsigned int count;
> > +             uint32_t ctx;
> > +
> > +             ci = list_engines(i915, 1u << mask, &count);
> > +             if (!ci)
> > +                     continue;
> > +
> > +             if (count < 2) {
> > +                     free(ci);
> > +                     continue;
> 
> Benchmark-only subtest for real veng?

Sure, that's a bit of internal knowledge leaking.
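
Making it explicit rather than a silent skip could be as simple as the
sketch below - though note it would turn the per-class continue into a
subtest skip:

	igt_require_f(count > 1,
		      "need at least two engines in a class to balance\n");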

> > +             }
> > +
> > +             igt_debug("Found %d engines of class %d\n", count, mask);
> > +             ctx = load_balancer_create(i915, ci, count);
> > +
> > +             for (int n = 0; n < count; n++) {
> > +                     struct drm_i915_gem_execbuffer2 execbuf = {
> > +                             .buffers_ptr = to_user_pointer(&batch),
> > +                             .buffer_count = 1,
> > +                             .flags = n + 1,
> > +                             .rsvd1 = ctx,
> > +                     };
> > +                     struct timespec tv = {};
> > +                     unsigned long nops;
> > +                     double t;
> > +
> > +                     igt_nsec_elapsed(&tv);
> > +                     nops = 0;
> > +                     do {
> > +                             for (int r = 0; r < 1024; r++)
> > +                                     gem_execbuf(i915, &execbuf);
> > +                             nops += 1024;
> > +                     } while (igt_seconds_elapsed(&tv) < 2);
> > +                     gem_sync(i915, batch.handle);
> > +
> > +                     t = igt_nsec_elapsed(&tv) * 1e-3 / nops;
> > +                     igt_info("%x:%d %.3fus\n", mask, n, t);
> 
> Class in decimal is better I think.

But it's mask :-p

It's treated as just a number and not as a class identifier.

> And some descriptive labels to info messages would be good. Like 
> "individual engines", "virtual engine" etc.

It does describe the individual engines and their composites. The output
looks clear and concise. You may want mask translated to a string... but
this code is oblivious as to what mask actually is.

The way it is used definitely looks more like mask than class.
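
Though if a label were ever wanted, a tiny map would do (sketch; the
cases mirror the uAPI engine class enum):

	static const char *class_name(unsigned int class)
	{
		switch (class) {
		case I915_ENGINE_CLASS_RENDER:		return "render";
		case I915_ENGINE_CLASS_COPY:		return "copy";
		case I915_ENGINE_CLASS_VIDEO:		return "video";
		case I915_ENGINE_CLASS_VIDEO_ENHANCE:	return "video-enhance";
		default:				return "unknown";
		}
	}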

> > +             }
> > +
> > +             {
> > +                     struct drm_i915_gem_execbuffer2 execbuf = {
> > +                             .buffers_ptr = to_user_pointer(&batch),
> > +                             .buffer_count = 1,
> > +                             .rsvd1 = ctx,
> > +                     };
> > +                     struct timespec tv = {};
> > +                     unsigned long nops;
> > +                     double t;
> > +
> > +                     igt_nsec_elapsed(&tv);
> > +                     nops = 0;
> > +                     do {
> > +                             for (int r = 0; r < 1024; r++)
> > +                                     gem_execbuf(i915, &execbuf);
> > +                             nops += 1024;
> > +                     } while (igt_seconds_elapsed(&tv) < 2);
> > +                     gem_sync(i915, batch.handle);
> > +
> > +                     t = igt_nsec_elapsed(&tv) * 1e-3 / nops;
> > +                     igt_info("%x:* %.3fus\n", mask, t);
> > +             }
> > +
> > +
> > +             igt_fork(child, count) {
> > +                     struct drm_i915_gem_execbuffer2 execbuf = {
> > +                             .buffers_ptr = to_user_pointer(&batch),
> > +                             .buffer_count = 1,
> > +                             .flags = child + 1,
> > +                             .rsvd1 = gem_context_clone(i915, ctx,
> > +                                                        I915_CONTEXT_CLONE_ENGINES, 0),
> > +                     };
> > +                     struct timespec tv = {};
> > +                     unsigned long nops;
> > +                     double t;
> > +
> > +                     igt_nsec_elapsed(&tv);
> > +                     nops = 0;
> > +                     do {
> > +                             for (int r = 0; r < 1024; r++)
> > +                                     gem_execbuf(i915, &execbuf);
> > +                             nops += 1024;
> > +                     } while (igt_seconds_elapsed(&tv) < 2);
> > +                     gem_sync(i915, batch.handle);
> > +
> > +                     t = igt_nsec_elapsed(&tv) * 1e-3 / nops;
> > +                     igt_info("[%d] %x:%d %.3fus\n", child, mask, child, t);
> > +
> > +                     memset(&tv, 0, sizeof(tv));
> > +                     execbuf.flags = 0;
> > +
> > +                     igt_nsec_elapsed(&tv);
> > +                     nops = 0;
> > +                     do {
> > +                             for (int r = 0; r < 1024; r++)
> > +                                     gem_execbuf(i915, &execbuf);
> > +                             nops += 1024;
> > +                     } while (igt_seconds_elapsed(&tv) < 2);
> > +                     gem_sync(i915, batch.handle);
> > +
> > +                     t = igt_nsec_elapsed(&tv) * 1e-3 / nops;
> > +                     igt_info("[%d] %x:* %.3fus\n", child, mask, t);
> > +
> > +                     gem_context_destroy(i915, execbuf.rsvd1);
> > +             }
> > +
> > +             igt_waitchildren();
> > +
> > +             gem_context_destroy(i915, ctx);
> > +             free(ci);
> > +     }
> > +
> > +     gem_close(i915, batch.handle);
> > +     gem_quiescent_gpu(i915);
> > +}
> > +
> > +static void ping(int i915, uint32_t ctx, unsigned int engine)
> > +{
> > +     struct drm_i915_gem_exec_object2 obj = {
> > +             .handle = batch_create(i915),
> > +     };
> > +     struct drm_i915_gem_execbuffer2 execbuf = {
> > +             .buffers_ptr = to_user_pointer(&obj),
> > +             .buffer_count = 1,
> > +             .flags = engine,
> > +             .rsvd1 = ctx,
> > +     };
> > +     gem_execbuf(i915, &execbuf);
> > +     gem_sync(i915, obj.handle);
> > +     gem_close(i915, obj.handle);
> > +}
> > +
> > +static void semaphore(int i915)
> > +{
> > +     uint32_t block[2], scratch;
> > +     igt_spin_t *spin[3];
> > +
> > +     /*
> > +      * If we are using HW semaphores to launch serialised requests
> > +      * on different engines concurrently, we want to verify that real
> > +      * work is unimpeded.
> > +      */
> > +     igt_require(gem_scheduler_has_preemption(i915));
> > +
> > +     block[0] = gem_context_create(i915);
> > +     block[1] = gem_context_create(i915);
> > +
> > +     scratch = gem_create(i915, 4096);
> > +     spin[2] = igt_spin_new(i915, .dependency = scratch);
> > +     for (int mask = 1; mask < 32; mask++) {
> 
> s/mask/class/ throughout.
> 
> > +             struct i915_engine_class_instance *ci;
> > +             unsigned int count;
> > +             uint32_t vip;
> > +
> > +             ci = list_engines(i915, 1u << mask, &count);
> > +             if (!ci)
> > +                     continue;
> > +
> > +             if (count < ARRAY_SIZE(block)) {
> > +                     free(ci);
> > +                     continue;
> > +             }
> > +
> > +             /* Ensure that we completely occupy all engines in this group */
> > +             count = ARRAY_SIZE(block);
> > +
> > +             for (int i = 0; i < count; i++) {
> > +                     set_load_balancer(i915, block[i], ci, count);
> > +                     spin[i] = __igt_spin_new(i915,
> > +                                                    .ctx = block[i],
> > +                                                    .dependency = scratch);
> 
> Alignment.
> 
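For reference, aligned under the opening parenthesis that hunk becomes:

	spin[i] = __igt_spin_new(i915,
				 .ctx = block[i],
				 .dependency = scratch);
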
> > +             }
> > +
> > +             /*
> > +              * Either we haven't blocked both engines with semaphores,
> > +              * or we let the vip through. If not, we hang.
> > +              */
> > +             vip = gem_context_create(i915);
> > +             set_load_balancer(i915, vip, ci, count);
> > +             ping(i915, vip, 0);
> > +             gem_context_destroy(i915, vip);
> > +
> > +             for (int i = 0; i < count; i++)
> > +                     igt_spin_free(i915, spin[i]);
> > +
> > +             free(ci);
> > +     }
> > +     igt_spin_free(i915, spin[2]);
> > +     gem_close(i915, scratch);
> > +
> > +     gem_context_destroy(i915, block[1]);
> > +     gem_context_destroy(i915, block[0]);
> > +
> > +     gem_quiescent_gpu(i915);
> > +}
> > +
> > +static void smoketest(int i915, int timeout)
> > +{
> > +     struct drm_i915_gem_exec_object2 batch[2] = {
> > +             { .handle = __batch_create(i915, 16380) }
> > +     };
> > +     unsigned int ncontext = 0;
> > +     uint32_t *contexts = NULL;
> > +     uint32_t *handles = NULL;
> > +
> > +     igt_require_sw_sync();
> > +
> > +     for (int mask = 0; mask < 32; mask++) {
> > +             struct i915_engine_class_instance *ci;
> > +             unsigned int count = 0;
> > +
> > +             ci = list_engines(i915, 1u << mask, &count);
> > +             if (!ci || count < 2) {
> > +                     free(ci);
> > +                     continue;
> > +             }
> > +
> > +             igt_debug("Found %d engines of class %d\n", count, mask);
> > +
> > +             ncontext += 128;
> > +             contexts = realloc(contexts, sizeof(*contexts) * ncontext);
> > +             igt_assert(contexts);
> > +
> > +             for (unsigned int n = ncontext - 128; n < ncontext; n++) {
> > +                     contexts[n] = load_balancer_create(i915, ci, count);
> > +                     igt_assert(contexts[n]);
> > +             }
> > +
> > +             free(ci);
> > +     }
> > +     igt_debug("Created %d virtual engines (one per context)\n", ncontext);
> > +     igt_require(ncontext);
> > +
> > +     contexts = realloc(contexts, sizeof(*contexts) * ncontext * 4);
> > +     igt_assert(contexts);
> > +     memcpy(contexts + ncontext, contexts, ncontext * sizeof(*contexts));
> > +     ncontext *= 2;
> > +     memcpy(contexts + ncontext, contexts, ncontext * sizeof(*contexts));
> > +     ncontext *= 2;
> > +
> > +     handles = malloc(sizeof(*handles) * ncontext);
> > +     igt_assert(handles);
> > +     for (unsigned int n = 0; n < ncontext; n++)
> > +             handles[n] = gem_create(i915, 4096);
> > +
> > +     igt_until_timeout(timeout) {
> > +             unsigned int count = 1 + (rand() % (ncontext - 1));
> > +             IGT_CORK_FENCE(cork);
> > +             int fence = igt_cork_plug(&cork, i915);
> > +
> > +             for (unsigned int n = 0; n < count; n++) {
> > +                     struct drm_i915_gem_execbuffer2 eb = {
> > +                             .buffers_ptr = to_user_pointer(batch),
> > +                             .buffer_count = ARRAY_SIZE(batch),
> > +                             .rsvd1 = contexts[n],
> > +                             .rsvd2 = fence,
> > +                             .flags = I915_EXEC_BATCH_FIRST | I915_EXEC_FENCE_IN,
> > +                     };
> > +                     batch[1].handle = handles[n];
> > +                     gem_execbuf(i915, &eb);
> > +             }
> > +             igt_permute_array(handles, count, igt_exchange_int);
> > +
> > +             igt_cork_unplug(&cork);
> > +             for (unsigned int n = 0; n < count; n++)
> > +                     gem_sync(i915, handles[n]);
> > +
> > +             close(fence);
> > +     }
> > +
> > +     for (unsigned int n = 0; n < ncontext; n++) {
> > +             gem_close(i915, handles[n]);
> > +             __gem_context_destroy(i915, contexts[n]);
> > +     }
> > +     free(handles);
> > +     free(contexts);
> > +     gem_close(i915, batch[0].handle);
> > +}
> > +
> > +static bool has_context_engines(int i915)
> > +{
> > +     struct drm_i915_gem_context_param p = {
> > +             .param = I915_CONTEXT_PARAM_ENGINES,
> > +     };
> > +
> > +     return __gem_context_set_param(i915, &p) == 0;
> > +}
> > +
> > +static bool has_load_balancer(int i915)
> > +{
> > +     struct i915_engine_class_instance ci = {};
> > +     uint32_t ctx;
> > +     int err;
> > +
> > +     ctx = gem_context_create(i915);
> > +     err = __set_load_balancer(i915, ctx, &ci, 1);
> > +     gem_context_destroy(i915, ctx);
> > +
> > +     return err == 0;
> > +}
> > +
> > +igt_main
> > +{
> > +     int i915 = -1;
> > +
> > +     igt_skip_on_simulation();
> > +
> > +     igt_fixture {
> > +             i915 = drm_open_driver(DRIVER_INTEL);
> > +             igt_require_gem(i915);
> > +
> > +             gem_require_contexts(i915);
> > +             igt_require(has_context_engines(i915));
> > +             igt_require(has_load_balancer(i915));
> > +
> > +             igt_fork_hang_detector(i915);
> > +     }
> > +
> > +     igt_subtest("invalid-balancer")
> > +             invalid_balancer(i915);
> > +
> > +     igt_subtest("individual")
> > +             individual(i915);
> > +
> > +     igt_subtest("indicies")
> > +             indicies(i915);
> > +
> > +     igt_subtest("busy")
> > +             busy(i915);
> > +
> > +     igt_subtest_group {
> > +             static const struct {
> > +                     const char *name;
> > +                     unsigned int flags;
> > +             } phases[] = {
> > +                     { "", 0 },
> > +                     { "-pulse", PULSE },
> > +                     { "-late", LATE },
> > +                     { "-late-pulse", PULSE | LATE },
> > +                     { }
> > +             };
> > +             for (typeof(*phases) *p = phases; p->name; p++)
> > +                     igt_subtest_f("full%s", p->name)
> > +                             full(i915, p->flags);
> > +     }
> > +
> > +     igt_subtest("nop")
> > +             nop(i915);
> > +
> > +     igt_subtest("semaphore")
> > +             semaphore(i915);
> > +
> > +     igt_subtest("smoke")
> > +             smoketest(i915, 20);
> > +
> > +     igt_fixture {
> > +             igt_stop_hang_detector();
> > +     }
> > +}
> > diff --git a/tests/meson.build b/tests/meson.build
> > index 7e0089e74..eeea3611d 100644
> > --- a/tests/meson.build
> > +++ b/tests/meson.build
> > @@ -288,6 +288,13 @@ test_executables += executable('gem_eio',
> >          install : true)
> >   test_list += 'gem_eio'
> >   
> > +test_executables += executable('gem_exec_balancer', 'i915/gem_exec_balancer.c',
> > +        dependencies : test_deps + [ lib_igt_perf ],
> > +        install_dir : libexecdir,
> > +        install_rpath : libexecdir_rpathdir,
> > +        install : true)
> > +test_progs += 'gem_exec_balancer'
> > +
> >   test_executables += executable('gem_mocs_settings',
> >          join_paths('i915', 'gem_mocs_settings.c'),
> >          dependencies : test_deps + [ lib_igt_perf ],
> > 
> 
> Regards,
> 
> Tvrtko
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH i-g-t 14/16] i915/gem_exec_balancer: Exercise bonded pairs
  2019-05-15 10:58     ` [igt-dev] " Tvrtko Ursulin
@ 2019-05-15 19:57       ` Chris Wilson
  -1 siblings, 0 replies; 92+ messages in thread
From: Chris Wilson @ 2019-05-15 19:57 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx; +Cc: igt-dev

Quoting Tvrtko Ursulin (2019-05-15 11:58:20)
> 
> On 08/05/2019 11:09, Chris Wilson wrote:
> > The submit-fence + load_balancing apis allow us to execute a named
> > pair of engines in parallel; that is, by submitting a request to one
> > engine, we can then use the generated submit-fence to submit a second
> > request to another engine and have it execute at the same time.
> > Furthermore, by specifying bonded pairs, we can direct the virtual
> > engine to use a particular engine in parallel to the first request.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >   tests/i915/gem_exec_balancer.c | 234 +++++++++++++++++++++++++++++++--
> >   1 file changed, 224 insertions(+), 10 deletions(-)
> > 
> > diff --git a/tests/i915/gem_exec_balancer.c b/tests/i915/gem_exec_balancer.c
> > index 25195d478..20ad66727 100644
> > --- a/tests/i915/gem_exec_balancer.c
> > +++ b/tests/i915/gem_exec_balancer.c
> > @@ -98,9 +98,35 @@ list_engines(int i915, uint32_t class_mask, unsigned int *out)
> >       return engines;
> >   }
> >   
> > +static int __set_engines(int i915, uint32_t ctx,
> > +                      const struct i915_engine_class_instance *ci,
> > +                      unsigned int count)
> > +{
> > +     I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, count);
> > +     struct drm_i915_gem_context_param p = {
> > +             .ctx_id = ctx,
> > +             .param = I915_CONTEXT_PARAM_ENGINES,
> > +             .size = sizeof(engines),
> > +             .value = to_user_pointer(&engines)
> > +     };
> > +
> > +     engines.extensions = 0;
> > +     memcpy(engines.engines, ci, sizeof(engines.engines));
> > +
> > +     return __gem_context_set_param(i915, &p);
> > +}
> > +
> > +static void set_engines(int i915, uint32_t ctx,
> > +                     const struct i915_engine_class_instance *ci,
> > +                     unsigned int count)
> > +{
> > +     igt_assert_eq(__set_engines(i915, ctx, ci, count), 0);
> > +}
> > +
> >   static int __set_load_balancer(int i915, uint32_t ctx,
> >                              const struct i915_engine_class_instance *ci,
> > -                            unsigned int count)
> > +                            unsigned int count,
> > +                            void *ext)
> >   {
> >       I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(balancer, count);
> >       I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 1 + count);
> > @@ -113,6 +139,7 @@ static int __set_load_balancer(int i915, uint32_t ctx,
> >   
> >       memset(&balancer, 0, sizeof(balancer));
> >       balancer.base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE;
> > +     balancer.base.next_extension = to_user_pointer(ext);
> >   
> >       igt_assert(count);
> >       balancer.num_siblings = count;
> > @@ -131,9 +158,10 @@ static int __set_load_balancer(int i915, uint32_t ctx,
> >   
> >   static void set_load_balancer(int i915, uint32_t ctx,
> >                             const struct i915_engine_class_instance *ci,
> > -                           unsigned int count)
> > +                           unsigned int count,
> > +                           void *ext)
> >   {
> > -     igt_assert_eq(__set_load_balancer(i915, ctx, ci, count), 0);
> > +     igt_assert_eq(__set_load_balancer(i915, ctx, ci, count, ext), 0);
> >   }
> >   
> >   static uint32_t load_balancer_create(int i915,
> > @@ -143,7 +171,7 @@ static uint32_t load_balancer_create(int i915,
> >       uint32_t ctx;
> >   
> >       ctx = gem_context_create(i915);
> > -     set_load_balancer(i915, ctx, ci, count);
> > +     set_load_balancer(i915, ctx, ci, count, NULL);
> >   
> >       return ctx;
> >   }
> > @@ -288,6 +316,74 @@ static void invalid_balancer(int i915)
> >       }
> >   }
> >   
> > +static void invalid_bonds(int i915)
> > +{
> > +     I915_DEFINE_CONTEXT_ENGINES_BOND(bonds[16], 1);
> > +     I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 1);
> > +     struct drm_i915_gem_context_param p = {
> > +             .ctx_id = gem_context_create(i915),
> > +             .param = I915_CONTEXT_PARAM_ENGINES,
> > +             .value = to_user_pointer(&engines),
> > +             .size = sizeof(engines),
> > +     };
> > +     uint32_t handle;
> > +     void *ptr;
> > +
> > +     memset(&engines, 0, sizeof(engines));
> > +     gem_context_set_param(i915, &p);
> > +
> > +     memset(bonds, 0, sizeof(bonds));
> > +     for (int n = 0; n < ARRAY_SIZE(bonds); n++) {
> > +             bonds[n].base.name = I915_CONTEXT_ENGINES_EXT_BOND;
> > +             bonds[n].base.next_extension =
> > +                     n ? to_user_pointer(&bonds[n - 1]) : 0;
> > +             bonds[n].num_bonds = 1;
> > +     }
> > +     engines.extensions = to_user_pointer(&bonds);
> > +     gem_context_set_param(i915, &p);
> > +
> > +     bonds[0].base.next_extension = -1ull;
> > +     igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
> > +
> > +     bonds[0].base.next_extension = to_user_pointer(&bonds[0]);
> > +     igt_assert_eq(__gem_context_set_param(i915, &p), -E2BIG);
> > +
> > +     engines.extensions = to_user_pointer(&bonds[1]);
> > +     igt_assert_eq(__gem_context_set_param(i915, &p), -E2BIG);
> > +     bonds[0].base.next_extension = 0;
> > +     gem_context_set_param(i915, &p);
> > +
> > +     handle = gem_create(i915, 4096 * 3);
> > +     ptr = gem_mmap__gtt(i915, handle, 4096 * 3, PROT_WRITE);
> > +     gem_close(i915, handle);
> > +
> > +     memcpy(ptr + 4096, &bonds[0], sizeof(bonds[0]));
> > +     engines.extensions = to_user_pointer(ptr) + 4096;
> > +     gem_context_set_param(i915, &p);
> > +
> > +     memcpy(ptr, &bonds[0], sizeof(bonds[0]));
> > +     bonds[0].base.next_extension = to_user_pointer(ptr);
> > +     memcpy(ptr + 4096, &bonds[0], sizeof(bonds[0]));
> > +     gem_context_set_param(i915, &p);
> > +
> > +     munmap(ptr, 4096);
> > +     igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
> > +
> > +     bonds[0].base.next_extension = 0;
> > +     memcpy(ptr + 8192, &bonds[0], sizeof(bonds[0]));
> > +     bonds[0].base.next_extension = to_user_pointer(ptr) + 8192;
> > +     memcpy(ptr + 4096, &bonds[0], sizeof(bonds[0]));
> > +     gem_context_set_param(i915, &p);
> > +
> > +     munmap(ptr + 8192, 4096);
> > +     igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
> > +
> > +     munmap(ptr + 4096, 4096);
> > +     igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
> > +
> > +     gem_context_destroy(i915, p.ctx_id);
> > +}
> > +
> >   static void kick_kthreads(int period_us)
> >   {
> >       sched_yield();
> > @@ -397,7 +493,7 @@ static void individual(int i915)
> >   
> >               for (int pass = 0; pass < count; pass++) { /* approx. count! */
> >                       igt_permute_array(ci, count, igt_exchange_int64);
> > -                     set_load_balancer(i915, ctx, ci, count);
> > +                     set_load_balancer(i915, ctx, ci, count, NULL);
> >                       for (unsigned int n = 0; n < count; n++)
> >                               check_individual_engine(i915, ctx, ci, n);
> >               }
> > @@ -409,6 +505,115 @@ static void individual(int i915)
> >       gem_quiescent_gpu(i915);
> >   }
> >   
> > +static void bonded(int i915, unsigned int flags)
> > +#define CORK 0x1
> > +{
> > +     I915_DEFINE_CONTEXT_ENGINES_BOND(bonds[16], 1);
> > +     struct i915_engine_class_instance *master_engines;
> > +     uint32_t master;
> > +
> > +     /*
> > +      * I915_CONTEXT_PARAM_ENGINE provides an extension that allows us
> > +      * to specify which engine(s) to pair with a parallel (EXEC_SUBMIT)
> > +      * request submitted to another engine.
> > +      */
> > +
> > +     master = gem_queue_create(i915);
> > +
> > +     memset(bonds, 0, sizeof(bonds));
> > +     for (int n = 0; n < ARRAY_SIZE(bonds); n++) {
> > +             bonds[n].base.name = I915_CONTEXT_ENGINES_EXT_BOND;
> > +             bonds[n].base.next_extension =
> > +                     n ? to_user_pointer(&bonds[n - 1]) : 0;
> > +             bonds[n].num_bonds = 1;
> > +     }
> > +
> > +     for (int mask = 0; mask < 32; mask++) {
> 
> s/mask/class/
> 
> > +             unsigned int count, limit;
> > +             struct i915_engine_class_instance *siblings;
> > +             uint32_t ctx;
> > +             int n;
> > +
> > +             siblings = list_engines(i915, 1u << mask, &count);
> > +             if (!siblings)
> > +                     continue;
> > +
> > +             if (count < 2) {
> > +                     free(siblings);
> > +                     continue;
> > +             }
> > +
> > +             igt_debug("Found %d engines of class %d\n", count, mask);
> > +
> > +             master_engines = list_engines(i915, ~(1u << mask), &limit);
> > +             set_engines(i915, master, master_engines, limit);
> > +
> > +             limit = min(count, limit);
> 
> igt_assert(limit <= ARRAY_SIZE(bonds));
> 
> > +             for (n = 0; n < limit; n++) {
> > +                     bonds[n].master = master_engines[n];
> > +                     bonds[n].engines[0] = siblings[n];
> > +             }
> > +
> > +             ctx = gem_context_clone(i915,
> > +                                     master, I915_CONTEXT_CLONE_VM,
> > +                                     I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
> > +             set_load_balancer(i915, ctx, siblings, count, &bonds[limit - 1]);
> > +
> > +             for (n = 0; n < limit; n++) {
> > +                     struct drm_i915_gem_execbuffer2 eb;
> > +                     IGT_CORK_HANDLE(cork);
> > +                     igt_spin_t *spin, *plug;
> > +                     double load;
> > +                     int pmu;
> > +
> > +                     igt_assert(siblings[n].engine_class != master_engines[n].engine_class);
> > +
> > +                     pmu = perf_i915_open(I915_PMU_ENGINE_BUSY(siblings[n].engine_class,
> > +                                                               siblings[n].engine_instance));
> > +
> > +                     plug = NULL;
> > +                     if (flags & CORK) {
> > +                             plug = __igt_spin_new(i915,
> > +                                                   .ctx = master,
> > +                                                   .engine = n,
> > +                                                   .dependency = igt_cork_plug(&cork, i915));
> > +                     }
> > +
> > +                     spin = __igt_spin_new(i915,
> > +                                           .ctx = master,
> > +                                           .engine = n,
> > +                                           .flags = IGT_SPIN_FENCE_OUT);
> > +
> > +                     eb = spin->execbuf;
> > +                     eb.rsvd1 = ctx;
> > +                     eb.rsvd2 = spin->out_fence;
> > +                     eb.flags = I915_EXEC_FENCE_SUBMIT;
> > +                     gem_execbuf(i915, &eb);
> > +
> > +                     if (plug) {
> > +                             igt_cork_unplug(&cork);
> > +                             igt_spin_free(i915, plug);
> > +                     }
> > +
> > +                     load = measure_load(pmu, 10000);
> > +                     igt_spin_free(i915, spin);
> > +
> > +                     close(pmu);
> > +
> > +                     igt_assert_f(load > 0.90,
> > +                                  "engine %d (class:instance %d:%d) was found to be only %.1f%% busy\n",
> > +                                  n, siblings[n].engine_class, siblings[n].engine_instance,
> > +                                  load*100);
> 
> Master also needs to be checked I think. You have the infrastructure to 
> open two pmus in the previous patch so should be easy.

Haven't we checked precisely that in earlier tests? What would perhaps
be fairer here is to verify that the other engine was idle, otherwise
we could say we fluked it. Furthermore, we should repeat a few times
with say (0, 1), (0, 1), (1, 0), (1, 0) to further rule out flukes, and
then finish with a random smoketest of some description.
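
Something like this rough sketch is what I have in mind -- it reuses
perf_i915_open()/measure_load() from this patch, but check_bond_exclusive()
itself is a made-up helper, not part of the series:

static void check_bond_exclusive(int i915,
				 const struct i915_engine_class_instance *ci,
				 int target, int other)
{
	/* Sample the bonded sibling and an uninvolved bystander together */
	int busy = perf_i915_open(I915_PMU_ENGINE_BUSY(ci[target].engine_class,
						       ci[target].engine_instance));
	int idle = perf_i915_open(I915_PMU_ENGINE_BUSY(ci[other].engine_class,
						       ci[other].engine_instance));
	double load;

	/* The bonded sibling must be saturated by the spinner... */
	load = measure_load(busy, 10000);
	igt_assert_f(load > 0.90,
		     "bonded engine only %.1f%% busy\n", load * 100);

	/* ...while the bystander must not have picked up any work */
	load = measure_load(idle, 10000);
	igt_assert_f(load < 0.10,
		     "bystander engine %.1f%% busy, expected idle\n",
		     load * 100);

	close(busy);
	close(idle);
}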

Perhaps even a test closer to the typical workload would involve
semaphore communication across the bond. But I don't know a way in which
I can determine which engine I am on in order to record that from the
GPU itself.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [igt-dev] [PATCH i-g-t 14/16] i915/gem_exec_balancer: Exercise bonded pairs
@ 2019-05-15 19:57       ` Chris Wilson
  0 siblings, 0 replies; 92+ messages in thread
From: Chris Wilson @ 2019-05-15 19:57 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx; +Cc: igt-dev

Quoting Tvrtko Ursulin (2019-05-15 11:58:20)
> 
> On 08/05/2019 11:09, Chris Wilson wrote:
> > The submit-fence + load_balancing apis allow us to execute a named
> > pair of engines in parallel; that is, by submitting a request to one
> > engine, we can then use the generated submit-fence to submit a second
> > request to another engine and have it execute at the same time.
> > Furthermore, by specifying bonded pairs, we can direct the virtual
> > engine to use a particular engine in parallel to the first request.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >   tests/i915/gem_exec_balancer.c | 234 +++++++++++++++++++++++++++++++--
> >   1 file changed, 224 insertions(+), 10 deletions(-)
> > 
> > diff --git a/tests/i915/gem_exec_balancer.c b/tests/i915/gem_exec_balancer.c
> > index 25195d478..20ad66727 100644
> > --- a/tests/i915/gem_exec_balancer.c
> > +++ b/tests/i915/gem_exec_balancer.c
> > @@ -98,9 +98,35 @@ list_engines(int i915, uint32_t class_mask, unsigned int *out)
> >       return engines;
> >   }
> >   
> > +static int __set_engines(int i915, uint32_t ctx,
> > +                      const struct i915_engine_class_instance *ci,
> > +                      unsigned int count)
> > +{
> > +     I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, count);
> > +     struct drm_i915_gem_context_param p = {
> > +             .ctx_id = ctx,
> > +             .param = I915_CONTEXT_PARAM_ENGINES,
> > +             .size = sizeof(engines),
> > +             .value = to_user_pointer(&engines)
> > +     };
> > +
> > +     engines.extensions = 0;
> > +     memcpy(engines.engines, ci, sizeof(engines.engines));
> > +
> > +     return __gem_context_set_param(i915, &p);
> > +}
> > +
> > +static void set_engines(int i915, uint32_t ctx,
> > +                     const struct i915_engine_class_instance *ci,
> > +                     unsigned int count)
> > +{
> > +     igt_assert_eq(__set_engines(i915, ctx, ci, count), 0);
> > +}
> > +
> >   static int __set_load_balancer(int i915, uint32_t ctx,
> >                              const struct i915_engine_class_instance *ci,
> > -                            unsigned int count)
> > +                            unsigned int count,
> > +                            void *ext)
> >   {
> >       I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(balancer, count);
> >       I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 1 + count);
> > @@ -113,6 +139,7 @@ static int __set_load_balancer(int i915, uint32_t ctx,
> >   
> >       memset(&balancer, 0, sizeof(balancer));
> >       balancer.base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE;
> > +     balancer.base.next_extension = to_user_pointer(ext);
> >   
> >       igt_assert(count);
> >       balancer.num_siblings = count;
> > @@ -131,9 +158,10 @@ static int __set_load_balancer(int i915, uint32_t ctx,
> >   
> >   static void set_load_balancer(int i915, uint32_t ctx,
> >                             const struct i915_engine_class_instance *ci,
> > -                           unsigned int count)
> > +                           unsigned int count,
> > +                           void *ext)
> >   {
> > -     igt_assert_eq(__set_load_balancer(i915, ctx, ci, count), 0);
> > +     igt_assert_eq(__set_load_balancer(i915, ctx, ci, count, ext), 0);
> >   }
> >   
> >   static uint32_t load_balancer_create(int i915,
> > @@ -143,7 +171,7 @@ static uint32_t load_balancer_create(int i915,
> >       uint32_t ctx;
> >   
> >       ctx = gem_context_create(i915);
> > -     set_load_balancer(i915, ctx, ci, count);
> > +     set_load_balancer(i915, ctx, ci, count, NULL);
> >   
> >       return ctx;
> >   }
> > @@ -288,6 +316,74 @@ static void invalid_balancer(int i915)
> >       }
> >   }
> >   
> > +static void invalid_bonds(int i915)
> > +{
> > +     I915_DEFINE_CONTEXT_ENGINES_BOND(bonds[16], 1);
> > +     I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 1);
> > +     struct drm_i915_gem_context_param p = {
> > +             .ctx_id = gem_context_create(i915),
> > +             .param = I915_CONTEXT_PARAM_ENGINES,
> > +             .value = to_user_pointer(&engines),
> > +             .size = sizeof(engines),
> > +     };
> > +     uint32_t handle;
> > +     void *ptr;
> > +
> > +     memset(&engines, 0, sizeof(engines));
> > +     gem_context_set_param(i915, &p);
> > +
> > +     memset(bonds, 0, sizeof(bonds));
> > +     for (int n = 0; n < ARRAY_SIZE(bonds); n++) {
> > +             bonds[n].base.name = I915_CONTEXT_ENGINES_EXT_BOND;
> > +             bonds[n].base.next_extension =
> > +                     n ? to_user_pointer(&bonds[n - 1]) : 0;
> > +             bonds[n].num_bonds = 1;
> > +     }
> > +     engines.extensions = to_user_pointer(&bonds);
> > +     gem_context_set_param(i915, &p);
> > +
> > +     bonds[0].base.next_extension = -1ull;
> > +     igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
> > +
> > +     bonds[0].base.next_extension = to_user_pointer(&bonds[0]);
> > +     igt_assert_eq(__gem_context_set_param(i915, &p), -E2BIG);
> > +
> > +     engines.extensions = to_user_pointer(&bonds[1]);
> > +     igt_assert_eq(__gem_context_set_param(i915, &p), -E2BIG);
> > +     bonds[0].base.next_extension = 0;
> > +     gem_context_set_param(i915, &p);
> > +
> > +     handle = gem_create(i915, 4096 * 3);
> > +     ptr = gem_mmap__gtt(i915, handle, 4096 * 3, PROT_WRITE);
> > +     gem_close(i915, handle);
> > +
> > +     memcpy(ptr + 4096, &bonds[0], sizeof(bonds[0]));
> > +     engines.extensions = to_user_pointer(ptr) + 4096;
> > +     gem_context_set_param(i915, &p);
> > +
> > +     memcpy(ptr, &bonds[0], sizeof(bonds[0]));
> > +     bonds[0].base.next_extension = to_user_pointer(ptr);
> > +     memcpy(ptr + 4096, &bonds[0], sizeof(bonds[0]));
> > +     gem_context_set_param(i915, &p);
> > +
> > +     munmap(ptr, 4096);
> > +     igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
> > +
> > +     bonds[0].base.next_extension = 0;
> > +     memcpy(ptr + 8192, &bonds[0], sizeof(bonds[0]));
> > +     bonds[0].base.next_extension = to_user_pointer(ptr) + 8192;
> > +     memcpy(ptr + 4096, &bonds[0], sizeof(bonds[0]));
> > +     gem_context_set_param(i915, &p);
> > +
> > +     munmap(ptr + 8192, 4096);
> > +     igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
> > +
> > +     munmap(ptr + 4096, 4096);
> > +     igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
> > +
> > +     gem_context_destroy(i915, p.ctx_id);
> > +}
> > +
> >   static void kick_kthreads(int period_us)
> >   {
> >       sched_yield();
> > @@ -397,7 +493,7 @@ static void individual(int i915)
> >   
> >               for (int pass = 0; pass < count; pass++) { /* approx. count! */
> >                       igt_permute_array(ci, count, igt_exchange_int64);
> > -                     set_load_balancer(i915, ctx, ci, count);
> > +                     set_load_balancer(i915, ctx, ci, count, NULL);
> >                       for (unsigned int n = 0; n < count; n++)
> >                               check_individual_engine(i915, ctx, ci, n);
> >               }
> > @@ -409,6 +505,115 @@ static void individual(int i915)
> >       gem_quiescent_gpu(i915);
> >   }
> >   
> > +static void bonded(int i915, unsigned int flags)
> > +#define CORK 0x1
> > +{
> > +     I915_DEFINE_CONTEXT_ENGINES_BOND(bonds[16], 1);
> > +     struct i915_engine_class_instance *master_engines;
> > +     uint32_t master;
> > +
> > +     /*
> > +      * I915_CONTEXT_PARAM_ENGINE provides an extension that allows us
> > +      * to specify which engine(s) to pair with a parallel (EXEC_SUBMIT)
> > +      * request submitted to another engine.
> > +      */
> > +
> > +     master = gem_queue_create(i915);
> > +
> > +     memset(bonds, 0, sizeof(bonds));
> > +     for (int n = 0; n < ARRAY_SIZE(bonds); n++) {
> > +             bonds[n].base.name = I915_CONTEXT_ENGINES_EXT_BOND;
> > +             bonds[n].base.next_extension =
> > +                     n ? to_user_pointer(&bonds[n - 1]) : 0;
> > +             bonds[n].num_bonds = 1;
> > +     }
> > +
> > +     for (int mask = 0; mask < 32; mask++) {
> 
> s/mask/class/
> 
> > +             unsigned int count, limit;
> > +             struct i915_engine_class_instance *siblings;
> > +             uint32_t ctx;
> > +             int n;
> > +
> > +             siblings = list_engines(i915, 1u << mask, &count);
> > +             if (!siblings)
> > +                     continue;
> > +
> > +             if (count < 2) {
> > +                     free(siblings);
> > +                     continue;
> > +             }
> > +
> > +             igt_debug("Found %d engines of class %d\n", count, mask);
> > +
> > +             master_engines = list_engines(i915, ~(1u << mask), &limit);
> > +             set_engines(i915, master, master_engines, limit);
> > +
> > +             limit = min(count, limit);
> 
> igt_assert(limit <= ARRAY_SIZE(bonds));
> 
> > +             for (n = 0; n < limit; n++) {
> > +                     bonds[n].master = master_engines[n];
> > +                     bonds[n].engines[0] = siblings[n];
> > +             }
> > +
> > +             ctx = gem_context_clone(i915,
> > +                                     master, I915_CONTEXT_CLONE_VM,
> > +                                     I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
> > +             set_load_balancer(i915, ctx, siblings, count, &bonds[limit - 1]);
> > +
> > +             for (n = 0; n < limit; n++) {
> > +                     struct drm_i915_gem_execbuffer2 eb;
> > +                     IGT_CORK_HANDLE(cork);
> > +                     igt_spin_t *spin, *plug;
> > +                     double load;
> > +                     int pmu;
> > +
> > +                     igt_assert(siblings[n].engine_class != master_engines[n].engine_class);
> > +
> > +                     pmu = perf_i915_open(I915_PMU_ENGINE_BUSY(siblings[n].engine_class,
> > +                                                               siblings[n].engine_instance));
> > +
> > +                     plug = NULL;
> > +                     if (flags & CORK) {
> > +                             plug = __igt_spin_new(i915,
> > +                                                   .ctx = master,
> > +                                                   .engine = n,
> > +                                                   .dependency = igt_cork_plug(&cork, i915));
> > +                     }
> > +
> > +                     spin = __igt_spin_new(i915,
> > +                                           .ctx = master,
> > +                                           .engine = n,
> > +                                           .flags = IGT_SPIN_FENCE_OUT);
> > +
> > +                     eb = spin->execbuf;
> > +                     eb.rsvd1 = ctx;
> > +                     eb.rsvd2 = spin->out_fence;
> > +                     eb.flags = I915_EXEC_FENCE_SUBMIT;
> > +                     gem_execbuf(i915, &eb);
> > +
> > +                     if (plug) {
> > +                             igt_cork_unplug(&cork);
> > +                             igt_spin_free(i915, plug);
> > +                     }
> > +
> > +                     load = measure_load(pmu, 10000);
> > +                     igt_spin_free(i915, spin);
> > +
> > +                     close(pmu);
> > +
> > +                     igt_assert_f(load > 0.90,
> > +                                  "engine %d (class:instance %d:%d) was found to be only %.1f%% busy\n",
> > +                                  n, siblings[n].engine_class, siblings[n].engine_instance,
> > +                                  load*100);
> 
> Master also needs to be checked I think. You have the infrastructure to 
> open two pmus in the previous patch so should be easy.

Haven't we checked precisely that in earlier tests? What would perhaps
be fairer here is to verify that the other engine was idle, otherwise
we could say we fluked it. Furthermore, we should repeat a few times
with say (0, 1), (0, 1), (1, 0), (1, 0) to further rule out flukes, and
then finish with a random smoketest of some description.

Perhaps even a test closer to the typical workload would involve
semaphore communication across the bond. But I don't know a way in which
I can determine which engine I am on in order to record that from the
GPU itself.
-Chris
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH i-g-t 14/16] i915/gem_exec_balancer: Exercise bonded pairs
  2019-05-15 19:57       ` [igt-dev] " Chris Wilson
@ 2019-05-15 20:32         ` Chris Wilson
  -1 siblings, 0 replies; 92+ messages in thread
From: Chris Wilson @ 2019-05-15 20:32 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx; +Cc: igt-dev

Quoting Chris Wilson (2019-05-15 20:57:18)
> Quoting Tvrtko Ursulin (2019-05-15 11:58:20)
> > 
> > On 08/05/2019 11:09, Chris Wilson wrote:
> > > +                     igt_assert_f(load > 0.90,
> > > +                                  "engine %d (class:instance %d:%d) was found to be only %.1f%% busy\n",
> > > +                                  n, siblings[n].engine_class, siblings[n].engine_instance,
> > > +                                  load*100);
> > 
> > Master also needs to be checked I think. You have the infrastructure to 
> > open two pmus in the previous patch so should be easy.
> 
> Haven't we checked precisely that in earlier tests? What would perhaps
> be fairer here is to verify that the other engine was idle, otherwise
> we could say we fluked it. Furthermore, we should repeat a few times
> with say (0, 1), (0, 1), (1, 0), (1, 0) to further rule out flukes, and
> then finish with a random smoketest of some description.
> 
> Perhaps even a test closer to the typical workload would involve
> semaphore communication across the bond. But I don't know a way in which
> I can determine which engine I am on in order to record that from the
> GPU itself.

To remind myself: the importance here is on uABI stressing; it's much
easier to prove the relationship in the kernel, and that is where we do it.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [igt-dev] [PATCH i-g-t 14/16] i915/gem_exec_balancer: Exercise bonded pairs
@ 2019-05-15 20:32         ` Chris Wilson
  0 siblings, 0 replies; 92+ messages in thread
From: Chris Wilson @ 2019-05-15 20:32 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx; +Cc: igt-dev

Quoting Chris Wilson (2019-05-15 20:57:18)
> Quoting Tvrtko Ursulin (2019-05-15 11:58:20)
> > 
> > On 08/05/2019 11:09, Chris Wilson wrote:
> > > +                     igt_assert_f(load > 0.90,
> > > +                                  "engine %d (class:instance %d:%d) was found to be only %.1f%% busy\n",
> > > +                                  n, siblings[n].engine_class, siblings[n].engine_instance,
> > > +                                  load*100);
> > 
> > Master also needs to be checked I think. You have the infrastructure to 
> > open two pmus in the previous patch so should be easy.
> 
> Haven't we checked precisely that in earlier tests? What would perhaps
> be fairer here is to verify that the other engine was idle, otherwise
> we could say we fluked it. Furthermore, we should repeat a few times
> with say (0, 1), (0, 1), (1, 0), (1, 0) to further rule out flukes, and
> then finish with a random smoketest of some description.
> 
> Perhaps even a test closer to the typical workload would involve
> semaphore communication across the bond. But I don't know a way in which
> I can determine which engine I am on in order to record that from the
> GPU itself.

To remind myself: the importance here is on uABI stressing; it's much
easier to prove the relationship in the kernel, and that is where we do it.
-Chris
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH i-g-t 05/16] i915/gem_ctx_create: Basic checks for constructor properties
  2019-05-15 19:05       ` [igt-dev] " Chris Wilson
@ 2019-05-16  8:38         ` Tvrtko Ursulin
  -1 siblings, 0 replies; 92+ messages in thread
From: Tvrtko Ursulin @ 2019-05-16  8:38 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: igt-dev


On 15/05/2019 20:05, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-05-14 11:15:12)
>>
>> On 08/05/2019 11:09, Chris Wilson wrote:
>>> Check that the extended create interface accepts setparam.
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> ---
>>>    tests/i915/gem_ctx_create.c | 225 ++++++++++++++++++++++++++++++++++--
>>>    1 file changed, 213 insertions(+), 12 deletions(-)
>>>
>>> diff --git a/tests/i915/gem_ctx_create.c b/tests/i915/gem_ctx_create.c
>>> index a664070db..9b4fddbe7 100644
>>> --- a/tests/i915/gem_ctx_create.c
>>> +++ b/tests/i915/gem_ctx_create.c
>>> @@ -33,6 +33,7 @@
>>>    #include <time.h>
>>>    
>>>    #include "igt_rand.h"
>>> +#include "sw_sync.h"
>>>    
>>>    #define LOCAL_I915_EXEC_BSD_SHIFT      (13)
>>>    #define LOCAL_I915_EXEC_BSD_MASK       (3 << LOCAL_I915_EXEC_BSD_SHIFT)
>>> @@ -45,12 +46,33 @@ static unsigned all_nengine;
>>>    static unsigned ppgtt_engines[16];
>>>    static unsigned ppgtt_nengine;
>>>    
>>> -static int __gem_context_create_local(int fd, struct drm_i915_gem_context_create *arg)
>>> +static int create_ioctl(int fd, struct drm_i915_gem_context_create *arg)
>>>    {
>>> -     int ret = 0;
>>> -     if (drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, arg))
>>> -             ret = -errno;
>>> -     return ret;
>>> +     int err;
>>> +
>>> +     err = 0;
>>> +     if (igt_ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, arg)) {
>>> +             err = -errno;
>>> +             igt_assert(err);
>>> +     }
>>> +
>>> +     errno = 0;
>>> +     return err;
>>> +}
>>> +
>>> +static int create_ext_ioctl(int i915,
>>> +                         struct drm_i915_gem_context_create_ext *arg)
>>> +{
>>> +     int err;
>>> +
>>> +     err = 0;
>>> +     if (igt_ioctl(i915, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, arg)) {
>>> +             err = -errno;
>>> +             igt_assume(err);
>>> +     }
>>> +
>>> +     errno = 0;
>>> +     return err;
>>>    }
>>>    
>>>    static double elapsed(const struct timespec *start,
>>> @@ -308,6 +330,187 @@ static void maximum(int fd, int ncpus, unsigned mode)
>>>        free(contexts);
>>>    }
>>>    
>>> +static void basic_ext_param(int i915)
>>> +{
>>> +     struct drm_i915_gem_context_create_ext_setparam ext = {
>>> +             { .name = I915_CONTEXT_CREATE_EXT_SETPARAM },
>>> +     };
>>> +     struct drm_i915_gem_context_create_ext create = {
>>> +             .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS
>>> +     };
>>> +     struct drm_i915_gem_context_param get;
>>> +
>>> +     igt_require(create_ext_ioctl(i915, &create) == 0);
>>> +     gem_context_destroy(i915, create.ctx_id);
>>> +
>>> +     create.extensions = -1ull;
>>> +     igt_assert_eq(create_ext_ioctl(i915, &create), -EFAULT);
>>> +
>>> +     create.extensions = to_user_pointer(&ext);
>>> +     igt_assert_eq(create_ext_ioctl(i915, &create), -EINVAL);
>>> +
>>> +     ext.param.param = I915_CONTEXT_PARAM_PRIORITY;
>>> +     if (create_ext_ioctl(i915, &create) != -ENODEV) {
>>> +             gem_context_destroy(i915, create.ctx_id);
>>> +
>>> +             ext.base.next_extension = -1ull;
>>> +             igt_assert_eq(create_ext_ioctl(i915, &create), -EFAULT);
>>> +             ext.base.next_extension = to_user_pointer(&ext);
>>> +             igt_assert_eq(create_ext_ioctl(i915, &create), -E2BIG);
>>> +             ext.base.next_extension = 0;
>>> +
>>> +             ext.param.value = 32;
>>> +             igt_assert_eq(create_ext_ioctl(i915, &create), 0);
>>> +
>>> +             memset(&get, 0, sizeof(get));
>>> +             get.ctx_id = create.ctx_id;
>>> +             get.param = I915_CONTEXT_PARAM_PRIORITY;
>>> +             gem_context_get_param(i915, &get);
>>> +             igt_assert_eq(get.value, ext.param.value);
>>> +
>>> +             gem_context_destroy(i915, create.ctx_id);
>>> +     }
>>> +}
>>> +
>>> +static void check_single_timeline(int i915, uint32_t ctx, int num_engines)
>>> +{
>>> +#define RCS_TIMESTAMP (0x2000 + 0x358)
>>> +     const int gen = intel_gen(intel_get_drm_devid(i915));
>>> +     const int has_64bit_reloc = gen >= 8;
>>> +     struct drm_i915_gem_exec_object2 results = { .handle = gem_create(i915, 4096) };
>>> +     const uint32_t bbe = MI_BATCH_BUFFER_END;
>>> +     int timeline = sw_sync_timeline_create();
>>> +     uint32_t last, *map;
>>> +
>>> +     {
>>> +             struct drm_i915_gem_execbuffer2 execbuf = {
>>> +                     .buffers_ptr = to_user_pointer(&results),
>>> +                     .buffer_count = 1,
>>> +                     .rsvd1 = ctx,
>>> +             };
>>> +             gem_write(i915, results.handle, 0, &bbe, sizeof(bbe));
>>> +             gem_execbuf(i915, &execbuf);
>>> +             results.flags = EXEC_OBJECT_PINNED;
>>> +     }
>>> +
>>> +     for (int i = 0; i < num_engines; i++) {
>>> +             struct drm_i915_gem_exec_object2 obj[2] = {
>>> +                     results, /* write hazard lies! */
>>> +                     { .handle = gem_create(i915, 4096) },
>>> +             };
>>> +             struct drm_i915_gem_execbuffer2 execbuf = {
>>> +                     .buffers_ptr = to_user_pointer(obj),
>>> +                     .buffer_count = 2,
>>> +                     .rsvd1 = ctx,
>>> +                     .rsvd2 = sw_sync_timeline_create_fence(timeline, num_engines - i),
>>> +                     .flags = i | I915_EXEC_FENCE_IN,
>>> +             };
>>> +             uint64_t offset = results.offset + 4 * i;
>>> +             uint32_t *cs;
>>> +             int j = 0;
>>> +
>>> +             cs = gem_mmap__cpu(i915, obj[1].handle, 0, 4096, PROT_WRITE);
>>> +
>>> +             cs[j] = 0x24 << 23 | 1; /* SRM */
>>> +             if (has_64bit_reloc)
>>> +                     cs[j]++;
>>> +             j++;
>>> +             cs[j++] = RCS_TIMESTAMP;
>>> +             cs[j++] = offset;
>>> +             if (has_64bit_reloc)
>>> +                     cs[j++] = offset >> 32;
>>> +             cs[j++] = MI_BATCH_BUFFER_END;
>>> +
>>> +             munmap(cs, 4096);
>>> +
>>> +             gem_execbuf(i915, &execbuf);
>>> +             gem_close(i915, obj[1].handle);
>>> +             close(execbuf.rsvd2);
>>> +     }
>>> +     close(timeline);
>>> +     gem_sync(i915, results.handle);
>>> +
>>> +     map = gem_mmap__cpu(i915, results.handle, 0, 4096, PROT_READ);
>>> +     gem_set_domain(i915, results.handle, I915_GEM_DOMAIN_CPU, 0);
>>> +     gem_close(i915, results.handle);
>>> +
>>> +     last = map[0];
>>> +     for (int i = 1; i < num_engines; i++) {
>>> +             igt_assert_f((map[i] - last) > 0,
>>> +                          "Engine instance [%d] executed too early: this:%x, last:%x\n",
>>> +                          i, map[i], last);
>>> +             last = map[i];
>>> +     }
>>
>> Hm.. aren't two sw fences (two seqnos) just a needless complication,
>> since the execution order in the single timeline is controlled by
>> submission order? The statement is true only when compounded with the
>> fact that you signal both fences at the same time. I am thinking about
>> what would happen if it wasn't a single timeline context. Fences would be
>> signaled in order, but execution does not have to happen in order. That
>> it does is a property of the single timeline and not of fence ordering. So
>> two input fences with two seqnos is misleading. A single plug would do, I think.
> 
> But that would not detect the case when it was multiple timelines...
>   
>> Or are you thinking of nudging the driver to do the right thing? But in
>> that case I think you'd need to manually advance the first seqno (2nd
>> batch) first and wait a bit to check it hasn't been executed. Then signal
>> the second seqno (first batch) and run the above check to see they have
>> been executed in order.
> 
> The challenge is to detect whether the driver uses 2 timelines instead
> of one. So that is what we set up to detect.

With a single seqno advance, what determines the order of signal delivery
on blocked fences? Is it defined in the dma-fence contract that it happens
in order? If it is, is it then defined that this would map to in-order
submission in i915 (if the contexts/timelines were separate)? (It might
not, I am thinking; the scheduler can decide whatever it wants.)

So I don't see a problem with being more explicit in this test and doing
a step-by-step timeline advance, so that what happens is completely under
the test's control. And it would AFAICS detect the two timelines, because
it would expect that the first timeline advance must not result in request
execution.
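
Roughly, assuming a submit_with_fence() wrapper around the execbuf from
check_single_timeline() above (that helper and the batch handles are made
up here, just to keep the sketch short):

	int timeline = sw_sync_timeline_create();

	/* A waits on seqno 2, B waits on seqno 1; submitted in order A, B */
	submit_with_fence(i915, ctx, batch_A,
			  sw_sync_timeline_create_fence(timeline, 2));
	submit_with_fence(i915, ctx, batch_B,
			  sw_sync_timeline_create_fence(timeline, 1));

	sw_sync_timeline_inc(timeline, 1); /* unblocks only B's fence */
	usleep(50 * 1000);
	/* single timeline: B must still be queued behind the unready A */
	igt_assert(gem_bo_busy(i915, batch_B));

	sw_sync_timeline_inc(timeline, 1); /* now A may run, then B */
	gem_sync(i915, batch_B);
	close(timeline);

Two separate timelines would let B execute on the first advance, so that
is exactly where the busy check would catch them.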

Regards,

Tvrtko


>>> +     munmap(map, 4096);
>>> +}
>>> +
>>> +static void iris_pipeline(int i915)
>>> +{
>>> +#ifdef I915_DEFINE_CONTEXT_PARAM_ENGINES
>>
>> Remove this I expect?
> 
> Depends on a later header. The early plan was to have the bits and pieces
> added piecemeal, but then I decided to add a full feature test.
> 
>>> +#define RCS0 {0, 0}
>>> +     I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 2) = {
>>> +             .engines = { RCS0, RCS0 }
>>> +     };
>>> +     struct drm_i915_gem_context_create_ext_setparam p_engines = {
>>> +             .base = {
>>> +                     .name = I915_CONTEXT_CREATE_EXT_SETPARAM,
>>> +                     .next_extension = 0, /* end of chain */
>>> +             },
>>> +             .param = {
>>> +                     .param = I915_CONTEXT_PARAM_ENGINES,
>>> +                     .value = to_user_pointer(&engines),
>>> +                     .size = sizeof(engines),
>>> +             },
>>> +     };
>>> +     struct drm_i915_gem_context_create_ext_setparam p_recover = {
>>> +             .base = {
>>> +                     .name = I915_CONTEXT_CREATE_EXT_SETPARAM,
>>> +                     .next_extension = to_user_pointer(&p_engines),
>>> +             },
>>> +             .param = {
>>> +                     .param = I915_CONTEXT_PARAM_RECOVERABLE,
>>> +                     .value = 0,
>>> +             },
>>> +     };
>>> +     struct drm_i915_gem_context_create_ext_setparam p_prio = {
>>> +             .base = {
>>> +                     .name = I915_CONTEXT_CREATE_EXT_SETPARAM,
>>> +                     .next_extension = to_user_pointer(&p_recover),
>>> +             },
>>> +             .param = {
>>> +                     .param = I915_CONTEXT_PARAM_PRIORITY,
>>> +                     .value = 768,
>>> +             },
>>> +     };
>>> +     struct drm_i915_gem_context_create_ext create = {
>>> +             .flags = (I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE |
>>> +                       I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS),
>>> +     };
>>> +     struct drm_i915_gem_context_param get;
>>> +
>>> +     igt_require(create_ext_ioctl(i915, &create) == 0);
>>
>> Context destroy here I think.
>>
>>> +
>>> +     create.extensions = to_user_pointer(&p_prio);
>>> +     igt_assert_eq(create_ext_ioctl(i915, &create), 0);
>>> +
>>> +     memset(&get, 0, sizeof(get));
>>> +     get.ctx_id = create.ctx_id;
>>> +     get.param = I915_CONTEXT_PARAM_PRIORITY;
>>> +     gem_context_get_param(i915, &get);
>>> +     igt_assert_eq(get.value, p_prio.param.value);
>>> +
>>> +     memset(&get, 0, sizeof(get));
>>> +     get.ctx_id = create.ctx_id;
>>> +     get.param = I915_CONTEXT_PARAM_RECOVERABLE;
>>> +     gem_context_get_param(i915, &get);
>>> +     igt_assert_eq(get.value, 0);
>>> +
>>> +     check_single_timeline(i915, create.ctx_id, 2);
>>> +
>>> +     gem_context_destroy(i915, create.ctx_id);
>>> +#endif /* I915_DEFINE_CONTEXT_PARAM_ENGINES */
>>> +}
>>> +
>>>    igt_main
>>>    {
>>>        const int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
>>> @@ -340,17 +543,15 @@ igt_main
>>>                memset(&create, 0, sizeof(create));
>>>                create.ctx_id = rand();
>>>                create.pad = 0;
>>> -             igt_assert_eq(__gem_context_create_local(fd, &create), 0);
>>> +             igt_assert_eq(create_ioctl(fd, &create), 0);
>>>                igt_assert(create.ctx_id != 0);
>>>                gem_context_destroy(fd, create.ctx_id);
>>>        }
>>>    
>>> -     igt_subtest("invalid-pad") {
>>> -             memset(&create, 0, sizeof(create));
>>> -             create.ctx_id = rand();
>>> -             create.pad = 1;
>>> -             igt_assert_eq(__gem_context_create_local(fd, &create), -EINVAL);
>>> -     }
>>> +     igt_subtest("ext-param")
>>> +             basic_ext_param(fd);
>>
>> basic-ext-param? Do we even rely on basic prefix these days?
> 
> basic test prefix is dead.
> -Chris
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [igt-dev] [PATCH i-g-t 05/16] i915/gem_ctx_create: Basic checks for constructor properties
@ 2019-05-16  8:38         ` Tvrtko Ursulin
  0 siblings, 0 replies; 92+ messages in thread
From: Tvrtko Ursulin @ 2019-05-16  8:38 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: igt-dev


On 15/05/2019 20:05, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-05-14 11:15:12)
>>
>> On 08/05/2019 11:09, Chris Wilson wrote:
>>> Check that the extended create interface accepts setparam.
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> ---
>>>    tests/i915/gem_ctx_create.c | 225 ++++++++++++++++++++++++++++++++++--
>>>    1 file changed, 213 insertions(+), 12 deletions(-)
>>>
>>> diff --git a/tests/i915/gem_ctx_create.c b/tests/i915/gem_ctx_create.c
>>> index a664070db..9b4fddbe7 100644
>>> --- a/tests/i915/gem_ctx_create.c
>>> +++ b/tests/i915/gem_ctx_create.c
>>> @@ -33,6 +33,7 @@
>>>    #include <time.h>
>>>    
>>>    #include "igt_rand.h"
>>> +#include "sw_sync.h"
>>>    
>>>    #define LOCAL_I915_EXEC_BSD_SHIFT      (13)
>>>    #define LOCAL_I915_EXEC_BSD_MASK       (3 << LOCAL_I915_EXEC_BSD_SHIFT)
>>> @@ -45,12 +46,33 @@ static unsigned all_nengine;
>>>    static unsigned ppgtt_engines[16];
>>>    static unsigned ppgtt_nengine;
>>>    
>>> -static int __gem_context_create_local(int fd, struct drm_i915_gem_context_create *arg)
>>> +static int create_ioctl(int fd, struct drm_i915_gem_context_create *arg)
>>>    {
>>> -     int ret = 0;
>>> -     if (drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, arg))
>>> -             ret = -errno;
>>> -     return ret;
>>> +     int err;
>>> +
>>> +     err = 0;
>>> +     if (igt_ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, arg)) {
>>> +             err = -errno;
>>> +             igt_assert(err);
>>> +     }
>>> +
>>> +     errno = 0;
>>> +     return err;
>>> +}
>>> +
>>> +static int create_ext_ioctl(int i915,
>>> +                         struct drm_i915_gem_context_create_ext *arg)
>>> +{
>>> +     int err;
>>> +
>>> +     err = 0;
>>> +     if (igt_ioctl(i915, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, arg)) {
>>> +             err = -errno;
>>> +             igt_assume(err);
>>> +     }
>>> +
>>> +     errno = 0;
>>> +     return err;
>>>    }
>>>    
>>>    static double elapsed(const struct timespec *start,
>>> @@ -308,6 +330,187 @@ static void maximum(int fd, int ncpus, unsigned mode)
>>>        free(contexts);
>>>    }
>>>    
>>> +static void basic_ext_param(int i915)
>>> +{
>>> +     struct drm_i915_gem_context_create_ext_setparam ext = {
>>> +             { .name = I915_CONTEXT_CREATE_EXT_SETPARAM },
>>> +     };
>>> +     struct drm_i915_gem_context_create_ext create = {
>>> +             .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS
>>> +     };
>>> +     struct drm_i915_gem_context_param get;
>>> +
>>> +     igt_require(create_ext_ioctl(i915, &create) == 0);
>>> +     gem_context_destroy(i915, create.ctx_id);
>>> +
>>> +     create.extensions = -1ull;
>>> +     igt_assert_eq(create_ext_ioctl(i915, &create), -EFAULT);
>>> +
>>> +     create.extensions = to_user_pointer(&ext);
>>> +     igt_assert_eq(create_ext_ioctl(i915, &create), -EINVAL);
>>> +
>>> +     ext.param.param = I915_CONTEXT_PARAM_PRIORITY;
>>> +     if (create_ext_ioctl(i915, &create) != -ENODEV) {
>>> +             gem_context_destroy(i915, create.ctx_id);
>>> +
>>> +             ext.base.next_extension = -1ull;
>>> +             igt_assert_eq(create_ext_ioctl(i915, &create), -EFAULT);
>>> +             ext.base.next_extension = to_user_pointer(&ext);
>>> +             igt_assert_eq(create_ext_ioctl(i915, &create), -E2BIG);
>>> +             ext.base.next_extension = 0;
>>> +
>>> +             ext.param.value = 32;
>>> +             igt_assert_eq(create_ext_ioctl(i915, &create), 0);
>>> +
>>> +             memset(&get, 0, sizeof(get));
>>> +             get.ctx_id = create.ctx_id;
>>> +             get.param = I915_CONTEXT_PARAM_PRIORITY;
>>> +             gem_context_get_param(i915, &get);
>>> +             igt_assert_eq(get.value, ext.param.value);
>>> +
>>> +             gem_context_destroy(i915, create.ctx_id);
>>> +     }
>>> +}
>>> +
>>> +static void check_single_timeline(int i915, uint32_t ctx, int num_engines)
>>> +{
>>> +#define RCS_TIMESTAMP (0x2000 + 0x358)
>>> +     const int gen = intel_gen(intel_get_drm_devid(i915));
>>> +     const int has_64bit_reloc = gen >= 8;
>>> +     struct drm_i915_gem_exec_object2 results = { .handle = gem_create(i915, 4096) };
>>> +     const uint32_t bbe = MI_BATCH_BUFFER_END;
>>> +     int timeline = sw_sync_timeline_create();
>>> +     uint32_t last, *map;
>>> +
>>> +     {
>>> +             struct drm_i915_gem_execbuffer2 execbuf = {
>>> +                     .buffers_ptr = to_user_pointer(&results),
>>> +                     .buffer_count = 1,
>>> +                     .rsvd1 = ctx,
>>> +             };
>>> +             gem_write(i915, results.handle, 0, &bbe, sizeof(bbe));
>>> +             gem_execbuf(i915, &execbuf);
>>> +             results.flags = EXEC_OBJECT_PINNED;
>>> +     }
>>> +
>>> +     for (int i = 0; i < num_engines; i++) {
>>> +             struct drm_i915_gem_exec_object2 obj[2] = {
>>> +                     results, /* write hazard lies! */
>>> +                     { .handle = gem_create(i915, 4096) },
>>> +             };
>>> +             struct drm_i915_gem_execbuffer2 execbuf = {
>>> +                     .buffers_ptr = to_user_pointer(obj),
>>> +                     .buffer_count = 2,
>>> +                     .rsvd1 = ctx,
>>> +                     .rsvd2 = sw_sync_timeline_create_fence(timeline, num_engines - i),
>>> +                     .flags = i | I915_EXEC_FENCE_IN,
>>> +             };
>>> +             uint64_t offset = results.offset + 4 * i;
>>> +             uint32_t *cs;
>>> +             int j = 0;
>>> +
>>> +             cs = gem_mmap__cpu(i915, obj[1].handle, 0, 4096, PROT_WRITE);
>>> +
>>> +             cs[j] = 0x24 << 23 | 1; /* SRM */
>>> +             if (has_64bit_reloc)
>>> +                     cs[j]++;
>>> +             j++;
>>> +             cs[j++] = RCS_TIMESTAMP;
>>> +             cs[j++] = offset;
>>> +             if (has_64bit_reloc)
>>> +                     cs[j++] = offset >> 32;
>>> +             cs[j++] = MI_BATCH_BUFFER_END;
>>> +
>>> +             munmap(cs, 4096);
>>> +
>>> +             gem_execbuf(i915, &execbuf);
>>> +             gem_close(i915, obj[1].handle);
>>> +             close(execbuf.rsvd2);
>>> +     }
>>> +     close(timeline);
>>> +     gem_sync(i915, results.handle);
>>> +
>>> +     map = gem_mmap__cpu(i915, results.handle, 0, 4096, PROT_READ);
>>> +     gem_set_domain(i915, results.handle, I915_GEM_DOMAIN_CPU, 0);
>>> +     gem_close(i915, results.handle);
>>> +
>>> +     last = map[0];
>>> +     for (int i = 1; i < num_engines; i++) {
>>> +             igt_assert_f((map[i] - last) > 0,
>>> +                          "Engine instance [%d] executed too early: this:%x, last:%x\n",
>>> +                          i, map[i], last);
>>> +             last = map[i];
>>> +     }
>>
>> Hm.. aren't two sw fences (two seqnos) just a needless complication,
>> since the execution order in the single timeline is controlled by
>> submission order? The statement is true only when compounded with the
>> fact that you signal both fences at the same time. I am thinking about
>> what would happen if it wasn't a single timeline context. Fences would be
>> signaled in order, but execution does not have to happen in order. That
>> it does is a property of the single timeline and not of fence ordering. So
>> two input fences with two seqnos is misleading. A single plug would do, I think.
> 
> But that would not detect the case when it was multiple timelines...
>   
>> Or are you thinking of nudging the driver to do the right thing? But in
>> that case I think you'd need to manually advance the first seqno (2nd
>> batch) first and wait a bit to check it hasn't been executed. Then signal
>> the second seqno (first batch) and run the above check to see they have
>> been executed in order.
> 
> The challenge is to detect whether the driver uses 2 timelines instead
> of one. So that is what we set up to detect.

With a single seqno advance, what determines the order of signal delivery
on blocked fences? Is it defined in the dma-fence contract that it happens
in order? If it is, is it then defined that this would map to in-order
submission in i915 (if the contexts/timelines were separate)? (It might
not, I am thinking; the scheduler can decide whatever it wants.)

So I don't see a problem with being more explicit in this test and doing
a step-by-step timeline advance, so that what happens is completely under
the test's control. And it would AFAICS detect the two timelines, because
it would expect that the first timeline advance must not result in request
execution.

Regards,

Tvrtko


>>> +     munmap(map, 4096);
>>> +}
>>> +
>>> +static void iris_pipeline(int i915)
>>> +{
>>> +#ifdef I915_DEFINE_CONTEXT_PARAM_ENGINES
>>
>> Remove this I expect?
> 
> Depends on a later header. The early plan was to have the bits and pieces
> added piecemeal, but then I decided to add a full feature test.
> 
>>> +#define RCS0 {0, 0}
>>> +     I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 2) = {
>>> +             .engines = { RCS0, RCS0 }
>>> +     };
>>> +     struct drm_i915_gem_context_create_ext_setparam p_engines = {
>>> +             .base = {
>>> +                     .name = I915_CONTEXT_CREATE_EXT_SETPARAM,
>>> +                     .next_extension = 0, /* end of chain */
>>> +             },
>>> +             .param = {
>>> +                     .param = I915_CONTEXT_PARAM_ENGINES,
>>> +                     .value = to_user_pointer(&engines),
>>> +                     .size = sizeof(engines),
>>> +             },
>>> +     };
>>> +     struct drm_i915_gem_context_create_ext_setparam p_recover = {
>>> +             .base = {
>>> +                     .name = I915_CONTEXT_CREATE_EXT_SETPARAM,
>>> +                     .next_extension = to_user_pointer(&p_engines),
>>> +             },
>>> +             .param = {
>>> +                     .param = I915_CONTEXT_PARAM_RECOVERABLE,
>>> +                     .value = 0,
>>> +             },
>>> +     };
>>> +     struct drm_i915_gem_context_create_ext_setparam p_prio = {
>>> +             .base = {
>>> +                     .name = I915_CONTEXT_CREATE_EXT_SETPARAM,
>>> +                     .next_extension = to_user_pointer(&p_recover),
>>> +             },
>>> +             .param = {
>>> +                     .param = I915_CONTEXT_PARAM_PRIORITY,
>>> +                     .value = 768,
>>> +             },
>>> +     };
>>> +     struct drm_i915_gem_context_create_ext create = {
>>> +             .flags = (I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE |
>>> +                       I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS),
>>> +     };
>>> +     struct drm_i915_gem_context_param get;
>>> +
>>> +     igt_require(create_ext_ioctl(i915, &create) == 0);
>>
>> Context destroy here I think.
>>
>>> +
>>> +     create.extensions = to_user_pointer(&p_prio);
>>> +     igt_assert_eq(create_ext_ioctl(i915, &create), 0);
>>> +
>>> +     memset(&get, 0, sizeof(get));
>>> +     get.ctx_id = create.ctx_id;
>>> +     get.param = I915_CONTEXT_PARAM_PRIORITY;
>>> +     gem_context_get_param(i915, &get);
>>> +     igt_assert_eq(get.value, p_prio.param.value);
>>> +
>>> +     memset(&get, 0, sizeof(get));
>>> +     get.ctx_id = create.ctx_id;
>>> +     get.param = I915_CONTEXT_PARAM_RECOVERABLE;
>>> +     gem_context_get_param(i915, &get);
>>> +     igt_assert_eq(get.value, 0);
>>> +
>>> +     check_single_timeline(i915, create.ctx_id, 2);
>>> +
>>> +     gem_context_destroy(i915, create.ctx_id);
>>> +#endif /* I915_DEFINE_CONTEXT_PARAM_ENGINES */
>>> +}
>>> +
>>>    igt_main
>>>    {
>>>        const int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
>>> @@ -340,17 +543,15 @@ igt_main
>>>                memset(&create, 0, sizeof(create));
>>>                create.ctx_id = rand();
>>>                create.pad = 0;
>>> -             igt_assert_eq(__gem_context_create_local(fd, &create), 0);
>>> +             igt_assert_eq(create_ioctl(fd, &create), 0);
>>>                igt_assert(create.ctx_id != 0);
>>>                gem_context_destroy(fd, create.ctx_id);
>>>        }
>>>    
>>> -     igt_subtest("invalid-pad") {
>>> -             memset(&create, 0, sizeof(create));
>>> -             create.ctx_id = rand();
>>> -             create.pad = 1;
>>> -             igt_assert_eq(__gem_context_create_local(fd, &create), -EINVAL);
>>> -     }
>>> +     igt_subtest("ext-param")
>>> +             basic_ext_param(fd);
>>
>> basic-ext-param? Do we even rely on basic prefix these days?
> 
> basic test prefix is dead.
> -Chris
> 
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH i-g-t 08/16] i915: Exercise creating context with shared GTT
  2019-05-15 19:33       ` [igt-dev] " Chris Wilson
@ 2019-05-16  8:51         ` Tvrtko Ursulin
  -1 siblings, 0 replies; 92+ messages in thread
From: Tvrtko Ursulin @ 2019-05-16  8:51 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: igt-dev


On 15/05/2019 20:33, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-05-15 07:37:18)
>>
>> On 08/05/2019 11:09, Chris Wilson wrote:
>>> v2: Test each shared context is its own timeline and allows request
>>> reordering between shared contexts.
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
>>> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
>>> ---
>>>    lib/i915/gem_context.c        |  68 +++
>>>    lib/i915/gem_context.h        |  13 +
>>>    tests/Makefile.sources        |   1 +
>>>    tests/i915/gem_ctx_shared.c   | 856 ++++++++++++++++++++++++++++++++++
>>>    tests/i915/gem_exec_whisper.c |  32 +-
>>>    tests/meson.build             |   1 +
>>>    6 files changed, 962 insertions(+), 9 deletions(-)
>>>    create mode 100644 tests/i915/gem_ctx_shared.c
>>>
>>> diff --git a/lib/i915/gem_context.c b/lib/i915/gem_context.c
>>> index f94d89cb4..8fb8984d1 100644
>>> --- a/lib/i915/gem_context.c
>>> +++ b/lib/i915/gem_context.c
>>> @@ -272,6 +272,74 @@ void gem_context_set_priority(int fd, uint32_t ctx_id, int prio)
>>>        igt_assert_eq(__gem_context_set_priority(fd, ctx_id, prio), 0);
>>>    }
>>>    
>>> +int
>>> +__gem_context_clone(int i915,
>>> +                 uint32_t src, unsigned int share,
>>> +                 unsigned int flags,
>>> +                 uint32_t *out)
>>> +{
>>> +     struct drm_i915_gem_context_create_ext_clone clone = {
>>> +             { .name = I915_CONTEXT_CREATE_EXT_CLONE },
>>> +             .clone_id = src,
>>> +             .flags = share,
>>> +     };
>>> +     struct drm_i915_gem_context_create_ext arg = {
>>> +             .flags = flags | I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
>>> +             .extensions = to_user_pointer(&clone),
>>> +     };
>>> +     int err = 0;
>>> +
>>> +     if (igt_ioctl(i915, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, &arg))
>>> +             err = -errno;
>>> +
>>> +     *out = arg.ctx_id;
>>> +
>>> +     errno = 0;
>>> +     return err;
>>> +}
>>> +
>>> +static bool __gem_context_has(int i915, uint32_t share, unsigned int flags)
>>> +{
>>> +     uint32_t ctx;
>>> +
>>> +     __gem_context_clone(i915, 0, share, flags, &ctx);
>>> +     if (ctx)
>>> +             gem_context_destroy(i915, ctx);
>>> +
>>> +     errno = 0;
>>> +     return ctx;
>>> +}
>>> +
>>> +bool gem_contexts_has_shared_gtt(int i915)
>>> +{
>>> +     return __gem_context_has(i915, I915_CONTEXT_CLONE_VM, 0);
>>> +}
>>> +
>>> +bool gem_has_queues(int i915)
>>> +{
>>> +     return __gem_context_has(i915,
>>> +                              I915_CONTEXT_CLONE_VM,
>>> +                              I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
>>> +}
>>> +
>>> +uint32_t gem_context_clone(int i915,
>>> +                        uint32_t src, unsigned int share,
>>> +                        unsigned int flags)
>>> +{
>>> +     uint32_t ctx;
>>> +
>>> +     igt_assert_eq(__gem_context_clone(i915, src, share, flags, &ctx), 0);
>>> +
>>> +     return ctx;
>>> +}
>>> +
>>> +uint32_t gem_queue_create(int i915)
>>> +{
>>> +     return gem_context_clone(i915, 0,
>>> +                              I915_CONTEXT_CLONE_VM,
>>> +                              I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
>>> +}
>>> +
>>>    bool gem_context_has_engine(int fd, uint32_t ctx, uint64_t engine)
>>>    {
>>>        struct drm_i915_gem_exec_object2 exec = {};
>>> diff --git a/lib/i915/gem_context.h b/lib/i915/gem_context.h
>>> index a052714d4..8043c3401 100644
>>> --- a/lib/i915/gem_context.h
>>> +++ b/lib/i915/gem_context.h
>>> @@ -29,6 +29,19 @@ int __gem_context_create(int fd, uint32_t *ctx_id);
>>>    void gem_context_destroy(int fd, uint32_t ctx_id);
>>>    int __gem_context_destroy(int fd, uint32_t ctx_id);
>>>    
>>> +int __gem_context_clone(int i915,
>>> +                     uint32_t src, unsigned int share,
>>> +                     unsigned int flags,
>>> +                     uint32_t *out);
>>> +uint32_t gem_context_clone(int i915,
>>> +                        uint32_t src, unsigned int share,
>>> +                        unsigned int flags);
>>> +
>>> +uint32_t gem_queue_create(int i915);
>>> +
>>> +bool gem_contexts_has_shared_gtt(int i915);
>>> +bool gem_has_queues(int i915);
>>> +
>>>    bool gem_has_contexts(int fd);
>>>    void gem_require_contexts(int fd);
>>>    void gem_context_require_bannable(int fd);
>>> diff --git a/tests/Makefile.sources b/tests/Makefile.sources
>>> index e1b7feeb2..3552e895b 100644
>>> --- a/tests/Makefile.sources
>>> +++ b/tests/Makefile.sources
>>> @@ -22,6 +22,7 @@ TESTS_progs = \
>>>        drm_mm \
>>>        drm_read \
>>>        i915/gem_ctx_clone \
>>> +     i915/gem_ctx_shared \
>>>        i915/gem_vm_create \
>>>        kms_3d \
>>>        kms_addfb_basic \
>>> diff --git a/tests/i915/gem_ctx_shared.c b/tests/i915/gem_ctx_shared.c
>>> new file mode 100644
>>> index 000000000..0076f5e9d
>>> --- /dev/null
>>> +++ b/tests/i915/gem_ctx_shared.c
>>> @@ -0,0 +1,856 @@
>>> +/*
>>> + * Copyright © 2017 Intel Corporation
>>
>> 2019
> 
> Nah, that would imply I put any thought into touching it since.
> 
>>> +static void exhaust_shared_gtt(int i915, unsigned int flags)
>>> +#define EXHAUST_LRC 0x1
>>> +{
>>> +     i915 = gem_reopen_driver(i915);
>>> +
>>> +     igt_fork(pid, 1) {
>>> +             const uint32_t bbe = MI_BATCH_BUFFER_END;
>>> +             struct drm_i915_gem_exec_object2 obj = {
>>> +                     .handle = gem_create(i915, 4096)
>>> +             };
>>> +             struct drm_i915_gem_execbuffer2 execbuf = {
>>> +                     .buffers_ptr = to_user_pointer(&obj),
>>> +                     .buffer_count = 1,
>>> +             };
>>> +             uint32_t parent, child;
>>> +             unsigned long count = 0;
>>> +             int err;
>>> +
>>> +             gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
>>> +
>>> +             child = 0;
>>> +             for (;;) {
>>> +                     parent = child;
>>> +                     err = __gem_context_clone(i915,
>>> +                                               parent, I915_CONTEXT_CLONE_VM,
>>> +                                               0, &child);
>>> +                     if (err)
>>> +                             break;
>>> +
>>> +                     if (flags & EXHAUST_LRC) {
>>> +                             execbuf.rsvd1 = child;
>>> +                             err = __gem_execbuf(i915, &execbuf);
>>> +                             if (err)
>>> +                                     break;
>>> +                     }
>>
>> What are the stop conditions in this test, with and without the
>> EXHAUST_LRC flag? It would be good to put that in a comment.
> 
> It runs until the kernel dies. The giveaway is meant to be the test name.
>   
>> Especially since AFAIR this one was causing OOM for me so might need to
>> be tweaked.
> 
> It runs until the kernel dies.

I thought we were not allowed to add failing tests?
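
If we keep it, an untested sketch of how I would bound the loop so it
fails cleanly instead of running until the kernel dies (the cap is
arbitrary):

	/* Sketch: cap the clone chain; each iteration still shares
	 * the VM with its parent, we just stop before the OOM.
	 */
	for (count = 0; count < 1 << 16; count++) {
		parent = child;
		err = __gem_context_clone(i915,
					  parent, I915_CONTEXT_CLONE_VM,
					  0, &child);
		if (err)
			break;
	}
	igt_info("Stopped after %lu clones: %d (%s)\n",
		 count, err, strerror(-err));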

> 
>>> +
>>> +                     count++;
>>> +             }
>>> +             gem_sync(i915, obj.handle);
>>> +
>>> +             igt_info("Created %lu shared contexts, before %d (%s)\n",
>>> +                      count, err, strerror(-err));
>>> +     }
>>> +     close(i915);
>>> +     igt_waitchildren();
>>> +}
>>> +
>>> +static void exec_shared_gtt(int i915, unsigned int ring)
>>> +{
>>> +     const int gen = intel_gen(intel_get_drm_devid(i915));
>>> +     const uint32_t bbe = MI_BATCH_BUFFER_END;
>>> +     struct drm_i915_gem_exec_object2 obj = {
>>> +             .handle = gem_create(i915, 4096)
>>> +     };
>>> +     struct drm_i915_gem_execbuffer2 execbuf = {
>>> +             .buffers_ptr = to_user_pointer(&obj),
>>> +             .buffer_count = 1,
>>> +             .flags = ring,
>>> +     };
>>> +     uint32_t scratch = obj.handle;
>>> +     uint32_t batch[16];
>>> +     int i;
>>> +
>>> +     gem_require_ring(i915, ring);
>>> +     igt_require(gem_can_store_dword(i915, ring));
>>> +
>>> +     /* Load object into place in the GTT */
>>> +     gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
>>> +     gem_execbuf(i915, &execbuf);
>>> +
>>> +     /* Presume nothing causes an eviction in the meantime */
>>> +
>>> +     obj.handle = gem_create(i915, 4096);
>>> +
>>> +     i = 0;
>>> +     batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
>>> +     if (gen >= 8) {
>>> +             batch[++i] = obj.offset;
>>> +             batch[++i] = 0;
>>> +     } else if (gen >= 4) {
>>> +             batch[++i] = 0;
>>> +             batch[++i] = obj.offset;
>>> +     } else {
>>> +             batch[i]--;
>>> +             batch[++i] = obj.offset;
>>> +     }
>>> +     batch[++i] = 0xc0ffee;
>>> +     batch[++i] = MI_BATCH_BUFFER_END;
>>> +     gem_write(i915, obj.handle, 0, batch, sizeof(batch));
>>> +
>>> +     obj.offset += 4096; /* make sure we don't cause an eviction! */
>>
>> Is 4k apart safe?
> 
> To change it would imply an ABI break; I see no param indicating an
> ABI change, and Joonas keeps on refusing to add such information.

Why would it be an ABI break? Why would the driver not be allowed to add
arbitrary padding, or use a larger pgtable entry or something, so that if
you assume +4k is empty it might actually not be?
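
For example, something like this (untested sketch, assuming softpin is
usable here) would make the placement explicit instead of relying on
+4k being vacant:

	/* Sketch: softpin the batch at an address the test chooses
	 * itself, rather than guessing around the scratch object.
	 */
	obj.offset = 256 << 10; /* arbitrary, test-owned address */
	obj.flags |= EXEC_OBJECT_PINNED;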

>   
>> A short comment on how does this test work would be good.
>>
>>> +     obj.flags |= EXEC_OBJECT_PINNED;
>>> +     execbuf.rsvd1 = gem_context_clone(i915, 0, I915_CONTEXT_CLONE_VM, 0);
>>> +     if (gen > 3 && gen < 6)
>>> +             execbuf.flags |= I915_EXEC_SECURE;
>>> +
>>> +     gem_execbuf(i915, &execbuf);
>>> +     gem_context_destroy(i915, execbuf.rsvd1);
>>> +     gem_sync(i915, obj.handle); /* write hazard lies */
>>> +     gem_close(i915, obj.handle);
>>> +
>>> +     gem_read(i915, scratch, 0, batch, sizeof(uint32_t));
>>> +     gem_close(i915, scratch);
>>> +
>>> +     igt_assert_eq_u32(*batch, 0xc0ffee);
>>> +}
>>> +
>>> +static int nop_sync(int i915, uint32_t ctx, unsigned int ring, int64_t timeout)
>>> +{
>>> +     const uint32_t bbe = MI_BATCH_BUFFER_END;
>>> +     struct drm_i915_gem_exec_object2 obj = {
>>> +             .handle = gem_create(i915, 4096),
>>> +     };
>>> +     struct drm_i915_gem_execbuffer2 execbuf = {
>>> +             .buffers_ptr = to_user_pointer(&obj),
>>> +             .buffer_count = 1,
>>> +             .flags = ring,
>>> +             .rsvd1 = ctx,
>>> +     };
>>> +     int err;
>>> +
>>> +     gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
>>> +     gem_execbuf(i915, &execbuf);
>>> +     err = gem_wait(i915, obj.handle, &timeout);
>>> +     gem_close(i915, obj.handle);
>>> +
>>> +     return err;
>>> +}
>>> +
>>> +static bool has_single_timeline(int i915)
>>> +{
>>> +     uint32_t ctx;
>>> +
>>> +     __gem_context_clone(i915, 0, 0,
>>> +                         I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE,
>>> +                         &ctx);
>>> +     if (ctx)
>>> +             gem_context_destroy(i915, ctx);
>>> +
>>> +     return ctx != 0;
>>> +}
>>> +
>>> +static bool ignore_engine(unsigned engine)
>>> +{
>>> +     if (engine == 0)
>>> +             return true;
>>> +
>>> +     if (engine == I915_EXEC_BSD)
>>> +             return true;
>>> +
>>> +     return false;
>>> +}
>>> +
>>> +static void single_timeline(int i915)
>>> +{
>>> +     const uint32_t bbe = MI_BATCH_BUFFER_END;
>>> +     struct drm_i915_gem_exec_object2 obj = {
>>> +             .handle = gem_create(i915, 4096),
>>> +     };
>>> +     struct drm_i915_gem_execbuffer2 execbuf = {
>>> +             .buffers_ptr = to_user_pointer(&obj),
>>> +             .buffer_count = 1,
>>> +     };
>>> +     struct sync_fence_info rings[16];
>>
>> Could use for_each_physical_engine to count the engines. But we probably
>> have plenty of this around the code base.
>>
>>> +     struct sync_file_info sync_file_info = {
>>> +             .num_fences = 1,
>>> +     };
>>> +     unsigned int engine;
>>> +     int n;
>>> +
>>> +     igt_require(has_single_timeline(i915));
>>> +
>>> +     gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));
>>> +     gem_execbuf(i915, &execbuf);
>>> +     gem_sync(i915, obj.handle);
>>> +
>>> +     /*
>>> +      * For a "single timeline" context, each ring is on the common
>>> +      * timeline, unlike a normal context where each ring has an
>>> +      * independent timeline. That is no matter which engine we submit
>>> +      * to, it reports the same timeline name and fence context. However,
>>> +      * the fence context is not reported through the sync_fence_info.
>>
>> Is the test useful then? There was one I reviewed earlier in this series
>> which tested for execution ordering, which sounds like what is needed.
> 
> It is a variant. This one is a couple of years older. Both accomplish
> similar things through very different means, all the more serendipitous.
> 
>>
>>> +      */
>>> +     execbuf.rsvd1 =
>>> +             gem_context_clone(i915, 0, 0,
>>> +                               I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
>>> +     execbuf.flags = I915_EXEC_FENCE_OUT;
>>> +     n = 0;
>>> +     for_each_engine(i915, engine) {
>>
>> for_each_physical_engine to align with Andi's work?
> 
> No, this would be an ABI iterator not a physical iterator.

Legacy ABI, why? It then covers only a subset of engines, so is it
useful to cover more than one but not all?

>   
>>> +             gem_execbuf_wr(i915, &execbuf);
>>> +             sync_file_info.sync_fence_info = to_user_pointer(&rings[n]);
>>> +             do_ioctl(execbuf.rsvd2 >> 32, SYNC_IOC_FILE_INFO, &sync_file_info);
>>> +             close(execbuf.rsvd2 >> 32);
>>> +
>>> +             igt_info("ring[%d] fence: %s %s\n",
>>> +                      n, rings[n].driver_name, rings[n].obj_name);
>>> +             n++;
>>> +     }
>>> +     gem_sync(i915, obj.handle);
>>> +     gem_close(i915, obj.handle);
>>> +
>>> +     for (int i = 1; i < n; i++) {
>>> +             igt_assert(!strcmp(rings[0].driver_name, rings[i].driver_name));
>>> +             igt_assert(!strcmp(rings[0].obj_name, rings[i].obj_name));
>>
>> What is in obj_name?
> 
> The timeline name. sync_file is plain old useless. The asserts here are
> drivel.
> 
>>> +     }
>>> +}
>>> +
>>> +static void exec_single_timeline(int i915, unsigned int ring)
>>> +{
>>> +     unsigned int other;
>>> +     igt_spin_t *spin;
>>> +     uint32_t ctx;
>>> +
>>> +     gem_require_ring(i915, ring);
>>> +     igt_require(has_single_timeline(i915));
>>> +
>>> +     /*
>>> +      * On an ordinary context, a blockage on one ring doesn't prevent
>>> +      * execution on an other.
>>> +      */
>>> +     ctx = 0;
>>> +     spin = NULL;
>>> +     for_each_engine(i915, other) {
>>
>> for_each_physical
> 
> Modern inventions.
>   
>>> +             if (other == ring || ignore_engine(other))
>>> +                     continue;
>>> +
>>> +             if (spin == NULL) {
>>> +                     spin = __igt_spin_new(i915, .ctx = ctx, .engine = other);
>>> +             } else {
>>> +                     struct drm_i915_gem_execbuffer2 execbuf = {
>>> +                             .buffers_ptr = spin->execbuf.buffers_ptr,
>>> +                             .buffer_count = spin->execbuf.buffer_count,
>>> +                             .flags = other,
>>> +                             .rsvd1 = ctx,
>>> +                     };
>>> +                     gem_execbuf(i915, &execbuf);
>>> +             }
>>> +     }
>>> +     igt_require(spin);
>>> +     igt_assert_eq(nop_sync(i915, ctx, ring, NSEC_PER_SEC), 0);
>>> +     igt_spin_free(i915, spin);
>>> +
>>> +     /*
>>> +      * But if we create a context with just a single shared timeline,
>>> +      * then it will block waiting for the earlier requests on the
>>> +      * other engines.
>>> +      */
>>> +     ctx = gem_context_clone(i915, 0, 0,
>>> +                             I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
>>> +     spin = NULL;
>>> +     for_each_engine(i915, other) {
>>
>> Ditto.
> 
> Hey! Where was that when this code was written^W copied :-p
>   
>>> +             if (other == ring || ignore_engine(other))
>>> +                     continue;
>>> +
>>> +             if (spin == NULL) {
>>> +                     spin = __igt_spin_new(i915, .ctx = ctx, .engine = other);
>>> +             } else {
>>> +                     struct drm_i915_gem_execbuffer2 execbuf = {
>>> +                             .buffers_ptr = spin->execbuf.buffers_ptr,
>>> +                             .buffer_count = spin->execbuf.buffer_count,
>>> +                             .flags = other,
>>> +                             .rsvd1 = ctx,
>>> +                     };
>>> +                     gem_execbuf(i915, &execbuf);
>>> +             }
>>> +     }
>>> +     igt_assert(spin);
>>> +     igt_assert_eq(nop_sync(i915, ctx, ring, NSEC_PER_SEC), -ETIME);
>>> +     igt_spin_free(i915, spin);
>>> +}
>>> +
>>> +static void store_dword(int i915, uint32_t ctx, unsigned ring,
>>> +                     uint32_t target, uint32_t offset, uint32_t value,
>>> +                     uint32_t cork, unsigned write_domain)
>>> +{
>>> +     const int gen = intel_gen(intel_get_drm_devid(i915));
>>> +     struct drm_i915_gem_exec_object2 obj[3];
>>> +     struct drm_i915_gem_relocation_entry reloc;
>>> +     struct drm_i915_gem_execbuffer2 execbuf;
>>> +     uint32_t batch[16];
>>> +     int i;
>>> +
>>> +     memset(&execbuf, 0, sizeof(execbuf));
>>> +     execbuf.buffers_ptr = to_user_pointer(obj + !cork);
>>> +     execbuf.buffer_count = 2 + !!cork;
>>> +     execbuf.flags = ring;
>>> +     if (gen < 6)
>>> +             execbuf.flags |= I915_EXEC_SECURE;
>>> +     execbuf.rsvd1 = ctx;
>>> +
>>> +     memset(obj, 0, sizeof(obj));
>>> +     obj[0].handle = cork;
>>> +     obj[1].handle = target;
>>> +     obj[2].handle = gem_create(i915, 4096);
>>> +
>>> +     memset(&reloc, 0, sizeof(reloc));
>>> +     reloc.target_handle = obj[1].handle;
>>> +     reloc.presumed_offset = 0;
>>> +     reloc.offset = sizeof(uint32_t);
>>> +     reloc.delta = offset;
>>> +     reloc.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
>>> +     reloc.write_domain = write_domain;
>>> +     obj[2].relocs_ptr = to_user_pointer(&reloc);
>>> +     obj[2].relocation_count = 1;
>>> +
>>> +     i = 0;
>>> +     batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
>>> +     if (gen >= 8) {
>>> +             batch[++i] = offset;
>>> +             batch[++i] = 0;
>>> +     } else if (gen >= 4) {
>>> +             batch[++i] = 0;
>>> +             batch[++i] = offset;
>>> +             reloc.offset += sizeof(uint32_t);
>>> +     } else {
>>> +             batch[i]--;
>>> +             batch[++i] = offset;
>>> +     }
>>> +     batch[++i] = value;
>>> +     batch[++i] = MI_BATCH_BUFFER_END;
>>> +     gem_write(i915, obj[2].handle, 0, batch, sizeof(batch));
>>> +     gem_execbuf(i915, &execbuf);
>>> +     gem_close(i915, obj[2].handle);
>>> +}
>>> +
>>> +static uint32_t create_highest_priority(int i915)
>>> +{
>>> +     uint32_t ctx = gem_context_create(i915);
>>> +
>>> +     /*
>>> +      * If there is no priority support, all contexts will have equal
>>> +      * priority (and therefore the max user priority), so no context
>>> +      * can overtake us, and we effectively can form a plug.
>>> +      */
>>> +     __gem_context_set_priority(i915, ctx, MAX_PRIO);
>>> +
>>> +     return ctx;
>>> +}
>>> +
>>> +static void unplug_show_queue(int i915, struct igt_cork *c, unsigned int engine)
>>> +{
>>> +     igt_spin_t *spin[MAX_ELSP_QLEN];
>>
>> Why is this 16?
> 
> 2x as big as the deepest known qlen. And 16 is the number that crops up
> everywhere as a "just big enough" number.
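
Ok. So I take it the define reads roughly like this (it is not in the
hunk, so I am guessing):

	#define MAX_ELSP_QLEN 16 /* 2x the deepest known ELSP queue */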
>   
>>> +
>>> +     for (int n = 0; n < ARRAY_SIZE(spin); n++) {
>>> +             const struct igt_spin_factory opts = {
>>> +                     .ctx = create_highest_priority(i915),
>>> +                     .engine = engine,
>>> +             };
>>> +             spin[n] = __igt_spin_factory(i915, &opts);
>>> +             gem_context_destroy(i915, opts.ctx);
>>> +     }
>>> +
>>> +     igt_cork_unplug(c); /* batches will now be queued on the engine */
>>> +     igt_debugfs_dump(i915, "i915_engine_info");
>>> +
>>> +     for (int n = 0; n < ARRAY_SIZE(spin); n++)
>>> +             igt_spin_free(i915, spin[n]);
>>> +}
>>> +
>>> +static uint32_t store_timestamp(int i915,
>>> +                             uint32_t ctx, unsigned ring,
>>> +                             unsigned mmio_base)
>>> +{
>>> +     const bool r64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
>>> +     struct drm_i915_gem_exec_object2 obj = {
>>> +             .handle = gem_create(i915, 4096),
>>> +             .relocation_count = 1,
>>> +     };
>>> +     struct drm_i915_gem_relocation_entry reloc = {
>>> +             .target_handle = obj.handle,
>>> +             .offset = 2 * sizeof(uint32_t),
>>> +             .delta = 4092,
>>> +             .read_domains = I915_GEM_DOMAIN_INSTRUCTION,
>>> +     };
>>> +     struct drm_i915_gem_execbuffer2 execbuf = {
>>> +             .buffers_ptr = to_user_pointer(&obj),
>>> +             .buffer_count = 1,
>>> +             .flags = ring,
>>> +             .rsvd1 = ctx,
>>> +     };
>>> +     uint32_t batch[] = {
>>> +             0x24 << 23 | (1 + r64b), /* SRM */
>>> +             mmio_base + 0x358,
>>> +             4092,
>>> +             0,
>>> +             MI_BATCH_BUFFER_END
>>> +     };
>>> +
>>> +     igt_require(intel_gen(intel_get_drm_devid(i915)) >= 7);
>>> +
>>> +     gem_write(i915, obj.handle, 0, batch, sizeof(batch));
>>> +     obj.relocs_ptr = to_user_pointer(&reloc);
>>> +
>>> +     gem_execbuf(i915, &execbuf);
>>> +
>>> +     return obj.handle;
>>> +}
>>> +
>>> +static void independent(int i915, unsigned ring, unsigned flags)
>>> +{
>>> +     uint32_t handle[ARRAY_SIZE(priorities)];
>>> +     igt_spin_t *spin[MAX_ELSP_QLEN];
>>> +     unsigned int mmio_base;
>>> +
>>> +     /* XXX i915_query()! */
>>> +     switch (ring) {
>>> +     case I915_EXEC_DEFAULT:
>>> +     case I915_EXEC_RENDER:
>>> +             mmio_base = 0x2000;
>>> +             break;
>>> +#if 0
>>> +     case I915_EXEC_BSD:
>>> +             mmio_base = 0x12000;
>>> +             break;
>>> +#endif
>>> +     case I915_EXEC_BLT:
>>> +             mmio_base = 0x22000;
>>> +             break;
>>> +
>>> +     case I915_EXEC_VEBOX:
>>> +             if (intel_gen(intel_get_drm_devid(i915)) >= 11)
>>> +                     mmio_base = 0x1d8000;
>>> +             else
>>> +                     mmio_base = 0x1a000;
>>> +             break;
>>> +
>>> +     default:
>>> +             igt_skip("mmio base not known\n");
>>> +     }
>>
>> Ufff this is quite questionable. Should we rather have this subtest in
>> selftests only?
> 
> We should be exporting this information. It is a non-privileged register
> that is used by normal clients to measure elapsed time.

I see... who uses it? Mesa? Is it just one register? What would make
more sense: add a query to read this register, add a query to get the
register address, or expose mmio_base (as you were proposing some time
ago)?
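
Whichever way it gets exposed, I assume the userspace usage boils down
to something like this (sketch; CS_TIMESTAMP is a free-running 32-bit
counter, hence the wrap-safe arithmetic, matching the assert at the end
of independent()):

	/* Sketch: elapsed ticks between two captured timestamps.
	 * Unsigned subtraction is well-defined modulo 2^32, so a
	 * single counter wrap is still handled correctly.
	 */
	static uint32_t elapsed_ticks(uint32_t start, uint32_t end)
	{
		return end - start;
	}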

> 
>>> +
>>> +     for (int n = 0; n < ARRAY_SIZE(spin); n++) {
>>> +             const struct igt_spin_factory opts = {
>>> +                     .ctx = create_highest_priority(i915),
>>> +                     .engine = ring,
>>> +             };
>>> +             spin[n] = __igt_spin_factory(i915, &opts);
>>> +             gem_context_destroy(i915, opts.ctx);
>>> +     }
>>> +
>>> +     for (int i = 0; i < ARRAY_SIZE(priorities); i++) {
>>> +             uint32_t ctx = gem_queue_create(i915);
>>> +             gem_context_set_priority(i915, ctx, priorities[i]);
>>> +             handle[i] = store_timestamp(i915, ctx, ring, mmio_base);
>>> +             gem_context_destroy(i915, ctx);
>>> +     }
>>> +
>>> +     for (int n = 0; n < ARRAY_SIZE(spin); n++)
>>> +             igt_spin_free(i915, spin[n]);
>>> +
>>> +     for (int i = 0; i < ARRAY_SIZE(priorities); i++) {
>>> +             uint32_t *ptr;
>>> +
>>> +             ptr = gem_mmap__gtt(i915, handle[i], 4096, PROT_READ);
>>> +             gem_set_domain(i915, handle[i], /* no write hazard lies! */
>>> +                            I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
>>> +             gem_close(i915, handle[i]);
>>> +
>>> +             handle[i] = ptr[1023];
>>
>> 1023 relates to 4092 from store_timestamp I gather. The two need to be
>> defined closer together.
>>
>>> +             munmap(ptr, 4096);
>>> +
>>> +             igt_debug("ctx[%d] .prio=%d, timestamp=%u\n",
>>> +                       i, priorities[i], handle[i]);
>>> +     }
>>> +
>>> +     igt_assert((int32_t)(handle[HI] - handle[LO]) < 0);
>>> +}
>>> +
>>> +static void reorder(int i915, unsigned ring, unsigned flags)
>>> +#define EQUAL 1
>>> +{
>>> +     IGT_CORK_HANDLE(cork);
>>> +     uint32_t scratch;
>>> +     uint32_t *ptr;
>>> +     uint32_t ctx[2];
>>> +     uint32_t plug;
>>> +
>>> +     ctx[LO] = gem_queue_create(i915);
>>> +     gem_context_set_priority(i915, ctx[LO], MIN_PRIO);
>>> +
>>> +     ctx[HI] = gem_queue_create(i915);
>>> +     gem_context_set_priority(i915, ctx[HI], flags & EQUAL ? MIN_PRIO : 0);
>>> +
>>> +     scratch = gem_create(i915, 4096);
>>> +     plug = igt_cork_plug(&cork, i915);
>>> +
>>> +     /* We expect the high priority context to be executed first, and
>>> +      * so the final result will be value from the low priority context.
>>> +      */
>>> +     store_dword(i915, ctx[LO], ring, scratch, 0, ctx[LO], plug, 0);
>>> +     store_dword(i915, ctx[HI], ring, scratch, 0, ctx[HI], plug, 0);
>>> +
>>> +     unplug_show_queue(i915, &cork, ring);
>>> +     gem_close(i915, plug);
>>> +
>>> +     gem_context_destroy(i915, ctx[LO]);
>>> +     gem_context_destroy(i915, ctx[HI]);
>>> +
>>> +     ptr = gem_mmap__gtt(i915, scratch, 4096, PROT_READ);
>>> +     gem_set_domain(i915, scratch, /* no write hazard lies! */
>>> +                    I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
>>> +     gem_close(i915, scratch);
>>> +
>>> +     if (flags & EQUAL) /* equal priority, result will be fifo */
>>> +             igt_assert_eq_u32(ptr[0], ctx[HI]);
>>> +     else
>>> +             igt_assert_eq_u32(ptr[0], ctx[LO]);
>>> +     munmap(ptr, 4096);
>>> +}
>>> +
>>> +static void promotion(int i915, unsigned ring)
>>> +{
>>> +     IGT_CORK_HANDLE(cork);
>>> +     uint32_t result, dep;
>>> +     uint32_t *ptr;
>>> +     uint32_t ctx[3];
>>> +     uint32_t plug;
>>> +
>>> +     ctx[LO] = gem_queue_create(i915);
>>> +     gem_context_set_priority(i915, ctx[LO], MIN_PRIO);
>>> +
>>> +     ctx[HI] = gem_queue_create(i915);
>>> +     gem_context_set_priority(i915, ctx[HI], 0);
>>> +
>>> +     ctx[NOISE] = gem_queue_create(i915);
>>> +     gem_context_set_priority(i915, ctx[NOISE], MIN_PRIO/2);
>>> +
>>> +     result = gem_create(i915, 4096);
>>> +     dep = gem_create(i915, 4096);
>>> +
>>> +     plug = igt_cork_plug(&cork, i915);
>>> +
>>> +     /* Expect that HI promotes LO, so the order will be LO, HI, NOISE.
>>> +      *
>>> +      * fifo would be NOISE, LO, HI.
>>> +      * strict priority would be  HI, NOISE, LO
>>> +      */
>>> +     store_dword(i915, ctx[NOISE], ring, result, 0, ctx[NOISE], plug, 0);
>>> +     store_dword(i915, ctx[LO], ring, result, 0, ctx[LO], plug, 0);
>>> +
>>> +     /* link LO <-> HI via a dependency on another buffer */
>>> +     store_dword(i915, ctx[LO], ring, dep, 0, ctx[LO], 0, I915_GEM_DOMAIN_INSTRUCTION);
>>> +     store_dword(i915, ctx[HI], ring, dep, 0, ctx[HI], 0, 0);
>>> +
>>> +     store_dword(i915, ctx[HI], ring, result, 0, ctx[HI], 0, 0);
>>> +
>>> +     unplug_show_queue(i915, &cork, ring);
>>> +     gem_close(i915, plug);
>>> +
>>> +     gem_context_destroy(i915, ctx[NOISE]);
>>> +     gem_context_destroy(i915, ctx[LO]);
>>> +     gem_context_destroy(i915, ctx[HI]);
>>> +
>>> +     ptr = gem_mmap__gtt(i915, dep, 4096, PROT_READ);
>>> +     gem_set_domain(i915, dep, /* no write hazard lies! */
>>> +                     I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
>>> +     gem_close(i915, dep);
>>> +
>>> +     igt_assert_eq_u32(ptr[0], ctx[HI]);
>>> +     munmap(ptr, 4096);
>>> +
>>> +     ptr = gem_mmap__gtt(i915, result, 4096, PROT_READ);
>>> +     gem_set_domain(i915, result, /* no write hazard lies! */
>>> +                     I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
>>> +     gem_close(i915, result);
>>> +
>>> +     igt_assert_eq_u32(ptr[0], ctx[NOISE]);
>>> +     munmap(ptr, 4096);
>>> +}
>>> +
>>> +static void smoketest(int i915, unsigned ring, unsigned timeout)
>>> +{
>>> +     const int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
>>> +     unsigned engines[16];
>>> +     unsigned nengine;
>>> +     unsigned engine;
>>> +     uint32_t scratch;
>>> +     uint32_t *ptr;
>>> +
>>> +     nengine = 0;
>>> +     for_each_engine(i915, engine) {
>>> +             if (ignore_engine(engine))
>>> +                     continue;
>>> +
>>> +             engines[nengine++] = engine;
>>> +     }
>>> +     igt_require(nengine);
>>
>> for_each_physical and counting the engines for the engines array
>> would be better, I think.
>>
>>> +
>>> +     scratch = gem_create(i915, 4096);
>>> +     igt_fork(child, ncpus) {
>>> +             unsigned long count = 0;
>>> +             uint32_t ctx;
>>> +
>>> +             hars_petruska_f54_1_random_perturb(child);
>>> +
>>> +             ctx = gem_queue_create(i915);
>>> +             igt_until_timeout(timeout) {
>>> +                     int prio;
>>> +
>>> +                     prio = hars_petruska_f54_1_random_unsafe_max(MAX_PRIO - MIN_PRIO) + MIN_PRIO;
>>> +                     gem_context_set_priority(i915, ctx, prio);
>>> +
>>> +                     engine = engines[hars_petruska_f54_1_random_unsafe_max(nengine)];
>>> +                     store_dword(i915, ctx, engine, scratch,
>>> +                                 8*child + 0, ~child,
>>> +                                 0, 0);
>>> +                     for (unsigned int step = 0; step < 8; step++)
>>> +                             store_dword(i915, ctx, engine, scratch,
>>> +                                         8*child + 4, count++,
>>> +                                         0, 0);
>>> +             }
>>> +             gem_context_destroy(i915, ctx);
>>> +     }
>>> +     igt_waitchildren();
>>> +
>>> +     ptr = gem_mmap__gtt(i915, scratch, 4096, PROT_READ);
>>> +     gem_set_domain(i915, scratch, /* no write hazard lies! */
>>> +                     I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
>>> +     gem_close(i915, scratch);
>>> +
>>> +     for (unsigned n = 0; n < ncpus; n++) {
>>> +             igt_assert_eq_u32(ptr[2*n], ~n);
>>> +             /*
>>> +              * Note this count is approximate due to unconstrained
>>> +              * ordering of the dword writes between engines.
>>> +              *
>>> +              * Take the result with a pinch of salt.
>>> +              */
>>> +             igt_info("Child[%d] completed %u cycles\n",  n, ptr[2*n+1]);
>>> +     }
>>> +     munmap(ptr, 4096);
>>> +}
>>> +
>>> +igt_main
>>> +{
>>> +     const struct intel_execution_engine *e;
>>> +     int i915 = -1;
>>> +
>>> +     igt_fixture {
>>> +             i915 = drm_open_driver(DRIVER_INTEL);
>>> +             igt_require_gem(i915);
>>> +     }
>>> +
>>> +     igt_subtest_group {
>>> +             igt_fixture {
>>> +                     igt_require(gem_contexts_has_shared_gtt(i915));
>>> +                     igt_fork_hang_detector(i915);
>>> +             }
>>> +
>>> +             igt_subtest("create-shared-gtt")
>>> +                     create_shared_gtt(i915, 0);
>>> +
>>> +             igt_subtest("detached-shared-gtt")
>>> +                     create_shared_gtt(i915, DETACHED);
>>> +
>>> +             igt_subtest("disjoint-timelines")
>>> +                     disjoint_timelines(i915);
>>> +
>>> +             igt_subtest("single-timeline")
>>> +                     single_timeline(i915);
>>> +
>>> +             igt_subtest("exhaust-shared-gtt")
>>> +                     exhaust_shared_gtt(i915, 0);
>>> +
>>> +             igt_subtest("exhaust-shared-gtt-lrc")
>>> +                     exhaust_shared_gtt(i915, EXHAUST_LRC);
>>> +
>>> +             for (e = intel_execution_engines; e->name; e++) {
>>> +                     igt_subtest_f("exec-shared-gtt-%s", e->name)
>>> +                             exec_shared_gtt(i915, e->exec_id | e->flags);
>>
>> The same previously raised question: should it iterate the legacy
>> execbuf engines or the physical engines? Maybe you want different
>> subtests to do both?
> 
> It should be testing the cross between the context and execbuf uABI, not
> physical.

Same question as earlier: why is this cross interesting? I mean, what is
interesting in the intersection between legacy execbuf engine selection
and ppGTT sharing? And why, at the same time, is it not relevant to
exercise the new execbuf engine selection ABI?
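
To put it concretely, my understanding of the VM sharing part is just
this (sketch using the helpers from this patch):

	/* Sketch: b shares a's ppGTT, so an object bound at some
	 * address when executing from context a is visible at the
	 * same address when executing from context b.
	 */
	uint32_t a = gem_context_create(i915);
	uint32_t b = gem_context_clone(i915, a, I915_CONTEXT_CLONE_VM, 0);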

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 92+ messages in thread

>>> +{
>>> +     const int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
>>> +     unsigned engines[16];
>>> +     unsigned nengine;
>>> +     unsigned engine;
>>> +     uint32_t scratch;
>>> +     uint32_t *ptr;
>>> +
>>> +     nengine = 0;
>>> +     for_each_engine(i915, engine) {
>>> +             if (ignore_engine(engine))
>>> +                     continue;
>>> +
>>> +             engines[nengine++] = engine;
>>> +     }
>>> +     igt_require(nengine);
>>
>> for_each_physical and counting the engines for the engines array would be
>> better I think.
>>
>>> +
>>> +     scratch = gem_create(i915, 4096);
>>> +     igt_fork(child, ncpus) {
>>> +             unsigned long count = 0;
>>> +             uint32_t ctx;
>>> +
>>> +             hars_petruska_f54_1_random_perturb(child);
>>> +
>>> +             ctx = gem_queue_create(i915);
>>> +             igt_until_timeout(timeout) {
>>> +                     int prio;
>>> +
>>> +                     prio = hars_petruska_f54_1_random_unsafe_max(MAX_PRIO - MIN_PRIO) + MIN_PRIO;
>>> +                     gem_context_set_priority(i915, ctx, prio);
>>> +
>>> +                     engine = engines[hars_petruska_f54_1_random_unsafe_max(nengine)];
>>> +                     store_dword(i915, ctx, engine, scratch,
>>> +                                 8*child + 0, ~child,
>>> +                                 0, 0);
>>> +                     for (unsigned int step = 0; step < 8; step++)
>>> +                             store_dword(i915, ctx, engine, scratch,
>>> +                                         8*child + 4, count++,
>>> +                                         0, 0);
>>> +             }
>>> +             gem_context_destroy(i915, ctx);
>>> +     }
>>> +     igt_waitchildren();
>>> +
>>> +     ptr = gem_mmap__gtt(i915, scratch, 4096, PROT_READ);
>>> +     gem_set_domain(i915, scratch, /* no write hazard lies! */
>>> +                     I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
>>> +     gem_close(i915, scratch);
>>> +
>>> +     for (unsigned n = 0; n < ncpus; n++) {
>>> +             igt_assert_eq_u32(ptr[2*n], ~n);
>>> +             /*
>>> +              * Note this count is approximate due to unconstrained
>>> +              * ordering of the dword writes between engines.
>>> +              *
>>> +              * Take the result with a pinch of salt.
>>> +              */
>>> +             igt_info("Child[%d] completed %u cycles\n",  n, ptr[2*n+1]);
>>> +     }
>>> +     munmap(ptr, 4096);
>>> +}
>>> +
>>> +igt_main
>>> +{
>>> +     const struct intel_execution_engine *e;
>>> +     int i915 = -1;
>>> +
>>> +     igt_fixture {
>>> +             i915 = drm_open_driver(DRIVER_INTEL);
>>> +             igt_require_gem(i915);
>>> +     }
>>> +
>>> +     igt_subtest_group {
>>> +             igt_fixture {
>>> +                     igt_require(gem_contexts_has_shared_gtt(i915));
>>> +                     igt_fork_hang_detector(i915);
>>> +             }
>>> +
>>> +             igt_subtest("create-shared-gtt")
>>> +                     create_shared_gtt(i915, 0);
>>> +
>>> +             igt_subtest("detached-shared-gtt")
>>> +                     create_shared_gtt(i915, DETACHED);
>>> +
>>> +             igt_subtest("disjoint-timelines")
>>> +                     disjoint_timelines(i915);
>>> +
>>> +             igt_subtest("single-timeline")
>>> +                     single_timeline(i915);
>>> +
>>> +             igt_subtest("exhaust-shared-gtt")
>>> +                     exhaust_shared_gtt(i915, 0);
>>> +
>>> +             igt_subtest("exhaust-shared-gtt-lrc")
>>> +                     exhaust_shared_gtt(i915, EXHAUST_LRC);
>>> +
>>> +             for (e = intel_execution_engines; e->name; e++) {
>>> +                     igt_subtest_f("exec-shared-gtt-%s", e->name)
>>> +                             exec_shared_gtt(i915, e->exec_id | e->flags);
>>
>> The same previously raised question on whether it should iterate the legacy
>> execbuf engines or the physical engines. Maybe you want different subtests
>> to do both?
> 
> It should be testing the cross between the context and execbuf uABI, not
> physical.

Same question as earlier: why is this cross interesting? I mean, what is 
interesting in the intersection between legacy execbuf engine selection 
and ppgtt sharing? And why, at the same time, is it not relevant to 
exercise the new execbuf engine selection ABI?
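
To spell the distinction out: with the legacy uABI the engine is
selected by ring flags in the execbuf itself, whereas once an engine map
is installed via I915_CONTEXT_PARAM_ENGINES the same flag bits index
into ctx->engines[]. A minimal sketch, not code from the patch (the
context name is made up):

	/* Legacy selection: the ring is encoded in the execbuf flags. */
	execbuf.flags = I915_EXEC_BLT;
	gem_execbuf(i915, &execbuf);

	/* With a ctx->engines[] map installed, the flags index the map. */
	execbuf.rsvd1 = ctx_with_engine_map; /* hypothetical context id */
	execbuf.flags = 1;                   /* second slot of the map */
	gem_execbuf(i915, &execbuf);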

Regards,

Tvrtko
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH i-g-t 10/16] i915/gem_exec_whisper: Fork all-engine tests one-per-engine
  2019-05-15 19:35       ` [igt-dev] " Chris Wilson
@ 2019-05-16  8:57         ` Tvrtko Ursulin
  -1 siblings, 0 replies; 92+ messages in thread
From: Tvrtko Ursulin @ 2019-05-16  8:57 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: igt-dev


On 15/05/2019 20:35, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-05-14 13:57:26)
>>
>> On 08/05/2019 11:09, Chris Wilson wrote:
>>> Add a new mode for some more stress, submit the all-engines tests
>>> simultaneously, a stream per engine.
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> ---
>>>    tests/i915/gem_exec_whisper.c | 27 ++++++++++++++++++++++-----
>>>    1 file changed, 22 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/tests/i915/gem_exec_whisper.c b/tests/i915/gem_exec_whisper.c
>>> index d3e0b0ba2..d5afc8119 100644
>>> --- a/tests/i915/gem_exec_whisper.c
>>> +++ b/tests/i915/gem_exec_whisper.c
>>> @@ -88,6 +88,7 @@ static void verify_reloc(int fd, uint32_t handle,
>>>    #define SYNC 0x40
>>>    #define PRIORITY 0x80
>>>    #define QUEUES 0x100
>>> +#define ALL 0x200
>>>    
>>>    struct hang {
>>>        struct drm_i915_gem_exec_object2 obj;
>>> @@ -199,6 +200,7 @@ static void whisper(int fd, unsigned engine, unsigned flags)
>>>        uint64_t old_offset;
>>>        int i, n, loc;
>>>        int debugfs;
>>> +     int nchild;
>>>    
>>>        if (flags & PRIORITY) {
>>>                igt_require(gem_scheduler_enabled(fd));
>>> @@ -215,6 +217,7 @@ static void whisper(int fd, unsigned engine, unsigned flags)
>>>                                engines[nengine++] = engine;
>>>                }
>>>        } else {
>>> +             igt_assert(!(flags & ALL));
>>>                igt_require(gem_has_ring(fd, engine));
>>>                igt_require(gem_can_store_dword(fd, engine));
>>>                engines[nengine++] = engine;
>>> @@ -233,11 +236,22 @@ static void whisper(int fd, unsigned engine, unsigned flags)
>>>        if (flags & HANG)
>>>                init_hang(&hang);
>>>    
>>> +     nchild = 1;
>>> +     if (flags & FORKED)
>>> +             nchild *= sysconf(_SC_NPROCESSORS_ONLN);
>>> +     if (flags & ALL)
>>> +             nchild *= nengine;
>>> +
>>>        intel_detect_and_clear_missed_interrupts(fd);
>>>        gpu_power_read(&power, &sample[0]);
>>> -     igt_fork(child, flags & FORKED ? sysconf(_SC_NPROCESSORS_ONLN) : 1)  {
>>> +     igt_fork(child, nchild) {
>>>                unsigned int pass;
>>>    
>>> +             if (flags & ALL) {
>>> +                     engines[0] = engines[child % nengine];
>>
>> Relying on PIDs being sequential feels fragile but suggesting pipes or
>> shared memory would be overkill. How about another loop:
> 
> Where are you getting pid_t from? child is an integer [0, nchild).

Add a core helper to get it?

I am coming from an angle that I remember some time in the past there 
was a security thing which randomized pid allocation. TBH I am not sure 
if that still exists, but if it does then it would not be good for this 
test. It may be a moot point to think such security hardening measures 
would be active on a machine running IGT tests.. hm.. not sure. But it 
is still quite a hidden assumption.

Regards,

Tvrtko

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [igt-dev] [PATCH i-g-t 10/16] i915/gem_exec_whisper: Fork all-engine tests one-per-engine
@ 2019-05-16  8:57         ` Tvrtko Ursulin
  0 siblings, 0 replies; 92+ messages in thread
From: Tvrtko Ursulin @ 2019-05-16  8:57 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: igt-dev


On 15/05/2019 20:35, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-05-14 13:57:26)
>>
>> On 08/05/2019 11:09, Chris Wilson wrote:
>>> Add a new mode for some more stress, submit the all-engines tests
>>> simultaneously, a stream per engine.
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> ---
>>>    tests/i915/gem_exec_whisper.c | 27 ++++++++++++++++++++++-----
>>>    1 file changed, 22 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/tests/i915/gem_exec_whisper.c b/tests/i915/gem_exec_whisper.c
>>> index d3e0b0ba2..d5afc8119 100644
>>> --- a/tests/i915/gem_exec_whisper.c
>>> +++ b/tests/i915/gem_exec_whisper.c
>>> @@ -88,6 +88,7 @@ static void verify_reloc(int fd, uint32_t handle,
>>>    #define SYNC 0x40
>>>    #define PRIORITY 0x80
>>>    #define QUEUES 0x100
>>> +#define ALL 0x200
>>>    
>>>    struct hang {
>>>        struct drm_i915_gem_exec_object2 obj;
>>> @@ -199,6 +200,7 @@ static void whisper(int fd, unsigned engine, unsigned flags)
>>>        uint64_t old_offset;
>>>        int i, n, loc;
>>>        int debugfs;
>>> +     int nchild;
>>>    
>>>        if (flags & PRIORITY) {
>>>                igt_require(gem_scheduler_enabled(fd));
>>> @@ -215,6 +217,7 @@ static void whisper(int fd, unsigned engine, unsigned flags)
>>>                                engines[nengine++] = engine;
>>>                }
>>>        } else {
>>> +             igt_assert(!(flags & ALL));
>>>                igt_require(gem_has_ring(fd, engine));
>>>                igt_require(gem_can_store_dword(fd, engine));
>>>                engines[nengine++] = engine;
>>> @@ -233,11 +236,22 @@ static void whisper(int fd, unsigned engine, unsigned flags)
>>>        if (flags & HANG)
>>>                init_hang(&hang);
>>>    
>>> +     nchild = 1;
>>> +     if (flags & FORKED)
>>> +             nchild *= sysconf(_SC_NPROCESSORS_ONLN);
>>> +     if (flags & ALL)
>>> +             nchild *= nengine;
>>> +
>>>        intel_detect_and_clear_missed_interrupts(fd);
>>>        gpu_power_read(&power, &sample[0]);
>>> -     igt_fork(child, flags & FORKED ? sysconf(_SC_NPROCESSORS_ONLN) : 1)  {
>>> +     igt_fork(child, nchild) {
>>>                unsigned int pass;
>>>    
>>> +             if (flags & ALL) {
>>> +                     engines[0] = engines[child % nengine];
>>
>> Relying on PIDs being sequential feels fragile but suggesting pipes or
>> shared memory would be overkill. How about another loop:
> 
> Where are you getting pid_t from? child is an integer [0, nchild).

Add a core helper to get it?

I am coming from an angle that I remember some time in the past there 
was a security thing which randomized pid allocation. TBH I am not sure 
if that still exists, but if it does then it would not be good for this 
test. It may be a moot point to think such security hardening measures 
would be active on a machine running IGT tests.. hm.. not sure. But it 
is still quite a hidden assumption.

Regards,

Tvrtko

_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH i-g-t 13/16] i915: Add gem_exec_balancer
  2019-05-15 19:50       ` [igt-dev] " Chris Wilson
@ 2019-05-16  9:20         ` Tvrtko Ursulin
  -1 siblings, 0 replies; 92+ messages in thread
From: Tvrtko Ursulin @ 2019-05-16  9:20 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: igt-dev


On 15/05/2019 20:50, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-05-15 11:49:45)
>>
>> On 08/05/2019 11:09, Chris Wilson wrote:
>>> Exercise the in-kernel load balancer checking that we can distribute
>>> batches across the set of ctx->engines to avoid load.
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> ---
>>>    tests/Makefile.am              |    1 +
>>>    tests/Makefile.sources         |    1 +
>>>    tests/i915/gem_exec_balancer.c | 1050 ++++++++++++++++++++++++++++++++
>>>    tests/meson.build              |    7 +
>>>    4 files changed, 1059 insertions(+)
>>>    create mode 100644 tests/i915/gem_exec_balancer.c
>>>
>>> diff --git a/tests/Makefile.am b/tests/Makefile.am
>>> index 5097debf6..c6af0aeaf 100644
>>> --- a/tests/Makefile.am
>>> +++ b/tests/Makefile.am
>>> @@ -96,6 +96,7 @@ gem_close_race_LDADD = $(LDADD) -lpthread
>>>    gem_ctx_thrash_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
>>>    gem_ctx_thrash_LDADD = $(LDADD) -lpthread
>>>    gem_ctx_sseu_LDADD = $(LDADD) $(top_builddir)/lib/libigt_perf.la
>>> +i915_gem_exec_balancer_LDADD = $(LDADD) $(top_builddir)/lib/libigt_perf.la
>>>    gem_exec_capture_LDADD = $(LDADD) -lz
>>>    gem_exec_parallel_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
>>>    gem_exec_parallel_LDADD = $(LDADD) -lpthread
>>> diff --git a/tests/Makefile.sources b/tests/Makefile.sources
>>> index e7ee27e81..323b625aa 100644
>>> --- a/tests/Makefile.sources
>>> +++ b/tests/Makefile.sources
>>> @@ -24,6 +24,7 @@ TESTS_progs = \
>>>        i915/gem_ctx_clone \
>>>        i915/gem_ctx_engines \
>>>        i915/gem_ctx_shared \
>>> +     i915/gem_exec_balancer \
>>>        i915/gem_vm_create \
>>>        kms_3d \
>>>        kms_addfb_basic \
>>> diff --git a/tests/i915/gem_exec_balancer.c b/tests/i915/gem_exec_balancer.c
>>> new file mode 100644
>>> index 000000000..25195d478
>>> --- /dev/null
>>> +++ b/tests/i915/gem_exec_balancer.c
>>> @@ -0,0 +1,1050 @@
>>> +/*
>>> + * Copyright © 2018 Intel Corporation
>>
>> 2019 I guess, even though work was started in 2018?
>>
>>> + *
>>> + * Permission is hereby granted, free of charge, to any person obtaining a
>>> + * copy of this software and associated documentation files (the "Software"),
>>> + * to deal in the Software without restriction, including without limitation
>>> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
>>> + * and/or sell copies of the Software, and to permit persons to whom the
>>> + * Software is furnished to do so, subject to the following conditions:
>>> + *
>>> + * The above copyright notice and this permission notice (including the next
>>> + * paragraph) shall be included in all copies or substantial portions of the
>>> + * Software.
>>> + *
>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
>>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
>>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
>>> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
>>> + * IN THE SOFTWARE.
>>> + */
>>> +
>>> +#include <sched.h>
>>> +
>>> +#include "igt.h"
>>> +#include "igt_perf.h"
>>> +#include "i915/gem_ring.h"
>>> +#include "sw_sync.h"
>>> +
>>> +IGT_TEST_DESCRIPTION("Exercise in-kernel load-balancing");
>>> +
>>> +#define INSTANCE_COUNT (1 << I915_PMU_SAMPLE_INSTANCE_BITS)
>>
>> Hmm.. this is a strange surrogate but I guess it works.
>>
>>> +
>>> +static bool has_class_instance(int i915, uint16_t class, uint16_t instance)
>>> +{
>>> +     int fd;
>>> +
>>> +     fd = perf_i915_open(I915_PMU_ENGINE_BUSY(class, instance));
>>
>> More work for Andi to replace with real engine discovery. :)
>>
>>> +     if (fd != -1) {
>>> +             close(fd);
>>> +             return true;
>>> +     }
>>> +
>>> +     return false;
>>> +}
>>> +
>>> +static struct i915_engine_class_instance *
>>> +list_engines(int i915, uint32_t class_mask, unsigned int *out)
>>> +{
>>> +     unsigned int count = 0, size = 64;
>>> +     struct i915_engine_class_instance *engines;
>>> +
>>> +     engines = malloc(size * sizeof(*engines));
>>> +     if (!engines) {
>>> +             *out = 0;
>>> +             return NULL;
>>> +     }
>>> +
>>> +     for (enum drm_i915_gem_engine_class class = I915_ENGINE_CLASS_RENDER;
>>> +          class_mask;
>>> +          class++, class_mask >>= 1) {
>>> +             if (!(class_mask & 1))
>>> +                     continue;
>>> +
>>> +             for (unsigned int instance = 0;
>>> +                  instance < INSTANCE_COUNT;
>>> +                  instance++) {
>>> +                  if (!has_class_instance(i915, class, instance))
>>> +                          continue;
>>> +
>>> +                     if (count == size) {
>>> +                             struct i915_engine_class_instance *e;
>>> +
>>> +                             size *= 2;
>>> +                             e = realloc(engines, size*sizeof(*engines));
>>> +                             if (!e) {
>>
>> I'd just assert. On malloc as well.
>>
>>> +                                     *out = count;
>>> +                                     return engines;
>>> +                             }
>>> +
>>> +                             engines = e;
>>> +                     }
>>> +
>>> +                     engines[count++] = (struct i915_engine_class_instance){
>>> +                             .engine_class = class,
>>> +                             .engine_instance = instance,
>>> +                     };
>>> +             }
>>> +     }
>>> +
>>> +     if (!count) {
>>> +             free(engines);
>>> +             engines = NULL;
>>> +     }
>>> +
>>> +     *out = count;
>>> +     return engines;
>>> +}
>>> +
>>> +static int __set_load_balancer(int i915, uint32_t ctx,
>>> +                            const struct i915_engine_class_instance *ci,
>>> +                            unsigned int count)
>>> +{
>>> +     I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(balancer, count);
>>> +     I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 1 + count);
>>> +     struct drm_i915_gem_context_param p = {
>>> +             .ctx_id = ctx,
>>> +             .param = I915_CONTEXT_PARAM_ENGINES,
>>> +             .size = sizeof(engines),
>>> +             .value = to_user_pointer(&engines)
>>> +     };
>>> +
>>> +     memset(&balancer, 0, sizeof(balancer));
>>> +     balancer.base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE;
>>> +
>>> +     igt_assert(count);
>>> +     balancer.num_siblings = count;
>>> +     memcpy(balancer.engines, ci, count * sizeof(*ci));
>>> +
>>> +     memset(&engines, 0, sizeof(engines));
>>> +     engines.extensions = to_user_pointer(&balancer);
>>> +     engines.engines[0].engine_class =
>>> +             I915_ENGINE_CLASS_INVALID;
>>> +     engines.engines[0].engine_instance =
>>> +             I915_ENGINE_CLASS_INVALID_NONE;
>>> +     memcpy(engines.engines + 1, ci, count * sizeof(*ci));
>>> +
>>> +     return __gem_context_set_param(i915, &p);
>>> +}
>>> +
>>> +static void set_load_balancer(int i915, uint32_t ctx,
>>> +                           const struct i915_engine_class_instance *ci,
>>> +                           unsigned int count)
>>> +{
>>> +     igt_assert_eq(__set_load_balancer(i915, ctx, ci, count), 0);
>>> +}
>>> +
>>> +static uint32_t load_balancer_create(int i915,
>>> +                                  const struct i915_engine_class_instance *ci,
>>> +                                  unsigned int count)
>>> +{
>>> +     uint32_t ctx;
>>> +
>>> +     ctx = gem_context_create(i915);
>>> +     set_load_balancer(i915, ctx, ci, count);
>>> +
>>> +     return ctx;
>>> +}
>>> +
>>> +static uint32_t __batch_create(int i915, uint32_t offset)
>>> +{
>>> +     const uint32_t bbe = MI_BATCH_BUFFER_END;
>>> +     uint32_t handle;
>>> +
>>> +     handle = gem_create(i915, ALIGN(offset + 4, 4096));
>>> +     gem_write(i915, handle, offset, &bbe, sizeof(bbe));
>>> +
>>> +     return handle;
>>> +}
>>> +
>>> +static uint32_t batch_create(int i915)
>>> +{
>>> +     return __batch_create(i915, 0);
>>> +}
>>> +
>>> +static void invalid_balancer(int i915)
>>> +{
>>> +     I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(balancer, 64);
>>> +     I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 64);
>>> +     struct drm_i915_gem_context_param p = {
>>> +             .param = I915_CONTEXT_PARAM_ENGINES,
>>> +             .value = to_user_pointer(&engines)
>>> +     };
>>> +     uint32_t handle;
>>> +     void *ptr;
>>> +
>>> +     /*
>>> +      * Assume that I915_CONTEXT_PARAM_ENGINE validates the array
>>> +      * of engines[], our job is to determine if the load_balancer
>>> +      * extension explodes.
>>> +      */
>>> +
>>> +     for (int class = 0; class < 32; class++) {
>>> +             struct i915_engine_class_instance *ci;
>>> +             unsigned int count;
>>> +
>>> +             ci = list_engines(i915, 1 << class, &count);
>>> +             if (!ci)
>>> +                     continue;
>>> +
>>> +             igt_debug("Found %d engines\n", count);
>>> +             igt_assert_lte(count, 64);
>>
>> Hey.. you always say trust the kernel! ;)
> 
> This code was a placeholder that you said you would replace with a proper
> query API...
> 
>>
>>> +
>>> +             p.ctx_id = gem_context_create(i915);
>>> +             p.size = (sizeof(struct i915_context_param_engines) +
>>> +                             (count + 1) * sizeof(*engines.engines));
>>
>> Alignment looks off.
>>
>>> +
>>> +             memset(&engines, 0, sizeof(engines));
>>> +             engines.engines[0].engine_class = I915_ENGINE_CLASS_INVALID;
>>> +             engines.engines[0].engine_instance = I915_ENGINE_CLASS_INVALID_NONE;
>>> +             memcpy(engines.engines + 1, ci, count * sizeof(*ci));
>>> +             gem_context_set_param(i915, &p);
>>> +
>>> +             engines.extensions = -1ull;
>>> +             igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
>>> +
>>> +             engines.extensions = 1ull;
>>> +             igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
>>> +
>>> +             memset(&balancer, 0, sizeof(balancer));
>>> +             balancer.base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE;
>>> +             balancer.num_siblings = count;
>>> +             memcpy(balancer.engines, ci, count * sizeof(*ci));
>>> +
>>> +             engines.extensions = to_user_pointer(&balancer);
>>> +             gem_context_set_param(i915, &p);
>>> +
>>> +             balancer.engine_index = 1;
>>> +             igt_assert_eq(__gem_context_set_param(i915, &p), -EEXIST);
>>> +
>>> +             balancer.engine_index = count;
>>> +             igt_assert_eq(__gem_context_set_param(i915, &p), -EEXIST);
>>> +
>>> +             balancer.engine_index = count + 1;
>>> +             igt_assert_eq(__gem_context_set_param(i915, &p), -EINVAL);
>>> +
>>> +             balancer.engine_index = 0;
>>> +             gem_context_set_param(i915, &p);
>>> +
>>> +             balancer.base.next_extension = to_user_pointer(&balancer);
>>> +             igt_assert_eq(__gem_context_set_param(i915, &p), -EEXIST);
>>> +
>>> +             balancer.base.next_extension = -1ull;
>>> +             igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
>>> +
>>> +             handle = gem_create(i915, 4096 * 3);
>>> +             ptr = gem_mmap__gtt(i915, handle, 4096 * 3, PROT_WRITE);
>>> +             gem_close(i915, handle);
>>> +
>>> +             memset(&engines, 0, sizeof(engines));
>>> +             engines.engines[0].engine_class = I915_ENGINE_CLASS_INVALID;
>>> +             engines.engines[0].engine_instance = I915_ENGINE_CLASS_INVALID_NONE;
>>> +             engines.engines[1].engine_class = I915_ENGINE_CLASS_INVALID;
>>> +             engines.engines[1].engine_instance = I915_ENGINE_CLASS_INVALID_NONE;
>>> +             memcpy(engines.engines + 2, ci, count * sizeof(ci));
>>> +             p.size = (sizeof(struct i915_context_param_engines) +
>>> +                             (count + 2) * sizeof(*engines.engines));
>>
>> Alignment again.
>>
>>> +             gem_context_set_param(i915, &p);
>>> +
>>> +             balancer.base.next_extension = 0;
>>> +             balancer.engine_index = 1;
>>> +             engines.extensions = to_user_pointer(&balancer);
>>> +             gem_context_set_param(i915, &p);
>>> +
>>> +             memcpy(ptr + 4096 - 8, &balancer, sizeof(balancer));
>>> +             memcpy(ptr + 8192 - 8, &balancer, sizeof(balancer));
>>> +             balancer.engine_index = 0;
>>> +
>>> +             engines.extensions = to_user_pointer(ptr) + 4096 - 8;
>>> +             gem_context_set_param(i915, &p);
>>> +
>>> +             balancer.base.next_extension = engines.extensions;
>>> +             engines.extensions = to_user_pointer(&balancer);
>>> +             gem_context_set_param(i915, &p);
>>
>> mmap_gtt and unmapped area testing in one?
> 
> Neighbouring.
> 
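The "neighbouring" trick, spelled out: the extension block is copied to
the last 8 bytes of a page, so the kernel's read of the full struct
spills into the adjacent page; unmapping that neighbour later is what
should produce -EFAULT. A hedged sketch of the idea using anonymous
memory rather than the patch's GTT mapping (needs <sys/mman.h> and
<string.h>):

	/* Place the struct straddling a page boundary, then unmap the
	 * second page: a kernel copy of the whole struct must fault. */
	void *ptr = mmap(NULL, 2 * 4096, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	memcpy(ptr + 4096 - 8, &balancer, sizeof(balancer));
	munmap(ptr + 4096, 4096); /* neighbour gone -> expect -EFAULT */
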
>>> +             munmap(ptr, 4096);
>>> +             igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
>>> +             engines.extensions = to_user_pointer(ptr) + 4096 - 8;
>>> +             igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
>>> +
>>> +             engines.extensions = to_user_pointer(ptr) + 8192 - 8;
>>> +             gem_context_set_param(i915, &p);
>>> +
>>> +             balancer.base.next_extension = engines.extensions;
>>> +             engines.extensions = to_user_pointer(&balancer);
>>> +             gem_context_set_param(i915, &p);
>>> +
>>> +             munmap(ptr + 8192, 4096);
>>> +             igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
>>> +             engines.extensions = to_user_pointer(ptr) + 8192 - 8;
>>> +             igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
>>> +
>>> +             munmap(ptr + 4096, 4096);
>>> +
>>> +             gem_context_destroy(i915, p.ctx_id);
>>> +             free(ci);
>>> +     }
>>> +}
>>> +
>>> +static void kick_kthreads(int period_us)
>>> +{
>>> +     sched_yield();
>>> +     usleep(period_us);
>>
>> yield and sleep hm.. calling with zero period_us? Doesn't seem like it.
>> So what's it about?
> 
> Historically yield may have been a no-op, but sleep(0) actually yielded.
> 
>>> +}
>>> +
>>> +static double measure_load(int pmu, int period_us)
>>> +{
>>> +     uint64_t data[2];
>>> +     uint64_t d_t, d_v;
>>> +
>>> +     kick_kthreads(period_us);
>>> +
>>> +     igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
>>> +     d_v = -data[0];
>>> +     d_t = -data[1];
>>> +
>>> +     usleep(period_us);
>>> +
>>> +     igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
>>> +     d_v += data[0];
>>> +     d_t += data[1];
>>
>> This -val + val trick with uint64_t works?
> 
> Yes, unsigned overflow is defined.
> 
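Indeed: unsigned arithmetic in C wraps modulo 2^64, so negating the
first sample and adding the second yields the elapsed delta even if the
counter wrapped in between. A tiny illustration with made-up values:

	uint64_t before = UINT64_MAX - 5; /* counter about to wrap */
	uint64_t after = 10;              /* counter after wrapping */
	uint64_t d = -before;             /* == 6 (mod 2^64) */
	d += after;                       /* d == 16, the true delta */
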
>>
>>> +
>>> +     return d_v / (double)d_t;
>>> +}
>>> +
>>> +static double measure_min_load(int pmu, unsigned int num, int period_us)
>>> +{
>>> +     uint64_t data[2 + num];
>>> +     uint64_t d_t, d_v[num];
>>> +     uint64_t min = -1, max = 0;
>>> +
>>> +     kick_kthreads(period_us);
>>> +
>>> +     igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
>>> +     for (unsigned int n = 0; n < num; n++)
>>> +             d_v[n] = -data[2 + n];
>>> +     d_t = -data[1];
>>> +
>>> +     usleep(period_us);
>>> +
>>> +     igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
>>> +
>>> +     d_t += data[1];
>>> +     for (unsigned int n = 0; n < num; n++) {
>>> +             d_v[n] += data[2 + n];
>>> +             igt_debug("engine[%d]: %.1f%%\n",
>>> +                       n, d_v[n] / (double)d_t * 100);
>>> +             if (d_v[n] < min)
>>> +                     min = d_v[n];
>>> +             if (d_v[n] > max)
>>> +                     max = d_v[n];
>>> +     }
>>> +
>>> +     igt_debug("elapsed: %"PRIu64"ns, load [%.1f, %.1f]%%\n",
>>> +               d_t, min / (double)d_t * 100,  max / (double)d_t * 100);
>>> +
>>> +     return min / (double)d_t;
>>> +}
>>> +
>>> +static void check_individual_engine(int i915,
>>> +                                 uint32_t ctx,
>>> +                                 const struct i915_engine_class_instance *ci,
>>> +                                 int idx)
>>> +{
>>> +     igt_spin_t *spin;
>>> +     double load;
>>> +     int pmu;
>>> +
>>> +     pmu = perf_i915_open(I915_PMU_ENGINE_BUSY(ci[idx].engine_class,
>>> +                                               ci[idx].engine_instance));
>>> +
>>> +     spin = igt_spin_new(i915, .ctx = ctx, .engine = idx + 1);
>>> +     load = measure_load(pmu, 10000);
>>
>> Hm, the usleep before the start of measuring and the one between the two
>> samples are the same. The one before should be fixed I think, no?
> 
> Could be, that would require thought as to what the appropriate period
> for kicking should be. Yay for ksoftirqd.
> 
>>> +     igt_spin_free(i915, spin);
>>> +
>>> +     close(pmu);
>>> +
>>> +     igt_assert_f(load > 0.90,
>>> +                  "engine %d (class:instance %d:%d) was found to be only %.1f%% busy\n",
>>> +                  idx, ci[idx].engine_class, ci[idx].engine_instance, load*100);
>>> +}
>>> +
>>> +static void individual(int i915)
>>> +{
>>> +     uint32_t ctx;
>>> +
>>> +     /*
>>> +      * I915_CONTEXT_PARAM_ENGINE allows us to index into the user
>>> +      * supplied array from gem_execbuf(). Our check is to build the
>>> +      * ctx->engine[] with various different engine classes, feed in
>>> +      * a spinner and then ask pmu to confirm it the expected engine
>>> +      * was busy.
>>> +      */
>>> +
>>> +     ctx = gem_context_create(i915);
>>> +
>>> +     for (int mask = 0; mask < 32; mask++) {
>>> +             struct i915_engine_class_instance *ci;
>>> +             unsigned int count;
>>> +
>>> +             ci = list_engines(i915, 1u << mask, &count);
>>> +             if (!ci)
>>> +                     continue;
>>> +
>>> +             igt_debug("Found %d engines of class %d\n", count, mask);
>>> +
>>> +             for (int pass = 0; pass < count; pass++) { /* approx. count! */
>>> +                     igt_permute_array(ci, count, igt_exchange_int64);
>>
>> struct i915_engine_class_instance is four bytes long, so the swap func
>> looks wrong. Unless for some reason you want to swap in blocks of two.
>> Don't know. The last index would reach into random memory though. I must
>> be missing something or it wouldn't have worked..
> 
> Once upon a time class_instance was 2xu32.
> 
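With today's 4-byte struct i915_engine_class_instance the matching swap
helper would be the 32-bit one - a one-line sketch, assuming IGT's
igt_exchange_int swaps 4-byte elements:

	/* 4-byte elements want the 4-byte exchange helper. */
	igt_permute_array(ci, count, igt_exchange_int);
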
>>
>>> +                     set_load_balancer(i915, ctx, ci, count);
>>> +                     for (unsigned int n = 0; n < count; n++)
>>> +                             check_individual_engine(i915, ctx, ci, n);
>>> +             }
>>> +
>>> +             free(ci);
>>> +     }
>>> +
>>> +     gem_context_destroy(i915, ctx);
>>> +     gem_quiescent_gpu(i915);
>>> +}
>>> +
>>> +static void indicies(int i915)
>>
>> indices?
>>
>>> +{
>>> +     I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, I915_EXEC_RING_MASK + 1);
>>> +     struct drm_i915_gem_context_param p = {
>>> +             .ctx_id = gem_context_create(i915),
>>> +             .param = I915_CONTEXT_PARAM_ENGINES,
>>> +             .value = to_user_pointer(&engines)
>>> +     };
>>> +
>>> +     struct drm_i915_gem_exec_object2 batch = {
>>> +             .handle = batch_create(i915),
>>> +     };
>>> +
>>> +     unsigned int nengines = 0;
>>> +     void *balancers = NULL;
>>> +
>>> +     /*
>>> +      * We can populate our engine map with multiple virtual engines.
>>> +      * Do so.
>>> +      */
>>> +
>>> +     for (int class = 0; class < 32; class++) {
>>> +             struct i915_engine_class_instance *ci;
>>> +             unsigned int count;
>>> +
>>> +             ci = list_engines(i915, 1u << class, &count);
>>> +             if (!ci)
>>> +                     continue;
>>> +
>>> +             igt_debug("Found %d engines of class %d\n", count, class);
>>
>> Maybe this debug message should go into list_engines, since it seems
>> repeated a few times already.
> 
> Or remove the debug, I hear you.
> 
>>> +
>>> +             for (int n = 0; n < count; n++) {
>>> +                     I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(*balancer,
>>> +                                                              count);
>>> +
>>> +                     engines.engines[nengines].engine_class =
>>> +                             I915_ENGINE_CLASS_INVALID;
>>> +                     engines.engines[nengines].engine_instance =
>>> +                             I915_ENGINE_CLASS_INVALID_NONE;
>>> +
>>> +                     balancer = calloc(sizeof(*balancer), 1);
>>> +                     igt_assert(balancer);
>>> +
>>> +                     balancer->base.name =
>>> +                             I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE;
>>> +                     balancer->base.next_extension =
>>> +                             to_user_pointer(balancers);
>>> +                     balancers = balancer;
>>> +
>>> +                     balancer->engine_index = nengines++;
>>> +                     balancer->num_siblings = count;
>>> +
>>> +                     memcpy(balancer->engines,
>>> +                            ci, count * sizeof(*ci));
>>> +             }
>>> +             free(ci);
>>> +     }
>>> +
>>> +     igt_require(balancers);
>>> +     engines.extensions = to_user_pointer(balancers);
>>> +     p.size = (sizeof(struct i915_engine_class_instance) * nengines +
>>> +               sizeof(struct i915_context_param_engines));
>>> +     gem_context_set_param(i915, &p);
>>> +
>>> +     for (unsigned int n = 0; n < nengines; n++) {
>>> +             struct drm_i915_gem_execbuffer2 eb = {
>>> +                     .buffers_ptr = to_user_pointer(&batch),
>>> +                     .buffer_count = 1,
>>> +                     .flags = n,
>>> +                     .rsvd1 = p.ctx_id,
>>> +             };
>>> +             igt_debug("Executing on index=%d\n", n);
>>> +             gem_execbuf(i915, &eb);
>>> +     }
>>> +     gem_context_destroy(i915, p.ctx_id);
>>> +
>>> +     gem_sync(i915, batch.handle);
>>> +     gem_close(i915, batch.handle);
>>> +
>>> +     while (balancers) {
>>> +             struct i915_context_engines_load_balance *b, *n;
>>> +
>>> +             b = balancers;
>>> +             n = from_user_pointer(b->base.next_extension);
>>> +             free(b);
>>> +
>>> +             balancers = n;
>>> +     }
>>> +
>>> +     gem_quiescent_gpu(i915);
>>> +}
>>> +
>>> +static void busy(int i915)
>>> +{
>>> +     uint32_t scratch = gem_create(i915, 4096);
>>> +
>>> +     /*
>>> +      * Check that virtual engines are reported via GEM_BUSY.
>>> +      *
>>> +      * When running, the batch will be on the real engine and report
>>> +      * the actual class.
>>> +      *
>>> +      * Prior to running, if the load-balancer is across multiple
>>> +      * classes we don't know which engine the batch will
>>> +      * execute on, so we report them all!
>>> +      *
>>> +      * However, as we only support (and test) creating a load-balancer
>>> +      * from engines of only one class, that can be propagated accurately
>>> +      * through to GEM_BUSY.
>>> +      */
>>> +
>>> +     for (int class = 0; class < 16; class++) {
>>> +             struct drm_i915_gem_busy busy;
>>> +             struct i915_engine_class_instance *ci;
>>> +             unsigned int count;
>>> +             igt_spin_t *spin[2];
>>> +             uint32_t ctx;
>>> +
>>> +             ci = list_engines(i915, 1u << class, &count);
>>> +             if (!ci)
>>> +                     continue;
>>> +
>>> +             igt_debug("Found %d engines of class %d\n", count, class);
>>> +             ctx = load_balancer_create(i915, ci, count);
>>> +             free(ci);
>>> +
>>> +             spin[0] = __igt_spin_new(i915,
>>> +                                      .ctx = ctx,
>>> +                                      .flags = IGT_SPIN_POLL_RUN);
>>> +             spin[1] = __igt_spin_new(i915,
>>> +                                      .ctx = ctx,
>>> +                                      .dependency = scratch);
>>> +
>>> +             igt_spin_busywait_until_started(spin[0]);
>>> +
>>> +             /* Running: actual class */
>>> +             busy.handle = spin[0]->handle;
>>> +             do_ioctl(i915, DRM_IOCTL_I915_GEM_BUSY, &busy);
>>> +             igt_assert_eq_u32(busy.busy, 1u << (class + 16));
>>> +
>>> +             /* Queued(read): expected class */
>>> +             busy.handle = spin[1]->handle;
>>> +             do_ioctl(i915, DRM_IOCTL_I915_GEM_BUSY, &busy);
>>> +             igt_assert_eq_u32(busy.busy, 1u << (class + 16));
>>> +
>>> +             /* Queued(write): expected class */
>>> +             busy.handle = scratch;
>>> +             do_ioctl(i915, DRM_IOCTL_I915_GEM_BUSY, &busy);
>>> +             igt_assert_eq_u32(busy.busy,
>>> +                               (1u << (class + 16)) | (class + 1));
>>> +
>>> +             igt_spin_free(i915, spin[1]);
>>> +             igt_spin_free(i915, spin[0]);
>>> +
>>> +             gem_context_destroy(i915, ctx);
>>> +     }
>>> +
>>> +     gem_close(i915, scratch);
>>> +     gem_quiescent_gpu(i915);
>>> +}
>>> +
>>> +static int add_pmu(int pmu, const struct i915_engine_class_instance *ci)
>>> +{
>>> +     return perf_i915_open_group(I915_PMU_ENGINE_BUSY(ci->engine_class,
>>> +                                                      ci->engine_instance),
>>> +                                 pmu);
>>> +}
>>> +
>>> +static void full(int i915, unsigned int flags)
>>> +#define PULSE 0x1
>>> +#define LATE 0x2
>>> +{
>>> +     struct drm_i915_gem_exec_object2 batch = {
>>> +             .handle = batch_create(i915),
>>> +     };
>>> +
>>> +     if (flags & LATE)
>>> +             igt_require_sw_sync();
>>> +
>>> +     /*
>>> +      * I915_CONTEXT_PARAM_ENGINE changes the meaning of I915_EXEC_DEFAULT
>>> +      * to provide an automatic selection from the ctx->engine[]. It
>>> +      * employs load-balancing to evenly distribute the workload the
>>
>> The leading section needs rewriting for truth. It is the load balance
>> extensions which _can_ redefine the meaning of I915_EXEC_DEFAULT etc..
>> I'm sure I didn't need to explain, but I just have to make it clear which
>> part I am complaining about. :)
> 
> Hey, remember this is 2018!

You give me too much credit, I just go and mindlessly review. :)

> 
>>> +      * array. If we submit N spinners, we expect them to be simultaneously
>>> +      * running across N engines and use PMU to confirm that the entire
>>> +      * set of engines are busy.
>>
>> Clarify it is only if using N contexts.
>>
>>> +      *
>>> +      * We complicate matters by interspersing short-lived tasks to challenge
>>> +      * the kernel to search for space in which to insert new batches.
>>> +      */
>>> +
>>> +
>>> +     for (int mask = 0; mask < 32; mask++) {
>>> +             struct i915_engine_class_instance *ci;
>>> +             igt_spin_t *spin = NULL;
>>> +             IGT_CORK_FENCE(cork);
>>> +             unsigned int count;
>>> +             double load;
>>> +             int fence = -1;
>>> +             int *pmu;
>>> +
>>> +             ci = list_engines(i915, 1u << mask, &count);
>>> +             if (!ci)
>>> +                     continue;
>>> +
>>> +             igt_debug("Found %d engines of class %d\n", count, mask);
>>> +
>>> +             pmu = malloc(sizeof(*pmu) * count);
>>> +             igt_assert(pmu);
>>> +
>>> +             if (flags & LATE)
>>> +                     fence = igt_cork_plug(&cork, i915);
>>> +
>>> +             pmu[0] = -1;
>>> +             for (unsigned int n = 0; n < count; n++) {
>>> +                     uint32_t ctx;
>>> +
>>> +                     pmu[n] = add_pmu(pmu[0], &ci[n]);
>>> +
>>> +                     if (flags & PULSE) {
>>> +                             struct drm_i915_gem_execbuffer2 eb = {
>>> +                                     .buffers_ptr = to_user_pointer(&batch),
>>> +                                     .buffer_count = 1,
>>> +                                     .rsvd2 = fence,
>>> +                                     .flags = flags & LATE ? I915_EXEC_FENCE_IN : 0,
>>> +                             };
>>> +                             gem_execbuf(i915, &eb);
>>> +                     }
>>> +
>>> +                     /*
>>> +                      * Each spinner needs to be one a new timeline,
>>> +                      * otherwise they will just sit in the single queue
>>> +                      * and not run concurrently.
>>> +                      */
>>> +                     ctx = load_balancer_create(i915, ci, count);
>>> +
>>> +                     if (spin == NULL) {
>>> +                             spin = __igt_spin_new(i915, .ctx = ctx);
>>> +                     } else {
>>> +                             struct drm_i915_gem_execbuffer2 eb = {
>>> +                                     .buffers_ptr = spin->execbuf.buffers_ptr,
>>> +                                     .buffer_count = spin->execbuf.buffer_count,
>>> +                                     .rsvd1 = ctx,
>>> +                                     .rsvd2 = fence,
>>> +                                     .flags = flags & LATE ? I915_EXEC_FENCE_IN : 0,
>>> +                             };
>>> +                             gem_execbuf(i915, &eb);
>>> +                     }
>>> +
>>> +                     gem_context_destroy(i915, ctx);
>>> +             }
>>> +
>>> +             if (flags & LATE) {
>>> +                     igt_cork_unplug(&cork);
>>> +                     close(fence);
>>> +             }
>>> +
>>> +             load = measure_min_load(pmu[0], count, 10000);
>>> +             igt_spin_free(i915, spin);
>>> +
>>> +             close(pmu[0]);
>>> +             free(pmu);
>>> +
>>> +             free(ci);
>>> +
>>> +             igt_assert_f(load > 0.90,
>>> +                          "minimum load for %d x class:%d was found to be only %.1f%% busy\n",
>>> +                          count, mask, load*100);
>>> +             gem_quiescent_gpu(i915);
>>> +     }
>>> +
>>> +     gem_close(i915, batch.handle);
>>> +     gem_quiescent_gpu(i915);
>>> +}
>>> +
>>> +static void nop(int i915)
>>> +{
>>> +     struct drm_i915_gem_exec_object2 batch = {
>>> +             .handle = batch_create(i915),
>>> +     };
>>> +
>>> +     for (int mask = 0; mask < 32; mask++) {
>>
>> s/mask/class/
>>
>>> +             struct i915_engine_class_instance *ci;
>>> +             unsigned int count;
>>> +             uint32_t ctx;
>>> +
>>> +             ci = list_engines(i915, 1u << mask, &count);
>>> +             if (!ci)
>>> +                     continue;
>>> +
>>> +             if (count < 2) {
>>> +                     free(ci);
>>> +                     continue;
>>
>> Benchmark-only subtest for real veng?
> 
> Sure, that's a bit of internal knowledge leaking.
> 
>>> +             }
>>> +
>>> +             igt_debug("Found %d engines of class %d\n", count, mask);
>>> +             ctx = load_balancer_create(i915, ci, count);
>>> +
>>> +             for (int n = 0; n < count; n++) {
>>> +                     struct drm_i915_gem_execbuffer2 execbuf = {
>>> +                             .buffers_ptr = to_user_pointer(&batch),
>>> +                             .buffer_count = 1,
>>> +                             .flags = n + 1,
>>> +                             .rsvd1 = ctx,
>>> +                     };
>>> +                     struct timespec tv = {};
>>> +                     unsigned long nops;
>>> +                     double t;
>>> +
>>> +                     igt_nsec_elapsed(&tv);
>>> +                     nops = 0;
>>> +                     do {
>>> +                             for (int r = 0; r < 1024; r++)
>>> +                                     gem_execbuf(i915, &execbuf);
>>> +                             nops += 1024;
>>> +                     } while (igt_seconds_elapsed(&tv) < 2);
>>> +                     gem_sync(i915, batch.handle);
>>> +
>>> +                     t = igt_nsec_elapsed(&tv) * 1e-3 / nops;
>>> +                     igt_info("%x:%d %.3fus\n", mask, n, t);
>>
>> Class in decimal is better I think.
> 
> But it's mask :-p
> 
> It's treated as just a number and not as a class identifier.

1 << mask is a mask, while mask is a class. :)

> 
>> And some descriptive labels to info messages would be good. Like
>> "individual engines", "virtual engine" etc.
> 
> It does describe the individual engines and their composites. The output
> looks clear and concise. You may want mask translated to a string... but
> this code is oblivious as to what mask actually is.
> 
> The way it is used definitely looks more like a mask than a class.

My eyeballing of the igt_info lines made me think the output will be a 
series of lines with numbers, so not very readable as to what's what. A 
word or two on each line to say what scenario the numbers relate to 
shouldn't be such a chore, no? :)
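
Something along these lines would do - a sketch, with labels invented
here rather than taken from the patch:

	igt_info("class %d, engine %d (individual): %.3fus\n", mask, n, t);
	igt_info("class %d, any engine (balanced): %.3fus\n", mask, t);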

Regards,

Tvrtko

> 
>>> +             }
>>> +
>>> +             {
>>> +                     struct drm_i915_gem_execbuffer2 execbuf = {
>>> +                             .buffers_ptr = to_user_pointer(&batch),
>>> +                             .buffer_count = 1,
>>> +                             .rsvd1 = ctx,
>>> +                     };
>>> +                     struct timespec tv = {};
>>> +                     unsigned long nops;
>>> +                     double t;
>>> +
>>> +                     igt_nsec_elapsed(&tv);
>>> +                     nops = 0;
>>> +                     do {
>>> +                             for (int r = 0; r < 1024; r++)
>>> +                                     gem_execbuf(i915, &execbuf);
>>> +                             nops += 1024;
>>> +                     } while (igt_seconds_elapsed(&tv) < 2);
>>> +                     gem_sync(i915, batch.handle);
>>> +
>>> +                     t = igt_nsec_elapsed(&tv) * 1e-3 / nops;
>>> +                     igt_info("%x:* %.3fus\n", mask, t);
>>> +             }
>>> +
>>> +
>>> +             igt_fork(child, count) {
>>> +                     struct drm_i915_gem_execbuffer2 execbuf = {
>>> +                             .buffers_ptr = to_user_pointer(&batch),
>>> +                             .buffer_count = 1,
>>> +                             .flags = child + 1,
>>> +                             .rsvd1 = gem_context_clone(i915, ctx,
>>> +                                                        I915_CONTEXT_CLONE_ENGINES, 0),
>>> +                     };
>>> +                     struct timespec tv = {};
>>> +                     unsigned long nops;
>>> +                     double t;
>>> +
>>> +                     igt_nsec_elapsed(&tv);
>>> +                     nops = 0;
>>> +                     do {
>>> +                             for (int r = 0; r < 1024; r++)
>>> +                                     gem_execbuf(i915, &execbuf);
>>> +                             nops += 1024;
>>> +                     } while (igt_seconds_elapsed(&tv) < 2);
>>> +                     gem_sync(i915, batch.handle);
>>> +
>>> +                     t = igt_nsec_elapsed(&tv) * 1e-3 / nops;
>>> +                     igt_info("[%d] %x:%d %.3fus\n", child, mask, child, t);
>>> +
>>> +                     memset(&tv, 0, sizeof(tv));
>>> +                     execbuf.flags = 0;
>>> +
>>> +                     igt_nsec_elapsed(&tv);
>>> +                     nops = 0;
>>> +                     do {
>>> +                             for (int r = 0; r < 1024; r++)
>>> +                                     gem_execbuf(i915, &execbuf);
>>> +                             nops += 1024;
>>> +                     } while (igt_seconds_elapsed(&tv) < 2);
>>> +                     gem_sync(i915, batch.handle);
>>> +
>>> +                     t = igt_nsec_elapsed(&tv) * 1e-3 / nops;
>>> +                     igt_info("[%d] %x:* %.3fus\n", child, mask, t);
>>> +
>>> +                     gem_context_destroy(i915, execbuf.rsvd1);
>>> +             }
>>> +
>>> +             igt_waitchildren();
>>> +
>>> +             gem_context_destroy(i915, ctx);
>>> +             free(ci);
>>> +     }
>>> +
>>> +     gem_close(i915, batch.handle);
>>> +     gem_quiescent_gpu(i915);
>>> +}
>>> +
>>> +static void ping(int i915, uint32_t ctx, unsigned int engine)
>>> +{
>>> +     struct drm_i915_gem_exec_object2 obj = {
>>> +             .handle = batch_create(i915),
>>> +     };
>>> +     struct drm_i915_gem_execbuffer2 execbuf = {
>>> +             .buffers_ptr = to_user_pointer(&obj),
>>> +             .buffer_count = 1,
>>> +             .flags = engine,
>>> +             .rsvd1 = ctx,
>>> +     };
>>> +     gem_execbuf(i915, &execbuf);
>>> +     gem_sync(i915, obj.handle);
>>> +     gem_close(i915, obj.handle);
>>> +}
>>> +
>>> +static void semaphore(int i915)
>>> +{
>>> +     uint32_t block[2], scratch;
>>> +     igt_spin_t *spin[3];
>>> +
>>> +     /*
>>> +      * If we are using HW semaphores to launch serialised requests
>>> +      * on different engine concurrently, we want to verify that real
>>> +      * work is unimpeded.
>>> +      */
>>> +     igt_require(gem_scheduler_has_preemption(i915));
>>> +
>>> +     block[0] = gem_context_create(i915);
>>> +     block[1] = gem_context_create(i915);
>>> +
>>> +     scratch = gem_create(i915, 4096);
>>> +     spin[2] = igt_spin_new(i915, .dependency = scratch);
>>> +     for (int mask = 1; mask < 32; mask++) {
>>
>> s/mask/class/ throughout.
>>
>>> +             struct i915_engine_class_instance *ci;
>>> +             unsigned int count;
>>> +             uint32_t vip;
>>> +
>>> +             ci = list_engines(i915, 1u << mask, &count);
>>> +             if (!ci)
>>> +                     continue;
>>> +
* Re: [igt-dev] [PATCH i-g-t 13/16] i915: Add gem_exec_balancer
@ 2019-05-16  9:20         ` Tvrtko Ursulin
  0 siblings, 0 replies; 92+ messages in thread
From: Tvrtko Ursulin @ 2019-05-16  9:20 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: igt-dev


On 15/05/2019 20:50, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-05-15 11:49:45)
>>
>> On 08/05/2019 11:09, Chris Wilson wrote:
>>> Exercise the in-kernel load balancer checking that we can distribute
>>> batches across the set of ctx->engines to avoid load.
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> ---
>>>    tests/Makefile.am              |    1 +
>>>    tests/Makefile.sources         |    1 +
>>>    tests/i915/gem_exec_balancer.c | 1050 ++++++++++++++++++++++++++++++++
>>>    tests/meson.build              |    7 +
>>>    4 files changed, 1059 insertions(+)
>>>    create mode 100644 tests/i915/gem_exec_balancer.c
>>>
>>> diff --git a/tests/Makefile.am b/tests/Makefile.am
>>> index 5097debf6..c6af0aeaf 100644
>>> --- a/tests/Makefile.am
>>> +++ b/tests/Makefile.am
>>> @@ -96,6 +96,7 @@ gem_close_race_LDADD = $(LDADD) -lpthread
>>>    gem_ctx_thrash_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
>>>    gem_ctx_thrash_LDADD = $(LDADD) -lpthread
>>>    gem_ctx_sseu_LDADD = $(LDADD) $(top_builddir)/lib/libigt_perf.la
>>> +i915_gem_exec_balancer_LDADD = $(LDADD) $(top_builddir)/lib/libigt_perf.la
>>>    gem_exec_capture_LDADD = $(LDADD) -lz
>>>    gem_exec_parallel_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
>>>    gem_exec_parallel_LDADD = $(LDADD) -lpthread
>>> diff --git a/tests/Makefile.sources b/tests/Makefile.sources
>>> index e7ee27e81..323b625aa 100644
>>> --- a/tests/Makefile.sources
>>> +++ b/tests/Makefile.sources
>>> @@ -24,6 +24,7 @@ TESTS_progs = \
>>>        i915/gem_ctx_clone \
>>>        i915/gem_ctx_engines \
>>>        i915/gem_ctx_shared \
>>> +     i915/gem_exec_balancer \
>>>        i915/gem_vm_create \
>>>        kms_3d \
>>>        kms_addfb_basic \
>>> diff --git a/tests/i915/gem_exec_balancer.c b/tests/i915/gem_exec_balancer.c
>>> new file mode 100644
>>> index 000000000..25195d478
>>> --- /dev/null
>>> +++ b/tests/i915/gem_exec_balancer.c
>>> @@ -0,0 +1,1050 @@
>>> +/*
>>> + * Copyright © 2018 Intel Corporation
>>
>> 2019 I guess, even though work was started in 2018?
>>
>>> + *
>>> + * Permission is hereby granted, free of charge, to any person obtaining a
>>> + * copy of this software and associated documentation files (the "Software"),
>>> + * to deal in the Software without restriction, including without limitation
>>> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
>>> + * and/or sell copies of the Software, and to permit persons to whom the
>>> + * Software is furnished to do so, subject to the following conditions:
>>> + *
>>> + * The above copyright notice and this permission notice (including the next
>>> + * paragraph) shall be included in all copies or substantial portions of the
>>> + * Software.
>>> + *
>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
>>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
>>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
>>> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
>>> + * IN THE SOFTWARE.
>>> + */
>>> +
>>> +#include <sched.h>
>>> +
>>> +#include "igt.h"
>>> +#include "igt_perf.h"
>>> +#include "i915/gem_ring.h"
>>> +#include "sw_sync.h"
>>> +
>>> +IGT_TEST_DESCRIPTION("Exercise in-kernel load-balancing");
>>> +
>>> +#define INSTANCE_COUNT (1 << I915_PMU_SAMPLE_INSTANCE_BITS)
>>
>> Hmm.. this is a strange surrogate but I guess it works.
>>
>>> +
>>> +static bool has_class_instance(int i915, uint16_t class, uint16_t instance)
>>> +{
>>> +     int fd;
>>> +
>>> +     fd = perf_i915_open(I915_PMU_ENGINE_BUSY(class, instance));
>>
>> More work for Andi to replace with real engine discovery. :)
>>
>>> +     if (fd != -1) {
>>> +             close(fd);
>>> +             return true;
>>> +     }
>>> +
>>> +     return false;
>>> +}
>>> +
>>> +static struct i915_engine_class_instance *
>>> +list_engines(int i915, uint32_t class_mask, unsigned int *out)
>>> +{
>>> +     unsigned int count = 0, size = 64;
>>> +     struct i915_engine_class_instance *engines;
>>> +
>>> +     engines = malloc(size * sizeof(*engines));
>>> +     if (!engines) {
>>> +             *out = 0;
>>> +             return NULL;
>>> +     }
>>> +
>>> +     for (enum drm_i915_gem_engine_class class = I915_ENGINE_CLASS_RENDER;
>>> +          class_mask;
>>> +          class++, class_mask >>= 1) {
>>> +             if (!(class_mask & 1))
>>> +                     continue;
>>> +
>>> +             for (unsigned int instance = 0;
>>> +                  instance < INSTANCE_COUNT;
>>> +                  instance++) {
>>> +                     if (!has_class_instance(i915, class, instance))
>>> +                             continue;
>>> +
>>> +                     if (count == size) {
>>> +                             struct i915_engine_class_instance *e;
>>> +
>>> +                             size *= 2;
>>> +                             e = realloc(engines, size*sizeof(*engines));
>>> +                             if (!e) {
>>
>> I'd just assert. On malloc as well.
>>
>>> +                                     *out = count;
>>> +                                     return engines;
>>> +                             }
>>> +
>>> +                             engines = e;
>>> +                     }
>>> +
>>> +                     engines[count++] = (struct i915_engine_class_instance){
>>> +                             .engine_class = class,
>>> +                             .engine_instance = instance,
>>> +                     };
>>> +             }
>>> +     }
>>> +
>>> +     if (!count) {
>>> +             free(engines);
>>> +             engines = NULL;
>>> +     }
>>> +
>>> +     *out = count;
>>> +     return engines;
>>> +}
>>> +
>>> +static int __set_load_balancer(int i915, uint32_t ctx,
>>> +                            const struct i915_engine_class_instance *ci,
>>> +                            unsigned int count)
>>> +{
>>> +     I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(balancer, count);
>>> +     I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 1 + count);
>>> +     struct drm_i915_gem_context_param p = {
>>> +             .ctx_id = ctx,
>>> +             .param = I915_CONTEXT_PARAM_ENGINES,
>>> +             .size = sizeof(engines),
>>> +             .value = to_user_pointer(&engines)
>>> +     };
>>> +
>>> +     memset(&balancer, 0, sizeof(balancer));
>>> +     balancer.base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE;
>>> +
>>> +     igt_assert(count);
>>> +     balancer.num_siblings = count;
>>> +     memcpy(balancer.engines, ci, count * sizeof(*ci));
>>> +
>>> +     memset(&engines, 0, sizeof(engines));
>>> +     engines.extensions = to_user_pointer(&balancer);
>>> +     engines.engines[0].engine_class =
>>> +             I915_ENGINE_CLASS_INVALID;
>>> +     engines.engines[0].engine_instance =
>>> +             I915_ENGINE_CLASS_INVALID_NONE;
>>> +     memcpy(engines.engines + 1, ci, count * sizeof(*ci));
>>> +
>>> +     return __gem_context_set_param(i915, &p);
>>> +}
>>> +
>>> +static void set_load_balancer(int i915, uint32_t ctx,
>>> +                           const struct i915_engine_class_instance *ci,
>>> +                           unsigned int count)
>>> +{
>>> +     igt_assert_eq(__set_load_balancer(i915, ctx, ci, count), 0);
>>> +}
>>> +
>>> +static uint32_t load_balancer_create(int i915,
>>> +                                  const struct i915_engine_class_instance *ci,
>>> +                                  unsigned int count)
>>> +{
>>> +     uint32_t ctx;
>>> +
>>> +     ctx = gem_context_create(i915);
>>> +     set_load_balancer(i915, ctx, ci, count);
>>> +
>>> +     return ctx;
>>> +}
>>> +
>>> +static uint32_t __batch_create(int i915, uint32_t offset)
>>> +{
>>> +     const uint32_t bbe = MI_BATCH_BUFFER_END;
>>> +     uint32_t handle;
>>> +
>>> +     handle = gem_create(i915, ALIGN(offset + 4, 4096));
>>> +     gem_write(i915, handle, offset, &bbe, sizeof(bbe));
>>> +
>>> +     return handle;
>>> +}
>>> +
>>> +static uint32_t batch_create(int i915)
>>> +{
>>> +     return __batch_create(i915, 0);
>>> +}
>>> +
>>> +static void invalid_balancer(int i915)
>>> +{
>>> +     I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(balancer, 64);
>>> +     I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 64);
>>> +     struct drm_i915_gem_context_param p = {
>>> +             .param = I915_CONTEXT_PARAM_ENGINES,
>>> +             .value = to_user_pointer(&engines)
>>> +     };
>>> +     uint32_t handle;
>>> +     void *ptr;
>>> +
>>> +     /*
>>> +      * Assume that I915_CONTEXT_PARAM_ENGINES validates the array
>>> +      * of engines[], our job is to determine if the load_balancer
>>> +      * extension explodes.
>>> +      */
>>> +
>>> +     for (int class = 0; class < 32; class++) {
>>> +             struct i915_engine_class_instance *ci;
>>> +             unsigned int count;
>>> +
>>> +             ci = list_engines(i915, 1 << class, &count);
>>> +             if (!ci)
>>> +                     continue;
>>> +
>>> +             igt_debug("Found %d engines\n", count);
>>> +             igt_assert_lte(count, 64);
>>
>> Hey.. you always say trust the kernel! ;)
> 
> This code was a placeholder that you said you would replace with a proper
> query API...
> 
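True. For reference, once the engine info query lands this loop can be
replaced with something roughly like the below (a sketch from memory, so
treat the struct and ioctl details as approximate):

	struct drm_i915_query_item item = {
		.query_id = DRM_I915_QUERY_ENGINE_INFO,
	};
	struct drm_i915_query query = {
		.num_items = 1,
		.items_ptr = to_user_pointer(&item),
	};
	struct drm_i915_query_engine_info *info;

	do_ioctl(i915, DRM_IOCTL_I915_QUERY, &query); /* probe for size */
	info = calloc(1, item.length);
	igt_assert(info);

	item.data_ptr = to_user_pointer(info);
	do_ioctl(i915, DRM_IOCTL_I915_QUERY, &query);

	/* each info->engines[i].engine is then a class:instance pair */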
>>
>>> +
>>> +             p.ctx_id = gem_context_create(i915);
>>> +             p.size = (sizeof(struct i915_context_param_engines) +
>>> +                             (count + 1) * sizeof(*engines.engines));
>>
>> Alignment looks off.
>>
>>> +
>>> +             memset(&engines, 0, sizeof(engines));
>>> +             engines.engines[0].engine_class = I915_ENGINE_CLASS_INVALID;
>>> +             engines.engines[0].engine_instance = I915_ENGINE_CLASS_INVALID_NONE;
>>> +             memcpy(engines.engines + 1, ci, count * sizeof(*ci));
>>> +             gem_context_set_param(i915, &p);
>>> +
>>> +             engines.extensions = -1ull;
>>> +             igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
>>> +
>>> +             engines.extensions = 1ull;
>>> +             igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
>>> +
>>> +             memset(&balancer, 0, sizeof(balancer));
>>> +             balancer.base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE;
>>> +             balancer.num_siblings = count;
>>> +             memcpy(balancer.engines, ci, count * sizeof(*ci));
>>> +
>>> +             engines.extensions = to_user_pointer(&balancer);
>>> +             gem_context_set_param(i915, &p);
>>> +
>>> +             balancer.engine_index = 1;
>>> +             igt_assert_eq(__gem_context_set_param(i915, &p), -EEXIST);
>>> +
>>> +             balancer.engine_index = count;
>>> +             igt_assert_eq(__gem_context_set_param(i915, &p), -EEXIST);
>>> +
>>> +             balancer.engine_index = count + 1;
>>> +             igt_assert_eq(__gem_context_set_param(i915, &p), -EINVAL);
>>> +
>>> +             balancer.engine_index = 0;
>>> +             gem_context_set_param(i915, &p);
>>> +
>>> +             balancer.base.next_extension = to_user_pointer(&balancer);
>>> +             igt_assert_eq(__gem_context_set_param(i915, &p), -EEXIST);
>>> +
>>> +             balancer.base.next_extension = -1ull;
>>> +             igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
>>> +
>>> +             handle = gem_create(i915, 4096 * 3);
>>> +             ptr = gem_mmap__gtt(i915, handle, 4096 * 3, PROT_WRITE);
>>> +             gem_close(i915, handle);
>>> +
>>> +             memset(&engines, 0, sizeof(engines));
>>> +             engines.engines[0].engine_class = I915_ENGINE_CLASS_INVALID;
>>> +             engines.engines[0].engine_instance = I915_ENGINE_CLASS_INVALID_NONE;
>>> +             engines.engines[1].engine_class = I915_ENGINE_CLASS_INVALID;
>>> +             engines.engines[1].engine_instance = I915_ENGINE_CLASS_INVALID_NONE;
>>> +             memcpy(engines.engines + 2, ci, count * sizeof(*ci));
>>> +             p.size = (sizeof(struct i915_context_param_engines) +
>>> +                             (count + 2) * sizeof(*engines.engines));
>>
>> Alignment again.
>>
>>> +             gem_context_set_param(i915, &p);
>>> +
>>> +             balancer.base.next_extension = 0;
>>> +             balancer.engine_index = 1;
>>> +             engines.extensions = to_user_pointer(&balancer);
>>> +             gem_context_set_param(i915, &p);
>>> +
>>> +             memcpy(ptr + 4096 - 8, &balancer, sizeof(balancer));
>>> +             memcpy(ptr + 8192 - 8, &balancer, sizeof(balancer));
>>> +             balancer.engine_index = 0;
>>> +
>>> +             engines.extensions = to_user_pointer(ptr) + 4096 - 8;
>>> +             gem_context_set_param(i915, &p);
>>> +
>>> +             balancer.base.next_extension = engines.extensions;
>>> +             engines.extensions = to_user_pointer(&balancer);
>>> +             gem_context_set_param(i915, &p);
>>
>> mmap_gtt and unmapped area testing in one?
> 
> Neighbouring.
> 
>>> +             munmap(ptr, 4096);
>>> +             igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
>>> +             engines.extensions = to_user_pointer(ptr) + 4096 - 8;
>>> +             igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
>>> +
>>> +             engines.extensions = to_user_pointer(ptr) + 8192 - 8;
>>> +             gem_context_set_param(i915, &p);
>>> +
>>> +             balancer.base.next_extension = engines.extensions;
>>> +             engines.extensions = to_user_pointer(&balancer);
>>> +             gem_context_set_param(i915, &p);
>>> +
>>> +             munmap(ptr + 8192, 4096);
>>> +             igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
>>> +             engines.extensions = to_user_pointer(ptr) + 8192 - 8;
>>> +             igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
>>> +
>>> +             munmap(ptr + 4096, 4096);
>>> +
>>> +             gem_context_destroy(i915, p.ctx_id);
>>> +             free(ci);
>>> +     }
>>> +}
>>> +
>>> +static void kick_kthreads(int period_us)
>>> +{
>>> +     sched_yield();
>>> +     usleep(period_us);
>>
>> yield and sleep hm.. calling with zero period_us? Doesn't seem like it.
>> So what's it about?
> 
> Historically yield may have been a no-op, but sleep(0) actually yielded.
> 
>>> +}
>>> +
>>> +static double measure_load(int pmu, int period_us)
>>> +{
>>> +     uint64_t data[2];
>>> +     uint64_t d_t, d_v;
>>> +
>>> +     kick_kthreads(period_us);
>>> +
>>> +     igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
>>> +     d_v = -data[0];
>>> +     d_t = -data[1];
>>> +
>>> +     usleep(period_us);
>>> +
>>> +     igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
>>> +     d_v += data[0];
>>> +     d_t += data[1];
>>
>> This -val + val trick with uint64_t works?
> 
> Yes, unsigned overflow is defined.
> 
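Ah, true - both samples are monotonically increasing u64 counters, so the
pre/post arithmetic is exact modulo 2^64, ie. (illustrative only):

	static uint64_t sample_delta(uint64_t before, uint64_t after)
	{
		/* well defined even if the counter wrapped in between */
		return after - before;
	}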
>>
>>> +
>>> +     return d_v / (double)d_t;
>>> +}
>>> +
>>> +static double measure_min_load(int pmu, unsigned int num, int period_us)
>>> +{
>>> +     uint64_t data[2 + num];
>>> +     uint64_t d_t, d_v[num];
>>> +     uint64_t min = -1, max = 0;
>>> +
>>> +     kick_kthreads(period_us);
>>> +
>>> +     igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
>>> +     for (unsigned int n = 0; n < num; n++)
>>> +             d_v[n] = -data[2 + n];
>>> +     d_t = -data[1];
>>> +
>>> +     usleep(period_us);
>>> +
>>> +     igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
>>> +
>>> +     d_t += data[1];
>>> +     for (unsigned int n = 0; n < num; n++) {
>>> +             d_v[n] += data[2 + n];
>>> +             igt_debug("engine[%d]: %.1f%%\n",
>>> +                       n, d_v[n] / (double)d_t * 100);
>>> +             if (d_v[n] < min)
>>> +                     min = d_v[n];
>>> +             if (d_v[n] > max)
>>> +                     max = d_v[n];
>>> +     }
>>> +
>>> +     igt_debug("elapsed: %"PRIu64"ns, load [%.1f, %.1f]%%\n",
>>> +               d_t, min / (double)d_t * 100,  max / (double)d_t * 100);
>>> +
>>> +     return min / (double)d_t;
>>> +}
>>> +
>>> +static void check_individual_engine(int i915,
>>> +                                 uint32_t ctx,
>>> +                                 const struct i915_engine_class_instance *ci,
>>> +                                 int idx)
>>> +{
>>> +     igt_spin_t *spin;
>>> +     double load;
>>> +     int pmu;
>>> +
>>> +     pmu = perf_i915_open(I915_PMU_ENGINE_BUSY(ci[idx].engine_class,
>>> +                                               ci[idx].engine_instance));
>>> +
>>> +     spin = igt_spin_new(i915, .ctx = ctx, .engine = idx + 1);
>>> +     load = measure_load(pmu, 10000);
>>
>> Hm, the usleep before the start of measuring and the one between the two
>> samples use the same period. The one before should be a fixed settle
>> period I think, no?
> 
> Could be, that would require thought as to what the appropriate period
> for kicking should be. Yay for ksoftirqd.
> 
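A fixed settle period ahead of the sampling window would be enough I
think - the shape being something like this (numbers made up, and
assuming the kick moves out of measure_load()):

	static double measure_load_settled(int pmu, int period_us)
	{
		kick_kthreads(2000); /* fixed settle, absorb ksoftirqd & co */
		return measure_load(pmu, period_us); /* the sampled window */
	}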
>>> +     igt_spin_free(i915, spin);
>>> +
>>> +     close(pmu);
>>> +
>>> +     igt_assert_f(load > 0.90,
>>> +                  "engine %d (class:instance %d:%d) was found to be only %.1f%% busy\n",
>>> +                  idx, ci[idx].engine_class, ci[idx].engine_instance, load*100);
>>> +}
>>> +
>>> +static void individual(int i915)
>>> +{
>>> +     uint32_t ctx;
>>> +
>>> +     /*
>>> +      * I915_CONTEXT_PARAM_ENGINES allows us to index into the user
>>> +      * supplied array from gem_execbuf(). Our check is to build the
>>> +      * ctx->engine[] with various different engine classes, feed in
>>> +      * a spinner and then ask the PMU to confirm that the expected engine
>>> +      * was busy.
>>> +      */
>>> +
>>> +     ctx = gem_context_create(i915);
>>> +
>>> +     for (int mask = 0; mask < 32; mask++) {
>>> +             struct i915_engine_class_instance *ci;
>>> +             unsigned int count;
>>> +
>>> +             ci = list_engines(i915, 1u << mask, &count);
>>> +             if (!ci)
>>> +                     continue;
>>> +
>>> +             igt_debug("Found %d engines of class %d\n", count, mask);
>>> +
>>> +             for (int pass = 0; pass < count; pass++) { /* approx. count! */
>>> +                     igt_permute_array(ci, count, igt_exchange_int64);
>>
>> struct i915_engine_class_instance is four bytes long, so swap func looks
>> wrong. Unless for some reason you want to swap in blocks of two. Don't
>> know. Last index would grab into random memory though. I must be missing
>> something or it wouldn't have worked..
> 
> Once upon a time class_instance was 2xu32.
> 
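Right, and now that the struct is four bytes the exchange should be
element sized - something like this (untested):

	static void exchange_engine(void *array, unsigned i, unsigned j)
	{
		struct i915_engine_class_instance *ci = array, tmp;

		tmp = ci[i];
		ci[i] = ci[j];
		ci[j] = tmp;
	}

and then igt_permute_array(ci, count, exchange_engine);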
>>
>>> +                     set_load_balancer(i915, ctx, ci, count);
>>> +                     for (unsigned int n = 0; n < count; n++)
>>> +                             check_individual_engine(i915, ctx, ci, n);
>>> +             }
>>> +
>>> +             free(ci);
>>> +     }
>>> +
>>> +     gem_context_destroy(i915, ctx);
>>> +     gem_quiescent_gpu(i915);
>>> +}
>>> +
>>> +static void indicies(int i915)
>>
>> indices?
>>
>>> +{
>>> +     I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, I915_EXEC_RING_MASK + 1);
>>> +     struct drm_i915_gem_context_param p = {
>>> +             .ctx_id = gem_context_create(i915),
>>> +             .param = I915_CONTEXT_PARAM_ENGINES,
>>> +             .value = to_user_pointer(&engines)
>>> +     };
>>> +
>>> +     struct drm_i915_gem_exec_object2 batch = {
>>> +             .handle = batch_create(i915),
>>> +     };
>>> +
>>> +     unsigned int nengines = 0;
>>> +     void *balancers = NULL;
>>> +
>>> +     /*
>>> +      * We can populate our engine map with multiple virtual engines.
>>> +      * Do so.
>>> +      */
>>> +
>>> +     for (int class = 0; class < 32; class++) {
>>> +             struct i915_engine_class_instance *ci;
>>> +             unsigned int count;
>>> +
>>> +             ci = list_engines(i915, 1u << class, &count);
>>> +             if (!ci)
>>> +                     continue;
>>> +
>>> +             igt_debug("Found %d engines of class %d\n", count, class);
>>
>> Maybe this debug message should go into list_engines, since it seems
>> repeated a few times already.
> 
> Or remove the debug, I hear you.
> 
>>> +
>>> +             for (int n = 0; n < count; n++) {
>>> +                     I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(*balancer,
>>> +                                                              count);
>>> +
>>> +                     engines.engines[nengines].engine_class =
>>> +                             I915_ENGINE_CLASS_INVALID;
>>> +                     engines.engines[nengines].engine_instance =
>>> +                             I915_ENGINE_CLASS_INVALID_NONE;
>>> +
>>> +                     balancer = calloc(sizeof(*balancer), 1);
>>> +                     igt_assert(balancer);
>>> +
>>> +                     balancer->base.name =
>>> +                             I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE;
>>> +                     balancer->base.next_extension =
>>> +                             to_user_pointer(balancers);
>>> +                     balancers = balancer;
>>> +
>>> +                     balancer->engine_index = nengines++;
>>> +                     balancer->num_siblings = count;
>>> +
>>> +                     memcpy(balancer->engines,
>>> +                            ci, count * sizeof(*ci));
>>> +             }
>>> +             free(ci);
>>> +     }
>>> +
>>> +     igt_require(balancers);
>>> +     engines.extensions = to_user_pointer(balancers);
>>> +     p.size = (sizeof(struct i915_engine_class_instance) * nengines +
>>> +               sizeof(struct i915_context_param_engines));
>>> +     gem_context_set_param(i915, &p);
>>> +
>>> +     for (unsigned int n = 0; n < nengines; n++) {
>>> +             struct drm_i915_gem_execbuffer2 eb = {
>>> +                     .buffers_ptr = to_user_pointer(&batch),
>>> +                     .buffer_count = 1,
>>> +                     .flags = n,
>>> +                     .rsvd1 = p.ctx_id,
>>> +             };
>>> +             igt_debug("Executing on index=%d\n", n);
>>> +             gem_execbuf(i915, &eb);
>>> +     }
>>> +     gem_context_destroy(i915, p.ctx_id);
>>> +
>>> +     gem_sync(i915, batch.handle);
>>> +     gem_close(i915, batch.handle);
>>> +
>>> +     while (balancers) {
>>> +             struct i915_context_engines_load_balance *b, *n;
>>> +
>>> +             b = balancers;
>>> +             n = from_user_pointer(b->base.next_extension);
>>> +             free(b);
>>> +
>>> +             balancers = n;
>>> +     }
>>> +
>>> +     gem_quiescent_gpu(i915);
>>> +}
>>> +
>>> +static void busy(int i915)
>>> +{
>>> +     uint32_t scratch = gem_create(i915, 4096);
>>> +
>>> +     /*
>>> +      * Check that virtual engines are reported via GEM_BUSY.
>>> +      *
>>> +      * When running, the batch will be on the real engine and report
>>> +      * the actual class.
>>> +      *
>>> +      * Prior to running, if the load-balancer is across multiple
>>> +      * classes we don't know which engine the batch will
>>> +      * execute on, so we report them all!
>>> +      *
>>> +      * However, as we only support (and test) creating a load-balancer
>>> +      * from engines of only one class, that can be propagated accurately
>>> +      * through to GEM_BUSY.
>>> +      */
>>> +
>>> +     for (int class = 0; class < 16; class++) {
>>> +             struct drm_i915_gem_busy busy;
>>> +             struct i915_engine_class_instance *ci;
>>> +             unsigned int count;
>>> +             igt_spin_t *spin[2];
>>> +             uint32_t ctx;
>>> +
>>> +             ci = list_engines(i915, 1u << class, &count);
>>> +             if (!ci)
>>> +                     continue;
>>> +
>>> +             igt_debug("Found %d engines of class %d\n", count, class);
>>> +             ctx = load_balancer_create(i915, ci, count);
>>> +             free(ci);
>>> +
>>> +             spin[0] = __igt_spin_new(i915,
>>> +                                      .ctx = ctx,
>>> +                                      .flags = IGT_SPIN_POLL_RUN);
>>> +             spin[1] = __igt_spin_new(i915,
>>> +                                      .ctx = ctx,
>>> +                                      .dependency = scratch);
>>> +
>>> +             igt_spin_busywait_until_started(spin[0]);
>>> +
>>> +             /* Running: actual class */
>>> +             busy.handle = spin[0]->handle;
>>> +             do_ioctl(i915, DRM_IOCTL_I915_GEM_BUSY, &busy);
>>> +             igt_assert_eq_u32(busy.busy, 1u << (class + 16));
>>> +
>>> +             /* Queued(read): expected class */
>>> +             busy.handle = spin[1]->handle;
>>> +             do_ioctl(i915, DRM_IOCTL_I915_GEM_BUSY, &busy);
>>> +             igt_assert_eq_u32(busy.busy, 1u << (class + 16));
>>> +
>>> +             /* Queued(write): expected class */
>>> +             busy.handle = scratch;
>>> +             do_ioctl(i915, DRM_IOCTL_I915_GEM_BUSY, &busy);
>>> +             igt_assert_eq_u32(busy.busy,
>>> +                               (1u << (class + 16)) | (class + 1));
>>> +
>>> +             igt_spin_free(i915, spin[1]);
>>> +             igt_spin_free(i915, spin[0]);
>>> +
>>> +             gem_context_destroy(i915, ctx);
>>> +     }
>>> +
>>> +     gem_close(i915, scratch);
>>> +     gem_quiescent_gpu(i915);
>>> +}
>>> +
>>> +static int add_pmu(int pmu, const struct i915_engine_class_instance *ci)
>>> +{
>>> +     return perf_i915_open_group(I915_PMU_ENGINE_BUSY(ci->engine_class,
>>> +                                                      ci->engine_instance),
>>> +                                 pmu);
>>> +}
>>> +
>>> +static void full(int i915, unsigned int flags)
>>> +#define PULSE 0x1
>>> +#define LATE 0x2
>>> +{
>>> +     struct drm_i915_gem_exec_object2 batch = {
>>> +             .handle = batch_create(i915),
>>> +     };
>>> +
>>> +     if (flags & LATE)
>>> +             igt_require_sw_sync();
>>> +
>>> +     /*
>>> +      * I915_CONTEXT_PARAM_ENGINES changes the meaning of I915_EXEC_DEFAULT
>>> +      * to provide an automatic selection from the ctx->engine[]. It
>>> +      * employs load-balancing to evenly distribute the workload across the
>>
>> The leading section needs rewriting for truth. It is the load balance
>> extensions which _can_ redefine the meaning of I915_EXEC_DEFAULT etc..
>> I'm sure I didn't need to explain, but I have, just to make it clear which
>> part I am complaining about. :)
> 
> Hey, remember this is 2018!

You give me too much credit, I just go and mindlessly review. :)

> 
>>> +      * array. If we submit N spinners, we expect them to be simultaneously
>>> +      * running across N engines and use PMU to confirm that the entire
>>> +      * set of engines are busy.
>>
>> Clarify it is only if using N contexts.
>>
>>> +      *
>>> +      * We complicate matters by interspersing short-lived tasks to challenge
>>> +      * the kernel to search for space in which to insert new batches.
>>> +      */
>>> +
>>> +     for (int mask = 0; mask < 32; mask++) {
>>> +             struct i915_engine_class_instance *ci;
>>> +             igt_spin_t *spin = NULL;
>>> +             IGT_CORK_FENCE(cork);
>>> +             unsigned int count;
>>> +             double load;
>>> +             int fence = -1;
>>> +             int *pmu;
>>> +
>>> +             ci = list_engines(i915, 1u << mask, &count);
>>> +             if (!ci)
>>> +                     continue;
>>> +
>>> +             igt_debug("Found %d engines of class %d\n", count, mask);
>>> +
>>> +             pmu = malloc(sizeof(*pmu) * count);
>>> +             igt_assert(pmu);
>>> +
>>> +             if (flags & LATE)
>>> +                     fence = igt_cork_plug(&cork, i915);
>>> +
>>> +             pmu[0] = -1;
>>> +             for (unsigned int n = 0; n < count; n++) {
>>> +                     uint32_t ctx;
>>> +
>>> +                     pmu[n] = add_pmu(pmu[0], &ci[n]);
>>> +
>>> +                     if (flags & PULSE) {
>>> +                             struct drm_i915_gem_execbuffer2 eb = {
>>> +                                     .buffers_ptr = to_user_pointer(&batch),
>>> +                                     .buffer_count = 1,
>>> +                                     .rsvd2 = fence,
>>> +                                     .flags = flags & LATE ? I915_EXEC_FENCE_IN : 0,
>>> +                             };
>>> +                             gem_execbuf(i915, &eb);
>>> +                     }
>>> +
>>> +                     /*
>>> +                      * Each spinner needs to be on a new timeline,
>>> +                      * otherwise they will just sit in the single queue
>>> +                      * and not run concurrently.
>>> +                      */
>>> +                     ctx = load_balancer_create(i915, ci, count);
>>> +
>>> +                     if (spin == NULL) {
>>> +                             spin = __igt_spin_new(i915, .ctx = ctx);
>>> +                     } else {
>>> +                             struct drm_i915_gem_execbuffer2 eb = {
>>> +                                     .buffers_ptr = spin->execbuf.buffers_ptr,
>>> +                                     .buffer_count = spin->execbuf.buffer_count,
>>> +                                     .rsvd1 = ctx,
>>> +                                     .rsvd2 = fence,
>>> +                                     .flags = flags & LATE ? I915_EXEC_FENCE_IN : 0,
>>> +                             };
>>> +                             gem_execbuf(i915, &eb);
>>> +                     }
>>> +
>>> +                     gem_context_destroy(i915, ctx);
>>> +             }
>>> +
>>> +             if (flags & LATE) {
>>> +                     igt_cork_unplug(&cork);
>>> +                     close(fence);
>>> +             }
>>> +
>>> +             load = measure_min_load(pmu[0], count, 10000);
>>> +             igt_spin_free(i915, spin);
>>> +
>>> +             close(pmu[0]);
>>> +             free(pmu);
>>> +
>>> +             free(ci);
>>> +
>>> +             igt_assert_f(load > 0.90,
>>> +                          "minimum load for %d x class:%d was found to be only %.1f%% busy\n",
>>> +                          count, mask, load*100);
>>> +             gem_quiescent_gpu(i915);
>>> +     }
>>> +
>>> +     gem_close(i915, batch.handle);
>>> +     gem_quiescent_gpu(i915);
>>> +}
>>> +
>>> +static void nop(int i915)
>>> +{
>>> +     struct drm_i915_gem_exec_object2 batch = {
>>> +             .handle = batch_create(i915),
>>> +     };
>>> +
>>> +     for (int mask = 0; mask < 32; mask++) {
>>
>> s/mask/class/
>>
>>> +             struct i915_engine_class_instance *ci;
>>> +             unsigned int count;
>>> +             uint32_t ctx;
>>> +
>>> +             ci = list_engines(i915, 1u << mask, &count);
>>> +             if (!ci)
>>> +                     continue;
>>> +
>>> +             if (count < 2) {
>>> +                     free(ci);
>>> +                     continue;
>>
>> Benchmark-only subtest for real veng?
> 
> Sure, that's a bit of internal knowledge leaking.
> 
>>> +             }
>>> +
>>> +             igt_debug("Found %d engines of class %d\n", count, mask);
>>> +             ctx = load_balancer_create(i915, ci, count);
>>> +
>>> +             for (int n = 0; n < count; n++) {
>>> +                     struct drm_i915_gem_execbuffer2 execbuf = {
>>> +                             .buffers_ptr = to_user_pointer(&batch),
>>> +                             .buffer_count = 1,
>>> +                             .flags = n + 1,
>>> +                             .rsvd1 = ctx,
>>> +                     };
>>> +                     struct timespec tv = {};
>>> +                     unsigned long nops;
>>> +                     double t;
>>> +
>>> +                     igt_nsec_elapsed(&tv);
>>> +                     nops = 0;
>>> +                     do {
>>> +                             for (int r = 0; r < 1024; r++)
>>> +                                     gem_execbuf(i915, &execbuf);
>>> +                             nops += 1024;
>>> +                     } while (igt_seconds_elapsed(&tv) < 2);
>>> +                     gem_sync(i915, batch.handle);
>>> +
>>> +                     t = igt_nsec_elapsed(&tv) * 1e-3 / nops;
>>> +                     igt_info("%x:%d %.3fus\n", mask, n, t);
>>
>> Class in decimal is better I think.
> 
> But it's mask :-p
> 
> It's treated as just a number and not as a class identifier.

1 << mask is a mask, while mask is a class. :)

> 
>> And some descriptive labels to info messages would be good. Like
>> "individual engines", "virtual engine" etc.
> 
> It does describe the individual engines and their composites. The output
> looks clear and concise. You may want mask translated to a string... but
> this code is oblivious as to what mask actually is.
> 
> The way it is used definitely looks more like mask than class.

My eyeballing the igt_info lines made me think it will be a series of
lines with numbers, so it will not be very readable what's what. A word
or two on each line to say what scenario the numbers relate to shouldn't
be such a chore, no? :)
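
For instance, just something like (purely illustrative, variable names
as in the patch):

	igt_info("class %d, engine %d: %.3fus\n", mask, n, t);
	igt_info("class %d, balanced: %.3fus\n", mask, t);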

Regards,

Tvrtko

> 
>>> +             }
>>> +
>>> +             {
>>> +                     struct drm_i915_gem_execbuffer2 execbuf = {
>>> +                             .buffers_ptr = to_user_pointer(&batch),
>>> +                             .buffer_count = 1,
>>> +                             .rsvd1 = ctx,
>>> +                     };
>>> +                     struct timespec tv = {};
>>> +                     unsigned long nops;
>>> +                     double t;
>>> +
>>> +                     igt_nsec_elapsed(&tv);
>>> +                     nops = 0;
>>> +                     do {
>>> +                             for (int r = 0; r < 1024; r++)
>>> +                                     gem_execbuf(i915, &execbuf);
>>> +                             nops += 1024;
>>> +                     } while (igt_seconds_elapsed(&tv) < 2);
>>> +                     gem_sync(i915, batch.handle);
>>> +
>>> +                     t = igt_nsec_elapsed(&tv) * 1e-3 / nops;
>>> +                     igt_info("%x:* %.3fus\n", mask, t);
>>> +             }
>>> +
>>> +             igt_fork(child, count) {
>>> +                     struct drm_i915_gem_execbuffer2 execbuf = {
>>> +                             .buffers_ptr = to_user_pointer(&batch),
>>> +                             .buffer_count = 1,
>>> +                             .flags = child + 1,
>>> +                             .rsvd1 = gem_context_clone(i915, ctx,
>>> +                                                        I915_CONTEXT_CLONE_ENGINES, 0),
>>> +                     };
>>> +                     struct timespec tv = {};
>>> +                     unsigned long nops;
>>> +                     double t;
>>> +
>>> +                     igt_nsec_elapsed(&tv);
>>> +                     nops = 0;
>>> +                     do {
>>> +                             for (int r = 0; r < 1024; r++)
>>> +                                     gem_execbuf(i915, &execbuf);
>>> +                             nops += 1024;
>>> +                     } while (igt_seconds_elapsed(&tv) < 2);
>>> +                     gem_sync(i915, batch.handle);
>>> +
>>> +                     t = igt_nsec_elapsed(&tv) * 1e-3 / nops;
>>> +                     igt_info("[%d] %x:%d %.3fus\n", child, mask, child, t);
>>> +
>>> +                     memset(&tv, 0, sizeof(tv));
>>> +                     execbuf.flags = 0;
>>> +
>>> +                     igt_nsec_elapsed(&tv);
>>> +                     nops = 0;
>>> +                     do {
>>> +                             for (int r = 0; r < 1024; r++)
>>> +                                     gem_execbuf(i915, &execbuf);
>>> +                             nops += 1024;
>>> +                     } while (igt_seconds_elapsed(&tv) < 2);
>>> +                     gem_sync(i915, batch.handle);
>>> +
>>> +                     t = igt_nsec_elapsed(&tv) * 1e-3 / nops;
>>> +                     igt_info("[%d] %x:* %.3fus\n", child, mask, t);
>>> +
>>> +                     gem_context_destroy(i915, execbuf.rsvd1);
>>> +             }
>>> +
>>> +             igt_waitchildren();
>>> +
>>> +             gem_context_destroy(i915, ctx);
>>> +             free(ci);
>>> +     }
>>> +
>>> +     gem_close(i915, batch.handle);
>>> +     gem_quiescent_gpu(i915);
>>> +}
>>> +
>>> +static void ping(int i915, uint32_t ctx, unsigned int engine)
>>> +{
>>> +     struct drm_i915_gem_exec_object2 obj = {
>>> +             .handle = batch_create(i915),
>>> +     };
>>> +     struct drm_i915_gem_execbuffer2 execbuf = {
>>> +             .buffers_ptr = to_user_pointer(&obj),
>>> +             .buffer_count = 1,
>>> +             .flags = engine,
>>> +             .rsvd1 = ctx,
>>> +     };
>>> +     gem_execbuf(i915, &execbuf);
>>> +     gem_sync(i915, obj.handle);
>>> +     gem_close(i915, obj.handle);
>>> +}
>>> +
>>> +static void semaphore(int i915)
>>> +{
>>> +     uint32_t block[2], scratch;
>>> +     igt_spin_t *spin[3];
>>> +
>>> +     /*
>>> +      * If we are using HW semaphores to launch serialised requests
>>> +      * on different engines concurrently, we want to verify that real
>>> +      * work is unimpeded.
>>> +      */
>>> +     igt_require(gem_scheduler_has_preemption(i915));
>>> +
>>> +     block[0] = gem_context_create(i915);
>>> +     block[1] = gem_context_create(i915);
>>> +
>>> +     scratch = gem_create(i915, 4096);
>>> +     spin[2] = igt_spin_new(i915, .dependency = scratch);
>>> +     for (int mask = 1; mask < 32; mask++) {
>>
>> s/mask/class/ throughout.
>>
>>> +             struct i915_engine_class_instance *ci;
>>> +             unsigned int count;
>>> +             uint32_t vip;
>>> +
>>> +             ci = list_engines(i915, 1u << mask, &count);
>>> +             if (!ci)
>>> +                     continue;
>>> +
>>> +             if (count < ARRAY_SIZE(block)) {
>>> +                     free(ci);
>>> +                     continue;
>>> +             }
>>> +
>>> +             /* Ensure that we completely occupy all engines in this group */
>>> +             count = ARRAY_SIZE(block);
>>> +
>>> +             for (int i = 0; i < count; i++) {
>>> +                     set_load_balancer(i915, block[i], ci, count);
>>> +                     spin[i] = __igt_spin_new(i915,
>>> +                                                    .ctx = block[i],
>>> +                                                    .dependency = scratch);
>>
>> Alignment.
>>
>>> +             }
>>> +
>>> +             /*
>>> +              * Either we haven't blocked both engines with semaphores,
>>> +              * or we let the vip through. If not, we hang.
>>> +              */
>>> +             vip = gem_context_create(i915);
>>> +             set_load_balancer(i915, vip, ci, count);
>>> +             ping(i915, vip, 0);
>>> +             gem_context_destroy(i915, vip);
>>> +
>>> +             for (int i = 0; i < count; i++)
>>> +                     igt_spin_free(i915, spin[i]);
>>> +
>>> +             free(ci);
>>> +     }
>>> +     igt_spin_free(i915, spin[2]);
>>> +     gem_close(i915, scratch);
>>> +
>>> +     gem_context_destroy(i915, block[1]);
>>> +     gem_context_destroy(i915, block[0]);
>>> +
>>> +     gem_quiescent_gpu(i915);
>>> +}
>>> +
>>> +static void smoketest(int i915, int timeout)
>>> +{
>>> +     struct drm_i915_gem_exec_object2 batch[2] = {
>>> +             { .handle = __batch_create(i915, 16380) }
>>> +     };
>>> +     unsigned int ncontext = 0;
>>> +     uint32_t *contexts = NULL;
>>> +     uint32_t *handles = NULL;
>>> +
>>> +     igt_require_sw_sync();
>>> +
>>> +     for (int mask = 0; mask < 32; mask++) {
>>> +             struct i915_engine_class_instance *ci;
>>> +             unsigned int count = 0;
>>> +
>>> +             ci = list_engines(i915, 1u << mask, &count);
>>> +             if (!ci || count < 2) {
>>> +                     free(ci);
>>> +                     continue;
>>> +             }
>>> +
>>> +             igt_debug("Found %d engines of class %d\n", count, mask);
>>> +
>>> +             ncontext += 128;
>>> +             contexts = realloc(contexts, sizeof(*contexts) * ncontext);
>>> +             igt_assert(contexts);
>>> +
>>> +             for (unsigned int n = ncontext - 128; n < ncontext; n++) {
>>> +                     contexts[n] = load_balancer_create(i915, ci, count);
>>> +                     igt_assert(contexts[n]);
>>> +             }
>>> +
>>> +             free(ci);
>>> +     }
>>> +     igt_debug("Created %d virtual engines (one per context)\n", ncontext);
>>> +     igt_require(ncontext);
>>> +
>>> +     contexts = realloc(contexts, sizeof(*contexts) * ncontext * 4);
>>> +     igt_assert(contexts);
>>> +     memcpy(contexts + ncontext, contexts, ncontext * sizeof(*contexts));
>>> +     ncontext *= 2;
>>> +     memcpy(contexts + ncontext, contexts, ncontext * sizeof(*contexts));
>>> +     ncontext *= 2;
>>> +
>>> +     handles = malloc(sizeof(*handles) * ncontext);
>>> +     igt_assert(handles);
>>> +     for (unsigned int n = 0; n < ncontext; n++)
>>> +             handles[n] = gem_create(i915, 4096);
>>> +
>>> +     igt_until_timeout(timeout) {
>>> +             unsigned int count = 1 + (rand() % (ncontext - 1));
>>> +             IGT_CORK_FENCE(cork);
>>> +             int fence = igt_cork_plug(&cork, i915);
>>> +
>>> +             for (unsigned int n = 0; n < count; n++) {
>>> +                     struct drm_i915_gem_execbuffer2 eb = {
>>> +                             .buffers_ptr = to_user_pointer(batch),
>>> +                             .buffer_count = ARRAY_SIZE(batch),
>>> +                             .rsvd1 = contexts[n],
>>> +                             .rsvd2 = fence,
>>> +                             .flags = I915_EXEC_BATCH_FIRST | I915_EXEC_FENCE_IN,
>>> +                     };
>>> +                     batch[1].handle = handles[n];
>>> +                     gem_execbuf(i915, &eb);
>>> +             }
>>> +             igt_permute_array(handles, count, igt_exchange_int);
>>> +
>>> +             igt_cork_unplug(&cork);
>>> +             for (unsigned int n = 0; n < count; n++)
>>> +                     gem_sync(i915, handles[n]);
>>> +
>>> +             close(fence);
>>> +     }
>>> +
>>> +     for (unsigned int n = 0; n < ncontext; n++) {
>>> +             gem_close(i915, handles[n]);
>>> +             __gem_context_destroy(i915, contexts[n]);
>>> +     }
>>> +     free(handles);
>>> +     free(contexts);
>>> +     gem_close(i915, batch[0].handle);
>>> +}
>>> +
>>> +static bool has_context_engines(int i915)
>>> +{
>>> +     struct drm_i915_gem_context_param p = {
>>> +             .param = I915_CONTEXT_PARAM_ENGINES,
>>> +     };
>>> +
>>> +     return __gem_context_set_param(i915, &p) == 0;
>>> +}
>>> +
>>> +static bool has_load_balancer(int i915)
>>> +{
>>> +     struct i915_engine_class_instance ci = {};
>>> +     uint32_t ctx;
>>> +     int err;
>>> +
>>> +     ctx = gem_context_create(i915);
>>> +     err = __set_load_balancer(i915, ctx, &ci, 1);
>>> +     gem_context_destroy(i915, ctx);
>>> +
>>> +     return err == 0;
>>> +}
>>> +
>>> +igt_main
>>> +{
>>> +     int i915 = -1;
>>> +
>>> +     igt_skip_on_simulation();
>>> +
>>> +     igt_fixture {
>>> +             i915 = drm_open_driver(DRIVER_INTEL);
>>> +             igt_require_gem(i915);
>>> +
>>> +             gem_require_contexts(i915);
>>> +             igt_require(has_context_engines(i915));
>>> +             igt_require(has_load_balancer(i915));
>>> +
>>> +             igt_fork_hang_detector(i915);
>>> +     }
>>> +
>>> +     igt_subtest("invalid-balancer")
>>> +             invalid_balancer(i915);
>>> +
>>> +     igt_subtest("individual")
>>> +             individual(i915);
>>> +
>>> +     igt_subtest("indicies")
>>> +             indicies(i915);
>>> +
>>> +     igt_subtest("busy")
>>> +             busy(i915);
>>> +
>>> +     igt_subtest_group {
>>> +             static const struct {
>>> +                     const char *name;
>>> +                     unsigned int flags;
>>> +             } phases[] = {
>>> +                     { "", 0 },
>>> +                     { "-pulse", PULSE },
>>> +                     { "-late", LATE },
>>> +                     { "-late-pulse", PULSE | LATE },
>>> +                     { }
>>> +             };
>>> +             for (typeof(*phases) *p = phases; p->name; p++)
>>> +                     igt_subtest_f("full%s", p->name)
>>> +                             full(i915, p->flags);
>>> +     }
>>> +
>>> +     igt_subtest("nop")
>>> +             nop(i915);
>>> +
>>> +     igt_subtest("semaphore")
>>> +             semaphore(i915);
>>> +
>>> +     igt_subtest("smoke")
>>> +             smoketest(i915, 20);
>>> +
>>> +     igt_fixture {
>>> +             igt_stop_hang_detector();
>>> +     }
>>> +}
>>> diff --git a/tests/meson.build b/tests/meson.build
>>> index 7e0089e74..eeea3611d 100644
>>> --- a/tests/meson.build
>>> +++ b/tests/meson.build
>>> @@ -288,6 +288,13 @@ test_executables += executable('gem_eio',
>>>           install : true)
>>>    test_list += 'gem_eio'
>>>    
>>> +test_executables += executable('gem_exec_balancer', 'i915/gem_exec_balancer.c',
>>> +        dependencies : test_deps + [ lib_igt_perf ],
>>> +        install_dir : libexecdir,
>>> +        install_rpath : libexecdir_rpathdir,
>>> +        install : true)
>>> +test_progs += 'gem_exec_balancer'
>>> +
>>>    test_executables += executable('gem_mocs_settings',
>>>           join_paths('i915', 'gem_mocs_settings.c'),
>>>           dependencies : test_deps + [ lib_igt_perf ],
>>>
>>
>> Regards,
>>
>> Tvrtko
>>
> 
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH i-g-t 14/16] i915/gem_exec_balancer: Exercise bonded pairs
  2019-05-15 20:32         ` [igt-dev] " Chris Wilson
@ 2019-05-16  9:28           ` Tvrtko Ursulin
  -1 siblings, 0 replies; 92+ messages in thread
From: Tvrtko Ursulin @ 2019-05-16  9:28 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: igt-dev


On 15/05/2019 21:32, Chris Wilson wrote:
> Quoting Chris Wilson (2019-05-15 20:57:18)
>> Quoting Tvrtko Ursulin (2019-05-15 11:58:20)
>>>
>>> On 08/05/2019 11:09, Chris Wilson wrote:
>>>> +                     igt_assert_f(load > 0.90,
>>>> +                                  "engine %d (class:instance %d:%d) was found to be only %.1f%% busy\n",
>>>> +                                  n, siblings[n].engine_class, siblings[n].engine_instance,
>>>> +                                  load*100);
>>>
>>> Master also needs to be checked I think. You have the infrastructure to
>>> open two pmus in the previous patch so should be easy.
>>
>> Haven't we checked precisely that in earlier tests? What would perhaps

Where? I see one subtest for bonding.

>> be fairer here would be to verify the other engine was idle, otherwise
>> we could say we fluked it. Furthermore, we should repeat a few times
>> with say (0, 1), (0, 1), (1, 0), (1, 0) to further rule out flukes, and
>> then to finish with a random smoketest of some description.

Hm, maybe GPU idling before each pass is needed in this test.

Then I'd be happy if you just measured busyness on a bonded pair.

And yeah more permutation would be good for fluke prevention.
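
Even just hard-coding the orderings would do, say (a sketch -
run_bonded_pass() is a made-up stand-in for one submit-and-measure pass):

	static const struct { int master, slave; } order[] = {
		{ 0, 1 }, { 0, 1 }, { 1, 0 }, { 1, 0 },
	};

	for (int i = 0; i < ARRAY_SIZE(order); i++)
		run_bonded_pass(i915, order[i].master, order[i].slave);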

>> Perhaps even a test that is closer to the typical workload would involve
>> semaphore communication across the bond. But I don't know a way in which
>> I can determine which engine I am on in order to record that from the
>> GPU itself.
> 
> To remind myself, the importance here is on uABI stressing; it is much
> easier to prove the relationship in the kernel and that is where we do it.

I didn't think it would be hard to read busyness from the master as well,
but if you insist, okay.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 92+ messages in thread

* Re: [PATCH i-g-t 05/16] i915/gem_ctx_create: Basic checks for constructor properties
  2019-05-16  8:38         ` [igt-dev] " Tvrtko Ursulin
@ 2019-05-22 10:24           ` Chris Wilson
  -1 siblings, 0 replies; 92+ messages in thread
From: Chris Wilson @ 2019-05-22 10:24 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx; +Cc: igt-dev

Quoting Tvrtko Ursulin (2019-05-16 09:38:15)
> 
> On 15/05/2019 20:05, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2019-05-14 11:15:12)
> >>
> >> On 08/05/2019 11:09, Chris Wilson wrote:
> >>> Check that the extended create interface accepts setparam.
> >>>
> >>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >>> ---
> >>>    tests/i915/gem_ctx_create.c | 225 ++++++++++++++++++++++++++++++++++--
> >>>    1 file changed, 213 insertions(+), 12 deletions(-)
> >>>
> >>> diff --git a/tests/i915/gem_ctx_create.c b/tests/i915/gem_ctx_create.c
> >>> index a664070db..9b4fddbe7 100644
> >>> --- a/tests/i915/gem_ctx_create.c
> >>> +++ b/tests/i915/gem_ctx_create.c
> >>> @@ -33,6 +33,7 @@
> >>>    #include <time.h>
> >>>    
> >>>    #include "igt_rand.h"
> >>> +#include "sw_sync.h"
> >>>    
> >>>    #define LOCAL_I915_EXEC_BSD_SHIFT      (13)
> >>>    #define LOCAL_I915_EXEC_BSD_MASK       (3 << LOCAL_I915_EXEC_BSD_SHIFT)
> >>> @@ -45,12 +46,33 @@ static unsigned all_nengine;
> >>>    static unsigned ppgtt_engines[16];
> >>>    static unsigned ppgtt_nengine;
> >>>    
> >>> -static int __gem_context_create_local(int fd, struct drm_i915_gem_context_create *arg)
> >>> +static int create_ioctl(int fd, struct drm_i915_gem_context_create *arg)
> >>>    {
> >>> -     int ret = 0;
> >>> -     if (drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, arg))
> >>> -             ret = -errno;
> >>> -     return ret;
> >>> +     int err;
> >>> +
> >>> +     err = 0;
> >>> +     if (igt_ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, arg)) {
> >>> +             err = -errno;
> >>> +             igt_assert(err);
> >>> +     }
> >>> +
> >>> +     errno = 0;
> >>> +     return err;
> >>> +}
> >>> +
> >>> +static int create_ext_ioctl(int i915,
> >>> +                         struct drm_i915_gem_context_create_ext *arg)
> >>> +{
> >>> +     int err;
> >>> +
> >>> +     err = 0;
> >>> +     if (igt_ioctl(i915, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, arg)) {
> >>> +             err = -errno;
> >>> +             igt_assume(err);
> >>> +     }
> >>> +
> >>> +     errno = 0;
> >>> +     return err;
> >>>    }
> >>>    
> >>>    static double elapsed(const struct timespec *start,
> >>> @@ -308,6 +330,187 @@ static void maximum(int fd, int ncpus, unsigned mode)
> >>>        free(contexts);
> >>>    }
> >>>    
> >>> +static void basic_ext_param(int i915)
> >>> +{
> >>> +     struct drm_i915_gem_context_create_ext_setparam ext = {
> >>> +             { .name = I915_CONTEXT_CREATE_EXT_SETPARAM },
> >>> +     };
> >>> +     struct drm_i915_gem_context_create_ext create = {
> >>> +             .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS
> >>> +     };
> >>> +     struct drm_i915_gem_context_param get;
> >>> +
> >>> +     igt_require(create_ext_ioctl(i915, &create) == 0);
> >>> +     gem_context_destroy(i915, create.ctx_id);
> >>> +
> >>> +     create.extensions = -1ull;
> >>> +     igt_assert_eq(create_ext_ioctl(i915, &create), -EFAULT);
> >>> +
> >>> +     create.extensions = to_user_pointer(&ext);
> >>> +     igt_assert_eq(create_ext_ioctl(i915, &create), -EINVAL);
> >>> +
> >>> +     ext.param.param = I915_CONTEXT_PARAM_PRIORITY;
> >>> +     if (create_ext_ioctl(i915, &create) != -ENODEV) {
> >>> +             gem_context_destroy(i915, create.ctx_id);
> >>> +
> >>> +             ext.base.next_extension = -1ull;
> >>> +             igt_assert_eq(create_ext_ioctl(i915, &create), -EFAULT);
> >>> +             ext.base.next_extension = to_user_pointer(&ext);
> >>> +             igt_assert_eq(create_ext_ioctl(i915, &create), -E2BIG);
> >>> +             ext.base.next_extension = 0;
> >>> +
> >>> +             ext.param.value = 32;
> >>> +             igt_assert_eq(create_ext_ioctl(i915, &create), 0);
> >>> +
> >>> +             memset(&get, 0, sizeof(get));
> >>> +             get.ctx_id = create.ctx_id;
> >>> +             get.param = I915_CONTEXT_PARAM_PRIORITY;
> >>> +             gem_context_get_param(i915, &get);
> >>> +             igt_assert_eq(get.value, ext.param.value);
> >>> +
> >>> +             gem_context_destroy(i915, create.ctx_id);
> >>> +     }
> >>> +}
> >>> +
> >>> +static void check_single_timeline(int i915, uint32_t ctx, int num_engines)
> >>> +{
> >>> +#define RCS_TIMESTAMP (0x2000 + 0x358)
> >>> +     const int gen = intel_gen(intel_get_drm_devid(i915));
> >>> +     const int has_64bit_reloc = gen >= 8;
> >>> +     struct drm_i915_gem_exec_object2 results = { .handle = gem_create(i915, 4096) };
> >>> +     const uint32_t bbe = MI_BATCH_BUFFER_END;
> >>> +     int timeline = sw_sync_timeline_create();
> >>> +     uint32_t last, *map;
> >>> +
> >>> +     {
> >>> +             struct drm_i915_gem_execbuffer2 execbuf = {
> >>> +                     .buffers_ptr = to_user_pointer(&results),
> >>> +                     .buffer_count = 1,
> >>> +                     .rsvd1 = ctx,
> >>> +             };
> >>> +             gem_write(i915, results.handle, 0, &bbe, sizeof(bbe));
> >>> +             gem_execbuf(i915, &execbuf);
> >>> +             results.flags = EXEC_OBJECT_PINNED;
> >>> +     }
> >>> +
> >>> +     for (int i = 0; i < num_engines; i++) {
> >>> +             struct drm_i915_gem_exec_object2 obj[2] = {
> >>> +                     results, /* write hazard lies! */
> >>> +                     { .handle = gem_create(i915, 4096) },
> >>> +             };
> >>> +             struct drm_i915_gem_execbuffer2 execbuf = {
> >>> +                     .buffers_ptr = to_user_pointer(obj),
> >>> +                     .buffer_count = 2,
> >>> +                     .rsvd1 = ctx,
> >>> +                     .rsvd2 = sw_sync_timeline_create_fence(timeline, num_engines - i),
> >>> +                     .flags = i | I915_EXEC_FENCE_IN,
> >>> +             };
> >>> +             uint64_t offset = results.offset + 4 * i;
> >>> +             uint32_t *cs;
> >>> +             int j = 0;
> >>> +
> >>> +             cs = gem_mmap__cpu(i915, obj[1].handle, 0, 4096, PROT_WRITE);
> >>> +
> >>> +             cs[j] = 0x24 << 23 | 1; /* SRM */
> >>> +             if (has_64bit_reloc)
> >>> +                     cs[j]++;
> >>> +             j++;
> >>> +             cs[j++] = RCS_TIMESTAMP;
> >>> +             cs[j++] = offset;
> >>> +             if (has_64bit_reloc)
> >>> +                     cs[j++] = offset >> 32;
> >>> +             cs[j++] = MI_BATCH_BUFFER_END;
> >>> +
> >>> +             munmap(cs, 4096);
> >>> +
> >>> +             gem_execbuf(i915, &execbuf);
> >>> +             gem_close(i915, obj[1].handle);
> >>> +             close(execbuf.rsvd2);
> >>> +     }
> >>> +     close(timeline);
> >>> +     gem_sync(i915, results.handle);
> >>> +
> >>> +     map = gem_mmap__cpu(i915, results.handle, 0, 4096, PROT_READ);
> >>> +     gem_set_domain(i915, results.handle, I915_GEM_DOMAIN_CPU, 0);
> >>> +     gem_close(i915, results.handle);
> >>> +
> >>> +     last = map[0];
> >>> +     for (int i = 1; i < num_engines; i++) {
> >>> +             igt_assert_f((map[i] - last) > 0,
> >>> +                          "Engine instance [%d] executed too early: this:%x, last:%x\n",
> >>> +                          i, map[i], last);
> >>> +             last = map[i];
> >>> +     }
> >>
> >> Hm.. aren't two sw fences (two seqnos) just a needless complication -
> >> since the execution order in the single timeline is controlled by
> >> submission order. The statement is true only when compounded with the
> >> fact that you signal both fences at the same time. I am thinking about
> >> what would happen if it wasn't a single timeline context. Fences would be
> >> signaled in order, but execution does not have to happen in order. That
> >> it does is a property of single timeline and not fence ordering. So two
> >> input fences with two seqnos is misleading. A single plug would do, I think.
> > 
> > But that would not detect the case when it was multiple timelines...
> >   
> >> Or you are thinking to nudge the driver to do the right thing? But in
> >> that case I think you'd need to manually advance the first seqno (2nd
> >> batch) first and wait a bit to check it hasn't been executed. Then signal
> >> the second seqno (first batch) and run the above check to see they have
> >> been executed in order.
> > 
> > The challenge is that we detect if the driver uses 2 timelines instead
> > of one. So that is what we set up to detect.
> 
> With a single seqno advance, what determines the order of signal delivery
> on blocked fences? Is it defined in the dma-fence contract that it happens in
> order? If it is, then is it defined that it would map to in-order
> submission in i915 (if the contexts/timelines were separate)? (It might
> not, I am thinking; the scheduler can decide whatever it wants.)

We don't emit signals in order. Mostly we do, but not strictly.
 
> So I don't see a problem with being more explicit in this test and doing
> a step-by-step timeline advance, so that what happens is completely under
> the test's control. And it would AFAICS detect the two timelines, because
> it would expect that the first timeline advance must not result in request execution.

I don't see the problem with the test; it does what I need it to.
-Chris
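
For reference, the step-by-step variant suggested above would look
roughly like this. A sketch only, not the committed test:
sw_sync_timeline_inc() and gem_bo_busy() are existing IGT helpers, and
the setup is assumed to be the check_single_timeline() loop quoted
earlier:

    /* Release one input fence at a time; with a single shared timeline
     * the batches still have to execute in submission order, and the
     * results object stays busy until the last batch has run. */
    for (int n = 0; n < num_engines; n++) {
            igt_assert(gem_bo_busy(i915, results.handle));
            sw_sync_timeline_inc(timeline, 1); /* advance one seqno */
            usleep(50 * 1000); /* give the scheduler time to react */
    }
    gem_sync(i915, results.handle); /* all batches must now complete */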


* Re: [PATCH i-g-t 05/16] i915/gem_ctx_create: Basic checks for constructor properties
  2019-05-22 10:24           ` [igt-dev] " Chris Wilson
@ 2019-05-22 10:32             ` Tvrtko Ursulin
  -1 siblings, 0 replies; 92+ messages in thread
From: Tvrtko Ursulin @ 2019-05-22 10:32 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: igt-dev


On 22/05/2019 11:24, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-05-16 09:38:15)
>>
>> On 15/05/2019 20:05, Chris Wilson wrote:
>>> Quoting Tvrtko Ursulin (2019-05-14 11:15:12)
>>>>
>>>> On 08/05/2019 11:09, Chris Wilson wrote:
>>>>> Check that the extended create interface accepts setparam.
>>>>>
>>>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>>>> ---
>>>>>     tests/i915/gem_ctx_create.c | 225 ++++++++++++++++++++++++++++++++++--
>>>>>     1 file changed, 213 insertions(+), 12 deletions(-)
>>>>>
>>>>> diff --git a/tests/i915/gem_ctx_create.c b/tests/i915/gem_ctx_create.c
>>>>> index a664070db..9b4fddbe7 100644
>>>>> --- a/tests/i915/gem_ctx_create.c
>>>>> +++ b/tests/i915/gem_ctx_create.c
>>>>> @@ -33,6 +33,7 @@
>>>>>     #include <time.h>
>>>>>     
>>>>>     #include "igt_rand.h"
>>>>> +#include "sw_sync.h"
>>>>>     
>>>>>     #define LOCAL_I915_EXEC_BSD_SHIFT      (13)
>>>>>     #define LOCAL_I915_EXEC_BSD_MASK       (3 << LOCAL_I915_EXEC_BSD_SHIFT)
>>>>> @@ -45,12 +46,33 @@ static unsigned all_nengine;
>>>>>     static unsigned ppgtt_engines[16];
>>>>>     static unsigned ppgtt_nengine;
>>>>>     
>>>>> -static int __gem_context_create_local(int fd, struct drm_i915_gem_context_create *arg)
>>>>> +static int create_ioctl(int fd, struct drm_i915_gem_context_create *arg)
>>>>>     {
>>>>> -     int ret = 0;
>>>>> -     if (drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, arg))
>>>>> -             ret = -errno;
>>>>> -     return ret;
>>>>> +     int err;
>>>>> +
>>>>> +     err = 0;
>>>>> +     if (igt_ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, arg)) {
>>>>> +             err = -errno;
>>>>> +             igt_assert(err);
>>>>> +     }
>>>>> +
>>>>> +     errno = 0;
>>>>> +     return err;
>>>>> +}
>>>>> +
>>>>> +static int create_ext_ioctl(int i915,
>>>>> +                         struct drm_i915_gem_context_create_ext *arg)
>>>>> +{
>>>>> +     int err;
>>>>> +
>>>>> +     err = 0;
>>>>> +     if (igt_ioctl(i915, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, arg)) {
>>>>> +             err = -errno;
>>>>> +             igt_assume(err);
>>>>> +     }
>>>>> +
>>>>> +     errno = 0;
>>>>> +     return err;
>>>>>     }
>>>>>     
>>>>>     static double elapsed(const struct timespec *start,
>>>>> @@ -308,6 +330,187 @@ static void maximum(int fd, int ncpus, unsigned mode)
>>>>>         free(contexts);
>>>>>     }
>>>>>     
>>>>> +static void basic_ext_param(int i915)
>>>>> +{
>>>>> +     struct drm_i915_gem_context_create_ext_setparam ext = {
>>>>> +             { .name = I915_CONTEXT_CREATE_EXT_SETPARAM },
>>>>> +     };
>>>>> +     struct drm_i915_gem_context_create_ext create = {
>>>>> +             .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS
>>>>> +     };
>>>>> +     struct drm_i915_gem_context_param get;
>>>>> +
>>>>> +     igt_require(create_ext_ioctl(i915, &create) == 0);
>>>>> +     gem_context_destroy(i915, create.ctx_id);
>>>>> +
>>>>> +     create.extensions = -1ull;
>>>>> +     igt_assert_eq(create_ext_ioctl(i915, &create), -EFAULT);
>>>>> +
>>>>> +     create.extensions = to_user_pointer(&ext);
>>>>> +     igt_assert_eq(create_ext_ioctl(i915, &create), -EINVAL);
>>>>> +
>>>>> +     ext.param.param = I915_CONTEXT_PARAM_PRIORITY;
>>>>> +     if (create_ext_ioctl(i915, &create) != -ENODEV) {
>>>>> +             gem_context_destroy(i915, create.ctx_id);
>>>>> +
>>>>> +             ext.base.next_extension = -1ull;
>>>>> +             igt_assert_eq(create_ext_ioctl(i915, &create), -EFAULT);
>>>>> +             ext.base.next_extension = to_user_pointer(&ext);
>>>>> +             igt_assert_eq(create_ext_ioctl(i915, &create), -E2BIG);
>>>>> +             ext.base.next_extension = 0;
>>>>> +
>>>>> +             ext.param.value = 32;
>>>>> +             igt_assert_eq(create_ext_ioctl(i915, &create), 0);
>>>>> +
>>>>> +             memset(&get, 0, sizeof(get));
>>>>> +             get.ctx_id = create.ctx_id;
>>>>> +             get.param = I915_CONTEXT_PARAM_PRIORITY;
>>>>> +             gem_context_get_param(i915, &get);
>>>>> +             igt_assert_eq(get.value, ext.param.value);
>>>>> +
>>>>> +             gem_context_destroy(i915, create.ctx_id);
>>>>> +     }
>>>>> +}
>>>>> +
>>>>> +static void check_single_timeline(int i915, uint32_t ctx, int num_engines)
>>>>> +{
>>>>> +#define RCS_TIMESTAMP (0x2000 + 0x358)
>>>>> +     const int gen = intel_gen(intel_get_drm_devid(i915));
>>>>> +     const int has_64bit_reloc = gen >= 8;
>>>>> +     struct drm_i915_gem_exec_object2 results = { .handle = gem_create(i915, 4096) };
>>>>> +     const uint32_t bbe = MI_BATCH_BUFFER_END;
>>>>> +     int timeline = sw_sync_timeline_create();
>>>>> +     uint32_t last, *map;
>>>>> +
>>>>> +     {
>>>>> +             struct drm_i915_gem_execbuffer2 execbuf = {
>>>>> +                     .buffers_ptr = to_user_pointer(&results),
>>>>> +                     .buffer_count = 1,
>>>>> +                     .rsvd1 = ctx,
>>>>> +             };
>>>>> +             gem_write(i915, results.handle, 0, &bbe, sizeof(bbe));
>>>>> +             gem_execbuf(i915, &execbuf);
>>>>> +             results.flags = EXEC_OBJECT_PINNED;
>>>>> +     }
>>>>> +
>>>>> +     for (int i = 0; i < num_engines; i++) {
>>>>> +             struct drm_i915_gem_exec_object2 obj[2] = {
>>>>> +                     results, /* write hazard lies! */
>>>>> +                     { .handle = gem_create(i915, 4096) },
>>>>> +             };
>>>>> +             struct drm_i915_gem_execbuffer2 execbuf = {
>>>>> +                     .buffers_ptr = to_user_pointer(obj),
>>>>> +                     .buffer_count = 2,
>>>>> +                     .rsvd1 = ctx,
>>>>> +                     .rsvd2 = sw_sync_timeline_create_fence(timeline, num_engines - i),
>>>>> +                     .flags = i | I915_EXEC_FENCE_IN,
>>>>> +             };
>>>>> +             uint64_t offset = results.offset + 4 * i;
>>>>> +             uint32_t *cs;
>>>>> +             int j = 0;
>>>>> +
>>>>> +             cs = gem_mmap__cpu(i915, obj[1].handle, 0, 4096, PROT_WRITE);
>>>>> +
>>>>> +             cs[j] = 0x24 << 23 | 1; /* SRM */
>>>>> +             if (has_64bit_reloc)
>>>>> +                     cs[j]++;
>>>>> +             j++;
>>>>> +             cs[j++] = RCS_TIMESTAMP;
>>>>> +             cs[j++] = offset;
>>>>> +             if (has_64bit_reloc)
>>>>> +                     cs[j++] = offset >> 32;
>>>>> +             cs[j++] = MI_BATCH_BUFFER_END;
>>>>> +
>>>>> +             munmap(cs, 4096);
>>>>> +
>>>>> +             gem_execbuf(i915, &execbuf);
>>>>> +             gem_close(i915, obj[1].handle);
>>>>> +             close(execbuf.rsvd2);
>>>>> +     }
>>>>> +     close(timeline);
>>>>> +     gem_sync(i915, results.handle);
>>>>> +
>>>>> +     map = gem_mmap__cpu(i915, results.handle, 0, 4096, PROT_READ);
>>>>> +     gem_set_domain(i915, results.handle, I915_GEM_DOMAIN_CPU, 0);
>>>>> +     gem_close(i915, results.handle);
>>>>> +
>>>>> +     last = map[0];
>>>>> +     for (int i = 1; i < num_engines; i++) {
>>>>> +             igt_assert_f((map[i] - last) > 0,
>>>>> +                          "Engine instance [%d] executed too early: this:%x, last:%x\n",
>>>>> +                          i, map[i], last);
>>>>> +             last = map[i];
>>>>> +     }
>>>>
>>>> Hm.. aren't two sw fences (two seqnos) just a needless complication -
>>>> since the execution order in the single timeline is controlled by
>>>> submission order. The statement is true only when compounded with the
>>>> fact that you signal both fences at the same time. I am thinking about
>>>> what would happen if it wasn't a single timeline context. Fences would be
>>>> signaled in order, but execution does not have to happen in order. That
>>>> it does is a property of single timeline and not fence ordering. So two
>>>> input fences with two seqnos is misleading. A single plug would do, I think.
>>>
>>> But that would not detect the case when it was multiple timelines...
>>>    
>>>> Or you are thinking to nudge the driver to do the right thing? But in
>>>> that case I think you'd need to manually advance the first seqno (2nd
>>>> batch) first and wait a bit to check it hasn't been executed. Then signal
>>>> the second seqno (first batch) and run the above check to see they have
>>>> been executed in order.
>>>
>>> The challenge is that we detect if the driver uses 2 timelines instead
>>> of one. So that is what we set up to detect.
>>
>> With a single seqno advance, what determines the order of signal delivery
>> on blocked fences? Is it defined in the dma-fence contract that it happens in
>> order? If it is, then is it defined that it would map to in-order
>> submission in i915 (if the contexts/timelines were separate)? (It might
>> not, I am thinking; the scheduler can decide whatever it wants.)
> 
> We don't emit signals in order. Mostly we do, but not strictly.
>   
>> So I don't see a problem with being more explicit in this test and doing
>> a step-by-step timeline advance, so that what happens is completely under
>> the test's control. And it would AFAICS detect the two timelines, because
>> it would expect that the first timeline advance must not result in request execution.
> 
> I don't see the problem with the test; it does what I need it to.

Now my memory on how exactly this test works has slightly faded.

Let me put it this way - if you just removed the single timeline flag 
from the test and left the rest as is - would it 100% reliably fail? If 
it would pass 100%, then is it because of undefined implementation 
details rather than the ABI contract?

Regards,

Tvrtko


* Re: [PATCH i-g-t 10/16] i915/gem_exec_whisper: Fork all-engine tests one-per-engine
  2019-05-16  8:57         ` [igt-dev] " Tvrtko Ursulin
@ 2019-05-22 10:59           ` Chris Wilson
  -1 siblings, 0 replies; 92+ messages in thread
From: Chris Wilson @ 2019-05-22 10:59 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx; +Cc: igt-dev

Quoting Tvrtko Ursulin (2019-05-16 09:57:08)
> 
> On 15/05/2019 20:35, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2019-05-14 13:57:26)
> >>
> >> On 08/05/2019 11:09, Chris Wilson wrote:
> >>> Add a new mode for some more stress, submit the all-engines tests
> >>> simultaneously, a stream per engine.
> >>>
> >>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >>> ---
> >>>    tests/i915/gem_exec_whisper.c | 27 ++++++++++++++++++++++-----
> >>>    1 file changed, 22 insertions(+), 5 deletions(-)
> >>>
> >>> diff --git a/tests/i915/gem_exec_whisper.c b/tests/i915/gem_exec_whisper.c
> >>> index d3e0b0ba2..d5afc8119 100644
> >>> --- a/tests/i915/gem_exec_whisper.c
> >>> +++ b/tests/i915/gem_exec_whisper.c
> >>> @@ -88,6 +88,7 @@ static void verify_reloc(int fd, uint32_t handle,
> >>>    #define SYNC 0x40
> >>>    #define PRIORITY 0x80
> >>>    #define QUEUES 0x100
> >>> +#define ALL 0x200
> >>>    
> >>>    struct hang {
> >>>        struct drm_i915_gem_exec_object2 obj;
> >>> @@ -199,6 +200,7 @@ static void whisper(int fd, unsigned engine, unsigned flags)
> >>>        uint64_t old_offset;
> >>>        int i, n, loc;
> >>>        int debugfs;
> >>> +     int nchild;
> >>>    
> >>>        if (flags & PRIORITY) {
> >>>                igt_require(gem_scheduler_enabled(fd));
> >>> @@ -215,6 +217,7 @@ static void whisper(int fd, unsigned engine, unsigned flags)
> >>>                                engines[nengine++] = engine;
> >>>                }
> >>>        } else {
> >>> +             igt_assert(!(flags & ALL));
> >>>                igt_require(gem_has_ring(fd, engine));
> >>>                igt_require(gem_can_store_dword(fd, engine));
> >>>                engines[nengine++] = engine;
> >>> @@ -233,11 +236,22 @@ static void whisper(int fd, unsigned engine, unsigned flags)
> >>>        if (flags & HANG)
> >>>                init_hang(&hang);
> >>>    
> >>> +     nchild = 1;
> >>> +     if (flags & FORKED)
> >>> +             nchild *= sysconf(_SC_NPROCESSORS_ONLN);
> >>> +     if (flags & ALL)
> >>> +             nchild *= nengine;
> >>> +
> >>>        intel_detect_and_clear_missed_interrupts(fd);
> >>>        gpu_power_read(&power, &sample[0]);
> >>> -     igt_fork(child, flags & FORKED ? sysconf(_SC_NPROCESSORS_ONLN) : 1)  {
> >>> +     igt_fork(child, nchild) {
> >>>                unsigned int pass;
> >>>    
> >>> +             if (flags & ALL) {
> >>> +                     engines[0] = engines[child % nengine];
> >>
> >> Relying on PIDs being sequential feels fragile but suggesting pipes or
> >> shared memory would be overkill. How about another loop:
> > 
> > Where are you getting pid_t from? child is an integer [0, nchild).
> 
> Add a core helper to get it?
> 
> I am coming from an angle that I remember some time in the past there
> was a security thing which randomized pid allocation. TBH I am not sure
> if that still exists, but if it does then it would not be good for this
> test. It may be a moot point to think such security hardening measures would
> be active on a machine running IGT tests.. hm.. not sure. But it is
> still quite a hidden assumption.

But we are not using pid_t here. It is just an array of child processes,
with each child getting its own engine, selected by the child index.
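
A minimal sketch of that pattern; igt_fork()/igt_waitchildren() are the
real helpers, the body is illustrative:

    /* igt_fork() binds 'child' to the loop index [0, nchild) inside
     * each forked process -- no pid arithmetic is involved. */
    igt_fork(child, nchild) {
            unsigned int engine = engines[child % nengine];

            whisper_on_engine(fd, engine); /* stand-in for the body */
    }
    igt_waitchildren();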
-Chris


* Re: [PATCH i-g-t 10/16] i915/gem_exec_whisper: Fork all-engine tests one-per-engine
  2019-05-22 10:59           ` [igt-dev] " Chris Wilson
@ 2019-05-22 11:39             ` Tvrtko Ursulin
  -1 siblings, 0 replies; 92+ messages in thread
From: Tvrtko Ursulin @ 2019-05-22 11:39 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: igt-dev


On 22/05/2019 11:59, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-05-16 09:57:08)
>>
>> On 15/05/2019 20:35, Chris Wilson wrote:
>>> Quoting Tvrtko Ursulin (2019-05-14 13:57:26)
>>>>
>>>> On 08/05/2019 11:09, Chris Wilson wrote:
>>>>> Add a new mode for some more stress, submit the all-engines tests
>>>>> simultaneously, a stream per engine.
>>>>>
>>>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>>>> ---
>>>>>     tests/i915/gem_exec_whisper.c | 27 ++++++++++++++++++++++-----
>>>>>     1 file changed, 22 insertions(+), 5 deletions(-)
>>>>>
>>>>> diff --git a/tests/i915/gem_exec_whisper.c b/tests/i915/gem_exec_whisper.c
>>>>> index d3e0b0ba2..d5afc8119 100644
>>>>> --- a/tests/i915/gem_exec_whisper.c
>>>>> +++ b/tests/i915/gem_exec_whisper.c
>>>>> @@ -88,6 +88,7 @@ static void verify_reloc(int fd, uint32_t handle,
>>>>>     #define SYNC 0x40
>>>>>     #define PRIORITY 0x80
>>>>>     #define QUEUES 0x100
>>>>> +#define ALL 0x200
>>>>>     
>>>>>     struct hang {
>>>>>         struct drm_i915_gem_exec_object2 obj;
>>>>> @@ -199,6 +200,7 @@ static void whisper(int fd, unsigned engine, unsigned flags)
>>>>>         uint64_t old_offset;
>>>>>         int i, n, loc;
>>>>>         int debugfs;
>>>>> +     int nchild;
>>>>>     
>>>>>         if (flags & PRIORITY) {
>>>>>                 igt_require(gem_scheduler_enabled(fd));
>>>>> @@ -215,6 +217,7 @@ static void whisper(int fd, unsigned engine, unsigned flags)
>>>>>                                 engines[nengine++] = engine;
>>>>>                 }
>>>>>         } else {
>>>>> +             igt_assert(!(flags & ALL));
>>>>>                 igt_require(gem_has_ring(fd, engine));
>>>>>                 igt_require(gem_can_store_dword(fd, engine));
>>>>>                 engines[nengine++] = engine;
>>>>> @@ -233,11 +236,22 @@ static void whisper(int fd, unsigned engine, unsigned flags)
>>>>>         if (flags & HANG)
>>>>>                 init_hang(&hang);
>>>>>     
>>>>> +     nchild = 1;
>>>>> +     if (flags & FORKED)
>>>>> +             nchild *= sysconf(_SC_NPROCESSORS_ONLN);
>>>>> +     if (flags & ALL)
>>>>> +             nchild *= nengine;
>>>>> +
>>>>>         intel_detect_and_clear_missed_interrupts(fd);
>>>>>         gpu_power_read(&power, &sample[0]);
>>>>> -     igt_fork(child, flags & FORKED ? sysconf(_SC_NPROCESSORS_ONLN) : 1)  {
>>>>> +     igt_fork(child, nchild) {
>>>>>                 unsigned int pass;
>>>>>     
>>>>> +             if (flags & ALL) {
>>>>> +                     engines[0] = engines[child % nengine];
>>>>
>>>> Relying on PIDs being sequential feels fragile but suggesting pipes or
>>>> shared memory would be overkill. How about another loop:
>>>
>>> Where are you getting pid_t from? child is an integer [0, nchild).
>>
>> Add a core helper to get it?
>>
>> I am coming from an angle that I remember some time in the past there
>> was a security thing which randomized pid allocation. TBH I am not sure
>> if that still exists, but if it does then it would not be good for this
>> test. It may be a moot point to think such security hardening measures would
>> be active on a machine running IGT tests.. hm.. not sure. But it is
>> still quite a hidden assumption.
> 
> But we are not using pid_t here. It is just an array of child processes,
> with each child getting its own engine, selected by the child index.

Oh right.. both are the same. Sorry, context-switching fail on my part..

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko


end of thread

Thread overview: 92+ messages
2019-05-08 10:09 [PATCH i-g-t 01/16] i915/gem_exec_schedule: Semaphore priority fixups Chris Wilson
2019-05-08 10:09 ` [igt-dev] " Chris Wilson
2019-05-08 10:09 ` [PATCH i-g-t 02/16] drm-uapi: Import i915_drm.h upto 53073249452d Chris Wilson
2019-05-08 10:09   ` [igt-dev] " Chris Wilson
2019-05-08 10:09 ` [PATCH i-g-t 03/16] i915: Add gem_vm_create Chris Wilson
2019-05-08 10:09   ` [igt-dev] " Chris Wilson
2019-05-08 10:09 ` [PATCH i-g-t 04/16] i915/gem_ctx_param: Test set/get (copy) VM Chris Wilson
2019-05-08 10:09   ` [igt-dev] " Chris Wilson
2019-05-14  9:47   ` Tvrtko Ursulin
2019-05-14  9:47     ` [igt-dev] " Tvrtko Ursulin
2019-05-08 10:09 ` [PATCH i-g-t 05/16] i915/gem_ctx_create: Basic checks for constructor properties Chris Wilson
2019-05-08 10:09   ` [igt-dev] " Chris Wilson
2019-05-14 10:15   ` Tvrtko Ursulin
2019-05-14 10:15     ` [igt-dev] " Tvrtko Ursulin
2019-05-15 19:05     ` Chris Wilson
2019-05-15 19:05       ` [igt-dev] " Chris Wilson
2019-05-16  8:38       ` Tvrtko Ursulin
2019-05-16  8:38         ` [igt-dev] " Tvrtko Ursulin
2019-05-22 10:24         ` Chris Wilson
2019-05-22 10:24           ` [igt-dev] " Chris Wilson
2019-05-22 10:32           ` Tvrtko Ursulin
2019-05-22 10:32             ` [igt-dev] " Tvrtko Ursulin
2019-05-14 12:27   ` Tvrtko Ursulin
2019-05-14 12:27     ` [igt-dev] " Tvrtko Ursulin
2019-05-15 19:06     ` Chris Wilson
2019-05-15 19:06       ` [igt-dev] " Chris Wilson
2019-05-15 19:09       ` Chris Wilson
2019-05-15 19:09         ` [igt-dev] " Chris Wilson
2019-05-08 10:09 ` [PATCH i-g-t 06/16] drm-uapi: Import i915_drm.h upto 364df3d04d51 Chris Wilson
2019-05-08 10:09   ` [igt-dev] " Chris Wilson
2019-05-08 10:09 ` [PATCH i-g-t 07/16] i915: Add gem_ctx_clone Chris Wilson
2019-05-08 10:09   ` [igt-dev] " Chris Wilson
2019-05-14 12:41   ` Tvrtko Ursulin
2019-05-14 12:41     ` [igt-dev] " Tvrtko Ursulin
2019-05-15 19:14     ` Chris Wilson
2019-05-15 19:14       ` [igt-dev] " Chris Wilson
2019-05-08 10:09 ` [PATCH i-g-t 08/16] i915: Exercise creating context with shared GTT Chris Wilson
2019-05-08 10:09   ` [igt-dev] " Chris Wilson
2019-05-15  6:37   ` Tvrtko Ursulin
2019-05-15  6:37     ` [igt-dev] " Tvrtko Ursulin
2019-05-15 19:33     ` Chris Wilson
2019-05-15 19:33       ` [igt-dev] " Chris Wilson
2019-05-16  8:51       ` Tvrtko Ursulin
2019-05-16  8:51         ` [igt-dev] " Tvrtko Ursulin
2019-05-08 10:09 ` [PATCH i-g-t 09/16] i915/gem_ctx_switch: Exercise queues Chris Wilson
2019-05-08 10:09   ` [igt-dev] " Chris Wilson
2019-05-14 12:47   ` Tvrtko Ursulin
2019-05-14 12:47     ` [igt-dev] " Tvrtko Ursulin
2019-05-08 10:09 ` [PATCH i-g-t 10/16] i915/gem_exec_whisper: Fork all-engine tests one-per-engine Chris Wilson
2019-05-08 10:09   ` [igt-dev] " Chris Wilson
2019-05-14 12:57   ` Tvrtko Ursulin
2019-05-14 12:57     ` [igt-dev] " Tvrtko Ursulin
2019-05-15 19:35     ` Chris Wilson
2019-05-15 19:35       ` [igt-dev] " Chris Wilson
2019-05-16  8:57       ` Tvrtko Ursulin
2019-05-16  8:57         ` [igt-dev] " Tvrtko Ursulin
2019-05-22 10:59         ` Chris Wilson
2019-05-22 10:59           ` [igt-dev] " Chris Wilson
2019-05-22 11:39           ` Tvrtko Ursulin
2019-05-22 11:39             ` [igt-dev] " Tvrtko Ursulin
2019-05-08 10:09 ` [PATCH i-g-t 11/16] i915/gem_exec_whisper: debugfs/next_seqno is defunct Chris Wilson
2019-05-08 10:09   ` [Intel-gfx] " Chris Wilson
2019-05-14 12:48   ` Tvrtko Ursulin
2019-05-14 12:48     ` [igt-dev] " Tvrtko Ursulin
2019-05-08 10:09 ` [PATCH i-g-t 12/16] i915: Add gem_ctx_engines Chris Wilson
2019-05-08 10:09   ` [igt-dev] " Chris Wilson
2019-05-08 10:09 ` [PATCH i-g-t 13/16] i915: Add gem_exec_balancer Chris Wilson
2019-05-08 10:09   ` [igt-dev] " Chris Wilson
2019-05-15 10:49   ` Tvrtko Ursulin
2019-05-15 10:49     ` [igt-dev] " Tvrtko Ursulin
2019-05-15 19:50     ` Chris Wilson
2019-05-15 19:50       ` [igt-dev] " Chris Wilson
2019-05-16  9:20       ` Tvrtko Ursulin
2019-05-16  9:20         ` [igt-dev] " Tvrtko Ursulin
2019-05-08 10:09 ` [PATCH i-g-t 14/16] i915/gem_exec_balancer: Exercise bonded pairs Chris Wilson
2019-05-08 10:09   ` [igt-dev] " Chris Wilson
2019-05-15 10:58   ` Tvrtko Ursulin
2019-05-15 10:58     ` [igt-dev] " Tvrtko Ursulin
2019-05-15 19:57     ` Chris Wilson
2019-05-15 19:57       ` [igt-dev] " Chris Wilson
2019-05-15 20:32       ` Chris Wilson
2019-05-15 20:32         ` [igt-dev] " Chris Wilson
2019-05-16  9:28         ` Tvrtko Ursulin
2019-05-16  9:28           ` [igt-dev] " Tvrtko Ursulin
2019-05-08 10:09 ` [PATCH i-g-t 15/16] i915/gem_exec_latency: Measure the latency of context switching Chris Wilson
2019-05-08 10:09   ` [Intel-gfx] " Chris Wilson
2019-05-08 10:09 ` [PATCH i-g-t 16/16] i915/gem_exec_latency: Add another variant of waiter latency Chris Wilson
2019-05-08 10:09   ` [Intel-gfx] " Chris Wilson
2019-05-08 10:41 ` [igt-dev] ✓ Fi.CI.BAT: success for series starting with [i-g-t,01/16] i915/gem_exec_schedule: Semaphore priority fixups Patchwork
2019-05-08 12:38 ` [igt-dev] ✗ Fi.CI.IGT: failure " Patchwork
2019-05-14  9:39 ` [PATCH i-g-t 01/16] " Tvrtko Ursulin
2019-05-14  9:39   ` [igt-dev] " Tvrtko Ursulin
