All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH igt 1/3] benchmark/gem_busy: Compare polling with syncobj_wait
@ 2017-09-25 20:26 Chris Wilson
  2017-09-25 20:26 ` [PATCH igt 2/3] benchmarks/gem_syslatency: Apply vmpressure, measure page allocation Chris Wilson
                   ` (4 more replies)
  0 siblings, 5 replies; 15+ messages in thread
From: Chris Wilson @ 2017-09-25 20:26 UTC (permalink / raw)
  To: intel-gfx

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 benchmarks/gem_busy.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 72 insertions(+), 1 deletion(-)

diff --git a/benchmarks/gem_busy.c b/benchmarks/gem_busy.c
index f050454b..9649ea02 100644
--- a/benchmarks/gem_busy.c
+++ b/benchmarks/gem_busy.c
@@ -58,6 +58,15 @@
 #define DMABUF 0x4
 #define WAIT 0x8
 #define SYNC 0x10
+#define SYNCOBJ 0x20
+
+#define LOCAL_I915_EXEC_FENCE_ARRAY (1 << 19)
+struct local_gem_exec_fence {
+	uint32_t handle;
+	uint32_t flags;
+#define LOCAL_EXEC_FENCE_WAIT (1 << 0)
+#define LOCAL_EXEC_FENCE_SIGNAL (1 << 1)
+};
 
 static void gem_busy(int fd, uint32_t handle)
 {
@@ -109,11 +118,54 @@ static int sync_merge(int fd1, int fd2)
 	return data.fence;
 }
 
+static uint32_t __syncobj_create(int fd)
+{
+	struct local_syncobj_create {
+		uint32_t handle, flags;
+	} arg;
+#define LOCAL_IOCTL_SYNCOBJ_CREATE        DRM_IOWR(0xBF, struct local_syncobj_create)
+
+	memset(&arg, 0, sizeof(arg));
+	ioctl(fd, LOCAL_IOCTL_SYNCOBJ_CREATE, &arg);
+
+	return arg.handle;
+}
+
+static uint32_t syncobj_create(int fd)
+{
+	uint32_t ret;
+
+	igt_assert_neq((ret = __syncobj_create(fd)), 0);
+
+	return ret;
+}
+
+#define LOCAL_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
+#define LOCAL_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1)
+struct local_syncobj_wait {
+       __u64 handles;
+       /* absolute timeout */
+       __s64 timeout_nsec;
+       __u32 count_handles;
+       __u32 flags;
+       __u32 first_signaled; /* only valid when not waiting all */
+       __u32 pad;
+};
+#define LOCAL_IOCTL_SYNCOBJ_WAIT	DRM_IOWR(0xC3, struct local_syncobj_wait)
+static int __syncobj_wait(int fd, struct local_syncobj_wait *args)
+{
+	int err = 0;
+	if (drmIoctl(fd, LOCAL_IOCTL_SYNCOBJ_WAIT, args))
+		err = -errno;
+	return err;
+}
+
 static int loop(unsigned ring, int reps, int ncpus, unsigned flags)
 {
 	struct drm_i915_gem_execbuffer2 execbuf;
 	struct drm_i915_gem_exec_object2 obj[2];
 	struct drm_i915_gem_relocation_entry reloc[2];
+	struct local_gem_exec_fence syncobj;
 	unsigned engines[16];
 	unsigned nengine;
 	uint32_t *batch;
@@ -126,6 +178,11 @@ static int loop(unsigned ring, int reps, int ncpus, unsigned flags)
 	fd = drm_open_driver(DRIVER_INTEL);
 	gen = intel_gen(intel_get_drm_devid(fd));
 
+	if (flags & SYNCOBJ) {
+		syncobj.handle = syncobj_create(fd);
+		syncobj.flags = LOCAL_EXEC_FENCE_SIGNAL;
+	}
+
 	memset(obj, 0, sizeof(obj));
 	obj[0].handle = gem_create(fd, 4096);
 	if (flags & WRITE)
@@ -144,6 +201,8 @@ static int loop(unsigned ring, int reps, int ncpus, unsigned flags)
 	execbuf.buffer_count = 2;
 	execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT;
 	execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;
+	if (flags & SYNCOBJ)
+		execbuf.flags |= LOCAL_I915_EXEC_FENCE_ARRAY;
 	if (__gem_execbuf(fd, &execbuf)) {
 		execbuf.flags = 0;
 		if (__gem_execbuf(fd, &execbuf))
@@ -235,6 +294,14 @@ static int loop(unsigned ring, int reps, int ncpus, unsigned flags)
 					struct pollfd pfd = { .fd = dmabuf, .events = POLLOUT };
 					for (int inner = 0; inner < 1024; inner++)
 						poll(&pfd, 1, 0);
+				} else if (flags & SYNCOBJ) {
+					struct local_syncobj_wait arg = {
+						.handles = to_user_pointer(&syncobj.handle),
+						.count_handles = 1,
+					};
+
+					for (int inner = 0; inner < 1024; inner++)
+						__syncobj_wait(fd, &arg);
 				} else if (flags & SYNC) {
 					struct pollfd pfd = { .fd = fence, .events = POLLOUT };
 					for (int inner = 0; inner < 1024; inner++)
@@ -275,7 +342,7 @@ int main(int argc, char **argv)
 	int ncpus = 1;
 	int c;
 
-	while ((c = getopt (argc, argv, "e:r:dfswWI")) != -1) {
+	while ((c = getopt (argc, argv, "e:r:dfsSwWI")) != -1) {
 		switch (c) {
 		case 'e':
 			if (strcmp(optarg, "rcs") == 0)
@@ -314,6 +381,10 @@ int main(int argc, char **argv)
 			flags |= SYNC;
 			break;
 
+		case 'S':
+			flags |= SYNCOBJ;
+			break;
+
 		case 'W':
 			flags |= WRITE;
 			break;
-- 
2.14.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH igt 2/3] benchmarks/gem_syslatency: Apply vmpressure, measure page allocation
  2017-09-25 20:26 [PATCH igt 1/3] benchmark/gem_busy: Compare polling with syncobj_wait Chris Wilson
@ 2017-09-25 20:26 ` Chris Wilson
  2017-09-27  8:52   ` Joonas Lahtinen
  2017-09-25 20:26 ` [PATCH igt 3/3] benchmarks/gem_exec_fault: Update for tryhard kernels Chris Wilson
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 15+ messages in thread
From: Chris Wilson @ 2017-09-25 20:26 UTC (permalink / raw)
  To: intel-gfx

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 benchmarks/gem_syslatency.c | 86 +++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 83 insertions(+), 3 deletions(-)

diff --git a/benchmarks/gem_syslatency.c b/benchmarks/gem_syslatency.c
index 4ed23638..b8788497 100644
--- a/benchmarks/gem_syslatency.c
+++ b/benchmarks/gem_syslatency.c
@@ -29,6 +29,7 @@
 #include <stdio.h>
 #include <string.h>
 #include <fcntl.h>
+#include <ftw.h>
 #include <inttypes.h>
 #include <pthread.h>
 #include <sched.h>
@@ -51,6 +52,7 @@ static volatile int done;
 struct gem_busyspin {
 	pthread_t thread;
 	unsigned long count;
+	bool leak;
 };
 
 struct sys_wait {
@@ -93,6 +95,7 @@ static void *gem_busyspin(void *arg)
 	struct gem_busyspin *bs = arg;
 	struct drm_i915_gem_execbuffer2 execbuf;
 	struct drm_i915_gem_exec_object2 obj;
+	const unsigned sz = bs->leak ? 16 << 20 : 4 << 10;
 	unsigned engines[16];
 	unsigned nengine;
 	unsigned engine;
@@ -105,7 +108,7 @@ static void *gem_busyspin(void *arg)
 		if (!ignore_engine(fd, engine)) engines[nengine++] = engine;
 
 	memset(&obj, 0, sizeof(obj));
-	obj.handle = gem_create(fd, 4096);
+	obj.handle = gem_create(fd, sz);
 	gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
 
 	memset(&execbuf, 0, sizeof(execbuf));
@@ -125,6 +128,11 @@ static void *gem_busyspin(void *arg)
 			gem_execbuf(fd, &execbuf);
 		}
 		bs->count += nengine;
+		if (bs->leak) {
+			gem_madvise(fd, obj.handle, I915_MADV_DONTNEED);
+			obj.handle = gem_create(fd, sz);
+			gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
+		}
 	}
 
 	close(fd);
@@ -180,6 +188,33 @@ static void *sys_wait(void *arg)
 	return NULL;
 }
 
+static void *sys_thp_alloc(void *arg)
+{
+	struct sys_wait *w = arg;
+	struct timespec now;
+
+	clock_gettime(CLOCK_MONOTONIC, &now);
+	while (!done) {
+		const size_t sz = 2 << 20;
+		const struct timespec start = now;
+		void *ptr;
+
+		ptr = mmap(NULL, sz,
+			   PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS,
+			   -1, 0);
+		assert(ptr != MAP_FAILED);
+		madvise(ptr, sz, MADV_HUGEPAGE);
+		for (int page = 0; page < 2 << 20 >> 12; page++)
+			*((volatile uint32_t *)ptr + (page << 12 >> 2)) = 0;
+		munmap(ptr, sz);
+
+		clock_gettime(CLOCK_MONOTONIC, &now);
+		igt_mean_add(&w->mean, elapsed(&start, &now));
+	}
+
+	return NULL;
+}
+
 static void bind_cpu(pthread_attr_t *attr, int cpu)
 {
 #ifdef __USE_GNU
@@ -229,20 +264,51 @@ static double min_measurement_error(void)
 	return elapsed(&start, &end) / n;
 }
 
+static int print_entry(const char *filepath, const struct stat *info,
+		       const int typeflag, struct FTW *pathinfo)
+{
+	int fd;
+
+	fd = open(filepath, O_RDONLY);
+	if (fd != -1)  {
+		void *ptr;
+
+		ptr = mmap(NULL, info->st_size,
+			   PROT_READ, MAP_SHARED | MAP_POPULATE,
+			   fd, 0);
+		if (ptr != MAP_FAILED)
+			munmap(ptr, info->st_size);
+
+		close(fd);
+	}
+
+	return 0;
+}
+
+static void *background_fs(void *path)
+{
+	while (1)
+		nftw(path, print_entry, 20, FTW_PHYS | FTW_MOUNT);
+	return NULL;
+}
+
 int main(int argc, char **argv)
 {
 	struct gem_busyspin *busy;
 	struct sys_wait *wait;
+	void *sys_fn = sys_wait;
 	pthread_attr_t attr;
+	pthread_t bg_fs = 0;
 	int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
 	igt_stats_t cycles, mean, max;
 	double min;
 	int time = 10;
 	int field = -1;
 	int enable_gem_sysbusy = 1;
+	bool leak = false;
 	int n, c;
 
-	while ((c = getopt(argc, argv, "t:f:n")) != -1) {
+	while ((c = getopt(argc, argv, "t:f:bmn")) != -1) {
 		switch (c) {
 		case 'n': /* dry run, measure baseline system latency */
 			enable_gem_sysbusy = 0;
@@ -257,6 +323,15 @@ int main(int argc, char **argv)
 			/* Select an output field */
 			field = atoi(optarg);
 			break;
+		case 'b':
+			pthread_create(&bg_fs, NULL,
+				       background_fs, (void *)"/");
+			sleep(5);
+			break;
+		case 'm':
+			sys_fn = sys_thp_alloc;
+			leak = true;
+			break;
 		default:
 			break;
 		}
@@ -271,6 +346,7 @@ int main(int argc, char **argv)
 	if (enable_gem_sysbusy) {
 		for (n = 0; n < ncpus; n++) {
 			bind_cpu(&attr, n);
+			busy[n].leak = leak;
 			pthread_create(&busy[n].thread, &attr,
 				       gem_busyspin, &busy[n]);
 		}
@@ -282,7 +358,7 @@ int main(int argc, char **argv)
 	for (n = 0; n < ncpus; n++) {
 		igt_mean_init(&wait[n].mean);
 		bind_cpu(&attr, n);
-		pthread_create(&wait[n].thread, &attr, sys_wait, &wait[n]);
+		pthread_create(&wait[n].thread, &attr, sys_fn, &wait[n]);
 	}
 
 	sleep(time);
@@ -303,6 +379,10 @@ int main(int argc, char **argv)
 		igt_stats_push_float(&mean, wait[n].mean.mean);
 		igt_stats_push_float(&max, wait[n].mean.max);
 	}
+	if (bg_fs) {
+		pthread_cancel(bg_fs);
+		pthread_join(bg_fs, NULL);
+	}
 
 	switch (field) {
 	default:
-- 
2.14.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH igt 3/3] benchmarks/gem_exec_fault: Update for tryhard kernels.
  2017-09-25 20:26 [PATCH igt 1/3] benchmark/gem_busy: Compare polling with syncobj_wait Chris Wilson
  2017-09-25 20:26 ` [PATCH igt 2/3] benchmarks/gem_syslatency: Apply vmpressure, measure page allocation Chris Wilson
@ 2017-09-25 20:26 ` Chris Wilson
  2017-09-26 10:56   ` Matthew Auld
  2017-09-25 21:06 ` ✓ Fi.CI.BAT: success for series starting with [1/3] benchmark/gem_busy: Compare polling with syncobj_wait Patchwork
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 15+ messages in thread
From: Chris Wilson @ 2017-09-25 20:26 UTC (permalink / raw)
  To: intel-gfx; +Cc: Matthew Auld

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Matthew Auld <matthew.auld@intel.com>
---
 benchmarks/gem_exec_fault.c | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/benchmarks/gem_exec_fault.c b/benchmarks/gem_exec_fault.c
index a5446ea1..21bdbc9b 100644
--- a/benchmarks/gem_exec_fault.c
+++ b/benchmarks/gem_exec_fault.c
@@ -60,15 +60,15 @@ static double elapsed(const struct timespec *start,
 	return (end->tv_sec - start->tv_sec) + 1e-9*(end->tv_nsec - start->tv_nsec);
 }
 
-static uint32_t batch(int fd, int size)
+static uint32_t batch(int fd, uint64_t size)
 {
-	const uint32_t buf[] = {MI_BATCH_BUFFER_END};
+	const uint32_t bbe = MI_BATCH_BUFFER_END;
 	uint32_t handle = gem_create(fd, size);
-	gem_write(fd, handle, 0, buf, sizeof(buf));
+	gem_write(fd, handle, 0, &bbe, sizeof(bbe));
 	return handle;
 }
 
-static int loop(int size, unsigned ring, int reps, int ncpus, unsigned flags)
+static int loop(uint64_t size, unsigned ring, int reps, int ncpus, unsigned flags)
 {
 	struct drm_i915_gem_execbuffer2 execbuf;
 	struct drm_i915_gem_exec_object2 obj;
@@ -82,7 +82,7 @@ static int loop(int size, unsigned ring, int reps, int ncpus, unsigned flags)
 	fd = drm_open_driver(DRIVER_INTEL);
 
 	memset(&obj, 0, sizeof(obj));
-	obj.handle = batch(fd, size);
+	obj.handle = batch(fd, 4096);
 
 	memset(&execbuf, 0, sizeof(execbuf));
 	execbuf.buffers_ptr = (uintptr_t)&obj;
@@ -94,7 +94,7 @@ static int loop(int size, unsigned ring, int reps, int ncpus, unsigned flags)
 		if (__gem_execbuf(fd, &execbuf))
 			return 77;
 	}
-	gem_close(fd, obj.handle);
+	/* let the small object leak; ideally blocking the low address */
 
 	nengine = 0;
 	if (ring == -1) {
@@ -107,6 +107,9 @@ static int loop(int size, unsigned ring, int reps, int ncpus, unsigned flags)
 	} else
 		engines[nengine++] = ring;
 
+	if (size > 1ul << 31)
+		obj.flags |= 1 << 3;
+
 	while (reps--) {
 		memset(shared, 0, 4096);
 
@@ -115,6 +118,7 @@ static int loop(int size, unsigned ring, int reps, int ncpus, unsigned flags)
 			unsigned count = 0;
 
 			obj.handle = batch(fd, size);
+			obj.offset = -1;
 
 			clock_gettime(CLOCK_MONOTONIC, &start);
 			do {
@@ -152,7 +156,7 @@ int main(int argc, char **argv)
 {
 	unsigned ring = I915_EXEC_RENDER;
 	unsigned flags = 0;
-	int size = 4096;
+	uint64_t size = 4096;
 	int reps = 1;
 	int ncpus = 1;
 	int c;
@@ -185,7 +189,7 @@ int main(int argc, char **argv)
 			break;
 
 		case 's':
-			size = atoi(optarg);
+			size = strtoull(optarg, NULL, 0);
 			if (size < 4096)
 				size = 4096;
 			break;
-- 
2.14.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* ✓ Fi.CI.BAT: success for series starting with [1/3] benchmark/gem_busy: Compare polling with syncobj_wait
  2017-09-25 20:26 [PATCH igt 1/3] benchmark/gem_busy: Compare polling with syncobj_wait Chris Wilson
  2017-09-25 20:26 ` [PATCH igt 2/3] benchmarks/gem_syslatency: Apply vmpressure, measure page allocation Chris Wilson
  2017-09-25 20:26 ` [PATCH igt 3/3] benchmarks/gem_exec_fault: Update for tryhard kernels Chris Wilson
@ 2017-09-25 21:06 ` Patchwork
  2017-09-26  2:35 ` ✓ Fi.CI.IGT: " Patchwork
  2017-09-28  6:53 ` [PATCH igt 1/3] " Tvrtko Ursulin
  4 siblings, 0 replies; 15+ messages in thread
From: Patchwork @ 2017-09-25 21:06 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/3] benchmark/gem_busy: Compare polling with syncobj_wait
URL   : https://patchwork.freedesktop.org/series/30858/
State : success

== Summary ==

IGT patchset tested on top of latest successful build
c117213c06d0f47937c1f225ebead5e1fe8c7a0e igt/gem_exec_whisper: Smoketest context priorities

with latest DRM-Tip kernel build CI_DRM_3131
f4eb3c100b0b drm-tip: 2017y-09m-25d-19h-36m-33s UTC integration manifest

Test kms_pipe_crc_basic:
        Subgroup suspend-read-crc-pipe-c:
                incomplete -> PASS       (fi-kbl-r)
Test pm_rpm:
        Subgroup basic-rte:
                pass       -> DMESG-WARN (fi-cfl-s) fdo#102294
Test drv_module_reload:
        Subgroup basic-reload-inject:
                dmesg-warn -> PASS       (fi-glk-1) fdo#102777

fdo#102294 https://bugs.freedesktop.org/show_bug.cgi?id=102294
fdo#102777 https://bugs.freedesktop.org/show_bug.cgi?id=102777

fi-bdw-5557u     total:289  pass:268  dwarn:0   dfail:0   fail:0   skip:21  time:443s
fi-bdw-gvtdvm    total:289  pass:265  dwarn:0   dfail:0   fail:0   skip:24  time:466s
fi-blb-e6850     total:289  pass:224  dwarn:1   dfail:0   fail:0   skip:64  time:423s
fi-bsw-n3050     total:289  pass:243  dwarn:0   dfail:0   fail:0   skip:46  time:523s
fi-bwr-2160      total:289  pass:184  dwarn:0   dfail:0   fail:0   skip:105 time:281s
fi-bxt-j4205     total:289  pass:260  dwarn:0   dfail:0   fail:0   skip:29  time:501s
fi-byt-j1900     total:289  pass:254  dwarn:1   dfail:0   fail:0   skip:34  time:500s
fi-byt-n2820     total:289  pass:250  dwarn:1   dfail:0   fail:0   skip:38  time:499s
fi-cfl-s         total:289  pass:222  dwarn:35  dfail:0   fail:0   skip:32  time:540s
fi-cnl-y         total:289  pass:257  dwarn:0   dfail:0   fail:5   skip:27  time:675s
fi-elk-e7500     total:289  pass:230  dwarn:0   dfail:0   fail:0   skip:59  time:415s
fi-glk-1         total:289  pass:260  dwarn:0   dfail:0   fail:0   skip:29  time:567s
fi-hsw-4770      total:289  pass:263  dwarn:0   dfail:0   fail:0   skip:26  time:426s
fi-hsw-4770r     total:289  pass:263  dwarn:0   dfail:0   fail:0   skip:26  time:405s
fi-ilk-650       total:289  pass:229  dwarn:0   dfail:0   fail:0   skip:60  time:431s
fi-ivb-3520m     total:289  pass:261  dwarn:0   dfail:0   fail:0   skip:28  time:494s
fi-ivb-3770      total:289  pass:261  dwarn:0   dfail:0   fail:0   skip:28  time:465s
fi-kbl-7500u     total:289  pass:264  dwarn:1   dfail:0   fail:0   skip:24  time:478s
fi-kbl-7560u     total:289  pass:270  dwarn:0   dfail:0   fail:0   skip:19  time:577s
fi-kbl-r         total:289  pass:262  dwarn:0   dfail:0   fail:0   skip:27  time:586s
fi-pnv-d510      total:289  pass:223  dwarn:1   dfail:0   fail:0   skip:65  time:539s
fi-skl-6260u     total:289  pass:269  dwarn:0   dfail:0   fail:0   skip:20  time:451s
fi-skl-6770hq    total:289  pass:269  dwarn:0   dfail:0   fail:0   skip:20  time:495s
fi-skl-gvtdvm    total:289  pass:266  dwarn:0   dfail:0   fail:0   skip:23  time:479s
fi-snb-2520m     total:289  pass:251  dwarn:0   dfail:0   fail:0   skip:38  time:564s
fi-snb-2600      total:289  pass:250  dwarn:0   dfail:0   fail:0   skip:39  time:417s

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_250/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* ✓ Fi.CI.IGT: success for series starting with [1/3] benchmark/gem_busy: Compare polling with syncobj_wait
  2017-09-25 20:26 [PATCH igt 1/3] benchmark/gem_busy: Compare polling with syncobj_wait Chris Wilson
                   ` (2 preceding siblings ...)
  2017-09-25 21:06 ` ✓ Fi.CI.BAT: success for series starting with [1/3] benchmark/gem_busy: Compare polling with syncobj_wait Patchwork
@ 2017-09-26  2:35 ` Patchwork
  2017-09-28  6:53 ` [PATCH igt 1/3] " Tvrtko Ursulin
  4 siblings, 0 replies; 15+ messages in thread
From: Patchwork @ 2017-09-26  2:35 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/3] benchmark/gem_busy: Compare polling with syncobj_wait
URL   : https://patchwork.freedesktop.org/series/30858/
State : success

== Summary ==

Test prime_mmap:
        Subgroup test_userptr:
                dmesg-warn -> PASS       (shard-hsw) fdo#102939
Test perf:
        Subgroup polling:
                pass       -> FAIL       (shard-hsw) fdo#102252 +1
Test gem_eio:
        Subgroup wait:
                dmesg-warn -> PASS       (shard-hsw) fdo#102886 +1

fdo#102939 https://bugs.freedesktop.org/show_bug.cgi?id=102939
fdo#102252 https://bugs.freedesktop.org/show_bug.cgi?id=102252
fdo#102886 https://bugs.freedesktop.org/show_bug.cgi?id=102886

shard-hsw        total:2429 pass:1325 dwarn:4   dfail:0   fail:17  skip:1083 time:9938s

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_250/shards.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH igt 3/3] benchmarks/gem_exec_fault: Update for tryhard kernels.
  2017-09-25 20:26 ` [PATCH igt 3/3] benchmarks/gem_exec_fault: Update for tryhard kernels Chris Wilson
@ 2017-09-26 10:56   ` Matthew Auld
  0 siblings, 0 replies; 15+ messages in thread
From: Matthew Auld @ 2017-09-26 10:56 UTC (permalink / raw)
  To: Chris Wilson; +Cc: Intel Graphics Development

On 25 September 2017 at 21:26, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH igt 2/3] benchmarks/gem_syslatency: Apply vmpressure, measure page allocation
  2017-09-25 20:26 ` [PATCH igt 2/3] benchmarks/gem_syslatency: Apply vmpressure, measure page allocation Chris Wilson
@ 2017-09-27  8:52   ` Joonas Lahtinen
  2017-09-27  9:36     ` Chris Wilson
  2017-09-27  9:41     ` Chris Wilson
  0 siblings, 2 replies; 15+ messages in thread
From: Joonas Lahtinen @ 2017-09-27  8:52 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On Mon, 2017-09-25 at 21:26 +0100, Chris Wilson wrote:
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  benchmarks/gem_syslatency.c | 86 +++++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 83 insertions(+), 3 deletions(-)
> 
> diff --git a/benchmarks/gem_syslatency.c b/benchmarks/gem_syslatency.c
> index 4ed23638..b8788497 100644
> --- a/benchmarks/gem_syslatency.c
> +++ b/benchmarks/gem_syslatency.c
> @@ -29,6 +29,7 @@
>  #include <stdio.h>
>  #include <string.h>
>  #include <fcntl.h>
> +#include <ftw.h>
>  #include <inttypes.h>
>  #include <pthread.h>
>  #include <sched.h>
> @@ -51,6 +52,7 @@ static volatile int done;
>  struct gem_busyspin {
>  	pthread_t thread;
>  	unsigned long count;
> +	bool leak;
>  };

I know we all love binary arithmetic, but can I still ask for


#define M()
#define K()

Or something.

>  
>  struct sys_wait {
> @@ -93,6 +95,7 @@ static void *gem_busyspin(void *arg)
>  	struct gem_busyspin *bs = arg;
>  	struct drm_i915_gem_execbuffer2 execbuf;
>  	struct drm_i915_gem_exec_object2 obj;
> +	const unsigned sz = bs->leak ? 16 << 20 : 4 << 10;

Because, this is quite OK still.

> @@ -180,6 +188,33 @@ static void *sys_wait(void *arg)
>  	return NULL;
>  }
>  
> +static void *sys_thp_alloc(void *arg)
> +{
> +	struct sys_wait *w = arg;
> +	struct timespec now;
> +
> +	clock_gettime(CLOCK_MONOTONIC, &now);
> +	while (!done) {
> +		const size_t sz = 2 << 20;
> +		const struct timespec start = now;
> +		void *ptr;
> +
> +		ptr = mmap(NULL, sz,
> +			   PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS,
> +			   -1, 0);
> +		assert(ptr != MAP_FAILED);
> +		madvise(ptr, sz, MADV_HUGEPAGE);
> +		for (int page = 0; page < 2 << 20 >> 12; page++)
> +			*((volatile uint32_t *)ptr + (page << 12 >> 2)) = 0;

But what's the point of this iteration? We iterate from 0 to 512 page
index (sz/PAGE_SIZE would be so much easier) and then write not to
each page, but interleave four page writes per page, so 3/4 of the pages
never get written? If this is intentional, please drop a comment.

Other than that,

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH igt 2/3] benchmarks/gem_syslatency: Apply vmpressure, measure page allocation
  2017-09-27  8:52   ` Joonas Lahtinen
@ 2017-09-27  9:36     ` Chris Wilson
  2017-09-27  9:41     ` Chris Wilson
  1 sibling, 0 replies; 15+ messages in thread
From: Chris Wilson @ 2017-09-27  9:36 UTC (permalink / raw)
  To: Joonas Lahtinen, intel-gfx

Quoting Joonas Lahtinen (2017-09-27 09:52:42)
> On Mon, 2017-09-25 at 21:26 +0100, Chris Wilson wrote:
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >  benchmarks/gem_syslatency.c | 86 +++++++++++++++++++++++++++++++++++++++++++--
> >  1 file changed, 83 insertions(+), 3 deletions(-)
> > 
> > diff --git a/benchmarks/gem_syslatency.c b/benchmarks/gem_syslatency.c
> > index 4ed23638..b8788497 100644
> > --- a/benchmarks/gem_syslatency.c
> > +++ b/benchmarks/gem_syslatency.c
> > @@ -29,6 +29,7 @@
> >  #include <stdio.h>
> >  #include <string.h>
> >  #include <fcntl.h>
> > +#include <ftw.h>
> >  #include <inttypes.h>
> >  #include <pthread.h>
> >  #include <sched.h>
> > @@ -51,6 +52,7 @@ static volatile int done;
> >  struct gem_busyspin {
> >       pthread_t thread;
> >       unsigned long count;
> > +     bool leak;
> >  };
> 
> I know we all binary arithmetic, but can I still ask
> 
> 
> #define M()
> #define K()
> 
> Or something.
> 
> >  
> >  struct sys_wait {
> > @@ -93,6 +95,7 @@ static void *gem_busyspin(void *arg)
> >       struct gem_busyspin *bs = arg;
> >       struct drm_i915_gem_execbuffer2 execbuf;
> >       struct drm_i915_gem_exec_object2 obj;
> > +     const unsigned sz = bs->leak ? 16 << 20 : 4 << 10;
> 
> Beause, this is quite OK still.
> 
> > @@ -180,6 +188,33 @@ static void *sys_wait(void *arg)
> >       return NULL;
> >  }
> >  
> > +static void *sys_thp_alloc(void *arg)
> > +{
> > +     struct sys_wait *w = arg;
> > +     struct timespec now;
> > +
> > +     clock_gettime(CLOCK_MONOTONIC, &now);
> > +     while (!done) {
> > +             const size_t sz = 2 << 20;
> > +             const struct timespec start = now;
> > +             void *ptr;
> > +
> > +             ptr = mmap(NULL, sz,
> > +                        PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS,
> > +                        -1, 0);
> > +             assert(ptr != MAP_FAILED);
> > +             madvise(ptr, sz, MADV_HUGEPAGE);
> > +             for (int page = 0; page < 2 << 20 >> 12; page++)
> > +                     *((volatile uint32_t *)ptr + (page << 12 >> 2)) = 0;
> 
> But what's the point in this iteration, we iterate from 0 to 512 page
> index (sz/PAGE_SIZE would be so much easier) and then write to to not
> each page but interleave four page writes per page and 3/4 of pages
> never get written? If this is intentional, please drop a comment.

:) As you later realised, there's an implicit <<2 from the pointer
arithmetic.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH igt 2/3] benchmarks/gem_syslatency: Apply vmpressure, measure page allocation
  2017-09-27  8:52   ` Joonas Lahtinen
  2017-09-27  9:36     ` Chris Wilson
@ 2017-09-27  9:41     ` Chris Wilson
  2017-09-27 10:02       ` Joonas Lahtinen
  1 sibling, 1 reply; 15+ messages in thread
From: Chris Wilson @ 2017-09-27  9:41 UTC (permalink / raw)
  To: Joonas Lahtinen, intel-gfx

Quoting Joonas Lahtinen (2017-09-27 09:52:42)
> On Mon, 2017-09-25 at 21:26 +0100, Chris Wilson wrote:
> > +static void *sys_thp_alloc(void *arg)
> > +{
> > +     struct sys_wait *w = arg;
> > +     struct timespec now;
> > +
> > +     clock_gettime(CLOCK_MONOTONIC, &now);
> > +     while (!done) {
> > +             const size_t sz = 2 << 20;
> > +             const struct timespec start = now;
> > +             void *ptr;
> > +
> > +             ptr = mmap(NULL, sz,
> > +                        PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS,
> > +                        -1, 0);
> > +             assert(ptr != MAP_FAILED);
> > +             madvise(ptr, sz, MADV_HUGEPAGE);
> > +             for (int page = 0; page < 2 << 20 >> 12; page++)
> > +                     *((volatile uint32_t *)ptr + (page << 12 >> 2)) = 0;
> 
> But what's the point in this iteration, we iterate from 0 to 512 page
> index (sz/PAGE_SIZE would be so much easier) and then write to to not
> each page but interleave four page writes per page and 3/4 of pages
> never get written? If this is intentional, please drop a comment.

-               for (int page = 0; page < 2 << 20 >> 12; page++)
-                       *((volatile uint32_t *)ptr + (page << 12 >> 2)) = 0;
+               for (size_t page = 0; page < sz; page += PAGE_SIZE)
+                       *(volatile uint32_t *)(ptr + page) = 0;
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH igt 2/3] benchmarks/gem_syslatency: Apply vmpressure, measure page allocation
  2017-09-27  9:41     ` Chris Wilson
@ 2017-09-27 10:02       ` Joonas Lahtinen
  2017-09-27 11:07         ` Chris Wilson
  0 siblings, 1 reply; 15+ messages in thread
From: Joonas Lahtinen @ 2017-09-27 10:02 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On Wed, 2017-09-27 at 10:41 +0100, Chris Wilson wrote:
> Quoting Joonas Lahtinen (2017-09-27 09:52:42)
> > On Mon, 2017-09-25 at 21:26 +0100, Chris Wilson wrote:
> > > +static void *sys_thp_alloc(void *arg)
> > > +{
> > > +     struct sys_wait *w = arg;
> > > +     struct timespec now;
> > > +
> > > +     clock_gettime(CLOCK_MONOTONIC, &now);
> > > +     while (!done) {
> > > +             const size_t sz = 2 << 20;
> > > +             const struct timespec start = now;
> > > +             void *ptr;
> > > +
> > > +             ptr = mmap(NULL, sz,
> > > +                        PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS,
> > > +                        -1, 0);
> > > +             assert(ptr != MAP_FAILED);
> > > +             madvise(ptr, sz, MADV_HUGEPAGE);
> > > +             for (int page = 0; page < 2 << 20 >> 12; page++)
> > > +                     *((volatile uint32_t *)ptr + (page << 12 >> 2)) = 0;
> > 
> > But what's the point in this iteration, we iterate from 0 to 512 page
> > index (sz/PAGE_SIZE would be so much easier) and then write to to not
> > each page but interleave four page writes per page and 3/4 of pages
> > never get written? If this is intentional, please drop a comment.
> 
> -               for (int page = 0; page < 2 << 20 >> 12; page++)
> -                       *((volatile uint32_t *)ptr + (page << 12 >> 2)) = 0;

Yes, why not write /sizeof() like civilized people do :P

> +               for (size_t page = 0; page < sz; page += PAGE_SIZE)
> +                       *(volatile uint32_t *)(ptr + page) = 0;

Thats much more clear.

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

It seems program usage info is not a hot feature for benchmarks.

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH igt 2/3] benchmarks/gem_syslatency: Apply vmpressure, measure page allocation
  2017-09-27 10:02       ` Joonas Lahtinen
@ 2017-09-27 11:07         ` Chris Wilson
  0 siblings, 0 replies; 15+ messages in thread
From: Chris Wilson @ 2017-09-27 11:07 UTC (permalink / raw)
  To: Joonas Lahtinen, intel-gfx

Quoting Joonas Lahtinen (2017-09-27 11:02:08)
> On Wed, 2017-09-27 at 10:41 +0100, Chris Wilson wrote:
> > Quoting Joonas Lahtinen (2017-09-27 09:52:42)
> > > On Mon, 2017-09-25 at 21:26 +0100, Chris Wilson wrote:
> > > > +static void *sys_thp_alloc(void *arg)
> > > > +{
> > > > +     struct sys_wait *w = arg;
> > > > +     struct timespec now;
> > > > +
> > > > +     clock_gettime(CLOCK_MONOTONIC, &now);
> > > > +     while (!done) {
> > > > +             const size_t sz = 2 << 20;
> > > > +             const struct timespec start = now;
> > > > +             void *ptr;
> > > > +
> > > > +             ptr = mmap(NULL, sz,
> > > > +                        PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS,
> > > > +                        -1, 0);
> > > > +             assert(ptr != MAP_FAILED);
> > > > +             madvise(ptr, sz, MADV_HUGEPAGE);
> > > > +             for (int page = 0; page < 2 << 20 >> 12; page++)
> > > > +                     *((volatile uint32_t *)ptr + (page << 12 >> 2)) = 0;
> > > 
> > > But what's the point in this iteration, we iterate from 0 to 512 page
> > > index (sz/PAGE_SIZE would be so much easier) and then write to to not
> > > each page but interleave four page writes per page and 3/4 of pages
> > > never get written? If this is intentional, please drop a comment.
> > 
> > -               for (int page = 0; page < 2 << 20 >> 12; page++)
> > -                       *((volatile uint32_t *)ptr + (page << 12 >> 2)) = 0;
> 
> Yes, why not write /sizeof() like civilized people do :P
> 
> > +               for (size_t page = 0; page < sz; page += PAGE_SIZE)
> > +                       *(volatile uint32_t *)(ptr + page) = 0;
> 
> Thats much more clear.
> 
> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> 
> It seems program usage info is not a hot feature for benchmarks.

Indeed. The intention is that you never run these directly but through
an ezbench wrapper. Paging Martin.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH igt 1/3] benchmark/gem_busy: Compare polling with syncobj_wait
  2017-09-25 20:26 [PATCH igt 1/3] benchmark/gem_busy: Compare polling with syncobj_wait Chris Wilson
                   ` (3 preceding siblings ...)
  2017-09-26  2:35 ` ✓ Fi.CI.IGT: " Patchwork
@ 2017-09-28  6:53 ` Tvrtko Ursulin
  2017-09-28  9:07   ` Chris Wilson
  4 siblings, 1 reply; 15+ messages in thread
From: Tvrtko Ursulin @ 2017-09-28  6:53 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 25/09/2017 21:26, Chris Wilson wrote:
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   benchmarks/gem_busy.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++-
>   1 file changed, 72 insertions(+), 1 deletion(-)
> 
> diff --git a/benchmarks/gem_busy.c b/benchmarks/gem_busy.c
> index f050454b..9649ea02 100644
> --- a/benchmarks/gem_busy.c
> +++ b/benchmarks/gem_busy.c
> @@ -58,6 +58,15 @@
>   #define DMABUF 0x4
>   #define WAIT 0x8
>   #define SYNC 0x10
> +#define SYNCOBJ 0x20
> +
> +#define LOCAL_I915_EXEC_FENCE_ARRAY (1 << 19)
> +struct local_gem_exec_fence {
> +	uint32_t handle;
> +	uint32_t flags;
> +#define LOCAL_EXEC_FENCE_WAIT (1 << 0)
> +#define LOCAL_EXEC_FENCE_SIGNAL (1 << 1)
> +};
>   
>   static void gem_busy(int fd, uint32_t handle)
>   {
> @@ -109,11 +118,54 @@ static int sync_merge(int fd1, int fd2)
>   	return data.fence;
>   }
>   
> +static uint32_t __syncobj_create(int fd)
> +{
> +	struct local_syncobj_create {
> +		uint32_t handle, flags;
> +	} arg;
> +#define LOCAL_IOCTL_SYNCOBJ_CREATE        DRM_IOWR(0xBF, struct local_syncobj_create)
> +
> +	memset(&arg, 0, sizeof(arg));
> +	ioctl(fd, LOCAL_IOCTL_SYNCOBJ_CREATE, &arg);
> +
> +	return arg.handle;
> +}
> +
> +static uint32_t syncobj_create(int fd)
> +{
> +	uint32_t ret;
> +
> +	igt_assert_neq((ret = __syncobj_create(fd)), 0);
> +
> +	return ret;
> +}
> +
> +#define LOCAL_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
> +#define LOCAL_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1)
> +struct local_syncobj_wait {
> +       __u64 handles;
> +       /* absolute timeout */
> +       __s64 timeout_nsec;
> +       __u32 count_handles;
> +       __u32 flags;
> +       __u32 first_signaled; /* only valid when not waiting all */
> +       __u32 pad;
> +};
> +#define LOCAL_IOCTL_SYNCOBJ_WAIT	DRM_IOWR(0xC3, struct local_syncobj_wait)
> +static int __syncobj_wait(int fd, struct local_syncobj_wait *args)
> +{
> +	int err = 0;
> +	if (drmIoctl(fd, LOCAL_IOCTL_SYNCOBJ_WAIT, args))
> +		err = -errno;
> +	return err;
> +}
> +
>   static int loop(unsigned ring, int reps, int ncpus, unsigned flags)
>   {
>   	struct drm_i915_gem_execbuffer2 execbuf;
>   	struct drm_i915_gem_exec_object2 obj[2];
>   	struct drm_i915_gem_relocation_entry reloc[2];
> +	struct local_gem_exec_fence syncobj;
>   	unsigned engines[16];
>   	unsigned nengine;
>   	uint32_t *batch;
> @@ -126,6 +178,11 @@ static int loop(unsigned ring, int reps, int ncpus, unsigned flags)
>   	fd = drm_open_driver(DRIVER_INTEL);
>   	gen = intel_gen(intel_get_drm_devid(fd));
>   
> +	if (flags & SYNCOBJ) {
> +		syncobj.handle = syncobj_create(fd);
> +		syncobj.flags = LOCAL_EXEC_FENCE_SIGNAL;
> +	}
> +
>   	memset(obj, 0, sizeof(obj));
>   	obj[0].handle = gem_create(fd, 4096);
>   	if (flags & WRITE)
> @@ -144,6 +201,8 @@ static int loop(unsigned ring, int reps, int ncpus, unsigned flags)
>   	execbuf.buffer_count = 2;
>   	execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT;
>   	execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;
> +	if (flags & SYNCOBJ)
> +		execbuf.flags |= LOCAL_I915_EXEC_FENCE_ARRAY;

According to the comment in i915_drm.h, when this is specified, the syncobj
should also be passed in via cliprects_ptr, but that's not happening?

Regards,

Tvrtko

>   	if (__gem_execbuf(fd, &execbuf)) {
>   		execbuf.flags = 0;
>   		if (__gem_execbuf(fd, &execbuf))
> @@ -235,6 +294,14 @@ static int loop(unsigned ring, int reps, int ncpus, unsigned flags)
>   					struct pollfd pfd = { .fd = dmabuf, .events = POLLOUT };
>   					for (int inner = 0; inner < 1024; inner++)
>   						poll(&pfd, 1, 0);
> +				} else if (flags & SYNCOBJ) {
> +					struct local_syncobj_wait arg = {
> +						.handles = to_user_pointer(&syncobj.handle),
> +						.count_handles = 1,
> +					};
> +
> +					for (int inner = 0; inner < 1024; inner++)
> +						__syncobj_wait(fd, &arg);
>   				} else if (flags & SYNC) {
>   					struct pollfd pfd = { .fd = fence, .events = POLLOUT };
>   					for (int inner = 0; inner < 1024; inner++)
> @@ -275,7 +342,7 @@ int main(int argc, char **argv)
>   	int ncpus = 1;
>   	int c;
>   
> -	while ((c = getopt (argc, argv, "e:r:dfswWI")) != -1) {
> +	while ((c = getopt (argc, argv, "e:r:dfsSwWI")) != -1) {
>   		switch (c) {
>   		case 'e':
>   			if (strcmp(optarg, "rcs") == 0)
> @@ -314,6 +381,10 @@ int main(int argc, char **argv)
>   			flags |= SYNC;
>   			break;
>   
> +		case 'S':
> +			flags |= SYNCOBJ;
> +			break;
> +
>   		case 'W':
>   			flags |= WRITE;
>   			break;
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH igt 1/3] benchmark/gem_busy: Compare polling with syncobj_wait
  2017-09-28  6:53 ` [PATCH igt 1/3] " Tvrtko Ursulin
@ 2017-09-28  9:07   ` Chris Wilson
  2017-09-28  9:16     ` Tvrtko Ursulin
  0 siblings, 1 reply; 15+ messages in thread
From: Chris Wilson @ 2017-09-28  9:07 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2017-09-28 07:53:56)
> 
> On 25/09/2017 21:26, Chris Wilson wrote:
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >   benchmarks/gem_busy.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++-
> >   1 file changed, 72 insertions(+), 1 deletion(-)
> > 
> > diff --git a/benchmarks/gem_busy.c b/benchmarks/gem_busy.c
> > index f050454b..9649ea02 100644
> > --- a/benchmarks/gem_busy.c
> > +++ b/benchmarks/gem_busy.c
> > @@ -58,6 +58,15 @@
> >   #define DMABUF 0x4
> >   #define WAIT 0x8
> >   #define SYNC 0x10
> > +#define SYNCOBJ 0x20
> > +
> > +#define LOCAL_I915_EXEC_FENCE_ARRAY (1 << 19)
> > +struct local_gem_exec_fence {
> > +     uint32_t handle;
> > +     uint32_t flags;
> > +#define LOCAL_EXEC_FENCE_WAIT (1 << 0)
> > +#define LOCAL_EXEC_FENCE_SIGNAL (1 << 1)
> > +};
> >   
> >   static void gem_busy(int fd, uint32_t handle)
> >   {
> > @@ -109,11 +118,54 @@ static int sync_merge(int fd1, int fd2)
> >       return data.fence;
> >   }
> >   
> > +static uint32_t __syncobj_create(int fd)
> > +{
> > +     struct local_syncobj_create {
> > +             uint32_t handle, flags;
> > +     } arg;
> > +#define LOCAL_IOCTL_SYNCOBJ_CREATE        DRM_IOWR(0xBF, struct local_syncobj_create)
> > +
> > +     memset(&arg, 0, sizeof(arg));
> > +     ioctl(fd, LOCAL_IOCTL_SYNCOBJ_CREATE, &arg);
> > +
> > +     return arg.handle;
> > +}
> > +
> > +static uint32_t syncobj_create(int fd)
> > +{
> > +     uint32_t ret;
> > +
> > +     igt_assert_neq((ret = __syncobj_create(fd)), 0);
> > +
> > +     return ret;
> > +}
> > +
> > +#define LOCAL_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
> > +#define LOCAL_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1)
> > +struct local_syncobj_wait {
> > +       __u64 handles;
> > +       /* absolute timeout */
> > +       __s64 timeout_nsec;
> > +       __u32 count_handles;
> > +       __u32 flags;
> > +       __u32 first_signaled; /* only valid when not waiting all */
> > +       __u32 pad;
> > +};
> > +#define LOCAL_IOCTL_SYNCOBJ_WAIT     DRM_IOWR(0xC3, struct local_syncobj_wait)
> > +static int __syncobj_wait(int fd, struct local_syncobj_wait *args)
> > +{
> > +     int err = 0;
> > +     if (drmIoctl(fd, LOCAL_IOCTL_SYNCOBJ_WAIT, args))
> > +             err = -errno;
> > +     return err;
> > +}
> > +
> >   static int loop(unsigned ring, int reps, int ncpus, unsigned flags)
> >   {
> >       struct drm_i915_gem_execbuffer2 execbuf;
> >       struct drm_i915_gem_exec_object2 obj[2];
> >       struct drm_i915_gem_relocation_entry reloc[2];
> > +     struct local_gem_exec_fence syncobj;
> >       unsigned engines[16];
> >       unsigned nengine;
> >       uint32_t *batch;
> > @@ -126,6 +178,11 @@ static int loop(unsigned ring, int reps, int ncpus, unsigned flags)
> >       fd = drm_open_driver(DRIVER_INTEL);
> >       gen = intel_gen(intel_get_drm_devid(fd));
> >   
> > +     if (flags & SYNCOBJ) {
> > +             syncobj.handle = syncobj_create(fd);
> > +             syncobj.flags = LOCAL_EXEC_FENCE_SIGNAL;
> > +     }
> > +
> >       memset(obj, 0, sizeof(obj));
> >       obj[0].handle = gem_create(fd, 4096);
> >       if (flags & WRITE)
> > @@ -144,6 +201,8 @@ static int loop(unsigned ring, int reps, int ncpus, unsigned flags)
> >       execbuf.buffer_count = 2;
> >       execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT;
> >       execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;
> > +     if (flags & SYNCOBJ)
> > +             execbuf.flags |= LOCAL_I915_EXEC_FENCE_ARRAY;
> 
> According to the comment in i915_drm.h, when this is specified, syncobj 
> should be also passed in in cliprects_ptr but that's not happening?

You want -b support as well! :)
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH igt 1/3] benchmark/gem_busy: Compare polling with syncobj_wait
  2017-09-28  9:07   ` Chris Wilson
@ 2017-09-28  9:16     ` Tvrtko Ursulin
  2017-09-28  9:50       ` Chris Wilson
  0 siblings, 1 reply; 15+ messages in thread
From: Tvrtko Ursulin @ 2017-09-28  9:16 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 28/09/2017 10:07, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2017-09-28 07:53:56)
>>
>> On 25/09/2017 21:26, Chris Wilson wrote:
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> ---
>>>    benchmarks/gem_busy.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++-
>>>    1 file changed, 72 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/benchmarks/gem_busy.c b/benchmarks/gem_busy.c
>>> index f050454b..9649ea02 100644
>>> --- a/benchmarks/gem_busy.c
>>> +++ b/benchmarks/gem_busy.c
>>> @@ -58,6 +58,15 @@
>>>    #define DMABUF 0x4
>>>    #define WAIT 0x8
>>>    #define SYNC 0x10
>>> +#define SYNCOBJ 0x20
>>> +
>>> +#define LOCAL_I915_EXEC_FENCE_ARRAY (1 << 19)
>>> +struct local_gem_exec_fence {
>>> +     uint32_t handle;
>>> +     uint32_t flags;
>>> +#define LOCAL_EXEC_FENCE_WAIT (1 << 0)
>>> +#define LOCAL_EXEC_FENCE_SIGNAL (1 << 1)
>>> +};
>>>    
>>>    static void gem_busy(int fd, uint32_t handle)
>>>    {
>>> @@ -109,11 +118,54 @@ static int sync_merge(int fd1, int fd2)
>>>        return data.fence;
>>>    }
>>>    
>>> +static uint32_t __syncobj_create(int fd)
>>> +{
>>> +     struct local_syncobj_create {
>>> +             uint32_t handle, flags;
>>> +     } arg;
>>> +#define LOCAL_IOCTL_SYNCOBJ_CREATE        DRM_IOWR(0xBF, struct local_syncobj_create)
>>> +
>>> +     memset(&arg, 0, sizeof(arg));
>>> +     ioctl(fd, LOCAL_IOCTL_SYNCOBJ_CREATE, &arg);
>>> +
>>> +     return arg.handle;
>>> +}
>>> +
>>> +static uint32_t syncobj_create(int fd)
>>> +{
>>> +     uint32_t ret;
>>> +
>>> +     igt_assert_neq((ret = __syncobj_create(fd)), 0);
>>> +
>>> +     return ret;
>>> +}
>>> +
>>> +#define LOCAL_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
>>> +#define LOCAL_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1)
>>> +struct local_syncobj_wait {
>>> +       __u64 handles;
>>> +       /* absolute timeout */
>>> +       __s64 timeout_nsec;
>>> +       __u32 count_handles;
>>> +       __u32 flags;
>>> +       __u32 first_signaled; /* only valid when not waiting all */
>>> +       __u32 pad;
>>> +};
>>> +#define LOCAL_IOCTL_SYNCOBJ_WAIT     DRM_IOWR(0xC3, struct local_syncobj_wait)
>>> +static int __syncobj_wait(int fd, struct local_syncobj_wait *args)
>>> +{
>>> +     int err = 0;
>>> +     if (drmIoctl(fd, LOCAL_IOCTL_SYNCOBJ_WAIT, args))
>>> +             err = -errno;
>>> +     return err;
>>> +}
>>> +
>>>    static int loop(unsigned ring, int reps, int ncpus, unsigned flags)
>>>    {
>>>        struct drm_i915_gem_execbuffer2 execbuf;
>>>        struct drm_i915_gem_exec_object2 obj[2];
>>>        struct drm_i915_gem_relocation_entry reloc[2];
>>> +     struct local_gem_exec_fence syncobj;
>>>        unsigned engines[16];
>>>        unsigned nengine;
>>>        uint32_t *batch;
>>> @@ -126,6 +178,11 @@ static int loop(unsigned ring, int reps, int ncpus, unsigned flags)
>>>        fd = drm_open_driver(DRIVER_INTEL);
>>>        gen = intel_gen(intel_get_drm_devid(fd));
>>>    
>>> +     if (flags & SYNCOBJ) {
>>> +             syncobj.handle = syncobj_create(fd);
>>> +             syncobj.flags = LOCAL_EXEC_FENCE_SIGNAL;
>>> +     }
>>> +
>>>        memset(obj, 0, sizeof(obj));
>>>        obj[0].handle = gem_create(fd, 4096);
>>>        if (flags & WRITE)
>>> @@ -144,6 +201,8 @@ static int loop(unsigned ring, int reps, int ncpus, unsigned flags)
>>>        execbuf.buffer_count = 2;
>>>        execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT;
>>>        execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;
>>> +     if (flags & SYNCOBJ)
>>> +             execbuf.flags |= LOCAL_I915_EXEC_FENCE_ARRAY;
>>
>> According to the comment in i915_drm.h, when this is specified, syncobj
>> should be also passed in in cliprects_ptr but that's not happening?
> 
> You want -b support as well! :)

I just failed to figure out where the connection is between syncobj
(local var) and execbuf. The flag is set, so what happens next? Does execbuf
fail since cliprects_ptr is not set?

Regards,

Tvrtko

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH igt 1/3] benchmark/gem_busy: Compare polling with syncobj_wait
  2017-09-28  9:16     ` Tvrtko Ursulin
@ 2017-09-28  9:50       ` Chris Wilson
  0 siblings, 0 replies; 15+ messages in thread
From: Chris Wilson @ 2017-09-28  9:50 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2017-09-28 10:16:58)
> 
> On 28/09/2017 10:07, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2017-09-28 07:53:56)
> >>
> >> On 25/09/2017 21:26, Chris Wilson wrote:
> >>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >>> ---
> >>>    benchmarks/gem_busy.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++-
> >>>    1 file changed, 72 insertions(+), 1 deletion(-)
> >>>
> >>> diff --git a/benchmarks/gem_busy.c b/benchmarks/gem_busy.c
> >>> index f050454b..9649ea02 100644
> >>> --- a/benchmarks/gem_busy.c
> >>> +++ b/benchmarks/gem_busy.c
> >>> @@ -58,6 +58,15 @@
> >>>    #define DMABUF 0x4
> >>>    #define WAIT 0x8
> >>>    #define SYNC 0x10
> >>> +#define SYNCOBJ 0x20
> >>> +
> >>> +#define LOCAL_I915_EXEC_FENCE_ARRAY (1 << 19)
> >>> +struct local_gem_exec_fence {
> >>> +     uint32_t handle;
> >>> +     uint32_t flags;
> >>> +#define LOCAL_EXEC_FENCE_WAIT (1 << 0)
> >>> +#define LOCAL_EXEC_FENCE_SIGNAL (1 << 1)
> >>> +};
> >>>    
> >>>    static void gem_busy(int fd, uint32_t handle)
> >>>    {
> >>> @@ -109,11 +118,54 @@ static int sync_merge(int fd1, int fd2)
> >>>        return data.fence;
> >>>    }
> >>>    
> >>> +static uint32_t __syncobj_create(int fd)
> >>> +{
> >>> +     struct local_syncobj_create {
> >>> +             uint32_t handle, flags;
> >>> +     } arg;
> >>> +#define LOCAL_IOCTL_SYNCOBJ_CREATE        DRM_IOWR(0xBF, struct local_syncobj_create)
> >>> +
> >>> +     memset(&arg, 0, sizeof(arg));
> >>> +     ioctl(fd, LOCAL_IOCTL_SYNCOBJ_CREATE, &arg);
> >>> +
> >>> +     return arg.handle;
> >>> +}
> >>> +
> >>> +static uint32_t syncobj_create(int fd)
> >>> +{
> >>> +     uint32_t ret;
> >>> +
> >>> +     igt_assert_neq((ret = __syncobj_create(fd)), 0);
> >>> +
> >>> +     return ret;
> >>> +}
> >>> +
> >>> +#define LOCAL_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
> >>> +#define LOCAL_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1)
> >>> +struct local_syncobj_wait {
> >>> +       __u64 handles;
> >>> +       /* absolute timeout */
> >>> +       __s64 timeout_nsec;
> >>> +       __u32 count_handles;
> >>> +       __u32 flags;
> >>> +       __u32 first_signaled; /* only valid when not waiting all */
> >>> +       __u32 pad;
> >>> +};
> >>> +#define LOCAL_IOCTL_SYNCOBJ_WAIT     DRM_IOWR(0xC3, struct local_syncobj_wait)
> >>> +static int __syncobj_wait(int fd, struct local_syncobj_wait *args)
> >>> +{
> >>> +     int err = 0;
> >>> +     if (drmIoctl(fd, LOCAL_IOCTL_SYNCOBJ_WAIT, args))
> >>> +             err = -errno;
> >>> +     return err;
> >>> +}
> >>> +
> >>>    static int loop(unsigned ring, int reps, int ncpus, unsigned flags)
> >>>    {
> >>>        struct drm_i915_gem_execbuffer2 execbuf;
> >>>        struct drm_i915_gem_exec_object2 obj[2];
> >>>        struct drm_i915_gem_relocation_entry reloc[2];
> >>> +     struct local_gem_exec_fence syncobj;
> >>>        unsigned engines[16];
> >>>        unsigned nengine;
> >>>        uint32_t *batch;
> >>> @@ -126,6 +178,11 @@ static int loop(unsigned ring, int reps, int ncpus, unsigned flags)
> >>>        fd = drm_open_driver(DRIVER_INTEL);
> >>>        gen = intel_gen(intel_get_drm_devid(fd));
> >>>    
> >>> +     if (flags & SYNCOBJ) {
> >>> +             syncobj.handle = syncobj_create(fd);
> >>> +             syncobj.flags = LOCAL_EXEC_FENCE_SIGNAL;
> >>> +     }
> >>> +
> >>>        memset(obj, 0, sizeof(obj));
> >>>        obj[0].handle = gem_create(fd, 4096);
> >>>        if (flags & WRITE)
> >>> @@ -144,6 +201,8 @@ static int loop(unsigned ring, int reps, int ncpus, unsigned flags)
> >>>        execbuf.buffer_count = 2;
> >>>        execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT;
> >>>        execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;
> >>> +     if (flags & SYNCOBJ)
> >>> +             execbuf.flags |= LOCAL_I915_EXEC_FENCE_ARRAY;
> >>
> >> According to the comment in i915_drm.h, when this is specified, syncobj
> >> should be also passed in in cliprects_ptr but that's not happening?
> > 
> > You want -b support as well! :)
> 
> I just failed to figure out where is the connection between syncobj 
> (local var) and execbuf. Flag is set, so what happens next? Execbuf 
> fails since cliprects_ptr is not set?

Your observation is correct. The code never sets the syncobj to be
signaled. I was just saying that, as far as polling on an idle syncobj
goes, it should be ok, though strictly it will be reporting that there
is no fence attached rather than that it is idle. I need to add code to
support the '-b' mode of polling on a busy fence.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2017-09-28  9:50 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-09-25 20:26 [PATCH igt 1/3] benchmark/gem_busy: Compare polling with syncobj_wait Chris Wilson
2017-09-25 20:26 ` [PATCH igt 2/3] benchmarks/gem_syslatency: Apply vmpressure, measure page allocation Chris Wilson
2017-09-27  8:52   ` Joonas Lahtinen
2017-09-27  9:36     ` Chris Wilson
2017-09-27  9:41     ` Chris Wilson
2017-09-27 10:02       ` Joonas Lahtinen
2017-09-27 11:07         ` Chris Wilson
2017-09-25 20:26 ` [PATCH igt 3/3] benchmarks/gem_exec_fault: Update for tryhard kernels Chris Wilson
2017-09-26 10:56   ` Matthew Auld
2017-09-25 21:06 ` ✓ Fi.CI.BAT: success for series starting with [1/3] benchmark/gem_busy: Compare polling with syncobj_wait Patchwork
2017-09-26  2:35 ` ✓ Fi.CI.IGT: " Patchwork
2017-09-28  6:53 ` [PATCH igt 1/3] " Tvrtko Ursulin
2017-09-28  9:07   ` Chris Wilson
2017-09-28  9:16     ` Tvrtko Ursulin
2017-09-28  9:50       ` Chris Wilson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.