All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH i-g-t v4 00/12] Improve robustness of the i915 perf tests
@ 2017-08-29 13:55 Lionel Landwerlin
  2017-08-29 13:55 ` [PATCH i-g-t v4 01/12] tests/perf: make stream_fd a global variable Lionel Landwerlin
                   ` (13 more replies)
  0 siblings, 14 replies; 15+ messages in thread
From: Lionel Landwerlin @ 2017-08-29 13:55 UTC (permalink / raw)
  To: intel-gfx

Hi,

Some nits by Matthew in patch 6.

Cheers,

Lionel Landwerlin (11):
  tests/perf: make stream_fd a global variable
  tests/perf: update max buffer size for reading reports
  tests/perf: rc6: try to guess when rc6 is disabled
  tests/perf: remove frequency related changes
  tests/perf: rework oa-exponent test
  tests/perf: make enable-disable more reliable
  tests/perf: make buffer-fill more reliable
  tests/perf: add Kabylake support
  tests/perf: add Geminilake support
  tests/perf: estimate number of blocking/polling based on time spent
  tests/perf: prevent power management to kick in when necessary

Robert Bragg (1):
  tests/perf: add per context filtering test for gen8+

 tests/perf.c | 2037 +++++++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 1672 insertions(+), 365 deletions(-)

--
2.14.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH i-g-t v4 01/12] tests/perf: make stream_fd a global variable
  2017-08-29 13:55 [PATCH i-g-t v4 00/12] Improve robustness of the i915 perf tests Lionel Landwerlin
@ 2017-08-29 13:55 ` Lionel Landwerlin
  2017-08-29 13:55 ` [PATCH i-g-t v4 02/12] tests/perf: add per context filtering test for gen8+ Lionel Landwerlin
                   ` (12 subsequent siblings)
  13 siblings, 0 replies; 15+ messages in thread
From: Lionel Landwerlin @ 2017-08-29 13:55 UTC (permalink / raw)
  To: intel-gfx

When debugging unstable tests on new platforms we currently we don't
cleanup everything well in between different tests. Since only a
single OA stream fd can be opened at a time, having the stream_fd as a
global variable helps us cleanup the state between tests.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 tests/perf.c | 121 ++++++++++++++++++++++++++++++++---------------------------
 1 file changed, 65 insertions(+), 56 deletions(-)

diff --git a/tests/perf.c b/tests/perf.c
index dd2263ee..ca5bfdc5 100644
--- a/tests/perf.c
+++ b/tests/perf.c
@@ -285,6 +285,7 @@ static bool hsw_undefined_a_counters[45] = {
 static bool gen8_undefined_a_counters[45];
 
 static int drm_fd = -1;
+static int stream_fd = -1;
 static uint32_t devid;
 static int card = -1;
 static int n_eus;
@@ -306,10 +307,22 @@ static uint32_t (*read_report_ticks)(uint32_t *report,
 static void (*sanity_check_reports)(uint32_t *oa_report0, uint32_t *oa_report1,
 				    enum drm_i915_oa_format format);
 
+static void
+__perf_close(int fd)
+{
+	close(fd);
+	stream_fd = -1;
+}
+
 static int
 __perf_open(int fd, struct drm_i915_perf_open_param *param)
 {
-	int ret = igt_ioctl(fd, DRM_IOCTL_I915_PERF_OPEN, param);
+	int ret;
+
+	if (stream_fd >= 0)
+		__perf_close(stream_fd);
+
+	ret = igt_ioctl(fd, DRM_IOCTL_I915_PERF_OPEN, param);
 
 	igt_assert(ret >= 0);
 	errno = 0;
@@ -960,14 +973,12 @@ test_system_wide_paranoid(void)
 			.num_properties = sizeof(properties) / 16,
 			.properties_ptr = to_user_pointer(properties),
 		};
-		int stream_fd;
-
 		write_u64_file("/proc/sys/dev/i915/perf_stream_paranoid", 0);
 
 		igt_drop_root();
 
 		stream_fd = __perf_open(drm_fd, &param);
-		close(stream_fd);
+		__perf_close(stream_fd);
 	}
 
 	igt_waitchildren();
@@ -1015,7 +1026,6 @@ test_invalid_oa_metric_set_id(void)
 		.num_properties = sizeof(properties) / 16,
 		.properties_ptr = to_user_pointer(properties),
 	};
-	int stream_fd;
 
 	do_ioctl_err(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param, EINVAL);
 
@@ -1025,7 +1035,7 @@ test_invalid_oa_metric_set_id(void)
 	/* Check that we aren't just seeing false positives... */
 	properties[ARRAY_SIZE(properties) - 1] = test_metric_set_id;
 	stream_fd = __perf_open(drm_fd, &param);
-	close(stream_fd);
+	__perf_close(stream_fd);
 
 	/* There's no valid default OA metric set ID... */
 	param.num_properties--;
@@ -1050,7 +1060,6 @@ test_invalid_oa_format_id(void)
 		.num_properties = sizeof(properties) / 16,
 		.properties_ptr = to_user_pointer(properties),
 	};
-	int stream_fd;
 
 	do_ioctl_err(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param, EINVAL);
 
@@ -1060,7 +1069,7 @@ test_invalid_oa_format_id(void)
 	/* Check that we aren't just seeing false positives... */
 	properties[ARRAY_SIZE(properties) - 1] = test_oa_format;
 	stream_fd = __perf_open(drm_fd, &param);
-	close(stream_fd);
+	__perf_close(stream_fd);
 
 	/* There's no valid default OA format... */
 	param.num_properties--;
@@ -1088,8 +1097,7 @@ test_missing_sample_flags(void)
 }
 
 static void
-read_2_oa_reports(int stream_fd,
-		  int format_id,
+read_2_oa_reports(int format_id,
 		  int exponent,
 		  uint32_t *oa_report0,
 		  uint32_t *oa_report1,
@@ -1223,12 +1231,13 @@ open_and_read_2_oa_reports(int format_id,
 		.num_properties = sizeof(properties) / 16,
 		.properties_ptr = to_user_pointer(properties),
 	};
-	int stream_fd = __perf_open(drm_fd, &param);
 
-	read_2_oa_reports(stream_fd, format_id, exponent,
+	stream_fd = __perf_open(drm_fd, &param);
+
+	read_2_oa_reports(format_id, exponent,
 			  oa_report0, oa_report1, timer_only);
 
-	close(stream_fd);
+	__perf_close(stream_fd);
 }
 
 static void
@@ -1528,9 +1537,10 @@ test_invalid_oa_exponent(void)
 		.num_properties = sizeof(properties) / 16,
 		.properties_ptr = to_user_pointer(properties),
 	};
-	int stream_fd = __perf_open(drm_fd, &param);
 
-	close(stream_fd);
+	stream_fd = __perf_open(drm_fd, &param);
+
+	__perf_close(stream_fd);
 
 	for (int i = 32; i < 65; i++) {
 		properties[7] = i;
@@ -1580,12 +1590,10 @@ test_low_oa_exponent_permissions(void)
 	properties[7] = ok_exponent;
 
 	igt_fork(child, 1) {
-		int stream_fd;
-
 		igt_drop_root();
 
 		stream_fd = __perf_open(drm_fd, &param);
-		close(stream_fd);
+		__perf_close(stream_fd);
 	}
 
 	igt_waitchildren();
@@ -1634,7 +1642,6 @@ test_per_context_mode_unprivileged(void)
 	igt_fork(child, 1) {
 		drm_intel_context *context;
 		drm_intel_bufmgr *bufmgr;
-		int stream_fd;
 		uint32_t ctx_id = 0xffffffff; /* invalid id */
 		int ret;
 
@@ -1652,7 +1659,7 @@ test_per_context_mode_unprivileged(void)
 		properties[1] = ctx_id;
 
 		stream_fd = __perf_open(drm_fd, &param);
-		close(stream_fd);
+		__perf_close(stream_fd);
 
 		drm_intel_gem_context_destroy(context);
 		drm_intel_bufmgr_destroy(bufmgr);
@@ -1715,7 +1722,6 @@ test_blocking(void)
 		.num_properties = sizeof(properties) / 16,
 		.properties_ptr = to_user_pointer(properties),
 	};
-	int stream_fd = __perf_open(drm_fd, &param);
 	uint8_t buf[1024 * 1024];
 	struct tms start_times;
 	struct tms end_times;
@@ -1740,6 +1746,8 @@ test_blocking(void)
 	int64_t start;
 	int n = 0;
 
+	stream_fd = __perf_open(drm_fd, &param);
+
 	times(&start_times);
 
 	igt_debug("tick length = %dns, test duration = %"PRIu64"ns, min iter. = %d, max iter. = %d\n",
@@ -1837,7 +1845,7 @@ test_blocking(void)
 
 	igt_assert(kernel_ns <= (test_duration_ns / 100ull));
 
-	close(stream_fd);
+	__perf_close(stream_fd);
 }
 
 static void
@@ -1866,7 +1874,6 @@ test_polling(void)
 		.num_properties = sizeof(properties) / 16,
 		.properties_ptr = to_user_pointer(properties),
 	};
-	int stream_fd = __perf_open(drm_fd, &param);
 	uint8_t buf[1024 * 1024];
 	struct tms start_times;
 	struct tms end_times;
@@ -1890,6 +1897,8 @@ test_polling(void)
 	int64_t start;
 	int n = 0;
 
+	stream_fd = __perf_open(drm_fd, &param);
+
 	times(&start_times);
 
 	igt_debug("tick length = %dns, test duration = %"PRIu64"ns, min iter. = %d, max iter. = %d\n",
@@ -2018,7 +2027,7 @@ test_polling(void)
 
 	igt_assert(kernel_ns <= (test_duration_ns / 100ull));
 
-	close(stream_fd);
+	__perf_close(stream_fd);
 }
 
 static void
@@ -2041,7 +2050,6 @@ test_buffer_fill(void)
 		.num_properties = sizeof(properties) / 16,
 		.properties_ptr = to_user_pointer(properties),
 	};
-	int stream_fd = __perf_open(drm_fd, &param);
 	int buf_size = 65536 * (256 + sizeof(struct drm_i915_perf_record_header));
 	uint8_t *buf = malloc(buf_size);
 	size_t oa_buf_size = 16 * 1024 * 1024;
@@ -2051,6 +2059,8 @@ test_buffer_fill(void)
 
 	igt_assert(fill_duration < 1000000000);
 
+	stream_fd = __perf_open(drm_fd, &param);
+
 	for (int i = 0; i < 5; i++) {
 		struct drm_i915_perf_record_header *header;
 		bool overflow_seen;
@@ -2101,7 +2111,7 @@ test_buffer_fill(void)
 
 	free(buf);
 
-	close(stream_fd);
+	__perf_close(stream_fd);
 }
 
 static void
@@ -2125,7 +2135,6 @@ test_enable_disable(void)
 		.num_properties = sizeof(properties) / 16,
 		.properties_ptr = to_user_pointer(properties),
 	};
-	int stream_fd = __perf_open(drm_fd, &param);
 	int buf_size = 65536 * (256 + sizeof(struct drm_i915_perf_record_header));
 	uint8_t *buf = malloc(buf_size);
 	size_t oa_buf_size = 16 * 1024 * 1024;
@@ -2133,6 +2142,7 @@ test_enable_disable(void)
 	int n_full_oa_reports = oa_buf_size / report_size;
 	uint64_t fill_duration = n_full_oa_reports * oa_period;
 
+	stream_fd = __perf_open(drm_fd, &param);
 
 	for (int i = 0; i < 5; i++) {
 		int len;
@@ -2178,7 +2188,7 @@ test_enable_disable(void)
 
 	free(buf);
 
-	close(stream_fd);
+	__perf_close(stream_fd);
 }
 
 static void
@@ -2205,7 +2215,6 @@ test_short_reads(void)
 	uint8_t *pages = mmap(NULL, page_size * 2,
 			      PROT_READ|PROT_WRITE, MAP_PRIVATE, zero_fd, 0);
 	struct drm_i915_perf_record_header *header;
-	int stream_fd;
 	int ret;
 
 	igt_assert_neq(zero_fd, -1);
@@ -2262,7 +2271,7 @@ test_short_reads(void)
 	igt_assert_eq(ret, -1);
 	igt_assert_eq(errno, ENOSPC);
 
-	close(stream_fd);
+	__perf_close(stream_fd);
 
 	munmap(pages, page_size * 2);
 }
@@ -2287,14 +2296,16 @@ test_non_sampling_read_error(void)
 		.num_properties = sizeof(properties) / 16,
 		.properties_ptr = to_user_pointer(properties),
 	};
-	int stream_fd = __perf_open(drm_fd, &param);
+	int ret;
 	uint8_t buf[1024];
 
-	int ret = read(stream_fd, buf, sizeof(buf));
+	stream_fd = __perf_open(drm_fd, &param);
+
+	ret = read(stream_fd, buf, sizeof(buf));
 	igt_assert_eq(ret, -1);
 	igt_assert_eq(errno, EIO);
 
-	close(stream_fd);
+	__perf_close(stream_fd);
 }
 
 /* Check that attempts to read from a stream while it is disable will return
@@ -2321,25 +2332,24 @@ test_disabled_read_error(void)
 		.num_properties = sizeof(properties) / 16,
 		.properties_ptr = to_user_pointer(properties),
 	};
-	int stream_fd = __perf_open(drm_fd, &param);
 	uint32_t oa_report0[64];
 	uint32_t oa_report1[64];
 	uint32_t buf[128] = { 0 };
 	int ret;
 
+	stream_fd = __perf_open(drm_fd, &param);
 
 	ret = read(stream_fd, buf, sizeof(buf));
 	igt_assert_eq(ret, -1);
 	igt_assert_eq(errno, EIO);
 
-	close(stream_fd);
+	__perf_close(stream_fd);
 
 
 	param.flags &= ~I915_PERF_FLAG_DISABLED;
 	stream_fd = __perf_open(drm_fd, &param);
 
-	read_2_oa_reports(stream_fd,
-			  test_oa_format,
+	read_2_oa_reports(test_oa_format,
 			  oa_exponent,
 			  oa_report0,
 			  oa_report1,
@@ -2353,14 +2363,13 @@ test_disabled_read_error(void)
 
 	do_ioctl(stream_fd, I915_PERF_IOCTL_ENABLE, 0);
 
-	read_2_oa_reports(stream_fd,
-			  test_oa_format,
+	read_2_oa_reports(test_oa_format,
 			  oa_exponent,
 			  oa_report0,
 			  oa_report1,
 			  false); /* not just timer reports */
 
-	close(stream_fd);
+	__perf_close(stream_fd);
 }
 
 static void
@@ -2409,7 +2418,6 @@ test_mi_rpc(void)
 		.num_properties = sizeof(properties) / 16,
 		.properties_ptr = to_user_pointer(properties),
 	};
-	int stream_fd = __perf_open(drm_fd, &param);
 	drm_intel_bufmgr *bufmgr = drm_intel_bufmgr_gem_init(drm_fd, 4096);
 	drm_intel_context *context;
 	struct intel_batchbuffer *batch;
@@ -2417,6 +2425,8 @@ test_mi_rpc(void)
 	uint32_t *report32;
 	int ret;
 
+	stream_fd = __perf_open(drm_fd, &param);
+
 	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
 
 	context = drm_intel_gem_context_create(bufmgr);
@@ -2454,7 +2464,7 @@ test_mi_rpc(void)
 	intel_batchbuffer_free(batch);
 	drm_intel_gem_context_destroy(context);
 	drm_intel_bufmgr_destroy(bufmgr);
-	close(stream_fd);
+	__perf_close(stream_fd);
 }
 
 static void
@@ -2545,7 +2555,6 @@ hsw_test_single_ctx_counters(void)
 	igt_fork(child, 1) {
 		drm_intel_bufmgr *bufmgr;
 		drm_intel_context *context0, *context1;
-		int stream_fd;
 		struct intel_batchbuffer *batch;
 		struct igt_buf src, dst;
 		drm_intel_bo *bo;
@@ -2724,7 +2733,7 @@ hsw_test_single_ctx_counters(void)
 		drm_intel_gem_context_destroy(context0);
 		drm_intel_gem_context_destroy(context1);
 		drm_intel_bufmgr_destroy(bufmgr);
-		close(stream_fd);
+		__perf_close(stream_fd);
 	}
 
 	igt_waitchildren();
@@ -2747,10 +2756,12 @@ test_rc6_disable(void)
 		.num_properties = sizeof(properties) / 16,
 		.properties_ptr = to_user_pointer(properties),
 	};
-	int stream_fd = __perf_open(drm_fd, &param);
-	uint64_t n_events_start = read_debugfs_u64_record(drm_fd, "i915_drpc_info",
-							  "RC6 residency since boot");
-	uint64_t n_events_end;
+	uint64_t n_events_start, n_events_end;
+
+	stream_fd = __perf_open(drm_fd, &param);
+
+	n_events_start = read_debugfs_u64_record(drm_fd, "i915_drpc_info",
+						 "RC6 residency since boot");
 
 	nanosleep(&(struct timespec){ .tv_sec = 0, .tv_nsec = 500000000 }, NULL);
 
@@ -2759,12 +2770,12 @@ test_rc6_disable(void)
 
 	igt_assert_eq(n_events_end - n_events_start, 0);
 
-	close(stream_fd);
+	__perf_close(stream_fd);
 
 	n_events_start = read_debugfs_u64_record(drm_fd, "i915_drpc_info",
 						 "RC6 residency since boot");
 
-	nanosleep(&(struct timespec){ .tv_sec = 0, .tv_nsec = 500000000 }, NULL);
+	nanosleep(&(struct timespec){ .tv_sec = 1, .tv_nsec = 0 }, NULL);
 
 	n_events_end = read_debugfs_u64_record(drm_fd, "i915_drpc_info",
 					       "RC6 residency since boot");
@@ -2902,7 +2913,7 @@ test_create_destroy_userspace_config(void)
 	const char *uuid = "01234567-0123-0123-0123-0123456789ab";
 	uint32_t mux_regs[] = { 0x9888 /* NOA_WRITE */, 0x0 };
 	uint32_t flex_regs[100];
-	int i, stream_fd;
+	int i;
 	uint64_t config_id;
 	uint64_t properties[] = {
 		DRM_I915_PERF_PROP_OA_METRICS_SET, 0, /* Filled later */
@@ -2970,7 +2981,7 @@ test_create_destroy_userspace_config(void)
 	/* Read the config to verify shouldn't raise any issue. */
 	config_id = i915_perf_add_config(drm_fd, &config);
 
-	close(stream_fd);
+	__perf_close(stream_fd);
 
 	i915_perf_remove_config(drm_fd, config_id);
 }
@@ -3142,7 +3153,6 @@ test_i915_ref_count(void)
 		.properties_ptr = to_user_pointer(properties),
 	};
 	unsigned baseline, ref_count0, ref_count1;
-	int stream_fd;
 	uint32_t oa_report0[64];
 	uint32_t oa_report1[64];
 
@@ -3182,14 +3192,13 @@ test_i915_ref_count(void)
 
 	igt_assert(ref_count0 > baseline);
 
-	read_2_oa_reports(stream_fd,
-			  test_oa_format,
+	read_2_oa_reports(test_oa_format,
 			  oa_exp_1_millisec,
 			  oa_report0,
 			  oa_report1,
 			  false); /* not just timer reports */
 
-	close(stream_fd);
+	__perf_close(stream_fd);
 	ref_count0 = read_i915_module_ref();
 	igt_debug("ref count after closing i915 perf stream fd = %u\n", ref_count0);
 	igt_assert_eq(ref_count0, baseline);
-- 
2.14.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH i-g-t v4 02/12] tests/perf: add per context filtering test for gen8+
  2017-08-29 13:55 [PATCH i-g-t v4 00/12] Improve robustness of the i915 perf tests Lionel Landwerlin
  2017-08-29 13:55 ` [PATCH i-g-t v4 01/12] tests/perf: make stream_fd a global variable Lionel Landwerlin
@ 2017-08-29 13:55 ` Lionel Landwerlin
  2017-08-29 13:55 ` [PATCH i-g-t v4 03/12] tests/perf: update max buffer size for reading reports Lionel Landwerlin
                   ` (11 subsequent siblings)
  13 siblings, 0 replies; 15+ messages in thread
From: Lionel Landwerlin @ 2017-08-29 13:55 UTC (permalink / raw)
  To: intel-gfx

From: Robert Bragg <robert@sixbynine.org>

Signed-off-by: Robert Bragg <robert@sixbynine.org>
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 tests/perf.c | 777 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 745 insertions(+), 32 deletions(-)

diff --git a/tests/perf.c b/tests/perf.c
index ca5bfdc5..5058315c 100644
--- a/tests/perf.c
+++ b/tests/perf.c
@@ -48,7 +48,9 @@ IGT_TEST_DESCRIPTION("Test the i915 perf metrics streaming interface");
 #define OAREPORT_REASON_MASK           0x3f
 #define OAREPORT_REASON_SHIFT          19
 #define OAREPORT_REASON_TIMER          (1<<0)
+#define OAREPORT_REASON_INTERNAL       (3<<1)
 #define OAREPORT_REASON_CTX_SWITCH     (1<<3)
+#define OAREPORT_REASON_GO             (1<<4)
 #define OAREPORT_REASON_CLK_RATIO      (1<<5)
 
 #define GFX_OP_PIPE_CONTROL     ((3 << 29) | (3 << 27) | (2 << 24))
@@ -574,6 +576,22 @@ oa_exponent_to_ns(int exponent)
        return 1000000000ULL * (2ULL << exponent) / timestamp_frequency;
 }
 
+static bool
+oa_report_ctx_is_valid(uint32_t *report)
+{
+	if (IS_HASWELL(devid)) {
+		return false; /* TODO */
+	} else if (IS_GEN8(devid)) {
+		return report[0] & (1ul << 25);
+	} else if (IS_GEN9(devid)) {
+		return report[0] & (1ul << 16);
+	}
+
+	/* Need to update this function for newer Gen. */
+	igt_assert(!"reached");
+}
+
+
 static void
 hsw_sanity_check_render_basic_reports(uint32_t *oa_report0, uint32_t *oa_report1,
 				      enum drm_i915_oa_format fmt)
@@ -678,6 +696,100 @@ gen8_40bit_a_delta(uint64_t value0, uint64_t value1)
 		return value1 - value0;
 }
 
+static void
+accumulate_uint32(size_t offset,
+		  uint32_t *report0,
+                  uint32_t *report1,
+                  uint64_t *delta)
+{
+	uint32_t value0 = *(uint32_t *)(((uint8_t *)report0) + offset);
+	uint32_t value1 = *(uint32_t *)(((uint8_t *)report1) + offset);
+
+	*delta += (uint32_t)(value1 - value0);
+}
+
+static void
+accumulate_uint40(int a_index,
+                  uint32_t *report0,
+                  uint32_t *report1,
+		  enum drm_i915_oa_format format,
+                  uint64_t *delta)
+{
+	uint64_t value0 = gen8_read_40bit_a_counter(report0, format, a_index),
+		 value1 = gen8_read_40bit_a_counter(report1, format, a_index);
+
+	*delta += gen8_40bit_a_delta(value0, value1);
+}
+
+static void
+accumulate_reports(struct accumulator *accumulator,
+		   uint32_t *start,
+		   uint32_t *end)
+{
+	enum drm_i915_oa_format format = accumulator->format;
+	uint64_t *deltas = accumulator->deltas;
+	int idx = 0;
+
+	if (intel_gen(devid) >= 8) {
+		/* timestamp */
+		accumulate_uint32(4, start, end, deltas + idx++);
+
+		/* clock cycles */
+		accumulate_uint32(12, start, end, deltas + idx++);
+	} else {
+		/* timestamp */
+		accumulate_uint32(4, start, end, deltas + idx++);
+	}
+
+	for (int i = 0; i < oa_formats[format].n_a40; i++)
+		accumulate_uint40(i, start, end, format, deltas + idx++);
+
+	for (int i = 0; i < oa_formats[format].n_a; i++) {
+		accumulate_uint32(oa_formats[format].a_off + 4 * i,
+				  start, end, deltas + idx++);
+	}
+
+	for (int i = 0; i < oa_formats[format].n_b; i++) {
+		accumulate_uint32(oa_formats[format].b_off + 4 * i,
+				  start, end, deltas + idx++);
+	}
+
+	for (int i = 0; i < oa_formats[format].n_c; i++) {
+		accumulate_uint32(oa_formats[format].c_off + 4 * i,
+				  start, end, deltas + idx++);
+	}
+}
+
+static void
+accumulator_print(struct accumulator *accumulator, const char *title)
+{
+	enum drm_i915_oa_format format = accumulator->format;
+	uint64_t *deltas = accumulator->deltas;
+	int idx = 0;
+
+	igt_debug("%s:\n", title);
+	if (intel_gen(devid) >= 8) {
+		igt_debug("\ttime delta = %lu\n", deltas[idx++]);
+		igt_debug("\tclock cycle delta = %lu\n", deltas[idx++]);
+
+		for (int i = 0; i < oa_formats[format].n_a40; i++)
+			igt_debug("\tA%u = %lu\n", i, deltas[idx++]);
+	} else {
+		igt_debug("\ttime delta = %lu\n", deltas[idx++]);
+	}
+
+	for (int i = 0; i < oa_formats[format].n_a; i++) {
+		int a_id = oa_formats[format].first_a + i;
+		igt_debug("\tA%u = %lu\n", a_id, deltas[idx++]);
+	}
+
+	for (int i = 0; i < oa_formats[format].n_a; i++)
+		igt_debug("\tB%u = %lu\n", i, deltas[idx++]);
+
+	for (int i = 0; i < oa_formats[format].n_c; i++)
+		igt_debug("\tC%u = %lu\n", i, deltas[idx++]);
+}
+
 /* The TestOa metric set is designed so */
 static void
 gen8_sanity_check_test_oa_reports(uint32_t *oa_report0, uint32_t *oa_report1,
@@ -926,6 +1038,62 @@ gt_frequency_range_restore(void)
 	gt_max_freq_mhz = gt_max_freq_mhz_saved;
 }
 
+static int
+i915_read_reports_until_timestamp(enum drm_i915_oa_format oa_format,
+				  uint8_t *buf,
+				  uint32_t max_size,
+				  uint32_t start_timestamp,
+				  uint32_t end_timestamp)
+{
+	size_t format_size = oa_formats[oa_format].size;
+	uint32_t last_seen_timestamp = start_timestamp;
+	int total_len = 0;
+
+	while (last_seen_timestamp < end_timestamp) {
+		int offset, len;
+
+		/* Running out of space. */
+		if ((max_size - total_len) < format_size) {
+			igt_warn("run out of space before reaching "
+				 "end timestamp (%u/%u)\n",
+				 last_seen_timestamp, end_timestamp);
+			return -1;
+		}
+
+		while ((len = read(stream_fd, &buf[total_len],
+				   max_size - total_len)) < 0 &&
+		       errno == EINTR)
+			;
+
+		/* Intentionally return an error. */
+		if (len <= 0) {
+			if (errno == EAGAIN)
+				return total_len;
+			else {
+				igt_warn("error read OA stream : %i\n", errno);
+				return -1;
+			}
+		}
+
+		offset = total_len;
+		total_len += len;
+
+		while (offset < total_len) {
+		  const struct drm_i915_perf_record_header *header =
+		    (const struct drm_i915_perf_record_header *) &buf[offset];
+		  uint32_t *report = (uint32_t *) (header + 1);
+
+		  if (header->type == DRM_I915_PERF_RECORD_SAMPLE)
+		    last_seen_timestamp = report[1];
+
+		  offset += header->size;
+		}
+	}
+
+	return total_len;
+}
+
+
 /* CAP_SYS_ADMIN is required to open system wide metrics, unless the system
  * control parameter dev.i915.perf_stream_paranoid == 0 */
 static void
@@ -1344,6 +1512,70 @@ print_reports(uint32_t *oa_report0, uint32_t *oa_report1, int fmt)
 	}
 }
 
+/* Debug function, only useful when reports don't make sense. */
+#if 0
+static void
+print_report(uint32_t *report, int fmt)
+{
+	igt_debug("TIMESTAMP: %"PRIu32"\n", report[1]);
+
+	if (IS_HASWELL(devid) && oa_formats[fmt].n_c == 0) {
+		igt_debug("CLOCK = N/A\n");
+	} else {
+		uint32_t clock = read_report_ticks(report, fmt);
+
+		igt_debug("CLOCK: %"PRIu32"\n", clock);
+	}
+
+	if (intel_gen(devid) >= 8) {
+		uint32_t slice_freq, unslice_freq;
+		const char *reason = gen8_read_report_reason(report);
+
+		gen8_read_report_clock_ratios(report, &slice_freq, &unslice_freq);
+
+		igt_debug("SLICE CLK: %umhz\n", slice_freq);
+		igt_debug("UNSLICE CLK: %umhz\n", unslice_freq);
+		igt_debug("REASON: \"%s\"\n", reason);
+		igt_debug("CTX ID: %"PRIu32"/%"PRIx32"\n", report[2], report[2]);
+	}
+
+	/* Gen8+ has some 40bit A counters... */
+	for (int j = 0; j < oa_formats[fmt].n_a40; j++) {
+		uint64_t value = gen8_read_40bit_a_counter(report, fmt, j);
+
+		if (undefined_a_counters[j])
+			continue;
+
+		igt_debug("A%d: %"PRIu64"\n", j, value);
+	}
+
+	for (int j = 0; j < oa_formats[fmt].n_a; j++) {
+		uint32_t *a = (uint32_t *)(((uint8_t *)report) +
+					   oa_formats[fmt].a_off);
+		int a_id = oa_formats[fmt].first_a + j;
+
+		if (undefined_a_counters[a_id])
+			continue;
+
+		igt_debug("A%d: %"PRIu32"\n", a_id, a[j]);
+	}
+
+	for (int j = 0; j < oa_formats[fmt].n_b; j++) {
+		uint32_t *b = (uint32_t *)(((uint8_t *)report) +
+					   oa_formats[fmt].b_off);
+
+		igt_debug("B%d: %"PRIu32"\n", j, b[j]);
+	}
+
+	for (int j = 0; j < oa_formats[fmt].n_c; j++) {
+		uint32_t *c = (uint32_t *)(((uint8_t *)report) +
+					   oa_formats[fmt].c_off);
+
+		igt_debug("C%d: %"PRIu32"\n", j, c[j]);
+	}
+}
+#endif
+
 static void
 test_oa_formats(void)
 {
@@ -2468,14 +2700,8 @@ test_mi_rpc(void)
 }
 
 static void
-scratch_buf_init(drm_intel_bufmgr *bufmgr,
-		 struct igt_buf *buf,
-		 int width, int height,
-		 uint32_t color)
+scratch_buf_memset(drm_intel_bo *bo, int width, int height, uint32_t color)
 {
-	size_t stride = width * 4;
-	size_t size = stride * height;
-	drm_intel_bo *bo = drm_intel_bo_alloc(bufmgr, "", size, 4096);
 	int ret;
 
 	ret = drm_intel_bo_map(bo, true /* writable */);
@@ -2485,6 +2711,19 @@ scratch_buf_init(drm_intel_bufmgr *bufmgr,
 		((uint32_t *)bo->virtual)[i] = color;
 
 	drm_intel_bo_unmap(bo);
+}
+
+static void
+scratch_buf_init(drm_intel_bufmgr *bufmgr,
+		 struct igt_buf *buf,
+		 int width, int height,
+		 uint32_t color)
+{
+	size_t stride = width * 4;
+	size_t size = stride * height;
+	drm_intel_bo *bo = drm_intel_bo_alloc(bufmgr, "", size, 4096);
+
+	scratch_buf_memset(bo, width, height, color);
 
 	buf->bo = bo;
 	buf->stride = stride;
@@ -2503,14 +2742,25 @@ emit_stall_timestamp_and_rpc(struct intel_batchbuffer *batch,
 				   PIPE_CONTROL_RENDER_TARGET_FLUSH |
 				   PIPE_CONTROL_WRITE_TIMESTAMP);
 
-	BEGIN_BATCH(5, 1);
-	OUT_BATCH(GFX_OP_PIPE_CONTROL | (5 - 2));
-	OUT_BATCH(pipe_ctl_flags);
-	OUT_RELOC(dst, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
-		  timestamp_offset);
-	OUT_BATCH(0); /* imm lower */
-	OUT_BATCH(0); /* imm upper */
-	ADVANCE_BATCH();
+	if (intel_gen(devid) >= 8) {
+		BEGIN_BATCH(5, 1);
+		OUT_BATCH(GFX_OP_PIPE_CONTROL | (6 - 2));
+		OUT_BATCH(pipe_ctl_flags);
+		OUT_RELOC(dst, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+			  timestamp_offset);
+		OUT_BATCH(0); /* imm lower */
+		OUT_BATCH(0); /* imm upper */
+		ADVANCE_BATCH();
+	} else {
+		BEGIN_BATCH(5, 1);
+		OUT_BATCH(GFX_OP_PIPE_CONTROL | (5 - 2));
+		OUT_BATCH(pipe_ctl_flags);
+		OUT_RELOC(dst, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+			  timestamp_offset);
+		OUT_BATCH(0); /* imm lower */
+		OUT_BATCH(0); /* imm upper */
+		ADVANCE_BATCH();
+	}
 
 	emit_report_perf_count(batch, dst, report_dst_offset, report_id);
 }
@@ -2556,7 +2806,7 @@ hsw_test_single_ctx_counters(void)
 		drm_intel_bufmgr *bufmgr;
 		drm_intel_context *context0, *context1;
 		struct intel_batchbuffer *batch;
-		struct igt_buf src, dst;
+		struct igt_buf src[3], dst[3];
 		drm_intel_bo *bo;
 		uint32_t *report0_32, *report1_32;
 		uint64_t timestamp0_64, timestamp1_64;
@@ -2574,8 +2824,10 @@ hsw_test_single_ctx_counters(void)
 		bufmgr = drm_intel_bufmgr_gem_init(drm_fd, 4096);
 		drm_intel_bufmgr_gem_enable_reuse(bufmgr);
 
-		scratch_buf_init(bufmgr, &src, width, height, 0xff0000ff);
-		scratch_buf_init(bufmgr, &dst, width, height, 0x00ff00ff);
+		for (int i = 0; i < ARRAY_SIZE(src); i++) {
+			scratch_buf_init(bufmgr, &src[i], width, height, 0xff0000ff);
+			scratch_buf_init(bufmgr, &dst[i], width, height, 0x00ff00ff);
+		}
 
 		batch = intel_batchbuffer_alloc(bufmgr, devid);
 
@@ -2609,14 +2861,19 @@ hsw_test_single_ctx_counters(void)
 		 */
 		render_copy(batch,
 			    context0,
-			    &src, 0, 0, width, height,
-			    &dst, 0, 0);
+			    &src[0], 0, 0, width, height,
+			    &dst[0], 0, 0);
 
 		ret = drm_intel_gem_context_get_id(context0, &ctx_id);
 		igt_assert_eq(ret, 0);
 		igt_assert_neq(ctx_id, 0xffffffff);
 		properties[1] = ctx_id;
 
+		intel_batchbuffer_flush_with_context(batch, context0);
+
+		scratch_buf_memset(src[0].bo, width, height, 0xff0000ff);
+		scratch_buf_memset(dst[0].bo, width, height, 0x00ff00ff);
+
 		igt_debug("opening i915-perf stream\n");
 		stream_fd = __perf_open(drm_fd, &param);
 
@@ -2643,8 +2900,8 @@ hsw_test_single_ctx_counters(void)
 
 		render_copy(batch,
 			    context0,
-			    &src, 0, 0, width, height,
-			    &dst, 0, 0);
+			    &src[0], 0, 0, width, height,
+			    &dst[0], 0, 0);
 
 		/* Another redundant flush to clarify batch bo is free to reuse */
 		intel_batchbuffer_flush_with_context(batch, context0);
@@ -2655,13 +2912,13 @@ hsw_test_single_ctx_counters(void)
 		 */
 		render_copy(batch,
 			    context1,
-			    &src, 0, 0, width, height,
-			    &dst, 0, 0);
+			    &src[1], 0, 0, width, height,
+			    &dst[1], 0, 0);
 
 		render_copy(batch,
 			    context1,
-			    &src, 0, 0, width, height,
-			    &dst, 0, 0);
+			    &src[2], 0, 0, width, height,
+			    &dst[2], 0, 0);
 
 		/* And another */
 		intel_batchbuffer_flush_with_context(batch, context1);
@@ -2690,6 +2947,7 @@ hsw_test_single_ctx_counters(void)
 
 		/* A40 == N samples written to all render targets */
 		n_samples_written = report1_32[43] - report0_32[43];
+
 		igt_debug("n samples written = %d\n", n_samples_written);
 		igt_assert_eq(n_samples_written, width * height);
 
@@ -2724,8 +2982,10 @@ hsw_test_single_ctx_counters(void)
 			(delta_oa32_ns - delta_ts64_ns);
 		igt_assert(delta_delta <= 320);
 
-		drm_intel_bo_unreference(src.bo);
-		drm_intel_bo_unreference(dst.bo);
+		for (int i = 0; i < ARRAY_SIZE(src); i++) {
+			drm_intel_bo_unreference(src[i].bo);
+			drm_intel_bo_unreference(dst[i].bo);
+		}
 
 		drm_intel_bo_unmap(bo);
 		drm_intel_bo_unreference(bo);
@@ -2739,6 +2999,452 @@ hsw_test_single_ctx_counters(void)
 	igt_waitchildren();
 }
 
+/* Tests the INTEL_performance_query use case where an unprivileged process
+ * should be able to configure the OA unit for per-context metrics (for a
+ * context associated with that process' drm file descriptor) and the counters
+ * should only relate to that specific context.
+ *
+ * For Gen8+ although reports read via i915 perf can be filtered for a single
+ * context the counters themselves always progress as global/system-wide
+ * counters affected by all contexts. To support the INTEL_performance_query
+ * use case on Gen8+ it's necessary to combine OABUFFER and
+ * MI_REPORT_PERF_COUNT reports so that counter normalisation can take into
+ * account context-switch reports and factor out any counter progression not
+ * associated with the current context.
+ */
+static void
+gen8_test_single_ctx_render_target_writes_a_counter(void)
+{
+	int oa_exponent = max_oa_exponent_for_period_lte(1000000);
+	uint64_t properties[] = {
+		DRM_I915_PERF_PROP_CTX_HANDLE, UINT64_MAX, /* updated below */
+
+		/* Note: we have to specify at least one sample property even
+		 * though we aren't interested in samples in this case
+		 */
+		DRM_I915_PERF_PROP_SAMPLE_OA, true,
+
+		/* OA unit configuration */
+		DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id,
+		DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format,
+		DRM_I915_PERF_PROP_OA_EXPONENT, oa_exponent,
+
+		/* Note: no OA exponent specified in this case */
+	};
+	struct drm_i915_perf_open_param param = {
+		.flags = I915_PERF_FLAG_FD_CLOEXEC,
+		.num_properties = ARRAY_SIZE(properties) / 2,
+		.properties_ptr = to_user_pointer(properties),
+	};
+	size_t format_size = oa_formats[test_oa_format].size;
+	size_t sample_size = (sizeof(struct drm_i915_perf_record_header) +
+			      format_size);
+	int max_reports = (16 * 1024 * 1024) / format_size;
+	int buf_size = sample_size * max_reports * 1.5;
+	int child_ret;
+	uint8_t *buf = malloc(buf_size);
+	ssize_t len;
+	struct igt_helper_process child = {};
+
+	/* should be default, but just to be sure... */
+	write_u64_file("/proc/sys/dev/i915/perf_stream_paranoid", 1);
+
+	do {
+
+		igt_fork_helper(&child) {
+			struct drm_i915_perf_record_header *header;
+			drm_intel_bufmgr *bufmgr;
+			drm_intel_context *context0, *context1;
+			struct intel_batchbuffer *batch;
+			struct igt_buf src[3], dst[3];
+			drm_intel_bo *bo;
+			uint32_t *report0_32, *report1_32;
+			uint32_t *prev, *lprev = NULL;
+			uint64_t timestamp0_64, timestamp1_64;
+			uint32_t delta_ts64, delta_oa32;
+			uint64_t delta_ts64_ns, delta_oa32_ns;
+			uint32_t delta_delta;
+			int width = 800;
+			int height = 600;
+			uint32_t ctx_id = 0xffffffff; /* invalid handle */
+			uint32_t ctx1_id = 0xffffffff;  /* invalid handle */
+			uint32_t current_ctx_id = 0xffffffff;
+			uint32_t n_invalid_ctx = 0;
+			int ret;
+			struct accumulator accumulator = {
+				.format = test_oa_format
+			};
+
+			bufmgr = drm_intel_bufmgr_gem_init(drm_fd, 4096);
+			drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+
+			for (int i = 0; i < ARRAY_SIZE(src); i++) {
+				scratch_buf_init(bufmgr, &src[i], width, height, 0xff0000ff);
+				scratch_buf_init(bufmgr, &dst[i], width, height, 0x00ff00ff);
+			}
+
+			batch = intel_batchbuffer_alloc(bufmgr, devid);
+
+			context0 = drm_intel_gem_context_create(bufmgr);
+			igt_assert(context0);
+
+			context1 = drm_intel_gem_context_create(bufmgr);
+			igt_assert(context1);
+
+			igt_debug("submitting warm up render_copy\n");
+
+			/* Submit some early, unmeasured, work to the context we want
+			 * to measure to try and catch issues with i915-perf
+			 * initializing the HW context ID for filtering.
+			 *
+			 * We do this because i915-perf single context filtering had
+			 * previously only relied on a hook into context pinning to
+			 * initialize the HW context ID, instead of also trying to
+			 * determine the HW ID while opening the stream, in case it
+			 * has already been pinned.
+			 *
+			 * This wasn't noticed by the previous unit test because we
+			 * were opening the stream while the context hadn't been
+			 * touched or pinned yet and so it worked out correctly to wait
+			 * for the pinning hook.
+			 *
+			 * Now a buggy version of i915-perf will fail to measure
+			 * anything for context0 once this initial render_copy() ends
+			 * up pinning the context since there won't ever be a pinning
+			 * hook callback.
+			 */
+			render_copy(batch,
+				    context0,
+				    &src[0], 0, 0, width, height,
+				    &dst[0], 0, 0);
+
+			ret = drm_intel_gem_context_get_id(context0, &ctx_id);
+			igt_assert_eq(ret, 0);
+			igt_assert_neq(ctx_id, 0xffffffff);
+			properties[1] = ctx_id;
+
+			scratch_buf_memset(src[0].bo, width, height, 0xff0000ff);
+			scratch_buf_memset(dst[0].bo, width, height, 0x00ff00ff);
+
+			igt_debug("opening i915-perf stream\n");
+			stream_fd = __perf_open(drm_fd, &param);
+
+			bo = drm_intel_bo_alloc(bufmgr, "mi_rpc dest bo", 4096, 64);
+
+			ret = drm_intel_bo_map(bo, true /* write enable */);
+			igt_assert_eq(ret, 0);
+
+			memset(bo->virtual, 0x80, 4096);
+			drm_intel_bo_unmap(bo);
+
+			emit_stall_timestamp_and_rpc(batch,
+						     bo,
+						     512 /* timestamp offset */,
+						     0, /* report dst offset */
+						     0xdeadbeef); /* report id */
+
+			/* Explicitly flush here (even though the render_copy() call
+			 * will itself flush before/after the copy) to clarify that
+			 * that the PIPE_CONTROL + MI_RPC commands will be in a
+			 * separate batch from the copy.
+			 */
+			intel_batchbuffer_flush_with_context(batch, context0);
+
+			render_copy(batch,
+				    context0,
+				    &src[0], 0, 0, width, height,
+				    &dst[0], 0, 0);
+
+			/* Another redundant flush to clarify batch bo is free to reuse */
+			intel_batchbuffer_flush_with_context(batch, context0);
+
+			/* submit two copies on the other context to avoid a false
+			 * positive in case the driver somehow ended up filtering for
+			 * context1
+			 */
+			render_copy(batch,
+				    context1,
+				    &src[1], 0, 0, width, height,
+				    &dst[1], 0, 0);
+
+			ret = drm_intel_gem_context_get_id(context1, &ctx1_id);
+			igt_assert_eq(ret, 0);
+			igt_assert_neq(ctx1_id, 0xffffffff);
+
+			render_copy(batch,
+				    context1,
+				    &src[2], 0, 0, width, height,
+				    &dst[2], 0, 0);
+
+			/* And another */
+			intel_batchbuffer_flush_with_context(batch, context1);
+
+			emit_stall_timestamp_and_rpc(batch,
+						     bo,
+						     520 /* timestamp offset */,
+						     256, /* report dst offset */
+						     0xbeefbeef); /* report id */
+
+			intel_batchbuffer_flush_with_context(batch, context1);
+
+			ret = drm_intel_bo_map(bo, false /* write enable */);
+			igt_assert_eq(ret, 0);
+
+			report0_32 = bo->virtual;
+			igt_assert_eq(report0_32[0], 0xdeadbeef); /* report ID */
+			igt_assert_neq(report0_32[1], 0); /* timestamp */
+			//report0_32[2] = 0xffffffff;
+			prev = report0_32;
+			ctx_id = prev[2];
+			igt_debug("MI_RPC(start) CTX ID: %u\n", ctx_id);
+
+			report1_32 = report0_32 + 64; /* 64 uint32_t = 256bytes offset */
+			igt_assert_eq(report1_32[0], 0xbeefbeef); /* report ID */
+			igt_assert_neq(report1_32[1], 0); /* timestamp */
+			//report1_32[2] = 0xffffffff;
+			ctx1_id = report1_32[2];
+
+			memset(accumulator.deltas, 0, sizeof(accumulator.deltas));
+			accumulate_reports(&accumulator, report0_32, report1_32);
+			igt_debug("total: A0 = %lu, A21 = %lu, A26 = %lu\n",
+				  accumulator.deltas[2 + 0], /* skip timestamp + clock cycles */
+				  accumulator.deltas[2 + 21],
+				  accumulator.deltas[2 + 26]);
+
+			igt_debug("oa_timestamp32 0 = %u\n", report0_32[1]);
+			igt_debug("oa_timestamp32 1 = %u\n", report1_32[1]);
+			igt_debug("ctx_id 0 = %u\n", report0_32[2]);
+			igt_debug("ctx_id 1 = %u\n", report1_32[2]);
+
+			timestamp0_64 = *(uint64_t *)(((uint8_t *)bo->virtual) + 512);
+			timestamp1_64 = *(uint64_t *)(((uint8_t *)bo->virtual) + 520);
+
+			igt_debug("ts_timestamp64 0 = %"PRIu64"\n", timestamp0_64);
+			igt_debug("ts_timestamp64 1 = %"PRIu64"\n", timestamp1_64);
+
+			delta_ts64 = timestamp1_64 - timestamp0_64;
+			delta_oa32 = report1_32[1] - report0_32[1];
+
+			/* sanity check that we can pass the delta to timebase_scale */
+			igt_assert(delta_ts64 < UINT32_MAX);
+			delta_oa32_ns = timebase_scale(delta_oa32);
+			delta_ts64_ns = timebase_scale(delta_ts64);
+
+			igt_debug("oa32 delta = %u, = %uns\n",
+				  delta_oa32, (unsigned)delta_oa32_ns);
+			igt_debug("ts64 delta = %u, = %uns\n",
+				  delta_ts64, (unsigned)delta_ts64_ns);
+
+			/* The delta as calculated via the PIPE_CONTROL timestamp or
+			 * the OA report timestamps should be almost identical but
+			 * allow a 500 nanoseconds margin.
+			 */
+			delta_delta = delta_ts64_ns > delta_oa32_ns ?
+				(delta_ts64_ns - delta_oa32_ns) :
+				(delta_oa32_ns - delta_ts64_ns);
+			if (delta_delta > 500) {
+				igt_debug("skipping\n");
+				exit(EAGAIN);
+			}
+
+			len = i915_read_reports_until_timestamp(test_oa_format,
+								buf, buf_size,
+								report0_32[1],
+								report1_32[1]);
+
+			igt_assert(len > 0);
+			igt_debug("read %d bytes\n", (int)len);
+
+			memset(accumulator.deltas, 0, sizeof(accumulator.deltas));
+
+			for (size_t offset = 0; offset < len; offset += header->size) {
+				uint32_t *report;
+				uint32_t reason;
+				const char *skip_reason = NULL, *report_reason = NULL;
+				struct accumulator laccumulator = {
+					.format = test_oa_format
+				};
+
+
+				header = (void *)(buf + offset);
+
+				igt_assert_eq(header->pad, 0); /* Reserved */
+
+				/* Currently the only test that should ever expect to
+				 * see a _BUFFER_LOST error is the buffer_fill test,
+				 * otherwise something bad has probably happened...
+				 */
+				igt_assert_neq(header->type, DRM_I915_PERF_RECORD_OA_BUFFER_LOST);
+
+				/* At high sampling frequencies the OA HW might not be
+				 * able to cope with all write requests and will notify
+				 * us that a report was lost.
+				 *
+				 * XXX: we should maybe restart the test in this case?
+				 */
+				if (header->type == DRM_I915_PERF_RECORD_OA_REPORT_LOST) {
+					igt_debug("OA trigger collision / report lost\n");
+					exit(EAGAIN);
+				}
+
+				/* Currently the only other record type expected is a
+				 * _SAMPLE. Notably this test will need updating if
+				 * i915-perf is extended in the future with additional
+				 * record types.
+				 */
+				igt_assert_eq(header->type, DRM_I915_PERF_RECORD_SAMPLE);
+
+				igt_assert_eq(header->size, sample_size);
+
+				report = (void *)(header + 1);
+
+				/* Don't expect zero for timestamps */
+				igt_assert_neq(report[1], 0);
+
+				igt_debug("report %p:\n", report);
+
+				/* Discard reports not contained in between the
+				 * timestamps we're looking at. */
+				{
+					uint32_t time_delta = report[1] - report0_32[1];
+
+					if (timebase_scale(time_delta) > 1000000000) {
+						skip_reason = "prior first mi-rpc";
+					}
+				}
+
+				{
+					uint32_t time_delta = report[1] - report1_32[1];
+
+					if (timebase_scale(time_delta) <= 1000000000) {
+						igt_debug("    comes after last MI_RPC (%u)\n",
+							  report1_32[1]);
+						report = report1_32;
+					}
+				}
+
+				/* Print out deltas for a few significant
+				 * counters for each report. */
+				if (lprev) {
+					memset(laccumulator.deltas, 0, sizeof(laccumulator.deltas));
+					accumulate_reports(&laccumulator, lprev, report);
+					igt_debug("    deltas: A0=%lu A21=%lu, A26=%lu\n",
+						  laccumulator.deltas[2 + 0], /* skip timestamp + clock cycles */
+						  laccumulator.deltas[2 + 21],
+						  laccumulator.deltas[2 + 26]);
+				}
+				lprev = report;
+
+				/* Print out reason for the report. */
+				reason = ((report[0] >> OAREPORT_REASON_SHIFT) &
+					  OAREPORT_REASON_MASK);
+
+				if (reason & OAREPORT_REASON_CTX_SWITCH) {
+					report_reason = "ctx-load";
+				} else if (reason & OAREPORT_REASON_TIMER) {
+					report_reason = "timer";
+				} else if (reason & OAREPORT_REASON_INTERNAL ||
+					   reason & OAREPORT_REASON_GO ||
+					   reason & OAREPORT_REASON_CLK_RATIO) {
+					report_reason = "internal/go/clk-ratio";
+				} else {
+					report_reason = "end-mi-rpc";
+				}
+				igt_debug("    ctx_id=%u/%x reason=%s oa_timestamp32=%u\n",
+					  report[2], report[2], report_reason, report[1]);
+
+				/* Should we skip this report?
+				 *
+				 *   Only if the current context id of
+				 *   the stream is not the one we want
+				 *   to measure.
+				 */
+				if (current_ctx_id != ctx_id) {
+					skip_reason = "not our context";
+				}
+
+				if (n_invalid_ctx > 1) {
+					skip_reason = "too many invalid context events";
+				}
+
+				if (!skip_reason) {
+					accumulate_reports(&accumulator, prev, report);
+					igt_debug(" -> Accumulated deltas A0=%lu A21=%lu, A26=%lu\n",
+						  accumulator.deltas[2 + 0], /* skip timestamp + clock cycles */
+						  accumulator.deltas[2 + 21],
+						  accumulator.deltas[2 + 26]);
+				} else {
+					igt_debug(" -> Skipping: %s\n", skip_reason);
+				}
+
+
+				/* Finally update current-ctx_id, only possible
+				 * with a valid context id. */
+				if (oa_report_ctx_is_valid(report)) {
+					current_ctx_id = report[2];
+					n_invalid_ctx = 0;
+				} else {
+					n_invalid_ctx++;
+				}
+
+				prev = report;
+
+				if (report == report1_32) {
+					igt_debug("Breaking on end of report\n");
+					print_reports(report0_32, report1_32,
+						      lookup_format(test_oa_format));
+					break;
+				}
+			}
+
+			igt_debug("n samples written = %ld/%lu (%ix%i)\n",
+				  accumulator.deltas[2 + 21],/* skip timestamp + clock cycles */
+				  accumulator.deltas[2 + 26],
+				  width, height);
+			accumulator_print(&accumulator, "filtered");
+
+			ret = drm_intel_bo_map(src[0].bo, false /* write enable */);
+			igt_assert_eq(ret, 0);
+			ret = drm_intel_bo_map(dst[0].bo, false /* write enable */);
+			igt_assert_eq(ret, 0);
+
+			ret = memcmp(src[0].bo->virtual, dst[0].bo->virtual, 4 * width * height);
+			if (ret != 0) {
+				accumulator_print(&accumulator, "total");
+				/* This needs to be investigated... From time
+				 * to time, the work we kick off doesn't seem
+				 * to happen. WTH?? */
+				exit(EAGAIN);
+			}
+			//igt_assert_eq(ret, 0);
+
+			drm_intel_bo_unmap(src[0].bo);
+			drm_intel_bo_unmap(dst[0].bo);
+
+			igt_assert_eq(accumulator.deltas[2 + 26], width * height);
+
+			for (int i = 0; i < ARRAY_SIZE(src); i++) {
+				drm_intel_bo_unreference(src[i].bo);
+				drm_intel_bo_unreference(dst[i].bo);
+			}
+
+			drm_intel_bo_unmap(bo);
+			drm_intel_bo_unreference(bo);
+			intel_batchbuffer_free(batch);
+			drm_intel_gem_context_destroy(context0);
+			drm_intel_gem_context_destroy(context1);
+			drm_intel_bufmgr_destroy(bufmgr);
+			__perf_close(stream_fd);
+		}
+
+		child_ret = igt_wait_helper(&child);
+
+		igt_assert(WEXITSTATUS(child_ret) == EAGAIN ||
+			   WEXITSTATUS(child_ret) == 0);
+
+	} while (WEXITSTATUS(child_ret) == EAGAIN);
+}
+
 static void
 test_rc6_disable(void)
 {
@@ -3280,8 +3986,10 @@ igt_main
 		test_oa_exponents(550);
 	}
 
-	igt_subtest("per-context-mode-unprivileged")
+	igt_subtest("per-context-mode-unprivileged") {
+		igt_require(IS_HASWELL(devid));
 		test_per_context_mode_unprivileged();
+	}
 
 	igt_subtest("buffer-fill")
 		test_buffer_fill();
@@ -3306,15 +4014,20 @@ igt_main
 	igt_subtest("mi-rpc")
 		test_mi_rpc();
 
-	igt_subtest("unprivileged-singled-ctx-counters") {
+	igt_subtest("unprivileged-single-ctx-counters") {
+		igt_require(IS_HASWELL(devid));
+		hsw_test_single_ctx_counters();
+	}
+
+	igt_subtest("gen8-unprivileged-single-ctx-counters") {
 		/* For Gen8+ the OA unit can no longer be made to clock gate
 		 * for a specific context. Additionally the partial-replacement
 		 * functionality to HW filter timer reports for a specific
 		 * context (SKL+) can't stop multiple applications viewing
 		 * system-wide data via MI_REPORT_PERF_COUNT commands.
 		 */
-		igt_require(IS_HASWELL(devid));
-		hsw_test_single_ctx_counters();
+		igt_require(intel_gen(devid) >= 8);
+		gen8_test_single_ctx_render_target_writes_a_counter();
 	}
 
 	igt_subtest("rc6-disable")
-- 
2.14.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH i-g-t v4 03/12] tests/perf: update max buffer size for reading reports
  2017-08-29 13:55 [PATCH i-g-t v4 00/12] Improve robustness of the i915 perf tests Lionel Landwerlin
  2017-08-29 13:55 ` [PATCH i-g-t v4 01/12] tests/perf: make stream_fd a global variable Lionel Landwerlin
  2017-08-29 13:55 ` [PATCH i-g-t v4 02/12] tests/perf: add per context filtering test for gen8+ Lionel Landwerlin
@ 2017-08-29 13:55 ` Lionel Landwerlin
  2017-08-29 13:55 ` [PATCH i-g-t v4 04/12] tests/perf: rc6: try to guess when rc6 is disabled Lionel Landwerlin
                   ` (10 subsequent siblings)
  13 siblings, 0 replies; 15+ messages in thread
From: Lionel Landwerlin @ 2017-08-29 13:55 UTC (permalink / raw)
  To: intel-gfx

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 tests/perf.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/perf.c b/tests/perf.c
index 5058315c..bc5ea133 100644
--- a/tests/perf.c
+++ b/tests/perf.c
@@ -1280,9 +1280,7 @@ read_2_oa_reports(int format_id,
 	/* Note: we allocate a large buffer so that each read() iteration
 	 * should scrape *all* pending records.
 	 *
-	 * The largest buffer the OA unit supports is 16MB and the smallest
-	 * OA report format is 64bytes allowing up to 262144 reports to
-	 * be buffered.
+	 * The largest buffer the OA unit supports is 16MB.
 	 *
 	 * Being sure we are fetching all buffered reports allows us to
 	 * potentially throw away / skip all reports whenever we see
@@ -1295,7 +1293,8 @@ read_2_oa_reports(int format_id,
 	 * to indicate that the OA unit may be over taxed if lots of reports
 	 * are being lost.
 	 */
-	int buf_size = 262144 * (64 + sizeof(struct drm_i915_perf_record_header));
+	int max_reports = (16 * 1024 * 1024) / format_size;
+	int buf_size = sample_size * max_reports * 1.5;
 	uint8_t *buf = malloc(buf_size);
 	int n = 0;
 
@@ -1307,6 +1306,7 @@ read_2_oa_reports(int format_id,
 			;
 
 		igt_assert(len > 0);
+		igt_debug("read %d bytes\n", (int)len);
 
 		for (size_t offset = 0; offset < len; offset += header->size) {
 			const uint32_t *report;
-- 
2.14.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH i-g-t v4 04/12] tests/perf: rc6: try to guess when rc6 is disabled
  2017-08-29 13:55 [PATCH i-g-t v4 00/12] Improve robustness of the i915 perf tests Lionel Landwerlin
                   ` (2 preceding siblings ...)
  2017-08-29 13:55 ` [PATCH i-g-t v4 03/12] tests/perf: update max buffer size for reading reports Lionel Landwerlin
@ 2017-08-29 13:55 ` Lionel Landwerlin
  2017-08-29 13:55 ` [PATCH i-g-t v4 05/12] tests/perf: remove frequency related changes Lionel Landwerlin
                   ` (9 subsequent siblings)
  13 siblings, 0 replies; 15+ messages in thread
From: Lionel Landwerlin @ 2017-08-29 13:55 UTC (permalink / raw)
  To: intel-gfx

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 tests/perf.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/tests/perf.c b/tests/perf.c
index bc5ea133..1b441601 100644
--- a/tests/perf.c
+++ b/tests/perf.c
@@ -3445,6 +3445,17 @@ gen8_test_single_ctx_render_target_writes_a_counter(void)
 	} while (WEXITSTATUS(child_ret) == EAGAIN);
 }
 
+static bool
+rc6_enabled(void)
+{
+	char *rc6_status = read_debugfs_record(drm_fd, "i915_drpc_info",
+					       "RC6 Enabled");
+	bool enabled = strcmp(rc6_status, "yes") == 0;
+
+	free(rc6_status);
+	return enabled;
+}
+
 static void
 test_rc6_disable(void)
 {
@@ -3464,6 +3475,8 @@ test_rc6_disable(void)
 	};
 	uint64_t n_events_start, n_events_end;
 
+	igt_skip_on(!rc6_enabled());
+
 	stream_fd = __perf_open(drm_fd, &param);
 
 	n_events_start = read_debugfs_u64_record(drm_fd, "i915_drpc_info",
-- 
2.14.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH i-g-t v4 05/12] tests/perf: remove frequency related changes
  2017-08-29 13:55 [PATCH i-g-t v4 00/12] Improve robustness of the i915 perf tests Lionel Landwerlin
                   ` (3 preceding siblings ...)
  2017-08-29 13:55 ` [PATCH i-g-t v4 04/12] tests/perf: rc6: try to guess when rc6 is disabled Lionel Landwerlin
@ 2017-08-29 13:55 ` Lionel Landwerlin
  2017-08-29 13:55 ` [PATCH i-g-t v4 06/12] tests/perf: rework oa-exponent test Lionel Landwerlin
                   ` (8 subsequent siblings)
  13 siblings, 0 replies; 15+ messages in thread
From: Lionel Landwerlin @ 2017-08-29 13:55 UTC (permalink / raw)
  To: intel-gfx

Experience shows that most of the issues we face with periodicity of
the reports produced by the OA unit are related to power management,
not frequency.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 tests/perf.c | 141 ++++-------------------------------------------------------
 1 file changed, 9 insertions(+), 132 deletions(-)

diff --git a/tests/perf.c b/tests/perf.c
index 1b441601..ca69440d 100644
--- a/tests/perf.c
+++ b/tests/perf.c
@@ -293,12 +293,9 @@ static int card = -1;
 static int n_eus;
 
 static uint64_t test_metric_set_id = UINT64_MAX;
-static uint64_t gt_min_freq_mhz_saved = 0;
-static uint64_t gt_max_freq_mhz_saved = 0;
-static uint64_t gt_min_freq_mhz = 0;
-static uint64_t gt_max_freq_mhz = 0;
 
 static uint64_t timestamp_frequency = 12500000;
+static uint64_t gt_max_freq_mhz = 0;
 static enum drm_i915_oa_format test_oa_format;
 static bool *undefined_a_counters;
 static uint64_t oa_exp_1_millisec;
@@ -402,16 +399,6 @@ sysfs_read(const char *file)
 	return read_u64_file(buf);
 }
 
-static void
-sysfs_write(const char *file, uint64_t val)
-{
-	char buf[512];
-
-	snprintf(buf, sizeof(buf), "/sys/class/drm/card%d/%s", card, file);
-
-	write_u64_file(buf, val);
-}
-
 static char *
 read_debugfs_record(int device, const char *file, const char *key)
 {
@@ -990,54 +977,6 @@ init_sys_info(void)
 	return try_read_u64_file(buf, &test_metric_set_id);
 }
 
-static void
-gt_frequency_range_save(void)
-{
-	gt_min_freq_mhz_saved = sysfs_read("gt_min_freq_mhz");
-	gt_max_freq_mhz_saved = sysfs_read("gt_max_freq_mhz");
-
-	gt_min_freq_mhz = gt_min_freq_mhz_saved;
-	gt_max_freq_mhz = gt_max_freq_mhz_saved;
-}
-
-static void
-gt_frequency_pin(int gt_freq_mhz)
-{
-	igt_debug("requesting pinned GT freq = %dmhz\n", gt_freq_mhz);
-
-	if (gt_freq_mhz > gt_max_freq_mhz) {
-		sysfs_write("gt_max_freq_mhz", gt_freq_mhz);
-		sysfs_write("gt_min_freq_mhz", gt_freq_mhz);
-	} else {
-		sysfs_write("gt_min_freq_mhz", gt_freq_mhz);
-		sysfs_write("gt_max_freq_mhz", gt_freq_mhz);
-	}
-	gt_min_freq_mhz = gt_freq_mhz;
-	gt_max_freq_mhz = gt_freq_mhz;
-}
-
-static void
-gt_frequency_range_restore(void)
-{
-	igt_debug("restoring GT frequency range: min = %dmhz, max =%dmhz, current: min=%dmhz, max=%dmhz\n",
-		  (int)gt_min_freq_mhz_saved,
-		  (int)gt_max_freq_mhz_saved,
-		  (int)gt_min_freq_mhz,
-		  (int)gt_max_freq_mhz);
-
-	/* Assume current min/max are the same */
-	if (gt_min_freq_mhz_saved > gt_max_freq_mhz) {
-		sysfs_write("gt_max_freq_mhz", gt_max_freq_mhz_saved);
-		sysfs_write("gt_min_freq_mhz", gt_min_freq_mhz_saved);
-	} else {
-		sysfs_write("gt_min_freq_mhz", gt_min_freq_mhz_saved);
-		sysfs_write("gt_max_freq_mhz", gt_max_freq_mhz_saved);
-	}
-
-	gt_min_freq_mhz = gt_min_freq_mhz_saved;
-	gt_max_freq_mhz = gt_max_freq_mhz_saved;
-}
-
 static int
 i915_read_reports_until_timestamp(enum drm_i915_oa_format oa_format,
 				  uint8_t *buf,
@@ -1614,33 +1553,9 @@ test_oa_formats(void)
 }
 
 static void
-test_oa_exponents(int gt_freq_mhz)
+test_oa_exponents(void)
 {
-	uint32_t freq_margin;
-
-	/* This test tries to use the sysfs interface for pinning the GT
-	 * frequency so we have another point of reference for comparing with
-	 * the clock frequency as derived from OA reports.
-	 *
-	 * This test has been finicky to stabilise while the
-	 * gt_min/max_freq_mhz files in sysfs don't seem to be a reliable
-	 * mechanism for fixing the gpu frequency.
-	 *
-	 * Since these unit tests are focused on the OA unit not the ability to
-	 * pin the frequency via sysfs we make the test account for pinning not
-	 * being reliable and read back the current frequency for each
-	 * iteration of this test to take this into account.
-	 */
-	gt_frequency_pin(gt_freq_mhz);
-
-	igt_debug("Testing OA timer exponents with requested GT frequency = %dmhz\n",
-		  gt_freq_mhz);
-
-	/* allow a +- 10% error margin when checking that the frequency
-	 * calculated from the OA reports matches the frequency according to
-	 * sysfs.
-	 */
-	freq_margin = gt_freq_mhz * 0.1;
+	igt_debug("Testing OA timer exponents\n");
 
 	/* It's asking a lot to sample with a 160 nanosecond period and the
 	 * test can fail due to buffer overflows if it wasn't possible to
@@ -1655,7 +1570,6 @@ test_oa_exponents(int gt_freq_mhz)
 		uint32_t clock_delta;
 		uint32_t freq;
 		int n_tested = 0;
-		int n_freq_matches = 0;
 
 		/* The exponent is effectively selecting a bit in the timestamp
 		 * to trigger reports on and so in practice we expect the raw
@@ -1665,15 +1579,10 @@ test_oa_exponents(int gt_freq_mhz)
 		expected_timestamp_delta = 2 << i;
 
 		for (int j = 0; n_tested < 10 && j < 100; j++) {
-			int gt_freq_mhz_0, gt_freq_mhz_1;
 			uint32_t ticks0, ticks1;
 
-			gt_freq_mhz_0 = sysfs_read("gt_act_freq_mhz");
-
-			igt_debug("ITER %d: testing OA exponent %d (period = %"PRIu64"ns) with sysfs GT freq = %dmhz +- %u\n",
-				  j, i,
-				  oa_exponent_to_ns(i),
-				  gt_freq_mhz_0, freq_margin);
+			igt_debug("ITER %d: testing OA exponent %d (period = %"PRIu64"ns)\n",
+				  j, i, oa_exponent_to_ns(i));
 
 			open_and_read_2_oa_reports(test_oa_format,
 						   i, /* exponent */
@@ -1682,16 +1591,6 @@ test_oa_exponents(int gt_freq_mhz)
 						   true); /* timer triggered
 							     reports only */
 
-			gt_freq_mhz_1 = sysfs_read("gt_act_freq_mhz");
-
-			/* If it looks like the frequency has changed according
-			 * to sysfs then skip looking at this pair of reports
-			 */
-			if (gt_freq_mhz_0 != gt_freq_mhz_1) {
-				igt_debug("skipping OA reports pair due to GT frequency change according to sysfs\n");
-				continue;
-			}
-
 			timestamp_delta = oa_report1[1] - oa_report0[1];
 			igt_assert_neq(timestamp_delta, 0);
 
@@ -1714,31 +1613,13 @@ test_oa_exponents(int gt_freq_mhz)
 			igt_debug("ITER %d: time delta = %"PRIu32"(ns) clock delta = %"PRIu32" freq = %"PRIu32"(mhz)\n",
 				  j, time_delta, clock_delta, freq);
 
-                        if (freq < (gt_freq_mhz_1 + freq_margin) &&
-                            freq > (gt_freq_mhz_1 - freq_margin))
-				n_freq_matches++;
-
 			n_tested++;
 		}
 
 		if (n_tested < 10)
 			igt_debug("sysfs frequency pinning too unstable for cross-referencing with OA derived frequency");
 		igt_assert_eq(n_tested, 10);
-
-		igt_debug("number of iterations with expected clock frequency = %d\n",
-			  n_freq_matches);
-
-		/* Don't assert the calculated frequency for extremely short
-		 * durations.
-		 *
-		 * Allow some mismatches since can't be can't be sure about
-		 * frequency changes between sysfs reads.
-		 */
-		if (i > 3)
-			igt_assert(n_freq_matches >= 7);
 	}
-
-	gt_frequency_range_restore();
 }
 
 /* The OA exponent selects a timestamp counter bit to trigger reports on.
@@ -3963,11 +3844,11 @@ igt_main
 
 		igt_require(init_sys_info());
 
-		gt_frequency_range_save();
-
 		write_u64_file("/proc/sys/dev/i915/perf_stream_paranoid", 1);
 		write_u64_file("/proc/sys/dev/i915/oa_max_sample_rate", 100000);
 
+		gt_max_freq_mhz = sysfs_read("gt_boost_freq_mhz");
+
 		render_copy = igt_get_render_copyfunc(devid);
 		igt_require_f(render_copy, "no render-copy function\n");
 	}
@@ -3994,10 +3875,8 @@ igt_main
 		test_invalid_oa_exponent();
 	igt_subtest("low-oa-exponent-permissions")
 		test_low_oa_exponent_permissions();
-	igt_subtest("oa-exponents") {
-		test_oa_exponents(450);
-		test_oa_exponents(550);
-	}
+	igt_subtest("oa-exponents")
+		test_oa_exponents();
 
 	igt_subtest("per-context-mode-unprivileged") {
 		igt_require(IS_HASWELL(devid));
@@ -4063,8 +3942,6 @@ igt_main
 		write_u64_file("/proc/sys/dev/i915/oa_max_sample_rate", 100000);
 		write_u64_file("/proc/sys/dev/i915/perf_stream_paranoid", 1);
 
-		gt_frequency_range_restore();
-
 		close(drm_fd);
 	}
 }
-- 
2.14.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH i-g-t v4 06/12] tests/perf: rework oa-exponent test
  2017-08-29 13:55 [PATCH i-g-t v4 00/12] Improve robustness of the i915 perf tests Lionel Landwerlin
                   ` (4 preceding siblings ...)
  2017-08-29 13:55 ` [PATCH i-g-t v4 05/12] tests/perf: remove frequency related changes Lionel Landwerlin
@ 2017-08-29 13:55 ` Lionel Landwerlin
  2017-08-29 13:55 ` [PATCH i-g-t v4 07/12] tests/perf: make enable-disable more reliable Lionel Landwerlin
                   ` (7 subsequent siblings)
  13 siblings, 0 replies; 15+ messages in thread
From: Lionel Landwerlin @ 2017-08-29 13:55 UTC (permalink / raw)
  To: intel-gfx

New issues that were discovered while making the tests work on Gen8+ :

 - we need to measure timings between periodic reports and discard all
   other kind of reports

 - it seems periodicity of the reports can be affected outside of RC6
   (frequency change), we can detect this by looking at the amount of
   clock cycles per timestamp deltas

v2: Drop some unused variables (Matthew)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 tests/perf.c | 733 ++++++++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 599 insertions(+), 134 deletions(-)

diff --git a/tests/perf.c b/tests/perf.c
index ca69440d..efaa4a06 100644
--- a/tests/perf.c
+++ b/tests/perf.c
@@ -28,6 +28,7 @@
 #include <fcntl.h>
 #include <inttypes.h>
 #include <errno.h>
+#include <signal.h>
 #include <sys/stat.h>
 #include <sys/time.h>
 #include <sys/times.h>
@@ -306,6 +307,25 @@ static uint32_t (*read_report_ticks)(uint32_t *report,
 static void (*sanity_check_reports)(uint32_t *oa_report0, uint32_t *oa_report1,
 				    enum drm_i915_oa_format format);
 
+static bool
+timestamp_delta_within(uint32_t delta,
+		       uint32_t expected_delta,
+		       uint32_t margin)
+{
+	return delta >= (expected_delta - margin) &&
+	       delta <= (expected_delta + margin);
+}
+
+static bool
+double_value_within(double value,
+		    double expected,
+		    double percent_margin)
+{
+	return value >= (expected - expected * percent_margin / 100.0) &&
+	       value <= (expected + expected * percent_margin / 100.0);
+
+}
+
 static void
 __perf_close(int fd)
 {
@@ -472,6 +492,20 @@ gen8_read_report_ticks(uint32_t *report, enum drm_i915_oa_format format)
 	return report[3];
 }
 
+static void
+gen8_read_report_clock_ratios(uint32_t *report,
+			      uint32_t *slice_freq_mhz,
+			      uint32_t *unslice_freq_mhz)
+{
+	uint32_t unslice_freq = report[0] & 0x1ff;
+	uint32_t slice_freq_low = (report[0] >> 25) & 0x7f;
+	uint32_t slice_freq_high = (report[0] >> 9) & 0x3;
+	uint32_t slice_freq = slice_freq_low | (slice_freq_high << 7);
+
+	*slice_freq_mhz = (slice_freq * 16666) / 1000;
+	*unslice_freq_mhz = (unslice_freq * 16666) / 1000;
+}
+
 static const char *
 gen8_read_report_reason(const uint32_t *report)
 {
@@ -494,29 +528,6 @@ gen8_read_report_reason(const uint32_t *report)
 		return "unknown";
 }
 
-static bool
-oa_report_is_periodic(uint32_t oa_exponent, const uint32_t *report)
-{
-	if (IS_HASWELL(devid)) {
-		/* For Haswell we don't have a documented report reason field
-		 * (though empirically report[0] bit 10 does seem to correlate
-		 * with a timer trigger reason) so we instead infer which
-		 * reports are timer triggered by checking if the least
-		 * significant bits are zero and the exponent bit is set.
-		 */
-		uint32_t oa_exponent_mask = (1 << (oa_exponent + 1)) - 1;
-
-		if ((report[1] & oa_exponent_mask) != (1 << oa_exponent))
-			return true;
-	} else {
-		if ((report[0] >> OAREPORT_REASON_SHIFT) &
-		    OAREPORT_REASON_TIMER)
-			return true;
-	}
-
-	return false;
-}
-
 static uint64_t
 timebase_scale(uint32_t u32_delta)
 {
@@ -563,6 +574,29 @@ oa_exponent_to_ns(int exponent)
        return 1000000000ULL * (2ULL << exponent) / timestamp_frequency;
 }
 
+static bool
+oa_report_is_periodic(uint32_t oa_exponent, const uint32_t *report)
+{
+	if (IS_HASWELL(devid)) {
+		/* For Haswell we don't have a documented report reason field
+		 * (though empirically report[0] bit 10 does seem to correlate
+		 * with a timer trigger reason) so we instead infer which
+		 * reports are timer triggered by checking if the least
+		 * significant bits are zero and the exponent bit is set.
+		 */
+		uint32_t oa_exponent_mask = (1 << (oa_exponent + 1)) - 1;
+
+		if ((report[1] & oa_exponent_mask) == (1 << oa_exponent))
+			return true;
+	} else {
+		if ((report[0] >> OAREPORT_REASON_SHIFT) &
+		    OAREPORT_REASON_TIMER)
+			return true;
+	}
+
+	return false;
+}
+
 static bool
 oa_report_ctx_is_valid(uint32_t *report)
 {
@@ -578,6 +612,128 @@ oa_report_ctx_is_valid(uint32_t *report)
 	igt_assert(!"reached");
 }
 
+static uint32_t
+oa_report_get_ctx_id(uint32_t *report)
+{
+	if (!oa_report_ctx_is_valid(report))
+		return 0xffffffff;
+	return report[2];
+}
+
+static double
+oa_reports_tick_per_period(uint32_t *report0, uint32_t *report1)
+{
+	if (intel_gen(devid) < 8)
+		return 0.0;
+
+	/* Measure the number GPU tick delta to timestamp delta. */
+	return (double) (report1[3] - report0[3]) /
+	       (double) (report1[1] - report0[1]);
+}
+
+static void
+scratch_buf_memset(drm_intel_bo *bo, int width, int height, uint32_t color)
+{
+	int ret;
+
+	ret = drm_intel_bo_map(bo, true /* writable */);
+	igt_assert_eq(ret, 0);
+
+	for (int i = 0; i < width * height; i++)
+		((uint32_t *)bo->virtual)[i] = color;
+
+	drm_intel_bo_unmap(bo);
+}
+
+static void
+scratch_buf_init(drm_intel_bufmgr *bufmgr,
+		 struct igt_buf *buf,
+		 int width, int height,
+		 uint32_t color)
+{
+	size_t stride = width * 4;
+	size_t size = stride * height;
+	drm_intel_bo *bo = drm_intel_bo_alloc(bufmgr, "", size, 4096);
+
+	scratch_buf_memset(bo, width, height, color);
+
+	buf->bo = bo;
+	buf->stride = stride;
+	buf->tiling = I915_TILING_NONE;
+	buf->size = size;
+}
+
+static void
+emit_report_perf_count(struct intel_batchbuffer *batch,
+		       drm_intel_bo *dst_bo,
+		       int dst_offset,
+		       uint32_t report_id)
+{
+	if (IS_HASWELL(devid)) {
+		BEGIN_BATCH(3, 1);
+		OUT_BATCH(GEN6_MI_REPORT_PERF_COUNT);
+		OUT_RELOC(dst_bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+			  dst_offset);
+		OUT_BATCH(report_id);
+		ADVANCE_BATCH();
+	} else {
+		/* XXX: NB: n dwords arg is actually magic since it internally
+		 * automatically accounts for larger addresses on gen >= 8...
+		 */
+		BEGIN_BATCH(3, 1);
+		OUT_BATCH(GEN8_MI_REPORT_PERF_COUNT);
+		OUT_RELOC(dst_bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+			  dst_offset);
+		OUT_BATCH(report_id);
+		ADVANCE_BATCH();
+	}
+}
+
+static uint32_t
+i915_get_one_gpu_timestamp(uint32_t *context_id)
+{
+	drm_intel_bufmgr *bufmgr = drm_intel_bufmgr_gem_init(drm_fd, 4096);
+	drm_intel_context *mi_rpc_ctx = drm_intel_gem_context_create(bufmgr);
+	drm_intel_bo *mi_rpc_bo = drm_intel_bo_alloc(bufmgr, "mi_rpc dest bo", 4096, 64);
+	struct intel_batchbuffer *mi_rpc_batch = intel_batchbuffer_alloc(bufmgr, devid);
+	int ret;
+	uint32_t timestamp;
+
+	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+
+	if (context_id) {
+		ret = drm_intel_gem_context_get_id(mi_rpc_ctx, context_id);
+		igt_assert_eq(ret, 0);
+	}
+
+	igt_assert(mi_rpc_ctx);
+	igt_assert(mi_rpc_bo);
+	igt_assert(mi_rpc_batch);
+
+	ret = drm_intel_bo_map(mi_rpc_bo, true);
+	igt_assert_eq(ret, 0);
+	memset(mi_rpc_bo->virtual, 0x80, 4096);
+	drm_intel_bo_unmap(mi_rpc_bo);
+
+	emit_report_perf_count(mi_rpc_batch,
+			       mi_rpc_bo, /* dst */
+			       0, /* dst offset in bytes */
+			       0xdeadbeef); /* report ID */
+
+	intel_batchbuffer_flush_with_context(mi_rpc_batch, mi_rpc_ctx);
+
+	ret = drm_intel_bo_map(mi_rpc_bo, false /* write enable */);
+	igt_assert_eq(ret, 0);
+	timestamp = ((uint32_t *)mi_rpc_bo->virtual)[1];
+	drm_intel_bo_unmap(mi_rpc_bo);
+
+	drm_intel_bo_unreference(mi_rpc_bo);
+	intel_batchbuffer_free(mi_rpc_batch);
+	drm_intel_gem_context_destroy(mi_rpc_ctx);
+	drm_intel_bufmgr_destroy(bufmgr);
+
+	return timestamp;
+}
 
 static void
 hsw_sanity_check_render_basic_reports(uint32_t *oa_report0, uint32_t *oa_report1,
@@ -1032,7 +1188,6 @@ i915_read_reports_until_timestamp(enum drm_i915_oa_format oa_format,
 	return total_len;
 }
 
-
 /* CAP_SYS_ADMIN is required to open system wide metrics, unless the system
  * control parameter dev.i915.perf_stream_paranoid == 0 */
 static void
@@ -1347,20 +1502,6 @@ open_and_read_2_oa_reports(int format_id,
 	__perf_close(stream_fd);
 }
 
-static void
-gen8_read_report_clock_ratios(uint32_t *report,
-			      uint32_t *slice_freq_mhz,
-			      uint32_t *unslice_freq_mhz)
-{
-	uint32_t unslice_freq = report[0] & 0x1ff;
-	uint32_t slice_freq_low = (report[0] >> 25) & 0x7f;
-	uint32_t slice_freq_high = (report[0] >> 9) & 0x3;
-	uint32_t slice_freq = slice_freq_low | (slice_freq_high << 7);
-
-	*slice_freq_mhz = (slice_freq * 16666) / 1000;
-	*unslice_freq_mhz = (unslice_freq * 16666) / 1000;
-}
-
 static void
 print_reports(uint32_t *oa_report0, uint32_t *oa_report1, int fmt)
 {
@@ -1552,74 +1693,456 @@ test_oa_formats(void)
 	}
 }
 
+
+enum load {
+	LOW,
+	HIGH
+};
+
+#define LOAD_HELPER_PAUSE_USEC 500
+
+static struct load_helper {
+	int devid;
+	int has_ppgtt;
+	drm_intel_bufmgr *bufmgr;
+	drm_intel_context *context;
+	uint32_t context_id;
+	struct intel_batchbuffer *batch;
+	enum load load;
+	bool exit;
+	struct igt_helper_process igt_proc;
+	struct igt_buf src, dst;
+} lh = { 0, };
+
+static void load_helper_signal_handler(int sig)
+{
+	if (sig == SIGUSR2)
+		lh.load = lh.load == LOW ? HIGH : LOW;
+	else
+		lh.exit = true;
+}
+
+static void load_helper_set_load(enum load load)
+{
+	igt_assert(lh.igt_proc.running);
+
+	if (lh.load == load)
+		return;
+
+	lh.load = load;
+	kill(lh.igt_proc.pid, SIGUSR2);
+}
+
+static void load_helper_run(enum load load)
+{
+	/*
+	 * FIXME fork helpers won't get cleaned up when started from within a
+	 * subtest, so handle the case where it sticks around a bit too long.
+	 */
+	if (lh.igt_proc.running) {
+		load_helper_set_load(load);
+		return;
+	}
+
+	lh.load = load;
+
+	igt_fork_helper(&lh.igt_proc) {
+		signal(SIGUSR1, load_helper_signal_handler);
+		signal(SIGUSR2, load_helper_signal_handler);
+
+		while (!lh.exit) {
+			int ret;
+
+			render_copy(lh.batch,
+				    lh.context,
+				    &lh.src, 0, 0, 1920, 1080,
+				    &lh.dst, 0, 0);
+
+			intel_batchbuffer_flush_with_context(lh.batch,
+							     lh.context);
+
+			ret = drm_intel_gem_context_get_id(lh.context,
+							   &lh.context_id);
+			igt_assert_eq(ret, 0);
+
+			drm_intel_bo_wait_rendering(lh.dst.bo);
+
+			/* Lower the load by pausing after every submitted
+			 * write. */
+			if (lh.load == LOW)
+				usleep(LOAD_HELPER_PAUSE_USEC);
+		}
+	}
+}
+
+static void load_helper_stop(void)
+{
+	kill(lh.igt_proc.pid, SIGUSR1);
+	igt_assert(igt_wait_helper(&lh.igt_proc) == 0);
+}
+
+static void load_helper_init(void)
+{
+	int ret;
+
+	lh.devid = intel_get_drm_devid(drm_fd);
+	lh.has_ppgtt = gem_uses_ppgtt(drm_fd);
+
+	/* MI_STORE_DATA can only use GTT address on gen4+/g33 and needs
+	 * snoopable mem on pre-gen6. Hence load-helper only works on gen6+, but
+	 * that's also all we care about for the rps testcase*/
+	igt_assert(intel_gen(lh.devid) >= 6);
+	lh.bufmgr = drm_intel_bufmgr_gem_init(drm_fd, 4096);
+	igt_assert(lh.bufmgr);
+
+	drm_intel_bufmgr_gem_enable_reuse(lh.bufmgr);
+
+	lh.context = drm_intel_gem_context_create(lh.bufmgr);
+	igt_assert(lh.context);
+
+	lh.context_id = 0xffffffff;
+	ret = drm_intel_gem_context_get_id(lh.context, &lh.context_id);
+	igt_assert_eq(ret, 0);
+	igt_assert_neq(lh.context_id, 0xffffffff);
+
+	lh.batch = intel_batchbuffer_alloc(lh.bufmgr, lh.devid);
+	igt_assert(lh.batch);
+
+	scratch_buf_init(lh.bufmgr, &lh.dst, 1920, 1080, 0);
+	scratch_buf_init(lh.bufmgr, &lh.src, 1920, 1080, 0);
+}
+
+static void load_helper_fini(void)
+{
+	if (lh.igt_proc.running)
+		load_helper_stop();
+
+	if (lh.src.bo)
+		drm_intel_bo_unreference(lh.src.bo);
+	if (lh.dst.bo)
+		drm_intel_bo_unreference(lh.dst.bo);
+
+	if (lh.batch)
+		intel_batchbuffer_free(lh.batch);
+
+	if (lh.context)
+		drm_intel_gem_context_destroy(lh.context);
+
+	if (lh.bufmgr)
+		drm_intel_bufmgr_destroy(lh.bufmgr);
+}
+
 static void
 test_oa_exponents(void)
 {
-	igt_debug("Testing OA timer exponents\n");
+	load_helper_init();
+	load_helper_run(HIGH);
 
 	/* It's asking a lot to sample with a 160 nanosecond period and the
 	 * test can fail due to buffer overflows if it wasn't possible to
 	 * keep up, so we don't start from an exponent of zero...
 	 */
-	for (int i = 5; i < 20; i++) {
-		uint32_t expected_timestamp_delta;
-		uint32_t timestamp_delta;
-		uint32_t oa_report0[64];
-		uint32_t oa_report1[64];
+	for (int exponent = 5; exponent < 18; exponent++) {
+		uint64_t expected_timestamp_delta;
 		uint32_t time_delta;
-		uint32_t clock_delta;
-		uint32_t freq;
 		int n_tested = 0;
+		int n_time_delta_matches = 0;
 
 		/* The exponent is effectively selecting a bit in the timestamp
 		 * to trigger reports on and so in practice we expect the raw
 		 * timestamp deltas for periodic reports to exactly match the
 		 * value of next bit.
 		 */
-		expected_timestamp_delta = 2 << i;
+		expected_timestamp_delta = 2UL << exponent;
 
 		for (int j = 0; n_tested < 10 && j < 100; j++) {
-			uint32_t ticks0, ticks1;
-
-			igt_debug("ITER %d: testing OA exponent %d (period = %"PRIu64"ns)\n",
-				  j, i, oa_exponent_to_ns(i));
-
-			open_and_read_2_oa_reports(test_oa_format,
-						   i, /* exponent */
-						   oa_report0,
-						   oa_report1,
-						   true); /* timer triggered
-							     reports only */
-
-			timestamp_delta = oa_report1[1] - oa_report0[1];
-			igt_assert_neq(timestamp_delta, 0);
-
-			if (timestamp_delta != expected_timestamp_delta) {
-				igt_debug("timestamp0 = %u/0x%x\n",
-					  oa_report0[1], oa_report0[1]);
-				igt_debug("timestamp1 = %u/0x%x\n",
-					  oa_report1[1], oa_report1[1]);
+			uint64_t properties[] = {
+				/* Include OA reports in samples */
+				DRM_I915_PERF_PROP_SAMPLE_OA, true,
+
+				/* OA unit configuration */
+				DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id,
+				DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format,
+				DRM_I915_PERF_PROP_OA_EXPONENT, exponent,
+			};
+			struct drm_i915_perf_open_param param = {
+				.flags = I915_PERF_FLAG_FD_CLOEXEC,
+				.num_properties = ARRAY_SIZE(properties) / 2,
+				.properties_ptr = to_user_pointer(properties),
+			};
+			int ret;
+			uint64_t average_timestamp_delta;
+			uint32_t n_reports = 0;
+			uint32_t n_idle_reports = 0;
+			uint32_t n_reads = 0;
+			uint32_t context_id;
+			uint64_t first_timestamp = 0;
+			bool check_first_timestamp = true;
+			struct drm_i915_perf_record_header *header;
+			uint64_t delta_delta;
+			struct {
+				uint32_t report[64];
+			} reports[30];
+			struct {
+				uint8_t *buf;
+				size_t len;
+			} reads[1000];
+			double error;
+			double tick_per_period;
+
+			igt_debug("ITER %d: testing OA exponent %d,"
+				  " expected ts delta = %"PRIu64" (%"PRIu64"ns/%.2fus/%.2fms)\n",
+				  j, exponent,
+				  expected_timestamp_delta,
+				  oa_exponent_to_ns(exponent),
+				  oa_exponent_to_ns(exponent) / 1000.0,
+				  oa_exponent_to_ns(exponent) / (1000.0 * 1000.0));
+
+			stream_fd = __perf_open(drm_fd, &param);
+
+			/* Right after opening the OA stream, read a
+			 * first timestamp as way to filter previously
+			 * scheduled work that would have configured
+			 * the OA unit at a different period. */
+			first_timestamp = i915_get_one_gpu_timestamp(&context_id);
+
+			while (n_reads < ARRAY_SIZE(reads) &&
+			       n_reports < ARRAY_SIZE(reports)) {
+				const size_t buf_size = 1024 * 1024;
+				uint8_t *buf = reads[n_reads++].buf = calloc(1, buf_size);
+
+				while ((ret = read(stream_fd, buf, buf_size)) < 0 &&
+				       errno == EINTR)
+					;
+
+				/* We should never have no data. */
+				igt_assert(ret > 0);
+				reads[n_reads - 1].len = ret;
+
+				igt_debug(" > read %i bytes\n", ret);
+
+				for (int offset = 0;
+				     offset < ret && n_reports < ARRAY_SIZE(reports);
+				     offset += header->size) {
+					uint32_t *report;
+					double previous_tick_per_period;
+
+					header = (void *)(buf + offset);
+
+					if (header->type == DRM_I915_PERF_RECORD_OA_BUFFER_LOST) {
+						igt_assert(!"reached");
+						break;
+					}
+
+					if (header->type == DRM_I915_PERF_RECORD_OA_REPORT_LOST) {
+						n_reports = 0;
+						n_idle_reports = 0;
+						for (int r = 0; r < n_reads; r++)
+							free(reads[r].buf);
+						n_reads = 0;
+						break;
+					}
+
+					if (header->type != DRM_I915_PERF_RECORD_SAMPLE)
+						continue;
+
+					report = (void *)(header + 1);
+
+					/* Skip anything before the first
+					 * timestamp, it might not be at the
+					 * right periodic exponent. */
+					if (check_first_timestamp &&
+					    report[1] < first_timestamp) {
+						igt_debug(" > Dropping ts=%u (prior %"PRIu64")\n",
+							  report[1], first_timestamp);
+						continue;
+					}
+
+					/* Once we've passed the first
+					 * timestamp, no need to check. */
+					check_first_timestamp = false;
+
+					if (!oa_report_ctx_is_valid(report))
+						n_idle_reports++;
+
+					/* We only measure timestamps between
+					 * periodic reports. */
+					if (!oa_report_is_periodic(exponent, report))
+						continue;
+
+					igt_debug(" > write %i timestamp=%u\n", n_reports, report[1]);
+					memcpy(reports[n_reports].report, report,
+					       sizeof(reports[n_reports].report));
+					n_reports++;
+
+					previous_tick_per_period = tick_per_period;
+
+					if (n_reports > 1) {
+						tick_per_period =
+							oa_reports_tick_per_period(reports[n_reports - 2].report,
+										   reports[n_reports - 1].report);
+
+						/* Dismiss the series of report
+						 * if we notice clock frequency
+						 * changes. */
+						if (!double_value_within(tick_per_period,
+									 previous_tick_per_period, 5)) {
+								igt_debug("Noticed clock frequency change at ts=%u (%f / %f), "
+									  "dropping reports and trying again\n",
+									  report[1], previous_tick_per_period, tick_per_period);
+								n_reports = 0;
+								n_idle_reports = 0;
+								for (int r = 0; r < n_reads; r++)
+									free(reads[r].buf);
+								n_reads = 0;
+								break;
+						}
+					}
+				}
 			}
 
-			igt_assert_eq(timestamp_delta, expected_timestamp_delta);
+			__perf_close(stream_fd);
+			igt_debug("closed stream\n");
 
-			ticks0 = read_report_ticks(oa_report0, test_oa_format);
-			ticks1 = read_report_ticks(oa_report1, test_oa_format);
-			clock_delta = ticks1 - ticks0;
+			igt_assert_eq(n_reports, ARRAY_SIZE(reports));
 
-			time_delta = timebase_scale(timestamp_delta);
+			average_timestamp_delta = 0;
+			for (int i = 0; i < (n_reports - 1); i++) {
+				/* XXX: calculating with u32 arithmetic to account for overflow */
+				uint32_t u32_delta = reports[i + 1].report[1] - reports[i].report[1];
 
-			freq = ((uint64_t)clock_delta * 1000) / time_delta;
-			igt_debug("ITER %d: time delta = %"PRIu32"(ns) clock delta = %"PRIu32" freq = %"PRIu32"(mhz)\n",
-				  j, time_delta, clock_delta, freq);
+				average_timestamp_delta += u32_delta;
+			}
+			average_timestamp_delta /= (n_reports - 1);
+
+			if (average_timestamp_delta > expected_timestamp_delta)
+				delta_delta  = average_timestamp_delta - expected_timestamp_delta;
+			else
+				delta_delta = expected_timestamp_delta - average_timestamp_delta;
+			error = (delta_delta / (double)expected_timestamp_delta) * 100.0;
+
+			time_delta = timebase_scale(average_timestamp_delta);
+
+			igt_debug(" > Avg. time delta = %"PRIu32"(ns),"
+				  " n idle reports = %u, n reads = %u, error=%f\n",
+				  time_delta, n_idle_reports, n_reads, error);
+			if (error > 5) {
+				uint32_t *rpt = NULL, *last = NULL, *last_periodic = NULL;
+
+				igt_debug(" > More than 5%% error: avg_ts_delta = %"PRIu64", delta_delta = %"PRIu64", "
+					  "expected_delta = %"PRIu64", first_timestamp = %"PRIu64" ctx_id=%"PRIu32"\n",
+					  average_timestamp_delta, delta_delta, expected_timestamp_delta, first_timestamp, context_id);
+				for (int i = 0; i < (n_reports - 1); i++) {
+					/* XXX: calculating with u32 arithmetic to account for overflow */
+					uint32_t u32_delta =
+						reports[i + 1].report[1] - reports[i].report[1];
+
+					if (u32_delta > expected_timestamp_delta)
+						delta_delta  = u32_delta - expected_timestamp_delta;
+					else
+						delta_delta = expected_timestamp_delta - u32_delta;
+					error = (delta_delta / (double)expected_timestamp_delta) * 100.0;
+
+					igt_debug(" > ts=%u-%u timestamp delta from %2d to %2d = %-8u (error = %u%%, ctx_id = %x)\n",
+						  reports[i + 1].report[1], reports[i].report[1],
+						  i, i + 1, u32_delta, (unsigned)error,
+						  oa_report_get_ctx_id(reports[i + 1].report));
+				}
+				for (int r = 0; r < n_reads; r++) {
+					igt_debug(" > read\n");
+					for (int offset = 0;
+					     offset < reads[r].len;
+					     offset += header->size) {
+						/* Just a random counter,
+						 * helpful to notice
+						 * inconsistency in debug.
+						 */
+						int counter_print = 13;
+						uint64_t a0 = 0, aN = 0;
+						double local_period = 0;
+
+						header = (void *) &reads[r].buf[offset];
+
+						if (header->type != DRM_I915_PERF_RECORD_SAMPLE) {
+							igt_debug(" > loss\n");
+							continue;
+						}
+
+						rpt = (void *)(header + 1);
+
+						if (last) {
+							a0 = gen8_read_40bit_a_counter(rpt, test_oa_format, 0) -
+								gen8_read_40bit_a_counter(last, test_oa_format, 0);
+							aN = gen8_read_40bit_a_counter(rpt, test_oa_format, counter_print) -
+								gen8_read_40bit_a_counter(last, test_oa_format, counter_print);
+						}
+
+						if (last_periodic &&
+						    intel_gen(devid) >= 8 &&
+						    oa_report_is_periodic(exponent, rpt)) {
+							local_period =
+								((uint64_t) rpt[3] - last_periodic[3])  /
+								((uint64_t) rpt[1] - last_periodic[1]);
+						}
+
+						igt_debug(" > report ts=%u"
+							  " ts_delta_last=%8u ts_delta_last_periodic=%8u is_timer=%i ctx_id=%8x gpu_ticks=%u period=%.2f A0=%lu A%i=%lu\n",
+							  rpt[1],
+							  (last != NULL) ? (rpt[1] - last[1]) : 0,
+							  (last_periodic != NULL) ? (rpt[1] - last_periodic[1]) : 0,
+							  oa_report_is_periodic(exponent, rpt),
+							  oa_report_get_ctx_id(rpt),
+							  (last != NULL) ? (rpt[3] - last[3]) : 0,
+							  local_period,
+							  a0, counter_print, aN);
+
+						last = rpt;
+						if (oa_report_is_periodic(exponent, rpt))
+							last_periodic = rpt;
+					}
+				}
+
+				igt_assert(!"reached");
+			}
+
+			if (timestamp_delta_within(average_timestamp_delta,
+						   expected_timestamp_delta,
+						   expected_timestamp_delta * 0.05)) {
+				igt_debug(" > timestamp delta matching %"PRIu64"ns ~= expected %"PRIu64"! :)\n",
+					  timebase_scale(average_timestamp_delta),
+					  timebase_scale(expected_timestamp_delta));
+				n_time_delta_matches++;
+			} else {
+				igt_debug(" > timestamp delta mismatch: %"PRIu64"ns != expected %"PRIu64"ns\n",
+					  timebase_scale(average_timestamp_delta),
+					  timebase_scale(expected_timestamp_delta));
+				igt_assert(average_timestamp_delta <
+					   (expected_timestamp_delta * 2));
+			}
 
 			n_tested++;
+
+			for (int r = 0; r < n_reads; r++)
+				free(reads[r].buf);
 		}
 
 		if (n_tested < 10)
-			igt_debug("sysfs frequency pinning too unstable for cross-referencing with OA derived frequency");
+			igt_debug("Too many test iterations had to be skipped\n");
 		igt_assert_eq(n_tested, 10);
+
+		igt_debug("Number of iterations with expected timestamp delta = %d\n",
+			  n_time_delta_matches);
+
+		/* The HW doesn't give us any strict guarantee that the
+		 * timestamps are exactly aligned with the exponent mask but
+		 * in practice it seems very rare for that not to be the case
+		 * so it a useful sanity check to assert quite strictly...
+		 */
+		igt_assert(n_time_delta_matches >= 9);
 	}
+
+	load_helper_stop();
+	load_helper_fini();
 }
 
 /* The OA exponent selects a timestamp counter bit to trigger reports on.
@@ -2485,32 +3008,6 @@ test_disabled_read_error(void)
 	__perf_close(stream_fd);
 }
 
-static void
-emit_report_perf_count(struct intel_batchbuffer *batch,
-		       drm_intel_bo *dst_bo,
-		       int dst_offset,
-		       uint32_t report_id)
-{
-	if (IS_HASWELL(devid)) {
-		BEGIN_BATCH(3, 1);
-		OUT_BATCH(GEN6_MI_REPORT_PERF_COUNT);
-		OUT_RELOC(dst_bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
-			  dst_offset);
-		OUT_BATCH(report_id);
-		ADVANCE_BATCH();
-	} else {
-		/* XXX: NB: n dwords arg is actually magic since it internally
-		 * automatically accounts for larger addresses on gen >= 8...
-		 */
-		BEGIN_BATCH(3, 1);
-		OUT_BATCH(GEN8_MI_REPORT_PERF_COUNT);
-		OUT_RELOC(dst_bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
-			  dst_offset);
-		OUT_BATCH(report_id);
-		ADVANCE_BATCH();
-	}
-}
-
 static void
 test_mi_rpc(void)
 {
@@ -2580,38 +3077,6 @@ test_mi_rpc(void)
 	__perf_close(stream_fd);
 }
 
-static void
-scratch_buf_memset(drm_intel_bo *bo, int width, int height, uint32_t color)
-{
-	int ret;
-
-	ret = drm_intel_bo_map(bo, true /* writable */);
-	igt_assert_eq(ret, 0);
-
-	for (int i = 0; i < width * height; i++)
-		((uint32_t *)bo->virtual)[i] = color;
-
-	drm_intel_bo_unmap(bo);
-}
-
-static void
-scratch_buf_init(drm_intel_bufmgr *bufmgr,
-		 struct igt_buf *buf,
-		 int width, int height,
-		 uint32_t color)
-{
-	size_t stride = width * 4;
-	size_t size = stride * height;
-	drm_intel_bo *bo = drm_intel_bo_alloc(bufmgr, "", size, 4096);
-
-	scratch_buf_memset(bo, width, height, color);
-
-	buf->bo = bo;
-	buf->stride = stride;
-	buf->tiling = I915_TILING_NONE;
-	buf->size = size;
-}
-
 static void
 emit_stall_timestamp_and_rpc(struct intel_batchbuffer *batch,
 			     drm_intel_bo *dst,
-- 
2.14.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH i-g-t v4 07/12] tests/perf: make enable-disable more reliable
  2017-08-29 13:55 [PATCH i-g-t v4 00/12] Improve robustness of the i915 perf tests Lionel Landwerlin
                   ` (5 preceding siblings ...)
  2017-08-29 13:55 ` [PATCH i-g-t v4 06/12] tests/perf: rework oa-exponent test Lionel Landwerlin
@ 2017-08-29 13:55 ` Lionel Landwerlin
  2017-08-29 13:55 ` [PATCH i-g-t v4 08/12] tests/perf: make buffer-fill " Lionel Landwerlin
                   ` (6 subsequent siblings)
  13 siblings, 0 replies; 15+ messages in thread
From: Lionel Landwerlin @ 2017-08-29 13:55 UTC (permalink / raw)
  To: intel-gfx

Estimation of the amount of reports can only refer to periodic ones,
as context switch reports completely depend on what happens on the
system. Also generate some load to prevent clock frequency changes to
impact our measurement.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 tests/perf.c | 96 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 90 insertions(+), 6 deletions(-)

diff --git a/tests/perf.c b/tests/perf.c
index efaa4a06..0177f017 100644
--- a/tests/perf.c
+++ b/tests/perf.c
@@ -2778,10 +2778,18 @@ test_enable_disable(void)
 	int n_full_oa_reports = oa_buf_size / report_size;
 	uint64_t fill_duration = n_full_oa_reports * oa_period;
 
+	load_helper_init();
+	load_helper_run(HIGH);
+
 	stream_fd = __perf_open(drm_fd, &param);
 
 	for (int i = 0; i < 5; i++) {
 		int len;
+		uint32_t n_periodic_reports;
+		struct drm_i915_perf_record_header *header;
+		uint32_t first_timestamp = 0, last_timestamp = 0;
+		uint32_t last_periodic_report[64];
+		double tick_per_period;
 
 		/* Giving enough time for an overflow might help catch whether
 		 * the OA unit has been enabled even if the driver might at
@@ -2801,18 +2809,91 @@ test_enable_disable(void)
 
 		nanosleep(&(struct timespec){ .tv_sec = 0,
 					      .tv_nsec = fill_duration / 2 },
-			  NULL);
+			NULL);
 
-		while ((len = read(stream_fd, buf, buf_size)) == -1 && errno == EINTR)
-			;
+		n_periodic_reports = 0;
 
-		igt_assert_neq(len, -1);
+		/* Because of the race condition between notification of new
+		 * reports and reports landing in memory, we need to rely on
+		 * timestamps to figure whether we've read enough of them.
+		 */
+		while (((last_timestamp - first_timestamp) * oa_exponent_to_ns(oa_exponent)) <
+		       (fill_duration / 2)) {
 
-		igt_assert(len > report_size * n_full_oa_reports * 0.45);
-		igt_assert(len < report_size * n_full_oa_reports * 0.55);
+			while ((len = read(stream_fd, buf, buf_size)) == -1 && errno == EINTR)
+				;
+
+			igt_assert_neq(len, -1);
+
+			for (int offset = 0; offset < len; offset += header->size) {
+				uint32_t *report;
+				double previous_tick_per_period;
+
+				header = (void *) (buf + offset);
+				report = (void *) (header + 1);
+
+				switch (header->type) {
+				case DRM_I915_PERF_RECORD_OA_REPORT_LOST:
+					break;
+				case DRM_I915_PERF_RECORD_SAMPLE:
+					if (first_timestamp == 0)
+						first_timestamp = report[1];
+					last_timestamp = report[1];
+
+					previous_tick_per_period = tick_per_period;
+
+					if (n_periodic_reports > 0 &&
+					    oa_report_is_periodic(oa_exponent, report)) {
+						tick_per_period =
+							oa_reports_tick_per_period(last_periodic_report,
+										   report);
+
+						if (!double_value_within(tick_per_period,
+									 previous_tick_per_period, 5))
+							igt_debug("clock change!\n");
+
+						igt_debug(" > report ts=%u"
+							  " ts_delta_last_periodic=%8u is_timer=%i ctx_id=%8x gpu_ticks=%u nb_periodic=%u\n",
+							  report[1],
+							  report[1] - last_periodic_report[1],
+							  oa_report_is_periodic(oa_exponent, report),
+							  oa_report_get_ctx_id(report),
+							  report[3] - last_periodic_report[3],
+							  n_periodic_reports);
+
+						memcpy(last_periodic_report, report,
+						       sizeof(last_periodic_report));
+					}
+
+					/* We want to measure only the periodic
+					 * reports, ctx-switch might inflate the
+					 * content of the buffer and skew or
+					 * measurement.
+					 */
+					n_periodic_reports +=
+						oa_report_is_periodic(oa_exponent, report);
+					break;
+				case DRM_I915_PERF_RECORD_OA_BUFFER_LOST:
+					igt_assert(!"unexpected overflow");
+					break;
+				}
+			}
+
+		}
 
 		do_ioctl(stream_fd, I915_PERF_IOCTL_DISABLE, 0);
 
+		igt_debug("%f < %lu < %f\n",
+			  report_size * n_full_oa_reports * 0.45,
+			  n_periodic_reports * report_size,
+			  report_size * n_full_oa_reports * 0.55);
+
+		igt_assert((n_periodic_reports * report_size) >
+			   (report_size * n_full_oa_reports * 0.45));
+		igt_assert((n_periodic_reports * report_size) <
+			   report_size * n_full_oa_reports * 0.55);
+
+
 		/* It's considered an error to read a stream while it's disabled
 		 * since it would block indefinitely...
 		 */
@@ -2825,6 +2906,9 @@ test_enable_disable(void)
 	free(buf);
 
 	__perf_close(stream_fd);
+
+	load_helper_stop();
+	load_helper_fini();
 }
 
 static void
-- 
2.14.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH i-g-t v4 08/12] tests/perf: make buffer-fill more reliable
  2017-08-29 13:55 [PATCH i-g-t v4 00/12] Improve robustness of the i915 perf tests Lionel Landwerlin
                   ` (6 preceding siblings ...)
  2017-08-29 13:55 ` [PATCH i-g-t v4 07/12] tests/perf: make enable-disable more reliable Lionel Landwerlin
@ 2017-08-29 13:55 ` Lionel Landwerlin
  2017-08-29 13:55 ` [PATCH i-g-t v4 09/12] tests/perf: add Kabylake support Lionel Landwerlin
                   ` (5 subsequent siblings)
  13 siblings, 0 replies; 15+ messages in thread
From: Lionel Landwerlin @ 2017-08-29 13:55 UTC (permalink / raw)
  To: intel-gfx

Filling rate of the buffer must discard context switch reports as they
do not depend upon the periodicity, instead they're a factor on the
amount of different applications concurrently running on the system.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Tested-by: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
---
 tests/perf.c | 120 ++++++++++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 103 insertions(+), 17 deletions(-)

diff --git a/tests/perf.c b/tests/perf.c
index 0177f017..32d53ea1 100644
--- a/tests/perf.c
+++ b/tests/perf.c
@@ -2686,22 +2686,30 @@ test_buffer_fill(void)
 		.num_properties = sizeof(properties) / 16,
 		.properties_ptr = to_user_pointer(properties),
 	};
+	struct drm_i915_perf_record_header *header;
 	int buf_size = 65536 * (256 + sizeof(struct drm_i915_perf_record_header));
 	uint8_t *buf = malloc(buf_size);
+	int len;
 	size_t oa_buf_size = 16 * 1024 * 1024;
 	size_t report_size = oa_formats[test_oa_format].size;
 	int n_full_oa_reports = oa_buf_size / report_size;
 	uint64_t fill_duration = n_full_oa_reports * oa_period;
 
+	load_helper_init();
+	load_helper_run(HIGH);
+
 	igt_assert(fill_duration < 1000000000);
 
 	stream_fd = __perf_open(drm_fd, &param);
 
 	for (int i = 0; i < 5; i++) {
-		struct drm_i915_perf_record_header *header;
 		bool overflow_seen;
-		int offset = 0;
-		int len;
+		uint32_t n_periodic_reports;
+		uint32_t first_timestamp = 0, last_timestamp = 0;
+		uint32_t last_periodic_report[64];
+		double tick_per_period;
+
+		do_ioctl(stream_fd, I915_PERF_IOCTL_ENABLE, 0);
 
 		nanosleep(&(struct timespec){ .tv_sec = 0,
 					      .tv_nsec = fill_duration * 1.25 },
@@ -2713,7 +2721,7 @@ test_buffer_fill(void)
 		igt_assert_neq(len, -1);
 
 		overflow_seen = false;
-		for (offset = 0; offset < len; offset += header->size) {
+		for (int offset = 0; offset < len; offset += header->size) {
 			header = (void *)(buf + offset);
 
 			if (header->type == DRM_I915_PERF_RECORD_OA_BUFFER_LOST)
@@ -2722,32 +2730,110 @@ test_buffer_fill(void)
 
 		igt_assert_eq(overflow_seen, true);
 
+		do_ioctl(stream_fd, I915_PERF_IOCTL_DISABLE, 0);
+
+		igt_debug("fill_duration = %luns, oa_exponent = %u\n",
+			  fill_duration, oa_exponent);
+
+		do_ioctl(stream_fd, I915_PERF_IOCTL_ENABLE, 0);
+
 		nanosleep(&(struct timespec){ .tv_sec = 0,
-					      .tv_nsec = fill_duration / 2 },
-			  NULL);
+					.tv_nsec = fill_duration / 2 },
+			NULL);
 
-		while ((len = read(stream_fd, buf, buf_size)) == -1 && errno == EINTR)
-			;
+		n_periodic_reports = 0;
 
-		igt_assert_neq(len, -1);
+		/* Because of the race condition between notification of new
+		 * reports and reports landing in memory, we need to rely on
+		 * timestamps to figure whether we've read enough of them.
+		 */
+		while (((last_timestamp - first_timestamp) * oa_period) < (fill_duration / 2)) {
 
-		igt_assert(len > report_size * n_full_oa_reports * 0.45);
-		igt_assert(len < report_size * n_full_oa_reports * 0.55);
+			igt_debug("dts=%u elapsed=%lu duration=%lu\n",
+				  last_timestamp - first_timestamp,
+				  (last_timestamp - first_timestamp) * oa_period,
+				  fill_duration / 2);
 
-		overflow_seen = false;
-		for (offset = 0; offset < len; offset += header->size) {
-			header = (void *)(buf + offset);
+			while ((len = read(stream_fd, buf, buf_size)) == -1 && errno == EINTR)
+				;
 
-			if (header->type == DRM_I915_PERF_RECORD_OA_BUFFER_LOST)
-				overflow_seen = true;
+			igt_assert_neq(len, -1);
+
+			for (int offset = 0; offset < len; offset += header->size) {
+				uint32_t *report;
+				double previous_tick_per_period;
+
+				header = (void *) (buf + offset);
+				report = (void *) (header + 1);
+
+				switch (header->type) {
+				case DRM_I915_PERF_RECORD_OA_REPORT_LOST:
+					igt_debug("report loss, trying again\n");
+					break;
+				case DRM_I915_PERF_RECORD_SAMPLE:
+					igt_debug(" > report ts=%u"
+						  " ts_delta_last_periodic=%8u is_timer=%i ctx_id=%8x gpu_ticks=%u nb_periodic=%u\n",
+						  report[1],
+						  n_periodic_reports > 0 ? report[1] - last_periodic_report[1] : 0,
+						  oa_report_is_periodic(oa_exponent, report),
+						  oa_report_get_ctx_id(report),
+						  n_periodic_reports > 0 ? report[3] - last_periodic_report[3] : 0,
+						  n_periodic_reports);
+
+					if (first_timestamp == 0)
+						first_timestamp = report[1];
+					last_timestamp = report[1];
+
+					previous_tick_per_period = tick_per_period;
+
+					if (n_periodic_reports > 1 &&
+					    oa_report_is_periodic(oa_exponent, report)) {
+						tick_per_period =
+							oa_reports_tick_per_period(last_periodic_report,
+										   report);
+
+						if (!double_value_within(previous_tick_per_period,
+									 tick_per_period, 5))
+							igt_debug("clock change!\n");
+
+						memcpy(last_periodic_report, report,
+						       sizeof(last_periodic_report));
+					}
+
+					/* We want to measure only the periodic
+					 * reports, ctx-switch might inflate the
+					 * content of the buffer and skew or
+					 * measurement.
+					 */
+					n_periodic_reports +=
+						oa_report_is_periodic(oa_exponent, report);
+					break;
+				case DRM_I915_PERF_RECORD_OA_BUFFER_LOST:
+					igt_assert(!"unexpected overflow");
+					break;
+				}
+			}
 		}
 
-		igt_assert_eq(overflow_seen, false);
+		do_ioctl(stream_fd, I915_PERF_IOCTL_DISABLE, 0);
+
+		igt_debug("%f < %lu < %f\n",
+			  report_size * n_full_oa_reports * 0.45,
+			  n_periodic_reports * report_size,
+			  report_size * n_full_oa_reports * 0.55);
+
+		igt_assert(n_periodic_reports * report_size >
+			   report_size * n_full_oa_reports * 0.45);
+		igt_assert(n_periodic_reports * report_size <
+			   report_size * n_full_oa_reports * 0.55);
 	}
 
 	free(buf);
 
 	__perf_close(stream_fd);
+
+	load_helper_stop();
+	load_helper_fini();
 }
 
 static void
-- 
2.14.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH i-g-t v4 09/12] tests/perf: add Kabylake support
  2017-08-29 13:55 [PATCH i-g-t v4 00/12] Improve robustness of the i915 perf tests Lionel Landwerlin
                   ` (7 preceding siblings ...)
  2017-08-29 13:55 ` [PATCH i-g-t v4 08/12] tests/perf: make buffer-fill " Lionel Landwerlin
@ 2017-08-29 13:55 ` Lionel Landwerlin
  2017-08-29 13:55 ` [PATCH i-g-t v4 10/12] tests/perf: add Geminilake support Lionel Landwerlin
                   ` (4 subsequent siblings)
  13 siblings, 0 replies; 15+ messages in thread
From: Lionel Landwerlin @ 2017-08-29 13:55 UTC (permalink / raw)
  To: intel-gfx

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
---
 tests/perf.c | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/tests/perf.c b/tests/perf.c
index 32d53ea1..32f34ec4 100644
--- a/tests/perf.c
+++ b/tests/perf.c
@@ -1111,8 +1111,23 @@ init_sys_info(void)
 		} else if (IS_BROXTON(devid)) {
 			test_set_uuid = "5ee72f5c-092f-421e-8b70-225f7c3e9612";
 			timestamp_frequency = 19200000;
-		} else
+		} else if (IS_KABYLAKE(devid)) {
+			switch (intel_gt(devid)) {
+			case 1:
+				test_set_uuid = "baa3c7e4-52b6-4b85-801e-465a94b746dd";
+				break;
+			case 2:
+				test_set_uuid = "f1792f32-6db2-4b50-b4b2-557128f1688d";
+				break;
+			default:
+				igt_debug("unsupported Kabylake GT size\n");
+				return false;
+			}
+			timestamp_frequency = 12000000;
+		} else {
+			igt_debug("unsupported GT\n");
 			return false;
+		}
 
 		gp.param = I915_PARAM_EU_TOTAL;
 		gp.value = &n_eus;
-- 
2.14.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH i-g-t v4 10/12] tests/perf: add Geminilake support
  2017-08-29 13:55 [PATCH i-g-t v4 00/12] Improve robustness of the i915 perf tests Lionel Landwerlin
                   ` (8 preceding siblings ...)
  2017-08-29 13:55 ` [PATCH i-g-t v4 09/12] tests/perf: add Kabylake support Lionel Landwerlin
@ 2017-08-29 13:55 ` Lionel Landwerlin
  2017-08-29 13:55 ` [PATCH i-g-t v4 11/12] tests/perf: estimate number of blocking/polling based on time spent Lionel Landwerlin
                   ` (3 subsequent siblings)
  13 siblings, 0 replies; 15+ messages in thread
From: Lionel Landwerlin @ 2017-08-29 13:55 UTC (permalink / raw)
  To: intel-gfx

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
---
 tests/perf.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/perf.c b/tests/perf.c
index 32f34ec4..24df7c2a 100644
--- a/tests/perf.c
+++ b/tests/perf.c
@@ -1124,6 +1124,9 @@ init_sys_info(void)
 				return false;
 			}
 			timestamp_frequency = 12000000;
+		} else if (IS_GEMINILAKE(devid)) {
+			test_set_uuid = "dd3fd789-e783-4204-8cd0-b671bbccb0cf";
+			timestamp_frequency = 19200000;
 		} else {
 			igt_debug("unsupported GT\n");
 			return false;
-- 
2.14.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH i-g-t v4 11/12] tests/perf: estimate number of blocking/polling based on time spent
  2017-08-29 13:55 [PATCH i-g-t v4 00/12] Improve robustness of the i915 perf tests Lionel Landwerlin
                   ` (9 preceding siblings ...)
  2017-08-29 13:55 ` [PATCH i-g-t v4 10/12] tests/perf: add Geminilake support Lionel Landwerlin
@ 2017-08-29 13:55 ` Lionel Landwerlin
  2017-08-29 13:55 ` [PATCH i-g-t v4 12/12] tests/perf: prevent power management to kick in when necessary Lionel Landwerlin
                   ` (2 subsequent siblings)
  13 siblings, 0 replies; 15+ messages in thread
From: Lionel Landwerlin @ 2017-08-29 13:55 UTC (permalink / raw)
  To: intel-gfx

Blocking & polling tests define an amount of time to spend in the test
and then estimate the number of syscalls that should successfully
return. The problem is that while running the test we might spend
slightly more time than initiallly planned. This change estimates the
number of syscalls based on time spent after the fact.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 tests/perf.c | 42 +++++++++++++++++++++++++++++++++---------
 1 file changed, 33 insertions(+), 9 deletions(-)

diff --git a/tests/perf.c b/tests/perf.c
index 24df7c2a..6c062d20 100644
--- a/tests/perf.c
+++ b/tests/perf.c
@@ -2372,7 +2372,8 @@ test_blocking(void)
 		DRM_I915_PERF_PROP_OA_EXPONENT, oa_exponent,
 	};
 	struct drm_i915_perf_open_param param = {
-		.flags = I915_PERF_FLAG_FD_CLOEXEC,
+		.flags = I915_PERF_FLAG_FD_CLOEXEC |
+			I915_PERF_FLAG_DISABLED,
 		.num_properties = sizeof(properties) / 16,
 		.properties_ptr = to_user_pointer(properties),
 	};
@@ -2397,16 +2398,17 @@ test_blocking(void)
 	 */
 	int min_iterations = (test_duration_ns / (oa_period + 6000000ull));
 
-	int64_t start;
+	int64_t start, end;
 	int n = 0;
 
 	stream_fd = __perf_open(drm_fd, &param);
 
 	times(&start_times);
 
-	igt_debug("tick length = %dns, test duration = %"PRIu64"ns, min iter. = %d, max iter. = %d\n",
+	igt_debug("tick length = %dns, test duration = %"PRIu64"ns, min iter. = %d,"
+		  " estimated max iter. = %d, oa_period = %"PRIu64"ns\n",
 		  (int)tick_ns, test_duration_ns,
-		  min_iterations, max_iterations);
+		  min_iterations, max_iterations, oa_period);
 
 	/* In the loop we perform blocking polls while the HW is sampling at
 	 * ~25Hz, with the expectation that we spend most of our time blocked
@@ -2425,8 +2427,13 @@ test_blocking(void)
 	 * floor(real_stime)).
 	 *
 	 * We Loop for 1000 x tick_ns so one tick corresponds to 0.1%
+	 *
+	 * Also enable the stream just before poll/read to minimize
+	 * the error delta.
 	 */
-	for (start = get_time(); (get_time() - start) < test_duration_ns; /* nop */) {
+	start = get_time();
+	do_ioctl(stream_fd, I915_PERF_IOCTL_ENABLE, 0);
+	for (/* nop */; ((end = get_time()) - start) < test_duration_ns; /* nop */) {
 		struct drm_i915_perf_record_header *header;
 		bool timer_report_read = false;
 		bool non_timer_report_read = false;
@@ -2468,6 +2475,12 @@ test_blocking(void)
 		n++;
 	}
 
+	/* Updated the maximum of iterations based on the time spent
+	 * in the loop.
+	 */
+	max_iterations = (end - start) / oa_period + 1;
+	igt_debug("adjusted max iter. = %d\n", max_iterations);
+
 	times(&end_times);
 
 	/* Using nanosecond units is fairly silly here, given the tick in-
@@ -2524,6 +2537,7 @@ test_polling(void)
 	};
 	struct drm_i915_perf_open_param param = {
 		.flags = I915_PERF_FLAG_FD_CLOEXEC |
+			I915_PERF_FLAG_DISABLED |
 			I915_PERF_FLAG_FD_NONBLOCK,
 		.num_properties = sizeof(properties) / 16,
 		.properties_ptr = to_user_pointer(properties),
@@ -2548,7 +2562,7 @@ test_polling(void)
 	 * to check for data and giving some time to read().
 	 */
 	int min_iterations = (test_duration_ns / (oa_period + 6000000ull));
-	int64_t start;
+	int64_t start, end;
 	int n = 0;
 
 	stream_fd = __perf_open(drm_fd, &param);
@@ -2576,8 +2590,13 @@ test_polling(void)
 	 * floor(real_stime)).
 	 *
 	 * We Loop for 1000 x tick_ns so one tick corresponds to 0.1%
+	 *
+	 * Also enable the stream just before poll/read to minimize
+	 * the error delta.
 	 */
-	for (start = get_time(); (get_time() - start) < test_duration_ns; /* nop */) {
+	start = get_time();
+	do_ioctl(stream_fd, I915_PERF_IOCTL_ENABLE, 0);
+	for (/* nop */; ((end = get_time()) - start) < test_duration_ns; /* nop */) {
 		struct pollfd pollfd = { .fd = stream_fd, .events = POLLIN };
 		struct drm_i915_perf_record_header *header;
 		bool timer_report_read = false;
@@ -2625,8 +2644,7 @@ test_polling(void)
 				if (header->type == DRM_I915_PERF_RECORD_SAMPLE) {
 					uint32_t *report = (void *)(header + 1);
 
-					if (oa_report_is_periodic(oa_exponent,
-								  report))
+					if (oa_report_is_periodic(oa_exponent, report))
 						timer_report_read = true;
 					else
 						non_timer_report_read = true;
@@ -2650,6 +2668,12 @@ test_polling(void)
 		n++;
 	}
 
+	/* Updated the maximum of iterations based on the time spent
+	 * in the loop.
+	 */
+	max_iterations = (end - start) / oa_period + 1;
+	igt_debug("adjusted max iter. = %d\n", max_iterations);
+
 	times(&end_times);
 
 	/* Using nanosecond units is fairly silly here, given the tick in-
-- 
2.14.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH i-g-t v4 12/12] tests/perf: prevent power management to kick in when necessary
  2017-08-29 13:55 [PATCH i-g-t v4 00/12] Improve robustness of the i915 perf tests Lionel Landwerlin
                   ` (10 preceding siblings ...)
  2017-08-29 13:55 ` [PATCH i-g-t v4 11/12] tests/perf: estimate number of blocking/polling based on time spent Lionel Landwerlin
@ 2017-08-29 13:55 ` Lionel Landwerlin
  2017-08-29 14:14 ` ✓ Fi.CI.BAT: success for Improve robustness of the i915 perf tests (rev2) Patchwork
  2017-08-29 15:14 ` ✓ Fi.CI.IGT: " Patchwork
  13 siblings, 0 replies; 15+ messages in thread
From: Lionel Landwerlin @ 2017-08-29 13:55 UTC (permalink / raw)
  To: intel-gfx

Some of our tests measure that the OA unit produces reports at
expected time intervals (as configured through the PERF_OPEN
ioctl). It turns out the power management plays a role in the decision
of the OA unit to write reports to memory. Under normal circumstances
we don't really mind if the unit misses one report here or there, but
for our tests it makes pretty difficult to verify whether we've made a
mistake in the configuration.

To work around this, let's prevent power management to kick in by
holding /dev/cpu_dma_latency opened for the following tests :

  - blocking
  - polling
  - buffer-fill
  - oa-exponents

Many thanks to Chris Wilson for suggesting this!

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 tests/perf.c | 64 ++++++++++++++++++++++++++++++++++++++----------------------
 1 file changed, 41 insertions(+), 23 deletions(-)

diff --git a/tests/perf.c b/tests/perf.c
index 6c062d20..070dee97 100644
--- a/tests/perf.c
+++ b/tests/perf.c
@@ -288,6 +288,7 @@ static bool hsw_undefined_a_counters[45] = {
 static bool gen8_undefined_a_counters[45];
 
 static int drm_fd = -1;
+static int pm_fd = -1;
 static int stream_fd = -1;
 static uint32_t devid;
 static int card = -1;
@@ -331,21 +332,38 @@ __perf_close(int fd)
 {
 	close(fd);
 	stream_fd = -1;
+
+	if (pm_fd >= 0) {
+		close(pm_fd);
+		pm_fd = -1;
+	}
 }
 
 static int
-__perf_open(int fd, struct drm_i915_perf_open_param *param)
+__perf_open(int fd, struct drm_i915_perf_open_param *param, bool prevent_pm)
 {
 	int ret;
+	int32_t pm_value = 0;
 
 	if (stream_fd >= 0)
 		__perf_close(stream_fd);
+	if (pm_fd >= 0) {
+		close(pm_fd);
+		pm_fd = -1;
+	}
 
 	ret = igt_ioctl(fd, DRM_IOCTL_I915_PERF_OPEN, param);
 
 	igt_assert(ret >= 0);
 	errno = 0;
 
+	if (prevent_pm) {
+		pm_fd = open("/dev/cpu_dma_latency", O_RDWR);
+		igt_assert(pm_fd >= 0);
+
+		igt_assert_eq(write(pm_fd, &pm_value, sizeof(pm_value)), sizeof(pm_value));
+	}
+
 	return ret;
 }
 
@@ -1257,7 +1275,7 @@ test_system_wide_paranoid(void)
 
 		igt_drop_root();
 
-		stream_fd = __perf_open(drm_fd, &param);
+		stream_fd = __perf_open(drm_fd, &param, false);
 		__perf_close(stream_fd);
 	}
 
@@ -1314,7 +1332,7 @@ test_invalid_oa_metric_set_id(void)
 
 	/* Check that we aren't just seeing false positives... */
 	properties[ARRAY_SIZE(properties) - 1] = test_metric_set_id;
-	stream_fd = __perf_open(drm_fd, &param);
+	stream_fd = __perf_open(drm_fd, &param, false);
 	__perf_close(stream_fd);
 
 	/* There's no valid default OA metric set ID... */
@@ -1348,7 +1366,7 @@ test_invalid_oa_format_id(void)
 
 	/* Check that we aren't just seeing false positives... */
 	properties[ARRAY_SIZE(properties) - 1] = test_oa_format;
-	stream_fd = __perf_open(drm_fd, &param);
+	stream_fd = __perf_open(drm_fd, &param, false);
 	__perf_close(stream_fd);
 
 	/* There's no valid default OA format... */
@@ -1512,7 +1530,7 @@ open_and_read_2_oa_reports(int format_id,
 		.properties_ptr = to_user_pointer(properties),
 	};
 
-	stream_fd = __perf_open(drm_fd, &param);
+	stream_fd = __perf_open(drm_fd, &param, false);
 
 	read_2_oa_reports(format_id, exponent,
 			  oa_report0, oa_report1, timer_only);
@@ -1916,7 +1934,7 @@ test_oa_exponents(void)
 				  oa_exponent_to_ns(exponent) / 1000.0,
 				  oa_exponent_to_ns(exponent) / (1000.0 * 1000.0));
 
-			stream_fd = __perf_open(drm_fd, &param);
+			stream_fd = __perf_open(drm_fd, &param, true /* prevent_pm */);
 
 			/* Right after opening the OA stream, read a
 			 * first timestamp as way to filter previously
@@ -2192,7 +2210,7 @@ test_invalid_oa_exponent(void)
 		.properties_ptr = to_user_pointer(properties),
 	};
 
-	stream_fd = __perf_open(drm_fd, &param);
+	stream_fd = __perf_open(drm_fd, &param, false);
 
 	__perf_close(stream_fd);
 
@@ -2246,7 +2264,7 @@ test_low_oa_exponent_permissions(void)
 	igt_fork(child, 1) {
 		igt_drop_root();
 
-		stream_fd = __perf_open(drm_fd, &param);
+		stream_fd = __perf_open(drm_fd, &param, false);
 		__perf_close(stream_fd);
 	}
 
@@ -2312,7 +2330,7 @@ test_per_context_mode_unprivileged(void)
 
 		properties[1] = ctx_id;
 
-		stream_fd = __perf_open(drm_fd, &param);
+		stream_fd = __perf_open(drm_fd, &param, false);
 		__perf_close(stream_fd);
 
 		drm_intel_gem_context_destroy(context);
@@ -2401,7 +2419,7 @@ test_blocking(void)
 	int64_t start, end;
 	int n = 0;
 
-	stream_fd = __perf_open(drm_fd, &param);
+	stream_fd = __perf_open(drm_fd, &param, true /* prevent_pm */);
 
 	times(&start_times);
 
@@ -2565,7 +2583,7 @@ test_polling(void)
 	int64_t start, end;
 	int n = 0;
 
-	stream_fd = __perf_open(drm_fd, &param);
+	stream_fd = __perf_open(drm_fd, &param, true /* prevent_pm */);
 
 	times(&start_times);
 
@@ -2742,7 +2760,7 @@ test_buffer_fill(void)
 
 	igt_assert(fill_duration < 1000000000);
 
-	stream_fd = __perf_open(drm_fd, &param);
+	stream_fd = __perf_open(drm_fd, &param, true /* prevent_pm */);
 
 	for (int i = 0; i < 5; i++) {
 		bool overflow_seen;
@@ -2909,7 +2927,7 @@ test_enable_disable(void)
 	load_helper_init();
 	load_helper_run(HIGH);
 
-	stream_fd = __perf_open(drm_fd, &param);
+	stream_fd = __perf_open(drm_fd, &param, true /* prevent_pm */);
 
 	for (int i = 0; i < 5; i++) {
 		int len;
@@ -3074,7 +3092,7 @@ test_short_reads(void)
 	ret = mprotect(pages + page_size, page_size, PROT_NONE);
 	igt_assert_eq(ret, 0);
 
-	stream_fd = __perf_open(drm_fd, &param);
+	stream_fd = __perf_open(drm_fd, &param, false);
 
 	nanosleep(&(struct timespec){ .tv_sec = 0, .tv_nsec = 5000000 }, NULL);
 
@@ -3147,7 +3165,7 @@ test_non_sampling_read_error(void)
 	int ret;
 	uint8_t buf[1024];
 
-	stream_fd = __perf_open(drm_fd, &param);
+	stream_fd = __perf_open(drm_fd, &param, false);
 
 	ret = read(stream_fd, buf, sizeof(buf));
 	igt_assert_eq(ret, -1);
@@ -3185,7 +3203,7 @@ test_disabled_read_error(void)
 	uint32_t buf[128] = { 0 };
 	int ret;
 
-	stream_fd = __perf_open(drm_fd, &param);
+	stream_fd = __perf_open(drm_fd, &param, false);
 
 	ret = read(stream_fd, buf, sizeof(buf));
 	igt_assert_eq(ret, -1);
@@ -3195,7 +3213,7 @@ test_disabled_read_error(void)
 
 
 	param.flags &= ~I915_PERF_FLAG_DISABLED;
-	stream_fd = __perf_open(drm_fd, &param);
+	stream_fd = __perf_open(drm_fd, &param, false);
 
 	read_2_oa_reports(test_oa_format,
 			  oa_exponent,
@@ -3247,7 +3265,7 @@ test_mi_rpc(void)
 	uint32_t *report32;
 	int ret;
 
-	stream_fd = __perf_open(drm_fd, &param);
+	stream_fd = __perf_open(drm_fd, &param, false);
 
 	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
 
@@ -3433,7 +3451,7 @@ hsw_test_single_ctx_counters(void)
 		scratch_buf_memset(dst[0].bo, width, height, 0x00ff00ff);
 
 		igt_debug("opening i915-perf stream\n");
-		stream_fd = __perf_open(drm_fd, &param);
+		stream_fd = __perf_open(drm_fd, &param, false);
 
 		bo = drm_intel_bo_alloc(bufmgr, "mi_rpc dest bo", 4096, 64);
 
@@ -3685,7 +3703,7 @@ gen8_test_single_ctx_render_target_writes_a_counter(void)
 			scratch_buf_memset(dst[0].bo, width, height, 0x00ff00ff);
 
 			igt_debug("opening i915-perf stream\n");
-			stream_fd = __perf_open(drm_fd, &param);
+			stream_fd = __perf_open(drm_fd, &param, false);
 
 			bo = drm_intel_bo_alloc(bufmgr, "mi_rpc dest bo", 4096, 64);
 
@@ -4035,7 +4053,7 @@ test_rc6_disable(void)
 
 	igt_skip_on(!rc6_enabled());
 
-	stream_fd = __perf_open(drm_fd, &param);
+	stream_fd = __perf_open(drm_fd, &param, false);
 
 	n_events_start = read_debugfs_u64_record(drm_fd, "i915_drpc_info",
 						 "RC6 residency since boot");
@@ -4250,7 +4268,7 @@ test_create_destroy_userspace_config(void)
 
 	/* Try to use the new config */
 	properties[1] = config_id;
-	stream_fd = __perf_open(drm_fd, &param);
+	stream_fd = __perf_open(drm_fd, &param, false);
 
 	/* Verify that destroying the config doesn't yield any error. */
 	i915_perf_remove_config(drm_fd, config_id);
@@ -4457,7 +4475,7 @@ test_i915_ref_count(void)
 	igt_debug("initial ref count with drm_fd open = %u\n", ref_count0);
 	igt_assert(ref_count0 > baseline);
 
-	stream_fd = __perf_open(drm_fd, &param);
+	stream_fd = __perf_open(drm_fd, &param, false);
 	ref_count1 = read_i915_module_ref();
 	igt_debug("ref count after opening i915 perf stream = %u\n", ref_count1);
 	igt_assert(ref_count1 > ref_count0);
-- 
2.14.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* ✓ Fi.CI.BAT: success for Improve robustness of the i915 perf tests (rev2)
  2017-08-29 13:55 [PATCH i-g-t v4 00/12] Improve robustness of the i915 perf tests Lionel Landwerlin
                   ` (11 preceding siblings ...)
  2017-08-29 13:55 ` [PATCH i-g-t v4 12/12] tests/perf: prevent power management to kick in when necessary Lionel Landwerlin
@ 2017-08-29 14:14 ` Patchwork
  2017-08-29 15:14 ` ✓ Fi.CI.IGT: " Patchwork
  13 siblings, 0 replies; 15+ messages in thread
From: Patchwork @ 2017-08-29 14:14 UTC (permalink / raw)
  To: Lionel Landwerlin; +Cc: intel-gfx

== Series Details ==

Series: Improve robustness of the i915 perf tests (rev2)
URL   : https://patchwork.freedesktop.org/series/28373/
State : success

== Summary ==

IGT patchset tested on top of latest successful build
bf45d253648250fc402eee02237366c8882b2053 igt: Add gem_close

with latest DRM-Tip kernel build CI_DRM_3014
627598734e5e drm-tip: 2017y-08m-29d-09h-52m-12s UTC integration manifest

Test kms_cursor_legacy:
        Subgroup basic-busy-flip-before-cursor-legacy:
                fail       -> PASS       (fi-snb-2600) fdo#100215
Test kms_flip:
        Subgroup basic-flip-vs-modeset:
                skip       -> PASS       (fi-skl-x1585l) fdo#101781
Test kms_pipe_crc_basic:
        Subgroup suspend-read-crc-pipe-b:
                dmesg-warn -> PASS       (fi-byt-j1900) fdo#101705

fdo#100215 https://bugs.freedesktop.org/show_bug.cgi?id=100215
fdo#101781 https://bugs.freedesktop.org/show_bug.cgi?id=101781
fdo#101705 https://bugs.freedesktop.org/show_bug.cgi?id=101705

fi-bdw-5557u     total:279  pass:268  dwarn:0   dfail:0   fail:0   skip:11  time:456s
fi-bdw-gvtdvm    total:279  pass:265  dwarn:0   dfail:0   fail:0   skip:14  time:440s
fi-blb-e6850     total:279  pass:224  dwarn:1   dfail:0   fail:0   skip:54  time:361s
fi-bsw-n3050     total:279  pass:243  dwarn:0   dfail:0   fail:0   skip:36  time:557s
fi-bwr-2160      total:279  pass:184  dwarn:0   dfail:0   fail:0   skip:95  time:253s
fi-bxt-j4205     total:279  pass:260  dwarn:0   dfail:0   fail:0   skip:19  time:521s
fi-byt-j1900     total:279  pass:255  dwarn:0   dfail:0   fail:0   skip:24  time:526s
fi-byt-n2820     total:279  pass:250  dwarn:1   dfail:0   fail:0   skip:28  time:518s
fi-elk-e7500     total:279  pass:230  dwarn:0   dfail:0   fail:0   skip:49  time:444s
fi-glk-2a        total:279  pass:260  dwarn:0   dfail:0   fail:0   skip:19  time:618s
fi-hsw-4770      total:279  pass:263  dwarn:0   dfail:0   fail:0   skip:16  time:443s
fi-hsw-4770r     total:279  pass:263  dwarn:0   dfail:0   fail:0   skip:16  time:426s
fi-ilk-650       total:279  pass:229  dwarn:0   dfail:0   fail:0   skip:50  time:426s
fi-ivb-3520m     total:279  pass:261  dwarn:0   dfail:0   fail:0   skip:18  time:506s
fi-ivb-3770      total:279  pass:261  dwarn:0   dfail:0   fail:0   skip:18  time:478s
fi-kbl-7500u     total:279  pass:261  dwarn:0   dfail:0   fail:0   skip:18  time:477s
fi-kbl-7560u     total:279  pass:269  dwarn:0   dfail:0   fail:0   skip:10  time:601s
fi-pnv-d510      total:279  pass:223  dwarn:1   dfail:0   fail:0   skip:55  time:523s
fi-skl-6260u     total:279  pass:269  dwarn:0   dfail:0   fail:0   skip:10  time:472s
fi-skl-6700k     total:279  pass:261  dwarn:0   dfail:0   fail:0   skip:18  time:482s
fi-skl-6770hq    total:279  pass:269  dwarn:0   dfail:0   fail:0   skip:10  time:490s
fi-skl-gvtdvm    total:279  pass:266  dwarn:0   dfail:0   fail:0   skip:13  time:439s
fi-skl-x1585l    total:279  pass:269  dwarn:0   dfail:0   fail:0   skip:10  time:506s
fi-snb-2520m     total:279  pass:251  dwarn:0   dfail:0   fail:0   skip:28  time:544s
fi-snb-2600      total:279  pass:250  dwarn:0   dfail:0   fail:0   skip:29  time:405s

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_118/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* ✓ Fi.CI.IGT: success for Improve robustness of the i915 perf tests (rev2)
  2017-08-29 13:55 [PATCH i-g-t v4 00/12] Improve robustness of the i915 perf tests Lionel Landwerlin
                   ` (12 preceding siblings ...)
  2017-08-29 14:14 ` ✓ Fi.CI.BAT: success for Improve robustness of the i915 perf tests (rev2) Patchwork
@ 2017-08-29 15:14 ` Patchwork
  13 siblings, 0 replies; 15+ messages in thread
From: Patchwork @ 2017-08-29 15:14 UTC (permalink / raw)
  To: Lionel Landwerlin; +Cc: intel-gfx

== Series Details ==

Series: Improve robustness of the i915 perf tests (rev2)
URL   : https://patchwork.freedesktop.org/series/28373/
State : success

== Summary ==

Test perf:
        Subgroup oa-exponents:
                fail       -> PASS       (shard-hsw) fdo#102254
        Subgroup polling:
                fail       -> PASS       (shard-hsw) fdo#102252
Test kms_atomic_transition:
        Subgroup plane-all-transition-fencing:
                skip       -> PASS       (shard-hsw)
Test kms_setmode:
        Subgroup basic:
                pass       -> FAIL       (shard-hsw) fdo#99912
Test kms_plane:
        Subgroup plane-panning-bottom-right-suspend-pipe-C-planes:
                skip       -> PASS       (shard-hsw)
        Subgroup plane-position-hole-dpms-pipe-C-planes:
                skip       -> PASS       (shard-hsw)
Test kms_properties:
        Subgroup plane-properties-legacy:
                skip       -> PASS       (shard-hsw)

fdo#102254 https://bugs.freedesktop.org/show_bug.cgi?id=102254
fdo#102252 https://bugs.freedesktop.org/show_bug.cgi?id=102252
fdo#99912 https://bugs.freedesktop.org/show_bug.cgi?id=99912

shard-hsw        total:2231 pass:1231 dwarn:0   dfail:0   fail:17  skip:983 time:9558s

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_118/shards.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2017-08-29 15:14 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-08-29 13:55 [PATCH i-g-t v4 00/12] Improve robustness of the i915 perf tests Lionel Landwerlin
2017-08-29 13:55 ` [PATCH i-g-t v4 01/12] tests/perf: make stream_fd a global variable Lionel Landwerlin
2017-08-29 13:55 ` [PATCH i-g-t v4 02/12] tests/perf: add per context filtering test for gen8+ Lionel Landwerlin
2017-08-29 13:55 ` [PATCH i-g-t v4 03/12] tests/perf: update max buffer size for reading reports Lionel Landwerlin
2017-08-29 13:55 ` [PATCH i-g-t v4 04/12] tests/perf: rc6: try to guess when rc6 is disabled Lionel Landwerlin
2017-08-29 13:55 ` [PATCH i-g-t v4 05/12] tests/perf: remove frequency related changes Lionel Landwerlin
2017-08-29 13:55 ` [PATCH i-g-t v4 06/12] tests/perf: rework oa-exponent test Lionel Landwerlin
2017-08-29 13:55 ` [PATCH i-g-t v4 07/12] tests/perf: make enable-disable more reliable Lionel Landwerlin
2017-08-29 13:55 ` [PATCH i-g-t v4 08/12] tests/perf: make buffer-fill " Lionel Landwerlin
2017-08-29 13:55 ` [PATCH i-g-t v4 09/12] tests/perf: add Kabylake support Lionel Landwerlin
2017-08-29 13:55 ` [PATCH i-g-t v4 10/12] tests/perf: add Geminilake support Lionel Landwerlin
2017-08-29 13:55 ` [PATCH i-g-t v4 11/12] tests/perf: estimate number of blocking/polling based on time spent Lionel Landwerlin
2017-08-29 13:55 ` [PATCH i-g-t v4 12/12] tests/perf: prevent power management to kick in when necessary Lionel Landwerlin
2017-08-29 14:14 ` ✓ Fi.CI.BAT: success for Improve robustness of the i915 perf tests (rev2) Patchwork
2017-08-29 15:14 ` ✓ Fi.CI.IGT: " Patchwork

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.