All of lore.kernel.org
 help / color / mirror / Atom feed
* [igt-dev] [PATCH 1/2] i915/perf: add tests for triggered OA reports
@ 2020-07-17 23:58 Umesh Nerlige Ramappa
  2020-07-17 23:58 ` [igt-dev] [PATCH 2/2] i915/perf: Sanity check reports in mapped OA buffer Umesh Nerlige Ramappa
  2020-07-18  0:26 ` [igt-dev] ✗ Fi.CI.BAT: failure for series starting with [1/2] i915/perf: add tests for triggered OA reports Patchwork
  0 siblings, 2 replies; 10+ messages in thread
From: Umesh Nerlige Ramappa @ 2020-07-17 23:58 UTC (permalink / raw)
  To: igt-dev

From: Lionel G Landwerlin <lionel.g.landwerlin@intel.com>

By whitelisting a couple of registers we can allow an application
batch to trigger OA reports in the OA buffer by toggling an inverter
bit in the report trigger condition logic.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 tests/i915/perf.c | 254 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 252 insertions(+), 2 deletions(-)

diff --git a/tests/i915/perf.c b/tests/i915/perf.c
index 92edc9f1..eb38ea12 100644
--- a/tests/i915/perf.c
+++ b/tests/i915/perf.c
@@ -53,6 +53,8 @@ IGT_TEST_DESCRIPTION("Test the i915 perf metrics streaming interface");
 #define OAREPORT_REASON_SHIFT          19
 #define OAREPORT_REASON_TIMER          (1<<0)
 #define OAREPORT_REASON_INTERNAL       (3<<1)
+#define OAREPORT_REASON_TRIGGER1       (1<<1)
+#define OAREPORT_REASON_TRIGGER2       (1<<2)
 #define OAREPORT_REASON_CTX_SWITCH     (1<<3)
 #define OAREPORT_REASON_GO             (1<<4)
 #define OAREPORT_REASON_CLK_RATIO      (1<<5)
@@ -383,11 +385,17 @@ gen8_read_report_clock_ratios(uint32_t *report,
 	*unslice_freq_mhz = (unslice_freq * 16666) / 1000;
 }
 
+static uint32_t
+gen8_report_reason(const uint32_t *report)
+{
+	const uint32_t shifted = report[0] >> OAREPORT_REASON_SHIFT; /* reason lives in dword 0 */
+	return shifted & OAREPORT_REASON_MASK;
+}
+
 static const char *
 gen8_read_report_reason(const uint32_t *report)
 {
-	uint32_t reason = ((report[0] >> OAREPORT_REASON_SHIFT) &
-			   OAREPORT_REASON_MASK);
+	uint32_t reason = gen8_report_reason(report);
 
 	if (reason & (1<<0))
 		return "timer";
@@ -3118,6 +3126,241 @@ emit_stall_timestamp_and_rpc(struct intel_batchbuffer *batch,
 	emit_report_perf_count(batch, dst, report_dst_offset, report_id);
 }
 
+/* The following registers all have the same layout. */
+#define OAREPORTTRIG2 (0x2744)
+#define   OAREPORTTRIG2_INVERT_A_0  (1 << 0)
+#define   OAREPORTTRIG2_INVERT_A_1  (1 << 1)
+#define   OAREPORTTRIG2_INVERT_A_2  (1 << 2)
+#define   OAREPORTTRIG2_INVERT_A_3  (1 << 3)
+#define   OAREPORTTRIG2_INVERT_A_4  (1 << 4)
+#define   OAREPORTTRIG2_INVERT_A_5  (1 << 5)
+#define   OAREPORTTRIG2_INVERT_A_6  (1 << 6)
+#define   OAREPORTTRIG2_INVERT_A_7  (1 << 7)
+#define   OAREPORTTRIG2_INVERT_A_8  (1 << 8)
+#define   OAREPORTTRIG2_INVERT_A_9  (1 << 9)
+#define   OAREPORTTRIG2_INVERT_A_10 (1 << 10)
+#define   OAREPORTTRIG2_INVERT_A_11 (1 << 11)
+#define   OAREPORTTRIG2_INVERT_A_12 (1 << 12)
+#define   OAREPORTTRIG2_INVERT_A_13 (1 << 13)
+#define   OAREPORTTRIG2_INVERT_A_14 (1 << 14)
+#define   OAREPORTTRIG2_INVERT_A_15 (1 << 15)
+#define   OAREPORTTRIG2_INVERT_B_0  (1 << 16)
+#define   OAREPORTTRIG2_INVERT_B_1  (1 << 17)
+#define   OAREPORTTRIG2_INVERT_B_2  (1 << 18)
+#define   OAREPORTTRIG2_INVERT_B_3  (1 << 19)
+#define   OAREPORTTRIG2_INVERT_C_0  (1 << 20)
+#define   OAREPORTTRIG2_INVERT_C_1  (1 << 21)
+#define   OAREPORTTRIG2_INVERT_D_0  (1 << 22)
+#define   OAREPORTTRIG2_THRESHOLD_ENABLE      (1 << 23)
+#define   OAREPORTTRIG2_REPORT_TRIGGER_ENABLE (1u << 31) /* unsigned: 1 << 31 overflows int */
+#define OAREPORTTRIG6 (0x2754)
+#define GEN12_OAREPORTTRIG2 (0xd924)
+#define GEN12_OAREPORTTRIG6 (0xd934)
+
+static void
+emit_triggered_oa_report(struct intel_batchbuffer *batch,
+			 uint32_t trigger)
+{
+	/*
+	 * Two trigger registers exist, each producing its own report reason.
+	 * Flipping INVERT_D_0 between the two LRI writes triggers the report.
+	 */
+	static const uint32_t gen8_triggers[] = {
+		OAREPORTTRIG2,
+		OAREPORTTRIG6,
+	};
+	static const uint32_t gen12_triggers[] = {
+		GEN12_OAREPORTTRIG2,
+		GEN12_OAREPORTTRIG6,
+	};
+	const uint32_t *triggers = intel_gen(devid) >= 12 ? gen12_triggers : gen8_triggers;
+
+	igt_assert(trigger < ARRAY_SIZE(gen8_triggers));
+
+	BEGIN_BATCH(6, 0);
+	OUT_BATCH(MI_LOAD_REGISTER_IMM);
+	OUT_BATCH(triggers[trigger]);
+	OUT_BATCH(OAREPORTTRIG2_INVERT_C_1 |
+		  OAREPORTTRIG2_REPORT_TRIGGER_ENABLE);
+	OUT_BATCH(MI_LOAD_REGISTER_IMM);
+	OUT_BATCH(triggers[trigger]);
+	OUT_BATCH(OAREPORTTRIG2_INVERT_C_1 |
+		  OAREPORTTRIG2_INVERT_D_0 |
+		  OAREPORTTRIG2_REPORT_TRIGGER_ENABLE);
+	ADVANCE_BATCH();
+}
+
+static uint64_t
+rcs_timestmap_reg_read(int fd)
+{
+	struct drm_i915_reg_read rr = { 0 };
+
+	/* 0x2358 is the render ring timestamp register. */
+	rr.offset = 0x2358 | I915_REG_READ_8B_WA;
+	do_ioctl(fd, DRM_IOCTL_I915_REG_READ, &rr);
+
+	return rr.val;
+}
+
+/*
+ * Verify that MI_LOAD_REGISTER_IMM writes from a batch trigger OA reports:
+ * two reports are expected for each of the two trigger registers.
+ */
+static void
+test_triggered_oa_reports(void)
+{
+	int oa_exponent = max_oa_exponent_for_period_lte(1000000);
+	uint64_t properties[] = {
+		DRM_I915_PERF_PROP_CTX_HANDLE, UINT64_MAX, /* updated below */
+
+		/* Note: we have to specify at least one sample property even
+		 * though we aren't interested in samples in this case
+		 */
+		DRM_I915_PERF_PROP_SAMPLE_OA, true,
+
+		/* OA unit configuration */
+		DRM_I915_PERF_PROP_OA_METRICS_SET, test_set->perf_oa_metrics_set,
+		DRM_I915_PERF_PROP_OA_FORMAT, test_set->perf_oa_format,
+		DRM_I915_PERF_PROP_OA_EXPONENT, oa_exponent,
+
+		/* Note: periodic reports stay enabled; the loop below keys on the reason */
+	};
+	struct drm_i915_perf_open_param param = {
+		.flags = I915_PERF_FLAG_FD_CLOEXEC,
+		.num_properties = ARRAY_SIZE(properties) / 2,
+		.properties_ptr = to_user_pointer(properties),
+	};
+	struct drm_i915_perf_record_header *header;
+	drm_intel_bufmgr *bufmgr;
+	drm_intel_context *context;
+	struct igt_helper_process child = {};
+	struct intel_batchbuffer *batch;
+	struct igt_buf src[2], dst[2];
+	uint64_t timestamp32_mask = (1ull << 32) - 1;
+	uint64_t timestamps[2];
+	uint32_t buf_size = 16 * 1024 * 1024;
+	uint8_t *buf = malloc(buf_size);
+	uint32_t ctx_id;
+	int width = 800, height = 600;
+	uint32_t trigger_counts[2] = { 0, };
+	int ret;
+
+	igt_assert(buf);
+	do {
+		igt_fork_helper(&child) {
+			bufmgr = drm_intel_bufmgr_gem_init(drm_fd, 4096);
+			drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+
+			scratch_buf_init(bufmgr, &src[0], width, height, 0xff0000ff);
+			scratch_buf_init(bufmgr, &dst[0], width, height, 0x00ff00ff);
+			scratch_buf_init(bufmgr, &src[1], 2 * width, height, 0xff0000ff);
+			scratch_buf_init(bufmgr, &dst[1], 2 * width, height, 0x00ff00ff);
+
+			batch = intel_batchbuffer_alloc(bufmgr, devid);
+
+			context = drm_intel_gem_context_create(bufmgr);
+			igt_assert(context);
+
+			ret = drm_intel_gem_context_get_id(context, &ctx_id);
+			igt_assert_eq(ret, 0);
+			properties[1] = ctx_id;
+
+			timestamps[0] = rcs_timestmap_reg_read(drm_fd);
+
+			stream_fd = __perf_open(drm_fd, &param, false);
+
+			emit_triggered_oa_report(batch, 0);
+
+			render_copy(batch,
+				    context,
+				    &src[0], 0, 0, width, height,
+				    &dst[0], 0, 0);
+
+			emit_triggered_oa_report(batch, 0);
+
+			emit_triggered_oa_report(batch, 1);
+
+			render_copy(batch,
+				    context,
+				    &src[1], 0, 0, 2 * width, height,
+				    &dst[1], 0, 0);
+
+			emit_triggered_oa_report(batch, 1);
+
+			intel_batchbuffer_flush_with_context(batch, context);
+
+			timestamps[1] = rcs_timestmap_reg_read(drm_fd);
+
+			if (timestamps[1] < timestamps[0] ||
+			    (timestamps[1] & timestamp32_mask) < (timestamps[0] & timestamp32_mask)) {
+				igt_debug("Timestamp rollover, trying again\n");
+				exit(EAGAIN);
+			}
+
+			ret = i915_read_reports_until_timestamp(test_set->perf_oa_format,
+								buf, buf_size,
+								timestamps[0] & timestamp32_mask,
+								timestamps[1] & timestamp32_mask);
+
+			for (size_t offset = 0; offset < ret; offset += header->size) {
+				uint32_t *report;
+
+				header = (void *)(buf + offset);
+
+				igt_assert_eq(header->pad, 0); /* Reserved */
+
+				igt_assert_neq(header->type, DRM_I915_PERF_RECORD_OA_BUFFER_LOST);
+
+				if (header->type == DRM_I915_PERF_RECORD_OA_REPORT_LOST)
+					continue;
+
+				/* Currently the only other record type expected is a
+				 * _SAMPLE. Notably this test will need updating if
+				 * i915-perf is extended in the future with additional
+				 * record types.
+				 */
+				igt_assert_eq(header->type, DRM_I915_PERF_RECORD_SAMPLE);
+
+				report = (void *)(header + 1);
+
+				igt_debug("report ts=0x%08x hw_id=0x%08x reason=%s\n",
+					  report[1], report[2],
+					  gen8_read_report_reason(report));
+
+				if (gen8_report_reason(report) & OAREPORT_REASON_TRIGGER1) {
+					igt_assert_eq(trigger_counts[1], 0);
+					trigger_counts[0]++;
+				}
+				if (gen8_report_reason(report) & OAREPORT_REASON_TRIGGER2) {
+					igt_assert_eq(trigger_counts[0], 2);
+					trigger_counts[1]++;
+				}
+			}
+
+			igt_assert_eq(trigger_counts[0], 2);
+			igt_assert_eq(trigger_counts[1], 2);
+
+			for (int i = 0; i < ARRAY_SIZE(src); i++) {
+				drm_intel_bo_unreference(src[i].bo);
+				drm_intel_bo_unreference(dst[i].bo);
+			}
+
+			intel_batchbuffer_free(batch);
+			drm_intel_gem_context_destroy(context);
+			drm_intel_bufmgr_destroy(bufmgr);
+			__perf_close(stream_fd);
+		}
+
+		ret = igt_wait_helper(&child);
+
+		igt_assert(WEXITSTATUS(ret) == EAGAIN ||
+			   WEXITSTATUS(ret) == 0);
+
+	} while (WEXITSTATUS(ret) == EAGAIN);
+
+	free(buf);
+}
+
 /* Tests the INTEL_performance_query use case where an unprivileged process
  * should be able to configure the OA unit for per-context metrics (for a
  * context associated with that process' drm file descriptor) and the counters
@@ -5096,6 +5339,13 @@ igt_main
 	igt_subtest("whitelisted-registers-userspace-config")
 		test_whitelisted_registers_userspace_config();
 
+	igt_describe("Verify that triggered reports work");
+	igt_subtest("triggered-oa-reports") {
+		igt_require(intel_gen(devid) >= 8);
+		igt_require(i915_perf_revision(drm_fd) >= 6);
+		test_triggered_oa_reports();
+	}
+
 	igt_fixture {
 		/* leave sysctl options in their default state... */
 		write_u64_file("/proc/sys/dev/i915/oa_max_sample_rate", 100000);
-- 
2.20.1

_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [igt-dev] [PATCH 2/2] i915/perf: Sanity check reports in mapped OA buffer
  2020-07-17 23:58 [igt-dev] [PATCH 1/2] i915/perf: add tests for triggered OA reports Umesh Nerlige Ramappa
@ 2020-07-17 23:58 ` Umesh Nerlige Ramappa
  2020-07-18  0:26 ` [igt-dev] ✗ Fi.CI.BAT: failure for series starting with [1/2] i915/perf: add tests for triggered OA reports Patchwork
  1 sibling, 0 replies; 10+ messages in thread
From: Umesh Nerlige Ramappa @ 2020-07-17 23:58 UTC (permalink / raw)
  To: igt-dev

For applications that need a faster way to access reports in the OA
buffer, i915 now provides a way to map the OA buffer to privileged user
space. Add a test to sanity check reports in the mapped OA buffer.

Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
---
 include/drm-uapi/i915_drm.h |  32 ++++++++++
 tests/i915/perf.c           | 113 ++++++++++++++++++++++++++++++++++++
 2 files changed, 145 insertions(+)

diff --git a/include/drm-uapi/i915_drm.h b/include/drm-uapi/i915_drm.h
index 2b55af13..f7523d55 100644
--- a/include/drm-uapi/i915_drm.h
+++ b/include/drm-uapi/i915_drm.h
@@ -2048,6 +2048,38 @@ struct drm_i915_perf_open_param {
  */
 #define I915_PERF_IOCTL_CONFIG	_IO('i', 0x2)
 
+/**
+ * Returns OA buffer properties to be used with mmap.
+ *
+ * This ioctl is available in perf revision 6.
+ */
+#define I915_PERF_IOCTL_GET_OA_BUFFER_INFO _IO('i', 0x3)
+
+/**
+ * OA buffer size and offset.
+ */
+struct drm_i915_perf_oa_buffer_info {
+	__u32 size;
+	__u32 offset;
+	__u64 reserved[4];
+};
+
+/**
+ * Returns current position of OA buffer head and tail.
+ *
+ * This ioctl is available in perf revision 6.
+ */
+#define I915_PERF_IOCTL_GET_OA_BUFFER_HEAD_TAIL _IO('i', 0x4)
+
+/**
+ * OA buffer head and tail.
+ */
+struct drm_i915_perf_oa_buffer_head_tail {
+	__u32 head;
+	__u32 tail;
+	__u64 reserved[4];
+};
+
 /**
  * Common to all i915 perf records
  */
diff --git a/tests/i915/perf.c b/tests/i915/perf.c
index eb38ea12..a02896e5 100644
--- a/tests/i915/perf.c
+++ b/tests/i915/perf.c
@@ -206,6 +206,7 @@ static struct intel_perf *intel_perf = NULL;
 static struct intel_perf_metric_set *test_set = NULL;
 static bool *undefined_a_counters;
 static uint64_t oa_exp_1_millisec;
+struct intel_mmio_data mmio_data;
 
 static igt_render_copyfunc_t render_copy = NULL;
 static uint32_t (*read_report_ticks)(uint32_t *report,
@@ -5011,6 +5012,108 @@ test_whitelisted_registers_userspace_config(void)
 	i915_perf_remove_config(drm_fd, config_id);
 }
 
+#define OA_BUFFER_DATA(tail, head, oa_buffer_size) \
+	(((tail) - (head)) & ((oa_buffer_size) - 1))
+
+static uint32_t oa_status_reg(void)
+{
+	/* The low 3 bits are kept on Haswell and gen12, the low 4 bits otherwise. */
+	if (IS_HASWELL(devid))
+		return intel_register_read(&mmio_data, 0x2346) & 0x7;
+	if (IS_GEN12(devid))
+		return intel_register_read(&mmio_data, 0xdafc) & 0x7;
+	return intel_register_read(&mmio_data, 0x2b08) & 0xf;
+}
+
+/* Map the OA buffer and sanity check consecutive periodic reports in it. */
+static void check_reports_from_mapped_buffer(enum drm_i915_oa_format fmt,
+					     int oa_exponent)
+{
+	struct drm_i915_perf_oa_buffer_info oa_buffer = {};
+	struct drm_i915_perf_oa_buffer_head_tail oa_ht = {};
+	struct oa_format format = get_oa_format(fmt);
+	size_t report_size = format.size;
+	uint32_t *report0 = NULL, *report1;
+	uint32_t num_reports, timer_reports = 0;
+	uint32_t period_us = oa_exponent_to_ns(oa_exponent) / 1000;
+	void *oa_vaddr;
+	uint32_t i;
+
+	do_ioctl(stream_fd, I915_PERF_IOCTL_GET_OA_BUFFER_INFO, &oa_buffer);
+
+	igt_debug("size        = %u\n", oa_buffer.size);
+	igt_debug("offset      = %x\n", oa_buffer.offset);
+
+	igt_assert_eq(oa_buffer.size & (oa_buffer.size - 1), 0);
+	igt_assert_eq(oa_status_reg(), 0);
+
+	/* try a couple of invalid mmaps, starting with bad offsets */
+	igt_assert(mmap(0, oa_buffer.size, PROT_READ, MAP_PRIVATE,
+			stream_fd, 0) == (void *) -1);
+	igt_assert(mmap(0, oa_buffer.size, PROT_READ, MAP_PRIVATE,
+			stream_fd, 8192) == (void *) -1);
+	igt_assert(mmap(0, oa_buffer.size, PROT_READ, MAP_PRIVATE,
+			stream_fd, 11) == (void *) -1);
+
+	/* bad size */
+	igt_assert(mmap(0, oa_buffer.size + 4096, PROT_READ, MAP_PRIVATE,
+			stream_fd, oa_buffer.offset) == (void *) -1);
+
+	/* do the right thing */
+	oa_vaddr = mmap(0, oa_buffer.size, PROT_READ, MAP_PRIVATE, stream_fd, oa_buffer.offset);
+	igt_assert(oa_vaddr != MAP_FAILED);
+
+	/* wait for approx 100 reports */
+	usleep(100 * period_us);
+
+	do_ioctl(stream_fd, I915_PERF_IOCTL_GET_OA_BUFFER_HEAD_TAIL, &oa_ht);
+
+	igt_debug("head = %x\n", oa_ht.head);
+	igt_debug("tail = %x\n", oa_ht.tail);
+
+	num_reports = OA_BUFFER_DATA(oa_ht.tail,
+				     oa_ht.head,
+				     oa_buffer.size) / report_size;
+
+	for (i = 0; i < num_reports; i++) {
+		/* Wrap at the end of the mapping instead of reading past it. */
+		size_t offset = (oa_ht.head + i * report_size) & (oa_buffer.size - 1);
+		report1 = (uint32_t *)((uint8_t *)oa_vaddr + offset);
+		if (!oa_report_is_periodic(oa_exponent, report1))
+			continue;
+
+		timer_reports++;
+		if (timer_reports >= 2)
+			sanity_check_reports(report0, report1, fmt);
+
+		report0 = report1;
+	}
+
+	munmap(oa_vaddr, oa_buffer.size);
+}
+
+/* Open a periodic OA stream and validate reports through the mmap path. */
+static void test_mapped_oa_buffer(void)
+{
+	int oa_exponent = max_oa_exponent_for_period_lte(1000000);
+	enum drm_i915_oa_format fmt = test_set->perf_oa_format;
+	uint64_t properties[] = {
+		DRM_I915_PERF_PROP_SAMPLE_OA, true,
+		DRM_I915_PERF_PROP_OA_METRICS_SET, test_set->perf_oa_metrics_set,
+		DRM_I915_PERF_PROP_OA_FORMAT, fmt,
+		DRM_I915_PERF_PROP_OA_EXPONENT, oa_exponent,
+	};
+	struct drm_i915_perf_open_param param = {
+		.flags = I915_PERF_FLAG_FD_CLOEXEC,
+		.num_properties = ARRAY_SIZE(properties) / 2,
+		.properties_ptr = to_user_pointer(properties),
+	};
+
+	stream_fd = __perf_open(drm_fd, &param, false);
+	check_reports_from_mapped_buffer(fmt, oa_exponent);
+	__perf_close(stream_fd);
+}
+
 static unsigned
 read_i915_module_ref(void)
 {
@@ -5179,6 +5282,9 @@ igt_main
 
 		render_copy = igt_get_render_copyfunc(devid);
 		igt_require_f(render_copy, "no render-copy function\n");
+
+		intel_register_access_init(&mmio_data, intel_get_pci_device(),
+					   0, drm_fd);
 	}
 
 	igt_subtest("non-system-wide-paranoid")
@@ -5346,6 +5452,12 @@ igt_main
 		test_triggered_oa_reports();
 	}
 
+	igt_describe("Verify mapping of oa buffer");
+	igt_subtest("mapped-oa-buffer") {
+		igt_require(i915_perf_revision(drm_fd) >= 8);
+		test_mapped_oa_buffer();
+	}
+
 	igt_fixture {
 		/* leave sysctl options in their default state... */
 		write_u64_file("/proc/sys/dev/i915/oa_max_sample_rate", 100000);
@@ -5354,6 +5466,7 @@ igt_main
 		if (intel_perf)
 			intel_perf_free(intel_perf);
 
+		intel_register_access_fini(&mmio_data);
 		close(drm_fd);
 	}
 }
-- 
2.20.1

_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [igt-dev] ✗ Fi.CI.BAT: failure for series starting with [1/2] i915/perf: add tests for triggered OA reports
  2020-07-17 23:58 [igt-dev] [PATCH 1/2] i915/perf: add tests for triggered OA reports Umesh Nerlige Ramappa
  2020-07-17 23:58 ` [igt-dev] [PATCH 2/2] i915/perf: Sanity check reports in mapped OA buffer Umesh Nerlige Ramappa
@ 2020-07-18  0:26 ` Patchwork
  1 sibling, 0 replies; 10+ messages in thread
From: Patchwork @ 2020-07-18  0:26 UTC (permalink / raw)
  To: Umesh Nerlige Ramappa; +Cc: igt-dev


[-- Attachment #1.1: Type: text/plain, Size: 7382 bytes --]

== Series Details ==

Series: series starting with [1/2] i915/perf: add tests for triggered OA reports
URL   : https://patchwork.freedesktop.org/series/79617/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_8761 -> IGTPW_4776
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with IGTPW_4776 absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in IGTPW_4776, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4776/index.html

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in IGTPW_4776:

### IGT changes ###

#### Possible regressions ####

  * igt@i915_selftest@live@gt_lrc:
    - fi-bsw-n3050:       [PASS][1] -> [DMESG-FAIL][2]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8761/fi-bsw-n3050/igt@i915_selftest@live@gt_lrc.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4776/fi-bsw-n3050/igt@i915_selftest@live@gt_lrc.html

  
Known issues
------------

  Here are the changes found in IGTPW_4776 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_exec_suspend@basic-s0:
    - fi-tgl-u2:          [PASS][3] -> [FAIL][4] ([i915#1888])
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8761/fi-tgl-u2/igt@gem_exec_suspend@basic-s0.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4776/fi-tgl-u2/igt@gem_exec_suspend@basic-s0.html

  * igt@kms_cursor_legacy@basic-flip-before-cursor-atomic:
    - fi-icl-u2:          [PASS][5] -> [DMESG-WARN][6] ([i915#1982])
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8761/fi-icl-u2/igt@kms_cursor_legacy@basic-flip-before-cursor-atomic.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4776/fi-icl-u2/igt@kms_cursor_legacy@basic-flip-before-cursor-atomic.html

  * igt@vgem_basic@mmap:
    - fi-tgl-y:           [PASS][7] -> [DMESG-WARN][8] ([i915#402]) +1 similar issue
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8761/fi-tgl-y/igt@vgem_basic@mmap.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4776/fi-tgl-y/igt@vgem_basic@mmap.html

  
#### Possible fixes ####

  * igt@i915_pm_rpm@basic-pci-d3-state:
    - fi-tgl-y:           [DMESG-WARN][9] ([i915#1982]) -> [PASS][10]
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8761/fi-tgl-y/igt@i915_pm_rpm@basic-pci-d3-state.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4776/fi-tgl-y/igt@i915_pm_rpm@basic-pci-d3-state.html

  * igt@i915_pm_rpm@module-reload:
    - fi-kbl-guc:         [FAIL][11] ([i915#138]) -> [PASS][12]
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8761/fi-kbl-guc/igt@i915_pm_rpm@module-reload.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4776/fi-kbl-guc/igt@i915_pm_rpm@module-reload.html

  * igt@i915_selftest@live@execlists:
    - fi-cfl-8700k:       [INCOMPLETE][13] ([i915#2089]) -> [PASS][14]
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8761/fi-cfl-8700k/igt@i915_selftest@live@execlists.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4776/fi-cfl-8700k/igt@i915_selftest@live@execlists.html

  * igt@kms_cursor_legacy@basic-busy-flip-before-cursor-atomic:
    - fi-bsw-n3050:       [DMESG-WARN][15] ([i915#1982]) -> [PASS][16]
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8761/fi-bsw-n3050/igt@kms_cursor_legacy@basic-busy-flip-before-cursor-atomic.html
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4776/fi-bsw-n3050/igt@kms_cursor_legacy@basic-busy-flip-before-cursor-atomic.html

  * igt@kms_cursor_legacy@basic-busy-flip-before-cursor-legacy:
    - fi-icl-u2:          [DMESG-WARN][17] ([i915#1982]) -> [PASS][18] +1 similar issue
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8761/fi-icl-u2/igt@kms_cursor_legacy@basic-busy-flip-before-cursor-legacy.html
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4776/fi-icl-u2/igt@kms_cursor_legacy@basic-busy-flip-before-cursor-legacy.html

  * igt@prime_self_import@basic-with_two_bos:
    - fi-tgl-y:           [DMESG-WARN][19] ([i915#402]) -> [PASS][20] +1 similar issue
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8761/fi-tgl-y/igt@prime_self_import@basic-with_two_bos.html
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4776/fi-tgl-y/igt@prime_self_import@basic-with_two_bos.html

  
#### Warnings ####

  * igt@i915_pm_rpm@module-reload:
    - fi-kbl-x1275:       [SKIP][21] ([fdo#109271]) -> [DMESG-FAIL][22] ([i915#62])
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8761/fi-kbl-x1275/igt@i915_pm_rpm@module-reload.html
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4776/fi-kbl-x1275/igt@i915_pm_rpm@module-reload.html

  * igt@kms_cursor_legacy@basic-flip-after-cursor-varying-size:
    - fi-kbl-x1275:       [DMESG-WARN][23] ([i915#62] / [i915#92]) -> [DMESG-WARN][24] ([i915#62] / [i915#92] / [i915#95]) +5 similar issues
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8761/fi-kbl-x1275/igt@kms_cursor_legacy@basic-flip-after-cursor-varying-size.html
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4776/fi-kbl-x1275/igt@kms_cursor_legacy@basic-flip-after-cursor-varying-size.html

  * igt@kms_force_connector_basic@force-connector-state:
    - fi-kbl-x1275:       [DMESG-WARN][25] ([i915#62] / [i915#92] / [i915#95]) -> [DMESG-WARN][26] ([i915#62] / [i915#92]) +4 similar issues
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8761/fi-kbl-x1275/igt@kms_force_connector_basic@force-connector-state.html
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4776/fi-kbl-x1275/igt@kms_force_connector_basic@force-connector-state.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [i915#138]: https://gitlab.freedesktop.org/drm/intel/issues/138
  [i915#1888]: https://gitlab.freedesktop.org/drm/intel/issues/1888
  [i915#1982]: https://gitlab.freedesktop.org/drm/intel/issues/1982
  [i915#2089]: https://gitlab.freedesktop.org/drm/intel/issues/2089
  [i915#402]: https://gitlab.freedesktop.org/drm/intel/issues/402
  [i915#62]: https://gitlab.freedesktop.org/drm/intel/issues/62
  [i915#92]: https://gitlab.freedesktop.org/drm/intel/issues/92
  [i915#95]: https://gitlab.freedesktop.org/drm/intel/issues/95


Participating hosts (47 -> 40)
------------------------------

  Missing    (7): fi-ilk-m540 fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-ctg-p8600 fi-byt-clapper fi-bdw-samus 


Build changes
-------------

  * CI: CI-20190529 -> None
  * IGT: IGT_5739 -> IGTPW_4776

  CI-20190529: 20190529
  CI_DRM_8761: b665aabc40b8c7e86f10a74171d3d3fd71251781 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGTPW_4776: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4776/index.html
  IGT_5739: 9b964d7359db9799f2b5b905403dda668ae28c87 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools



== Testlist changes ==

+igt@perf@mapped-oa-buffer
+igt@perf@triggered-oa-reports

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4776/index.html

[-- Attachment #1.2: Type: text/html, Size: 9118 bytes --]

[-- Attachment #2: Type: text/plain, Size: 154 bytes --]

_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [igt-dev] [PATCH 1/2] i915/perf: add tests for triggered OA reports
  2020-08-18 20:35 Umesh Nerlige Ramappa
@ 2020-09-24 16:24 ` Umesh Nerlige Ramappa
  0 siblings, 0 replies; 10+ messages in thread
From: Umesh Nerlige Ramappa @ 2020-09-24 16:24 UTC (permalink / raw)
  To: igt-dev; +Cc: Chris Wilson

On Tue, Aug 18, 2020 at 01:35:46PM -0700, Umesh Nerlige Ramappa wrote:
>From: Lionel G Landwerlin <lionel.g.landwerlin@intel.com>
>
>By whitelisting a couple of registers we can allow an application
>batch to trigger OA reports in the OA buffer by toggling an inverter
>on the condition logic back and forth.
>
>v2: Wait before sampling the timestamp used to end the OA buffer search
>v3:
>- Ensure OA regs are whitelisted and reports are triggered only when
>  perf_stream_paranoid is set to 0.
>- Drop root to trigger reports.
>
>Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>

Thanks,
Umesh

>---
> tests/i915/perf.c | 449 ++++++++++++++++++++++++++++++++++++++++++++--
> 1 file changed, 430 insertions(+), 19 deletions(-)
>
>diff --git a/tests/i915/perf.c b/tests/i915/perf.c
>index 92edc9f1..b030cfad 100644
>--- a/tests/i915/perf.c
>+++ b/tests/i915/perf.c
>@@ -53,6 +53,8 @@ IGT_TEST_DESCRIPTION("Test the i915 perf metrics streaming interface");
> #define OAREPORT_REASON_SHIFT          19
> #define OAREPORT_REASON_TIMER          (1<<0)
> #define OAREPORT_REASON_INTERNAL       (3<<1)
>+#define OAREPORT_REASON_TRIGGER1       (1<<1)
>+#define OAREPORT_REASON_TRIGGER2       (1<<2)
> #define OAREPORT_REASON_CTX_SWITCH     (1<<3)
> #define OAREPORT_REASON_GO             (1<<4)
> #define OAREPORT_REASON_CLK_RATIO      (1<<5)
>@@ -204,6 +206,7 @@ static struct intel_perf *intel_perf = NULL;
> static struct intel_perf_metric_set *test_set = NULL;
> static bool *undefined_a_counters;
> static uint64_t oa_exp_1_millisec;
>+struct intel_mmio_data mmio_data;
>
> static igt_render_copyfunc_t render_copy = NULL;
> static uint32_t (*read_report_ticks)(uint32_t *report,
>@@ -293,6 +296,23 @@ __perf_open(int fd, struct drm_i915_perf_open_param *param, bool prevent_pm)
> 	return ret;
> }
>
>+static int i915_perf_revision(int fd)
>+{
>+	drm_i915_getparam_t gp;
>+	int value = 1, ret;
>+
>+	gp.param = I915_PARAM_PERF_REVISION;
>+	gp.value = &value;
>+	ret = igt_ioctl(drm_fd, DRM_IOCTL_I915_GETPARAM, &gp);
>+	if (ret == -1) {
>+		/* If the param is missing, consider version 1. */
>+		igt_assert_eq(errno, EINVAL);
>+		return 1;
>+	}
>+
>+	return value;
>+}
>+
> static int
> lookup_format(int i915_perf_fmt_id)
> {
>@@ -383,11 +403,17 @@ gen8_read_report_clock_ratios(uint32_t *report,
> 	*unslice_freq_mhz = (unslice_freq * 16666) / 1000;
> }
>
>+static uint32_t
>+gen8_report_reason(const uint32_t *report)
>+{
>+	return ((report[0] >> OAREPORT_REASON_SHIFT) &
>+		OAREPORT_REASON_MASK);
>+}
>+
> static const char *
> gen8_read_report_reason(const uint32_t *report)
> {
>-	uint32_t reason = ((report[0] >> OAREPORT_REASON_SHIFT) &
>-			   OAREPORT_REASON_MASK);
>+	uint32_t reason = gen8_report_reason(report);
>
> 	if (reason & (1<<0))
> 		return "timer";
>@@ -3118,6 +3144,268 @@ emit_stall_timestamp_and_rpc(struct intel_batchbuffer *batch,
> 	emit_report_perf_count(batch, dst, report_dst_offset, report_id);
> }
>
>+/* The following registers all have the same layout. */
>+#define OAREPORTTRIG2 (0x2744)
>+#define   OAREPORTTRIG2_INVERT_A_0  (1 << 0)
>+#define   OAREPORTTRIG2_INVERT_A_1  (1 << 1)
>+#define   OAREPORTTRIG2_INVERT_A_2  (1 << 2)
>+#define   OAREPORTTRIG2_INVERT_A_3  (1 << 3)
>+#define   OAREPORTTRIG2_INVERT_A_4  (1 << 4)
>+#define   OAREPORTTRIG2_INVERT_A_5  (1 << 5)
>+#define   OAREPORTTRIG2_INVERT_A_6  (1 << 6)
>+#define   OAREPORTTRIG2_INVERT_A_7  (1 << 7)
>+#define   OAREPORTTRIG2_INVERT_A_8  (1 << 8)
>+#define   OAREPORTTRIG2_INVERT_A_9  (1 << 9)
>+#define   OAREPORTTRIG2_INVERT_A_10 (1 << 10)
>+#define   OAREPORTTRIG2_INVERT_A_11 (1 << 11)
>+#define   OAREPORTTRIG2_INVERT_A_12 (1 << 12)
>+#define   OAREPORTTRIG2_INVERT_A_13 (1 << 13)
>+#define   OAREPORTTRIG2_INVERT_A_14 (1 << 14)
>+#define   OAREPORTTRIG2_INVERT_A_15 (1 << 15)
>+#define   OAREPORTTRIG2_INVERT_B_0  (1 << 16)
>+#define   OAREPORTTRIG2_INVERT_B_1  (1 << 17)
>+#define   OAREPORTTRIG2_INVERT_B_2  (1 << 18)
>+#define   OAREPORTTRIG2_INVERT_B_3  (1 << 19)
>+#define   OAREPORTTRIG2_INVERT_C_0  (1 << 20)
>+#define   OAREPORTTRIG2_INVERT_C_1  (1 << 21)
>+#define   OAREPORTTRIG2_INVERT_D_0  (1 << 22)
>+#define   OAREPORTTRIG2_THRESHOLD_ENABLE      (1 << 23)
>+#define   OAREPORTTRIG2_REPORT_TRIGGER_ENABLE (1 << 31)
>+#define OAREPORTTRIG6 (0x2754)
>+#define OA_PERF_COUNTER_A(idx) (0x2800 + 8 * (idx))
>+#define GEN8_OASTATUS (0x2b08)
>+
>+#define GEN12_OAREPORTTRIG2 (0xd924)
>+#define GEN12_OAREPORTTRIG6 (0xd934)
>+#define GEN12_OAG_PERF_COUNTER_A(idx) (0xD980 + 8 * (idx))
>+#define GEN12_OAG_OASTATUS (0xdafc)
>+
>+/*
>+ * We have 2 trigger registers that each generate a different
>+ * report reason.
>+ */
>+static const uint32_t gen8_oa_wl[] = {
>+	OAREPORTTRIG2,
>+	OAREPORTTRIG6,
>+	OA_PERF_COUNTER_A(18),
>+	GEN8_OASTATUS,
>+};
>+static const uint32_t gen12_oa_wl[] = {
>+	GEN12_OAREPORTTRIG2,
>+	GEN12_OAREPORTTRIG6,
>+	GEN12_OAG_PERF_COUNTER_A(18),
>+	GEN12_OAG_OASTATUS,
>+};
>+
>+static void
>+emit_triggered_oa_report(struct intel_batchbuffer *batch,
>+			 uint32_t trigger)
>+{
>+	const uint32_t *triggers = intel_gen(devid) >= 12 ? gen12_oa_wl: gen8_oa_wl;
>+
>+	assert(trigger <= 1);
>+
>+	BEGIN_BATCH(6, 0);
>+	OUT_BATCH(MI_LOAD_REGISTER_IMM);
>+	OUT_BATCH(triggers[trigger]);
>+	OUT_BATCH(OAREPORTTRIG2_INVERT_C_1 |
>+		  OAREPORTTRIG2_REPORT_TRIGGER_ENABLE);
>+	OUT_BATCH(MI_LOAD_REGISTER_IMM);
>+	OUT_BATCH(triggers[trigger]);
>+	OUT_BATCH(OAREPORTTRIG2_INVERT_C_1 |
>+		  OAREPORTTRIG2_INVERT_D_0 |
>+		  OAREPORTTRIG2_REPORT_TRIGGER_ENABLE);
>+	ADVANCE_BATCH();
>+}
>+
>+static uint64_t
>+rcs_timestmap_reg_read(int fd)
>+{
>+	struct drm_i915_reg_read rr = {
>+		.offset = 0x2358 | I915_REG_READ_8B_WA, /* render ring timestamp */
>+	};
>+
>+	do_ioctl(fd, DRM_IOCTL_I915_REG_READ, &rr);
>+
>+	return rr.val;
>+}
>+
>+/*
>+ * Verify that we can trigger OA reports into the OA buffer using
>+ * MI_LRI.
>+ */
>+static void
>+test_triggered_oa_reports(int paranoid)
>+{
>+	int oa_exponent = max_oa_exponent_for_period_lte(1000000);
>+	uint64_t properties[] = {
>+		DRM_I915_PERF_PROP_CTX_HANDLE, UINT64_MAX, /* updated below */
>+
>+		/* Note: we have to specify at least one sample property even
>+		 * though we aren't interested in samples in this case
>+		 */
>+		DRM_I915_PERF_PROP_SAMPLE_OA, true,
>+
>+		/* OA unit configuration */
>+		DRM_I915_PERF_PROP_OA_METRICS_SET, test_set->perf_oa_metrics_set,
>+		DRM_I915_PERF_PROP_OA_FORMAT, test_set->perf_oa_format,
>+		DRM_I915_PERF_PROP_OA_EXPONENT, oa_exponent,
>+
>+		/* Note: OA exponent set, so periodic reports are expected too */
>+	};
>+	struct drm_i915_perf_open_param param = {
>+		.flags = I915_PERF_FLAG_FD_CLOEXEC,
>+		.num_properties = ARRAY_SIZE(properties) / 2,
>+		.properties_ptr = to_user_pointer(properties),
>+	};
>+	struct drm_i915_perf_record_header *header;
>+	drm_intel_bufmgr *bufmgr;
>+	drm_intel_context *context;
>+	struct igt_helper_process child = {};
>+	struct intel_batchbuffer *batch;
>+	struct igt_buf src[2], dst[2];
>+	uint64_t timestamp32_mask = (1ull << 32) - 1;
>+	uint64_t timestamps[2];
>+	uint32_t buf_size = 16 * 1024 * 1024;
>+	uint8_t *buf = malloc(buf_size);
>+	uint32_t ctx_id;
>+	int width = 800;
>+	int height = 600;
>+	uint32_t trigger_counts[2] = { 0, };
>+	int ret;
>+
>+	write_u64_file("/proc/sys/dev/i915/perf_stream_paranoid", paranoid);
>+
>+	do {
>+		igt_fork_helper(&child) {
>+			if (!paranoid)
>+				igt_drop_root();
>+
>+			bufmgr = drm_intel_bufmgr_gem_init(drm_fd, 4096);
>+			drm_intel_bufmgr_gem_enable_reuse(bufmgr);
>+
>+			scratch_buf_init(bufmgr, &src[0], width, height, 0xff0000ff);
>+			scratch_buf_init(bufmgr, &dst[0], width, height, 0x00ff00ff);
>+			scratch_buf_init(bufmgr, &src[1], 2 * width, height, 0xff0000ff);
>+			scratch_buf_init(bufmgr, &dst[1], 2 * width, height, 0x00ff00ff);
>+
>+			batch = intel_batchbuffer_alloc(bufmgr, devid);
>+
>+			context = drm_intel_gem_context_create(bufmgr);
>+			igt_assert(context);
>+
>+			ret = drm_intel_gem_context_get_id(context, &ctx_id);
>+			properties[1] = ctx_id;
>+
>+
>+			timestamps[0] = rcs_timestmap_reg_read(drm_fd);
>+
>+			stream_fd = __perf_open(drm_fd, &param, false);
>+
>+			emit_triggered_oa_report(batch, 0);
>+
>+			render_copy(batch,
>+				    context,
>+				    &src[0], 0, 0, width, height,
>+				    &dst[0], 0, 0);
>+
>+			emit_triggered_oa_report(batch, 0);
>+
>+			emit_triggered_oa_report(batch, 1);
>+
>+			render_copy(batch,
>+				    context,
>+				    &src[1], 0, 0, 2 * width, height,
>+				    &dst[1], 0, 0);
>+
>+			emit_triggered_oa_report(batch, 1);
>+
>+			intel_batchbuffer_flush_with_context(batch, context);
>+
>+			/* On some failing runs this timestamp was sampled too
>+			 * early, so we bailed out before seeing the triggered
>+			 * report. Wait a little before sampling it.
>+			 */
>+			usleep(50000);
>+
>+			timestamps[1] = rcs_timestmap_reg_read(drm_fd);
>+
>+			if (timestamps[1] < timestamps[0] ||
>+			    (timestamps[1] & timestamp32_mask) < (timestamps[1] & timestamp32_mask)) {
>+				igt_debug("Timestamp rollover, trying again\n");
>+				exit(EAGAIN);
>+			}
>+
>+			ret = i915_read_reports_until_timestamp(test_set->perf_oa_format,
>+								buf, buf_size,
>+								timestamps[0] & timestamp32_mask,
>+								timestamps[1] & timestamp32_mask);
>+
>+			for (size_t offset = 0; offset < ret; offset += header->size) {
>+				uint32_t *report;
>+
>+				header = (void *)(buf + offset);
>+
>+				igt_assert_eq(header->pad, 0); /* Reserved */
>+
>+				igt_assert_neq(header->type, DRM_I915_PERF_RECORD_OA_BUFFER_LOST);
>+
>+				if (header->type == DRM_I915_PERF_RECORD_OA_REPORT_LOST)
>+					continue;
>+
>+				/* Currently the only other record type expected is a
>+				 * _SAMPLE. Notably this test will need updating if
>+				 * i915-perf is extended in the future with additional
>+				 * record types.
>+				 */
>+				igt_assert_eq(header->type, DRM_I915_PERF_RECORD_SAMPLE);
>+
>+				report = (void *)(header + 1);
>+
>+				igt_debug("report ts=0x%08x hw_id=0x%08x reason=%s\n",
>+					  report[1], report[2],
>+					  gen8_read_report_reason(report));
>+
>+				if (gen8_report_reason(report) & OAREPORT_REASON_TRIGGER1) {
>+					igt_assert_eq(trigger_counts[1], 0);
>+					trigger_counts[0]++;
>+				}
>+				if (gen8_report_reason(report) & OAREPORT_REASON_TRIGGER2) {
>+					igt_assert_eq(trigger_counts[0], 2);
>+					trigger_counts[1]++;
>+				}
>+			}
>+
>+			if (paranoid) {
>+				igt_assert_eq(trigger_counts[0], 0);
>+				igt_assert_eq(trigger_counts[1], 0);
>+			} else {
>+				igt_assert_eq(trigger_counts[0], 2);
>+				igt_assert_eq(trigger_counts[1], 2);
>+			}
>+
>+			for (int i = 0; i < ARRAY_SIZE(src); i++) {
>+				drm_intel_bo_unreference(src[i].bo);
>+				drm_intel_bo_unreference(dst[i].bo);
>+			}
>+
>+			intel_batchbuffer_free(batch);
>+			drm_intel_gem_context_destroy(context);
>+			drm_intel_bufmgr_destroy(bufmgr);
>+			__perf_close(stream_fd);
>+		}
>+
>+
>+		ret = igt_wait_helper(&child);
>+
>+		igt_assert(WEXITSTATUS(ret) == EAGAIN ||
>+			   WEXITSTATUS(ret) == 0);
>+
>+	} while (WEXITSTATUS(ret) == EAGAIN);
>+
>+	free(buf);
>+}
>+
> /* Tests the INTEL_performance_query use case where an unprivileged process
>  * should be able to configure the OA unit for per-context metrics (for a
>  * context associated with that process' drm file descriptor) and the counters
>@@ -4768,6 +5056,122 @@ test_whitelisted_registers_userspace_config(void)
> 	i915_perf_remove_config(drm_fd, config_id);
> }
>
>+#define RING_FORCE_TO_NONPRIV_ADDRESS_MASK 0x03fffffc
>+
>+static uint32_t gen12_wl_slots[] = {
>+	0x24d0, 0x24d4, 0x24d8, 0x24dc, 0x24e0, 0x24e4, 0x24e8, 0x24ec,
>+	0x24f0, 0x24f4, 0x24f8, 0x24fc, 0x2010, 0x2014, 0x2018, 0x201c,
>+	0x21e0, 0x21e4, 0x21e8, 0x21ec,
>+};
>+
>+static uint32_t gen9_wl_slots[] = {
>+	0x24d0, 0x24d4, 0x24d8, 0x24dc, 0x24e0, 0x24e4, 0x24e8, 0x24ec,
>+	0x24f0, 0x24f4, 0x24f8, 0x24fc,
>+};
>+
>+static void dump_wl(uint32_t *slots, uint32_t count)
>+{
>+	int i;
>+
>+	for (i = 0; i < count; i++)
>+		igt_debug("LOCAL: FORCE_TO_NON_PRIV_%02d = %08x\n",
>+			  i, intel_register_read(&mmio_data, slots[i]));
>+}
>+
>+static void dump_whitelist(const char *msg)
>+{
>+	igt_debug("%s\n", msg);
>+
>+	if (intel_gen(devid) >= 12)
>+		dump_wl(gen12_wl_slots, ARRAY_SIZE(gen12_wl_slots));
>+	else if (intel_gen(devid) > 8)
>+		dump_wl(gen9_wl_slots, ARRAY_SIZE(gen9_wl_slots));
>+	else
>+		return;
>+}
>+
>+static bool in_whitelist(uint32_t reg)
>+{
>+	uint32_t *slots, count;
>+	int i;
>+
>+	if (intel_gen(devid) >= 12) {
>+		slots = gen12_wl_slots;
>+		count = ARRAY_SIZE(gen12_wl_slots);
>+	} else {
>+		slots = gen9_wl_slots;
>+		count = ARRAY_SIZE(gen9_wl_slots);
>+	}
>+
>+	for (i = 0; i < count; i++) {
>+		uint32_t fpriv = intel_register_read(&mmio_data, slots[i]);
>+
>+		if ((fpriv & RING_FORCE_TO_NONPRIV_ADDRESS_MASK) == reg)
>+			return true;
>+	}
>+
>+	return false;
>+}
>+
>+static void oa_regs_in_whitelist(bool are_present)
>+{
>+	const uint32_t *regs;
>+	uint32_t count;
>+	int i;
>+
>+	if (intel_gen(devid) >= 12) {
>+		regs = gen12_oa_wl;
>+		count = i915_perf_revision(drm_fd) >= 7 ?
>+			ARRAY_SIZE(gen12_oa_wl) : 2;
>+	} else {
>+		regs = gen8_oa_wl;
>+		count = i915_perf_revision(drm_fd) >= 7 ?
>+			ARRAY_SIZE(gen8_oa_wl) : 2;
>+	}
>+
>+	for (i = 0; i < count; i++)
>+		if (are_present)
>+			igt_assert(in_whitelist(regs[i]));
>+		else
>+			igt_assert(!in_whitelist(regs[i]));
>+}
>+
>+static void test_oa_regs_whitelist(int paranoid)
>+{
>+	uint64_t properties[] = {
>+		DRM_I915_PERF_PROP_SAMPLE_OA, true,
>+		DRM_I915_PERF_PROP_OA_METRICS_SET, test_set->perf_oa_metrics_set,
>+		DRM_I915_PERF_PROP_OA_FORMAT, test_set->perf_oa_format,
>+		DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp_1_millisec,
>+
>+	};
>+	struct drm_i915_perf_open_param param = {
>+		.flags = I915_PERF_FLAG_FD_CLOEXEC,
>+		.num_properties = sizeof(properties) / 16,
>+		.properties_ptr = to_user_pointer(properties),
>+	};
>+	write_u64_file("/proc/sys/dev/i915/perf_stream_paranoid", paranoid);
>+	intel_register_access_init(&mmio_data, intel_get_pci_device(),
>+				   0, drm_fd);
>+	stream_fd = __perf_open(drm_fd, &param, false);
>+
>+	dump_whitelist("oa whitelisted");
>+
>+	if (paranoid)
>+		oa_regs_in_whitelist(false);
>+	else
>+		oa_regs_in_whitelist(true);
>+
>+	__perf_close(stream_fd);
>+
>+	dump_whitelist("oa remove whitelist");
>+
>+	/* after perf close, whitelist should be removed */
>+	oa_regs_in_whitelist(false);
>+
>+	intel_register_access_fini(&mmio_data);
>+}
>+
> static unsigned
> read_i915_module_ref(void)
> {
>@@ -4880,23 +5284,6 @@ test_sysctl_defaults(void)
> 	igt_assert_eq(max_freq, 100000);
> }
>
>-static int i915_perf_revision(int fd)
>-{
>-	drm_i915_getparam_t gp;
>-	int value = 1, ret;
>-
>-	gp.param = I915_PARAM_PERF_REVISION;
>-	gp.value = &value;
>-	ret = igt_ioctl(drm_fd, DRM_IOCTL_I915_GETPARAM, &gp);
>-	if (ret == -1) {
>-		/* If the param is missing, consider version 1. */
>-		igt_assert_eq(errno, EINVAL);
>-		return 1;
>-	}
>-
>-	return value;
>-}
>-
> igt_main
> {
> 	igt_fixture {
>@@ -5096,6 +5483,30 @@ igt_main
> 	igt_subtest("whitelisted-registers-userspace-config")
> 		test_whitelisted_registers_userspace_config();
>
>+
>+	igt_subtest_group {
>+		igt_fixture {
>+			igt_require(intel_gen(devid) > 8);
>+			igt_require(i915_perf_revision(drm_fd) >= 6);
>+		}
>+
>+		igt_describe("Verify that OA registers are whitelisted for paranoid 0");
>+		igt_subtest("oa-regs-whitelisted")
>+			test_oa_regs_whitelist(0);
>+
>+		igt_describe("Verify that OA registers are not whitelisted for paranoid 1");
>+		igt_subtest("oa-regs-not-whitelisted")
>+			test_oa_regs_whitelist(1);
>+
>+		igt_describe("Verify reports triggered when perf_stream_paranoid is 0");
>+		igt_subtest("triggered-oa-reports-paranoid-0")
>+			test_triggered_oa_reports(0);
>+
>+		igt_describe("Verify reports not triggered when perf_stream_paranoid is 1");
>+		igt_subtest("triggered-oa-reports-paranoid-1")
>+			test_triggered_oa_reports(1);
>+	}
>+
> 	igt_fixture {
> 		/* leave sysctl options in their default state... */
> 		write_u64_file("/proc/sys/dev/i915/oa_max_sample_rate", 100000);
>-- 
>2.20.1
>
>_______________________________________________
>igt-dev mailing list
>igt-dev@lists.freedesktop.org
>https://lists.freedesktop.org/mailman/listinfo/igt-dev
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [igt-dev] [PATCH 1/2] i915/perf: add tests for triggered OA reports
@ 2020-08-18 20:35 Umesh Nerlige Ramappa
  2020-09-24 16:24 ` Umesh Nerlige Ramappa
  0 siblings, 1 reply; 10+ messages in thread
From: Umesh Nerlige Ramappa @ 2020-08-18 20:35 UTC (permalink / raw)
  To: igt-dev; +Cc: Chris Wilson

From: Lionel G Landwerlin <lionel.g.landwerlin@intel.com>

By whitelisting a couple of registers we can allow an application
batch to trigger OA reports in the OA buffer by toggling an inverter
on the condition logic back and forth.

v2: Wait before sampling the timestamp used to end the OA buffer search
v3:
- Ensure OA regs are whitelisted and reports are triggered only when
  perf_stream_paranoid is set to 0.
- Drop root to trigger reports.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 tests/i915/perf.c | 449 ++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 430 insertions(+), 19 deletions(-)

diff --git a/tests/i915/perf.c b/tests/i915/perf.c
index 92edc9f1..b030cfad 100644
--- a/tests/i915/perf.c
+++ b/tests/i915/perf.c
@@ -53,6 +53,8 @@ IGT_TEST_DESCRIPTION("Test the i915 perf metrics streaming interface");
 #define OAREPORT_REASON_SHIFT          19
 #define OAREPORT_REASON_TIMER          (1<<0)
 #define OAREPORT_REASON_INTERNAL       (3<<1)
+#define OAREPORT_REASON_TRIGGER1       (1<<1)
+#define OAREPORT_REASON_TRIGGER2       (1<<2)
 #define OAREPORT_REASON_CTX_SWITCH     (1<<3)
 #define OAREPORT_REASON_GO             (1<<4)
 #define OAREPORT_REASON_CLK_RATIO      (1<<5)
@@ -204,6 +206,7 @@ static struct intel_perf *intel_perf = NULL;
 static struct intel_perf_metric_set *test_set = NULL;
 static bool *undefined_a_counters;
 static uint64_t oa_exp_1_millisec;
+struct intel_mmio_data mmio_data;
 
 static igt_render_copyfunc_t render_copy = NULL;
 static uint32_t (*read_report_ticks)(uint32_t *report,
@@ -293,6 +296,23 @@ __perf_open(int fd, struct drm_i915_perf_open_param *param, bool prevent_pm)
 	return ret;
 }
 
+static int i915_perf_revision(int fd)
+{
+	drm_i915_getparam_t gp;
+	int value = 1, ret;
+
+	gp.param = I915_PARAM_PERF_REVISION;
+	gp.value = &value;
+	ret = igt_ioctl(drm_fd, DRM_IOCTL_I915_GETPARAM, &gp);
+	if (ret == -1) {
+		/* If the param is missing, consider version 1. */
+		igt_assert_eq(errno, EINVAL);
+		return 1;
+	}
+
+	return value;
+}
+
 static int
 lookup_format(int i915_perf_fmt_id)
 {
@@ -383,11 +403,17 @@ gen8_read_report_clock_ratios(uint32_t *report,
 	*unslice_freq_mhz = (unslice_freq * 16666) / 1000;
 }
 
+static uint32_t
+gen8_report_reason(const uint32_t *report)
+{
+	return ((report[0] >> OAREPORT_REASON_SHIFT) &
+		OAREPORT_REASON_MASK);
+}
+
 static const char *
 gen8_read_report_reason(const uint32_t *report)
 {
-	uint32_t reason = ((report[0] >> OAREPORT_REASON_SHIFT) &
-			   OAREPORT_REASON_MASK);
+	uint32_t reason = gen8_report_reason(report);
 
 	if (reason & (1<<0))
 		return "timer";
@@ -3118,6 +3144,268 @@ emit_stall_timestamp_and_rpc(struct intel_batchbuffer *batch,
 	emit_report_perf_count(batch, dst, report_dst_offset, report_id);
 }
 
+/* The following registers all have the same layout. */
+#define OAREPORTTRIG2 (0x2744)
+#define   OAREPORTTRIG2_INVERT_A_0  (1 << 0)
+#define   OAREPORTTRIG2_INVERT_A_1  (1 << 1)
+#define   OAREPORTTRIG2_INVERT_A_2  (1 << 2)
+#define   OAREPORTTRIG2_INVERT_A_3  (1 << 3)
+#define   OAREPORTTRIG2_INVERT_A_4  (1 << 4)
+#define   OAREPORTTRIG2_INVERT_A_5  (1 << 5)
+#define   OAREPORTTRIG2_INVERT_A_6  (1 << 6)
+#define   OAREPORTTRIG2_INVERT_A_7  (1 << 7)
+#define   OAREPORTTRIG2_INVERT_A_8  (1 << 8)
+#define   OAREPORTTRIG2_INVERT_A_9  (1 << 9)
+#define   OAREPORTTRIG2_INVERT_A_10 (1 << 10)
+#define   OAREPORTTRIG2_INVERT_A_11 (1 << 11)
+#define   OAREPORTTRIG2_INVERT_A_12 (1 << 12)
+#define   OAREPORTTRIG2_INVERT_A_13 (1 << 13)
+#define   OAREPORTTRIG2_INVERT_A_14 (1 << 14)
+#define   OAREPORTTRIG2_INVERT_A_15 (1 << 15)
+#define   OAREPORTTRIG2_INVERT_B_0  (1 << 16)
+#define   OAREPORTTRIG2_INVERT_B_1  (1 << 17)
+#define   OAREPORTTRIG2_INVERT_B_2  (1 << 18)
+#define   OAREPORTTRIG2_INVERT_B_3  (1 << 19)
+#define   OAREPORTTRIG2_INVERT_C_0  (1 << 20)
+#define   OAREPORTTRIG2_INVERT_C_1  (1 << 21)
+#define   OAREPORTTRIG2_INVERT_D_0  (1 << 22)
+#define   OAREPORTTRIG2_THRESHOLD_ENABLE      (1 << 23)
+#define   OAREPORTTRIG2_REPORT_TRIGGER_ENABLE (1 << 31)
+#define OAREPORTTRIG6 (0x2754)
+#define OA_PERF_COUNTER_A(idx) (0x2800 + 8 * (idx))
+#define GEN8_OASTATUS (0x2b08)
+
+#define GEN12_OAREPORTTRIG2 (0xd924)
+#define GEN12_OAREPORTTRIG6 (0xd934)
+#define GEN12_OAG_PERF_COUNTER_A(idx) (0xD980 + 8 * (idx))
+#define GEN12_OAG_OASTATUS (0xdafc)
+
+/*
+ * We have 2 trigger registers that each generate a different
+ * report reason.
+ */
+static const uint32_t gen8_oa_wl[] = {
+	OAREPORTTRIG2,
+	OAREPORTTRIG6,
+	OA_PERF_COUNTER_A(18),
+	GEN8_OASTATUS,
+};
+static const uint32_t gen12_oa_wl[] = {
+	GEN12_OAREPORTTRIG2,
+	GEN12_OAREPORTTRIG6,
+	GEN12_OAG_PERF_COUNTER_A(18),
+	GEN12_OAG_OASTATUS,
+};
+
+static void
+emit_triggered_oa_report(struct intel_batchbuffer *batch,
+			 uint32_t trigger)
+{
+	const uint32_t *triggers = intel_gen(devid) >= 12 ? gen12_oa_wl: gen8_oa_wl;
+
+	assert(trigger <= 1);
+
+	BEGIN_BATCH(6, 0);
+	OUT_BATCH(MI_LOAD_REGISTER_IMM);
+	OUT_BATCH(triggers[trigger]);
+	OUT_BATCH(OAREPORTTRIG2_INVERT_C_1 |
+		  OAREPORTTRIG2_REPORT_TRIGGER_ENABLE);
+	OUT_BATCH(MI_LOAD_REGISTER_IMM);
+	OUT_BATCH(triggers[trigger]);
+	OUT_BATCH(OAREPORTTRIG2_INVERT_C_1 |
+		  OAREPORTTRIG2_INVERT_D_0 |
+		  OAREPORTTRIG2_REPORT_TRIGGER_ENABLE);
+	ADVANCE_BATCH();
+}
+
+static uint64_t
+rcs_timestmap_reg_read(int fd)
+{
+	struct drm_i915_reg_read rr = {
+		.offset = 0x2358 | I915_REG_READ_8B_WA, /* render ring timestamp */
+	};
+
+	do_ioctl(fd, DRM_IOCTL_I915_REG_READ, &rr);
+
+	return rr.val;
+}
+
+/*
+ * Verify that we can trigger OA reports into the OA buffer using
+ * MI_LRI.
+ */
+static void
+test_triggered_oa_reports(int paranoid)
+{
+	int oa_exponent = max_oa_exponent_for_period_lte(1000000);
+	uint64_t properties[] = {
+		DRM_I915_PERF_PROP_CTX_HANDLE, UINT64_MAX, /* updated below */
+
+		/* Note: we have to specify at least one sample property even
+		 * though we aren't interested in samples in this case
+		 */
+		DRM_I915_PERF_PROP_SAMPLE_OA, true,
+
+		/* OA unit configuration */
+		DRM_I915_PERF_PROP_OA_METRICS_SET, test_set->perf_oa_metrics_set,
+		DRM_I915_PERF_PROP_OA_FORMAT, test_set->perf_oa_format,
+		DRM_I915_PERF_PROP_OA_EXPONENT, oa_exponent,
+
+		/* Note: OA exponent set, so periodic reports are expected too */
+	};
+	struct drm_i915_perf_open_param param = {
+		.flags = I915_PERF_FLAG_FD_CLOEXEC,
+		.num_properties = ARRAY_SIZE(properties) / 2,
+		.properties_ptr = to_user_pointer(properties),
+	};
+	struct drm_i915_perf_record_header *header;
+	drm_intel_bufmgr *bufmgr;
+	drm_intel_context *context;
+	struct igt_helper_process child = {};
+	struct intel_batchbuffer *batch;
+	struct igt_buf src[2], dst[2];
+	uint64_t timestamp32_mask = (1ull << 32) - 1;
+	uint64_t timestamps[2];
+	uint32_t buf_size = 16 * 1024 * 1024;
+	uint8_t *buf = malloc(buf_size);
+	uint32_t ctx_id;
+	int width = 800;
+	int height = 600;
+	uint32_t trigger_counts[2] = { 0, };
+	int ret;
+
+	write_u64_file("/proc/sys/dev/i915/perf_stream_paranoid", paranoid);
+
+	do {
+		igt_fork_helper(&child) {
+			if (!paranoid)
+				igt_drop_root();
+
+			bufmgr = drm_intel_bufmgr_gem_init(drm_fd, 4096);
+			drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+
+			scratch_buf_init(bufmgr, &src[0], width, height, 0xff0000ff);
+			scratch_buf_init(bufmgr, &dst[0], width, height, 0x00ff00ff);
+			scratch_buf_init(bufmgr, &src[1], 2 * width, height, 0xff0000ff);
+			scratch_buf_init(bufmgr, &dst[1], 2 * width, height, 0x00ff00ff);
+
+			batch = intel_batchbuffer_alloc(bufmgr, devid);
+
+			context = drm_intel_gem_context_create(bufmgr);
+			igt_assert(context);
+
+			ret = drm_intel_gem_context_get_id(context, &ctx_id);
+			properties[1] = ctx_id;
+
+
+			timestamps[0] = rcs_timestmap_reg_read(drm_fd);
+
+			stream_fd = __perf_open(drm_fd, &param, false);
+
+			emit_triggered_oa_report(batch, 0);
+
+			render_copy(batch,
+				    context,
+				    &src[0], 0, 0, width, height,
+				    &dst[0], 0, 0);
+
+			emit_triggered_oa_report(batch, 0);
+
+			emit_triggered_oa_report(batch, 1);
+
+			render_copy(batch,
+				    context,
+				    &src[1], 0, 0, 2 * width, height,
+				    &dst[1], 0, 0);
+
+			emit_triggered_oa_report(batch, 1);
+
+			intel_batchbuffer_flush_with_context(batch, context);
+
+			/* On some failing runs this timestamp was sampled too
+			 * early, so we bailed out before seeing the triggered
+			 * report. Wait a little before sampling it.
+			 */
+			usleep(50000);
+
+			timestamps[1] = rcs_timestmap_reg_read(drm_fd);
+
+			if (timestamps[1] < timestamps[0] ||
+			    (timestamps[1] & timestamp32_mask) < (timestamps[1] & timestamp32_mask)) {
+				igt_debug("Timestamp rollover, trying again\n");
+				exit(EAGAIN);
+			}
+
+			ret = i915_read_reports_until_timestamp(test_set->perf_oa_format,
+								buf, buf_size,
+								timestamps[0] & timestamp32_mask,
+								timestamps[1] & timestamp32_mask);
+
+			for (size_t offset = 0; offset < ret; offset += header->size) {
+				uint32_t *report;
+
+				header = (void *)(buf + offset);
+
+				igt_assert_eq(header->pad, 0); /* Reserved */
+
+				igt_assert_neq(header->type, DRM_I915_PERF_RECORD_OA_BUFFER_LOST);
+
+				if (header->type == DRM_I915_PERF_RECORD_OA_REPORT_LOST)
+					continue;
+
+				/* Currently the only other record type expected is a
+				 * _SAMPLE. Notably this test will need updating if
+				 * i915-perf is extended in the future with additional
+				 * record types.
+				 */
+				igt_assert_eq(header->type, DRM_I915_PERF_RECORD_SAMPLE);
+
+				report = (void *)(header + 1);
+
+				igt_debug("report ts=0x%08x hw_id=0x%08x reason=%s\n",
+					  report[1], report[2],
+					  gen8_read_report_reason(report));
+
+				if (gen8_report_reason(report) & OAREPORT_REASON_TRIGGER1) {
+					igt_assert_eq(trigger_counts[1], 0);
+					trigger_counts[0]++;
+				}
+				if (gen8_report_reason(report) & OAREPORT_REASON_TRIGGER2) {
+					igt_assert_eq(trigger_counts[0], 2);
+					trigger_counts[1]++;
+				}
+			}
+
+			if (paranoid) {
+				igt_assert_eq(trigger_counts[0], 0);
+				igt_assert_eq(trigger_counts[1], 0);
+			} else {
+				igt_assert_eq(trigger_counts[0], 2);
+				igt_assert_eq(trigger_counts[1], 2);
+			}
+
+			for (int i = 0; i < ARRAY_SIZE(src); i++) {
+				drm_intel_bo_unreference(src[i].bo);
+				drm_intel_bo_unreference(dst[i].bo);
+			}
+
+			intel_batchbuffer_free(batch);
+			drm_intel_gem_context_destroy(context);
+			drm_intel_bufmgr_destroy(bufmgr);
+			__perf_close(stream_fd);
+		}
+
+
+		ret = igt_wait_helper(&child);
+
+		igt_assert(WEXITSTATUS(ret) == EAGAIN ||
+			   WEXITSTATUS(ret) == 0);
+
+	} while (WEXITSTATUS(ret) == EAGAIN);
+
+	free(buf);
+}
+
 /* Tests the INTEL_performance_query use case where an unprivileged process
  * should be able to configure the OA unit for per-context metrics (for a
  * context associated with that process' drm file descriptor) and the counters
@@ -4768,6 +5056,122 @@ test_whitelisted_registers_userspace_config(void)
 	i915_perf_remove_config(drm_fd, config_id);
 }
 
+#define RING_FORCE_TO_NONPRIV_ADDRESS_MASK 0x03fffffc
+
+static uint32_t gen12_wl_slots[] = {
+	0x24d0, 0x24d4, 0x24d8, 0x24dc, 0x24e0, 0x24e4, 0x24e8, 0x24ec,
+	0x24f0, 0x24f4, 0x24f8, 0x24fc, 0x2010, 0x2014, 0x2018, 0x201c,
+	0x21e0, 0x21e4, 0x21e8, 0x21ec,
+};
+
+static uint32_t gen9_wl_slots[] = {
+	0x24d0, 0x24d4, 0x24d8, 0x24dc, 0x24e0, 0x24e4, 0x24e8, 0x24ec,
+	0x24f0, 0x24f4, 0x24f8, 0x24fc,
+};
+
+static void dump_wl(uint32_t *slots, uint32_t count)
+{
+	int i;
+
+	for (i = 0; i < count; i++)
+		igt_debug("LOCAL: FORCE_TO_NON_PRIV_%02d = %08x\n",
+			  i, intel_register_read(&mmio_data, slots[i]));
+}
+
+static void dump_whitelist(const char *msg)
+{
+	igt_debug("%s\n", msg);
+
+	if (intel_gen(devid) >= 12)
+		dump_wl(gen12_wl_slots, ARRAY_SIZE(gen12_wl_slots));
+	else if (intel_gen(devid) > 8)
+		dump_wl(gen9_wl_slots, ARRAY_SIZE(gen9_wl_slots));
+	else
+		return;
+}
+
+static bool in_whitelist(uint32_t reg)
+{
+	uint32_t *slots, count;
+	int i;
+
+	if (intel_gen(devid) >= 12) {
+		slots = gen12_wl_slots;
+		count = ARRAY_SIZE(gen12_wl_slots);
+	} else {
+		slots = gen9_wl_slots;
+		count = ARRAY_SIZE(gen9_wl_slots);
+	}
+
+	for (i = 0; i < count; i++) {
+		uint32_t fpriv = intel_register_read(&mmio_data, slots[i]);
+
+		if ((fpriv & RING_FORCE_TO_NONPRIV_ADDRESS_MASK) == reg)
+			return true;
+	}
+
+	return false;
+}
+
+static void oa_regs_in_whitelist(bool are_present)
+{
+	const uint32_t *regs;
+	uint32_t count;
+	int i;
+
+	if (intel_gen(devid) >= 12) {
+		regs = gen12_oa_wl;
+		count = i915_perf_revision(drm_fd) >= 7 ?
+			ARRAY_SIZE(gen12_oa_wl) : 2;
+	} else {
+		regs = gen8_oa_wl;
+		count = i915_perf_revision(drm_fd) >= 7 ?
+			ARRAY_SIZE(gen8_oa_wl) : 2;
+	}
+
+	for (i = 0; i < count; i++)
+		if (are_present)
+			igt_assert(in_whitelist(regs[i]));
+		else
+			igt_assert(!in_whitelist(regs[i]));
+}
+
+static void test_oa_regs_whitelist(int paranoid)
+{
+	uint64_t properties[] = {
+		DRM_I915_PERF_PROP_SAMPLE_OA, true,
+		DRM_I915_PERF_PROP_OA_METRICS_SET, test_set->perf_oa_metrics_set,
+		DRM_I915_PERF_PROP_OA_FORMAT, test_set->perf_oa_format,
+		DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp_1_millisec,
+
+	};
+	struct drm_i915_perf_open_param param = {
+		.flags = I915_PERF_FLAG_FD_CLOEXEC,
+		.num_properties = sizeof(properties) / 16,
+		.properties_ptr = to_user_pointer(properties),
+	};
+	write_u64_file("/proc/sys/dev/i915/perf_stream_paranoid", paranoid);
+	intel_register_access_init(&mmio_data, intel_get_pci_device(),
+				   0, drm_fd);
+	stream_fd = __perf_open(drm_fd, &param, false);
+
+	dump_whitelist("oa whitelisted");
+
+	if (paranoid)
+		oa_regs_in_whitelist(false);
+	else
+		oa_regs_in_whitelist(true);
+
+	__perf_close(stream_fd);
+
+	dump_whitelist("oa remove whitelist");
+
+	/* after perf close, whitelist should be removed */
+	oa_regs_in_whitelist(false);
+
+	intel_register_access_fini(&mmio_data);
+}
+
 static unsigned
 read_i915_module_ref(void)
 {
@@ -4880,23 +5284,6 @@ test_sysctl_defaults(void)
 	igt_assert_eq(max_freq, 100000);
 }
 
-static int i915_perf_revision(int fd)
-{
-	drm_i915_getparam_t gp;
-	int value = 1, ret;
-
-	gp.param = I915_PARAM_PERF_REVISION;
-	gp.value = &value;
-	ret = igt_ioctl(drm_fd, DRM_IOCTL_I915_GETPARAM, &gp);
-	if (ret == -1) {
-		/* If the param is missing, consider version 1. */
-		igt_assert_eq(errno, EINVAL);
-		return 1;
-	}
-
-	return value;
-}
-
 igt_main
 {
 	igt_fixture {
@@ -5096,6 +5483,30 @@ igt_main
 	igt_subtest("whitelisted-registers-userspace-config")
 		test_whitelisted_registers_userspace_config();
 
+
+	igt_subtest_group {
+		igt_fixture {
+			igt_require(intel_gen(devid) > 8);
+			igt_require(i915_perf_revision(drm_fd) >= 6);
+		}
+
+		igt_describe("Verify that OA registers are whitelisted for paranoid 0");
+		igt_subtest("oa-regs-whitelisted")
+			test_oa_regs_whitelist(0);
+
+		igt_describe("Verify that OA registers are not whitelisted for paranoid 1");
+		igt_subtest("oa-regs-not-whitelisted")
+			test_oa_regs_whitelist(1);
+
+		igt_describe("Verify reports triggered when perf_stream_paranoid is 0");
+		igt_subtest("triggered-oa-reports-paranoid-0")
+			test_triggered_oa_reports(0);
+
+		igt_describe("Verify reports not triggered when perf_stream_paranoid is 1");
+		igt_subtest("triggered-oa-reports-paranoid-1")
+			test_triggered_oa_reports(1);
+	}
+
 	igt_fixture {
 		/* leave sysctl options in their default state... */
 		write_u64_file("/proc/sys/dev/i915/oa_max_sample_rate", 100000);
-- 
2.20.1

_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [igt-dev] [PATCH 1/2] i915/perf: add tests for triggered OA reports
@ 2020-07-30 23:00 Umesh Nerlige Ramappa
  0 siblings, 0 replies; 10+ messages in thread
From: Umesh Nerlige Ramappa @ 2020-07-30 23:00 UTC (permalink / raw)
  To: igt-dev; +Cc: Chris Wilson

From: Lionel G Landwerlin <lionel.g.landwerlin@intel.com>

By whitelisting a couple of registers we can allow an application
batch to trigger OA reports in the OA buffer by switching back & forth
an inverter on the condition logic.

v2: Wait before sampling the timestamp used to end the OA buffer search
v3:
- Ensure OA regs are whitelisted and reports are triggered only when
  perf_stream_paranoid is set to 0.
- Drop root to trigger reports.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 tests/i915/perf.c | 449 ++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 430 insertions(+), 19 deletions(-)

diff --git a/tests/i915/perf.c b/tests/i915/perf.c
index 92edc9f1..b030cfad 100644
--- a/tests/i915/perf.c
+++ b/tests/i915/perf.c
@@ -53,6 +53,8 @@ IGT_TEST_DESCRIPTION("Test the i915 perf metrics streaming interface");
 #define OAREPORT_REASON_SHIFT          19
 #define OAREPORT_REASON_TIMER          (1<<0)
 #define OAREPORT_REASON_INTERNAL       (3<<1)
+#define OAREPORT_REASON_TRIGGER1       (1<<1)
+#define OAREPORT_REASON_TRIGGER2       (1<<2)
 #define OAREPORT_REASON_CTX_SWITCH     (1<<3)
 #define OAREPORT_REASON_GO             (1<<4)
 #define OAREPORT_REASON_CLK_RATIO      (1<<5)
@@ -204,6 +206,7 @@ static struct intel_perf *intel_perf = NULL;
 static struct intel_perf_metric_set *test_set = NULL;
 static bool *undefined_a_counters;
 static uint64_t oa_exp_1_millisec;
+struct intel_mmio_data mmio_data;
 
 static igt_render_copyfunc_t render_copy = NULL;
 static uint32_t (*read_report_ticks)(uint32_t *report,
@@ -293,6 +296,23 @@ __perf_open(int fd, struct drm_i915_perf_open_param *param, bool prevent_pm)
 	return ret;
 }
 
+static int i915_perf_revision(int fd)
+{
+	drm_i915_getparam_t gp;
+	int value = 1, ret;
+
+	gp.param = I915_PARAM_PERF_REVISION;
+	gp.value = &value;
+	/* Query the fd passed by the caller, not the global drm_fd. */
+	ret = igt_ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp);
+	if (ret == -1) {
+		/* If the param is missing, consider version 1. */
+		igt_assert_eq(errno, EINVAL);
+		return 1;
+	}
+	return value;
+}
+
 static int
 lookup_format(int i915_perf_fmt_id)
 {
@@ -383,11 +403,17 @@ gen8_read_report_clock_ratios(uint32_t *report,
 	*unslice_freq_mhz = (unslice_freq * 16666) / 1000;
 }
 
+static uint32_t
+gen8_report_reason(const uint32_t *report)
+{
+	return ((report[0] >> OAREPORT_REASON_SHIFT) &
+		OAREPORT_REASON_MASK);
+}
+
 static const char *
 gen8_read_report_reason(const uint32_t *report)
 {
-	uint32_t reason = ((report[0] >> OAREPORT_REASON_SHIFT) &
-			   OAREPORT_REASON_MASK);
+	uint32_t reason = gen8_report_reason(report);
 
 	if (reason & (1<<0))
 		return "timer";
@@ -3118,6 +3144,268 @@ emit_stall_timestamp_and_rpc(struct intel_batchbuffer *batch,
 	emit_report_perf_count(batch, dst, report_dst_offset, report_id);
 }
 
+/* The following registers all have the same layout. */
+#define OAREPORTTRIG2 (0x2744)
+#define   OAREPORTTRIG2_INVERT_A_0  (1 << 0)
+#define   OAREPORTTRIG2_INVERT_A_1  (1 << 1)
+#define   OAREPORTTRIG2_INVERT_A_2  (1 << 2)
+#define   OAREPORTTRIG2_INVERT_A_3  (1 << 3)
+#define   OAREPORTTRIG2_INVERT_A_4  (1 << 4)
+#define   OAREPORTTRIG2_INVERT_A_5  (1 << 5)
+#define   OAREPORTTRIG2_INVERT_A_6  (1 << 6)
+#define   OAREPORTTRIG2_INVERT_A_7  (1 << 7)
+#define   OAREPORTTRIG2_INVERT_A_8  (1 << 8)
+#define   OAREPORTTRIG2_INVERT_A_9  (1 << 9)
+#define   OAREPORTTRIG2_INVERT_A_10 (1 << 10)
+#define   OAREPORTTRIG2_INVERT_A_11 (1 << 11)
+#define   OAREPORTTRIG2_INVERT_A_12 (1 << 12)
+#define   OAREPORTTRIG2_INVERT_A_13 (1 << 13)
+#define   OAREPORTTRIG2_INVERT_A_14 (1 << 14)
+#define   OAREPORTTRIG2_INVERT_A_15 (1 << 15)
+#define   OAREPORTTRIG2_INVERT_B_0  (1 << 16)
+#define   OAREPORTTRIG2_INVERT_B_1  (1 << 17)
+#define   OAREPORTTRIG2_INVERT_B_2  (1 << 18)
+#define   OAREPORTTRIG2_INVERT_B_3  (1 << 19)
+#define   OAREPORTTRIG2_INVERT_C_0  (1 << 20)
+#define   OAREPORTTRIG2_INVERT_C_1  (1 << 21)
+#define   OAREPORTTRIG2_INVERT_D_0  (1 << 22)
+#define   OAREPORTTRIG2_THRESHOLD_ENABLE      (1 << 23)
+#define   OAREPORTTRIG2_REPORT_TRIGGER_ENABLE (1 << 31)
+#define OAREPORTTRIG6 (0x2754)
+#define OA_PERF_COUNTER_A(idx) (0x2800 + 8 * (idx))
+#define GEN8_OASTATUS (0x2b08)
+
+#define GEN12_OAREPORTTRIG2 (0xd924)
+#define GEN12_OAREPORTTRIG6 (0xd934)
+#define GEN12_OAG_PERF_COUNTER_A(idx) (0xD980 + 8 * (idx))
+#define GEN12_OAG_OASTATUS (0xdafc)
+
+/*
+ * We have 2 trigger registers that each generate a different
+ * report reason.
+ */
+static const uint32_t gen8_oa_wl[] = {
+	OAREPORTTRIG2,
+	OAREPORTTRIG6,
+	OA_PERF_COUNTER_A(18),
+	GEN8_OASTATUS,
+};
+static const uint32_t gen12_oa_wl[] = {
+	GEN12_OAREPORTTRIG2,
+	GEN12_OAREPORTTRIG6,
+	GEN12_OAG_PERF_COUNTER_A(18),
+	GEN12_OAG_OASTATUS,
+};
+
+static void
+emit_triggered_oa_report(struct intel_batchbuffer *batch,
+			 uint32_t trigger)
+{
+	const uint32_t *triggers = intel_gen(devid) >= 12 ? gen12_oa_wl: gen8_oa_wl;
+
+	assert(trigger <= 1);
+
+	BEGIN_BATCH(6, 0);
+	OUT_BATCH(MI_LOAD_REGISTER_IMM);
+	OUT_BATCH(triggers[trigger]);
+	OUT_BATCH(OAREPORTTRIG2_INVERT_C_1 |
+		  OAREPORTTRIG2_REPORT_TRIGGER_ENABLE);
+	OUT_BATCH(MI_LOAD_REGISTER_IMM);
+	OUT_BATCH(triggers[trigger]);
+	OUT_BATCH(OAREPORTTRIG2_INVERT_C_1 |
+		  OAREPORTTRIG2_INVERT_D_0 |
+		  OAREPORTTRIG2_REPORT_TRIGGER_ENABLE);
+	ADVANCE_BATCH();
+}
+
+static uint64_t
+rcs_timestmap_reg_read(int fd)
+{
+	struct drm_i915_reg_read rr = {
+		.offset = 0x2358 | I915_REG_READ_8B_WA, /* render ring timestamp */
+	};
+
+	do_ioctl(fd, DRM_IOCTL_I915_REG_READ, &rr);
+
+	return rr.val;
+}
+
+/*
+ * Verify that MI_LRI can trigger OA reports into the OA buffer when
+ * perf_stream_paranoid is 0, and that none are triggered when it is 1.
+ */
+static void
+test_triggered_oa_reports(int paranoid)
+{
+	int oa_exponent = max_oa_exponent_for_period_lte(1000000);
+	uint64_t properties[] = {
+		DRM_I915_PERF_PROP_CTX_HANDLE, UINT64_MAX, /* updated below */
+
+		/* Note: we have to specify at least one sample property even
+		 * though we aren't interested in samples in this case
+		 */
+		DRM_I915_PERF_PROP_SAMPLE_OA, true,
+
+		/* OA unit configuration */
+		DRM_I915_PERF_PROP_OA_METRICS_SET, test_set->perf_oa_metrics_set,
+		DRM_I915_PERF_PROP_OA_FORMAT, test_set->perf_oa_format,
+		DRM_I915_PERF_PROP_OA_EXPONENT, oa_exponent,
+
+		/* Note: only reports with a trigger reason are counted below */
+	};
+	struct drm_i915_perf_open_param param = {
+		.flags = I915_PERF_FLAG_FD_CLOEXEC,
+		.num_properties = ARRAY_SIZE(properties) / 2,
+		.properties_ptr = to_user_pointer(properties),
+	};
+	struct drm_i915_perf_record_header *header;
+	drm_intel_bufmgr *bufmgr;
+	drm_intel_context *context;
+	struct igt_helper_process child = {};
+	struct intel_batchbuffer *batch;
+	struct igt_buf src[2], dst[2];
+	uint64_t timestamp32_mask = (1ull << 32) - 1;
+	uint64_t timestamps[2];
+	uint32_t buf_size = 16 * 1024 * 1024;
+	uint8_t *buf = malloc(buf_size);
+	uint32_t ctx_id;
+	int width = 800;
+	int height = 600;
+	uint32_t trigger_counts[2] = { 0, };
+	int ret;
+
+	write_u64_file("/proc/sys/dev/i915/perf_stream_paranoid", paranoid);
+
+	do {
+		igt_fork_helper(&child) {
+			if (!paranoid)
+				igt_drop_root();
+
+			bufmgr = drm_intel_bufmgr_gem_init(drm_fd, 4096);
+			drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+
+			scratch_buf_init(bufmgr, &src[0], width, height, 0xff0000ff);
+			scratch_buf_init(bufmgr, &dst[0], width, height, 0x00ff00ff);
+			scratch_buf_init(bufmgr, &src[1], 2 * width, height, 0xff0000ff);
+			scratch_buf_init(bufmgr, &dst[1], 2 * width, height, 0x00ff00ff);
+
+			batch = intel_batchbuffer_alloc(bufmgr, devid);
+
+			context = drm_intel_gem_context_create(bufmgr);
+			igt_assert(context);
+
+			ret = drm_intel_gem_context_get_id(context, &ctx_id);
+			igt_assert_eq(ret, 0);
+			properties[1] = ctx_id;
+
+			timestamps[0] = rcs_timestmap_reg_read(drm_fd);
+
+			stream_fd = __perf_open(drm_fd, &param, false);
+
+			emit_triggered_oa_report(batch, 0);
+
+			render_copy(batch,
+				    context,
+				    &src[0], 0, 0, width, height,
+				    &dst[0], 0, 0);
+
+			emit_triggered_oa_report(batch, 0);
+
+			emit_triggered_oa_report(batch, 1);
+
+			render_copy(batch,
+				    context,
+				    &src[1], 0, 0, 2 * width, height,
+				    &dst[1], 0, 0);
+
+			emit_triggered_oa_report(batch, 1);
+
+			intel_batchbuffer_flush_with_context(batch, context);
+
+			/* On some failures, this timestamp is too early as in
+			 * we bail out before seeing the triggered report. Wait
+			 * a little more and then check.
+			 */
+			usleep(50000);
+
+			timestamps[1] = rcs_timestmap_reg_read(drm_fd);
+			/* Retry if the 64b or truncated 32b timestamp wrapped */
+			if (timestamps[1] < timestamps[0] ||
+			    (timestamps[1] & timestamp32_mask) < (timestamps[0] & timestamp32_mask)) {
+				igt_debug("Timestamp rollover, trying again\n");
+				exit(EAGAIN);
+			}
+
+			ret = i915_read_reports_until_timestamp(test_set->perf_oa_format,
+								buf, buf_size,
+								timestamps[0] & timestamp32_mask,
+								timestamps[1] & timestamp32_mask);
+
+			for (size_t offset = 0; offset < ret; offset += header->size) {
+				uint32_t *report;
+
+				header = (void *)(buf + offset);
+
+				igt_assert_eq(header->pad, 0); /* Reserved */
+
+				igt_assert_neq(header->type, DRM_I915_PERF_RECORD_OA_BUFFER_LOST);
+
+				if (header->type == DRM_I915_PERF_RECORD_OA_REPORT_LOST)
+					continue;
+
+				/* Currently the only other record type expected is a
+				 * _SAMPLE. Notably this test will need updating if
+				 * i915-perf is extended in the future with additional
+				 * record types.
+				 */
+				igt_assert_eq(header->type, DRM_I915_PERF_RECORD_SAMPLE);
+
+				report = (void *)(header + 1);
+
+				igt_debug("report ts=0x%08x hw_id=0x%08x reason=%s\n",
+					  report[1], report[2],
+					  gen8_read_report_reason(report));
+
+				if (gen8_report_reason(report) & OAREPORT_REASON_TRIGGER1) {
+					igt_assert_eq(trigger_counts[1], 0);
+					trigger_counts[0]++;
+				}
+				if (gen8_report_reason(report) & OAREPORT_REASON_TRIGGER2) {
+					igt_assert_eq(trigger_counts[0], 2);
+					trigger_counts[1]++;
+				}
+			}
+
+			if (paranoid) {
+				igt_assert_eq(trigger_counts[0], 0);
+				igt_assert_eq(trigger_counts[1], 0);
+			} else {
+				igt_assert_eq(trigger_counts[0], 2);
+				igt_assert_eq(trigger_counts[1], 2);
+			}
+
+			for (int i = 0; i < ARRAY_SIZE(src); i++) {
+				drm_intel_bo_unreference(src[i].bo);
+				drm_intel_bo_unreference(dst[i].bo);
+			}
+
+			intel_batchbuffer_free(batch);
+			drm_intel_gem_context_destroy(context);
+			drm_intel_bufmgr_destroy(bufmgr);
+			__perf_close(stream_fd);
+		}
+
+
+		ret = igt_wait_helper(&child);
+
+		igt_assert(WEXITSTATUS(ret) == EAGAIN ||
+			   WEXITSTATUS(ret) == 0);
+
+	} while (WEXITSTATUS(ret) == EAGAIN);
+
+	free(buf);
+}
+
 /* Tests the INTEL_performance_query use case where an unprivileged process
  * should be able to configure the OA unit for per-context metrics (for a
  * context associated with that process' drm file descriptor) and the counters
@@ -4768,6 +5056,122 @@ test_whitelisted_registers_userspace_config(void)
 	i915_perf_remove_config(drm_fd, config_id);
 }
 
+#define RING_FORCE_TO_NONPRIV_ADDRESS_MASK 0x03fffffc
+
+static uint32_t gen12_wl_slots[] = {
+	0x24d0, 0x24d4, 0x24d8, 0x24dc, 0x24e0, 0x24e4, 0x24e8, 0x24ec,
+	0x24f0, 0x24f4, 0x24f8, 0x24fc, 0x2010, 0x2014, 0x2018, 0x201c,
+	0x21e0, 0x21e4, 0x21e8, 0x21ec,
+};
+
+static uint32_t gen9_wl_slots[] = {
+	0x24d0, 0x24d4, 0x24d8, 0x24dc, 0x24e0, 0x24e4, 0x24e8, 0x24ec,
+	0x24f0, 0x24f4, 0x24f8, 0x24fc,
+};
+
+static void dump_wl(uint32_t *slots, uint32_t count)
+{
+	int i;
+
+	for (i = 0; i < count; i++)
+		igt_debug("LOCAL: FORCE_TO_NON_PRIV_%02d = %08x\n",
+			  i, intel_register_read(&mmio_data, slots[i]));
+}
+
+static void dump_whitelist(const char *msg)
+{
+	igt_debug("%s\n", msg);
+
+	if (intel_gen(devid) >= 12)
+		dump_wl(gen12_wl_slots, ARRAY_SIZE(gen12_wl_slots));
+	else if (intel_gen(devid) > 8)
+		dump_wl(gen9_wl_slots, ARRAY_SIZE(gen9_wl_slots));
+	else
+		return;
+}
+
+static bool in_whitelist(uint32_t reg)
+{
+	uint32_t *slots, count;
+	int i;
+
+	if (intel_gen(devid) >= 12) {
+		slots = gen12_wl_slots;
+		count = ARRAY_SIZE(gen12_wl_slots);
+	} else {
+		slots = gen9_wl_slots;
+		count = ARRAY_SIZE(gen9_wl_slots);
+	}
+
+	for (i = 0; i < count; i++) {
+		uint32_t fpriv = intel_register_read(&mmio_data, slots[i]);
+
+		if ((fpriv & RING_FORCE_TO_NONPRIV_ADDRESS_MASK) == reg)
+			return true;
+	}
+
+	return false;
+}
+
+static void oa_regs_in_whitelist(bool are_present)
+{
+	const uint32_t *regs;
+	uint32_t count;
+	int i;
+
+	if (intel_gen(devid) >= 12) {
+		regs = gen12_oa_wl;
+		count = i915_perf_revision(drm_fd) >= 7 ?
+			ARRAY_SIZE(gen12_oa_wl) : 2;
+	} else {
+		regs = gen8_oa_wl;
+		count = i915_perf_revision(drm_fd) >= 7 ?
+			ARRAY_SIZE(gen8_oa_wl) : 2;
+	}
+
+	for (i = 0; i < count; i++)
+		if (are_present)
+			igt_assert(in_whitelist(regs[i]));
+		else
+			igt_assert(!in_whitelist(regs[i]));
+}
+
+static void test_oa_regs_whitelist(int paranoid)
+{
+	uint64_t properties[] = {
+		DRM_I915_PERF_PROP_SAMPLE_OA, true,
+		DRM_I915_PERF_PROP_OA_METRICS_SET, test_set->perf_oa_metrics_set,
+		DRM_I915_PERF_PROP_OA_FORMAT, test_set->perf_oa_format,
+		DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp_1_millisec,
+
+	};
+	struct drm_i915_perf_open_param param = {
+		.flags = I915_PERF_FLAG_FD_CLOEXEC,
+		.num_properties = sizeof(properties) / 16,
+		.properties_ptr = to_user_pointer(properties),
+	};
+	write_u64_file("/proc/sys/dev/i915/perf_stream_paranoid", paranoid);
+	intel_register_access_init(&mmio_data, intel_get_pci_device(),
+				   0, drm_fd);
+	stream_fd = __perf_open(drm_fd, &param, false);
+
+	dump_whitelist("oa whitelisted");
+
+	if (paranoid)
+		oa_regs_in_whitelist(false);
+	else
+		oa_regs_in_whitelist(true);
+
+	__perf_close(stream_fd);
+
+	dump_whitelist("oa remove whitelist");
+
+	/* after perf close, whitelist should be removed */
+	oa_regs_in_whitelist(false);
+
+	intel_register_access_fini(&mmio_data);
+}
+
 static unsigned
 read_i915_module_ref(void)
 {
@@ -4880,23 +5284,6 @@ test_sysctl_defaults(void)
 	igt_assert_eq(max_freq, 100000);
 }
 
-static int i915_perf_revision(int fd)
-{
-	drm_i915_getparam_t gp;
-	int value = 1, ret;
-
-	gp.param = I915_PARAM_PERF_REVISION;
-	gp.value = &value;
-	ret = igt_ioctl(drm_fd, DRM_IOCTL_I915_GETPARAM, &gp);
-	if (ret == -1) {
-		/* If the param is missing, consider version 1. */
-		igt_assert_eq(errno, EINVAL);
-		return 1;
-	}
-
-	return value;
-}
-
 igt_main
 {
 	igt_fixture {
@@ -5096,6 +5483,30 @@ igt_main
 	igt_subtest("whitelisted-registers-userspace-config")
 		test_whitelisted_registers_userspace_config();
 
+
+	igt_subtest_group {
+		igt_fixture {
+			igt_require(intel_gen(devid) > 8);
+			igt_require(i915_perf_revision(drm_fd) >= 6);
+		}
+
+		igt_describe("Verify that OA registers are whitelisted for paranoid 0");
+		igt_subtest("oa-regs-whitelisted")
+			test_oa_regs_whitelist(0);
+
+		igt_describe("Verify that OA registers are not whitelisted for paranoid 1");
+		igt_subtest("oa-regs-not-whitelisted")
+			test_oa_regs_whitelist(1);
+
+		igt_describe("Verify reports triggered when perf_stream_paranoid is 0");
+		igt_subtest("triggered-oa-reports-paranoid-0")
+			test_triggered_oa_reports(0);
+
+		igt_describe("Verify reports not triggered when perf_stream_paranoid is 1");
+		igt_subtest("triggered-oa-reports-paranoid-1")
+			test_triggered_oa_reports(1);
+	}
+
 	igt_fixture {
 		/* leave sysctl options in their default state... */
 		write_u64_file("/proc/sys/dev/i915/oa_max_sample_rate", 100000);
-- 
2.20.1

_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [igt-dev] [PATCH 1/2] i915/perf: add tests for triggered OA reports
@ 2020-07-30  0:46 Umesh Nerlige Ramappa
  0 siblings, 0 replies; 10+ messages in thread
From: Umesh Nerlige Ramappa @ 2020-07-30  0:46 UTC (permalink / raw)
  To: igt-dev; +Cc: Chris Wilson

From: Lionel G Landwerlin <lionel.g.landwerlin@intel.com>

By whitelisting a couple of registers we can allow an application
batch to trigger OA reports in the OA buffer by switching back & forth
an inverter on the condition logic.

v2: Wait before sampling the timestamp used to end the OA buffer search
v3:
- Ensure OA regs are whitelisted and reports are triggered only when
  perf_stream_paranoid is set to 0.
- Drop root to trigger reports.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 tests/i915/perf.c | 449 ++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 430 insertions(+), 19 deletions(-)

diff --git a/tests/i915/perf.c b/tests/i915/perf.c
index 92edc9f1..b030cfad 100644
--- a/tests/i915/perf.c
+++ b/tests/i915/perf.c
@@ -53,6 +53,8 @@ IGT_TEST_DESCRIPTION("Test the i915 perf metrics streaming interface");
 #define OAREPORT_REASON_SHIFT          19
 #define OAREPORT_REASON_TIMER          (1<<0)
 #define OAREPORT_REASON_INTERNAL       (3<<1)
+#define OAREPORT_REASON_TRIGGER1       (1<<1)
+#define OAREPORT_REASON_TRIGGER2       (1<<2)
 #define OAREPORT_REASON_CTX_SWITCH     (1<<3)
 #define OAREPORT_REASON_GO             (1<<4)
 #define OAREPORT_REASON_CLK_RATIO      (1<<5)
@@ -204,6 +206,7 @@ static struct intel_perf *intel_perf = NULL;
 static struct intel_perf_metric_set *test_set = NULL;
 static bool *undefined_a_counters;
 static uint64_t oa_exp_1_millisec;
+struct intel_mmio_data mmio_data;
 
 static igt_render_copyfunc_t render_copy = NULL;
 static uint32_t (*read_report_ticks)(uint32_t *report,
@@ -293,6 +296,23 @@ __perf_open(int fd, struct drm_i915_perf_open_param *param, bool prevent_pm)
 	return ret;
 }
 
+static int i915_perf_revision(int fd)
+{
+	drm_i915_getparam_t gp;
+	int value = 1, ret;
+
+	gp.param = I915_PARAM_PERF_REVISION;
+	gp.value = &value;
+	/* Query the fd passed by the caller, not the global drm_fd. */
+	ret = igt_ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp);
+	if (ret == -1) {
+		/* If the param is missing, consider version 1. */
+		igt_assert_eq(errno, EINVAL);
+		return 1;
+	}
+	return value;
+}
+
 static int
 lookup_format(int i915_perf_fmt_id)
 {
@@ -383,11 +403,17 @@ gen8_read_report_clock_ratios(uint32_t *report,
 	*unslice_freq_mhz = (unslice_freq * 16666) / 1000;
 }
 
+static uint32_t
+gen8_report_reason(const uint32_t *report)
+{
+	return ((report[0] >> OAREPORT_REASON_SHIFT) &
+		OAREPORT_REASON_MASK);
+}
+
 static const char *
 gen8_read_report_reason(const uint32_t *report)
 {
-	uint32_t reason = ((report[0] >> OAREPORT_REASON_SHIFT) &
-			   OAREPORT_REASON_MASK);
+	uint32_t reason = gen8_report_reason(report);
 
 	if (reason & (1<<0))
 		return "timer";
@@ -3118,6 +3144,268 @@ emit_stall_timestamp_and_rpc(struct intel_batchbuffer *batch,
 	emit_report_perf_count(batch, dst, report_dst_offset, report_id);
 }
 
+/* The following registers all have the same layout. */
+#define OAREPORTTRIG2 (0x2744)
+#define   OAREPORTTRIG2_INVERT_A_0  (1 << 0)
+#define   OAREPORTTRIG2_INVERT_A_1  (1 << 1)
+#define   OAREPORTTRIG2_INVERT_A_2  (1 << 2)
+#define   OAREPORTTRIG2_INVERT_A_3  (1 << 3)
+#define   OAREPORTTRIG2_INVERT_A_4  (1 << 4)
+#define   OAREPORTTRIG2_INVERT_A_5  (1 << 5)
+#define   OAREPORTTRIG2_INVERT_A_6  (1 << 6)
+#define   OAREPORTTRIG2_INVERT_A_7  (1 << 7)
+#define   OAREPORTTRIG2_INVERT_A_8  (1 << 8)
+#define   OAREPORTTRIG2_INVERT_A_9  (1 << 9)
+#define   OAREPORTTRIG2_INVERT_A_10 (1 << 10)
+#define   OAREPORTTRIG2_INVERT_A_11 (1 << 11)
+#define   OAREPORTTRIG2_INVERT_A_12 (1 << 12)
+#define   OAREPORTTRIG2_INVERT_A_13 (1 << 13)
+#define   OAREPORTTRIG2_INVERT_A_14 (1 << 14)
+#define   OAREPORTTRIG2_INVERT_A_15 (1 << 15)
+#define   OAREPORTTRIG2_INVERT_B_0  (1 << 16)
+#define   OAREPORTTRIG2_INVERT_B_1  (1 << 17)
+#define   OAREPORTTRIG2_INVERT_B_2  (1 << 18)
+#define   OAREPORTTRIG2_INVERT_B_3  (1 << 19)
+#define   OAREPORTTRIG2_INVERT_C_0  (1 << 20)
+#define   OAREPORTTRIG2_INVERT_C_1  (1 << 21)
+#define   OAREPORTTRIG2_INVERT_D_0  (1 << 22)
+#define   OAREPORTTRIG2_THRESHOLD_ENABLE      (1 << 23)
+#define   OAREPORTTRIG2_REPORT_TRIGGER_ENABLE (1 << 31)
+#define OAREPORTTRIG6 (0x2754)
+#define OA_PERF_COUNTER_A(idx) (0x2800 + 8 * (idx))
+#define GEN8_OASTATUS (0x2b08)
+
+#define GEN12_OAREPORTTRIG2 (0xd924)
+#define GEN12_OAREPORTTRIG6 (0xd934)
+#define GEN12_OAG_PERF_COUNTER_A(idx) (0xD980 + 8 * (idx))
+#define GEN12_OAG_OASTATUS (0xdafc)
+
+/*
+ * We have 2 trigger registers that each generate a different
+ * report reason.
+ */
+static const uint32_t gen8_oa_wl[] = {
+	OAREPORTTRIG2,
+	OAREPORTTRIG6,
+	OA_PERF_COUNTER_A(18),
+	GEN8_OASTATUS,
+};
+static const uint32_t gen12_oa_wl[] = {
+	GEN12_OAREPORTTRIG2,
+	GEN12_OAREPORTTRIG6,
+	GEN12_OAG_PERF_COUNTER_A(18),
+	GEN12_OAG_OASTATUS,
+};
+
+static void
+emit_triggered_oa_report(struct intel_batchbuffer *batch,
+			 uint32_t trigger)
+{
+	const uint32_t *triggers = intel_gen(devid) >= 12 ? gen12_oa_wl: gen8_oa_wl;
+
+	assert(trigger <= 1);
+
+	BEGIN_BATCH(6, 0);
+	OUT_BATCH(MI_LOAD_REGISTER_IMM);
+	OUT_BATCH(triggers[trigger]);
+	OUT_BATCH(OAREPORTTRIG2_INVERT_C_1 |
+		  OAREPORTTRIG2_REPORT_TRIGGER_ENABLE);
+	OUT_BATCH(MI_LOAD_REGISTER_IMM);
+	OUT_BATCH(triggers[trigger]);
+	OUT_BATCH(OAREPORTTRIG2_INVERT_C_1 |
+		  OAREPORTTRIG2_INVERT_D_0 |
+		  OAREPORTTRIG2_REPORT_TRIGGER_ENABLE);
+	ADVANCE_BATCH();
+}
+
+static uint64_t
+rcs_timestmap_reg_read(int fd)
+{
+	struct drm_i915_reg_read rr = {
+		.offset = 0x2358 | I915_REG_READ_8B_WA, /* render ring timestamp */
+	};
+
+	do_ioctl(fd, DRM_IOCTL_I915_REG_READ, &rr);
+
+	return rr.val;
+}
+
+/*
+ * Verify that MI_LRI can trigger OA reports into the OA buffer when
+ * perf_stream_paranoid is 0, and that none are triggered when it is 1.
+ */
+static void
+test_triggered_oa_reports(int paranoid)
+{
+	int oa_exponent = max_oa_exponent_for_period_lte(1000000);
+	uint64_t properties[] = {
+		DRM_I915_PERF_PROP_CTX_HANDLE, UINT64_MAX, /* updated below */
+
+		/* Note: we have to specify at least one sample property even
+		 * though we aren't interested in samples in this case
+		 */
+		DRM_I915_PERF_PROP_SAMPLE_OA, true,
+
+		/* OA unit configuration */
+		DRM_I915_PERF_PROP_OA_METRICS_SET, test_set->perf_oa_metrics_set,
+		DRM_I915_PERF_PROP_OA_FORMAT, test_set->perf_oa_format,
+		DRM_I915_PERF_PROP_OA_EXPONENT, oa_exponent,
+
+		/* Note: only reports with a trigger reason are counted below */
+	};
+	struct drm_i915_perf_open_param param = {
+		.flags = I915_PERF_FLAG_FD_CLOEXEC,
+		.num_properties = ARRAY_SIZE(properties) / 2,
+		.properties_ptr = to_user_pointer(properties),
+	};
+	struct drm_i915_perf_record_header *header;
+	drm_intel_bufmgr *bufmgr;
+	drm_intel_context *context;
+	struct igt_helper_process child = {};
+	struct intel_batchbuffer *batch;
+	struct igt_buf src[2], dst[2];
+	uint64_t timestamp32_mask = (1ull << 32) - 1;
+	uint64_t timestamps[2];
+	uint32_t buf_size = 16 * 1024 * 1024;
+	uint8_t *buf = malloc(buf_size);
+	uint32_t ctx_id;
+	int width = 800;
+	int height = 600;
+	uint32_t trigger_counts[2] = { 0, };
+	int ret;
+
+	write_u64_file("/proc/sys/dev/i915/perf_stream_paranoid", paranoid);
+
+	do {
+		igt_fork_helper(&child) {
+			if (!paranoid)
+				igt_drop_root();
+
+			bufmgr = drm_intel_bufmgr_gem_init(drm_fd, 4096);
+			drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+
+			scratch_buf_init(bufmgr, &src[0], width, height, 0xff0000ff);
+			scratch_buf_init(bufmgr, &dst[0], width, height, 0x00ff00ff);
+			scratch_buf_init(bufmgr, &src[1], 2 * width, height, 0xff0000ff);
+			scratch_buf_init(bufmgr, &dst[1], 2 * width, height, 0x00ff00ff);
+
+			batch = intel_batchbuffer_alloc(bufmgr, devid);
+
+			context = drm_intel_gem_context_create(bufmgr);
+			igt_assert(context);
+
+			ret = drm_intel_gem_context_get_id(context, &ctx_id);
+			igt_assert_eq(ret, 0);
+			properties[1] = ctx_id;
+
+			timestamps[0] = rcs_timestmap_reg_read(drm_fd);
+
+			stream_fd = __perf_open(drm_fd, &param, false);
+
+			emit_triggered_oa_report(batch, 0);
+
+			render_copy(batch,
+				    context,
+				    &src[0], 0, 0, width, height,
+				    &dst[0], 0, 0);
+
+			emit_triggered_oa_report(batch, 0);
+
+			emit_triggered_oa_report(batch, 1);
+
+			render_copy(batch,
+				    context,
+				    &src[1], 0, 0, 2 * width, height,
+				    &dst[1], 0, 0);
+
+			emit_triggered_oa_report(batch, 1);
+
+			intel_batchbuffer_flush_with_context(batch, context);
+
+			/* On some failures, this timestamp is too early as in
+			 * we bail out before seeing the triggered report. Wait
+			 * a little more and then check.
+			 */
+			usleep(50000);
+
+			timestamps[1] = rcs_timestmap_reg_read(drm_fd);
+			/* Retry if the 64b or truncated 32b timestamp wrapped */
+			if (timestamps[1] < timestamps[0] ||
+			    (timestamps[1] & timestamp32_mask) < (timestamps[0] & timestamp32_mask)) {
+				igt_debug("Timestamp rollover, trying again\n");
+				exit(EAGAIN);
+			}
+
+			ret = i915_read_reports_until_timestamp(test_set->perf_oa_format,
+								buf, buf_size,
+								timestamps[0] & timestamp32_mask,
+								timestamps[1] & timestamp32_mask);
+
+			for (size_t offset = 0; offset < ret; offset += header->size) {
+				uint32_t *report;
+
+				header = (void *)(buf + offset);
+
+				igt_assert_eq(header->pad, 0); /* Reserved */
+
+				igt_assert_neq(header->type, DRM_I915_PERF_RECORD_OA_BUFFER_LOST);
+
+				if (header->type == DRM_I915_PERF_RECORD_OA_REPORT_LOST)
+					continue;
+
+				/* Currently the only other record type expected is a
+				 * _SAMPLE. Notably this test will need updating if
+				 * i915-perf is extended in the future with additional
+				 * record types.
+				 */
+				igt_assert_eq(header->type, DRM_I915_PERF_RECORD_SAMPLE);
+
+				report = (void *)(header + 1);
+
+				igt_debug("report ts=0x%08x hw_id=0x%08x reason=%s\n",
+					  report[1], report[2],
+					  gen8_read_report_reason(report));
+
+				if (gen8_report_reason(report) & OAREPORT_REASON_TRIGGER1) {
+					igt_assert_eq(trigger_counts[1], 0);
+					trigger_counts[0]++;
+				}
+				if (gen8_report_reason(report) & OAREPORT_REASON_TRIGGER2) {
+					igt_assert_eq(trigger_counts[0], 2);
+					trigger_counts[1]++;
+				}
+			}
+
+			if (paranoid) {
+				igt_assert_eq(trigger_counts[0], 0);
+				igt_assert_eq(trigger_counts[1], 0);
+			} else {
+				igt_assert_eq(trigger_counts[0], 2);
+				igt_assert_eq(trigger_counts[1], 2);
+			}
+
+			for (int i = 0; i < ARRAY_SIZE(src); i++) {
+				drm_intel_bo_unreference(src[i].bo);
+				drm_intel_bo_unreference(dst[i].bo);
+			}
+
+			intel_batchbuffer_free(batch);
+			drm_intel_gem_context_destroy(context);
+			drm_intel_bufmgr_destroy(bufmgr);
+			__perf_close(stream_fd);
+		}
+
+
+		ret = igt_wait_helper(&child);
+
+		igt_assert(WEXITSTATUS(ret) == EAGAIN ||
+			   WEXITSTATUS(ret) == 0);
+
+	} while (WEXITSTATUS(ret) == EAGAIN);
+
+	free(buf);
+}
+
 /* Tests the INTEL_performance_query use case where an unprivileged process
  * should be able to configure the OA unit for per-context metrics (for a
  * context associated with that process' drm file descriptor) and the counters
@@ -4768,6 +5056,122 @@ test_whitelisted_registers_userspace_config(void)
 	i915_perf_remove_config(drm_fd, config_id);
 }
 
+#define RING_FORCE_TO_NONPRIV_ADDRESS_MASK 0x03fffffc
+
+static uint32_t gen12_wl_slots[] = {
+	0x24d0, 0x24d4, 0x24d8, 0x24dc, 0x24e0, 0x24e4, 0x24e8, 0x24ec,
+	0x24f0, 0x24f4, 0x24f8, 0x24fc, 0x2010, 0x2014, 0x2018, 0x201c,
+	0x21e0, 0x21e4, 0x21e8, 0x21ec,
+};
+
+static uint32_t gen9_wl_slots[] = {
+	0x24d0, 0x24d4, 0x24d8, 0x24dc, 0x24e0, 0x24e4, 0x24e8, 0x24ec,
+	0x24f0, 0x24f4, 0x24f8, 0x24fc,
+};
+
+static void dump_wl(uint32_t *slots, uint32_t count)
+{
+	int i;
+
+	for (i = 0; i < count; i++)
+		igt_debug("LOCAL: FORCE_TO_NON_PRIV_%02d = %08x\n",
+			  i, intel_register_read(&mmio_data, slots[i]));
+}
+
+static void dump_whitelist(const char *msg)
+{
+	igt_debug("%s\n", msg);
+
+	if (intel_gen(devid) >= 12)
+		dump_wl(gen12_wl_slots, ARRAY_SIZE(gen12_wl_slots));
+	else if (intel_gen(devid) > 8)
+		dump_wl(gen9_wl_slots, ARRAY_SIZE(gen9_wl_slots));
+	else
+		return;
+}
+
+static bool in_whitelist(uint32_t reg)
+{
+	uint32_t *slots, count;
+	int i;
+
+	if (intel_gen(devid) >= 12) {
+		slots = gen12_wl_slots;
+		count = ARRAY_SIZE(gen12_wl_slots);
+	} else {
+		slots = gen9_wl_slots;
+		count = ARRAY_SIZE(gen9_wl_slots);
+	}
+
+	for (i = 0; i < count; i++) {
+		uint32_t fpriv = intel_register_read(&mmio_data, slots[i]);
+
+		if ((fpriv & RING_FORCE_TO_NONPRIV_ADDRESS_MASK) == reg)
+			return true;
+	}
+
+	return false;
+}
+
+static void oa_regs_in_whitelist(bool are_present)
+{
+	const uint32_t *regs;
+	uint32_t count;
+	int i;
+
+	if (intel_gen(devid) >= 12) {
+		regs = gen12_oa_wl;
+		count = i915_perf_revision(drm_fd) >= 7 ?
+			ARRAY_SIZE(gen12_oa_wl) : 2;
+	} else {
+		regs = gen8_oa_wl;
+		count = i915_perf_revision(drm_fd) >= 7 ?
+			ARRAY_SIZE(gen8_oa_wl) : 2;
+	}
+
+	for (i = 0; i < count; i++)
+		if (are_present)
+			igt_assert(in_whitelist(regs[i]));
+		else
+			igt_assert(!in_whitelist(regs[i]));
+}
+
+static void test_oa_regs_whitelist(int paranoid)
+{
+	uint64_t properties[] = {
+		DRM_I915_PERF_PROP_SAMPLE_OA, true,
+		DRM_I915_PERF_PROP_OA_METRICS_SET, test_set->perf_oa_metrics_set,
+		DRM_I915_PERF_PROP_OA_FORMAT, test_set->perf_oa_format,
+		DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp_1_millisec,
+
+	};
+	struct drm_i915_perf_open_param param = {
+		.flags = I915_PERF_FLAG_FD_CLOEXEC,
+		.num_properties = sizeof(properties) / 16,
+		.properties_ptr = to_user_pointer(properties),
+	};
+	write_u64_file("/proc/sys/dev/i915/perf_stream_paranoid", paranoid);
+	intel_register_access_init(&mmio_data, intel_get_pci_device(),
+				   0, drm_fd);
+	stream_fd = __perf_open(drm_fd, &param, false);
+
+	dump_whitelist("oa whitelisted");
+
+	if (paranoid)
+		oa_regs_in_whitelist(false);
+	else
+		oa_regs_in_whitelist(true);
+
+	__perf_close(stream_fd);
+
+	dump_whitelist("oa remove whitelist");
+
+	/* after perf close, whitelist should be removed */
+	oa_regs_in_whitelist(false);
+
+	intel_register_access_fini(&mmio_data);
+}
+
 static unsigned
 read_i915_module_ref(void)
 {
@@ -4880,23 +5284,6 @@ test_sysctl_defaults(void)
 	igt_assert_eq(max_freq, 100000);
 }
 
-static int i915_perf_revision(int fd)
-{
-	drm_i915_getparam_t gp;
-	int value = 1, ret;
-
-	gp.param = I915_PARAM_PERF_REVISION;
-	gp.value = &value;
-	ret = igt_ioctl(drm_fd, DRM_IOCTL_I915_GETPARAM, &gp);
-	if (ret == -1) {
-		/* If the param is missing, consider version 1. */
-		igt_assert_eq(errno, EINVAL);
-		return 1;
-	}
-
-	return value;
-}
-
 igt_main
 {
 	igt_fixture {
@@ -5096,6 +5483,30 @@ igt_main
 	igt_subtest("whitelisted-registers-userspace-config")
 		test_whitelisted_registers_userspace_config();
 
+
+	igt_subtest_group {
+		igt_fixture {
+			igt_require(intel_gen(devid) > 8);
+			igt_require(i915_perf_revision(drm_fd) >= 6);
+		}
+
+		igt_describe("Verify that OA registers are whitelisted for paranoid 0");
+		igt_subtest("oa-regs-whitelisted")
+			test_oa_regs_whitelist(0);
+
+		igt_describe("Verify that OA registers are not whitelisted for paranoid 1");
+		igt_subtest("oa-regs-not-whitelisted")
+			test_oa_regs_whitelist(1);
+
+		igt_describe("Verify reports triggered when perf_stream_paranoid is 0");
+		igt_subtest("triggered-oa-reports-paranoid-0")
+			test_triggered_oa_reports(0);
+
+		igt_describe("Verify reports not triggered when perf_stream_paranoid is 1");
+		igt_subtest("triggered-oa-reports-paranoid-1")
+			test_triggered_oa_reports(1);
+	}
+
 	igt_fixture {
 		/* leave sysctl options in their default state... */
 		write_u64_file("/proc/sys/dev/i915/oa_max_sample_rate", 100000);
-- 
2.20.1

_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [igt-dev] [PATCH 1/2] i915/perf: add tests for triggered OA reports
@ 2020-07-24  0:15 Umesh Nerlige Ramappa
  0 siblings, 0 replies; 10+ messages in thread
From: Umesh Nerlige Ramappa @ 2020-07-24  0:15 UTC (permalink / raw)
  To: igt-dev

From: Lionel G Landwerlin <lionel.g.landwerlin@intel.com>

By whitelisting a couple of registers we can allow an application
batch to trigger OA reports in the OA buffer by switching back & forth
an inverter on the condition logic.

v2: Wait before sampling the timestamp used to end the OA buffer search

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 tests/i915/perf.c | 260 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 258 insertions(+), 2 deletions(-)

diff --git a/tests/i915/perf.c b/tests/i915/perf.c
index 92edc9f1..7d09e009 100644
--- a/tests/i915/perf.c
+++ b/tests/i915/perf.c
@@ -53,6 +53,8 @@ IGT_TEST_DESCRIPTION("Test the i915 perf metrics streaming interface");
 #define OAREPORT_REASON_SHIFT          19
 #define OAREPORT_REASON_TIMER          (1<<0)
 #define OAREPORT_REASON_INTERNAL       (3<<1)
+#define OAREPORT_REASON_TRIGGER1       (1<<1)
+#define OAREPORT_REASON_TRIGGER2       (1<<2)
 #define OAREPORT_REASON_CTX_SWITCH     (1<<3)
 #define OAREPORT_REASON_GO             (1<<4)
 #define OAREPORT_REASON_CLK_RATIO      (1<<5)
@@ -383,11 +385,17 @@ gen8_read_report_clock_ratios(uint32_t *report,
 	*unslice_freq_mhz = (unslice_freq * 16666) / 1000;
 }
 
+static uint32_t
+gen8_report_reason(const uint32_t *report)
+{
+	/* Reason bits live in the first dword of the report. */
+	return (report[0] >> OAREPORT_REASON_SHIFT) & OAREPORT_REASON_MASK;
+}
+
 static const char *
 gen8_read_report_reason(const uint32_t *report)
 {
-	uint32_t reason = ((report[0] >> OAREPORT_REASON_SHIFT) &
-			   OAREPORT_REASON_MASK);
+	uint32_t reason = gen8_report_reason(report);
 
 	if (reason & (1<<0))
 		return "timer";
@@ -3118,6 +3126,247 @@ emit_stall_timestamp_and_rpc(struct intel_batchbuffer *batch,
 	emit_report_perf_count(batch, dst, report_dst_offset, report_id);
 }
 
+/* The following registers all have the same layout. */
+#define OAREPORTTRIG2 (0x2744)
+#define   OAREPORTTRIG2_INVERT_A_0  (1 << 0)
+#define   OAREPORTTRIG2_INVERT_A_1  (1 << 1)
+#define   OAREPORTTRIG2_INVERT_A_2  (1 << 2)
+#define   OAREPORTTRIG2_INVERT_A_3  (1 << 3)
+#define   OAREPORTTRIG2_INVERT_A_4  (1 << 4)
+#define   OAREPORTTRIG2_INVERT_A_5  (1 << 5)
+#define   OAREPORTTRIG2_INVERT_A_6  (1 << 6)
+#define   OAREPORTTRIG2_INVERT_A_7  (1 << 7)
+#define   OAREPORTTRIG2_INVERT_A_8  (1 << 8)
+#define   OAREPORTTRIG2_INVERT_A_9  (1 << 9)
+#define   OAREPORTTRIG2_INVERT_A_10 (1 << 10)
+#define   OAREPORTTRIG2_INVERT_A_11 (1 << 11)
+#define   OAREPORTTRIG2_INVERT_A_12 (1 << 12)
+#define   OAREPORTTRIG2_INVERT_A_13 (1 << 13)
+#define   OAREPORTTRIG2_INVERT_A_14 (1 << 14)
+#define   OAREPORTTRIG2_INVERT_A_15 (1 << 15)
+#define   OAREPORTTRIG2_INVERT_B_0  (1 << 16)
+#define   OAREPORTTRIG2_INVERT_B_1  (1 << 17)
+#define   OAREPORTTRIG2_INVERT_B_2  (1 << 18)
+#define   OAREPORTTRIG2_INVERT_B_3  (1 << 19)
+#define   OAREPORTTRIG2_INVERT_C_0  (1 << 20)
+#define   OAREPORTTRIG2_INVERT_C_1  (1 << 21)
+#define   OAREPORTTRIG2_INVERT_D_0  (1 << 22)
+#define   OAREPORTTRIG2_THRESHOLD_ENABLE      (1 << 23)
+#define   OAREPORTTRIG2_REPORT_TRIGGER_ENABLE (1 << 31)
+#define OAREPORTTRIG6 (0x2754)
+#define GEN12_OAREPORTTRIG2 (0xd924)
+#define GEN12_OAREPORTTRIG6 (0xd934)
+
+static void
+emit_triggered_oa_report(struct intel_batchbuffer *batch,
+			 uint32_t trigger)
+{
+	/*
+	 * Each platform has two trigger registers, and each of them
+	 * produces its own report reason. The first row is used before
+	 * gen12, the second from gen12 onwards.
+	 */
+	static const uint32_t trigger_regs[2][2] = {
+		{ OAREPORTTRIG2, OAREPORTTRIG6 },
+		{ GEN12_OAREPORTTRIG2, GEN12_OAREPORTTRIG6 },
+	};
+	const uint32_t armed = OAREPORTTRIG2_INVERT_C_1 |
+			       OAREPORTTRIG2_REPORT_TRIGGER_ENABLE;
+	const int row = intel_gen(devid) >= 12;
+	uint32_t reg;
+
+	assert(trigger <= 1);
+	reg = trigger_regs[row][trigger];
+
+	/* Two LRI writes to the same register; the second adds INVERT_D_0. */
+	BEGIN_BATCH(6, 0);
+	OUT_BATCH(MI_LOAD_REGISTER_IMM);
+	OUT_BATCH(reg);
+	OUT_BATCH(armed);
+
+	OUT_BATCH(MI_LOAD_REGISTER_IMM);
+	OUT_BATCH(reg);
+	OUT_BATCH(armed | OAREPORTTRIG2_INVERT_D_0);
+	ADVANCE_BATCH();
+}
+
+static uint64_t
+rcs_timestmap_reg_read(int fd)
+{
+	/* 0x2358 is the render ring timestamp register. */
+	struct drm_i915_reg_read query = { 0 };
+
+	query.offset = 0x2358 | I915_REG_READ_8B_WA;
+	do_ioctl(fd, DRM_IOCTL_I915_REG_READ, &query);
+
+	return query.val;
+}
+
+/*
+ * Verify that a batch can trigger OA reports into the OA buffer using
+ * MI_LRI writes to the two trigger registers (two reports expected each).
+ */
+static void
+test_triggered_oa_reports(void)
+{
+	int oa_exponent = max_oa_exponent_for_period_lte(1000000);
+	uint64_t properties[] = {
+		DRM_I915_PERF_PROP_CTX_HANDLE, UINT64_MAX, /* updated below */
+
+		/* The OA reports are needed in the stream: the triggered
+		 * ones are counted by their report reason below.
+		 */
+		DRM_I915_PERF_PROP_SAMPLE_OA, true,
+
+		/* OA unit configuration */
+		DRM_I915_PERF_PROP_OA_METRICS_SET, test_set->perf_oa_metrics_set,
+		DRM_I915_PERF_PROP_OA_FORMAT, test_set->perf_oa_format,
+
+		/* Sampling exponent computed at the top of the function */
+		DRM_I915_PERF_PROP_OA_EXPONENT, oa_exponent,
+	};
+	struct drm_i915_perf_open_param param = {
+		.flags = I915_PERF_FLAG_FD_CLOEXEC,
+		.num_properties = ARRAY_SIZE(properties) / 2,
+		.properties_ptr = to_user_pointer(properties),
+	};
+	struct drm_i915_perf_record_header *header;
+	drm_intel_bufmgr *bufmgr;
+	drm_intel_context *context;
+	struct igt_helper_process child = {};
+	struct intel_batchbuffer *batch;
+	struct igt_buf src[2], dst[2];
+	uint64_t timestamp32_mask = (1ull << 32) - 1;
+	uint64_t timestamps[2];
+	uint32_t buf_size = 16 * 1024 * 1024;
+	uint8_t *buf = malloc(buf_size);
+	uint32_t ctx_id;
+	int width = 800, height = 600;
+	uint32_t trigger_counts[2] = { 0, };
+	int ret;
+
+	igt_assert(buf);
+	do {
+		igt_fork_helper(&child) {
+			bufmgr = drm_intel_bufmgr_gem_init(drm_fd, 4096);
+			drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+
+			scratch_buf_init(bufmgr, &src[0], width, height, 0xff0000ff);
+			scratch_buf_init(bufmgr, &dst[0], width, height, 0x00ff00ff);
+			scratch_buf_init(bufmgr, &src[1], 2 * width, height, 0xff0000ff);
+			scratch_buf_init(bufmgr, &dst[1], 2 * width, height, 0x00ff00ff);
+
+			batch = intel_batchbuffer_alloc(bufmgr, devid);
+
+			context = drm_intel_gem_context_create(bufmgr);
+			igt_assert(context);
+
+			ret = drm_intel_gem_context_get_id(context, &ctx_id);
+			igt_assert_eq(ret, 0);
+			properties[1] = ctx_id;
+
+			timestamps[0] = rcs_timestmap_reg_read(drm_fd);
+
+			stream_fd = __perf_open(drm_fd, &param, false);
+
+			emit_triggered_oa_report(batch, 0);
+
+			render_copy(batch,
+				    context,
+				    &src[0], 0, 0, width, height,
+				    &dst[0], 0, 0);
+
+			emit_triggered_oa_report(batch, 0);
+
+			emit_triggered_oa_report(batch, 1);
+
+			render_copy(batch,
+				    context,
+				    &src[1], 0, 0, 2 * width, height,
+				    &dst[1], 0, 0);
+
+			emit_triggered_oa_report(batch, 1);
+
+			intel_batchbuffer_flush_with_context(batch, context);
+
+			/* On some failures, this timestamp is too early as in
+			 * we bail out before seeing the triggered report. Wait
+			 * a little more and then check.
+			 */
+			usleep(50000);
+
+			timestamps[1] = rcs_timestmap_reg_read(drm_fd);
+
+			if (timestamps[1] < timestamps[0] ||
+			    (timestamps[1] & timestamp32_mask) < (timestamps[0] & timestamp32_mask)) {
+				igt_debug("Timestamp rollover, trying again\n");
+				exit(EAGAIN);
+			}
+
+			ret = i915_read_reports_until_timestamp(test_set->perf_oa_format,
+								buf, buf_size,
+								timestamps[0] & timestamp32_mask,
+								timestamps[1] & timestamp32_mask);
+
+			for (size_t offset = 0; offset < ret; offset += header->size) {
+				uint32_t *report;
+
+				header = (void *)(buf + offset);
+
+				igt_assert_eq(header->pad, 0); /* Reserved */
+
+				igt_assert_neq(header->type, DRM_I915_PERF_RECORD_OA_BUFFER_LOST);
+
+				if (header->type == DRM_I915_PERF_RECORD_OA_REPORT_LOST)
+					continue;
+
+				/* Currently the only other record type expected is a
+				 * _SAMPLE. Notably this test will need updating if
+				 * i915-perf is extended in the future with additional
+				 * record types.
+				 */
+				igt_assert_eq(header->type, DRM_I915_PERF_RECORD_SAMPLE);
+
+				report = (void *)(header + 1);
+
+				igt_debug("report ts=0x%08x hw_id=0x%08x reason=%s\n",
+					  report[1], report[2],
+					  gen8_read_report_reason(report));
+
+				if (gen8_report_reason(report) & OAREPORT_REASON_TRIGGER1) {
+					igt_assert_eq(trigger_counts[1], 0);
+					trigger_counts[0]++;
+				}
+				if (gen8_report_reason(report) & OAREPORT_REASON_TRIGGER2) {
+					igt_assert_eq(trigger_counts[0], 2);
+					trigger_counts[1]++;
+				}
+			}
+
+			igt_assert_eq(trigger_counts[0], 2);
+			igt_assert_eq(trigger_counts[1], 2);
+
+			for (int i = 0; i < ARRAY_SIZE(src); i++) {
+				drm_intel_bo_unreference(src[i].bo);
+				drm_intel_bo_unreference(dst[i].bo);
+			}
+
+			intel_batchbuffer_free(batch);
+			drm_intel_gem_context_destroy(context);
+			drm_intel_bufmgr_destroy(bufmgr);
+			__perf_close(stream_fd);
+		}
+
+		ret = igt_wait_helper(&child);
+
+		igt_assert(WEXITSTATUS(ret) == EAGAIN ||
+			   WEXITSTATUS(ret) == 0);
+
+	} while (WEXITSTATUS(ret) == EAGAIN);
+
+	free(buf);
+}
+
 /* Tests the INTEL_performance_query use case where an unprivileged process
  * should be able to configure the OA unit for per-context metrics (for a
  * context associated with that process' drm file descriptor) and the counters
@@ -5096,6 +5345,13 @@ igt_main
 	igt_subtest("whitelisted-registers-userspace-config")
 		test_whitelisted_registers_userspace_config();
 
+	igt_describe("Verify that triggered reports work");
+	igt_subtest("triggered-oa-reports") {
+		igt_require(intel_gen(devid) >= 8);
+		igt_require(i915_perf_revision(drm_fd) >= 6);
+		test_triggered_oa_reports();
+	}
+
 	igt_fixture {
 		/* leave sysctl options in their default state... */
 		write_u64_file("/proc/sys/dev/i915/oa_max_sample_rate", 100000);
-- 
2.20.1

_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [igt-dev] [PATCH 1/2] i915/perf: add tests for triggered OA reports
@ 2020-07-22  5:38 Umesh Nerlige Ramappa
  0 siblings, 0 replies; 10+ messages in thread
From: Umesh Nerlige Ramappa @ 2020-07-22  5:38 UTC (permalink / raw)
  To: igt-dev

From: Lionel G Landwerlin <lionel.g.landwerlin@intel.com>

By whitelisting a couple of registers we can allow an application
batch to trigger OA reports in the OA buffer by switching back & forth
an inverter on the condition logic.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 tests/i915/perf.c | 254 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 252 insertions(+), 2 deletions(-)

diff --git a/tests/i915/perf.c b/tests/i915/perf.c
index 92edc9f1..eb38ea12 100644
--- a/tests/i915/perf.c
+++ b/tests/i915/perf.c
@@ -53,6 +53,8 @@ IGT_TEST_DESCRIPTION("Test the i915 perf metrics streaming interface");
 #define OAREPORT_REASON_SHIFT          19
 #define OAREPORT_REASON_TIMER          (1<<0)
 #define OAREPORT_REASON_INTERNAL       (3<<1)
+#define OAREPORT_REASON_TRIGGER1       (1<<1)
+#define OAREPORT_REASON_TRIGGER2       (1<<2)
 #define OAREPORT_REASON_CTX_SWITCH     (1<<3)
 #define OAREPORT_REASON_GO             (1<<4)
 #define OAREPORT_REASON_CLK_RATIO      (1<<5)
@@ -383,11 +385,17 @@ gen8_read_report_clock_ratios(uint32_t *report,
 	*unslice_freq_mhz = (unslice_freq * 16666) / 1000;
 }
 
+static uint32_t
+gen8_report_reason(const uint32_t *report)
+{
+	/* Reason bits live in the first dword of the report. */
+	return (report[0] >> OAREPORT_REASON_SHIFT) & OAREPORT_REASON_MASK;
+}
+
 static const char *
 gen8_read_report_reason(const uint32_t *report)
 {
-	uint32_t reason = ((report[0] >> OAREPORT_REASON_SHIFT) &
-			   OAREPORT_REASON_MASK);
+	uint32_t reason = gen8_report_reason(report);
 
 	if (reason & (1<<0))
 		return "timer";
@@ -3118,6 +3126,241 @@ emit_stall_timestamp_and_rpc(struct intel_batchbuffer *batch,
 	emit_report_perf_count(batch, dst, report_dst_offset, report_id);
 }
 
+/* The following registers all have the same layout. */
+#define OAREPORTTRIG2 (0x2744)
+#define   OAREPORTTRIG2_INVERT_A_0  (1 << 0)
+#define   OAREPORTTRIG2_INVERT_A_1  (1 << 1)
+#define   OAREPORTTRIG2_INVERT_A_2  (1 << 2)
+#define   OAREPORTTRIG2_INVERT_A_3  (1 << 3)
+#define   OAREPORTTRIG2_INVERT_A_4  (1 << 4)
+#define   OAREPORTTRIG2_INVERT_A_5  (1 << 5)
+#define   OAREPORTTRIG2_INVERT_A_6  (1 << 6)
+#define   OAREPORTTRIG2_INVERT_A_7  (1 << 7)
+#define   OAREPORTTRIG2_INVERT_A_8  (1 << 8)
+#define   OAREPORTTRIG2_INVERT_A_9  (1 << 9)
+#define   OAREPORTTRIG2_INVERT_A_10 (1 << 10)
+#define   OAREPORTTRIG2_INVERT_A_11 (1 << 11)
+#define   OAREPORTTRIG2_INVERT_A_12 (1 << 12)
+#define   OAREPORTTRIG2_INVERT_A_13 (1 << 13)
+#define   OAREPORTTRIG2_INVERT_A_14 (1 << 14)
+#define   OAREPORTTRIG2_INVERT_A_15 (1 << 15)
+#define   OAREPORTTRIG2_INVERT_B_0  (1 << 16)
+#define   OAREPORTTRIG2_INVERT_B_1  (1 << 17)
+#define   OAREPORTTRIG2_INVERT_B_2  (1 << 18)
+#define   OAREPORTTRIG2_INVERT_B_3  (1 << 19)
+#define   OAREPORTTRIG2_INVERT_C_0  (1 << 20)
+#define   OAREPORTTRIG2_INVERT_C_1  (1 << 21)
+#define   OAREPORTTRIG2_INVERT_D_0  (1 << 22)
+#define   OAREPORTTRIG2_THRESHOLD_ENABLE      (1 << 23)
+#define   OAREPORTTRIG2_REPORT_TRIGGER_ENABLE (1 << 31)
+#define OAREPORTTRIG6 (0x2754)
+#define GEN12_OAREPORTTRIG2 (0xd924)
+#define GEN12_OAREPORTTRIG6 (0xd934)
+
+static void
+emit_triggered_oa_report(struct intel_batchbuffer *batch,
+			 uint32_t trigger)
+{
+	/*
+	 * Each platform has two trigger registers, and each of them
+	 * produces its own report reason. The first row is used before
+	 * gen12, the second from gen12 onwards.
+	 */
+	static const uint32_t trigger_regs[2][2] = {
+		{ OAREPORTTRIG2, OAREPORTTRIG6 },
+		{ GEN12_OAREPORTTRIG2, GEN12_OAREPORTTRIG6 },
+	};
+	const uint32_t armed = OAREPORTTRIG2_INVERT_C_1 |
+			       OAREPORTTRIG2_REPORT_TRIGGER_ENABLE;
+	const int row = intel_gen(devid) >= 12;
+	uint32_t reg;
+
+	assert(trigger <= 1);
+	reg = trigger_regs[row][trigger];
+
+	/* Two LRI writes to the same register; the second adds INVERT_D_0. */
+	BEGIN_BATCH(6, 0);
+	OUT_BATCH(MI_LOAD_REGISTER_IMM);
+	OUT_BATCH(reg);
+	OUT_BATCH(armed);
+
+	OUT_BATCH(MI_LOAD_REGISTER_IMM);
+	OUT_BATCH(reg);
+	OUT_BATCH(armed | OAREPORTTRIG2_INVERT_D_0);
+	ADVANCE_BATCH();
+}
+
+static uint64_t
+rcs_timestmap_reg_read(int fd)
+{
+	/* 0x2358 is the render ring timestamp register. */
+	struct drm_i915_reg_read query = { 0 };
+
+	query.offset = 0x2358 | I915_REG_READ_8B_WA;
+	do_ioctl(fd, DRM_IOCTL_I915_REG_READ, &query);
+
+	return query.val;
+}
+
+/*
+ * Verify that a batch can trigger OA reports into the OA buffer using
+ * MI_LRI writes to the two trigger registers (two reports expected each).
+ */
+static void
+test_triggered_oa_reports(void)
+{
+	int oa_exponent = max_oa_exponent_for_period_lte(1000000);
+	uint64_t properties[] = {
+		DRM_I915_PERF_PROP_CTX_HANDLE, UINT64_MAX, /* updated below */
+
+		/* The OA reports are needed in the stream: the triggered
+		 * ones are counted by their report reason below.
+		 */
+		DRM_I915_PERF_PROP_SAMPLE_OA, true,
+
+		/* OA unit configuration */
+		DRM_I915_PERF_PROP_OA_METRICS_SET, test_set->perf_oa_metrics_set,
+		DRM_I915_PERF_PROP_OA_FORMAT, test_set->perf_oa_format,
+
+		/* Sampling exponent computed at the top of the function */
+		DRM_I915_PERF_PROP_OA_EXPONENT, oa_exponent,
+	};
+	struct drm_i915_perf_open_param param = {
+		.flags = I915_PERF_FLAG_FD_CLOEXEC,
+		.num_properties = ARRAY_SIZE(properties) / 2,
+		.properties_ptr = to_user_pointer(properties),
+	};
+	struct drm_i915_perf_record_header *header;
+	drm_intel_bufmgr *bufmgr;
+	drm_intel_context *context;
+	struct igt_helper_process child = {};
+	struct intel_batchbuffer *batch;
+	struct igt_buf src[2], dst[2];
+	uint64_t timestamp32_mask = (1ull << 32) - 1;
+	uint64_t timestamps[2];
+	uint32_t buf_size = 16 * 1024 * 1024;
+	uint8_t *buf = malloc(buf_size);
+	uint32_t ctx_id;
+	int width = 800, height = 600;
+	uint32_t trigger_counts[2] = { 0, };
+	int ret;
+
+	igt_assert(buf);
+	do {
+		igt_fork_helper(&child) {
+			bufmgr = drm_intel_bufmgr_gem_init(drm_fd, 4096);
+			drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+
+			scratch_buf_init(bufmgr, &src[0], width, height, 0xff0000ff);
+			scratch_buf_init(bufmgr, &dst[0], width, height, 0x00ff00ff);
+			scratch_buf_init(bufmgr, &src[1], 2 * width, height, 0xff0000ff);
+			scratch_buf_init(bufmgr, &dst[1], 2 * width, height, 0x00ff00ff);
+
+			batch = intel_batchbuffer_alloc(bufmgr, devid);
+
+			context = drm_intel_gem_context_create(bufmgr);
+			igt_assert(context);
+
+			ret = drm_intel_gem_context_get_id(context, &ctx_id);
+			igt_assert_eq(ret, 0);
+			properties[1] = ctx_id;
+
+			timestamps[0] = rcs_timestmap_reg_read(drm_fd);
+
+			stream_fd = __perf_open(drm_fd, &param, false);
+
+			emit_triggered_oa_report(batch, 0);
+
+			render_copy(batch,
+				    context,
+				    &src[0], 0, 0, width, height,
+				    &dst[0], 0, 0);
+
+			emit_triggered_oa_report(batch, 0);
+
+			emit_triggered_oa_report(batch, 1);
+
+			render_copy(batch,
+				    context,
+				    &src[1], 0, 0, 2 * width, height,
+				    &dst[1], 0, 0);
+
+			emit_triggered_oa_report(batch, 1);
+
+			intel_batchbuffer_flush_with_context(batch, context);
+
+			timestamps[1] = rcs_timestmap_reg_read(drm_fd);
+
+			if (timestamps[1] < timestamps[0] ||
+			    (timestamps[1] & timestamp32_mask) < (timestamps[0] & timestamp32_mask)) {
+				igt_debug("Timestamp rollover, trying again\n");
+				exit(EAGAIN);
+			}
+
+			ret = i915_read_reports_until_timestamp(test_set->perf_oa_format,
+								buf, buf_size,
+								timestamps[0] & timestamp32_mask,
+								timestamps[1] & timestamp32_mask);
+
+			for (size_t offset = 0; offset < ret; offset += header->size) {
+				uint32_t *report;
+
+				header = (void *)(buf + offset);
+
+				igt_assert_eq(header->pad, 0); /* Reserved */
+
+				igt_assert_neq(header->type, DRM_I915_PERF_RECORD_OA_BUFFER_LOST);
+
+				if (header->type == DRM_I915_PERF_RECORD_OA_REPORT_LOST)
+					continue;
+
+				/* Currently the only other record type expected is a
+				 * _SAMPLE. Notably this test will need updating if
+				 * i915-perf is extended in the future with additional
+				 * record types.
+				 */
+				igt_assert_eq(header->type, DRM_I915_PERF_RECORD_SAMPLE);
+
+				report = (void *)(header + 1);
+
+				igt_debug("report ts=0x%08x hw_id=0x%08x reason=%s\n",
+					  report[1], report[2],
+					  gen8_read_report_reason(report));
+
+				if (gen8_report_reason(report) & OAREPORT_REASON_TRIGGER1) {
+					igt_assert_eq(trigger_counts[1], 0);
+					trigger_counts[0]++;
+				}
+				if (gen8_report_reason(report) & OAREPORT_REASON_TRIGGER2) {
+					igt_assert_eq(trigger_counts[0], 2);
+					trigger_counts[1]++;
+				}
+			}
+
+			igt_assert_eq(trigger_counts[0], 2);
+			igt_assert_eq(trigger_counts[1], 2);
+
+			for (int i = 0; i < ARRAY_SIZE(src); i++) {
+				drm_intel_bo_unreference(src[i].bo);
+				drm_intel_bo_unreference(dst[i].bo);
+			}
+
+			intel_batchbuffer_free(batch);
+			drm_intel_gem_context_destroy(context);
+			drm_intel_bufmgr_destroy(bufmgr);
+			__perf_close(stream_fd);
+		}
+
+		ret = igt_wait_helper(&child);
+
+		igt_assert(WEXITSTATUS(ret) == EAGAIN ||
+			   WEXITSTATUS(ret) == 0);
+
+	} while (WEXITSTATUS(ret) == EAGAIN);
+
+	free(buf);
+}
+
 /* Tests the INTEL_performance_query use case where an unprivileged process
  * should be able to configure the OA unit for per-context metrics (for a
  * context associated with that process' drm file descriptor) and the counters
@@ -5096,6 +5339,13 @@ igt_main
 	igt_subtest("whitelisted-registers-userspace-config")
 		test_whitelisted_registers_userspace_config();
 
+	igt_describe("Verify that triggered reports work");
+	igt_subtest("triggered-oa-reports") {
+		igt_require(intel_gen(devid) >= 8);
+		igt_require(i915_perf_revision(drm_fd) >= 6);
+		test_triggered_oa_reports();
+	}
+
 	igt_fixture {
 		/* leave sysctl options in their default state... */
 		write_u64_file("/proc/sys/dev/i915/oa_max_sample_rate", 100000);
-- 
2.20.1

_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [igt-dev] [PATCH 1/2] i915/perf: add tests for triggered OA reports
@ 2020-07-21  1:57 Umesh Nerlige Ramappa
  0 siblings, 0 replies; 10+ messages in thread
From: Umesh Nerlige Ramappa @ 2020-07-21  1:57 UTC (permalink / raw)
  To: igt-dev

From: Lionel G Landwerlin <lionel.g.landwerlin@intel.com>

By whitelisting a couple of registers we can allow an application
batch to trigger OA reports in the OA buffer by switching back & forth
an inverter on the condition logic.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 tests/i915/perf.c | 254 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 252 insertions(+), 2 deletions(-)

diff --git a/tests/i915/perf.c b/tests/i915/perf.c
index 92edc9f1..eb38ea12 100644
--- a/tests/i915/perf.c
+++ b/tests/i915/perf.c
@@ -53,6 +53,8 @@ IGT_TEST_DESCRIPTION("Test the i915 perf metrics streaming interface");
 #define OAREPORT_REASON_SHIFT          19
 #define OAREPORT_REASON_TIMER          (1<<0)
 #define OAREPORT_REASON_INTERNAL       (3<<1)
+#define OAREPORT_REASON_TRIGGER1       (1<<1)
+#define OAREPORT_REASON_TRIGGER2       (1<<2)
 #define OAREPORT_REASON_CTX_SWITCH     (1<<3)
 #define OAREPORT_REASON_GO             (1<<4)
 #define OAREPORT_REASON_CLK_RATIO      (1<<5)
@@ -383,11 +385,17 @@ gen8_read_report_clock_ratios(uint32_t *report,
 	*unslice_freq_mhz = (unslice_freq * 16666) / 1000;
 }
 
+static uint32_t
+gen8_report_reason(const uint32_t *report)
+{
+	/* Reason bits live in the first dword of the report. */
+	return (report[0] >> OAREPORT_REASON_SHIFT) & OAREPORT_REASON_MASK;
+}
+
 static const char *
 gen8_read_report_reason(const uint32_t *report)
 {
-	uint32_t reason = ((report[0] >> OAREPORT_REASON_SHIFT) &
-			   OAREPORT_REASON_MASK);
+	uint32_t reason = gen8_report_reason(report);
 
 	if (reason & (1<<0))
 		return "timer";
@@ -3118,6 +3126,241 @@ emit_stall_timestamp_and_rpc(struct intel_batchbuffer *batch,
 	emit_report_perf_count(batch, dst, report_dst_offset, report_id);
 }
 
+/* The following registers all have the same layout. */
+#define OAREPORTTRIG2 (0x2744)
+#define   OAREPORTTRIG2_INVERT_A_0  (1 << 0)
+#define   OAREPORTTRIG2_INVERT_A_1  (1 << 1)
+#define   OAREPORTTRIG2_INVERT_A_2  (1 << 2)
+#define   OAREPORTTRIG2_INVERT_A_3  (1 << 3)
+#define   OAREPORTTRIG2_INVERT_A_4  (1 << 4)
+#define   OAREPORTTRIG2_INVERT_A_5  (1 << 5)
+#define   OAREPORTTRIG2_INVERT_A_6  (1 << 6)
+#define   OAREPORTTRIG2_INVERT_A_7  (1 << 7)
+#define   OAREPORTTRIG2_INVERT_A_8  (1 << 8)
+#define   OAREPORTTRIG2_INVERT_A_9  (1 << 9)
+#define   OAREPORTTRIG2_INVERT_A_10 (1 << 10)
+#define   OAREPORTTRIG2_INVERT_A_11 (1 << 11)
+#define   OAREPORTTRIG2_INVERT_A_12 (1 << 12)
+#define   OAREPORTTRIG2_INVERT_A_13 (1 << 13)
+#define   OAREPORTTRIG2_INVERT_A_14 (1 << 14)
+#define   OAREPORTTRIG2_INVERT_A_15 (1 << 15)
+#define   OAREPORTTRIG2_INVERT_B_0  (1 << 16)
+#define   OAREPORTTRIG2_INVERT_B_1  (1 << 17)
+#define   OAREPORTTRIG2_INVERT_B_2  (1 << 18)
+#define   OAREPORTTRIG2_INVERT_B_3  (1 << 19)
+#define   OAREPORTTRIG2_INVERT_C_0  (1 << 20)
+#define   OAREPORTTRIG2_INVERT_C_1  (1 << 21)
+#define   OAREPORTTRIG2_INVERT_D_0  (1 << 22)
+#define   OAREPORTTRIG2_THRESHOLD_ENABLE      (1 << 23)
+#define   OAREPORTTRIG2_REPORT_TRIGGER_ENABLE (1 << 31)
+#define OAREPORTTRIG6 (0x2754)
+#define GEN12_OAREPORTTRIG2 (0xd924)
+#define GEN12_OAREPORTTRIG6 (0xd934)
+
+static void
+emit_triggered_oa_report(struct intel_batchbuffer *batch,
+			 uint32_t trigger)
+{
+	/*
+	 * Each platform has two trigger registers, and each of them
+	 * produces its own report reason. The first row is used before
+	 * gen12, the second from gen12 onwards.
+	 */
+	static const uint32_t trigger_regs[2][2] = {
+		{ OAREPORTTRIG2, OAREPORTTRIG6 },
+		{ GEN12_OAREPORTTRIG2, GEN12_OAREPORTTRIG6 },
+	};
+	const uint32_t armed = OAREPORTTRIG2_INVERT_C_1 |
+			       OAREPORTTRIG2_REPORT_TRIGGER_ENABLE;
+	const int row = intel_gen(devid) >= 12;
+	uint32_t reg;
+
+	assert(trigger <= 1);
+	reg = trigger_regs[row][trigger];
+
+	/* Two LRI writes to the same register; the second adds INVERT_D_0. */
+	BEGIN_BATCH(6, 0);
+	OUT_BATCH(MI_LOAD_REGISTER_IMM);
+	OUT_BATCH(reg);
+	OUT_BATCH(armed);
+
+	OUT_BATCH(MI_LOAD_REGISTER_IMM);
+	OUT_BATCH(reg);
+	OUT_BATCH(armed | OAREPORTTRIG2_INVERT_D_0);
+	ADVANCE_BATCH();
+}
+
+static uint64_t
+rcs_timestmap_reg_read(int fd)
+{
+	/* 0x2358 is the render ring timestamp register. */
+	struct drm_i915_reg_read query = { 0 };
+
+	query.offset = 0x2358 | I915_REG_READ_8B_WA;
+	do_ioctl(fd, DRM_IOCTL_I915_REG_READ, &query);
+
+	return query.val;
+}
+
+/*
+ * Verify that a batch can trigger OA reports into the OA buffer using
+ * MI_LRI writes to the two trigger registers (two reports expected each).
+ */
+static void
+test_triggered_oa_reports(void)
+{
+	int oa_exponent = max_oa_exponent_for_period_lte(1000000);
+	uint64_t properties[] = {
+		DRM_I915_PERF_PROP_CTX_HANDLE, UINT64_MAX, /* updated below */
+
+		/* The OA reports are needed in the stream: the triggered
+		 * ones are counted by their report reason below.
+		 */
+		DRM_I915_PERF_PROP_SAMPLE_OA, true,
+
+		/* OA unit configuration */
+		DRM_I915_PERF_PROP_OA_METRICS_SET, test_set->perf_oa_metrics_set,
+		DRM_I915_PERF_PROP_OA_FORMAT, test_set->perf_oa_format,
+
+		/* Sampling exponent computed at the top of the function */
+		DRM_I915_PERF_PROP_OA_EXPONENT, oa_exponent,
+	};
+	struct drm_i915_perf_open_param param = {
+		.flags = I915_PERF_FLAG_FD_CLOEXEC,
+		.num_properties = ARRAY_SIZE(properties) / 2,
+		.properties_ptr = to_user_pointer(properties),
+	};
+	struct drm_i915_perf_record_header *header;
+	drm_intel_bufmgr *bufmgr;
+	drm_intel_context *context;
+	struct igt_helper_process child = {};
+	struct intel_batchbuffer *batch;
+	struct igt_buf src[2], dst[2];
+	uint64_t timestamp32_mask = (1ull << 32) - 1;
+	uint64_t timestamps[2];
+	uint32_t buf_size = 16 * 1024 * 1024;
+	uint8_t *buf = malloc(buf_size);
+	uint32_t ctx_id;
+	int width = 800, height = 600;
+	uint32_t trigger_counts[2] = { 0, };
+	int ret;
+
+	igt_assert(buf);
+	do {
+		igt_fork_helper(&child) {
+			bufmgr = drm_intel_bufmgr_gem_init(drm_fd, 4096);
+			drm_intel_bufmgr_gem_enable_reuse(bufmgr);
+
+			scratch_buf_init(bufmgr, &src[0], width, height, 0xff0000ff);
+			scratch_buf_init(bufmgr, &dst[0], width, height, 0x00ff00ff);
+			scratch_buf_init(bufmgr, &src[1], 2 * width, height, 0xff0000ff);
+			scratch_buf_init(bufmgr, &dst[1], 2 * width, height, 0x00ff00ff);
+
+			batch = intel_batchbuffer_alloc(bufmgr, devid);
+
+			context = drm_intel_gem_context_create(bufmgr);
+			igt_assert(context);
+
+			ret = drm_intel_gem_context_get_id(context, &ctx_id);
+			igt_assert_eq(ret, 0);
+			properties[1] = ctx_id;
+
+			timestamps[0] = rcs_timestmap_reg_read(drm_fd);
+
+			stream_fd = __perf_open(drm_fd, &param, false);
+
+			emit_triggered_oa_report(batch, 0);
+
+			render_copy(batch,
+				    context,
+				    &src[0], 0, 0, width, height,
+				    &dst[0], 0, 0);
+
+			emit_triggered_oa_report(batch, 0);
+
+			emit_triggered_oa_report(batch, 1);
+
+			render_copy(batch,
+				    context,
+				    &src[1], 0, 0, 2 * width, height,
+				    &dst[1], 0, 0);
+
+			emit_triggered_oa_report(batch, 1);
+
+			intel_batchbuffer_flush_with_context(batch, context);
+
+			timestamps[1] = rcs_timestmap_reg_read(drm_fd);
+
+			if (timestamps[1] < timestamps[0] ||
+			    (timestamps[1] & timestamp32_mask) < (timestamps[0] & timestamp32_mask)) {
+				igt_debug("Timestamp rollover, trying again\n");
+				exit(EAGAIN);
+			}
+
+			ret = i915_read_reports_until_timestamp(test_set->perf_oa_format,
+								buf, buf_size,
+								timestamps[0] & timestamp32_mask,
+								timestamps[1] & timestamp32_mask);
+
+			for (size_t offset = 0; offset < ret; offset += header->size) {
+				uint32_t *report;
+
+				header = (void *)(buf + offset);
+
+				igt_assert_eq(header->pad, 0); /* Reserved */
+
+				igt_assert_neq(header->type, DRM_I915_PERF_RECORD_OA_BUFFER_LOST);
+
+				if (header->type == DRM_I915_PERF_RECORD_OA_REPORT_LOST)
+					continue;
+
+				/* Currently the only other record type expected is a
+				 * _SAMPLE. Notably this test will need updating if
+				 * i915-perf is extended in the future with additional
+				 * record types.
+				 */
+				igt_assert_eq(header->type, DRM_I915_PERF_RECORD_SAMPLE);
+
+				report = (void *)(header + 1);
+
+				igt_debug("report ts=0x%08x hw_id=0x%08x reason=%s\n",
+					  report[1], report[2],
+					  gen8_read_report_reason(report));
+
+				if (gen8_report_reason(report) & OAREPORT_REASON_TRIGGER1) {
+					igt_assert_eq(trigger_counts[1], 0);
+					trigger_counts[0]++;
+				}
+				if (gen8_report_reason(report) & OAREPORT_REASON_TRIGGER2) {
+					igt_assert_eq(trigger_counts[0], 2);
+					trigger_counts[1]++;
+				}
+			}
+
+			igt_assert_eq(trigger_counts[0], 2);
+			igt_assert_eq(trigger_counts[1], 2);
+
+			for (int i = 0; i < ARRAY_SIZE(src); i++) {
+				drm_intel_bo_unreference(src[i].bo);
+				drm_intel_bo_unreference(dst[i].bo);
+			}
+
+			intel_batchbuffer_free(batch);
+			drm_intel_gem_context_destroy(context);
+			drm_intel_bufmgr_destroy(bufmgr);
+			__perf_close(stream_fd);
+		}
+
+		ret = igt_wait_helper(&child);
+
+		igt_assert(WEXITSTATUS(ret) == EAGAIN ||
+			   WEXITSTATUS(ret) == 0);
+
+	} while (WEXITSTATUS(ret) == EAGAIN);
+
+	free(buf);
+}
+
 /* Tests the INTEL_performance_query use case where an unprivileged process
  * should be able to configure the OA unit for per-context metrics (for a
  * context associated with that process' drm file descriptor) and the counters
@@ -5096,6 +5339,13 @@ igt_main
 	igt_subtest("whitelisted-registers-userspace-config")
 		test_whitelisted_registers_userspace_config();
 
+	igt_describe("Verify that triggered reports work");
+	igt_subtest("triggered-oa-reports") {
+		igt_require(intel_gen(devid) >= 8);
+		igt_require(i915_perf_revision(drm_fd) >= 6);
+		test_triggered_oa_reports();
+	}
+
 	igt_fixture {
 		/* leave sysctl options in their default state... */
 		write_u64_file("/proc/sys/dev/i915/oa_max_sample_rate", 100000);
-- 
2.20.1

_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply related	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2020-09-24 16:25 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-07-17 23:58 [igt-dev] [PATCH 1/2] i915/perf: add tests for triggered OA reports Umesh Nerlige Ramappa
2020-07-17 23:58 ` [igt-dev] [PATCH 2/2] i915/perf: Sanity check reports in mapped OA buffer Umesh Nerlige Ramappa
2020-07-18  0:26 ` [igt-dev] ✗ Fi.CI.BAT: failure for series starting with [1/2] i915/perf: add tests for triggered OA reports Patchwork
2020-07-21  1:57 [igt-dev] [PATCH 1/2] " Umesh Nerlige Ramappa
2020-07-22  5:38 Umesh Nerlige Ramappa
2020-07-24  0:15 Umesh Nerlige Ramappa
2020-07-30  0:46 Umesh Nerlige Ramappa
2020-07-30 23:00 Umesh Nerlige Ramappa
2020-08-18 20:35 Umesh Nerlige Ramappa
2020-09-24 16:24 ` Umesh Nerlige Ramappa

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.