All of lore.kernel.org
 help / color / mirror / Atom feed
* [igt-dev] [PATCH 1/5] i915/perf: add tests for triggered OA reports
@ 2021-08-03 20:07 Umesh Nerlige Ramappa
  2021-08-03 20:07 ` [igt-dev] [PATCH 2/5] i915/perf: Add tests for mapped OA buffer Umesh Nerlige Ramappa
                   ` (6 more replies)
  0 siblings, 7 replies; 20+ messages in thread
From: Umesh Nerlige Ramappa @ 2021-08-03 20:07 UTC (permalink / raw)
  To: igt-dev, Ashutosh Dixit, Lionel G Landwerlin

From: Lionel G Landwerlin <lionel.g.landwerlin@intel.com>

By whitelisting a couple of registers we can allow an application
batch to trigger OA reports in the OA buffer by toggling an inverter
on the condition logic back and forth.

v2: Wait before sampling the timestamp used to end the OA buffer search
v3:
- Ensure OA regs are whitelisted and reports are triggered only when
  perf_stream_paranoid is set to 0.
- Drop root to trigger reports.
v4:
- wait for children after igt_assert
- use new api for intel batch buffer
- clean up test code

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 tests/i915/perf.c | 421 ++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 404 insertions(+), 17 deletions(-)

diff --git a/tests/i915/perf.c b/tests/i915/perf.c
index e641d5d2..fa3840eb 100644
--- a/tests/i915/perf.c
+++ b/tests/i915/perf.c
@@ -53,6 +53,8 @@ IGT_TEST_DESCRIPTION("Test the i915 perf metrics streaming interface");
 #define OAREPORT_REASON_SHIFT          19
 #define OAREPORT_REASON_TIMER          (1<<0)
 #define OAREPORT_REASON_INTERNAL       (3<<1)
+#define OAREPORT_REASON_TRIGGER1       (1<<1)
+#define OAREPORT_REASON_TRIGGER2       (1<<2)
 #define OAREPORT_REASON_CTX_SWITCH     (1<<3)
 #define OAREPORT_REASON_GO             (1<<4)
 #define OAREPORT_REASON_CLK_RATIO      (1<<5)
@@ -204,6 +206,7 @@ static struct intel_perf *intel_perf = NULL;
 static struct intel_perf_metric_set *test_set = NULL;
 static bool *undefined_a_counters;
 static uint64_t oa_exp_1_millisec;
+struct intel_mmio_data mmio_data;
 
 static igt_render_copyfunc_t render_copy = NULL;
 static uint32_t (*read_report_ticks)(const uint32_t *report,
@@ -294,6 +297,23 @@ __perf_open(int fd, struct drm_i915_perf_open_param *param, bool prevent_pm)
 	return ret;
 }
 
+/*
+ * Query the i915 perf interface revision (I915_PARAM_PERF_REVISION)
+ * on @fd.
+ *
+ * Returns the revision reported by the kernel, or 1 when the getparam
+ * ioctl fails with EINVAL. Any other errno trips an assertion.
+ */
+static int i915_perf_revision(int fd)
+{
+	drm_i915_getparam_t gp = {};
+	int value = 1, ret;
+
+	gp.param = I915_PARAM_PERF_REVISION;
+	gp.value = &value;
+	/* Honour the fd passed by the caller instead of the global drm_fd. */
+	ret = igt_ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp);
+	if (ret == -1) {
+		/* If the param is missing, consider version 1. */
+		igt_assert_eq(errno, EINVAL);
+		return 1;
+	}
+
+	return value;
+}
+
 static int
 lookup_format(int i915_perf_fmt_id)
 {
@@ -3151,6 +3171,283 @@ emit_stall_timestamp_and_rpc(struct intel_bb *ibb,
 	emit_report_perf_count(ibb, dst, report_dst_offset, report_id);
 }
 
+/* The following registers all have the same layout. */
+#define OAREPORTTRIG2 (0x2744)
+#define   OAREPORTTRIG2_INVERT_A_0  (1 << 0)
+#define   OAREPORTTRIG2_INVERT_A_1  (1 << 1)
+#define   OAREPORTTRIG2_INVERT_A_2  (1 << 2)
+#define   OAREPORTTRIG2_INVERT_A_3  (1 << 3)
+#define   OAREPORTTRIG2_INVERT_A_4  (1 << 4)
+#define   OAREPORTTRIG2_INVERT_A_5  (1 << 5)
+#define   OAREPORTTRIG2_INVERT_A_6  (1 << 6)
+#define   OAREPORTTRIG2_INVERT_A_7  (1 << 7)
+#define   OAREPORTTRIG2_INVERT_A_8  (1 << 8)
+#define   OAREPORTTRIG2_INVERT_A_9  (1 << 9)
+#define   OAREPORTTRIG2_INVERT_A_10 (1 << 10)
+#define   OAREPORTTRIG2_INVERT_A_11 (1 << 11)
+#define   OAREPORTTRIG2_INVERT_A_12 (1 << 12)
+#define   OAREPORTTRIG2_INVERT_A_13 (1 << 13)
+#define   OAREPORTTRIG2_INVERT_A_14 (1 << 14)
+#define   OAREPORTTRIG2_INVERT_A_15 (1 << 15)
+#define   OAREPORTTRIG2_INVERT_B_0  (1 << 16)
+#define   OAREPORTTRIG2_INVERT_B_1  (1 << 17)
+#define   OAREPORTTRIG2_INVERT_B_2  (1 << 18)
+#define   OAREPORTTRIG2_INVERT_B_3  (1 << 19)
+#define   OAREPORTTRIG2_INVERT_C_0  (1 << 20)
+#define   OAREPORTTRIG2_INVERT_C_1  (1 << 21)
+#define   OAREPORTTRIG2_INVERT_D_0  (1 << 22)
+#define   OAREPORTTRIG2_THRESHOLD_ENABLE      (1 << 23)
+#define   OAREPORTTRIG2_REPORT_TRIGGER_ENABLE (1 << 31)
+#define OAREPORTTRIG6 (0x2754)
+#define OA_PERF_COUNTER_A(idx) (0x2800 + 8 * (idx))
+#define GEN8_OASTATUS (0x2b08)
+
+#define GEN12_OAREPORTTRIG2 (0xd924)
+#define GEN12_OAREPORTTRIG6 (0xd934)
+#define GEN12_OAG_PERF_COUNTER_A(idx) (0xD980 + 8 * (idx))
+#define GEN12_OAG_OASTATUS (0xdafc)
+
+#define RING_FORCE_TO_NONPRIV_ADDRESS_MASK 0x03fffffc
+
+/*
+ * We have 2 trigger registers that each generate a different
+ * report reason.
+ */
+static const uint32_t gen9_oa_wl[] = {
+	OAREPORTTRIG2,
+	OAREPORTTRIG6,
+	OA_PERF_COUNTER_A(18),
+	GEN8_OASTATUS,
+};
+static const uint32_t gen12_oa_wl[] = {
+	GEN12_OAREPORTTRIG2,
+	GEN12_OAREPORTTRIG6,
+	GEN12_OAG_PERF_COUNTER_A(18),
+	GEN12_OAG_OASTATUS,
+};
+
+static const uint32_t nonpriv_slots[] = {
+	0x24d0, 0x24d4, 0x24d8, 0x24dc, 0x24e0, 0x24e4, 0x24e8, 0x24ec,
+	0x24f0, 0x24f4, 0x24f8, 0x24fc, 0x2010, 0x2014, 0x2018, 0x201c,
+	0x21e0, 0x21e4, 0x21e8, 0x21ec,
+};
+
+struct test_perf {
+	const uint32_t *slots;
+	uint32_t num_slots;
+	const uint32_t *wl;
+	uint32_t num_wl;
+} perf;
+
+/*
+ * Fill in the global 'perf' description of the whitelist to check:
+ * which FORCE_TO_NONPRIV slots to scan and which OA registers to look
+ * for. Gen12+ scans 20 slots and uses the gen12 register list, older
+ * gens scan the first 12 slots and use the gen9 list. With a perf
+ * revision below 7 only the first two list entries (the two trigger
+ * registers) are checked.
+ */
+static void perf_init_whitelist(void)
+{
+	const bool gen12_plus = intel_gen(devid) >= 12;
+	uint32_t full_wl;
+
+	perf.slots = nonpriv_slots;
+	perf.num_slots = gen12_plus ? 20 : 12;
+	perf.wl = gen12_plus ? gen12_oa_wl : gen9_oa_wl;
+
+	full_wl = gen12_plus ? ARRAY_SIZE(gen12_oa_wl) :
+			       ARRAY_SIZE(gen9_oa_wl);
+	perf.num_wl = i915_perf_revision(drm_fd) < 7 ? 2 : full_wl;
+}
+
+/*
+ * Emit two MI_LOAD_REGISTER_IMM writes to one of the whitelisted
+ * trigger registers so that the INVERT_D_0 bit flips between them.
+ *
+ * @ibb: batch buffer the commands are appended to.
+ * @trigger: index into perf.wl, must be 0 or 1 (the two trigger
+ *           registers listed first in the whitelist arrays).
+ */
+static void
+emit_triggered_oa_report(struct intel_bb *ibb, uint32_t trigger)
+{
+	const uint32_t *triggers = perf.wl;
+
+	assert(trigger <= 1);
+
+	/* First write: trigger enabled, only C1 inverted. */
+	intel_bb_out(ibb, MI_LOAD_REGISTER_IMM);
+	intel_bb_out(ibb, triggers[trigger]);
+	intel_bb_out(ibb, OAREPORTTRIG2_INVERT_C_1 |
+			  OAREPORTTRIG2_REPORT_TRIGGER_ENABLE);
+	/* Second write: same value with the D0 inverter additionally set. */
+	intel_bb_out(ibb, MI_LOAD_REGISTER_IMM);
+	intel_bb_out(ibb, triggers[trigger]);
+	intel_bb_out(ibb, OAREPORTTRIG2_INVERT_C_1 |
+			  OAREPORTTRIG2_INVERT_D_0 |
+			  OAREPORTTRIG2_REPORT_TRIGGER_ENABLE);
+}
+
+/*
+ * Read the render ring timestamp register (0x2358) through
+ * DRM_IOCTL_I915_REG_READ on @fd and return its value.
+ */
+static uint64_t
+rcs_timestmap_reg_read(int fd)
+{
+	struct drm_i915_reg_read rr = {
+		.offset = 0x2358 | I915_REG_READ_8B_WA, /* render ring timestamp */
+	};
+
+	do_ioctl(fd, DRM_IOCTL_I915_REG_READ, &rr);
+
+	return rr.val;
+}
+
+/*
+ * Verify that we can trigger OA reports into the OA buffer using
+ * MI_LRI.
+ *
+ * @paranoid is written to perf_stream_paranoid. With paranoid == 0 the
+ * forked child drops root and exactly two reports per trigger register
+ * are expected; otherwise no triggered report is expected.
+ *
+ * The child exits with EAGAIN, and the whole sequence is retried, when
+ * the render ring timestamp rolled over between the two samples taken
+ * around the batch.
+ */
+static void
+test_triggered_oa_reports(int paranoid)
+{
+	int oa_exponent = max_oa_exponent_for_period_lte(1000000);
+	uint64_t properties[] = {
+		DRM_I915_PERF_PROP_CTX_HANDLE, UINT64_MAX, /* updated below */
+
+		/* Note: we have to specify at least one sample property even
+		 * though we aren't interested in samples in this case
+		 */
+		DRM_I915_PERF_PROP_SAMPLE_OA, true,
+
+		/* OA unit configuration */
+		DRM_I915_PERF_PROP_OA_METRICS_SET, test_set->perf_oa_metrics_set,
+		DRM_I915_PERF_PROP_OA_FORMAT, test_set->perf_oa_format,
+		DRM_I915_PERF_PROP_OA_EXPONENT, oa_exponent,
+	};
+	struct drm_i915_perf_open_param param = {
+		.flags = I915_PERF_FLAG_FD_CLOEXEC,
+		.num_properties = ARRAY_SIZE(properties) / 2,
+		.properties_ptr = to_user_pointer(properties),
+	};
+	struct drm_i915_perf_record_header *header;
+	struct buf_ops *bops;
+	uint32_t context;
+	struct igt_helper_process child = {};
+	struct intel_bb *ibb;
+	struct intel_buf src[2], dst[2];
+	uint64_t timestamp32_mask = (1ull << 32) - 1;
+	uint64_t timestamps[2];
+	uint32_t buf_size = 16 * 1024 * 1024;
+	uint8_t *buf = malloc(buf_size);
+	int width = 800;
+	int height = 600;
+	uint32_t trigger_counts[2] = { 0, };
+	int ret;
+
+	igt_assert(buf);
+
+	write_u64_file("/proc/sys/dev/i915/perf_stream_paranoid", paranoid);
+
+	do {
+		igt_fork_helper(&child) {
+			if (!paranoid)
+				igt_drop_root();
+
+			bops = buf_ops_create(drm_fd);
+
+			scratch_buf_init(bops, &src[0], width, height, 0xff0000ff);
+			scratch_buf_init(bops, &dst[0], width, height, 0x00ff00ff);
+			scratch_buf_init(bops, &src[1], 2 * width, height, 0xff0000ff);
+			scratch_buf_init(bops, &dst[1], 2 * width, height, 0x00ff00ff);
+
+			context = gem_context_create(drm_fd);
+			igt_assert(context);
+			ibb = intel_bb_create_with_context(drm_fd, context, BATCH_SZ);
+			properties[1] = context;
+
+			timestamps[0] = rcs_timestmap_reg_read(drm_fd);
+
+			stream_fd = __perf_open(drm_fd, &param, false);
+
+			/* Bracket each copy with a pair of triggers. */
+			emit_triggered_oa_report(ibb, 0);
+
+			render_copy(ibb,
+				    &src[0], 0, 0, width, height,
+				    &dst[0], 0, 0);
+
+			emit_triggered_oa_report(ibb, 0);
+
+			emit_triggered_oa_report(ibb, 1);
+
+			render_copy(ibb,
+				    &src[1], 0, 0, 2 * width, height,
+				    &dst[1], 0, 0);
+
+			emit_triggered_oa_report(ibb, 1);
+
+			intel_bb_flush_render(ibb);
+			intel_bb_sync(ibb);
+
+			timestamps[1] = rcs_timestmap_reg_read(drm_fd);
+
+			/*
+			 * Retry if either the full timestamp or its low 32
+			 * bits (the part passed to the report search below)
+			 * wrapped between the two reads.
+			 */
+			if (timestamps[1] < timestamps[0] ||
+			    (timestamps[1] & timestamp32_mask) < (timestamps[0] & timestamp32_mask)) {
+				igt_debug("Timestamp rollover, trying again\n");
+				exit(EAGAIN);
+			}
+
+			ret = i915_read_reports_until_timestamp(test_set->perf_oa_format,
+								buf, buf_size,
+								timestamps[0] & timestamp32_mask,
+								timestamps[1] & timestamp32_mask);
+
+			for (size_t offset = 0; offset < ret; offset += header->size) {
+				uint32_t *report;
+
+				header = (void *)(buf + offset);
+
+				igt_assert_eq(header->pad, 0); /* Reserved */
+
+				igt_assert_neq(header->type, DRM_I915_PERF_RECORD_OA_BUFFER_LOST);
+
+				if (header->type == DRM_I915_PERF_RECORD_OA_REPORT_LOST)
+					continue;
+
+				/* Currently the only other record type expected is a
+				 * _SAMPLE. Notably this test will need updating if
+				 * i915-perf is extended in the future with additional
+				 * record types.
+				 */
+				igt_assert_eq(header->type, DRM_I915_PERF_RECORD_SAMPLE);
+
+				report = (void *)(header + 1);
+
+				igt_debug("report ts=0x%08x hw_id=0x%08x reason=%s\n",
+					  report[1], report[2],
+					  gen8_read_report_reason(report));
+
+				/*
+				 * Trigger 1 reports must all come before any
+				 * trigger 2 report, and both trigger 1 reports
+				 * must have been seen before the first trigger
+				 * 2 report.
+				 */
+				if (gen8_report_reason(report) & OAREPORT_REASON_TRIGGER1) {
+					igt_assert_eq(trigger_counts[1], 0);
+					trigger_counts[0]++;
+				}
+				if (gen8_report_reason(report) & OAREPORT_REASON_TRIGGER2) {
+					igt_assert_eq(trigger_counts[0], 2);
+					trigger_counts[1]++;
+				}
+			}
+
+			if (paranoid) {
+				igt_assert_eq(trigger_counts[0], 0);
+				igt_assert_eq(trigger_counts[1], 0);
+			} else {
+				igt_assert_eq(trigger_counts[0], 2);
+				igt_assert_eq(trigger_counts[1], 2);
+			}
+
+			for (int i = 0; i < ARRAY_SIZE(src); i++) {
+				intel_buf_close(bops, &src[i]);
+				intel_buf_close(bops, &dst[i]);
+			}
+
+			intel_bb_destroy(ibb);
+			gem_context_destroy(drm_fd, context);
+			buf_ops_destroy(bops);
+			__perf_close(stream_fd);
+		}
+
+		ret = igt_wait_helper(&child);
+
+		igt_assert(WEXITSTATUS(ret) == EAGAIN ||
+			   WEXITSTATUS(ret) == 0);
+
+	} while (WEXITSTATUS(ret) == EAGAIN);
+
+	free(buf);
+}
+
 /* Tests the INTEL_performance_query use case where an unprivileged process
  * should be able to configure the OA unit for per-context metrics (for a
  * context associated with that process' drm file descriptor) and the counters
@@ -4777,6 +5074,88 @@ test_whitelisted_registers_userspace_config(void)
 	i915_perf_remove_config(drm_fd, config_id);
 }
 
+/*
+ * Log @msg followed by the current value of every FORCE_TO_NONPRIV
+ * slot described by perf.slots / perf.num_slots.
+ */
+static void dump_whitelist(const char *msg)
+{
+	int slot = 0;
+
+	igt_debug("%s\n", msg);
+
+	while (slot < perf.num_slots) {
+		igt_debug("FORCE_TO_NON_PRIV_%02d = %08x\n",
+			  slot, intel_register_read(&mmio_data, perf.slots[slot]));
+		slot++;
+	}
+}
+
+/*
+ * Return true if any scanned FORCE_TO_NONPRIV slot currently holds the
+ * address @reg (compared after masking with
+ * RING_FORCE_TO_NONPRIV_ADDRESS_MASK), false otherwise.
+ */
+static bool in_whitelist(uint32_t reg)
+{
+	int slot;
+
+	for (slot = 0; slot < perf.num_slots; slot++) {
+		const uint32_t value =
+			intel_register_read(&mmio_data, perf.slots[slot]);
+		const uint32_t addr = value & RING_FORCE_TO_NONPRIV_ADDRESS_MASK;
+
+		if (addr == reg)
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * Assert that every register in perf.wl is present in the nonpriv
+ * slots (@are_present == true) or absent from them (@are_present ==
+ * false).
+ */
+static void oa_regs_in_whitelist(bool are_present)
+{
+	int i;
+
+	if (are_present) {
+		for (i = 0; i < perf.num_wl; i++)
+			igt_assert(in_whitelist(perf.wl[i]));
+	} else {
+		for (i = 0; i < perf.num_wl; i++)
+			igt_assert(!in_whitelist(perf.wl[i]));
+	}
+}
+
+/*
+ * Open an OA stream with perf_stream_paranoid set to @paranoid and
+ * check, by reading the nonpriv slots over MMIO, that the OA registers
+ * are whitelisted only when @paranoid is 0, and that they are removed
+ * again once the stream is closed.
+ */
+static void test_oa_regs_whitelist(int paranoid)
+{
+	uint64_t properties[] = {
+		DRM_I915_PERF_PROP_SAMPLE_OA, true,
+		DRM_I915_PERF_PROP_OA_METRICS_SET, test_set->perf_oa_metrics_set,
+		DRM_I915_PERF_PROP_OA_FORMAT, test_set->perf_oa_format,
+		DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp_1_millisec,
+
+	};
+	struct drm_i915_perf_open_param param = {
+		.flags = I915_PERF_FLAG_FD_CLOEXEC,
+		/* 16 bytes per (key, value) pair of uint64_t */
+		.num_properties = sizeof(properties) / 16,
+		.properties_ptr = to_user_pointer(properties),
+	};
+	write_u64_file("/proc/sys/dev/i915/perf_stream_paranoid", paranoid);
+	intel_register_access_init(&mmio_data, intel_get_pci_device(),
+				   0, drm_fd);
+	stream_fd = __perf_open(drm_fd, &param, false);
+
+	dump_whitelist("oa whitelisted");
+
+	/*
+	 * OA registers are whitelisted only if paranoid = 0. If so, make sure
+	 * that the registers are in the nonpriv slots. If not, make sure the
+	 * registers are NOT present in the nonpriv slots.
+	 */
+	if (paranoid)
+		oa_regs_in_whitelist(false);
+	else
+		oa_regs_in_whitelist(true);
+
+	__perf_close(stream_fd);
+
+	dump_whitelist("oa remove whitelist");
+
+	/*
+	 * After perf close, check that registers are removed from the nonpriv
+	 * slots.
+	 */
+	oa_regs_in_whitelist(false);
+
+	intel_register_access_fini(&mmio_data);
+}
+
 static unsigned
 read_i915_module_ref(void)
 {
@@ -4889,23 +5268,6 @@ test_sysctl_defaults(void)
 	igt_assert_eq(max_freq, 100000);
 }
 
-static int i915_perf_revision(int fd)
-{
-	drm_i915_getparam_t gp;
-	int value = 1, ret;
-
-	gp.param = I915_PARAM_PERF_REVISION;
-	gp.value = &value;
-	ret = igt_ioctl(drm_fd, DRM_IOCTL_I915_GETPARAM, &gp);
-	if (ret == -1) {
-		/* If the param is missing, consider version 1. */
-		igt_assert_eq(errno, EINVAL);
-		return 1;
-	}
-
-	return value;
-}
-
 igt_main
 {
 	igt_fixture {
@@ -5119,6 +5481,31 @@ igt_main
 	igt_subtest("whitelisted-registers-userspace-config")
 		test_whitelisted_registers_userspace_config();
 
+
+	/* OA register whitelist and triggered-report subtests. */
+	igt_subtest_group {
+		igt_fixture {
+			/* Needs gen9+ and perf revision 6 or newer. */
+			igt_require(intel_gen(devid) > 8);
+			igt_require(i915_perf_revision(drm_fd) >= 6);
+			perf_init_whitelist();
+		}
+
+		igt_describe("Verify that OA registers are whitelisted for paranoid 0");
+		igt_subtest("oa-regs-whitelisted")
+			test_oa_regs_whitelist(0);
+
+		igt_describe("Verify that OA registers are not whitelisted for paranoid 1");
+		igt_subtest("oa-regs-not-whitelisted")
+			test_oa_regs_whitelist(1);
+
+		igt_describe("Verify reports triggered when perf_stream_paranoid is 0");
+		igt_subtest("triggered-oa-reports-paranoid-0")
+			test_triggered_oa_reports(0);
+
+		igt_describe("Verify reports not triggered when perf_stream_paranoid is 1");
+		igt_subtest("triggered-oa-reports-paranoid-1")
+			test_triggered_oa_reports(1);
+	}
+
 	igt_fixture {
 		/* leave sysctl options in their default state... */
 		write_u64_file("/proc/sys/dev/i915/oa_max_sample_rate", 100000);
-- 
2.20.1

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [igt-dev] [PATCH 2/5] i915/perf: Add tests for mapped OA buffer
  2021-08-03 20:07 [igt-dev] [PATCH 1/5] i915/perf: add tests for triggered OA reports Umesh Nerlige Ramappa
@ 2021-08-03 20:07 ` Umesh Nerlige Ramappa
  2021-08-23 21:31   ` Dixit, Ashutosh
  2021-08-03 20:07 ` [igt-dev] [PATCH 3/5] lib/i915/perf: Add new record for mmaped " Umesh Nerlige Ramappa
                   ` (5 subsequent siblings)
  6 siblings, 1 reply; 20+ messages in thread
From: Umesh Nerlige Ramappa @ 2021-08-03 20:07 UTC (permalink / raw)
  To: igt-dev, Ashutosh Dixit, Lionel G Landwerlin

For applications that need a faster way to access reports in the OA
buffer, i915 now provides a way to map the OA buffer to privileged user
space. Validate the mapped OA buffer.

v2: Fail on forked-privileged access to mapped oa buffer (Chris)

Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
---
 include/drm-uapi/i915_drm.h |  33 ++++
 tests/i915/perf.c           | 290 ++++++++++++++++++++++++++++++++++++
 2 files changed, 323 insertions(+)

diff --git a/include/drm-uapi/i915_drm.h b/include/drm-uapi/i915_drm.h
index a1c0030c..bb7d5e73 100644
--- a/include/drm-uapi/i915_drm.h
+++ b/include/drm-uapi/i915_drm.h
@@ -2151,6 +2151,39 @@ struct drm_i915_perf_open_param {
  */
 #define I915_PERF_IOCTL_CONFIG	_IO('i', 0x2)
 
+/*
+ * Returns OA buffer properties to be used with mmap.
+ *
+ * This ioctl is available in perf revision 8.
+ */
+#define I915_PERF_IOCTL_GET_OA_BUFFER_INFO _IOWR('i', 0x3, struct drm_i915_perf_oa_buffer_info)
+
+/**
+ * OA buffer size and offset.
+ *
+ * OA output buffer
+ *   type: 0
+ *   flags: mbz
+ *
+ *   After querying the info, pass (size,offset) to mmap(),
+ *
+ *   mmap(0, info.size, PROT_READ, MAP_PRIVATE, perf_fd, info.offset).
+ *
+ *   Note that only a private (not shared between processes, or across fork())
+ *   read-only mmapping is allowed.
+ *
+ *   Userspace must treat the incoming data as tainted, but it conforms to the OA
+ *   format as specified by user config. The buffer provides reports that have
+ *   OA counters - A, B and C.
+ */
+struct drm_i915_perf_oa_buffer_info {
+	__u32 type;   /* in: 0 for the OA output buffer */
+	__u32 flags;  /* in: must be zero */
+	__u64 size;   /* out: length to pass to mmap() */
+	__u64 offset; /* out: offset to pass to mmap() */
+	__u64 rsvd;   /* mbz */
+};
+
 /*
  * Common to all i915 perf records
  */
diff --git a/tests/i915/perf.c b/tests/i915/perf.c
index fa3840eb..4d4808ce 100644
--- a/tests/i915/perf.c
+++ b/tests/i915/perf.c
@@ -5156,6 +5156,266 @@ static void test_oa_regs_whitelist(int paranoid)
 	intel_register_access_fini(&mmio_data);
 }
 
+#define OA_BUFFER_DATA(tail, head, oa_buffer_size) \
+	(((tail) - (head)) & ((oa_buffer_size) - 1))
+
+#ifndef MAP_FAILED
+#define MAP_FAILED ((void *)-1)
+#endif
+
+/*
+ * Read the platform's OA status register over MMIO and return its low
+ * status bits (3 bits on Haswell and gen12, 4 bits on the remaining
+ * platforms). MMIO access is set up and torn down around the read.
+ */
+static uint32_t oa_status_reg(void)
+{
+	uint32_t status;
+
+	intel_register_access_init(&mmio_data, intel_get_pci_device(),
+				   0, drm_fd);
+	if (IS_HASWELL(devid))
+		/* GEN7_OASTATUS1; 0x2346 was not dword aligned */
+		status = intel_register_read(&mmio_data, 0x2364) & 0x7;
+	else if (IS_GEN12(devid))
+		status = intel_register_read(&mmio_data, GEN12_OAG_OASTATUS) & 0x7;
+	else
+		status = intel_register_read(&mmio_data, GEN8_OASTATUS) & 0xf;
+
+	intel_register_access_fini(&mmio_data);
+
+	return status;
+}
+
+/*
+ * Jump buffer shared between sigtrap() and the sigsetjmp() call site.
+ * sigsetjmp()/siglongjmp() operate on a sigjmp_buf, not a jmp_buf.
+ */
+static sigjmp_buf jmp;
+
+/* Signal handler: jump back to the sigsetjmp() site, returning @sig. */
+static void __attribute__((noreturn)) sigtrap(int sig)
+{
+	siglongjmp(jmp, sig);
+}
+
+/*
+ * Read the second dword at @vaddr and assert that the access raises
+ * SIGSEGV. The previous SIGSEGV handler is restored before returning.
+ */
+static void try_invalid_access(void *vaddr)
+{
+	sighandler_t old_sigsegv;
+	uint32_t dummy;
+
+	/* sigtrap() jumps back to the sigsetjmp() below with the signal number. */
+	old_sigsegv = signal(SIGSEGV, sigtrap);
+	switch (sigsetjmp(jmp, SIGSEGV)) {
+	case SIGSEGV:
+		/* Came back from the handler: the read faulted as expected. */
+		break;
+	case 0:
+		dummy = READ_ONCE(*((uint32_t *)vaddr + 1));
+		(void) dummy;
+		/* fallthrough: the read did not fault, so fail below */
+	default:
+		igt_assert(!"reached");
+		break;
+	}
+	signal(SIGSEGV, old_sigsegv);
+}
+
+/*
+ * Query the OA buffer info of stream_fd, check that mmap() is refused
+ * for a series of invalid protection, flag, offset and size
+ * combinations, then check that the documented combination succeeds.
+ */
+static void invalid_param_map_oa_buffer(void)
+{
+	struct drm_i915_perf_oa_buffer_info oa_buffer = { 0 };
+	void *oa_vaddr = NULL;
+
+	do_ioctl(stream_fd, I915_PERF_IOCTL_GET_OA_BUFFER_INFO, &oa_buffer);
+
+	igt_debug("size        = %llu\n", oa_buffer.size);
+	igt_debug("offset      = %llx\n", oa_buffer.offset);
+
+	/* size must be a power of two */
+	igt_assert_eq(oa_buffer.size & (oa_buffer.size - 1), 0);
+
+	/* try a couple invalid mmaps */
+	/* bad prots */
+	oa_vaddr = mmap(0, oa_buffer.size, PROT_WRITE, MAP_PRIVATE, stream_fd, oa_buffer.offset);
+	igt_assert(oa_vaddr == MAP_FAILED);
+
+	oa_vaddr = mmap(0, oa_buffer.size, PROT_EXEC, MAP_PRIVATE, stream_fd, oa_buffer.offset);
+	igt_assert(oa_vaddr == MAP_FAILED);
+
+	/* bad MAPs */
+	oa_vaddr = mmap(0, oa_buffer.size, PROT_READ, MAP_SHARED, stream_fd, oa_buffer.offset);
+	igt_assert(oa_vaddr == MAP_FAILED);
+
+	/* bad offsets */
+	oa_vaddr = mmap(0, oa_buffer.size, PROT_READ, MAP_PRIVATE, stream_fd, 0);
+	igt_assert(oa_vaddr == MAP_FAILED);
+
+	oa_vaddr = mmap(0, oa_buffer.size, PROT_READ, MAP_PRIVATE, stream_fd, 8192);
+	igt_assert(oa_vaddr == MAP_FAILED);
+
+	oa_vaddr = mmap(0, oa_buffer.size, PROT_READ, MAP_PRIVATE, stream_fd, 11);
+	igt_assert(oa_vaddr == MAP_FAILED);
+
+	/* bad size */
+	oa_vaddr = mmap(0, oa_buffer.size + 1, PROT_READ, MAP_PRIVATE, stream_fd, oa_buffer.offset);
+	igt_assert(oa_vaddr == MAP_FAILED);
+
+	/* do the right thing */
+	oa_vaddr = mmap(0, oa_buffer.size, PROT_READ, MAP_PRIVATE, stream_fd, oa_buffer.offset);
+	igt_assert(oa_vaddr != MAP_FAILED && oa_vaddr != NULL);
+
+	munmap(oa_vaddr, oa_buffer.size);
+}
+
+/*
+ * Query the OA buffer info of stream_fd and map the buffer read-only
+ * and private.
+ *
+ * @size: out, receives the mapping length reported by the kernel.
+ *
+ * Returns the mapped address. Asserts if the ioctl fails, if the size
+ * is not a power of two, if any OA status bit is set, or if mmap()
+ * fails.
+ */
+static void *map_oa_buffer(uint32_t *size)
+{
+	struct drm_i915_perf_oa_buffer_info oa_buffer = { 0 };
+	void *vaddr;
+
+	do_ioctl(stream_fd, I915_PERF_IOCTL_GET_OA_BUFFER_INFO, &oa_buffer);
+
+	igt_debug("size        = %llu\n", oa_buffer.size);
+	igt_debug("offset      = %llx\n", oa_buffer.offset);
+
+	igt_assert_eq(oa_buffer.size & (oa_buffer.size - 1), 0);
+	igt_assert_eq(oa_status_reg(), 0);
+
+	vaddr = mmap(0, oa_buffer.size, PROT_READ, MAP_PRIVATE, stream_fd, oa_buffer.offset);
+	/* mmap() reports failure with MAP_FAILED, not NULL. */
+	igt_assert(vaddr != MAP_FAILED && vaddr != NULL);
+
+	*size = oa_buffer.size;
+
+	return vaddr;
+}
+
+/*
+ * Walk the raw reports in a mapped OA buffer and sanity check
+ * neighbouring reports each time a periodic report is found.
+ *
+ * @oa_vaddr: start of the mapped OA buffer.
+ * @oa_size: length of the mapping in bytes; the walk never reads a
+ *           report that would extend past it.
+ *
+ * The walk stops after 20 periodic reports, at the first report whose
+ * first two dwords are not both non-zero, or at the end of the mapping.
+ * Asserts that at least 3 periodic reports were seen.
+ */
+static void check_reports(void *oa_vaddr, uint32_t oa_size)
+{
+	struct oa_format format = get_oa_format(test_set->perf_oa_format);
+	size_t report_words = format.size >> 2;
+	uint32_t *reports = oa_vaddr;
+	uint32_t *end = reports + oa_size / sizeof(*reports);
+	uint32_t timer_reports = 0;
+
+	for (;
+	     timer_reports < 20 &&
+	     (size_t)(end - reports) >= report_words &&
+	     reports[0] && reports[1];
+	     reports += report_words) {
+		if (!oa_report_is_periodic(oa_exp_1_millisec, reports))
+			continue;
+
+		timer_reports++;
+		/* From the third periodic report on, two earlier reports exist. */
+		if (timer_reports >= 3)
+			sanity_check_reports(reports - 2 * report_words,
+					     reports - report_words,
+					     test_set->perf_oa_format);
+	}
+
+	igt_assert(timer_reports >= 3);
+}
+
+/*
+ * Map the OA buffer of stream_fd, sleep for about 100 sampling periods
+ * and validate the reports found in the mapping.
+ */
+static void check_reports_from_mapped_buffer(void)
+{
+	void *vaddr;
+	uint32_t size;
+	uint32_t period_us = oa_exponent_to_ns(oa_exp_1_millisec) / 1000;
+
+	vaddr = map_oa_buffer(&size);
+
+	/* wait for approx 100 reports */
+	usleep(100 * period_us);
+	check_reports(vaddr, size);
+
+	munmap(vaddr, size);
+}
+
+/*
+ * Expect both the buffer info ioctl and a direct mmap() on stream_fd
+ * to fail with EACCES. Called from a child that has dropped root.
+ */
+static void unprivileged_try_to_map_oa_buffer(void)
+{
+	struct drm_i915_perf_oa_buffer_info oa_buffer = { 0 };
+	void *oa_vaddr;
+
+	do_ioctl_err(stream_fd, I915_PERF_IOCTL_GET_OA_BUFFER_INFO,
+		     &oa_buffer, EACCES);
+
+	/* The ioctl gave us no size/offset, so try fixed values. */
+	oa_vaddr = mmap(0, 4096, PROT_READ, MAP_PRIVATE, stream_fd, 4096);
+	igt_assert(oa_vaddr == MAP_FAILED);
+	igt_assert_eq(errno, EACCES);
+}
+
+/*
+ * Fork a child that drops root and verify it can neither query nor map
+ * the OA buffer.
+ */
+static void unprivileged_map_oa_buffer(void)
+{
+	igt_fork(child, 1) {
+		igt_drop_root();
+		unprivileged_try_to_map_oa_buffer();
+	}
+	igt_waitchildren();
+}
+
+/*
+ * Map the OA buffer in the parent, then verify that a forked child
+ * that has dropped root faults when reading through the inherited
+ * address.
+ */
+static void map_oa_buffer_unprivilege_access(void)
+{
+	void *vaddr;
+	uint32_t size;
+
+	vaddr = map_oa_buffer(&size);
+
+	igt_fork(child, 1) {
+		igt_drop_root();
+		try_invalid_access(vaddr);
+	}
+	igt_waitchildren();
+
+	munmap(vaddr, size);
+}
+
+/*
+ * Map the OA buffer in the parent, then verify that a forked child
+ * (without dropping root) faults when reading through the inherited
+ * address.
+ */
+static void map_oa_buffer_forked_access(void)
+{
+	void *vaddr;
+	uint32_t size;
+
+	vaddr = map_oa_buffer(&size);
+
+	igt_fork(child, 1) {
+		try_invalid_access(vaddr);
+	}
+	igt_waitchildren();
+
+	munmap(vaddr, size);
+}
+
+/*
+ * Open an OA stream into the global stream_fd, run @test_with_fd_open
+ * while the stream is open, then close the stream.
+ *
+ * @test_with_fd_open: callback to run; must not be NULL.
+ */
+static void test_mapped_oa_buffer(void (*test_with_fd_open)(void))
+{
+	uint64_t properties[] = {
+		DRM_I915_PERF_PROP_SAMPLE_OA, true,
+		DRM_I915_PERF_PROP_OA_METRICS_SET, test_set->perf_oa_metrics_set,
+		DRM_I915_PERF_PROP_OA_FORMAT, test_set->perf_oa_format,
+		DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp_1_millisec,
+
+	};
+	struct drm_i915_perf_open_param param = {
+		.flags = I915_PERF_FLAG_FD_CLOEXEC,
+		/* 16 bytes per (key, value) pair of uint64_t */
+		.num_properties = sizeof(properties) / 16,
+		.properties_ptr = to_user_pointer(properties),
+	};
+
+	stream_fd = __perf_open(drm_fd, &param, false);
+
+	igt_assert(test_with_fd_open);
+	test_with_fd_open();
+
+	__perf_close(stream_fd);
+}
+
+/*
+ * Open a stream, map and validate the OA buffer, then unmap it, close
+ * the stream and verify that reading through the stale address faults.
+ */
+static void closed_fd_and_unmapped_access(void)
+{
+	uint64_t properties[] = {
+		DRM_I915_PERF_PROP_SAMPLE_OA, true,
+		DRM_I915_PERF_PROP_OA_METRICS_SET, test_set->perf_oa_metrics_set,
+		DRM_I915_PERF_PROP_OA_FORMAT, test_set->perf_oa_format,
+		DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp_1_millisec,
+
+	};
+	struct drm_i915_perf_open_param param = {
+		.flags = I915_PERF_FLAG_FD_CLOEXEC,
+		/* 16 bytes per (key, value) pair of uint64_t */
+		.num_properties = sizeof(properties) / 16,
+		.properties_ptr = to_user_pointer(properties),
+	};
+	void *vaddr;
+	uint32_t size;
+	uint32_t period_us = oa_exponent_to_ns(oa_exp_1_millisec) / 1000;
+
+	stream_fd = __perf_open(drm_fd, &param, false);
+	vaddr = map_oa_buffer(&size);
+
+	/* wait for approx 100 reports */
+	usleep(100 * period_us);
+	check_reports(vaddr, size);
+
+	munmap(vaddr, size);
+	__perf_close(stream_fd);
+
+	/* vaddr is no longer mapped, so this read must fault. */
+	try_invalid_access(vaddr);
+}
+
 static unsigned
 read_i915_module_ref(void)
 {
@@ -5506,6 +5766,36 @@ igt_main
 			test_triggered_oa_reports(1);
 	}
 
+	/* Mapped OA buffer subtests; need perf revision 8 or newer. */
+	igt_subtest_group {
+		igt_fixture {
+			igt_require(i915_perf_revision(drm_fd) >= 8);
+		}
+
+		igt_describe("Verify mapping of oa buffer");
+		igt_subtest("map-oa-buffer")
+			test_mapped_oa_buffer(check_reports_from_mapped_buffer);
+
+		igt_describe("Verify invalid mappings of oa buffer");
+		igt_subtest("invalid-map-oa-buffer")
+			test_mapped_oa_buffer(invalid_param_map_oa_buffer);
+
+		igt_describe("Verify if non-privileged user can map oa buffer");
+		igt_subtest("non-privileged-map-oa-buffer")
+			test_mapped_oa_buffer(unprivileged_map_oa_buffer);
+
+		/* Was a copy of the previous description. */
+		igt_describe("Verify that non-privileged forked access to mapped buffer fails");
+		igt_subtest("non-privileged-access-vaddr")
+			test_mapped_oa_buffer(map_oa_buffer_unprivilege_access);
+
+		igt_describe("Verify that forked access to mapped buffer fails");
+		igt_subtest("privileged-forked-access-vaddr")
+			test_mapped_oa_buffer(map_oa_buffer_forked_access);
+
+		igt_describe("Unmap buffer, close fd and try to access");
+		igt_subtest("closed-fd-and-unmapped-access")
+			closed_fd_and_unmapped_access();
+	}
+
 	igt_fixture {
 		/* leave sysctl options in their default state... */
 		write_u64_file("/proc/sys/dev/i915/oa_max_sample_rate", 100000);
-- 
2.20.1

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [igt-dev] [PATCH 3/5] lib/i915/perf: Add new record for mmaped OA buffer
  2021-08-03 20:07 [igt-dev] [PATCH 1/5] i915/perf: add tests for triggered OA reports Umesh Nerlige Ramappa
  2021-08-03 20:07 ` [igt-dev] [PATCH 2/5] i915/perf: Add tests for mapped OA buffer Umesh Nerlige Ramappa
@ 2021-08-03 20:07 ` Umesh Nerlige Ramappa
  2021-08-03 20:07 ` [igt-dev] [PATCH 4/5] tools/i915-perf: Add mmapped OA buffer support to i915-perf-recorder Umesh Nerlige Ramappa
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 20+ messages in thread
From: Umesh Nerlige Ramappa @ 2021-08-03 20:07 UTC (permalink / raw)
  To: igt-dev, Ashutosh Dixit, Lionel G Landwerlin

DRM_I915_PERF_RECORD_SAMPLE header is added by i915 when user issues a
read to read the counter reports from the OA buffer. When user mmaps the
OA buffer, user has a view into the raw reports without this header.

Introduce INTEL_PERF_RECORD_TYPE_MULTIPLE_SAMPLE in the perf library to
track reports from an mmapped OA buffer.

While each DRM_I915_PERF_RECORD_SAMPLE record corresponds to a single OA
report, INTEL_PERF_RECORD_TYPE_MULTIPLE_SAMPLE corresponds to multiple
OA reports.

By design, these 2 record types cannot be mixed. The i915-perf-recorder
uses the mmapped OA buffer when the -M option is given. Once -M is chosen,
all samples are INTEL_PERF_RECORD_TYPE_MULTIPLE_SAMPLE.

The way timeline events are created and displayed in GPUvis remains
the same, the only change is that the source of these events now is
multiple INTEL_PERF_RECORD_TYPE_MULTIPLE_SAMPLE records.

Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
---
 lib/i915/perf.c                    |  7 ++-
 lib/i915/perf.h                    |  4 +-
 lib/i915/perf_data.h               |  3 +
 lib/i915/perf_data_reader.c        | 95 +++++++++++++++++++++++++++++-
 lib/i915/perf_data_reader.h        |  2 +
 tools/i915-perf/i915_perf_reader.c |  3 +-
 6 files changed, 106 insertions(+), 8 deletions(-)

diff --git a/lib/i915/perf.c b/lib/i915/perf.c
index 9cfa3bca..3ace687c 100644
--- a/lib/i915/perf.c
+++ b/lib/i915/perf.c
@@ -626,10 +626,11 @@ accumulate_uint40(int a_index,
 void intel_perf_accumulate_reports(struct intel_perf_accumulator *acc,
 				   int oa_format,
 				   const struct drm_i915_perf_record_header *record0,
-				   const struct drm_i915_perf_record_header *record1)
+				   const struct drm_i915_perf_record_header *record1,
+				   uint32_t offset0, uint32_t offset1)
 {
-	const uint32_t *start = (const uint32_t *)(record0 + 1);
-	const uint32_t *end = (const uint32_t *)(record1 + 1);
+	const uint32_t *start = (const uint32_t *)(record0 + 1) + (offset0 / 4);
+	const uint32_t *end = (const uint32_t *)(record1 + 1) + (offset1 / 4);
 	uint64_t *deltas = acc->deltas;
 	int idx = 0;
 	int i;
diff --git a/lib/i915/perf.h b/lib/i915/perf.h
index d2429c47..7706eb43 100644
--- a/lib/i915/perf.h
+++ b/lib/i915/perf.h
@@ -238,7 +238,9 @@ void intel_perf_load_perf_configs(struct intel_perf *perf, int drm_fd);
 void intel_perf_accumulate_reports(struct intel_perf_accumulator *acc,
 				   int oa_format,
 				   const struct drm_i915_perf_record_header *record0,
-				   const struct drm_i915_perf_record_header *record1);
+				   const struct drm_i915_perf_record_header *record1,
+				   uint32_t report_start_offset,
+				   uint32_t report_end_offset);
 
 #ifdef __cplusplus
 };
diff --git a/lib/i915/perf_data.h b/lib/i915/perf_data.h
index fb3556f6..a730a0b4 100644
--- a/lib/i915/perf_data.h
+++ b/lib/i915/perf_data.h
@@ -52,6 +52,9 @@ enum intel_perf_record_type {
 
 	/* intel_perf_record_timestamp_correlation */
 	INTEL_PERF_RECORD_TYPE_TIMESTAMP_CORRELATION,
+
+	/* multiple raw OA reports read from an mmapped OA buffer */
+	INTEL_PERF_RECORD_TYPE_MULTIPLE_SAMPLE,
 };
 
 /* This structure cannot ever change. */
diff --git a/lib/i915/perf_data_reader.c b/lib/i915/perf_data_reader.c
index e69189ac..ad0b2daf 100644
--- a/lib/i915/perf_data_reader.c
+++ b/lib/i915/perf_data_reader.c
@@ -131,6 +131,7 @@ parse_data(struct intel_perf_data_reader *reader)
 
 		switch (header->type) {
 		case DRM_I915_PERF_RECORD_SAMPLE:
+		case INTEL_PERF_RECORD_TYPE_MULTIPLE_SAMPLE:
 			append_record(reader, header);
 			break;
 
@@ -254,6 +255,7 @@ static void
 append_timeline_event(struct intel_perf_data_reader *reader,
 		      uint64_t ts_start, uint64_t ts_end,
 		      uint32_t record_start, uint32_t record_end,
+		      uint32_t start_offset, uint32_t end_offset,
 		      uint32_t hw_id)
 {
 	if (reader->n_timelines >= reader->n_allocated_timelines) {
@@ -274,12 +276,81 @@ append_timeline_event(struct intel_perf_data_reader *reader,
 		correlate_gpu_timestamp(reader, ts_end);
 	reader->timelines[reader->n_timelines].record_start = record_start;
 	reader->timelines[reader->n_timelines].record_end = record_end;
+	reader->timelines[reader->n_timelines].report_start_offset = start_offset;
+	reader->timelines[reader->n_timelines].report_end_offset = end_offset;
 	reader->timelines[reader->n_timelines].hw_id = hw_id;
 	reader->n_timelines++;
 }
 
+/* Locates one OA report inside reader->records and caches its fields. */
+struct perf_record_report {
+	uint32_t record_idx;	/* index into reader->records */
+	uint32_t report_offset;	/* byte offset of the report past the record header */
+	uint32_t ctx_id;	/* set by __init_perf_record_report() */
+	uint64_t gpu_ts;	/* set by __init_perf_record_report() */
+};
+
+/*
+ * Fill prr->ctx_id and prr->gpu_ts from the report that
+ * prr->record_idx / prr->report_offset point at.
+ */
 static void
-generate_cpu_events(struct intel_perf_data_reader *reader)
+__init_perf_record_report(struct intel_perf_data_reader *reader,
+			  struct perf_record_report *prr)
+{
+	const struct drm_i915_perf_record_header *record;
+	const uint8_t *report;
+
+	record = reader->records[prr->record_idx];
+	/* The report data starts right after the record header. */
+	report = (const uint8_t *)(record + 1) + prr->report_offset;
+	prr->ctx_id = oa_report_ctx_id(&reader->devinfo, report);
+	prr->gpu_ts = oa_report_timestamp(report);
+}
+
+/*
+ * Refresh ctx_id/gpu_ts of both @prev and @curr from their reports and
+ * return true when the two context ids differ.
+ */
+static bool
+__context_switched(struct intel_perf_data_reader *reader,
+		   struct perf_record_report *prev,
+		   struct perf_record_report *curr)
+{
+	__init_perf_record_report(reader, prev);
+	__init_perf_record_report(reader, curr);
+
+	return (prev->ctx_id != curr->ctx_id);
+}
+
+/*
+ * Append a timeline event spanning from report @prev to report @curr,
+ * attributed to the context id of @prev.
+ */
+static void
+append_timeline(struct intel_perf_data_reader *reader,
+		struct perf_record_report *prev,
+		struct perf_record_report *curr)
+{
+	append_timeline_event(reader, prev->gpu_ts, curr->gpu_ts,
+			prev->record_idx, curr->record_idx,
+			prev->report_offset, curr->report_offset,
+			prev->ctx_id);
+}
+
+/*
+ * Build timeline events from records that each carry several raw OA
+ * reports. An event is appended every time the context id changes
+ * between consecutive reports, plus one final event covering the
+ * reports seen since the last switch.
+ */
+static void
+generate_cpu_events_multi_sample(struct intel_perf_data_reader *reader)
+{
+	uint32_t report_size = reader->metric_set->perf_raw_size;
+	struct perf_record_report prev = {0}, curr = {0}, last = {0};
+	bool have_report = false;
+	int i;
+
+	/* A zero report size would never advance the inner loop. */
+	if (!report_size)
+		return;
+
+	for (i = 0; i < reader->n_records; i++) {
+		uint32_t length = reader->records[i]->size -
+				  sizeof(*(reader->records[i]));
+
+		curr.record_idx = i;
+		for (curr.report_offset = 0;
+		     curr.report_offset < length;
+		     curr.report_offset += report_size) {
+			if (__context_switched(reader, &prev, &curr)) {
+				append_timeline(reader, &prev, &curr);
+				prev = curr;
+			}
+			/* Remember the last report that really exists. */
+			last = curr;
+			have_report = true;
+		}
+	}
+
+	/*
+	 * Close the trailing event when reports followed the last switch.
+	 * Compare the position fields rather than memcmp() the structs
+	 * (padding bytes are unspecified), and end on 'last' because
+	 * curr.report_offset is past the end of the record here.
+	 */
+	if (have_report &&
+	    (prev.record_idx != last.record_idx ||
+	     prev.report_offset != last.report_offset))
+		append_timeline(reader, &prev, &last);
+}
+
+static void
+generate_cpu_events_oa_sample(struct intel_perf_data_reader *reader)
 {
 	uint32_t last_header_idx = 0;
 	const struct drm_i915_perf_record_header *last_header = reader->records[0],
@@ -303,14 +374,32 @@ generate_cpu_events(struct intel_perf_data_reader *reader)
 		if (last_ctx_id == current_ctx_id)
 			continue;
 
-		append_timeline_event(reader, gpu_ts_start, gpu_ts_end, last_header_idx, i, last_ctx_id);
+		append_timeline_event(reader, gpu_ts_start, gpu_ts_end,
+				      last_header_idx, i,
+				      0, 0,
+				      last_ctx_id);
 
 		last_header = current_header;
 		last_header_idx = i;
 	}
 
 	if (last_header != current_header)
-		append_timeline_event(reader, gpu_ts_start, gpu_ts_end, last_header_idx, reader->n_records - 1, last_ctx_id);
+		append_timeline_event(reader, gpu_ts_start, gpu_ts_end,
+				      last_header_idx, reader->n_records - 1,
+				      0, 0,
+				      last_ctx_id);
+}
+
+static void
+generate_cpu_events(struct intel_perf_data_reader *reader)
+{
+	const struct drm_i915_perf_record_header *hdr = reader->records[0];
+
+	if (hdr->type == DRM_I915_PERF_RECORD_SAMPLE)
+		generate_cpu_events_oa_sample(reader);
+
+	if (hdr->type == INTEL_PERF_RECORD_TYPE_MULTIPLE_SAMPLE)
+		generate_cpu_events_multi_sample(reader);
 }
 
 static void
diff --git a/lib/i915/perf_data_reader.h b/lib/i915/perf_data_reader.h
index f625f12d..a9f14a1d 100644
--- a/lib/i915/perf_data_reader.h
+++ b/lib/i915/perf_data_reader.h
@@ -44,6 +44,8 @@ struct intel_perf_timeline_item {
 	/* Offsets into intel_perf_data_reader.records */
 	uint32_t record_start;
 	uint32_t record_end;
+	uint32_t report_start_offset;
+	uint32_t report_end_offset;
 
 	uint32_t hw_id;
 
diff --git a/tools/i915-perf/i915_perf_reader.c b/tools/i915-perf/i915_perf_reader.c
index e51f5a5d..12638685 100644
--- a/tools/i915-perf/i915_perf_reader.c
+++ b/tools/i915-perf/i915_perf_reader.c
@@ -252,7 +252,8 @@ main(int argc, char *argv[])
 			item->hw_id, item->hw_id == 0xffffffff ? "(idle)" : "");
 
 		intel_perf_accumulate_reports(&accu, reader.metric_set->perf_oa_format,
-					      i915_report0, i915_report1);
+					      i915_report0, i915_report1,
+					      item->report_start_offset, item->report_end_offset);
 
 		for (uint32_t c = 0; c < n_counters; c++) {
 			struct intel_perf_logical_counter *counter = counters[c];
-- 
2.20.1

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [igt-dev] [PATCH 4/5] tools/i915-perf: Add mmapped OA buffer support to i915-perf-recorder
  2021-08-03 20:07 [igt-dev] [PATCH 1/5] i915/perf: add tests for triggered OA reports Umesh Nerlige Ramappa
  2021-08-03 20:07 ` [igt-dev] [PATCH 2/5] i915/perf: Add tests for mapped OA buffer Umesh Nerlige Ramappa
  2021-08-03 20:07 ` [igt-dev] [PATCH 3/5] lib/i915/perf: Add new record for mmaped " Umesh Nerlige Ramappa
@ 2021-08-03 20:07 ` Umesh Nerlige Ramappa
  2021-08-24  1:05   ` Dixit, Ashutosh
                     ` (2 more replies)
  2021-08-03 20:07 ` [igt-dev] [PATCH 5/5] tools/i915-perf: Add a command to trigger a report in OA buffer Umesh Nerlige Ramappa
                   ` (3 subsequent siblings)
  6 siblings, 3 replies; 20+ messages in thread
From: Umesh Nerlige Ramappa @ 2021-08-03 20:07 UTC (permalink / raw)
  To: igt-dev, Ashutosh Dixit, Lionel G Landwerlin

Currently, reports from the OA buffer are read from the perf_fd. The kernel
patches enable mmapping the OA buffer into user space to allow for faster
report queries across different platforms and engines.

Enable the OA buffer to be mmapped by the recorder tool when the command
line option -M is given.

Example:
i915-perf-recorder -m RenderBasic -s 8000 -k "mono" -M

The recorder processes the mmapped OA buffer by periodically reading the
OA TAIL PTR register from a batch and determining the number of reports
available. These reports are then logged in the circular buffer as
INTEL_PERF_RECORD_TYPE_MULTIPLE_SAMPLE records. In this implementation,
the TAIL is checked with the same period as correlation timestamps are
written (1 sec).

Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
---
 tools/i915-perf/i915_perf_recorder.c | 488 ++++++++++++++++++++++++++-
 1 file changed, 476 insertions(+), 12 deletions(-)

diff --git a/tools/i915-perf/i915_perf_recorder.c b/tools/i915-perf/i915_perf_recorder.c
index 00195290..6b2f8710 100644
--- a/tools/i915-perf/i915_perf_recorder.c
+++ b/tools/i915-perf/i915_perf_recorder.c
@@ -34,6 +34,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <sys/ioctl.h>
+#include <sys/mman.h>
 #include <sys/stat.h>
 #include <sys/sysmacros.h>
 #include <sys/time.h>
@@ -331,6 +332,16 @@ get_device_timestamp_frequency(const struct intel_device_info *devinfo, int drm_
 	return 12000000;
 }
 
+struct bb_context {
+	struct drm_i915_gem_relocation_entry reloc[2];
+	struct drm_i915_gem_exec_object2 obj[2];
+	struct drm_i915_gem_execbuffer2 execbuf;
+	uint32_t *batch;
+	uint32_t *dest;
+	uint32_t offset;
+	uint32_t reloc_idx;
+};
+
 struct recording_context {
 	int drm_fd;
 	int perf_fd;
@@ -355,6 +366,22 @@ struct recording_context {
 	int command_fifo_fd;
 
 	uint64_t poll_period;
+	double perf_period;
+
+	uint32_t max_record_length;
+
+	uint8_t *oa_buffer_vaddr;
+	uint32_t oa_buffer_size;
+	uint32_t tail_offset;
+	uint32_t head_offset;
+	uint32_t oa_status_reg;
+	uint32_t oa_buffer_reg;
+	uint32_t oa_tail_reg;
+
+	int zero_fd;
+	void *zero_mem;
+
+	struct bb_context bb;
 };
 
 static int
@@ -527,6 +554,367 @@ write_i915_perf_data(FILE *output, int perf_fd)
 	return true;
 }
 
+static int gem_create(int fd, uint64_t size, uint32_t *handle)
+{
+	struct drm_i915_gem_create create = {
+		.size = size,
+		.handle = 0,
+	};
+	int ret = 0;
+
+	if (perf_ioctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create))
+		ret = -errno;
+	else
+		*handle = create.handle;
+
+	errno = 0;
+	return ret;
+}
+
+static int gem_set_domain(int fd, uint32_t handle, uint32_t read, uint32_t write)
+{
+	struct drm_i915_gem_set_domain set_domain = {
+		.handle = handle,
+		.read_domains = read,
+		.write_domain = write,
+	};
+	int ret = 0;
+
+	if (perf_ioctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain))
+		ret = -errno;
+
+	errno = 0;
+	return ret;
+}
+
+static void *gem_mmap_cpu(int fd, uint32_t handle, uint64_t offset, uint64_t size,
+			  unsigned int prot)
+{
+	struct drm_i915_gem_mmap arg = {
+		.handle = handle,
+		.offset = offset,
+		.size = size,
+		.addr_ptr = 0,
+		.flags = 0,
+	};
+
+	if (perf_ioctl(fd, DRM_IOCTL_I915_GEM_MMAP, &arg))
+		return NULL;
+
+	return (void *)(uintptr_t)arg.addr_ptr;
+}
+
+static void gem_close(int fd, uint32_t handle)
+{
+	struct drm_gem_close close_bo = {
+		.handle = handle,
+	};
+
+	perf_ioctl(fd, DRM_IOCTL_GEM_CLOSE, &close_bo);
+}
+
+static int gem_execbuf(int fd, struct drm_i915_gem_execbuffer2 *execbuf)
+{
+	int ret = 0;
+	if (perf_ioctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf))
+		ret = -errno;
+
+	errno = 0;
+	return ret;
+}
+
+#define BATCH_SIZE 4096
+#define DEST_SIZE 4096
+
+#define _MI_INSTR(opcode, flags)	(((opcode) << 23) | (flags))
+#define MI_STORE_REGISTER_MEM      	_MI_INSTR(0x24, 1)
+#define MI_STORE_REGISTER_MEM_GEN8 	_MI_INSTR(0x24, 2)
+#define MI_BATCH_BUFFER_END		(0xA << 23)
+
+static void
+bb_emit_srm(struct bb_context *bb, uint32_t reg, uint32_t devid)
+{
+	bool gen8_plus = devid >= 8;
+
+	assert(bb->reloc_idx < ARRAY_SIZE(bb->reloc));
+	assert(bb->offset < BATCH_SIZE);
+
+	bb->batch[bb->offset++] = gen8_plus ? MI_STORE_REGISTER_MEM_GEN8 :
+					      MI_STORE_REGISTER_MEM;
+	bb->batch[bb->offset++] = reg;
+
+	bb->reloc[bb->reloc_idx].target_handle = bb->obj[0].handle;
+	bb->reloc[bb->reloc_idx].presumed_offset = bb->obj[0].offset;
+	bb->reloc[bb->reloc_idx].offset = bb->offset * sizeof(uint32_t);
+	bb->reloc[bb->reloc_idx].delta = bb->reloc_idx * sizeof(uint32_t);
+	bb->reloc[bb->reloc_idx].read_domains = I915_GEM_DOMAIN_RENDER;
+	bb->reloc[bb->reloc_idx].write_domain = I915_GEM_DOMAIN_RENDER;
+
+	bb->batch[bb->offset++] = bb->reloc[bb->reloc_idx].delta;
+	if (gen8_plus)
+		bb->batch[bb->offset++] = 0;
+
+	bb->reloc_idx++;
+}
+
+static void
+bb_emit_bbe(struct bb_context *bb)
+{
+	bb->batch[bb->offset++] = MI_BATCH_BUFFER_END;
+}
+
+static int
+bb_exec(int fd, struct bb_context *bb)
+{
+	struct drm_i915_gem_execbuffer2 *execbuf = &bb->execbuf;
+	int ret;
+
+	memset(execbuf, 0, sizeof(*execbuf));
+	if (bb->reloc_idx) {
+		bb->obj[1].relocs_ptr = (uintptr_t)bb->reloc;
+		bb->obj[1].relocation_count = bb->reloc_idx;
+		execbuf->buffers_ptr = (uintptr_t)bb->obj;
+		execbuf->buffer_count = 2;
+	} else {
+		bb->obj[1].relocs_ptr = 0;
+		bb->obj[1].relocation_count = 0;
+		execbuf->buffers_ptr = (uintptr_t)&bb->obj[1];
+		execbuf->buffer_count = 1;
+	}
+	execbuf->flags = I915_EXEC_RENDER;
+	execbuf->rsvd1 = 0;
+
+	ret = gem_execbuf(fd, execbuf);
+
+	bb->reloc_idx = 0;
+	bb->offset = 0;
+
+	return ret;
+}
+
+static void
+bb_ctx_fini(struct recording_context *ctx)
+{
+	struct bb_context *bb = &ctx->bb;
+
+	if (bb->batch)
+		munmap(bb->batch, BATCH_SIZE);
+
+	if (bb->obj[1].handle)
+		gem_close(ctx->drm_fd, bb->obj[1].handle);
+
+	if (bb->obj[0].handle)
+		gem_close(ctx->drm_fd, bb->obj[0].handle);
+}
+
+static int
+bb_ctx_init(struct recording_context *ctx)
+{
+	struct bb_context *bb = &ctx->bb;
+	struct drm_i915_gem_exec_object2 *obj = bb->obj;
+	int ret, fd = ctx->drm_fd;
+
+	memset(bb, 0, sizeof(struct bb_context));
+	ret = gem_create(fd, DEST_SIZE, &obj[0].handle);
+	if (ret)
+		goto err;
+
+	ret = gem_create(fd, BATCH_SIZE, &obj[1].handle);
+	if (ret)
+		goto err;
+
+	bb->batch = gem_mmap_cpu(fd, obj[1].handle, 0, BATCH_SIZE, PROT_WRITE);
+	if (!bb->batch || gem_set_domain(fd, obj[1].handle,
+					 I915_GEM_DOMAIN_CPU,
+					 I915_GEM_DOMAIN_CPU))
+		goto err;
+
+	return ret;
+err:
+	bb_ctx_fini(ctx);
+	return ret;
+}
+
+#define OA_PTR_MASK 0xffffffc0
+
+#define GEN8_OABUFFER	0x2b14
+#define GEN8_OATAILPTR	0x2B10
+#define GEN8_OASTATUS   0x2b08
+#define  GEN8_OASTATUS_OABUFFER_OVERFLOW    (1 << 1)
+#define  GEN8_OASTATUS_REPORT_LOST	    (1 << 0)
+
+#define GEN12_OAG_OABUFFER   0xdb08
+#define GEN12_OAG_OATAILPTR  0xdb04
+#define GEN12_OAG_OASTATUS   0xdafc
+
+static void
+init_oa_regs(struct recording_context *ctx)
+{
+	if (ctx->devinfo->graphics_ver >= 12) {
+		ctx->oa_status_reg = GEN12_OAG_OASTATUS;
+		ctx->oa_buffer_reg = GEN12_OAG_OABUFFER;
+		ctx->oa_tail_reg = GEN12_OAG_OATAILPTR;
+	} else if (ctx->devinfo->graphics_ver >= 9) {
+		ctx->oa_status_reg = GEN8_OASTATUS;
+		ctx->oa_buffer_reg = GEN8_OABUFFER;
+		ctx->oa_tail_reg = GEN8_OATAILPTR;
+	}
+}
+
+static int
+__read_oa_reg(struct recording_context *ctx, uint32_t reg, uint32_t *val)
+{
+	struct bb_context *bb = &ctx->bb;
+	int ret, fd = ctx->drm_fd;
+
+	bb_emit_srm(bb, reg, ctx->perf->devinfo.devid);
+	bb_emit_bbe(bb);
+	ret = bb_exec(fd, bb);
+	if (ret) {
+		fprintf(stderr, "failed to read register %08x, %s\n",
+			reg, strerror(errno));
+		return ret;
+	}
+
+	bb->dest = gem_mmap_cpu(fd, bb->obj[0].handle, 0, DEST_SIZE, PROT_READ);
+	assert(bb->dest);
+	ret = gem_set_domain(fd, bb->obj[0].handle, I915_GEM_DOMAIN_CPU, 0);
+	if (ret) {
+		fprintf(stderr, "failed to set read domain to cpu %s\n",
+			strerror(errno));
+		return ret;
+	}
+
+	*val = bb->dest[0];
+	munmap(bb->dest, DEST_SIZE);
+
+	return 0;
+}
+
+static bool
+__process_oa_status(struct recording_context *ctx)
+{
+	struct drm_i915_perf_record_header header = {
+		.type = 0,
+		.pad = 0,
+		.size = sizeof(header)
+	};
+	uint32_t status;
+
+	if (__read_oa_reg(ctx, ctx->oa_status_reg, &status))
+		return false;
+
+	if (status & GEN8_OASTATUS_OABUFFER_OVERFLOW) {
+		header.type = DRM_I915_PERF_RECORD_OA_BUFFER_LOST;
+		if (fwrite(&header, sizeof(header), 1, ctx->output_stream) != 1)
+			return false;
+
+		ctx->head_offset = 0;
+	}
+
+	if (status & GEN8_OASTATUS_REPORT_LOST) {
+		header.type = DRM_I915_PERF_RECORD_OA_REPORT_LOST;
+		if (fwrite(&header, sizeof(header), 1, ctx->output_stream) != 1)
+			return false;
+	}
+
+	return true;
+}
+
+static inline uint32_t
+__data_available(uint32_t tail, uint32_t head, uint32_t size)
+{
+	return tail >= head ? tail - head : size - (head - tail);
+}
+
+static inline uint32_t
+__rewind_tail(uint32_t tail, uint32_t report_size, uint32_t oa_buffer_size)
+{
+	return tail >= report_size ?
+	       tail - report_size :
+	       oa_buffer_size - (report_size - tail);
+}
+
+static bool
+write_i915_perf_mmapped_data(struct recording_context *ctx)
+{
+	uint32_t report_size = ctx->metric_set->perf_raw_size;
+	struct drm_i915_perf_record_header header;
+	uint32_t buff, tail, data_len;
+
+	if (!__process_oa_status(ctx))
+		return false;
+
+	if (__read_oa_reg(ctx, ctx->oa_buffer_reg, &buff))
+		return false;
+	buff = buff & OA_PTR_MASK;
+
+	if (__read_oa_reg(ctx, ctx->oa_tail_reg, &tail))
+		return false;
+	tail = (tail & OA_PTR_MASK) - buff;
+
+	/*
+	 * tail increments in 64 bytes, so round up to nearest report. note that
+	 * oa buffer size may not be a power of 2 and a report may split across
+	 * the boundary of the oa buffer
+	 */
+	data_len = __data_available(tail, ctx->head_offset, ctx->oa_buffer_size);
+	assert(data_len <= ctx->oa_buffer_size);
+
+	tail -= data_len % report_size;
+	ctx->tail_offset = tail;
+
+	while (ctx->tail_offset != ctx->head_offset) {
+		const uint32_t *report32 = (uint32_t *)(ctx->oa_buffer_vaddr +
+							ctx->tail_offset);
+
+		if (report32[0] || report32[1])
+			break;
+
+		ctx->tail_offset = __rewind_tail(ctx->tail_offset, report_size,
+						 ctx->oa_buffer_size);
+	}
+
+	data_len = __data_available(ctx->tail_offset, ctx->head_offset,
+				    ctx->oa_buffer_size);
+	if (!data_len)
+		return true;
+
+	assert(data_len < ctx->oa_buffer_size);
+
+	header.type = INTEL_PERF_RECORD_TYPE_MULTIPLE_SAMPLE;
+	while (data_len > 0) {
+		uint32_t len;
+
+		len = MIN(data_len, ctx->max_record_length);
+		len = MIN(len, ctx->oa_buffer_size - ctx->head_offset);
+
+		header.size = sizeof(header) + len;
+		if (fwrite(&header, sizeof(header), 1, ctx->output_stream) != 1)
+			return false;
+
+		if (fwrite(ctx->oa_buffer_vaddr + ctx->head_offset, len, 1, ctx->output_stream) != 1)
+			return false;
+
+		data_len -= len;
+		ctx->head_offset = ctx->head_offset + len;
+
+		assert(ctx->head_offset <= ctx->oa_buffer_size);
+		if (ctx->head_offset == ctx->oa_buffer_size)
+			ctx->head_offset = 0;
+	}
+
+	/*
+	 * We do not have permissions to update the OA HEAD register, so we
+	 * would end up with a buffer lost error once the OA buffer fills up. To
+	 * avoid that, drain the OA buffer into a zero mem device. The drain
+	 * eventually updates the head register in i915.
+	 */
+	while (read(ctx->perf_fd, ctx->zero_mem, ctx->oa_buffer_size) > 0 || errno == EINTR);
+
+	return true;
+}
+
 static uint64_t timespec_diff(struct timespec *begin,
 			      struct timespec *end)
 {
@@ -667,6 +1055,21 @@ read_command_file(struct recording_context *ctx)
 	}
 }
 
+static void
+mmap_oa_buffer(struct recording_context *ctx)
+{
+	struct drm_i915_perf_oa_buffer_info oa_info = {0};
+	void *vaddr;
+
+	perf_ioctl(ctx->perf_fd, I915_PERF_IOCTL_GET_OA_BUFFER_INFO, &oa_info);
+	vaddr = mmap(0, oa_info.size, PROT_READ, MAP_PRIVATE, ctx->perf_fd,
+		     oa_info.offset);
+	assert(vaddr != NULL);
+
+	ctx->oa_buffer_size = oa_info.size;
+	ctx->oa_buffer_vaddr = vaddr;
+}
+
 static void
 print_metric_sets(const struct intel_perf *perf)
 {
@@ -761,10 +1164,19 @@ teardown_recording_context(struct recording_context *ctx)
 
 	free(ctx->circular_buffer.data);
 
+	if (ctx->oa_buffer_vaddr)
+		munmap(ctx->oa_buffer_vaddr, ctx->oa_buffer_size);
+
+	bb_ctx_fini(ctx);
+
 	if (ctx->perf_fd != -1)
 		close(ctx->perf_fd);
 	if (ctx->drm_fd != -1)
 		close(ctx->drm_fd);
+	if (ctx->zero_mem)
+		munmap(ctx->zero_mem, ctx->oa_buffer_size);
+	if (ctx->zero_fd != -1)
+		close(ctx->zero_fd);
 }
 
 int
@@ -781,6 +1193,7 @@ main(int argc, char *argv[])
 		{"command-fifo",         required_argument, 0, 'f'},
 		{"cpu-clock",            required_argument, 0, 'k'},
 		{"poll-period",          required_argument, 0, 'P'},
+		{"mmap-buffer",                no_argument, 0, 'M'},
 		{0, 0, 0, 0}
 	};
 	const struct {
@@ -791,7 +1204,7 @@ main(int argc, char *argv[])
 		{ CLOCK_MONOTONIC,     "mono" },
 		{ CLOCK_MONOTONIC_RAW, "mono_raw" },
 	};
-	double corr_period = 1.0, perf_period = 0.001;
+	double corr_period = 1.0;
 	const char *metric_name = NULL, *output_file = "i915_perf.record";
 	struct intel_perf_metric_set *metric_set;
 	struct intel_perf_record_timestamp_correlation initial_correlation;
@@ -799,7 +1212,7 @@ main(int argc, char *argv[])
 	uint64_t corr_period_ns, poll_time_ns;
 	uint32_t circular_size = 0;
 	int opt;
-	bool list_counters = false;
+	bool list_counters = false, mmap_buffer = false;
 	FILE *output = NULL;
 	struct recording_context ctx = {
 		.drm_fd = -1,
@@ -810,9 +1223,19 @@ main(int argc, char *argv[])
 
 		/* 5 ms poll period */
 		.poll_period = 5 * 1000 * 1000,
+		.perf_period = 0.001,
+
+		.oa_buffer_vaddr = NULL,
+		.head_offset = 0,
+		.tail_offset = 0,
+		.oa_buffer_size = 0,
+
+		.zero_fd = -1,
+		.zero_mem = NULL,
 	};
 
-	while ((opt = getopt_long(argc, argv, "hc:p:m:Co:s:f:k:P:", long_options, NULL)) != -1) {
+	memset(&ctx.bb, 0, sizeof(ctx.bb));
+	while ((opt = getopt_long(argc, argv, "hc:p:m:Co:s:f:k:P:M", long_options, NULL)) != -1) {
 		switch (opt) {
 		case 'h':
 			usage(argv[0]);
@@ -821,7 +1244,7 @@ main(int argc, char *argv[])
 			corr_period = atof(optarg);
 			break;
 		case 'p':
-			perf_period = atof(optarg);
+			ctx.perf_period = atof(optarg);
 			break;
 		case 'm':
 			metric_name = optarg;
@@ -857,6 +1280,9 @@ main(int argc, char *argv[])
 		case 'P':
 			ctx.poll_period = MAX(100, atol(optarg)) * 1000;
 			break;
+		case 'M':
+			mmap_buffer = true;
+			break;
 		default:
 			fprintf(stderr, "Internal error: "
 				"unexpected getopt value: %d\n", opt);
@@ -876,6 +1302,10 @@ main(int argc, char *argv[])
 		fprintf(stderr, "No device info found.\n");
 		goto fail;
 	}
+	if (ctx.devinfo->graphics_ver < 9 && mmap_buffer) {
+		fprintf(stderr, "mmap_buffer not supported on graphics version less than 9\n");
+		goto fail;
+	}
 
 	fprintf(stdout, "Device name=%s gen=%i gt=%i id=0x%x\n",
 		ctx.devinfo->codename, ctx.devinfo->graphics_ver, ctx.devinfo->gt, ctx.devid);
@@ -926,6 +1356,11 @@ main(int argc, char *argv[])
 		goto fail;
 	}
 
+	/* header size is a uint16_t, so accomodate the header first */
+	ctx.max_record_length = 65535 - sizeof(struct drm_i915_perf_record_header);
+	/* accomodate only full report sizes */
+	ctx.max_record_length -= (ctx.max_record_length % ctx.metric_set->perf_raw_size);
+
 	intel_perf_load_perf_configs(ctx.perf, ctx.drm_fd);
 
 	ctx.timestamp_frequency = get_device_timestamp_frequency(ctx.devinfo, ctx.drm_fd);
@@ -1000,7 +1435,7 @@ main(int argc, char *argv[])
 		goto fail;
 	}
 
-	ctx.oa_exponent = oa_exponent_for_period(ctx.timestamp_frequency, perf_period);
+	ctx.oa_exponent = oa_exponent_for_period(ctx.timestamp_frequency, ctx.perf_period);
 	fprintf(stdout, "Opening perf stream with metric_id=%"PRIu64" oa_exponent=%u oa_format=%u\n",
 		ctx.metric_set->perf_oa_metrics_set, ctx.oa_exponent,
 		ctx.metric_set->perf_oa_format);
@@ -1015,16 +1450,40 @@ main(int argc, char *argv[])
 	corr_period_ns = corr_period * 1000000000ul;
 	poll_time_ns = corr_period_ns;
 
+	if (mmap_buffer) {
+		ctx.zero_fd = open("/dev/zero", O_RDWR | O_CLOEXEC);
+		if (ctx.zero_fd < 0) {
+			fprintf(stderr, "Unable to open zero device: %s\n", strerror(errno));
+			goto fail;
+		}
+
+		if (bb_ctx_init(&ctx)) {
+			fprintf(stderr, "Unable to initialize batch buffer %s\n", strerror(errno));
+			goto fail;
+		}
+
+		init_oa_regs(&ctx);
+		mmap_oa_buffer(&ctx);
+
+		ctx.zero_mem = mmap(NULL, ctx.oa_buffer_size, PROT_WRITE,
+				    MAP_PRIVATE, ctx.zero_fd, 0);
+		if (ctx.zero_mem == MAP_FAILED) {
+			fprintf(stderr, "Unable to mmap zero device: %s\n", strerror(errno));
+			goto fail;
+		}
+	}
+
 	while (!quit) {
 		struct pollfd pollfd[2] = {
-			{         ctx.perf_fd, POLLIN, 0 },
 			{ ctx.command_fifo_fd, POLLIN, 0 },
+			{ ctx.perf_fd, POLLIN, 0 },
 		};
 		uint64_t elapsed_ns;
+		nfds_t num_fds = mmap_buffer ? 1 : 2;
 		int ret;
 
 		igt_gettime(&now);
-		ret = poll(pollfd, ctx.command_fifo_fd != -1 ? 2 : 1, poll_time_ns / 1000000);
+		ret = poll(pollfd, num_fds, poll_time_ns / 1000000);
 		if (ret < 0 && errno != EINTR) {
 			fprintf(stderr, "Failed to poll i915-perf stream: %s\n",
 				strerror(errno));
@@ -1032,17 +1491,16 @@ main(int argc, char *argv[])
 		}
 
 		if (ret > 0) {
-			if (pollfd[0].revents & POLLIN) {
+			if (pollfd[0].revents & POLLIN)
+				read_command_file(&ctx);
+
+			if (num_fds > 1 && pollfd[1].revents & POLLIN) {
 				if (!write_i915_perf_data(ctx.output_stream, ctx.perf_fd)) {
 					fprintf(stderr, "Failed to write i915-perf data: %s\n",
 						strerror(errno));
 					break;
 				}
 			}
-
-			if (pollfd[1].revents & POLLIN) {
-				read_command_file(&ctx);
-			}
 		}
 
 		elapsed_ns = igt_nsec_elapsed(&now);
@@ -1054,6 +1512,12 @@ main(int argc, char *argv[])
 					strerror(errno));
 				break;
 			}
+
+			if (mmap_buffer && !write_i915_perf_mmapped_data(&ctx)) {
+				fprintf(stderr, "Failed to write i915-perf mmapped data: %s\n",
+					strerror(errno));
+				break;
+			}
 		} else {
 			poll_time_ns -= elapsed_ns;
 		}
-- 
2.20.1

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [igt-dev] [PATCH 5/5] tools/i915-perf: Add a command to trigger a report in OA buffer
  2021-08-03 20:07 [igt-dev] [PATCH 1/5] i915/perf: add tests for triggered OA reports Umesh Nerlige Ramappa
                   ` (2 preceding siblings ...)
  2021-08-03 20:07 ` [igt-dev] [PATCH 4/5] tools/i915-perf: Add mmapped OA buffer support to i915-perf-recorder Umesh Nerlige Ramappa
@ 2021-08-03 20:07 ` Umesh Nerlige Ramappa
  2021-08-03 20:39 ` [igt-dev] ✓ Fi.CI.BAT: success for series starting with [1/5] i915/perf: add tests for triggered OA reports Patchwork
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 20+ messages in thread
From: Umesh Nerlige Ramappa @ 2021-08-03 20:07 UTC (permalink / raw)
  To: igt-dev, Ashutosh Dixit, Lionel G Landwerlin

Current OA captures used by GPUvis show timelines corresponding to
individual contexts as parsed from the OA buffer.

Add support to query an OA report using the command line interface -
i915_perf_control. The query will take a snapshot of the counters and
store it in the OA buffer. The snapshots can be viewed as a separate
trigger event timeline in GPUvis. The command line allows passing a
distinct 32-bit trigger value that can be used to identify the triggers
in the visualization.

The idea is to show counter deltas between these on-demand trigger
events. More fine-grained triggering can be achieved by adding the
trigger commands into a batch.

Example: i915_perf_control -t <distinct_32bit_value>

Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
---
 tools/i915-perf/i915_perf_control.c           |  43 ++++++-
 tools/i915-perf/i915_perf_recorder.c          | 118 ++++++++++++++++--
 tools/i915-perf/i915_perf_recorder_commands.h |   1 +
 3 files changed, 151 insertions(+), 11 deletions(-)

diff --git a/tools/i915-perf/i915_perf_control.c b/tools/i915-perf/i915_perf_control.c
index be5996c0..509549d8 100644
--- a/tools/i915-perf/i915_perf_control.c
+++ b/tools/i915-perf/i915_perf_control.c
@@ -26,9 +26,19 @@
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
+#include <limits.h>
 
 #include "i915_perf_recorder_commands.h"
 
+static bool
+__valid_trigger_value(const char *value)
+{
+	char *endptr = '\0';
+
+	return strtoul(value, &endptr, 0) != ULONG_MAX &&
+	       *endptr == '\0';
+}
+
 static void
 usage(const char *name)
 {
@@ -37,7 +47,9 @@ usage(const char *name)
 		"\n"
 		"     --help,               -h         Print this screen\n"
 		"     --command-fifo,       -f <path>  Path to a command fifo\n"
-		"     --dump,               -d <path>  Write a content of circular buffer to path\n",
+		"     --dump,               -d <path>  Write a content of circular buffer to path\n"
+		"     --quit,               -q         Quit capturing reports fromm perf recorder\n"
+		"     --trigger,            -t <value> Trigger a report into OA buffer with this 32 bit unsigned value\n",
 		name);
 }
 
@@ -49,14 +61,16 @@ main(int argc, char *argv[])
 		{"dump",                 required_argument, 0, 'd'},
 		{"command-fifo",         required_argument, 0, 'f'},
 		{"quit",                       no_argument, 0, 'q'},
+		{"trigger",              required_argument, 0, 't'},
 		{0, 0, 0, 0}
 	};
 	const char *command_fifo = I915_PERF_RECORD_FIFO_PATH, *dump_file = NULL;
 	FILE *command_fifo_file;
 	int opt;
-	bool quit = false;
+	bool triggered = false, quit = false;
+	char *trigger_value;
 
-	while ((opt = getopt_long(argc, argv, "hd:f:q", long_options, NULL)) != -1) {
+	while ((opt = getopt_long(argc, argv, "hd:f:qt:", long_options, NULL)) != -1) {
 		switch (opt) {
 		case 'h':
 			usage(argv[0]);
@@ -70,6 +84,14 @@ main(int argc, char *argv[])
 		case 'q':
 			quit = true;
 			break;
+		case 't':
+			if (!__valid_trigger_value(optarg)) {
+				fprintf(stderr, "Invalid trigger value: %s\n", optarg);
+				return EXIT_FAILURE;
+			}
+			trigger_value = optarg;
+			triggered = true;
+			break;
 		default:
 			fprintf(stderr, "Internal error: "
 				"unexpected getopt value: %d\n", opt);
@@ -118,6 +140,21 @@ main(int argc, char *argv[])
 		}
 	}
 
+	if (triggered) {
+		uint32_t total_len =
+			sizeof(struct recorder_command_base) + strlen(trigger_value) + 1;
+		struct {
+			struct recorder_command_base base;
+			uint8_t trigger[];
+		} *data = malloc(total_len);
+
+		data->base.command = RECORDER_COMMAND_TRIGGER;
+		data->base.size = total_len;
+		snprintf((char *) data->trigger, strlen(trigger_value) + 1, "%s", trigger_value);
+
+		fwrite(data, total_len, 1, command_fifo_file);
+	}
+
 	if (quit) {
 		struct recorder_command_base base = {
 			.command = RECORDER_COMMAND_QUIT,
diff --git a/tools/i915-perf/i915_perf_recorder.c b/tools/i915-perf/i915_perf_recorder.c
index 6b2f8710..8da21595 100644
--- a/tools/i915-perf/i915_perf_recorder.c
+++ b/tools/i915-perf/i915_perf_recorder.c
@@ -26,6 +26,7 @@
 #include <fcntl.h>
 #include <getopt.h>
 #include <inttypes.h>
+#include <limits.h>
 #include <poll.h>
 #include <signal.h>
 #include <stdbool.h>
@@ -377,6 +378,8 @@ struct recording_context {
 	uint32_t oa_status_reg;
 	uint32_t oa_buffer_reg;
 	uint32_t oa_tail_reg;
+	uint32_t oa_trigger_reg;
+	uint32_t oa_marker_reg;
 
 	int zero_fd;
 	void *zero_mem;
@@ -629,8 +632,21 @@ static int gem_execbuf(int fd, struct drm_i915_gem_execbuffer2 *execbuf)
 #define _MI_INSTR(opcode, flags)	(((opcode) << 23) | (flags))
 #define MI_STORE_REGISTER_MEM      	_MI_INSTR(0x24, 1)
 #define MI_STORE_REGISTER_MEM_GEN8 	_MI_INSTR(0x24, 2)
+#define MI_LOAD_REGISTER_IMM		((0x22 << 23) | 1)
+#define MI_NOOP				0x00
 #define MI_BATCH_BUFFER_END		(0xA << 23)
 
+static void
+bb_emit_lri(struct bb_context *bb, uint32_t reg, uint32_t val)
+{
+	assert(bb->offset < BATCH_SIZE);
+
+	bb->batch[bb->offset++] = MI_LOAD_REGISTER_IMM;
+	bb->batch[bb->offset++] = reg;
+	bb->batch[bb->offset++] = val;
+	bb->batch[bb->offset++] = MI_NOOP;
+}
+
 static void
 bb_emit_srm(struct bb_context *bb, uint32_t reg, uint32_t devid)
 {
@@ -747,6 +763,18 @@ err:
 #define GEN12_OAG_OATAILPTR  0xdb04
 #define GEN12_OAG_OASTATUS   0xdafc
 
+#define OAREPORTTRIG2 (0x2744)
+#define   OAREPORTTRIG2_INVERT_C_1  (1 << 21)
+#define   OAREPORTTRIG2_INVERT_D_0  (1 << 22)
+#define   OAREPORTTRIG2_REPORT_TRIGGER_ENABLE (1 << 31)
+#define OAREPORTTRIG6 (0x2754)
+#define OA_PERF_COUNTER_A(idx) (0x2800 + 8 * (idx))
+
+#define GEN12_OAREPORTTRIG2 (0xd924)
+#define GEN12_OAREPORTTRIG6 (0xd934)
+#define GEN12_OAG_PERF_COUNTER_A(idx) (0xD980 + 8 * (idx))
+
+
 static void
 init_oa_regs(struct recording_context *ctx)
 {
@@ -754,10 +782,14 @@ init_oa_regs(struct recording_context *ctx)
 		ctx->oa_status_reg = GEN12_OAG_OASTATUS;
 		ctx->oa_buffer_reg = GEN12_OAG_OABUFFER;
 		ctx->oa_tail_reg = GEN12_OAG_OATAILPTR;
+		ctx->oa_trigger_reg = GEN12_OAREPORTTRIG2;
+		ctx->oa_marker_reg = GEN12_OAG_PERF_COUNTER_A(18);
 	} else if (ctx->devinfo->graphics_ver >= 9) {
 		ctx->oa_status_reg = GEN8_OASTATUS;
 		ctx->oa_buffer_reg = GEN8_OABUFFER;
 		ctx->oa_tail_reg = GEN8_OATAILPTR;
+		ctx->oa_trigger_reg = OAREPORTTRIG2;
+		ctx->oa_marker_reg = OA_PERF_COUNTER_A(18);
 	}
 }
 
@@ -791,6 +823,31 @@ __read_oa_reg(struct recording_context *ctx, uint32_t reg, uint32_t *val)
 	return 0;
 }
 
+static int
+oa_trigger_report(struct recording_context *ctx, uint32_t reg,
+		  uint32_t trigger_marker)
+{
+	int ret;
+
+	bb_emit_lri(&ctx->bb, ctx->oa_marker_reg, trigger_marker);
+	bb_emit_lri(&ctx->bb, reg,
+		    OAREPORTTRIG2_INVERT_C_1 |
+		    OAREPORTTRIG2_REPORT_TRIGGER_ENABLE);
+	bb_emit_lri(&ctx->bb, reg,
+		    OAREPORTTRIG2_INVERT_C_1 |
+		    OAREPORTTRIG2_INVERT_D_0 |
+		    OAREPORTTRIG2_REPORT_TRIGGER_ENABLE);
+	bb_emit_bbe(&ctx->bb);
+	ret = bb_exec(ctx->drm_fd, &ctx->bb);
+	if (ret) {
+		fprintf(stderr, "failed to trigger oa report %08x, %s\n",
+			reg, strerror(errno));
+		return ret;
+	}
+
+	return 0;
+}
+
 static bool
 __process_oa_status(struct recording_context *ctx)
 {
@@ -996,6 +1053,35 @@ write_correlation_timestamps(FILE *output, int drm_fd)
 	return write_saved_correlation_timestamps(output, &corr);
 }
 
+static bool
+__valid_trigger_value(const char *str, uint32_t *trigger_value)
+{
+	char *endptr = '\0';
+	uint32_t value = strtoul(str, &endptr, 0);
+
+	if (value != ULONG_MAX && *endptr == '\0') {
+		*trigger_value = value;
+		return true;
+	}
+
+	return false;
+}
+
+static int read_command_data(int fd, uint8_t *buf, uint32_t len)
+{
+	uint32_t offset = 0;
+	ssize_t ret;
+
+	while (offset < len &&
+	       ((ret = read(fd, (void *)(buf + offset), len - offset)) > 0
+		|| errno == EAGAIN)) {
+		if (ret > 0)
+			offset += ret;
+	}
+
+	return ret;
+}
+
 static void
 read_command_file(struct recording_context *ctx)
 {
@@ -1007,17 +1093,12 @@ read_command_file(struct recording_context *ctx)
 
 	switch (header.command) {
 	case RECORDER_COMMAND_DUMP: {
-		uint32_t len = header.size - sizeof(header), offset = 0;
+		uint32_t len = header.size - sizeof(header);
 		uint8_t *dump = malloc(len);
 		FILE *file;
 
-		while (offset < len &&
-		       ((ret = read(ctx->command_fifo_fd,
-				    (void *) dump + offset, len - offset)) > 0
-			|| errno == EAGAIN)) {
-			if (ret > 0)
-				offset += ret;
-		}
+		assert(dump);
+		assert(read_command_data(ctx->command_fifo_fd, dump, len) > 0);
 
 		fprintf(stdout, "Writing circular buffer to %s\n", dump);
 
@@ -1049,6 +1130,27 @@ read_command_file(struct recording_context *ctx)
 	case RECORDER_COMMAND_QUIT:
 		quit = true;
 		break;
+	case RECORDER_COMMAND_TRIGGER: {
+		uint32_t len = header.size - sizeof(header);
+		uint32_t value;
+		char *trigger;
+
+		if (ctx->perf->devinfo.devid < 9) {
+			fprintf(stderr, "OA report trigger not supported on gen %d\n",
+				ctx->perf->devinfo.devid);
+			break;
+		}
+
+		trigger = malloc(len);
+		assert(trigger);
+		assert(read_command_data(ctx->command_fifo_fd, (uint8_t *)trigger, len) > 0);
+
+		if (__valid_trigger_value(trigger, &value))
+			oa_trigger_report(ctx, ctx->oa_trigger_reg, value);
+
+		free(trigger);
+		break;
+	}
 	default:
 		fprintf(stderr, "Unknown command 0x%x\n", header.command);
 		break;
diff --git a/tools/i915-perf/i915_perf_recorder_commands.h b/tools/i915-perf/i915_perf_recorder_commands.h
index d9353cfa..e48d9426 100644
--- a/tools/i915-perf/i915_perf_recorder_commands.h
+++ b/tools/i915-perf/i915_perf_recorder_commands.h
@@ -30,6 +30,7 @@
 enum recorder_command {
 	RECORDER_COMMAND_DUMP = 1,
 	RECORDER_COMMAND_QUIT,
+	RECORDER_COMMAND_TRIGGER,
 };
 
 struct recorder_command_base {
-- 
2.20.1

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [igt-dev] ✓ Fi.CI.BAT: success for series starting with [1/5] i915/perf: add tests for triggered OA reports
  2021-08-03 20:07 [igt-dev] [PATCH 1/5] i915/perf: add tests for triggered OA reports Umesh Nerlige Ramappa
                   ` (3 preceding siblings ...)
  2021-08-03 20:07 ` [igt-dev] [PATCH 5/5] tools/i915-perf: Add a command to trigger a report in OA buffer Umesh Nerlige Ramappa
@ 2021-08-03 20:39 ` Patchwork
  2021-08-03 23:21 ` [igt-dev] ✗ GitLab.Pipeline: warning " Patchwork
  2021-08-04 20:13 ` [igt-dev] ✓ Fi.CI.IGT: success " Patchwork
  6 siblings, 0 replies; 20+ messages in thread
From: Patchwork @ 2021-08-03 20:39 UTC (permalink / raw)
  To: Umesh Nerlige Ramappa; +Cc: igt-dev

[-- Attachment #1: Type: text/plain, Size: 1494 bytes --]

== Series Details ==

Series: series starting with [1/5] i915/perf: add tests for triggered OA reports
URL   : https://patchwork.freedesktop.org/series/93355/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_10442 -> IGTPW_6085
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/index.html


Changes
-------

  No changes found


Participating hosts (37 -> 33)
------------------------------

  Missing    (4): fi-bdw-samus fi-bsw-cyan bat-jsl-1 fi-hsw-4200u 


Build changes
-------------

  * CI: CI-20190529 -> None
  * IGT: IGT_6159 -> IGTPW_6085

  CI-20190529: 20190529
  CI_DRM_10442: d3816ffe379da79a69188424318fe2b5d458347b @ git://anongit.freedesktop.org/gfx-ci/linux
  IGTPW_6085: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/index.html
  IGT_6159: 6135b9cc319ed965e3aafb5b2ae2abf4762a06b2 @ https://gitlab.freedesktop.org/drm/igt-gpu-tools.git



== Testlist changes ==

+igt@perf@closed-fd-and-unmapped-access
+igt@perf@invalid-map-oa-buffer
+igt@perf@map-oa-buffer
+igt@perf@non-privileged-access-vaddr
+igt@perf@non-privileged-map-oa-buffer
+igt@perf@oa-regs-not-whitelisted
+igt@perf@oa-regs-whitelisted
+igt@perf@privileged-forked-access-vaddr
+igt@perf@triggered-oa-reports-paranoid-0
+igt@perf@triggered-oa-reports-paranoid-1

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/index.html

[-- Attachment #2: Type: text/html, Size: 2121 bytes --]

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [igt-dev] ✗ GitLab.Pipeline: warning for series starting with [1/5] i915/perf: add tests for triggered OA reports
  2021-08-03 20:07 [igt-dev] [PATCH 1/5] i915/perf: add tests for triggered OA reports Umesh Nerlige Ramappa
                   ` (4 preceding siblings ...)
  2021-08-03 20:39 ` [igt-dev] ✓ Fi.CI.BAT: success for series starting with [1/5] i915/perf: add tests for triggered OA reports Patchwork
@ 2021-08-03 23:21 ` Patchwork
  2021-08-04 20:13 ` [igt-dev] ✓ Fi.CI.IGT: success " Patchwork
  6 siblings, 0 replies; 20+ messages in thread
From: Patchwork @ 2021-08-03 23:21 UTC (permalink / raw)
  To: Umesh Nerlige Ramappa; +Cc: igt-dev

== Series Details ==

Series: series starting with [1/5] i915/perf: add tests for triggered OA reports
URL   : https://patchwork.freedesktop.org/series/93355/
State : warning

== Summary ==

Pipeline status: FAILED.

see https://gitlab.freedesktop.org/gfx-ci/igt-ci-tags/-/pipelines/374049 for the overview.

test:ninja-test-arm64 has failed (https://gitlab.freedesktop.org/gfx-ci/igt-ci-tags/-/jobs/12473678):
  Traceback (most recent call last):
    File "/usr/lib/python3/dist-packages/mesonbuild/mesonmain.py", line 112, in run
      return options.run_func(options)
    File "/usr/lib/python3/dist-packages/mesonbuild/mtest.py", line 805, in run
      return th.doit()
    File "/usr/lib/python3/dist-packages/mesonbuild/mtest.py", line 555, in doit
      self.run_tests(tests)
    File "/usr/lib/python3/dist-packages/mesonbuild/mtest.py", line 715, in run_tests
      self.drain_futures(futures)
    File "/usr/lib/python3/dist-packages/mesonbuild/mtest.py", line 732, in drain_futures
      self.print_stats(numlen, tests, name, result.result(), i)
    File "/usr/lib/python3/dist-packages/mesonbuild/mtest.py", line 505, in print_stats
      result_str += "\n\n" + result.get_log()
    File "/usr/lib/python3/dist-packages/mesonbuild/mtest.py", line 178, in get_log
      res += self.stde
  TypeError: can only concatenate str (not "bytes") to str
    1/291 lib igt_assert                          TIMEOUT 32.11 s 
  section_end:1628032688:step_script
  ERROR: Job failed: execution took longer than 1h0m0s seconds
  

test:ninja-test-clang has failed (https://gitlab.freedesktop.org/gfx-ci/igt-ci-tags/-/jobs/12473677):
  298/305 assembler test/rndd                     OK       0.02 s 
  299/305 assembler test/rndu                     OK       0.01 s 
  300/305 assembler test/rnde                     OK       0.02 s 
  301/305 assembler test/rnde-intsrc              OK       0.01 s 
  302/305 assembler test/rndz                     OK       0.02 s 
  303/305 assembler test/lzd                      OK       0.02 s 
  304/305 assembler test/not                      OK       0.02 s 
  305/305 assembler test/immediate                OK       0.01 s 
  
  Ok:                  280
  Expected Fail:         0
  Fail:                  0
  Unexpected Pass:       0
  Skipped:               0
  Timeout:              25
  
  Full log written to /builds/gfx-ci/igt-ci-tags/build/meson-logs/testlog.txt
  section_end:1628032686:step_script
  ERROR: Job failed: execution took longer than 1h0m0s seconds

== Logs ==

For more details see: https://gitlab.freedesktop.org/gfx-ci/igt-ci-tags/-/pipelines/374049

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [igt-dev] ✓ Fi.CI.IGT: success for series starting with [1/5] i915/perf: add tests for triggered OA reports
  2021-08-03 20:07 [igt-dev] [PATCH 1/5] i915/perf: add tests for triggered OA reports Umesh Nerlige Ramappa
                   ` (5 preceding siblings ...)
  2021-08-03 23:21 ` [igt-dev] ✗ GitLab.Pipeline: warning " Patchwork
@ 2021-08-04 20:13 ` Patchwork
  6 siblings, 0 replies; 20+ messages in thread
From: Patchwork @ 2021-08-04 20:13 UTC (permalink / raw)
  To: Umesh Nerlige Ramappa; +Cc: igt-dev

[-- Attachment #1: Type: text/plain, Size: 30290 bytes --]

== Series Details ==

Series: series starting with [1/5] i915/perf: add tests for triggered OA reports
URL   : https://patchwork.freedesktop.org/series/93355/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_10442_full -> IGTPW_6085_full
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/index.html

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in IGTPW_6085_full:

### IGT changes ###

#### Possible regressions ####

  * {igt@perf@non-privileged-map-oa-buffer} (NEW):
    - shard-tglb:         NOTRUN -> [SKIP][1] +8 similar issues
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-tglb7/igt@perf@non-privileged-map-oa-buffer.html

  * {igt@perf@oa-regs-not-whitelisted} (NEW):
    - shard-iclb:         NOTRUN -> [SKIP][2] +8 similar issues
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-iclb5/igt@perf@oa-regs-not-whitelisted.html

  
New tests
---------

  New tests have been introduced between CI_DRM_10442_full and IGTPW_6085_full:

### New IGT tests (10) ###

  * igt@perf@closed-fd-and-unmapped-access:
    - Statuses : 4 skip(s)
    - Exec time: [0.0] s

  * igt@perf@invalid-map-oa-buffer:
    - Statuses : 5 skip(s)
    - Exec time: [0.0] s

  * igt@perf@map-oa-buffer:
    - Statuses : 5 skip(s)
    - Exec time: [0.0] s

  * igt@perf@non-privileged-access-vaddr:
    - Statuses :
    - Exec time: [None] s

  * igt@perf@non-privileged-map-oa-buffer:
    - Statuses : 4 skip(s)
    - Exec time: [0.0] s

  * igt@perf@oa-regs-not-whitelisted:
    - Statuses : 5 skip(s)
    - Exec time: [0.0] s

  * igt@perf@oa-regs-whitelisted:
    - Statuses : 6 skip(s)
    - Exec time: [0.0] s

  * igt@perf@privileged-forked-access-vaddr:
    - Statuses : 4 skip(s)
    - Exec time: [0.0] s

  * igt@perf@triggered-oa-reports-paranoid-0:
    - Statuses : 5 skip(s)
    - Exec time: [0.0] s

  * igt@perf@triggered-oa-reports-paranoid-1:
    - Statuses : 5 skip(s)
    - Exec time: [0.0] s

  

Known issues
------------

  Here are the changes found in IGTPW_6085_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_create@create-massive:
    - shard-snb:          NOTRUN -> [DMESG-WARN][3] ([i915#3002])
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-snb6/igt@gem_create@create-massive.html

  * igt@gem_ctx_isolation@preservation-s3@bcs0:
    - shard-kbl:          [PASS][4] -> [DMESG-WARN][5] ([i915#180]) +2 similar issues
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10442/shard-kbl1/igt@gem_ctx_isolation@preservation-s3@bcs0.html
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-kbl7/igt@gem_ctx_isolation@preservation-s3@bcs0.html

  * igt@gem_ctx_persistence@legacy-engines-mixed:
    - shard-snb:          NOTRUN -> [SKIP][6] ([fdo#109271] / [i915#1099]) +3 similar issues
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-snb5/igt@gem_ctx_persistence@legacy-engines-mixed.html

  * igt@gem_exec_fair@basic-deadline:
    - shard-apl:          NOTRUN -> [FAIL][7] ([i915#2846])
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-apl2/igt@gem_exec_fair@basic-deadline.html

  * igt@gem_exec_fair@basic-none-solo@rcs0:
    - shard-kbl:          NOTRUN -> [FAIL][8] ([i915#2842])
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-kbl2/igt@gem_exec_fair@basic-none-solo@rcs0.html

  * igt@gem_exec_fair@basic-pace-share@rcs0:
    - shard-tglb:         [PASS][9] -> [FAIL][10] ([i915#2842]) +1 similar issue
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10442/shard-tglb7/igt@gem_exec_fair@basic-pace-share@rcs0.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-tglb7/igt@gem_exec_fair@basic-pace-share@rcs0.html

  * igt@gem_exec_fair@basic-pace@vcs1:
    - shard-iclb:         NOTRUN -> [FAIL][11] ([i915#2842])
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-iclb4/igt@gem_exec_fair@basic-pace@vcs1.html

  * igt@gem_exec_fair@basic-throttle@rcs0:
    - shard-glk:          [PASS][12] -> [FAIL][13] ([i915#2842]) +1 similar issue
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10442/shard-glk5/igt@gem_exec_fair@basic-throttle@rcs0.html
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-glk7/igt@gem_exec_fair@basic-throttle@rcs0.html

  * igt@gem_exec_schedule@independent@vecs0:
    - shard-iclb:         [PASS][14] -> [FAIL][15] ([i915#3795])
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10442/shard-iclb7/igt@gem_exec_schedule@independent@vecs0.html
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-iclb3/igt@gem_exec_schedule@independent@vecs0.html

  * igt@gem_huc_copy@huc-copy:
    - shard-apl:          NOTRUN -> [SKIP][16] ([fdo#109271] / [i915#2190])
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-apl7/igt@gem_huc_copy@huc-copy.html

  * igt@gem_mmap_gtt@cpuset-big-copy-odd:
    - shard-glk:          [PASS][17] -> [FAIL][18] ([i915#1888] / [i915#307])
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10442/shard-glk7/igt@gem_mmap_gtt@cpuset-big-copy-odd.html
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-glk7/igt@gem_mmap_gtt@cpuset-big-copy-odd.html

  * igt@gem_render_copy@x-tiled-to-vebox-yf-tiled:
    - shard-kbl:          NOTRUN -> [SKIP][19] ([fdo#109271]) +160 similar issues
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-kbl1/igt@gem_render_copy@x-tiled-to-vebox-yf-tiled.html

  * igt@gem_userptr_blits@dmabuf-sync:
    - shard-apl:          NOTRUN -> [SKIP][20] ([fdo#109271] / [i915#3323])
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-apl7/igt@gem_userptr_blits@dmabuf-sync.html

  * igt@gem_userptr_blits@invalid-mmap-offset-unsync:
    - shard-iclb:         NOTRUN -> [SKIP][21] ([i915#3297])
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-iclb2/igt@gem_userptr_blits@invalid-mmap-offset-unsync.html
    - shard-tglb:         NOTRUN -> [SKIP][22] ([i915#3297])
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-tglb6/igt@gem_userptr_blits@invalid-mmap-offset-unsync.html

  * igt@gem_userptr_blits@vma-merge:
    - shard-snb:          NOTRUN -> [FAIL][23] ([i915#2724])
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-snb6/igt@gem_userptr_blits@vma-merge.html

  * igt@gem_workarounds@suspend-resume-fd:
    - shard-apl:          [PASS][24] -> [DMESG-WARN][25] ([i915#180])
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10442/shard-apl3/igt@gem_workarounds@suspend-resume-fd.html
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-apl8/igt@gem_workarounds@suspend-resume-fd.html

  * igt@gen9_exec_parse@bb-start-cmd:
    - shard-tglb:         NOTRUN -> [SKIP][26] ([i915#2856])
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-tglb6/igt@gen9_exec_parse@bb-start-cmd.html
    - shard-iclb:         NOTRUN -> [SKIP][27] ([i915#2856])
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-iclb2/igt@gen9_exec_parse@bb-start-cmd.html

  * igt@i915_pm_dc@dc5-dpms:
    - shard-kbl:          NOTRUN -> [FAIL][28] ([i915#545])
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-kbl1/igt@i915_pm_dc@dc5-dpms.html

  * igt@i915_pm_rpm@modeset-non-lpsp:
    - shard-iclb:         NOTRUN -> [SKIP][29] ([i915#579])
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-iclb5/igt@i915_pm_rpm@modeset-non-lpsp.html
    - shard-tglb:         NOTRUN -> [SKIP][30] ([i915#579])
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-tglb3/igt@i915_pm_rpm@modeset-non-lpsp.html

  * igt@i915_selftest@live@hangcheck:
    - shard-snb:          NOTRUN -> [INCOMPLETE][31] ([i915#2782])
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-snb7/igt@i915_selftest@live@hangcheck.html

  * igt@i915_suspend@fence-restore-tiled2untiled:
    - shard-kbl:          NOTRUN -> [INCOMPLETE][32] ([i915#155] / [i915#794])
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-kbl2/igt@i915_suspend@fence-restore-tiled2untiled.html

  * igt@i915_suspend@forcewake:
    - shard-apl:          NOTRUN -> [DMESG-WARN][33] ([i915#180])
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-apl1/igt@i915_suspend@forcewake.html

  * igt@kms_addfb_basic@invalid-smem-bo-on-discrete:
    - shard-tglb:         NOTRUN -> [SKIP][34] ([i915#3826])
   [34]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-tglb3/igt@kms_addfb_basic@invalid-smem-bo-on-discrete.html
    - shard-iclb:         NOTRUN -> [SKIP][35] ([i915#3826])
   [35]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-iclb1/igt@kms_addfb_basic@invalid-smem-bo-on-discrete.html

  * igt@kms_big_fb@linear-64bpp-rotate-90:
    - shard-iclb:         NOTRUN -> [SKIP][36] ([fdo#110725] / [fdo#111614])
   [36]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-iclb5/igt@kms_big_fb@linear-64bpp-rotate-90.html
    - shard-tglb:         NOTRUN -> [SKIP][37] ([fdo#111614])
   [37]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-tglb2/igt@kms_big_fb@linear-64bpp-rotate-90.html

  * igt@kms_big_fb@x-tiled-max-hw-stride-32bpp-rotate-0-hflip:
    - shard-glk:          NOTRUN -> [SKIP][38] ([fdo#109271] / [i915#3777])
   [38]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-glk6/igt@kms_big_fb@x-tiled-max-hw-stride-32bpp-rotate-0-hflip.html

  * igt@kms_big_fb@x-tiled-max-hw-stride-64bpp-rotate-180-hflip:
    - shard-kbl:          NOTRUN -> [SKIP][39] ([fdo#109271] / [i915#3777]) +2 similar issues
   [39]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-kbl4/igt@kms_big_fb@x-tiled-max-hw-stride-64bpp-rotate-180-hflip.html
    - shard-apl:          NOTRUN -> [SKIP][40] ([fdo#109271] / [i915#3777])
   [40]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-apl1/igt@kms_big_fb@x-tiled-max-hw-stride-64bpp-rotate-180-hflip.html

  * igt@kms_big_fb@y-tiled-32bpp-rotate-0:
    - shard-glk:          [PASS][41] -> [DMESG-WARN][42] ([i915#118] / [i915#95])
   [41]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10442/shard-glk7/igt@kms_big_fb@y-tiled-32bpp-rotate-0.html
   [42]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-glk5/igt@kms_big_fb@y-tiled-32bpp-rotate-0.html

  * igt@kms_big_fb@yf-tiled-addfb:
    - shard-tglb:         NOTRUN -> [SKIP][43] ([fdo#111615])
   [43]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-tglb1/igt@kms_big_fb@yf-tiled-addfb.html

  * igt@kms_ccs@pipe-a-ccs-on-another-bo-y_tiled_gen12_mc_ccs:
    - shard-apl:          NOTRUN -> [SKIP][44] ([fdo#109271] / [i915#3886]) +18 similar issues
   [44]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-apl1/igt@kms_ccs@pipe-a-ccs-on-another-bo-y_tiled_gen12_mc_ccs.html

  * igt@kms_ccs@pipe-a-crc-primary-rotation-180-y_tiled_gen12_mc_ccs:
    - shard-tglb:         NOTRUN -> [SKIP][45] ([i915#3689] / [i915#3886]) +1 similar issue
   [45]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-tglb7/igt@kms_ccs@pipe-a-crc-primary-rotation-180-y_tiled_gen12_mc_ccs.html

  * igt@kms_ccs@pipe-a-crc-sprite-planes-basic-y_tiled_ccs:
    - shard-tglb:         NOTRUN -> [SKIP][46] ([i915#3689]) +4 similar issues
   [46]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-tglb8/igt@kms_ccs@pipe-a-crc-sprite-planes-basic-y_tiled_ccs.html

  * igt@kms_ccs@pipe-b-missing-ccs-buffer-y_tiled_gen12_rc_ccs_cc:
    - shard-kbl:          NOTRUN -> [SKIP][47] ([fdo#109271] / [i915#3886]) +4 similar issues
   [47]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-kbl4/igt@kms_ccs@pipe-b-missing-ccs-buffer-y_tiled_gen12_rc_ccs_cc.html
    - shard-glk:          NOTRUN -> [SKIP][48] ([fdo#109271] / [i915#3886]) +1 similar issue
   [48]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-glk6/igt@kms_ccs@pipe-b-missing-ccs-buffer-y_tiled_gen12_rc_ccs_cc.html
    - shard-iclb:         NOTRUN -> [SKIP][49] ([fdo#109278] / [i915#3886]) +1 similar issue
   [49]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-iclb5/igt@kms_ccs@pipe-b-missing-ccs-buffer-y_tiled_gen12_rc_ccs_cc.html

  * igt@kms_ccs@pipe-d-bad-pixel-format-y_tiled_ccs:
    - shard-snb:          NOTRUN -> [SKIP][50] ([fdo#109271]) +386 similar issues
   [50]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-snb5/igt@kms_ccs@pipe-d-bad-pixel-format-y_tiled_ccs.html

  * igt@kms_cdclk@mode-transition:
    - shard-apl:          NOTRUN -> [SKIP][51] ([fdo#109271]) +349 similar issues
   [51]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-apl1/igt@kms_cdclk@mode-transition.html

  * igt@kms_chamelium@dp-frame-dump:
    - shard-iclb:         NOTRUN -> [SKIP][52] ([fdo#109284] / [fdo#111827]) +4 similar issues
   [52]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-iclb8/igt@kms_chamelium@dp-frame-dump.html
    - shard-glk:          NOTRUN -> [SKIP][53] ([fdo#109271] / [fdo#111827]) +4 similar issues
   [53]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-glk7/igt@kms_chamelium@dp-frame-dump.html

  * igt@kms_chamelium@hdmi-mode-timings:
    - shard-kbl:          NOTRUN -> [SKIP][54] ([fdo#109271] / [fdo#111827]) +10 similar issues
   [54]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-kbl3/igt@kms_chamelium@hdmi-mode-timings.html

  * igt@kms_color_chamelium@pipe-a-ctm-limited-range:
    - shard-apl:          NOTRUN -> [SKIP][55] ([fdo#109271] / [fdo#111827]) +31 similar issues
   [55]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-apl8/igt@kms_color_chamelium@pipe-a-ctm-limited-range.html

  * igt@kms_color_chamelium@pipe-b-ctm-0-75:
    - shard-tglb:         NOTRUN -> [SKIP][56] ([fdo#109284] / [fdo#111827]) +5 similar issues
   [56]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-tglb1/igt@kms_color_chamelium@pipe-b-ctm-0-75.html

  * igt@kms_color_chamelium@pipe-invalid-ctm-matrix-sizes:
    - shard-snb:          NOTRUN -> [SKIP][57] ([fdo#109271] / [fdo#111827]) +18 similar issues
   [57]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-snb2/igt@kms_color_chamelium@pipe-invalid-ctm-matrix-sizes.html

  * igt@kms_content_protection@dp-mst-lic-type-1:
    - shard-iclb:         NOTRUN -> [SKIP][58] ([i915#3116])
   [58]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-iclb2/igt@kms_content_protection@dp-mst-lic-type-1.html
    - shard-tglb:         NOTRUN -> [SKIP][59] ([i915#3116])
   [59]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-tglb5/igt@kms_content_protection@dp-mst-lic-type-1.html

  * igt@kms_content_protection@legacy:
    - shard-kbl:          NOTRUN -> [TIMEOUT][60] ([i915#1319])
   [60]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-kbl1/igt@kms_content_protection@legacy.html
    - shard-tglb:         NOTRUN -> [SKIP][61] ([fdo#111828])
   [61]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-tglb2/igt@kms_content_protection@legacy.html

  * igt@kms_content_protection@srm:
    - shard-apl:          NOTRUN -> [TIMEOUT][62] ([i915#1319]) +1 similar issue
   [62]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-apl6/igt@kms_content_protection@srm.html

  * igt@kms_content_protection@uevent:
    - shard-kbl:          NOTRUN -> [FAIL][63] ([i915#2105])
   [63]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-kbl4/igt@kms_content_protection@uevent.html

  * igt@kms_cursor_crc@pipe-a-cursor-max-size-rapid-movement:
    - shard-tglb:         NOTRUN -> [SKIP][64] ([i915#3359]) +1 similar issue
   [64]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-tglb2/igt@kms_cursor_crc@pipe-a-cursor-max-size-rapid-movement.html

  * igt@kms_cursor_crc@pipe-b-cursor-512x512-sliding:
    - shard-iclb:         NOTRUN -> [SKIP][65] ([fdo#109278] / [fdo#109279])
   [65]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-iclb3/igt@kms_cursor_crc@pipe-b-cursor-512x512-sliding.html
    - shard-tglb:         NOTRUN -> [SKIP][66] ([fdo#109279] / [i915#3359])
   [66]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-tglb5/igt@kms_cursor_crc@pipe-b-cursor-512x512-sliding.html

  * igt@kms_cursor_crc@pipe-c-cursor-32x32-sliding:
    - shard-tglb:         NOTRUN -> [SKIP][67] ([i915#3319])
   [67]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-tglb5/igt@kms_cursor_crc@pipe-c-cursor-32x32-sliding.html

  * igt@kms_cursor_legacy@cursorb-vs-flipa-atomic-transitions-varying-size:
    - shard-iclb:         NOTRUN -> [SKIP][68] ([fdo#109274] / [fdo#109278])
   [68]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-iclb7/igt@kms_cursor_legacy@cursorb-vs-flipa-atomic-transitions-varying-size.html

  * igt@kms_cursor_legacy@pipe-d-single-move:
    - shard-iclb:         NOTRUN -> [SKIP][69] ([fdo#109278]) +11 similar issues
   [69]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-iclb1/igt@kms_cursor_legacy@pipe-d-single-move.html

  * igt@kms_cursor_legacy@pipe-d-torture-bo:
    - shard-apl:          NOTRUN -> [SKIP][70] ([fdo#109271] / [i915#533]) +1 similar issue
   [70]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-apl8/igt@kms_cursor_legacy@pipe-d-torture-bo.html

  * igt@kms_dither@fb-8bpc-vs-panel-8bpc@edp-1-pipe-a:
    - shard-iclb:         [PASS][71] -> [SKIP][72] ([i915#3788])
   [71]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10442/shard-iclb5/igt@kms_dither@fb-8bpc-vs-panel-8bpc@edp-1-pipe-a.html
   [72]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-iclb2/igt@kms_dither@fb-8bpc-vs-panel-8bpc@edp-1-pipe-a.html

  * igt@kms_dsc@basic-dsc-enable@edp-1-pipe-c:
    - shard-iclb:         NOTRUN -> [DMESG-WARN][73] ([i915#1226]) +2 similar issues
   [73]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-iclb2/igt@kms_dsc@basic-dsc-enable@edp-1-pipe-c.html

  * igt@kms_flip@2x-flip-vs-panning:
    - shard-iclb:         NOTRUN -> [SKIP][74] ([fdo#109274]) +1 similar issue
   [74]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-iclb2/igt@kms_flip@2x-flip-vs-panning.html

  * igt@kms_flip_scaled_crc@flip-32bpp-ytile-to-32bpp-ytilegen12rcccs:
    - shard-apl:          NOTRUN -> [SKIP][75] ([fdo#109271] / [i915#2672]) +1 similar issue
   [75]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-apl8/igt@kms_flip_scaled_crc@flip-32bpp-ytile-to-32bpp-ytilegen12rcccs.html

  * igt@kms_flip_scaled_crc@flip-32bpp-ytileccs-to-64bpp-ytile:
    - shard-tglb:         NOTRUN -> [SKIP][76] ([i915#2587])
   [76]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-tglb1/igt@kms_flip_scaled_crc@flip-32bpp-ytileccs-to-64bpp-ytile.html

  * igt@kms_flip_scaled_crc@flip-64bpp-ytile-to-32bpp-ytilercccs:
    - shard-kbl:          NOTRUN -> [SKIP][77] ([fdo#109271] / [i915#2672])
   [77]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-kbl2/igt@kms_flip_scaled_crc@flip-64bpp-ytile-to-32bpp-ytilercccs.html

  * igt@kms_frontbuffer_tracking@fbc-2p-primscrn-pri-shrfb-draw-mmap-cpu:
    - shard-iclb:         NOTRUN -> [SKIP][78] ([fdo#109280]) +8 similar issues
   [78]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-iclb2/igt@kms_frontbuffer_tracking@fbc-2p-primscrn-pri-shrfb-draw-mmap-cpu.html

  * igt@kms_frontbuffer_tracking@fbcpsr-1p-offscren-pri-indfb-draw-mmap-cpu:
    - shard-glk:          NOTRUN -> [SKIP][79] ([fdo#109271]) +42 similar issues
   [79]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-glk6/igt@kms_frontbuffer_tracking@fbcpsr-1p-offscren-pri-indfb-draw-mmap-cpu.html

  * igt@kms_frontbuffer_tracking@psr-2p-primscrn-pri-shrfb-draw-render:
    - shard-tglb:         NOTRUN -> [SKIP][80] ([fdo#111825]) +15 similar issues
   [80]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-tglb1/igt@kms_frontbuffer_tracking@psr-2p-primscrn-pri-shrfb-draw-render.html

  * igt@kms_plane_alpha_blend@pipe-a-alpha-transparent-fb:
    - shard-apl:          NOTRUN -> [FAIL][81] ([i915#265])
   [81]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-apl1/igt@kms_plane_alpha_blend@pipe-a-alpha-transparent-fb.html

  * igt@kms_plane_alpha_blend@pipe-b-alpha-basic:
    - shard-apl:          NOTRUN -> [FAIL][82] ([fdo#108145] / [i915#265]) +5 similar issues
   [82]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-apl2/igt@kms_plane_alpha_blend@pipe-b-alpha-basic.html
    - shard-kbl:          NOTRUN -> [FAIL][83] ([fdo#108145] / [i915#265])
   [83]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-kbl2/igt@kms_plane_alpha_blend@pipe-b-alpha-basic.html

  * igt@kms_plane_alpha_blend@pipe-b-alpha-transparent-fb:
    - shard-kbl:          NOTRUN -> [FAIL][84] ([i915#265]) +1 similar issue
   [84]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-kbl2/igt@kms_plane_alpha_blend@pipe-b-alpha-transparent-fb.html

  * igt@kms_psr2_sf@overlay-plane-update-sf-dmg-area-4:
    - shard-apl:          NOTRUN -> [SKIP][85] ([fdo#109271] / [i915#658]) +3 similar issues
   [85]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-apl2/igt@kms_psr2_sf@overlay-plane-update-sf-dmg-area-4.html

  * igt@kms_psr2_sf@primary-plane-update-sf-dmg-area-1:
    - shard-kbl:          NOTRUN -> [SKIP][86] ([fdo#109271] / [i915#658]) +1 similar issue
   [86]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-kbl3/igt@kms_psr2_sf@primary-plane-update-sf-dmg-area-1.html

  * igt@kms_psr@psr2_cursor_plane_move:
    - shard-iclb:         NOTRUN -> [SKIP][87] ([fdo#109441]) +1 similar issue
   [87]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-iclb8/igt@kms_psr@psr2_cursor_plane_move.html

  * igt@kms_psr@psr2_cursor_plane_onoff:
    - shard-tglb:         NOTRUN -> [FAIL][88] ([i915#132] / [i915#3467]) +1 similar issue
   [88]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-tglb5/igt@kms_psr@psr2_cursor_plane_onoff.html

  * igt@kms_psr@psr2_primary_page_flip:
    - shard-iclb:         [PASS][89] -> [SKIP][90] ([fdo#109441])
   [89]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10442/shard-iclb2/igt@kms_psr@psr2_primary_page_flip.html
   [90]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-iclb5/igt@kms_psr@psr2_primary_page_flip.html

  * igt@kms_sysfs_edid_timing:
    - shard-apl:          NOTRUN -> [FAIL][91] ([IGT#2])
   [91]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-apl3/igt@kms_sysfs_edid_timing.html
    - shard-kbl:          NOTRUN -> [FAIL][92] ([IGT#2])
   [92]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-kbl7/igt@kms_sysfs_edid_timing.html

  * igt@kms_writeback@writeback-check-output:
    - shard-apl:          NOTRUN -> [SKIP][93] ([fdo#109271] / [i915#2437]) +1 similar issue
   [93]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-apl6/igt@kms_writeback@writeback-check-output.html

  * igt@nouveau_crc@pipe-a-ctx-flip-skip-current-frame:
    - shard-tglb:         NOTRUN -> [SKIP][94] ([i915#2530])
   [94]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-tglb6/igt@nouveau_crc@pipe-a-ctx-flip-skip-current-frame.html

  * igt@prime_nv_pcopy@test3_1:
    - shard-tglb:         NOTRUN -> [SKIP][95] ([fdo#109291]) +1 similar issue
   [95]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-tglb7/igt@prime_nv_pcopy@test3_1.html

  * igt@prime_nv_test@nv_write_i915_gtt_mmap_read:
    - shard-iclb:         NOTRUN -> [SKIP][96] ([fdo#109291]) +1 similar issue
   [96]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-iclb8/igt@prime_nv_test@nv_write_i915_gtt_mmap_read.html

  * igt@sysfs_clients@sema-50:
    - shard-iclb:         NOTRUN -> [SKIP][97] ([i915#2994])
   [97]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-iclb2/igt@sysfs_clients@sema-50.html
    - shard-kbl:          NOTRUN -> [SKIP][98] ([fdo#109271] / [i915#2994])
   [98]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-kbl2/igt@sysfs_clients@sema-50.html
    - shard-apl:          NOTRUN -> [SKIP][99] ([fdo#109271] / [i915#2994]) +1 similar issue
   [99]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-apl2/igt@sysfs_clients@sema-50.html
    - shard-glk:          NOTRUN -> [SKIP][100] ([fdo#109271] / [i915#2994])
   [100]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-glk7/igt@sysfs_clients@sema-50.html
    - shard-tglb:         NOTRUN -> [SKIP][101] ([i915#2994])
   [101]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-tglb6/igt@sysfs_clients@sema-50.html

  
#### Possible fixes ####

  * igt@gem_ctx_persistence@legacy-engines-hang@render:
    - shard-kbl:          [FAIL][102] ([i915#2410]) -> [PASS][103]
   [102]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10442/shard-kbl4/igt@gem_ctx_persistence@legacy-engines-hang@render.html
   [103]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-kbl4/igt@gem_ctx_persistence@legacy-engines-hang@render.html

  * igt@gem_exec_fair@basic-deadline:
    - shard-kbl:          [FAIL][104] ([i915#2846]) -> [PASS][105]
   [104]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10442/shard-kbl6/igt@gem_exec_fair@basic-deadline.html
   [105]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-kbl2/igt@gem_exec_fair@basic-deadline.html

  * igt@gem_exec_fair@basic-none@vcs0:
    - shard-kbl:          [FAIL][106] ([i915#2842]) -> [PASS][107]
   [106]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10442/shard-kbl7/igt@gem_exec_fair@basic-none@vcs0.html
   [107]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-kbl4/igt@gem_exec_fair@basic-none@vcs0.html

  * igt@gem_exec_fair@basic-pace-share@rcs0:
    - shard-glk:          [FAIL][108] ([i915#2842]) -> [PASS][109] +1 similar issue
   [108]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10442/shard-glk7/igt@gem_exec_fair@basic-pace-share@rcs0.html
   [109]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-glk6/igt@gem_exec_fair@basic-pace-share@rcs0.html

  * igt@gem_huc_copy@huc-copy:
    - shard-tglb:         [SKIP][110] ([i915#2190]) -> [PASS][111]
   [110]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10442/shard-tglb6/igt@gem_huc_copy@huc-copy.html
   [111]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-tglb3/igt@gem_huc_copy@huc-copy.html

  * igt@gem_mmap_gtt@cpuset-medium-copy-xy:
    - shard-iclb:         [FAIL][112] ([i915#307]) -> [PASS][113]
   [112]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10442/shard-iclb8/igt@gem_mmap_gtt@cpuset-medium-copy-xy.html
   [113]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-iclb3/igt@gem_mmap_gtt@cpuset-medium-copy-xy.html

  * igt@i915_pm_dc@dc6-psr:
    - shard-iclb:         [FAIL][114] ([i915#454]) -> [PASS][115]
   [114]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10442/shard-iclb6/igt@i915_pm_dc@dc6-psr.html
   [115]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-iclb5/igt@i915_pm_dc@dc6-psr.html

  * igt@i915_suspend@fence-restore-untiled:
    - shard-apl:          [DMESG-WARN][116] ([i915#180]) -> [PASS][117] +1 similar issue
   [116]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10442/shard-apl6/igt@i915_suspend@fence-restore-untiled.html
   [117]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-apl7/igt@i915_suspend@fence-restore-untiled.html

  * igt@kms_big_fb@linear-32bpp-rotate-180:
    - shard-glk:          [DMESG-WARN][118] ([i915#118] / [i915#95]) -> [PASS][119]
   [118]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10442/shard-glk5/igt@kms_big_fb@linear-32bpp-rotate-180.html
   [119]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-glk5/igt@kms_big_fb@linear-32bpp-rotate-180.html

  * igt@kms_cursor_crc@pipe-a-cursor-suspend:
    - shard-kbl:          [DMESG-WARN][120] ([i915#180]) -> [PASS][121] +2 similar issues
   [120]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10442/shard-kbl7/igt@kms_cursor_crc@pipe-a-cursor-suspend.html
   [121]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-kbl3/igt@kms_cursor_crc@pipe-a-cursor-suspend.html

  * igt@kms_fbcon_fbt@fbc-suspend:
    - shard-kbl:          [INCOMPLETE][122] ([i915#155] / [i915#180] / [i915#636]) -> [PASS][123]
   [122]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10442/shard-kbl4/igt@kms_fbcon_fbt@fbc-suspend.html
   [123]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-kbl6/igt@kms_fbcon_fbt@fbc-suspend.html

  * igt@kms_psr@psr2_dpms:
    - shard-iclb:         [SKIP][124] ([fdo#109441]) -> [PASS][125]
   [124]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10442/shard-iclb6/igt@kms_psr@psr2_dpms.html
   [125]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-iclb2/igt@kms_psr@psr2_dpms.html

  
#### Warnings ####

  * igt@i915_pm_rc6_residency@rc6-fence:
    - shard-iclb:         [WARN][126] ([i915#1804] / [i915#2684]) -> [WARN][127] ([i915#2684])
   [126]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10442/shard-iclb3/igt@i915_pm_rc6_residency@rc6-fence.html
   [127]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-iclb1/igt@i915_pm_rc6_residency@rc6-fence.html

  * igt@i915_pm_rc6_residency@rc6-idle:
    - shard-iclb:         [WARN][128] ([i915#2684]) -> [WARN][129] ([i915#1804] / [i915#2684])
   [128]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10442/shard-iclb1/igt@i915_pm_rc6_residency@rc6-idle.html
   [129]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-iclb3/igt@i915_pm_rc6_residency@rc6-idle.html

  * igt@kms_psr2_sf@overlay-primary-update-sf-dmg-area-4:
    - shard-iclb:         [SKIP][130] ([i915#2920]) -> [SKIP][131] ([i915#658]) +2 similar issues
   [130]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10442/shard-iclb2/igt@kms_psr2_sf@overlay-primary-update-sf-dmg-area-4.html
   [131]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/shard-iclb7/igt@kms_psr2_sf@overlay-primary-update-sf-dmg-area-4.html

  * igt@kms_psr2_sf@primary-plane-update-sf-dmg-area-4:
    - shard-iclb:         [SKIP][132] ([i915#658]) -> [SKIP][133] ([i915#2920]) +1 similar issue
   [132]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10442/shard-iclb8/igt@kms_psr2_sf@primary-plane-update-sf-dmg-area-4.html

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_6085/index.html

[-- Attachment #2: Type: text/html, Size: 34069 bytes --]

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [igt-dev] [PATCH 2/5] i915/perf: Add tests for mapped OA buffer
  2021-08-03 20:07 ` [igt-dev] [PATCH 2/5] i915/perf: Add tests for mapped OA buffer Umesh Nerlige Ramappa
@ 2021-08-23 21:31   ` Dixit, Ashutosh
  2021-08-24 18:58     ` Umesh Nerlige Ramappa
  0 siblings, 1 reply; 20+ messages in thread
From: Dixit, Ashutosh @ 2021-08-23 21:31 UTC (permalink / raw)
  To: Umesh Nerlige Ramappa; +Cc: igt-dev, Lionel G Landwerlin

On Tue, 03 Aug 2021 13:07:34 -0700, Umesh Nerlige Ramappa wrote:
>
> For applications that need a faster way to access reports in the OA
> buffer, i915 now provides a way to map the OA buffer to privileged user
> space. Validate the mapped OA buffer.
>
> v2: Fail on forked-privileged access to mapped oa buffer (Chris)

A few nits/questions below otherwise this is:

Reviewed-by: Ashutosh Dixit <ashutosh.dixit@intel.com>

> diff --git a/include/drm-uapi/i915_drm.h b/include/drm-uapi/i915_drm.h
> index a1c0030c..bb7d5e73 100644
> --- a/include/drm-uapi/i915_drm.h
> +++ b/include/drm-uapi/i915_drm.h
> @@ -2151,6 +2151,39 @@ struct drm_i915_perf_open_param {
>   */
>  #define I915_PERF_IOCTL_CONFIG	_IO('i', 0x2)
>
> +/*
> + * Returns OA buffer properties to be used with mmap.
> + *
> + * This ioctl is available in perf revision 8.
> + */
> +#define I915_PERF_IOCTL_GET_OA_BUFFER_INFO _IOWR('i', 0x3, struct drm_i915_perf_oa_buffer_info)
> +
> +/**
> + * OA buffer size and offset.
> + *
> + * OA output buffer
> + *   type: 0
> + *   flags: mbz
> + *
> + *   After querying the info, pass (size,offset) to mmap(),
> + *
> + *   mmap(0, info.size, PROT_READ, MAP_PRIVATE, perf_fd, info.offset).
> + *
> + *   Note that only a private (not shared between processes, or across fork())
> + *   read-only mmapping is allowed.
> + *
> + *   Userspace must treat the incoming data as tainted, but it conforms to the OA

What does tainted mean?

> diff --git a/tests/i915/perf.c b/tests/i915/perf.c
> index fa3840eb..4d4808ce 100644
> --- a/tests/i915/perf.c
> +++ b/tests/i915/perf.c
> @@ -5156,6 +5156,266 @@ static void test_oa_regs_whitelist(int paranoid)
>	intel_register_access_fini(&mmio_data);
>  }
>
> +#define OA_BUFFER_DATA(tail, head, oa_buffer_size) \
> +	(((tail) - (head)) & ((oa_buffer_size) - 1))
> +
> +#ifndef MAP_FAILED
> +#define MAP_FAILED ((void *)-1)
> +#endif

Shouldn't need this, should just '#include <sys/mman.h>'.

> +static uint32_t oa_status_reg(void)
> +{
> +	uint32_t status;
> +
> +	intel_register_access_init(&mmio_data, intel_get_pci_device(),
> +				   0, drm_fd);
> +	if (IS_HASWELL(devid))
> +		status = intel_register_read(&mmio_data, 0x2346) & 0x7;
> +	else if (IS_GEN12(devid))
> +		status = intel_register_read(&mmio_data, 0xdafc) & 0x7;
> +	else
> +		status = intel_register_read(&mmio_data, 0x2b08) & 0xf;

OK, looks like these can be read directly after they are whitelisted by the
kernel.

> +static void try_invalid_access(void *vaddr)
> +{
> +	sighandler_t old_sigsegv;
> +	uint32_t dummy;
> +
> +	old_sigsegv = signal(SIGSEGV, sigtrap);
> +	switch (sigsetjmp(jmp, SIGSEGV)) {
> +	case SIGSEGV:
> +		break;
> +	case 0:
> +		dummy = READ_ONCE(*((uint32_t *)vaddr + 1));

I would just read vaddr, not (vaddr + 1).

> +static void *map_oa_buffer(uint32_t *size)
> +{
> +	struct drm_i915_perf_oa_buffer_info oa_buffer = { 0 };
> +	void *vaddr;
> +
> +	do_ioctl(stream_fd, I915_PERF_IOCTL_GET_OA_BUFFER_INFO, &oa_buffer);
> +
> +	igt_debug("size        = %llu\n", oa_buffer.size);
> +	igt_debug("offset      = %llx\n", oa_buffer.offset);
> +
> +	igt_assert_eq(oa_buffer.size & (oa_buffer.size - 1), 0);
> +	igt_assert_eq(oa_status_reg(), 0);
> +
> +	vaddr = mmap(0, oa_buffer.size, PROT_READ, MAP_PRIVATE, stream_fd, oa_buffer.offset);
> +	igt_assert(vaddr != NULL);

Should probably be:
	igt_assert(vaddr != MAP_FAILED && vaddr != NULL);

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [igt-dev] [PATCH 4/5] tools/i915-perf: Add mmapped OA buffer support to i915-perf-recorder
  2021-08-03 20:07 ` [igt-dev] [PATCH 4/5] tools/i915-perf: Add mmapped OA buffer support to i915-perf-recorder Umesh Nerlige Ramappa
@ 2021-08-24  1:05   ` Dixit, Ashutosh
  2021-08-24 19:14     ` Umesh Nerlige Ramappa
  2021-08-24  1:45   ` Dixit, Ashutosh
  2021-08-24  3:50   ` Dixit, Ashutosh
  2 siblings, 1 reply; 20+ messages in thread
From: Dixit, Ashutosh @ 2021-08-24  1:05 UTC (permalink / raw)
  To: Umesh Nerlige Ramappa; +Cc: igt-dev, Lionel G Landwerlin

On Tue, 03 Aug 2021 13:07:36 -0700, Umesh Nerlige Ramappa wrote:
>
> Currently report from OA buffer are read from the perf_fd. The kernel
> patches enable mmaping the OA buffer into user space to allow for faster
> report queries across different platforms and engines.
>
> Enable OA buffer to be mmaped by the recorder tool based on command line
> option -M.

Not completely reviewed yet but some changes are needed, please see below.

> +static int gem_set_domain(int fd, uint32_t handle, uint32_t read, uint32_t write)
> +{
> +	struct drm_i915_gem_set_domain set_domain = {
> +		.handle = handle,
> +		.read_domains = read,
> +		.write_domain = write,
> +	};
> +	int ret = 0;
> +
> +	if (perf_ioctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain))

set_domain is not available for discrete, see IGT gem_set_domain().

> +static void *gem_mmap_cpu(int fd, uint32_t handle, uint64_t offset, uint64_t size,
> +			  unsigned int prot)
> +{
> +	struct drm_i915_gem_mmap arg = {
> +		.handle = handle,
> +		.offset = offset,
> +		.size = size,
> +		.addr_ptr = 0,
> +		.flags = 0,
> +	};
> +
> +	if (perf_ioctl(fd, DRM_IOCTL_I915_GEM_MMAP, &arg))

This needs to be changed to mmap_offset, DRM_IOCTL_I915_GEM_MMAP has been
discontinued for future products.

> +static void
> +bb_emit_srm(struct bb_context *bb, uint32_t reg, uint32_t devid)
> +{
> +	bool gen8_plus = devid >= 8;
> +
> +	assert(bb->reloc_idx < ARRAY_SIZE(bb->reloc));
> +	assert(bb->offset < BATCH_SIZE);
> +
> +	bb->batch[bb->offset++] = gen8_plus ? MI_STORE_REGISTER_MEM_GEN8 :
> +					      MI_STORE_REGISTER_MEM;
> +	bb->batch[bb->offset++] = reg;
> +
> +	bb->reloc[bb->reloc_idx].target_handle = bb->obj[0].handle;
> +	bb->reloc[bb->reloc_idx].presumed_offset = bb->obj[0].offset;
> +	bb->reloc[bb->reloc_idx].offset = bb->offset * sizeof(uint32_t);
> +	bb->reloc[bb->reloc_idx].delta = bb->reloc_idx * sizeof(uint32_t);
> +	bb->reloc[bb->reloc_idx].read_domains = I915_GEM_DOMAIN_RENDER;
> +	bb->reloc[bb->reloc_idx].write_domain = I915_GEM_DOMAIN_RENDER;
> +
> +	bb->batch[bb->offset++] = bb->reloc[bb->reloc_idx].delta;
> +	if (gen8_plus)
> +		bb->batch[bb->offset++] = 0;

Relocations are also not available for future products. Let's use softpin,
it is simple to do and several examples for this are already merged.

> @@ -1015,16 +1450,40 @@ main(int argc, char *argv[])
>	corr_period_ns = corr_period * 1000000000ul;
>	poll_time_ns = corr_period_ns;
>
> +	if (mmap_buffer) {
> +		ctx.zero_fd = open("/dev/zero", O_RDWR | O_CLOEXEC);

Don't we need /dev/null rather than /dev/zero? Anyway, this looks
unnecessarily complicated; why not just malloc a buffer and read repeatedly
into it?

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [igt-dev] [PATCH 4/5] tools/i915-perf: Add mmapped OA buffer support to i915-perf-recorder
  2021-08-03 20:07 ` [igt-dev] [PATCH 4/5] tools/i915-perf: Add mmapped OA buffer support to i915-perf-recorder Umesh Nerlige Ramappa
  2021-08-24  1:05   ` Dixit, Ashutosh
@ 2021-08-24  1:45   ` Dixit, Ashutosh
  2021-08-26 23:57     ` Umesh Nerlige Ramappa
  2021-08-24  3:50   ` Dixit, Ashutosh
  2 siblings, 1 reply; 20+ messages in thread
From: Dixit, Ashutosh @ 2021-08-24  1:45 UTC (permalink / raw)
  To: Umesh Nerlige Ramappa; +Cc: igt-dev, Lionel G Landwerlin

On Tue, 03 Aug 2021 13:07:36 -0700, Umesh Nerlige Ramappa wrote:
>
> +	/*
> +	 * We do not have permissions to update the OA HEAD register, so we
> +	 * would end up with a buffer lost error once the OA buffer fills up. To

s/buffer lost/data lost/ ?

> +	 * avoid that, drain the OA buffer into a zero mem device. The drain
> +	 * eventually updates the head register in i915.
> +	 */
> +	while (read(ctx->perf_fd, ctx->zero_mem, ctx->oa_buffer_size) > 0 || errno == EINTR);

Also, it looks strange that in this mmap mode userspace cannot update the
head and must use read() to re-read data which it has already read from
the mmap'd buffer. Is this an acceptable interface, and has it been
reviewed? Maybe when the OA buffer is mmap'd, i915 should not report the
data lost error (and should silently clear it), leaving it to userspace to
manage this? And anyway, I think the mmap and the read() modes are mutually
exclusive, so the data lost error will be seen only in the read() mode, not
in the mmap mode, correct? So I don't understand why this drain operation
needs to be done. Thanks.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [igt-dev] [PATCH 4/5] tools/i915-perf: Add mmapped OA buffer support to i915-perf-recorder
  2021-08-03 20:07 ` [igt-dev] [PATCH 4/5] tools/i915-perf: Add mmapped OA buffer support to i915-perf-recorder Umesh Nerlige Ramappa
  2021-08-24  1:05   ` Dixit, Ashutosh
  2021-08-24  1:45   ` Dixit, Ashutosh
@ 2021-08-24  3:50   ` Dixit, Ashutosh
  2021-08-24 18:50     ` Umesh Nerlige Ramappa
  2 siblings, 1 reply; 20+ messages in thread
From: Dixit, Ashutosh @ 2021-08-24  3:50 UTC (permalink / raw)
  To: Umesh Nerlige Ramappa; +Cc: igt-dev, Lionel G Landwerlin

On Tue, 03 Aug 2021 13:07:36 -0700, Umesh Nerlige Ramappa wrote:
>
> Currently report from OA buffer are read from the perf_fd. The kernel
> patches enable mmaping the OA buffer into user space to allow for faster
> report queries across different platforms and engines.
>
> Enable OA buffer to be mmaped by the recorder tool based on command line
> option -M.
>
> Example:
> i915-perf-recorder -m RenderBasic -s 8000 -k "mono" -M
>
> The recorder processes the mmaped OA buffer by periodically reading the
> OA TAIL PTR register from a batch and determining the number of reports
> available. These reports are then logged in the circular-buffer as
> INTEL_PERF_RECORD_TYPE_MULTIPLE_SAMPLE records. In this implementation
> the periodicity of checking the TAIL is the same as writing correlation
> timestamps (1 sec).

I haven't looked at everything, so correct me if I am wrong, but I have
another concern, about INTEL_PERF_RECORD_TYPE_MULTIPLE_SAMPLE. What is the
reason for introducing this? I would have thought that whether OA data has
been collected using reads or mmap is a property only of the recorder and
should not be exposed to the reader or gpuvis. So in the mmap case the
recorder should basically do what the kernel does, but not introduce a new
perf record type. But now we are seeing changes both in the reader and
gpuvis because we have introduced INTEL_PERF_RECORD_TYPE_MULTIPLE_SAMPLE?
Thanks.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [igt-dev] [PATCH 4/5] tools/i915-perf: Add mmapped OA buffer support to i915-perf-recorder
  2021-08-24  3:50   ` Dixit, Ashutosh
@ 2021-08-24 18:50     ` Umesh Nerlige Ramappa
  2021-08-24 19:40       ` Dixit, Ashutosh
  0 siblings, 1 reply; 20+ messages in thread
From: Umesh Nerlige Ramappa @ 2021-08-24 18:50 UTC (permalink / raw)
  To: Dixit, Ashutosh; +Cc: igt-dev, Lionel G Landwerlin

On Mon, Aug 23, 2021 at 08:50:38PM -0700, Dixit, Ashutosh wrote:
>On Tue, 03 Aug 2021 13:07:36 -0700, Umesh Nerlige Ramappa wrote:
>>
>> Currently report from OA buffer are read from the perf_fd. The kernel
>> patches enable mmaping the OA buffer into user space to allow for faster
>> report queries across different platforms and engines.
>>
>> Enable OA buffer to be mmaped by the recorder tool based on command line
>> option -M.
>>
>> Example:
>> i915-perf-recorder -m RenderBasic -s 8000 -k "mono" -M
>>
>> The recorder processes the mmaped OA buffer by periodically reading the
>> OA TAIL PTR register from a batch and determining the number of reports
>> available. These reports are then logged in the circular-buffer as
>> INTEL_PERF_RECORD_TYPE_MULTIPLE_SAMPLE records. In this implementation
>> the periodicity of checking the TAIL is the same as writing correlation
>> timestamps (1 sec).
>
>I haven't looked at everything so correct me if I am wrong but I have this
>other concern about INTEL_PERF_RECORD_TYPE_MULTIPLE_SAMPLE. What is the
>reason for introducing this? I would have thought that whether OA data has
>been collected using read's or mmap is a property only of the recorder and
>should not be exposed to the reader or gpuvis. So in the mmap case the
>recorder should basically do what the kernel does but not introduce a new
>perf record type. But now we are seeing changes both in the reader and
>gpuvis because we have introduced INTEL_PERF_RECORD_TYPE_MULTIPLE_SAMPLE?
>Thanks.

Data coming from the mmapped buffer is different from data coming from
read(). When we issue a read, the kernel attaches a record header to
each report. The data from the mmapped buffer is just raw reports
without this header. Lionel had mentioned that we add a new record here,
with multiple reports in a record, rather than add a header to each
report at this stage (which I think might make us a little slow in
reading the OA buffer).

Also note that this does not replace the old mechanism of capturing 
reports using read. Instead it's another way to capture reports.

Regards,
Umesh

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [igt-dev] [PATCH 2/5] i915/perf: Add tests for mapped OA buffer
  2021-08-23 21:31   ` Dixit, Ashutosh
@ 2021-08-24 18:58     ` Umesh Nerlige Ramappa
  2021-08-24 19:18       ` Dixit, Ashutosh
  0 siblings, 1 reply; 20+ messages in thread
From: Umesh Nerlige Ramappa @ 2021-08-24 18:58 UTC (permalink / raw)
  To: Dixit, Ashutosh; +Cc: igt-dev, Lionel G Landwerlin

On Mon, Aug 23, 2021 at 02:31:38PM -0700, Dixit, Ashutosh wrote:
>On Tue, 03 Aug 2021 13:07:34 -0700, Umesh Nerlige Ramappa wrote:
>>
>> For applications that need a faster way to access reports in the OA
>> buffer, i915 now provides a way to map the OA buffer to privileged user
>> space. Validate the mapped OA buffer.
>>
>> v2: Fail on forked-privileged access to mapped oa buffer (Chris)
>
>A few nits/questions below otherwise this is:
>
>Reviewed-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
>
>> diff --git a/include/drm-uapi/i915_drm.h b/include/drm-uapi/i915_drm.h
>> index a1c0030c..bb7d5e73 100644
>> --- a/include/drm-uapi/i915_drm.h
>> +++ b/include/drm-uapi/i915_drm.h
>> @@ -2151,6 +2151,39 @@ struct drm_i915_perf_open_param {
>>   */
>>  #define I915_PERF_IOCTL_CONFIG	_IO('i', 0x2)
>>
>> +/*
>> + * Returns OA buffer properties to be used with mmap.
>> + *
>> + * This ioctl is available in perf revision 8.
>> + */
>> +#define I915_PERF_IOCTL_GET_OA_BUFFER_INFO _IOWR('i', 0x3, struct drm_i915_perf_oa_buffer_info)
>> +
>> +/**
>> + * OA buffer size and offset.
>> + *
>> + * OA output buffer
>> + *   type: 0
>> + *   flags: mbz
>> + *
>> + *   After querying the info, pass (size,offset) to mmap(),
>> + *
>> + *   mmap(0, info.size, PROT_READ, MAP_PRIVATE, perf_fd, info.offset).
>> + *
>> + *   Note that only a private (not shared between processes, or across fork())
>> + *   read-only mmapping is allowed.
>> + *
>> + *   Userspace must treat the incoming data as tainted, but it conforms to the OA
>
>What does tainted mean?

I'd assume the data is changing as OA buffer reports are captured. I can 
change the comment to say that instead of tainted.

>
>> diff --git a/tests/i915/perf.c b/tests/i915/perf.c
>> index fa3840eb..4d4808ce 100644
>> --- a/tests/i915/perf.c
>> +++ b/tests/i915/perf.c
>> @@ -5156,6 +5156,266 @@ static void test_oa_regs_whitelist(int paranoid)
>>	intel_register_access_fini(&mmio_data);
>>  }
>>
>> +#define OA_BUFFER_DATA(tail, head, oa_buffer_size) \
>> +	(((tail) - (head)) & ((oa_buffer_size) - 1))
>> +
>> +#ifndef MAP_FAILED
>> +#define MAP_FAILED ((void *)-1)
>> +#endif
>
>Shouldn't need this, should just '#include <sys/mman.h>'.
>
>> +static uint32_t oa_status_reg(void)
>> +{
>> +	uint32_t status;
>> +
>> +	intel_register_access_init(&mmio_data, intel_get_pci_device(),
>> +				   0, drm_fd);
>> +	if (IS_HASWELL(devid))
>> +		status = intel_register_read(&mmio_data, 0x2346) & 0x7;
>> +	else if (IS_GEN12(devid))
>> +		status = intel_register_read(&mmio_data, 0xdafc) & 0x7;
>> +	else
>> +		status = intel_register_read(&mmio_data, 0x2b08) & 0xf;
>
>OK, looks like these can be read directly after they are whitelisted by the
>kernel.

This is reading from PCI MMIO space, which can happen regardless of
whitelisting. This is just for verification. Ideally we want to read
these registers from a batch, which I plan to do in a future series.

Regards,
Umesh

>
>> +static void try_invalid_access(void *vaddr)
>> +{
>> +	sighandler_t old_sigsegv;
>> +	uint32_t dummy;
>> +
>> +	old_sigsegv = signal(SIGSEGV, sigtrap);
>> +	switch (sigsetjmp(jmp, SIGSEGV)) {
>> +	case SIGSEGV:
>> +		break;
>> +	case 0:
>> +		dummy = READ_ONCE(*((uint32_t *)vaddr + 1));
>
>I would just read vaddr, not (vaddr + 1).
>
>> +static void *map_oa_buffer(uint32_t *size)
>> +{
>> +	struct drm_i915_perf_oa_buffer_info oa_buffer = { 0 };
>> +	void *vaddr;
>> +
>> +	do_ioctl(stream_fd, I915_PERF_IOCTL_GET_OA_BUFFER_INFO, &oa_buffer);
>> +
>> +	igt_debug("size        = %llu\n", oa_buffer.size);
>> +	igt_debug("offset      = %llx\n", oa_buffer.offset);
>> +
>> +	igt_assert_eq(oa_buffer.size & (oa_buffer.size - 1), 0);
>> +	igt_assert_eq(oa_status_reg(), 0);
>> +
>> +	vaddr = mmap(0, oa_buffer.size, PROT_READ, MAP_PRIVATE, stream_fd, oa_buffer.offset);
>> +	igt_assert(vaddr != NULL);
>
>Should probably be:
>	igt_assert(vaddr != MAP_FAILED && vaddr != NULL);

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [igt-dev] [PATCH 4/5] tools/i915-perf: Add mmapped OA buffer support to i915-perf-recorder
  2021-08-24  1:05   ` Dixit, Ashutosh
@ 2021-08-24 19:14     ` Umesh Nerlige Ramappa
  0 siblings, 0 replies; 20+ messages in thread
From: Umesh Nerlige Ramappa @ 2021-08-24 19:14 UTC (permalink / raw)
  To: Dixit, Ashutosh; +Cc: igt-dev, Lionel G Landwerlin

On Mon, Aug 23, 2021 at 06:05:05PM -0700, Dixit, Ashutosh wrote:
>On Tue, 03 Aug 2021 13:07:36 -0700, Umesh Nerlige Ramappa wrote:
>>
>> Currently report from OA buffer are read from the perf_fd. The kernel
>> patches enable mmaping the OA buffer into user space to allow for faster
>> report queries across different platforms and engines.
>>
>> Enable OA buffer to be mmaped by the recorder tool based on command line
>> option -M.
>
>Not completely reviewed yet but some changes are needed, please see below.
>
>> +static int gem_set_domain(int fd, uint32_t handle, uint32_t read, uint32_t write)
>> +{
>> +	struct drm_i915_gem_set_domain set_domain = {
>> +		.handle = handle,
>> +		.read_domains = read,
>> +		.write_domain = write,
>> +	};
>> +	int ret = 0;
>> +
>> +	if (perf_ioctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain))
>
>set_domain is not available for discrete, see IGT gem_set_domain().
>
>> +static void *gem_mmap_cpu(int fd, uint32_t handle, uint64_t offset, uint64_t size,
>> +			  unsigned int prot)
>> +{
>> +	struct drm_i915_gem_mmap arg = {
>> +		.handle = handle,
>> +		.offset = offset,
>> +		.size = size,
>> +		.addr_ptr = 0,
>> +		.flags = 0,
>> +	};
>> +
>> +	if (perf_ioctl(fd, DRM_IOCTL_I915_GEM_MMAP, &arg))
>
>This needs to be changed to mmap_offset, DRM_IOCTL_I915_GEM_MMAP has been
>discontinued for future products.
>
>> +static void
>> +bb_emit_srm(struct bb_context *bb, uint32_t reg, uint32_t devid)
>> +{
>> +	bool gen8_plus = devid >= 8;
>> +
>> +	assert(bb->reloc_idx < ARRAY_SIZE(bb->reloc));
>> +	assert(bb->offset < BATCH_SIZE);
>> +
>> +	bb->batch[bb->offset++] = gen8_plus ? MI_STORE_REGISTER_MEM_GEN8 :
>> +					      MI_STORE_REGISTER_MEM;
>> +	bb->batch[bb->offset++] = reg;
>> +
>> +	bb->reloc[bb->reloc_idx].target_handle = bb->obj[0].handle;
>> +	bb->reloc[bb->reloc_idx].presumed_offset = bb->obj[0].offset;
>> +	bb->reloc[bb->reloc_idx].offset = bb->offset * sizeof(uint32_t);
>> +	bb->reloc[bb->reloc_idx].delta = bb->reloc_idx * sizeof(uint32_t);
>> +	bb->reloc[bb->reloc_idx].read_domains = I915_GEM_DOMAIN_RENDER;
>> +	bb->reloc[bb->reloc_idx].write_domain = I915_GEM_DOMAIN_RENDER;
>> +
>> +	bb->batch[bb->offset++] = bb->reloc[bb->reloc_idx].delta;
>> +	if (gen8_plus)
>> +		bb->batch[bb->offset++] = 0;
>
>Relocations are also not available for future products. Let's use softpin,
>it is simple to do and several examples for this are already merged.

Thanks for sharing the new way to do these things, I will look into it.

>
>> @@ -1015,16 +1450,40 @@ main(int argc, char *argv[])
>>	corr_period_ns = corr_period * 1000000000ul;
>>	poll_time_ns = corr_period_ns;
>>
>> +	if (mmap_buffer) {
>> +		ctx.zero_fd = open("/dev/zero", O_RDWR | O_CLOEXEC);
>
>Don't we need /dev/null rather than /dev/zero? Anyway looks unnecessarily
>complicated, just malloc a buffer and read repeatedly into it?

For this case, /dev/null should work too. I did not use malloc, in order to
avoid the allocation and reading OA data into actual memory. In the case of
the zero/null device, there's no backing memory, so I thought it would be
faster to drain.

Thanks,
Umesh

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [igt-dev] [PATCH 2/5] i915/perf: Add tests for mapped OA buffer
  2021-08-24 18:58     ` Umesh Nerlige Ramappa
@ 2021-08-24 19:18       ` Dixit, Ashutosh
  0 siblings, 0 replies; 20+ messages in thread
From: Dixit, Ashutosh @ 2021-08-24 19:18 UTC (permalink / raw)
  To: Umesh Nerlige Ramappa; +Cc: igt-dev, Lionel G Landwerlin

On Tue, 24 Aug 2021 11:58:42 -0700, Umesh Nerlige Ramappa wrote:
>
> >> +/**
> >> + * OA buffer size and offset.
> >> + *
> >> + * OA output buffer
> >> + *   type: 0
> >> + *   flags: mbz
> >> + *
> >> + *   After querying the info, pass (size,offset) to mmap(),
> >> + *
> >> + *   mmap(0, info.size, PROT_READ, MAP_PRIVATE, perf_fd, info.offset).
> >> + *
> >> + *   Note that only a private (not shared between processes, or across fork())
> >> + *   read-only mmapping is allowed.
> >> + *
> >> + *   Userspace must treat the incoming data as tainted, but it conforms to the OA
> >
> > What does tainted mean?
>
> I'd assume the data is changing as OA buffer reports are captured. I can
> change the comment to say that instead of tainted.

"Changing" is again ambiguous; I think we could just say something like:

HW is continually writing data to the mapped OA buffer and it conforms to
the OA format as specified by user config. The buffer provides reports that
have OA counters - A, B and C.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [igt-dev] [PATCH 4/5] tools/i915-perf: Add mmapped OA buffer support to i915-perf-recorder
  2021-08-24 18:50     ` Umesh Nerlige Ramappa
@ 2021-08-24 19:40       ` Dixit, Ashutosh
  2021-08-24 20:03         ` Dixit, Ashutosh
  0 siblings, 1 reply; 20+ messages in thread
From: Dixit, Ashutosh @ 2021-08-24 19:40 UTC (permalink / raw)
  To: Umesh Nerlige Ramappa; +Cc: igt-dev, Lionel G Landwerlin

On Tue, 24 Aug 2021 11:50:32 -0700, Umesh Nerlige Ramappa wrote:
>

@Lionel, requesting your comments on the discussion below. Thanks.

> On Mon, Aug 23, 2021 at 08:50:38PM -0700, Dixit, Ashutosh wrote:
> > On Tue, 03 Aug 2021 13:07:36 -0700, Umesh Nerlige Ramappa wrote:
> >>
> >> Currently report from OA buffer are read from the perf_fd. The kernel
> >> patches enable mmaping the OA buffer into user space to allow for faster
> >> report queries across different platforms and engines.
> >>
> >> Enable OA buffer to be mmaped by the recorder tool based on command line
> >> option -M.
> >>
> >> Example:
> >> i915-perf-recorder -m RenderBasic -s 8000 -k "mono" -M
> >>
> >> The recorder processes the mmaped OA buffer by periodically reading the
> >> OA TAIL PTR register from a batch and determining the number of reports
> >> available. These reports are then logged in the circular-buffer as
> >> INTEL_PERF_RECORD_TYPE_MULTIPLE_SAMPLE records. In this implementation
> >> the periodicity of checking the TAIL is the same as writing correlation
> >> timestamps (1 sec).
> >
> > I haven't looked at everything so correct me if I am wrong but I have this
> > other concern about INTEL_PERF_RECORD_TYPE_MULTIPLE_SAMPLE. What is the
> > reason for introducing this? I would have thought that whether OA data has
> > been collected using read's or mmap is a property only of the recorder and
> > should not be exposed to the reader or gpuvis. So in the mmap case the
> > recorder should basically do what the kernel does but not introduce a new
> > perf record type. But now we are seeing changes both in the reader and
> > gpuvis because we have introduced INTEL_PERF_RECORD_TYPE_MULTIPLE_SAMPLE?
> > Thanks.
>
> Data coming from the mmaped buffer is different from that coming from the
> read. When we issue a read, the kernel attaches a record header to each
> report. The data from the mmapped buffer is just raw reports without this
> header. Lionel had mentioned that we add a new record here with multiple
> reports in a record rather than add a header to each report at this stage

My point is that if the recorder adds the same header which the kernel does,
we can have all changes done in one place, in the recorder, rather than in
multiple places (reader and gpuvis). I also think this should avoid making
any changes to the reader and gpuvis and so will actually be simpler.

> (which I think might make us a little slow in reading the OA buffer).

Well, we will just be doing in userspace what the kernel is already doing,
so it shouldn't be any slower than the kernel.

> Also note that this does not replace the old mechanism of capturing reports
> using read. Instead it's another way to capture reports.

Yes that is clear and it also seems (from that drain code) that we can be
doing both (reads and mmap) simultaneously.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [igt-dev] [PATCH 4/5] tools/i915-perf: Add mmapped OA buffer support to i915-perf-recorder
  2021-08-24 19:40       ` Dixit, Ashutosh
@ 2021-08-24 20:03         ` Dixit, Ashutosh
  0 siblings, 0 replies; 20+ messages in thread
From: Dixit, Ashutosh @ 2021-08-24 20:03 UTC (permalink / raw)
  To: Umesh Nerlige Ramappa; +Cc: igt-dev, Lionel G Landwerlin

On Tue, 24 Aug 2021 12:40:29 -0700, Dixit, Ashutosh wrote:
>
> > > I haven't looked at everything so correct me if I am wrong but I have this
> > > other concern about INTEL_PERF_RECORD_TYPE_MULTIPLE_SAMPLE. What is the
> > > reason for introducing this? I would have thought that whether OA data has
> > > been collected using read's or mmap is a property only of the recorder and
> > > should not be exposed to the reader or gpuvis. So in the mmap case the
> > > recorder should basically do what the kernel does but not introduce a new
> > > perf record type. But now we are seeing changes both in the reader and
> > > gpuvis because we have introduced INTEL_PERF_RECORD_TYPE_MULTIPLE_SAMPLE?
> > > Thanks.
> >
> > Data coming from the mmaped buffer is different from that coming from the
> > read. When we issue a read, the kernel attaches a record header to each
> > report. The data from the mmapped buffer is just raw reports without this
> > header. Lionel had mentioned that we add a new record here with multiple
> > reports in a record rather than add a header to each report at this stage
>
> My point is that if the recorder adds the same header which the kernel does
> we can have all changes done in one place, in the recorder, rather than
> multiple places (reader and gpuvis). I also think this should avoid making
> any changes to the reader and gpuvis and so will actually be simpler.
>
> > (which I think might make us a little slow in reading the OA buffer).
>
> Well we will just be doing in userspace what the kernel is already doing so
> it shouldn't be any slower than the kernel.
>
> > Also note that this does not replace the old mechanism of capturing reports
> > using read. Instead it's another way to capture reports.
>
> Yes that is clear and it also seems (from that drain code) that we can be
> doing both (reads and mmap) simultaneously.

Since this seems to be already implemented and probably also working, we may
still go with what we have, but I at least wanted to indicate my preference
and also see if @Lionel has any further input. Thanks.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [igt-dev] [PATCH 4/5] tools/i915-perf: Add mmapped OA buffer support to i915-perf-recorder
  2021-08-24  1:45   ` Dixit, Ashutosh
@ 2021-08-26 23:57     ` Umesh Nerlige Ramappa
  0 siblings, 0 replies; 20+ messages in thread
From: Umesh Nerlige Ramappa @ 2021-08-26 23:57 UTC (permalink / raw)
  To: Dixit, Ashutosh; +Cc: igt-dev, Lionel G Landwerlin

On Mon, Aug 23, 2021 at 06:45:22PM -0700, Dixit, Ashutosh wrote:
>On Tue, 03 Aug 2021 13:07:36 -0700, Umesh Nerlige Ramappa wrote:
>>
>> +	/*
>> +	 * We do not have permissions to update the OA HEAD register, so we
>> +	 * would end up with a buffer lost error once the OA buffer fills up. To
>
>s/buffer lost/data lost/ ?
>
>> +	 * avoid that, drain the OA buffer into a zero mem device. The drain
>> +	 * eventually updates the head register in i915.
>> +	 */
>> +	while (read(ctx->perf_fd, ctx->zero_mem, ctx->oa_buffer_size) > 0 || errno == EINTR);
>
>Also it looks strange that in this mmap mode userspace cannot update the
>head and must re-read, using read(), the data which it has already
>read from the mmap'd buffer. Is this an acceptable interface and
>has it been reviewed? Maybe when the OA buffer is mmap'd i915 should not
>report the data lost error (and silently clear it) and leave it to
>userspace to manage this? And anyway I think the mmap and the read() modes
>are mutually exclusive, so the data lost error will be seen only in the
>read() mode, not in the mmap mode, correct? So I don't understand why this
>drain operation needs to be done. Thanks.

Looks like I can do away with the drain and things still work the same 
way. I was trying to resolve the buffer overflow error, but didn't realize 
that we could just ignore the error in the mmap case. Since this code 
already maintains a head pointer and reads the buffer periodically, we 
should be good.

Thanks,
Umesh

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [igt-dev] [PATCH 1/5] i915/perf: add tests for triggered OA reports
@ 2021-08-30 19:33 Umesh Nerlige Ramappa
  0 siblings, 0 replies; 20+ messages in thread
From: Umesh Nerlige Ramappa @ 2021-08-30 19:33 UTC (permalink / raw)
  To: igt-dev, Ashutosh Dixit; +Cc: Lionel G Landwerlin

From: Lionel G Landwerlin <lionel.g.landwerlin@intel.com>

By whitelisting a couple of registers we can allow an application
batch to trigger OA reports in the OA buffer by toggling an inverter
in the condition logic back and forth.

v2: Wait before sampling the timestamp used to end the OA buffer search
v3:
- Ensure OA regs are whitelisted and reports are triggered only when
  perf_stream_paranoid is set to 0.
- Drop root to trigger reports.
v4:
- wait for children after igt_assert
- use new api for intel batch buffer
- clean up test code

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 tests/i915/perf.c | 421 ++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 404 insertions(+), 17 deletions(-)

diff --git a/tests/i915/perf.c b/tests/i915/perf.c
index e641d5d2..fa3840eb 100644
--- a/tests/i915/perf.c
+++ b/tests/i915/perf.c
@@ -53,6 +53,8 @@ IGT_TEST_DESCRIPTION("Test the i915 perf metrics streaming interface");
 #define OAREPORT_REASON_SHIFT          19
 #define OAREPORT_REASON_TIMER          (1<<0)
 #define OAREPORT_REASON_INTERNAL       (3<<1)
+#define OAREPORT_REASON_TRIGGER1       (1<<1)
+#define OAREPORT_REASON_TRIGGER2       (1<<2)
 #define OAREPORT_REASON_CTX_SWITCH     (1<<3)
 #define OAREPORT_REASON_GO             (1<<4)
 #define OAREPORT_REASON_CLK_RATIO      (1<<5)
@@ -204,6 +206,7 @@ static struct intel_perf *intel_perf = NULL;
 static struct intel_perf_metric_set *test_set = NULL;
 static bool *undefined_a_counters;
 static uint64_t oa_exp_1_millisec;
+struct intel_mmio_data mmio_data;
 
 static igt_render_copyfunc_t render_copy = NULL;
 static uint32_t (*read_report_ticks)(const uint32_t *report,
@@ -294,6 +297,23 @@ __perf_open(int fd, struct drm_i915_perf_open_param *param, bool prevent_pm)
 	return ret;
 }
 
+static int i915_perf_revision(int fd)
+{
+	drm_i915_getparam_t gp;
+	int value = 1, ret;
+
+	gp.param = I915_PARAM_PERF_REVISION;
+	gp.value = &value;
+	ret = igt_ioctl(drm_fd, DRM_IOCTL_I915_GETPARAM, &gp);
+	if (ret == -1) {
+		/* If the param is missing, consider version 1. */
+		igt_assert_eq(errno, EINVAL);
+		return 1;
+	}
+
+	return value;
+}
+
 static int
 lookup_format(int i915_perf_fmt_id)
 {
@@ -3151,6 +3171,283 @@ emit_stall_timestamp_and_rpc(struct intel_bb *ibb,
 	emit_report_perf_count(ibb, dst, report_dst_offset, report_id);
 }
 
+/* The following registers all have the same layout. */
+#define OAREPORTTRIG2 (0x2744)
+#define   OAREPORTTRIG2_INVERT_A_0  (1 << 0)
+#define   OAREPORTTRIG2_INVERT_A_1  (1 << 1)
+#define   OAREPORTTRIG2_INVERT_A_2  (1 << 2)
+#define   OAREPORTTRIG2_INVERT_A_3  (1 << 3)
+#define   OAREPORTTRIG2_INVERT_A_4  (1 << 4)
+#define   OAREPORTTRIG2_INVERT_A_5  (1 << 5)
+#define   OAREPORTTRIG2_INVERT_A_6  (1 << 6)
+#define   OAREPORTTRIG2_INVERT_A_7  (1 << 7)
+#define   OAREPORTTRIG2_INVERT_A_8  (1 << 8)
+#define   OAREPORTTRIG2_INVERT_A_9  (1 << 9)
+#define   OAREPORTTRIG2_INVERT_A_10 (1 << 10)
+#define   OAREPORTTRIG2_INVERT_A_11 (1 << 11)
+#define   OAREPORTTRIG2_INVERT_A_12 (1 << 12)
+#define   OAREPORTTRIG2_INVERT_A_13 (1 << 13)
+#define   OAREPORTTRIG2_INVERT_A_14 (1 << 14)
+#define   OAREPORTTRIG2_INVERT_A_15 (1 << 15)
+#define   OAREPORTTRIG2_INVERT_B_0  (1 << 16)
+#define   OAREPORTTRIG2_INVERT_B_1  (1 << 17)
+#define   OAREPORTTRIG2_INVERT_B_2  (1 << 18)
+#define   OAREPORTTRIG2_INVERT_B_3  (1 << 19)
+#define   OAREPORTTRIG2_INVERT_C_0  (1 << 20)
+#define   OAREPORTTRIG2_INVERT_C_1  (1 << 21)
+#define   OAREPORTTRIG2_INVERT_D_0  (1 << 22)
+#define   OAREPORTTRIG2_THRESHOLD_ENABLE      (1 << 23)
+#define   OAREPORTTRIG2_REPORT_TRIGGER_ENABLE (1 << 31)
+#define OAREPORTTRIG6 (0x2754)
+#define OA_PERF_COUNTER_A(idx) (0x2800 + 8 * (idx))
+#define GEN8_OASTATUS (0x2b08)
+
+#define GEN12_OAREPORTTRIG2 (0xd924)
+#define GEN12_OAREPORTTRIG6 (0xd934)
+#define GEN12_OAG_PERF_COUNTER_A(idx) (0xD980 + 8 * (idx))
+#define GEN12_OAG_OASTATUS (0xdafc)
+
+#define RING_FORCE_TO_NONPRIV_ADDRESS_MASK 0x03fffffc
+
+/*
+ * We have 2 trigger registers that each generate a different
+ * report reason.
+ */
+static const uint32_t gen9_oa_wl[] = {
+	OAREPORTTRIG2,
+	OAREPORTTRIG6,
+	OA_PERF_COUNTER_A(18),
+	GEN8_OASTATUS,
+};
+static const uint32_t gen12_oa_wl[] = {
+	GEN12_OAREPORTTRIG2,
+	GEN12_OAREPORTTRIG6,
+	GEN12_OAG_PERF_COUNTER_A(18),
+	GEN12_OAG_OASTATUS,
+};
+
+static const uint32_t nonpriv_slots[] = {
+	0x24d0, 0x24d4, 0x24d8, 0x24dc, 0x24e0, 0x24e4, 0x24e8, 0x24ec,
+	0x24f0, 0x24f4, 0x24f8, 0x24fc, 0x2010, 0x2014, 0x2018, 0x201c,
+	0x21e0, 0x21e4, 0x21e8, 0x21ec,
+};
+
+struct test_perf {
+	const uint32_t *slots;
+	uint32_t num_slots;
+	const uint32_t *wl;
+	uint32_t num_wl;
+} perf;
+
+static void perf_init_whitelist(void)
+{
+	perf.slots = nonpriv_slots;
+
+	if (intel_gen(devid) >= 12) {
+		perf.num_slots = 20;
+		perf.wl = gen12_oa_wl;
+		perf.num_wl = i915_perf_revision(drm_fd) < 7 ? 2 :
+			       ARRAY_SIZE(gen12_oa_wl);
+	} else {
+		perf.num_slots = 12;
+		perf.wl = gen9_oa_wl;
+		perf.num_wl = i915_perf_revision(drm_fd) < 7 ? 2 :
+			       ARRAY_SIZE(gen9_oa_wl);
+	}
+}
+
+static void
+emit_triggered_oa_report(struct intel_bb *ibb, uint32_t trigger)
+{
+	const uint32_t *triggers = perf.wl;
+
+	assert(trigger <= 1);
+
+	intel_bb_out(ibb, MI_LOAD_REGISTER_IMM);
+	intel_bb_out(ibb, triggers[trigger]);
+	intel_bb_out(ibb, OAREPORTTRIG2_INVERT_C_1 |
+			  OAREPORTTRIG2_REPORT_TRIGGER_ENABLE);
+	intel_bb_out(ibb, MI_LOAD_REGISTER_IMM);
+	intel_bb_out(ibb, triggers[trigger]);
+	intel_bb_out(ibb, OAREPORTTRIG2_INVERT_C_1 |
+			  OAREPORTTRIG2_INVERT_D_0 |
+			  OAREPORTTRIG2_REPORT_TRIGGER_ENABLE);
+}
+
+static uint64_t
+rcs_timestmap_reg_read(int fd)
+{
+	struct drm_i915_reg_read rr = {
+		.offset = 0x2358 | I915_REG_READ_8B_WA, /* render ring timestamp */
+	};
+
+	do_ioctl(fd, DRM_IOCTL_I915_REG_READ, &rr);
+
+	return rr.val;
+}
+
+/*
+ * Verify that we can trigger OA reports into the OA buffer using
+ * MI_LRI.
+ */
+static void
+test_triggered_oa_reports(int paranoid)
+{
+	int oa_exponent = max_oa_exponent_for_period_lte(1000000);
+	uint64_t properties[] = {
+		DRM_I915_PERF_PROP_CTX_HANDLE, UINT64_MAX, /* updated below */
+
+		/* Note: we have to specify at least one sample property even
+		 * though we aren't interested in samples in this case
+		 */
+		DRM_I915_PERF_PROP_SAMPLE_OA, true,
+
+		/* OA unit configuration */
+		DRM_I915_PERF_PROP_OA_METRICS_SET, test_set->perf_oa_metrics_set,
+		DRM_I915_PERF_PROP_OA_FORMAT, test_set->perf_oa_format,
+		DRM_I915_PERF_PROP_OA_EXPONENT, oa_exponent,
+
+		/* Note: an OA exponent is specified above in this case */
+	};
+	struct drm_i915_perf_open_param param = {
+		.flags = I915_PERF_FLAG_FD_CLOEXEC,
+		.num_properties = ARRAY_SIZE(properties) / 2,
+		.properties_ptr = to_user_pointer(properties),
+	};
+	struct drm_i915_perf_record_header *header;
+	struct buf_ops *bops;
+	uint32_t context;
+	struct igt_helper_process child = {};
+	struct intel_bb *ibb;
+	struct intel_buf src[2], dst[2];
+	uint64_t timestamp32_mask = (1ull << 32) - 1;
+	uint64_t timestamps[2];
+	uint32_t buf_size = 16 * 1024 * 1024;
+	uint8_t *buf = malloc(buf_size);
+	int width = 800;
+	int height = 600;
+	uint32_t trigger_counts[2] = { 0, };
+	int ret;
+
+	write_u64_file("/proc/sys/dev/i915/perf_stream_paranoid", paranoid);
+
+	do {
+		igt_fork_helper(&child) {
+			if (!paranoid)
+				igt_drop_root();
+
+			bops = buf_ops_create(drm_fd);
+
+			scratch_buf_init(bops, &src[0], width, height, 0xff0000ff);
+			scratch_buf_init(bops, &dst[0], width, height, 0x00ff00ff);
+			scratch_buf_init(bops, &src[1], 2 * width, height, 0xff0000ff);
+			scratch_buf_init(bops, &dst[1], 2 * width, height, 0x00ff00ff);
+
+			context = gem_context_create(drm_fd);
+			igt_assert(context);
+			ibb = intel_bb_create_with_context(drm_fd, context, BATCH_SZ);
+			properties[1] = context;
+
+			timestamps[0] = rcs_timestmap_reg_read(drm_fd);
+
+			stream_fd = __perf_open(drm_fd, &param, false);
+
+			emit_triggered_oa_report(ibb, 0);
+
+			render_copy(ibb,
+				    &src[0], 0, 0, width, height,
+				    &dst[0], 0, 0);
+
+			emit_triggered_oa_report(ibb, 0);
+
+			emit_triggered_oa_report(ibb, 1);
+
+			render_copy(ibb,
+				    &src[1], 0, 0, 2 * width, height,
+				    &dst[1], 0, 0);
+
+			emit_triggered_oa_report(ibb, 1);
+
+			intel_bb_flush_render(ibb);
+			intel_bb_sync(ibb);
+
+			timestamps[1] = rcs_timestmap_reg_read(drm_fd);
+
+			if (timestamps[1] < timestamps[0] ||
+			    (timestamps[1] & timestamp32_mask) < (timestamps[1] & timestamp32_mask)) {
+				igt_debug("Timestamp rollover, trying again\n");
+				exit(EAGAIN);
+			}
+
+			ret = i915_read_reports_until_timestamp(test_set->perf_oa_format,
+								buf, buf_size,
+								timestamps[0] & timestamp32_mask,
+								timestamps[1] & timestamp32_mask);
+
+			for (size_t offset = 0; offset < ret; offset += header->size) {
+				uint32_t *report;
+
+				header = (void *)(buf + offset);
+
+				igt_assert_eq(header->pad, 0); /* Reserved */
+
+				igt_assert_neq(header->type, DRM_I915_PERF_RECORD_OA_BUFFER_LOST);
+
+				if (header->type == DRM_I915_PERF_RECORD_OA_REPORT_LOST)
+					continue;
+
+				/* Currently the only other record type expected is a
+				 * _SAMPLE. Notably this test will need updating if
+				 * i915-perf is extended in the future with additional
+				 * record types.
+				 */
+				igt_assert_eq(header->type, DRM_I915_PERF_RECORD_SAMPLE);
+
+				report = (void *)(header + 1);
+
+				igt_debug("report ts=0x%08x hw_id=0x%08x reason=%s\n",
+					  report[1], report[2],
+					  gen8_read_report_reason(report));
+
+				if (gen8_report_reason(report) & OAREPORT_REASON_TRIGGER1) {
+					igt_assert_eq(trigger_counts[1], 0);
+					trigger_counts[0]++;
+				}
+				if (gen8_report_reason(report) & OAREPORT_REASON_TRIGGER2) {
+					igt_assert_eq(trigger_counts[0], 2);
+					trigger_counts[1]++;
+				}
+			}
+
+			if (paranoid) {
+				igt_assert_eq(trigger_counts[0], 0);
+				igt_assert_eq(trigger_counts[1], 0);
+			} else {
+				igt_assert_eq(trigger_counts[0], 2);
+				igt_assert_eq(trigger_counts[1], 2);
+			}
+
+			for (int i = 0; i < ARRAY_SIZE(src); i++) {
+				intel_buf_close(bops, &src[i]);
+				intel_buf_close(bops, &dst[i]);
+			}
+
+			intel_bb_destroy(ibb);
+			gem_context_destroy(drm_fd, context);
+			buf_ops_destroy(bops);
+			__perf_close(stream_fd);
+		}
+
+		ret = igt_wait_helper(&child);
+
+		igt_assert(WEXITSTATUS(ret) == EAGAIN ||
+			   WEXITSTATUS(ret) == 0);
+
+	} while (WEXITSTATUS(ret) == EAGAIN);
+
+	free(buf);
+}
+
 /* Tests the INTEL_performance_query use case where an unprivileged process
  * should be able to configure the OA unit for per-context metrics (for a
  * context associated with that process' drm file descriptor) and the counters
@@ -4777,6 +5074,88 @@ test_whitelisted_registers_userspace_config(void)
 	i915_perf_remove_config(drm_fd, config_id);
 }
 
+static void dump_whitelist(const char *msg)
+{
+	int i;
+
+	igt_debug("%s\n", msg);
+
+	for (i = 0; i < perf.num_slots; i++)
+		igt_debug("FORCE_TO_NON_PRIV_%02d = %08x\n",
+			  i, intel_register_read(&mmio_data, perf.slots[i]));
+}
+
+static bool in_whitelist(uint32_t reg)
+{
+	int i;
+
+	for (i = 0; i < perf.num_slots; i++) {
+		uint32_t fpriv = intel_register_read(&mmio_data, perf.slots[i]);
+
+		if ((fpriv & RING_FORCE_TO_NONPRIV_ADDRESS_MASK) == reg)
+			return true;
+	}
+
+	return false;
+}
+
+static void oa_regs_in_whitelist(bool are_present)
+{
+	int i;
+
+	if (are_present) {
+		for (i = 0; i < perf.num_wl; i++)
+			igt_assert(in_whitelist(perf.wl[i]));
+	} else {
+		for (i = 0; i < perf.num_wl; i++)
+			igt_assert(!in_whitelist(perf.wl[i]));
+	}
+}
+
+static void test_oa_regs_whitelist(int paranoid)
+{
+	uint64_t properties[] = {
+		DRM_I915_PERF_PROP_SAMPLE_OA, true,
+		DRM_I915_PERF_PROP_OA_METRICS_SET, test_set->perf_oa_metrics_set,
+		DRM_I915_PERF_PROP_OA_FORMAT, test_set->perf_oa_format,
+		DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp_1_millisec,
+
+	};
+	struct drm_i915_perf_open_param param = {
+		.flags = I915_PERF_FLAG_FD_CLOEXEC,
+		.num_properties = sizeof(properties) / 16,
+		.properties_ptr = to_user_pointer(properties),
+	};
+	write_u64_file("/proc/sys/dev/i915/perf_stream_paranoid", paranoid);
+	intel_register_access_init(&mmio_data, intel_get_pci_device(),
+				   0, drm_fd);
+	stream_fd = __perf_open(drm_fd, &param, false);
+
+	dump_whitelist("oa whitelisted");
+
+	/*
+	 * oa registers are whitelisted only if paranoid = 0. if so, make sure
+	 * that the registers are in the nonpriv slots. if not, make sure the
+	 * registers are NOT present in the nonpriv slots.
+	 */
+	if (paranoid)
+		oa_regs_in_whitelist(false);
+	else
+		oa_regs_in_whitelist(true);
+
+	__perf_close(stream_fd);
+
+	dump_whitelist("oa remove whitelist");
+
+	/*
+	 * after perf close, check that registers are removed from the nonpriv
+	 * slots
+	 */
+	oa_regs_in_whitelist(false);
+
+	intel_register_access_fini(&mmio_data);
+}
+
 static unsigned
 read_i915_module_ref(void)
 {
@@ -4889,23 +5268,6 @@ test_sysctl_defaults(void)
 	igt_assert_eq(max_freq, 100000);
 }
 
-static int i915_perf_revision(int fd)
-{
-	drm_i915_getparam_t gp;
-	int value = 1, ret;
-
-	gp.param = I915_PARAM_PERF_REVISION;
-	gp.value = &value;
-	ret = igt_ioctl(drm_fd, DRM_IOCTL_I915_GETPARAM, &gp);
-	if (ret == -1) {
-		/* If the param is missing, consider version 1. */
-		igt_assert_eq(errno, EINVAL);
-		return 1;
-	}
-
-	return value;
-}
-
 igt_main
 {
 	igt_fixture {
@@ -5119,6 +5481,31 @@ igt_main
 	igt_subtest("whitelisted-registers-userspace-config")
 		test_whitelisted_registers_userspace_config();
 
+
+	igt_subtest_group {
+		igt_fixture {
+			igt_require(intel_gen(devid) > 8);
+			igt_require(i915_perf_revision(drm_fd) >= 6);
+			perf_init_whitelist();
+		}
+
+		igt_describe("Verify that OA registers are whitelisted for paranoid 0");
+		igt_subtest("oa-regs-whitelisted")
+			test_oa_regs_whitelist(0);
+
+		igt_describe("Verify that OA registers are not whitelisted for paranoid 1");
+		igt_subtest("oa-regs-not-whitelisted")
+			test_oa_regs_whitelist(1);
+
+		igt_describe("Verify reports triggered when perf_stream_paranoid is 0");
+		igt_subtest("triggered-oa-reports-paranoid-0")
+			test_triggered_oa_reports(0);
+
+		igt_describe("Verify reports not triggered when perf_stream_paranoid is 1");
+		igt_subtest("triggered-oa-reports-paranoid-1")
+			test_triggered_oa_reports(1);
+	}
+
 	igt_fixture {
 		/* leave sysctl options in their default state... */
 		write_u64_file("/proc/sys/dev/i915/oa_max_sample_rate", 100000);
-- 
2.20.1

^ permalink raw reply related	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2021-08-30 19:33 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-08-03 20:07 [igt-dev] [PATCH 1/5] i915/perf: add tests for triggered OA reports Umesh Nerlige Ramappa
2021-08-03 20:07 ` [igt-dev] [PATCH 2/5] i915/perf: Add tests for mapped OA buffer Umesh Nerlige Ramappa
2021-08-23 21:31   ` Dixit, Ashutosh
2021-08-24 18:58     ` Umesh Nerlige Ramappa
2021-08-24 19:18       ` Dixit, Ashutosh
2021-08-03 20:07 ` [igt-dev] [PATCH 3/5] lib/i915/perf: Add new record for mmaped " Umesh Nerlige Ramappa
2021-08-03 20:07 ` [igt-dev] [PATCH 4/5] tools/i915-perf: Add mmapped OA buffer support to i915-perf-recorder Umesh Nerlige Ramappa
2021-08-24  1:05   ` Dixit, Ashutosh
2021-08-24 19:14     ` Umesh Nerlige Ramappa
2021-08-24  1:45   ` Dixit, Ashutosh
2021-08-26 23:57     ` Umesh Nerlige Ramappa
2021-08-24  3:50   ` Dixit, Ashutosh
2021-08-24 18:50     ` Umesh Nerlige Ramappa
2021-08-24 19:40       ` Dixit, Ashutosh
2021-08-24 20:03         ` Dixit, Ashutosh
2021-08-03 20:07 ` [igt-dev] [PATCH 5/5] tools/i915-perf: Add a command to trigger a report in OA buffer Umesh Nerlige Ramappa
2021-08-03 20:39 ` [igt-dev] ✓ Fi.CI.BAT: success for series starting with [1/5] i915/perf: add tests for triggered OA reports Patchwork
2021-08-03 23:21 ` [igt-dev] ✗ GitLab.Pipeline: warning " Patchwork
2021-08-04 20:13 ` [igt-dev] ✓ Fi.CI.IGT: success " Patchwork
2021-08-30 19:33 [igt-dev] [PATCH 1/5] " Umesh Nerlige Ramappa

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.