All of lore.kernel.org
 help / color / mirror / Atom feed
* [igt-dev] [PATCH i-g-t] i915/perf: Make __perf_open() and friends public
@ 2023-02-07 10:11 Janusz Krzysztofik
  2023-02-07 11:38 ` [igt-dev] ✗ Fi.CI.BAT: failure for " Patchwork
                   ` (2 more replies)
  0 siblings, 3 replies; 18+ messages in thread
From: Janusz Krzysztofik @ 2023-02-07 10:11 UTC (permalink / raw)
  To: Kamil Konieczny; +Cc: igt-dev, Chris Wilson

We need new subtests that exercise interaction between i915 perf open/
close and other i915 subsystems from the point of view of those other
subsystems.  Allow other tests to reuse __perf_open/close() family of
functions, now inside i915/perf test, by moving (sharable parts of)
them to i915/perf library.

Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik@linux.intel.com>
---
 lib/i915/perf.c   | 130 ++++++++++++++++++++++++++++++++++++++++++++++
 lib/i915/perf.h   |  15 ++++++
 lib/meson.build   |   1 +
 tests/i915/perf.c | 121 ++++++++++--------------------------------
 4 files changed, 174 insertions(+), 93 deletions(-)

diff --git a/lib/i915/perf.c b/lib/i915/perf.c
index 6c7a192558..e71d637eb5 100644
--- a/lib/i915/perf.c
+++ b/lib/i915/perf.c
@@ -39,7 +39,9 @@
 
 #include "i915_pciids.h"
 
+#include "igt_aux.h"
 #include "intel_chipset.h"
+#include "ioctl_wrappers.h"
 #include "perf.h"
 
 #include "i915_perf_metrics_hsw.h"
@@ -1008,3 +1010,131 @@ const char *intel_perf_read_report_reason(const struct intel_perf *perf,
 
 	return "unknown";
 }
+
+uint64_t i915_perf_timebase_scale(struct intel_perf *intel_perf, uint32_t u32_delta)
+{
+	return ((uint64_t)u32_delta * NSEC_PER_SEC) / intel_perf->devinfo.timestamp_frequency;
+}
+
+/* Returns: the largest OA exponent that will still result in a sampling period
+ * less than or equal to the given @period.
+ */
+int i915_perf_max_oa_exponent_for_period_lte(struct intel_perf *intel_perf, uint64_t period)
+{
+	/* NB: timebase_scale() takes a uint32_t and an exponent of 30
+	 * would already represent a period of ~3 minutes so there's
+	 * really no need to consider higher exponents.
+	 */
+	for (int i = 0; i < 30; i++) {
+		uint64_t oa_period = i915_perf_timebase_scale(intel_perf, 2 << i);
+
+		if (oa_period > period)
+			return max(0, i - 1);
+	}
+
+	igt_assert(!"reached");
+	return -1;
+}
+
+struct intel_perf_metric_set *i915_perf_default_set(struct intel_perf *intel_perf, uint32_t devid)
+{
+	struct intel_perf_metric_set *metric_set = NULL, *metric_set_iter;
+	const char *metric_set_name = NULL;
+
+	igt_assert_neq(devid, 0);
+
+	/*
+	 * We don't have a TestOa metric set for Haswell so use
+	 * RenderBasic
+	 */
+	if (IS_HASWELL(devid))
+		metric_set_name = "RenderBasic";
+	else
+		metric_set_name = "TestOa";
+
+	igt_list_for_each_entry(metric_set_iter, &intel_perf->metric_sets, link) {
+		if (strcmp(metric_set_iter->symbol_name, metric_set_name) == 0) {
+			metric_set = metric_set_iter;
+			break;
+		}
+	}
+
+	return metric_set;
+}
+
+struct intel_perf *i915_perf_init_sys_info(int drm_fd)
+{
+	struct intel_perf *intel_perf;
+
+	intel_perf = intel_perf_for_fd(drm_fd);
+	if (!intel_perf)
+		return NULL;
+
+	igt_debug("n_eu_slices: %"PRIu64"\n", intel_perf->devinfo.n_eu_slices);
+	igt_debug("n_eu_sub_slices: %"PRIu64"\n", intel_perf->devinfo.n_eu_sub_slices);
+	igt_debug("n_eus: %"PRIu64"\n", intel_perf->devinfo.n_eus);
+	igt_debug("timestamp_frequency = %"PRIu64"\n",
+		  intel_perf->devinfo.timestamp_frequency);
+	igt_assert_neq(intel_perf->devinfo.timestamp_frequency, 0);
+
+	intel_perf_load_perf_configs(intel_perf, drm_fd);
+
+	return intel_perf;
+}
+
+int i915_perf_open(int drm_fd, struct drm_i915_perf_open_param *param, int *pm_fd)
+{
+	int32_t pm_value = 0;
+	int ret;
+
+	ret = perf_ioctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, param);
+
+	igt_assert(ret >= 0);
+	errno = 0;
+
+	if (pm_fd) {
+		*pm_fd = open("/dev/cpu_dma_latency", O_RDWR);
+		igt_assert(*pm_fd >= 0);
+
+		igt_assert_eq(write(*pm_fd, &pm_value, sizeof(pm_value)), sizeof(pm_value));
+	}
+
+	return ret;
+}
+
+int i915_perf_open_for_devid(int drm_fd, uint32_t devid, struct intel_perf *intel_perf, int *pm_fd)
+{
+	struct intel_perf_metric_set *metric_set = i915_perf_default_set(intel_perf, devid);
+	uint64_t oa_exp = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 1000000);
+	uint64_t properties[] = {
+		DRM_I915_PERF_PROP_SAMPLE_OA, true,
+		DRM_I915_PERF_PROP_OA_METRICS_SET, 0,
+		DRM_I915_PERF_PROP_OA_FORMAT, 0,
+		DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp,
+	};
+	struct drm_i915_perf_open_param param = {
+		.flags = I915_PERF_FLAG_FD_CLOEXEC,
+		.num_properties = sizeof(properties) / 16,
+		.properties_ptr = to_user_pointer(properties),
+	};
+
+	igt_assert(metric_set);
+	igt_assert(metric_set->perf_oa_metrics_set);
+	igt_assert(oa_exp >= 0);
+
+	igt_debug("%s metric set UUID = %s\n",
+		  metric_set->symbol_name,
+		  metric_set->hw_config_guid);
+
+	properties[3] = metric_set->perf_oa_metrics_set;
+	properties[5] = metric_set->perf_oa_format;
+
+	return i915_perf_open(drm_fd, &param, pm_fd);
+}
+
+void i915_perf_close(int stream_fd, int pm_fd)
+{
+	close(stream_fd);
+	if (pm_fd >= 0)
+		close(pm_fd);
+}
diff --git a/lib/i915/perf.h b/lib/i915/perf.h
index e6e60dc997..c9cd28be47 100644
--- a/lib/i915/perf.h
+++ b/lib/i915/perf.h
@@ -351,6 +351,21 @@ uint64_t intel_perf_read_record_timestamp_raw(const struct intel_perf *perf,
 const char *intel_perf_read_report_reason(const struct intel_perf *perf,
 					  const struct drm_i915_perf_record_header *record);
 
+uint64_t i915_perf_timebase_scale(struct intel_perf *intel_perf, uint32_t u32_delta);
+
+int i915_perf_max_oa_exponent_for_period_lte(struct intel_perf *intel_perf, uint64_t period);
+
+struct intel_perf_metric_set *i915_perf_default_set(struct intel_perf *intel_perf, uint32_t devid);
+
+struct intel_perf *i915_perf_init_sys_info(int drm_fd);
+
+struct drm_i915_perf_open_param;
+int i915_perf_open(int drm_fd, struct drm_i915_perf_open_param *param, int *pm_fd);
+
+int i915_perf_open_for_devid(int drm_fd, uint32_t devid, struct intel_perf *intel_perf, int *pm_fd);
+
+void i915_perf_close(int drm_fd, int pm_fd);
+
 #ifdef __cplusplus
 };
 #endif
diff --git a/lib/meson.build b/lib/meson.build
index d49b78ca1a..e79b31090b 100644
--- a/lib/meson.build
+++ b/lib/meson.build
@@ -258,6 +258,7 @@ lib_igt_drm_fdinfo = declare_dependency(link_with : lib_igt_drm_fdinfo_build,
 				  include_directories : inc)
 i915_perf_files = [
   'igt_list.c',
+  'igt_tools_stub.c',
   'i915/perf.c',
   'i915/perf_data_reader.c',
 ]
diff --git a/tests/i915/perf.c b/tests/i915/perf.c
index dd1f1ac399..a3f59d143b 100644
--- a/tests/i915/perf.c
+++ b/tests/i915/perf.c
@@ -287,21 +287,16 @@ pretty_print_oa_period(uint64_t oa_period_ns)
 static void
 __perf_close(int fd)
 {
-	close(fd);
+	i915_perf_close(fd, pm_fd);
 	stream_fd = -1;
 
-	if (pm_fd >= 0) {
-		close(pm_fd);
+	if (pm_fd >= 0)
 		pm_fd = -1;
-	}
 }
 
 static int
 __perf_open(int fd, struct drm_i915_perf_open_param *param, bool prevent_pm)
 {
-	int ret;
-	int32_t pm_value = 0;
-
 	if (stream_fd >= 0)
 		__perf_close(stream_fd);
 	if (pm_fd >= 0) {
@@ -309,19 +304,7 @@ __perf_open(int fd, struct drm_i915_perf_open_param *param, bool prevent_pm)
 		pm_fd = -1;
 	}
 
-	ret = igt_ioctl(fd, DRM_IOCTL_I915_PERF_OPEN, param);
-
-	igt_assert(ret >= 0);
-	errno = 0;
-
-	if (prevent_pm) {
-		pm_fd = open("/dev/cpu_dma_latency", O_RDWR);
-		igt_assert(pm_fd >= 0);
-
-		igt_assert_eq(write(pm_fd, &pm_value, sizeof(pm_value)), sizeof(pm_value));
-	}
-
-	return ret;
+	return i915_perf_open(fd, param, prevent_pm ? &pm_fd : NULL);
 }
 
 static int
@@ -465,33 +448,6 @@ cs_timebase_scale(uint32_t u32_delta)
 	return ((uint64_t)u32_delta * NSEC_PER_SEC) / cs_timestamp_frequency(drm_fd);
 }
 
-static uint64_t
-timebase_scale(uint32_t u32_delta)
-{
-	return ((uint64_t)u32_delta * NSEC_PER_SEC) / intel_perf->devinfo.timestamp_frequency;
-}
-
-/* Returns: the largest OA exponent that will still result in a sampling period
- * less than or equal to the given @period.
- */
-static int
-max_oa_exponent_for_period_lte(uint64_t period)
-{
-	/* NB: timebase_scale() takes a uint32_t and an exponent of 30
-	 * would already represent a period of ~3 minutes so there's
-	 * really no need to consider higher exponents.
-	 */
-	for (int i = 0; i < 30; i++) {
-		uint64_t oa_period = timebase_scale(2 << i);
-
-		if (oa_period > period)
-			return max(0, i - 1);
-	}
-
-	igt_assert(!"reached");
-	return -1;
-}
-
 /* Return: the largest OA exponent that will still result in a sampling
  * frequency greater than the given @frequency.
  */
@@ -502,7 +458,7 @@ max_oa_exponent_for_freq_gt(uint64_t frequency)
 
 	igt_assert_neq(period, 0);
 
-	return max_oa_exponent_for_period_lte(period - 1);
+	return i915_perf_max_oa_exponent_for_period_lte(intel_perf, period - 1);
 }
 
 static uint64_t
@@ -626,7 +582,7 @@ hsw_sanity_check_render_basic_reports(const uint32_t *oa_report0,
 				      const uint32_t *oa_report1,
 				      enum drm_i915_oa_format fmt)
 {
-	uint32_t time_delta = timebase_scale(oa_report1[1] - oa_report0[1]);
+	uint32_t time_delta = i915_perf_timebase_scale(intel_perf, oa_report1[1] - oa_report0[1]);
 	uint32_t clock_delta;
 	uint32_t max_delta;
 	struct oa_format format = get_oa_format(fmt);
@@ -832,7 +788,7 @@ gen8_sanity_check_test_oa_reports(const uint32_t *oa_report0,
 				  enum drm_i915_oa_format fmt)
 {
 	struct oa_format format = get_oa_format(fmt);
-	uint32_t time_delta = timebase_scale(oa_report1[1] - oa_report0[1]);
+	uint32_t time_delta = i915_perf_timebase_scale(intel_perf, oa_report1[1] - oa_report0[1]);
 	uint32_t ticks0 = read_report_ticks(oa_report0, fmt);
 	uint32_t ticks1 = read_report_ticks(oa_report1, fmt);
 	uint32_t clock_delta = ticks1 - ticks0;
@@ -950,43 +906,22 @@ gen8_sanity_check_test_oa_reports(const uint32_t *oa_report0,
 static bool
 init_sys_info(void)
 {
-	const char *test_set_name = NULL;
-	struct intel_perf_metric_set *metric_set_iter;
-
 	igt_assert_neq(devid, 0);
 
-	intel_perf = intel_perf_for_fd(drm_fd);
+	intel_perf = i915_perf_init_sys_info(drm_fd);
 	igt_require(intel_perf);
 
-	igt_debug("n_eu_slices: %"PRIu64"\n", intel_perf->devinfo.n_eu_slices);
-	igt_debug("n_eu_sub_slices: %"PRIu64"\n", intel_perf->devinfo.n_eu_sub_slices);
-	igt_debug("n_eus: %"PRIu64"\n", intel_perf->devinfo.n_eus);
-	igt_debug("timestamp_frequency = %"PRIu64"\n",
-		  intel_perf->devinfo.timestamp_frequency);
-	igt_assert_neq(intel_perf->devinfo.timestamp_frequency, 0);
-
-	/* We don't have a TestOa metric set for Haswell so use
-	 * RenderBasic
-	 */
 	if (IS_HASWELL(devid)) {
-		test_set_name = "RenderBasic";
 		read_report_ticks = hsw_read_report_ticks;
 		sanity_check_reports = hsw_sanity_check_render_basic_reports;
 		undefined_a_counters = hsw_undefined_a_counters;
 	} else {
-		test_set_name = "TestOa";
 		read_report_ticks = gen8_read_report_ticks;
 		sanity_check_reports = gen8_sanity_check_test_oa_reports;
 		undefined_a_counters = gen8_undefined_a_counters;
 	}
 
-	igt_list_for_each_entry(metric_set_iter, &intel_perf->metric_sets, link) {
-		if (strcmp(metric_set_iter->symbol_name, test_set_name) == 0) {
-			test_set = metric_set_iter;
-			break;
-		}
-	}
-
+	test_set = i915_perf_default_set(intel_perf, devid);
 	if (!test_set)
 		return false;
 
@@ -994,14 +929,12 @@ init_sys_info(void)
 		  test_set->symbol_name,
 		  test_set->hw_config_guid);
 
-	intel_perf_load_perf_configs(intel_perf, drm_fd);
-
 	if (test_set->perf_oa_metrics_set == 0) {
 		igt_debug("Unable to load configurations\n");
 		return false;
 	}
 
-	oa_exp_1_millisec = max_oa_exponent_for_period_lte(1000000);
+	oa_exp_1_millisec = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 1000000);
 
 	return true;
 }
@@ -1911,7 +1844,7 @@ test_low_oa_exponent_permissions(void)
 
 	igt_waitchildren();
 
-	oa_period = timebase_scale(2 << ok_exponent);
+	oa_period = i915_perf_timebase_scale(intel_perf, 2 << ok_exponent);
 	oa_freq = NSEC_PER_SEC / oa_period;
 	write_u64_file("/proc/sys/dev/i915/oa_max_sample_rate", oa_freq - 100);
 
@@ -2003,7 +1936,7 @@ get_time(void)
 static void
 test_blocking(uint64_t requested_oa_period, bool set_kernel_hrtimer, uint64_t kernel_hrtimer)
 {
-	int oa_exponent = max_oa_exponent_for_period_lte(requested_oa_period);
+	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, requested_oa_period);
 	uint64_t oa_period = oa_exponent_to_ns(oa_exponent);
 	uint64_t properties[] = {
 		/* Include OA reports in samples */
@@ -2162,7 +2095,7 @@ test_blocking(uint64_t requested_oa_period, bool set_kernel_hrtimer, uint64_t ke
 static void
 test_polling(uint64_t requested_oa_period, bool set_kernel_hrtimer, uint64_t kernel_hrtimer)
 {
-	int oa_exponent = max_oa_exponent_for_period_lte(requested_oa_period);
+	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, requested_oa_period);
 	uint64_t oa_period = oa_exponent_to_ns(oa_exponent);
 	uint64_t properties[] = {
 		/* Include OA reports in samples */
@@ -2358,7 +2291,7 @@ test_polling(uint64_t requested_oa_period, bool set_kernel_hrtimer, uint64_t ker
 
 static void test_polling_small_buf(void)
 {
-	int oa_exponent = max_oa_exponent_for_period_lte(40 * 1000); /* 40us */
+	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 40 * 1000); /* 40us */
 	uint64_t properties[] = {
 		/* Include OA reports in samples */
 		DRM_I915_PERF_PROP_SAMPLE_OA, true,
@@ -2461,7 +2394,7 @@ num_valid_reports_captured(struct drm_i915_perf_open_param *param,
 static void
 gen12_test_oa_tlb_invalidate(void)
 {
-	int oa_exponent = max_oa_exponent_for_period_lte(30000000);
+	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 30000000);
 	uint64_t properties[] = {
 		DRM_I915_PERF_PROP_SAMPLE_OA, true,
 
@@ -2503,7 +2436,7 @@ static void
 test_buffer_fill(void)
 {
 	/* ~5 micro second period */
-	int oa_exponent = max_oa_exponent_for_period_lte(5000);
+	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 5000);
 	uint64_t oa_period = oa_exponent_to_ns(oa_exponent);
 	uint64_t properties[] = {
 		/* Include OA reports in samples */
@@ -2651,7 +2584,7 @@ static void
 test_non_zero_reason(void)
 {
 	/* ~20 micro second period */
-	int oa_exponent = max_oa_exponent_for_period_lte(20000);
+	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 20000);
 	uint64_t properties[] = {
 		/* Include OA reports in samples */
 		DRM_I915_PERF_PROP_SAMPLE_OA, true,
@@ -2734,7 +2667,7 @@ static void
 test_enable_disable(void)
 {
 	/* ~5 micro second period */
-	int oa_exponent = max_oa_exponent_for_period_lte(5000);
+	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 5000);
 	uint64_t oa_period = oa_exponent_to_ns(oa_exponent);
 	uint64_t properties[] = {
 		/* Include OA reports in samples */
@@ -2885,7 +2818,7 @@ test_enable_disable(void)
 static void
 test_short_reads(void)
 {
-	int oa_exponent = max_oa_exponent_for_period_lte(5000);
+	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 5000);
 	uint64_t properties[] = {
 		/* Include OA reports in samples */
 		DRM_I915_PERF_PROP_SAMPLE_OA, true,
@@ -3447,8 +3380,8 @@ hsw_test_single_ctx_counters(void)
 
 		/* sanity check that we can pass the delta to timebase_scale */
 		igt_assert(delta_ts64 < UINT32_MAX);
-		delta_oa32_ns = timebase_scale(delta_oa32);
-		delta_ts64_ns = timebase_scale(delta_ts64);
+		delta_oa32_ns = i915_perf_timebase_scale(intel_perf, delta_oa32);
+		delta_ts64_ns = i915_perf_timebase_scale(intel_perf, delta_ts64);
 
 		igt_debug("ts32 delta = %u, = %uns\n",
 			  delta_oa32, (unsigned)delta_oa32_ns);
@@ -3498,7 +3431,7 @@ hsw_test_single_ctx_counters(void)
 static void
 gen8_test_single_ctx_render_target_writes_a_counter(void)
 {
-	int oa_exponent = max_oa_exponent_for_period_lte(1000000);
+	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 1000000);
 	uint64_t properties[] = {
 		DRM_I915_PERF_PROP_CTX_HANDLE, UINT64_MAX, /* updated below */
 
@@ -3700,8 +3633,8 @@ gen8_test_single_ctx_render_target_writes_a_counter(void)
 
 			/* sanity check that we can pass the delta to timebase_scale */
 			igt_assert(delta_ts64 < UINT32_MAX);
-			delta_oa32_ns = timebase_scale(delta_oa32);
-			delta_ts64_ns = timebase_scale(delta_ts64);
+			delta_oa32_ns = i915_perf_timebase_scale(intel_perf, delta_oa32);
+			delta_ts64_ns = i915_perf_timebase_scale(intel_perf, delta_ts64);
 
 			igt_debug("oa32 delta = %u, = %uns\n",
 				  delta_oa32, (unsigned)delta_oa32_ns);
@@ -3783,7 +3716,8 @@ gen8_test_single_ctx_render_target_writes_a_counter(void)
 				{
 					uint32_t time_delta = report[1] - report0_32[1];
 
-					if (timebase_scale(time_delta) > 1000000000) {
+					if (i915_perf_timebase_scale(intel_perf,
+								     time_delta) > 1000000000) {
 						skip_reason = "prior first mi-rpc";
 					}
 				}
@@ -3791,7 +3725,8 @@ gen8_test_single_ctx_render_target_writes_a_counter(void)
 				{
 					uint32_t time_delta = report[1] - report1_32[1];
 
-					if (timebase_scale(time_delta) <= 1000000000) {
+					if (i915_perf_timebase_scale(intel_perf,
+								     time_delta) <= 1000000000) {
 						igt_debug("    comes after last MI_RPC (%u)\n",
 							  report1_32[1]);
 						report = report1_32;
@@ -4164,7 +4099,7 @@ static void gen12_single_ctx_helper(void)
 
 	/* Sanity check that we can pass the delta to timebase_scale */
 	igt_assert(delta_ts64 < UINT32_MAX);
-	delta_oa32_ns = timebase_scale(delta_oa32);
+	delta_oa32_ns = i915_perf_timebase_scale(intel_perf, delta_oa32);
 	delta_ts64_ns = cs_timebase_scale(delta_ts64);
 
 	igt_debug("oa32 delta = %u, = %uns\n",
-- 
2.25.1

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [igt-dev] ✗ Fi.CI.BAT: failure for i915/perf: Make __perf_open() and friends public
  2023-02-07 10:11 [igt-dev] [PATCH i-g-t] i915/perf: Make __perf_open() and friends public Janusz Krzysztofik
@ 2023-02-07 11:38 ` Patchwork
  2023-02-07 18:16 ` [igt-dev] [PATCH i-g-t] " Kamil Konieczny
  2023-02-07 19:25 ` Umesh Nerlige Ramappa
  2 siblings, 0 replies; 18+ messages in thread
From: Patchwork @ 2023-02-07 11:38 UTC (permalink / raw)
  To: Janusz Krzysztofik; +Cc: igt-dev

[-- Attachment #1: Type: text/plain, Size: 4723 bytes --]

== Series Details ==

Series: i915/perf: Make __perf_open() and friends public
URL   : https://patchwork.freedesktop.org/series/113733/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_12708 -> IGTPW_8456
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with IGTPW_8456 absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in IGTPW_8456, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_8456/index.html

Participating hosts (36 -> 36)
------------------------------

  Additional (1): bat-atsm-1 
  Missing    (1): fi-snb-2520m 

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in IGTPW_8456:

### IGT changes ###

#### Possible regressions ####

  * igt@dmabuf@all-tests@dma_fence:
    - fi-cfl-8109u:       [PASS][1] -> [DMESG-FAIL][2]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12708/fi-cfl-8109u/igt@dmabuf@all-tests@dma_fence.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_8456/fi-cfl-8109u/igt@dmabuf@all-tests@dma_fence.html

  * igt@dmabuf@all-tests@sanitycheck:
    - fi-cfl-8109u:       [PASS][3] -> [ABORT][4]
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12708/fi-cfl-8109u/igt@dmabuf@all-tests@sanitycheck.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_8456/fi-cfl-8109u/igt@dmabuf@all-tests@sanitycheck.html

  
Known issues
------------

  Here are the changes found in IGTPW_8456 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_exec_gttfill@basic:
    - fi-pnv-d510:        [PASS][5] -> [FAIL][6] ([i915#7229])
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12708/fi-pnv-d510/igt@gem_exec_gttfill@basic.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_8456/fi-pnv-d510/igt@gem_exec_gttfill@basic.html

  
#### Possible fixes ####

  * igt@i915_selftest@live@slpc:
    - {bat-rpls-1}:       [DMESG-FAIL][7] ([i915#6367]) -> [PASS][8]
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12708/bat-rpls-1/igt@i915_selftest@live@slpc.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_8456/bat-rpls-1/igt@i915_selftest@live@slpc.html

  * igt@i915_selftest@live@workarounds:
    - {bat-dg2-11}:       [DMESG-WARN][9] -> [PASS][10]
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12708/bat-dg2-11/igt@i915_selftest@live@workarounds.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_8456/bat-dg2-11/igt@i915_selftest@live@workarounds.html

  * igt@kms_cursor_legacy@basic-busy-flip-before-cursor@atomic-transitions:
    - fi-bsw-n3050:       [FAIL][11] ([i915#6298]) -> [PASS][12]
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12708/fi-bsw-n3050/igt@kms_cursor_legacy@basic-busy-flip-before-cursor@atomic-transitions.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_8456/fi-bsw-n3050/igt@kms_cursor_legacy@basic-busy-flip-before-cursor@atomic-transitions.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [i915#2582]: https://gitlab.freedesktop.org/drm/intel/issues/2582
  [i915#4077]: https://gitlab.freedesktop.org/drm/intel/issues/4077
  [i915#4079]: https://gitlab.freedesktop.org/drm/intel/issues/4079
  [i915#4083]: https://gitlab.freedesktop.org/drm/intel/issues/4083
  [i915#4983]: https://gitlab.freedesktop.org/drm/intel/issues/4983
  [i915#5251]: https://gitlab.freedesktop.org/drm/intel/issues/5251
  [i915#6298]: https://gitlab.freedesktop.org/drm/intel/issues/6298
  [i915#6311]: https://gitlab.freedesktop.org/drm/intel/issues/6311
  [i915#6367]: https://gitlab.freedesktop.org/drm/intel/issues/6367
  [i915#7229]: https://gitlab.freedesktop.org/drm/intel/issues/7229
  [i915#7359]: https://gitlab.freedesktop.org/drm/intel/issues/7359
  [i915#8060]: https://gitlab.freedesktop.org/drm/intel/issues/8060
  [i915#8062]: https://gitlab.freedesktop.org/drm/intel/issues/8062


Build changes
-------------

  * CI: CI-20190529 -> None
  * IGT: IGT_7153 -> IGTPW_8456

  CI-20190529: 20190529
  CI_DRM_12708: e9426d9d1eeb72f84725043dd2a9e073d9a6f1d7 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGTPW_8456: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_8456/index.html
  IGT_7153: f47f859f13376958a2bd199423b1f0ff53dddbe0 @ https://gitlab.freedesktop.org/drm/igt-gpu-tools.git

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_8456/index.html

[-- Attachment #2: Type: text/html, Size: 4817 bytes --]

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [igt-dev] [PATCH i-g-t] i915/perf: Make __perf_open() and friends public
  2023-02-07 10:11 [igt-dev] [PATCH i-g-t] i915/perf: Make __perf_open() and friends public Janusz Krzysztofik
  2023-02-07 11:38 ` [igt-dev] ✗ Fi.CI.BAT: failure for " Patchwork
@ 2023-02-07 18:16 ` Kamil Konieczny
  2023-02-07 19:45   ` Janusz Krzysztofik
  2023-02-07 19:25 ` Umesh Nerlige Ramappa
  2 siblings, 1 reply; 18+ messages in thread
From: Kamil Konieczny @ 2023-02-07 18:16 UTC (permalink / raw)
  To: igt-dev; +Cc: Chris Wilson

Hi Janusz,

On 2023-02-07 at 11:11:21 +0100, Janusz Krzysztofik wrote:
> We need new subtests that exercise interaction between i915 perf open/
----------------------------------------------------------------- ^
put it on one line as open/close, but imho you can omit those two names;
the main focus is on interaction, these two functions are just useful
now.

> close and other i915 subsystems from the point of view of those other
> subsystems.  Allow other tests to reuse __perf_open/close() family of
> functions, now inside i915/perf test, by moving (sharable parts of)
> them to i915/perf library.
> 
> Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik@linux.intel.com>
> ---
>  lib/i915/perf.c   | 130 ++++++++++++++++++++++++++++++++++++++++++++++
>  lib/i915/perf.h   |  15 ++++++
>  lib/meson.build   |   1 +
>  tests/i915/perf.c | 121 ++++++++++--------------------------------
>  4 files changed, 174 insertions(+), 93 deletions(-)
> 
> diff --git a/lib/i915/perf.c b/lib/i915/perf.c
> index 6c7a192558..e71d637eb5 100644
> --- a/lib/i915/perf.c
> +++ b/lib/i915/perf.c
> @@ -39,7 +39,9 @@
>  
>  #include "i915_pciids.h"
>  
> +#include "igt_aux.h"
>  #include "intel_chipset.h"
> +#include "ioctl_wrappers.h"
>  #include "perf.h"
>  
>  #include "i915_perf_metrics_hsw.h"
> @@ -1008,3 +1010,131 @@ const char *intel_perf_read_report_reason(const struct intel_perf *perf,
>  
>  	return "unknown";
>  }

All public functions need to be described (here and below),
maybe someone who worked on the perf test could help?
+cc Lionel Landwerlin <lionel.g.landwerlin@intel.com>

> +
> +uint64_t i915_perf_timebase_scale(struct intel_perf *intel_perf, uint32_t u32_delta)
> +{
> +	return ((uint64_t)u32_delta * NSEC_PER_SEC) / intel_perf->devinfo.timestamp_frequency;
> +}
> +

A description starts with /** and also describes the parameters.

> +/* Returns: the largest OA exponent that will still result in a sampling period
> + * less than or equal to the given @period.
> + */
> +int i915_perf_max_oa_exponent_for_period_lte(struct intel_perf *intel_perf, uint64_t period)
> +{
> +	/* NB: timebase_scale() takes a uint32_t and an exponent of 30
> +	 * would already represent a period of ~3 minutes so there's
> +	 * really no need to consider higher exponents.
> +	 */
> +	for (int i = 0; i < 30; i++) {
> +		uint64_t oa_period = i915_perf_timebase_scale(intel_perf, 2 << i);
> +
> +		if (oa_period > period)
> +			return max(0, i - 1);
> +	}
> +
> +	igt_assert(!"reached");

imho asserts in lib functions should be limited to very few cases.

> +	return -1;
> +}
> +
> +struct intel_perf_metric_set *i915_perf_default_set(struct intel_perf *intel_perf, uint32_t devid)
> +{
> +	struct intel_perf_metric_set *metric_set = NULL, *metric_set_iter;
> +	const char *metric_set_name = NULL;
> +
> +	igt_assert_neq(devid, 0);
> +
> +	/*
> +	 * We don't have a TestOa metric set for Haswell so use
> +	 * RenderBasic
> +	 */
> +	if (IS_HASWELL(devid))
> +		metric_set_name = "RenderBasic";
> +	else
> +		metric_set_name = "TestOa";
> +
> +	igt_list_for_each_entry(metric_set_iter, &intel_perf->metric_sets, link) {
> +		if (strcmp(metric_set_iter->symbol_name, metric_set_name) == 0) {
> +			metric_set = metric_set_iter;
> +			break;
> +		}
> +	}
> +
> +	return metric_set;
> +}
> +
> +struct intel_perf *i915_perf_init_sys_info(int drm_fd)
> +{
> +	struct intel_perf *intel_perf;
> +
> +	intel_perf = intel_perf_for_fd(drm_fd);
> +	if (!intel_perf)
> +		return NULL;
> +
> +	igt_debug("n_eu_slices: %"PRIu64"\n", intel_perf->devinfo.n_eu_slices);
> +	igt_debug("n_eu_sub_slices: %"PRIu64"\n", intel_perf->devinfo.n_eu_sub_slices);
> +	igt_debug("n_eus: %"PRIu64"\n", intel_perf->devinfo.n_eus);
> +	igt_debug("timestamp_frequency = %"PRIu64"\n",
> +		  intel_perf->devinfo.timestamp_frequency);
> +	igt_assert_neq(intel_perf->devinfo.timestamp_frequency, 0);
> +
> +	intel_perf_load_perf_configs(intel_perf, drm_fd);
> +
> +	return intel_perf;
> +}
> +
> +int i915_perf_open(int drm_fd, struct drm_i915_perf_open_param *param, int *pm_fd)
> +{
> +	int32_t pm_value = 0;
> +	int ret;
> +
> +	ret = perf_ioctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, param);
> +
> +	igt_assert(ret >= 0);
------- ^
imho here we should return an error if ret < 0
Checks for errors should be done in tests.

> +	errno = 0;
> +
> +	if (pm_fd) {
----------- !
But imho this should be done by a separate function, like
i915_perf_disable

Regards,
Kamil

> +		*pm_fd = open("/dev/cpu_dma_latency", O_RDWR);
> +		igt_assert(*pm_fd >= 0);
> +
> +		igt_assert_eq(write(*pm_fd, &pm_value, sizeof(pm_value)), sizeof(pm_value));
> +	}
> +
> +	return ret;
> +}
> +
> +int i915_perf_open_for_devid(int drm_fd, uint32_t devid, struct intel_perf *intel_perf, int *pm_fd)
> +{
> +	struct intel_perf_metric_set *metric_set = i915_perf_default_set(intel_perf, devid);
> +	uint64_t oa_exp = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 1000000);
> +	uint64_t properties[] = {
> +		DRM_I915_PERF_PROP_SAMPLE_OA, true,
> +		DRM_I915_PERF_PROP_OA_METRICS_SET, 0,
> +		DRM_I915_PERF_PROP_OA_FORMAT, 0,
> +		DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp,
> +	};
> +	struct drm_i915_perf_open_param param = {
> +		.flags = I915_PERF_FLAG_FD_CLOEXEC,
> +		.num_properties = sizeof(properties) / 16,
> +		.properties_ptr = to_user_pointer(properties),
> +	};
> +
> +	igt_assert(metric_set);
> +	igt_assert(metric_set->perf_oa_metrics_set);
> +	igt_assert(oa_exp >= 0);
> +
> +	igt_debug("%s metric set UUID = %s\n",
> +		  metric_set->symbol_name,
> +		  metric_set->hw_config_guid);
> +
> +	properties[3] = metric_set->perf_oa_metrics_set;
> +	properties[5] = metric_set->perf_oa_format;
> +
> +	return i915_perf_open(drm_fd, &param, pm_fd);
> +}
> +
> +void i915_perf_close(int stream_fd, int pm_fd)
> +{
> +	close(stream_fd);
> +	if (pm_fd >= 0)
> +		close(pm_fd);
> +}
> diff --git a/lib/i915/perf.h b/lib/i915/perf.h
> index e6e60dc997..c9cd28be47 100644
> --- a/lib/i915/perf.h
> +++ b/lib/i915/perf.h
> @@ -351,6 +351,21 @@ uint64_t intel_perf_read_record_timestamp_raw(const struct intel_perf *perf,
>  const char *intel_perf_read_report_reason(const struct intel_perf *perf,
>  					  const struct drm_i915_perf_record_header *record);
>  
> +uint64_t i915_perf_timebase_scale(struct intel_perf *intel_perf, uint32_t u32_delta);
> +
> +int i915_perf_max_oa_exponent_for_period_lte(struct intel_perf *intel_perf, uint64_t period);
> +
> +struct intel_perf_metric_set *i915_perf_default_set(struct intel_perf *intel_perf, uint32_t devid);
> +
> +struct intel_perf *i915_perf_init_sys_info(int drm_fd);
> +
> +struct drm_i915_perf_open_param;
> +int i915_perf_open(int drm_fd, struct drm_i915_perf_open_param *param, int *pm_fd);
> +
> +int i915_perf_open_for_devid(int drm_fd, uint32_t devid, struct intel_perf *intel_perf, int *pm_fd);
> +
> +void i915_perf_close(int drm_fd, int pm_fd);
> +
>  #ifdef __cplusplus
>  };
>  #endif
> diff --git a/lib/meson.build b/lib/meson.build
> index d49b78ca1a..e79b31090b 100644
> --- a/lib/meson.build
> +++ b/lib/meson.build
> @@ -258,6 +258,7 @@ lib_igt_drm_fdinfo = declare_dependency(link_with : lib_igt_drm_fdinfo_build,
>  				  include_directories : inc)
>  i915_perf_files = [
>    'igt_list.c',
> +  'igt_tools_stub.c',
>    'i915/perf.c',
>    'i915/perf_data_reader.c',
>  ]
> diff --git a/tests/i915/perf.c b/tests/i915/perf.c
> index dd1f1ac399..a3f59d143b 100644
> --- a/tests/i915/perf.c
> +++ b/tests/i915/perf.c
> @@ -287,21 +287,16 @@ pretty_print_oa_period(uint64_t oa_period_ns)
>  static void
>  __perf_close(int fd)
>  {
> -	close(fd);
> +	i915_perf_close(fd, pm_fd);
>  	stream_fd = -1;
>  
> -	if (pm_fd >= 0) {
> -		close(pm_fd);
> +	if (pm_fd >= 0)
>  		pm_fd = -1;
> -	}
>  }
>  
>  static int
>  __perf_open(int fd, struct drm_i915_perf_open_param *param, bool prevent_pm)
>  {
> -	int ret;
> -	int32_t pm_value = 0;
> -
>  	if (stream_fd >= 0)
>  		__perf_close(stream_fd);
>  	if (pm_fd >= 0) {
> @@ -309,19 +304,7 @@ __perf_open(int fd, struct drm_i915_perf_open_param *param, bool prevent_pm)
>  		pm_fd = -1;
>  	}
>  
> -	ret = igt_ioctl(fd, DRM_IOCTL_I915_PERF_OPEN, param);
> -
> -	igt_assert(ret >= 0);
> -	errno = 0;
> -
> -	if (prevent_pm) {
> -		pm_fd = open("/dev/cpu_dma_latency", O_RDWR);
> -		igt_assert(pm_fd >= 0);
> -
> -		igt_assert_eq(write(pm_fd, &pm_value, sizeof(pm_value)), sizeof(pm_value));
> -	}
> -
> -	return ret;
> +	return i915_perf_open(fd, param, prevent_pm ? &pm_fd : NULL);
>  }
>  
>  static int
> @@ -465,33 +448,6 @@ cs_timebase_scale(uint32_t u32_delta)
>  	return ((uint64_t)u32_delta * NSEC_PER_SEC) / cs_timestamp_frequency(drm_fd);
>  }
>  
> -static uint64_t
> -timebase_scale(uint32_t u32_delta)
> -{
> -	return ((uint64_t)u32_delta * NSEC_PER_SEC) / intel_perf->devinfo.timestamp_frequency;
> -}
> -
> -/* Returns: the largest OA exponent that will still result in a sampling period
> - * less than or equal to the given @period.
> - */
> -static int
> -max_oa_exponent_for_period_lte(uint64_t period)
> -{
> -	/* NB: timebase_scale() takes a uint32_t and an exponent of 30
> -	 * would already represent a period of ~3 minutes so there's
> -	 * really no need to consider higher exponents.
> -	 */
> -	for (int i = 0; i < 30; i++) {
> -		uint64_t oa_period = timebase_scale(2 << i);
> -
> -		if (oa_period > period)
> -			return max(0, i - 1);
> -	}
> -
> -	igt_assert(!"reached");
> -	return -1;
> -}
> -
>  /* Return: the largest OA exponent that will still result in a sampling
>   * frequency greater than the given @frequency.
>   */
> @@ -502,7 +458,7 @@ max_oa_exponent_for_freq_gt(uint64_t frequency)
>  
>  	igt_assert_neq(period, 0);
>  
> -	return max_oa_exponent_for_period_lte(period - 1);
> +	return i915_perf_max_oa_exponent_for_period_lte(intel_perf, period - 1);
>  }
>  
>  static uint64_t
> @@ -626,7 +582,7 @@ hsw_sanity_check_render_basic_reports(const uint32_t *oa_report0,
>  				      const uint32_t *oa_report1,
>  				      enum drm_i915_oa_format fmt)
>  {
> -	uint32_t time_delta = timebase_scale(oa_report1[1] - oa_report0[1]);
> +	uint32_t time_delta = i915_perf_timebase_scale(intel_perf, oa_report1[1] - oa_report0[1]);
>  	uint32_t clock_delta;
>  	uint32_t max_delta;
>  	struct oa_format format = get_oa_format(fmt);
> @@ -832,7 +788,7 @@ gen8_sanity_check_test_oa_reports(const uint32_t *oa_report0,
>  				  enum drm_i915_oa_format fmt)
>  {
>  	struct oa_format format = get_oa_format(fmt);
> -	uint32_t time_delta = timebase_scale(oa_report1[1] - oa_report0[1]);
> +	uint32_t time_delta = i915_perf_timebase_scale(intel_perf, oa_report1[1] - oa_report0[1]);
>  	uint32_t ticks0 = read_report_ticks(oa_report0, fmt);
>  	uint32_t ticks1 = read_report_ticks(oa_report1, fmt);
>  	uint32_t clock_delta = ticks1 - ticks0;
> @@ -950,43 +906,22 @@ gen8_sanity_check_test_oa_reports(const uint32_t *oa_report0,
>  static bool
>  init_sys_info(void)
>  {
> -	const char *test_set_name = NULL;
> -	struct intel_perf_metric_set *metric_set_iter;
> -
>  	igt_assert_neq(devid, 0);
>  
> -	intel_perf = intel_perf_for_fd(drm_fd);
> +	intel_perf = i915_perf_init_sys_info(drm_fd);
>  	igt_require(intel_perf);
>  
> -	igt_debug("n_eu_slices: %"PRIu64"\n", intel_perf->devinfo.n_eu_slices);
> -	igt_debug("n_eu_sub_slices: %"PRIu64"\n", intel_perf->devinfo.n_eu_sub_slices);
> -	igt_debug("n_eus: %"PRIu64"\n", intel_perf->devinfo.n_eus);
> -	igt_debug("timestamp_frequency = %"PRIu64"\n",
> -		  intel_perf->devinfo.timestamp_frequency);
> -	igt_assert_neq(intel_perf->devinfo.timestamp_frequency, 0);
> -
> -	/* We don't have a TestOa metric set for Haswell so use
> -	 * RenderBasic
> -	 */
>  	if (IS_HASWELL(devid)) {
> -		test_set_name = "RenderBasic";
>  		read_report_ticks = hsw_read_report_ticks;
>  		sanity_check_reports = hsw_sanity_check_render_basic_reports;
>  		undefined_a_counters = hsw_undefined_a_counters;
>  	} else {
> -		test_set_name = "TestOa";
>  		read_report_ticks = gen8_read_report_ticks;
>  		sanity_check_reports = gen8_sanity_check_test_oa_reports;
>  		undefined_a_counters = gen8_undefined_a_counters;
>  	}
>  
> -	igt_list_for_each_entry(metric_set_iter, &intel_perf->metric_sets, link) {
> -		if (strcmp(metric_set_iter->symbol_name, test_set_name) == 0) {
> -			test_set = metric_set_iter;
> -			break;
> -		}
> -	}
> -
> +	test_set = i915_perf_default_set(intel_perf, devid);
>  	if (!test_set)
>  		return false;
>  
> @@ -994,14 +929,12 @@ init_sys_info(void)
>  		  test_set->symbol_name,
>  		  test_set->hw_config_guid);
>  
> -	intel_perf_load_perf_configs(intel_perf, drm_fd);
> -
>  	if (test_set->perf_oa_metrics_set == 0) {
>  		igt_debug("Unable to load configurations\n");
>  		return false;
>  	}
>  
> -	oa_exp_1_millisec = max_oa_exponent_for_period_lte(1000000);
> +	oa_exp_1_millisec = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 1000000);
>  
>  	return true;
>  }
> @@ -1911,7 +1844,7 @@ test_low_oa_exponent_permissions(void)
>  
>  	igt_waitchildren();
>  
> -	oa_period = timebase_scale(2 << ok_exponent);
> +	oa_period = i915_perf_timebase_scale(intel_perf, 2 << ok_exponent);
>  	oa_freq = NSEC_PER_SEC / oa_period;
>  	write_u64_file("/proc/sys/dev/i915/oa_max_sample_rate", oa_freq - 100);
>  
> @@ -2003,7 +1936,7 @@ get_time(void)
>  static void
>  test_blocking(uint64_t requested_oa_period, bool set_kernel_hrtimer, uint64_t kernel_hrtimer)
>  {
> -	int oa_exponent = max_oa_exponent_for_period_lte(requested_oa_period);
> +	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, requested_oa_period);
>  	uint64_t oa_period = oa_exponent_to_ns(oa_exponent);
>  	uint64_t properties[] = {
>  		/* Include OA reports in samples */
> @@ -2162,7 +2095,7 @@ test_blocking(uint64_t requested_oa_period, bool set_kernel_hrtimer, uint64_t ke
>  static void
>  test_polling(uint64_t requested_oa_period, bool set_kernel_hrtimer, uint64_t kernel_hrtimer)
>  {
> -	int oa_exponent = max_oa_exponent_for_period_lte(requested_oa_period);
> +	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, requested_oa_period);
>  	uint64_t oa_period = oa_exponent_to_ns(oa_exponent);
>  	uint64_t properties[] = {
>  		/* Include OA reports in samples */
> @@ -2358,7 +2291,7 @@ test_polling(uint64_t requested_oa_period, bool set_kernel_hrtimer, uint64_t ker
>  
>  static void test_polling_small_buf(void)
>  {
> -	int oa_exponent = max_oa_exponent_for_period_lte(40 * 1000); /* 40us */
> +	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 40 * 1000); /* 40us */
>  	uint64_t properties[] = {
>  		/* Include OA reports in samples */
>  		DRM_I915_PERF_PROP_SAMPLE_OA, true,
> @@ -2461,7 +2394,7 @@ num_valid_reports_captured(struct drm_i915_perf_open_param *param,
>  static void
>  gen12_test_oa_tlb_invalidate(void)
>  {
> -	int oa_exponent = max_oa_exponent_for_period_lte(30000000);
> +	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 30000000);
>  	uint64_t properties[] = {
>  		DRM_I915_PERF_PROP_SAMPLE_OA, true,
>  
> @@ -2503,7 +2436,7 @@ static void
>  test_buffer_fill(void)
>  {
>  	/* ~5 micro second period */
> -	int oa_exponent = max_oa_exponent_for_period_lte(5000);
> +	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 5000);
>  	uint64_t oa_period = oa_exponent_to_ns(oa_exponent);
>  	uint64_t properties[] = {
>  		/* Include OA reports in samples */
> @@ -2651,7 +2584,7 @@ static void
>  test_non_zero_reason(void)
>  {
>  	/* ~20 micro second period */
> -	int oa_exponent = max_oa_exponent_for_period_lte(20000);
> +	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 20000);
>  	uint64_t properties[] = {
>  		/* Include OA reports in samples */
>  		DRM_I915_PERF_PROP_SAMPLE_OA, true,
> @@ -2734,7 +2667,7 @@ static void
>  test_enable_disable(void)
>  {
>  	/* ~5 micro second period */
> -	int oa_exponent = max_oa_exponent_for_period_lte(5000);
> +	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 5000);
>  	uint64_t oa_period = oa_exponent_to_ns(oa_exponent);
>  	uint64_t properties[] = {
>  		/* Include OA reports in samples */
> @@ -2885,7 +2818,7 @@ test_enable_disable(void)
>  static void
>  test_short_reads(void)
>  {
> -	int oa_exponent = max_oa_exponent_for_period_lte(5000);
> +	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 5000);
>  	uint64_t properties[] = {
>  		/* Include OA reports in samples */
>  		DRM_I915_PERF_PROP_SAMPLE_OA, true,
> @@ -3447,8 +3380,8 @@ hsw_test_single_ctx_counters(void)
>  
>  		/* sanity check that we can pass the delta to timebase_scale */
>  		igt_assert(delta_ts64 < UINT32_MAX);
> -		delta_oa32_ns = timebase_scale(delta_oa32);
> -		delta_ts64_ns = timebase_scale(delta_ts64);
> +		delta_oa32_ns = i915_perf_timebase_scale(intel_perf, delta_oa32);
> +		delta_ts64_ns = i915_perf_timebase_scale(intel_perf, delta_ts64);
>  
>  		igt_debug("ts32 delta = %u, = %uns\n",
>  			  delta_oa32, (unsigned)delta_oa32_ns);
> @@ -3498,7 +3431,7 @@ hsw_test_single_ctx_counters(void)
>  static void
>  gen8_test_single_ctx_render_target_writes_a_counter(void)
>  {
> -	int oa_exponent = max_oa_exponent_for_period_lte(1000000);
> +	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 1000000);
>  	uint64_t properties[] = {
>  		DRM_I915_PERF_PROP_CTX_HANDLE, UINT64_MAX, /* updated below */
>  
> @@ -3700,8 +3633,8 @@ gen8_test_single_ctx_render_target_writes_a_counter(void)
>  
>  			/* sanity check that we can pass the delta to timebase_scale */
>  			igt_assert(delta_ts64 < UINT32_MAX);
> -			delta_oa32_ns = timebase_scale(delta_oa32);
> -			delta_ts64_ns = timebase_scale(delta_ts64);
> +			delta_oa32_ns = i915_perf_timebase_scale(intel_perf, delta_oa32);
> +			delta_ts64_ns = i915_perf_timebase_scale(intel_perf, delta_ts64);
>  
>  			igt_debug("oa32 delta = %u, = %uns\n",
>  				  delta_oa32, (unsigned)delta_oa32_ns);
> @@ -3783,7 +3716,8 @@ gen8_test_single_ctx_render_target_writes_a_counter(void)
>  				{
>  					uint32_t time_delta = report[1] - report0_32[1];
>  
> -					if (timebase_scale(time_delta) > 1000000000) {
> +					if (i915_perf_timebase_scale(intel_perf,
> +								     time_delta) > 1000000000) {
>  						skip_reason = "prior first mi-rpc";
>  					}
>  				}
> @@ -3791,7 +3725,8 @@ gen8_test_single_ctx_render_target_writes_a_counter(void)
>  				{
>  					uint32_t time_delta = report[1] - report1_32[1];
>  
> -					if (timebase_scale(time_delta) <= 1000000000) {
> +					if (i915_perf_timebase_scale(intel_perf,
> +								     time_delta) <= 1000000000) {
>  						igt_debug("    comes after last MI_RPC (%u)\n",
>  							  report1_32[1]);
>  						report = report1_32;
> @@ -4164,7 +4099,7 @@ static void gen12_single_ctx_helper(void)
>  
>  	/* Sanity check that we can pass the delta to timebase_scale */
>  	igt_assert(delta_ts64 < UINT32_MAX);
> -	delta_oa32_ns = timebase_scale(delta_oa32);
> +	delta_oa32_ns = i915_perf_timebase_scale(intel_perf, delta_oa32);
>  	delta_ts64_ns = cs_timebase_scale(delta_ts64);
>  
>  	igt_debug("oa32 delta = %u, = %uns\n",
> -- 
> 2.25.1
> 

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [igt-dev] [PATCH i-g-t] i915/perf: Make __perf_open() and friends public
  2023-02-07 10:11 [igt-dev] [PATCH i-g-t] i915/perf: Make __perf_open() and friends public Janusz Krzysztofik
  2023-02-07 11:38 ` [igt-dev] ✗ Fi.CI.BAT: failure for " Patchwork
  2023-02-07 18:16 ` [igt-dev] [PATCH i-g-t] " Kamil Konieczny
@ 2023-02-07 19:25 ` Umesh Nerlige Ramappa
  2023-02-07 19:33   ` Umesh Nerlige Ramappa
  2 siblings, 1 reply; 18+ messages in thread
From: Umesh Nerlige Ramappa @ 2023-02-07 19:25 UTC (permalink / raw)
  To: Janusz Krzysztofik; +Cc: igt-dev, Chris Wilson

I wouldn't do this. Please keep the changes local to the specific test 
that you implemented in your first rev. While it is a good idea to have 
some of the perf capabilities in the library, this is way too much 
churn to implement a specific test for the original failure. Unless 
multiple IGT subsystems are already dependent on perf APIs to implement 
multiple tests, let's not do this.

Thanks,
Umesh

On Tue, Feb 07, 2023 at 11:11:21AM +0100, Janusz Krzysztofik wrote:
>We need new subtests that exercise interaction between i915 perf open/
>close and other i915 subsystems from the point of view of those other
>subsystems.  Allow other tests to reuse __perf_open/close() family of
>functions, now inside i915/perf test, by moving (sharable parts of)
>them to i915/perf library.
>
>Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik@linux.intel.com>
>---
> lib/i915/perf.c   | 130 ++++++++++++++++++++++++++++++++++++++++++++++
> lib/i915/perf.h   |  15 ++++++
> lib/meson.build   |   1 +
> tests/i915/perf.c | 121 ++++++++++--------------------------------
> 4 files changed, 174 insertions(+), 93 deletions(-)
>
>diff --git a/lib/i915/perf.c b/lib/i915/perf.c
>index 6c7a192558..e71d637eb5 100644
>--- a/lib/i915/perf.c
>+++ b/lib/i915/perf.c
>@@ -39,7 +39,9 @@
>
> #include "i915_pciids.h"
>
>+#include "igt_aux.h"
> #include "intel_chipset.h"
>+#include "ioctl_wrappers.h"
> #include "perf.h"
>
> #include "i915_perf_metrics_hsw.h"
>@@ -1008,3 +1010,131 @@ const char *intel_perf_read_report_reason(const struct intel_perf *perf,
>
> 	return "unknown";
> }
>+
>+uint64_t i915_perf_timebase_scale(struct intel_perf *intel_perf, uint32_t u32_delta)
>+{
>+	return ((uint64_t)u32_delta * NSEC_PER_SEC) / intel_perf->devinfo.timestamp_frequency;
>+}
>+
>+/* Returns: the largest OA exponent that will still result in a sampling period
>+ * less than or equal to the given @period.
>+ */
>+int i915_perf_max_oa_exponent_for_period_lte(struct intel_perf *intel_perf, uint64_t period)
>+{
>+	/* NB: timebase_scale() takes a uint32_t and an exponent of 30
>+	 * would already represent a period of ~3 minutes so there's
>+	 * really no need to consider higher exponents.
>+	 */
>+	for (int i = 0; i < 30; i++) {
>+		uint64_t oa_period = i915_perf_timebase_scale(intel_perf, 2 << i);
>+
>+		if (oa_period > period)
>+			return max(0, i - 1);
>+	}
>+
>+	igt_assert(!"reached");
>+	return -1;
>+}
>+
>+struct intel_perf_metric_set *i915_perf_default_set(struct intel_perf *intel_perf, uint32_t devid)
>+{
>+	struct intel_perf_metric_set *metric_set = NULL, *metric_set_iter;
>+	const char *metric_set_name = NULL;
>+
>+	igt_assert_neq(devid, 0);
>+
>+	/*
>+	 * We don't have a TestOa metric set for Haswell so use
>+	 * RenderBasic
>+	 */
>+	if (IS_HASWELL(devid))
>+		metric_set_name = "RenderBasic";
>+	else
>+		metric_set_name = "TestOa";
>+
>+	igt_list_for_each_entry(metric_set_iter, &intel_perf->metric_sets, link) {
>+		if (strcmp(metric_set_iter->symbol_name, metric_set_name) == 0) {
>+			metric_set = metric_set_iter;
>+			break;
>+		}
>+	}
>+
>+	return metric_set;
>+}
>+
>+struct intel_perf *i915_perf_init_sys_info(int drm_fd)
>+{
>+	struct intel_perf *intel_perf;
>+
>+	intel_perf = intel_perf_for_fd(drm_fd);
>+	if (!intel_perf)
>+		return NULL;
>+
>+	igt_debug("n_eu_slices: %"PRIu64"\n", intel_perf->devinfo.n_eu_slices);
>+	igt_debug("n_eu_sub_slices: %"PRIu64"\n", intel_perf->devinfo.n_eu_sub_slices);
>+	igt_debug("n_eus: %"PRIu64"\n", intel_perf->devinfo.n_eus);
>+	igt_debug("timestamp_frequency = %"PRIu64"\n",
>+		  intel_perf->devinfo.timestamp_frequency);
>+	igt_assert_neq(intel_perf->devinfo.timestamp_frequency, 0);
>+
>+	intel_perf_load_perf_configs(intel_perf, drm_fd);
>+
>+	return intel_perf;
>+}
>+
>+int i915_perf_open(int drm_fd, struct drm_i915_perf_open_param *param, int *pm_fd)
>+{
>+	int32_t pm_value = 0;
>+	int ret;
>+
>+	ret = perf_ioctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, param);
>+
>+	igt_assert(ret >= 0);
>+	errno = 0;
>+
>+	if (pm_fd) {
>+		*pm_fd = open("/dev/cpu_dma_latency", O_RDWR);
>+		igt_assert(*pm_fd >= 0);
>+
>+		igt_assert_eq(write(*pm_fd, &pm_value, sizeof(pm_value)), sizeof(pm_value));
>+	}
>+
>+	return ret;
>+}
>+
>+int i915_perf_open_for_devid(int drm_fd, uint32_t devid, struct intel_perf *intel_perf, int *pm_fd)
>+{
>+	struct intel_perf_metric_set *metric_set = i915_perf_default_set(intel_perf, devid);
>+	uint64_t oa_exp = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 1000000);
>+	uint64_t properties[] = {
>+		DRM_I915_PERF_PROP_SAMPLE_OA, true,
>+		DRM_I915_PERF_PROP_OA_METRICS_SET, 0,
>+		DRM_I915_PERF_PROP_OA_FORMAT, 0,
>+		DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp,
>+	};
>+	struct drm_i915_perf_open_param param = {
>+		.flags = I915_PERF_FLAG_FD_CLOEXEC,
>+		.num_properties = sizeof(properties) / 16,
>+		.properties_ptr = to_user_pointer(properties),
>+	};
>+
>+	igt_assert(metric_set);
>+	igt_assert(metric_set->perf_oa_metrics_set);
>+	igt_assert(oa_exp >= 0);
>+
>+	igt_debug("%s metric set UUID = %s\n",
>+		  metric_set->symbol_name,
>+		  metric_set->hw_config_guid);
>+
>+	properties[3] = metric_set->perf_oa_metrics_set;
>+	properties[5] = metric_set->perf_oa_format;
>+
>+	return i915_perf_open(drm_fd, &param, pm_fd);
>+}
>+
>+void i915_perf_close(int stream_fd, int pm_fd)
>+{
>+	close(stream_fd);
>+	if (pm_fd >= 0)
>+		close(pm_fd);
>+}
>diff --git a/lib/i915/perf.h b/lib/i915/perf.h
>index e6e60dc997..c9cd28be47 100644
>--- a/lib/i915/perf.h
>+++ b/lib/i915/perf.h
>@@ -351,6 +351,21 @@ uint64_t intel_perf_read_record_timestamp_raw(const struct intel_perf *perf,
> const char *intel_perf_read_report_reason(const struct intel_perf *perf,
> 					  const struct drm_i915_perf_record_header *record);
>
>+uint64_t i915_perf_timebase_scale(struct intel_perf *intel_perf, uint32_t u32_delta);
>+
>+int i915_perf_max_oa_exponent_for_period_lte(struct intel_perf *intel_perf, uint64_t period);
>+
>+struct intel_perf_metric_set *i915_perf_default_set(struct intel_perf *intel_perf, uint32_t devid);
>+
>+struct intel_perf *i915_perf_init_sys_info(int drm_fd);
>+
>+struct drm_i915_perf_open_param;
>+int i915_perf_open(int drm_fd, struct drm_i915_perf_open_param *param, int *pm_fd);
>+
>+int i915_perf_open_for_devid(int drm_fd, uint32_t devid, struct intel_perf *intel_perf, int *pm_fd);
>+
>+void i915_perf_close(int drm_fd, int pm_fd);
>+
> #ifdef __cplusplus
> };
> #endif
>diff --git a/lib/meson.build b/lib/meson.build
>index d49b78ca1a..e79b31090b 100644
>--- a/lib/meson.build
>+++ b/lib/meson.build
>@@ -258,6 +258,7 @@ lib_igt_drm_fdinfo = declare_dependency(link_with : lib_igt_drm_fdinfo_build,
> 				  include_directories : inc)
> i915_perf_files = [
>   'igt_list.c',
>+  'igt_tools_stub.c',
>   'i915/perf.c',
>   'i915/perf_data_reader.c',
> ]
>diff --git a/tests/i915/perf.c b/tests/i915/perf.c
>index dd1f1ac399..a3f59d143b 100644
>--- a/tests/i915/perf.c
>+++ b/tests/i915/perf.c
>@@ -287,21 +287,16 @@ pretty_print_oa_period(uint64_t oa_period_ns)
> static void
> __perf_close(int fd)
> {
>-	close(fd);
>+	i915_perf_close(fd, pm_fd);
> 	stream_fd = -1;
>
>-	if (pm_fd >= 0) {
>-		close(pm_fd);
>+	if (pm_fd >= 0)
> 		pm_fd = -1;
>-	}
> }
>
> static int
> __perf_open(int fd, struct drm_i915_perf_open_param *param, bool prevent_pm)
> {
>-	int ret;
>-	int32_t pm_value = 0;
>-
> 	if (stream_fd >= 0)
> 		__perf_close(stream_fd);
> 	if (pm_fd >= 0) {
>@@ -309,19 +304,7 @@ __perf_open(int fd, struct drm_i915_perf_open_param *param, bool prevent_pm)
> 		pm_fd = -1;
> 	}
>
>-	ret = igt_ioctl(fd, DRM_IOCTL_I915_PERF_OPEN, param);
>-
>-	igt_assert(ret >= 0);
>-	errno = 0;
>-
>-	if (prevent_pm) {
>-		pm_fd = open("/dev/cpu_dma_latency", O_RDWR);
>-		igt_assert(pm_fd >= 0);
>-
>-		igt_assert_eq(write(pm_fd, &pm_value, sizeof(pm_value)), sizeof(pm_value));
>-	}
>-
>-	return ret;
>+	return i915_perf_open(fd, param, prevent_pm ? &pm_fd : NULL);
> }
>
> static int
>@@ -465,33 +448,6 @@ cs_timebase_scale(uint32_t u32_delta)
> 	return ((uint64_t)u32_delta * NSEC_PER_SEC) / cs_timestamp_frequency(drm_fd);
> }
>
>-static uint64_t
>-timebase_scale(uint32_t u32_delta)
>-{
>-	return ((uint64_t)u32_delta * NSEC_PER_SEC) / intel_perf->devinfo.timestamp_frequency;
>-}
>-
>-/* Returns: the largest OA exponent that will still result in a sampling period
>- * less than or equal to the given @period.
>- */
>-static int
>-max_oa_exponent_for_period_lte(uint64_t period)
>-{
>-	/* NB: timebase_scale() takes a uint32_t and an exponent of 30
>-	 * would already represent a period of ~3 minutes so there's
>-	 * really no need to consider higher exponents.
>-	 */
>-	for (int i = 0; i < 30; i++) {
>-		uint64_t oa_period = timebase_scale(2 << i);
>-
>-		if (oa_period > period)
>-			return max(0, i - 1);
>-	}
>-
>-	igt_assert(!"reached");
>-	return -1;
>-}
>-
> /* Return: the largest OA exponent that will still result in a sampling
>  * frequency greater than the given @frequency.
>  */
>@@ -502,7 +458,7 @@ max_oa_exponent_for_freq_gt(uint64_t frequency)
>
> 	igt_assert_neq(period, 0);
>
>-	return max_oa_exponent_for_period_lte(period - 1);
>+	return i915_perf_max_oa_exponent_for_period_lte(intel_perf, period - 1);
> }
>
> static uint64_t
>@@ -626,7 +582,7 @@ hsw_sanity_check_render_basic_reports(const uint32_t *oa_report0,
> 				      const uint32_t *oa_report1,
> 				      enum drm_i915_oa_format fmt)
> {
>-	uint32_t time_delta = timebase_scale(oa_report1[1] - oa_report0[1]);
>+	uint32_t time_delta = i915_perf_timebase_scale(intel_perf, oa_report1[1] - oa_report0[1]);
> 	uint32_t clock_delta;
> 	uint32_t max_delta;
> 	struct oa_format format = get_oa_format(fmt);
>@@ -832,7 +788,7 @@ gen8_sanity_check_test_oa_reports(const uint32_t *oa_report0,
> 				  enum drm_i915_oa_format fmt)
> {
> 	struct oa_format format = get_oa_format(fmt);
>-	uint32_t time_delta = timebase_scale(oa_report1[1] - oa_report0[1]);
>+	uint32_t time_delta = i915_perf_timebase_scale(intel_perf, oa_report1[1] - oa_report0[1]);
> 	uint32_t ticks0 = read_report_ticks(oa_report0, fmt);
> 	uint32_t ticks1 = read_report_ticks(oa_report1, fmt);
> 	uint32_t clock_delta = ticks1 - ticks0;
>@@ -950,43 +906,22 @@ gen8_sanity_check_test_oa_reports(const uint32_t *oa_report0,
> static bool
> init_sys_info(void)
> {
>-	const char *test_set_name = NULL;
>-	struct intel_perf_metric_set *metric_set_iter;
>-
> 	igt_assert_neq(devid, 0);
>
>-	intel_perf = intel_perf_for_fd(drm_fd);
>+	intel_perf = i915_perf_init_sys_info(drm_fd);
> 	igt_require(intel_perf);
>
>-	igt_debug("n_eu_slices: %"PRIu64"\n", intel_perf->devinfo.n_eu_slices);
>-	igt_debug("n_eu_sub_slices: %"PRIu64"\n", intel_perf->devinfo.n_eu_sub_slices);
>-	igt_debug("n_eus: %"PRIu64"\n", intel_perf->devinfo.n_eus);
>-	igt_debug("timestamp_frequency = %"PRIu64"\n",
>-		  intel_perf->devinfo.timestamp_frequency);
>-	igt_assert_neq(intel_perf->devinfo.timestamp_frequency, 0);
>-
>-	/* We don't have a TestOa metric set for Haswell so use
>-	 * RenderBasic
>-	 */
> 	if (IS_HASWELL(devid)) {
>-		test_set_name = "RenderBasic";
> 		read_report_ticks = hsw_read_report_ticks;
> 		sanity_check_reports = hsw_sanity_check_render_basic_reports;
> 		undefined_a_counters = hsw_undefined_a_counters;
> 	} else {
>-		test_set_name = "TestOa";
> 		read_report_ticks = gen8_read_report_ticks;
> 		sanity_check_reports = gen8_sanity_check_test_oa_reports;
> 		undefined_a_counters = gen8_undefined_a_counters;
> 	}
>
>-	igt_list_for_each_entry(metric_set_iter, &intel_perf->metric_sets, link) {
>-		if (strcmp(metric_set_iter->symbol_name, test_set_name) == 0) {
>-			test_set = metric_set_iter;
>-			break;
>-		}
>-	}
>-
>+	test_set = i915_perf_default_set(intel_perf, devid);
> 	if (!test_set)
> 		return false;
>
>@@ -994,14 +929,12 @@ init_sys_info(void)
> 		  test_set->symbol_name,
> 		  test_set->hw_config_guid);
>
>-	intel_perf_load_perf_configs(intel_perf, drm_fd);
>-
> 	if (test_set->perf_oa_metrics_set == 0) {
> 		igt_debug("Unable to load configurations\n");
> 		return false;
> 	}
>
>-	oa_exp_1_millisec = max_oa_exponent_for_period_lte(1000000);
>+	oa_exp_1_millisec = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 1000000);
>
> 	return true;
> }
>@@ -1911,7 +1844,7 @@ test_low_oa_exponent_permissions(void)
>
> 	igt_waitchildren();
>
>-	oa_period = timebase_scale(2 << ok_exponent);
>+	oa_period = i915_perf_timebase_scale(intel_perf, 2 << ok_exponent);
> 	oa_freq = NSEC_PER_SEC / oa_period;
> 	write_u64_file("/proc/sys/dev/i915/oa_max_sample_rate", oa_freq - 100);
>
>@@ -2003,7 +1936,7 @@ get_time(void)
> static void
> test_blocking(uint64_t requested_oa_period, bool set_kernel_hrtimer, uint64_t kernel_hrtimer)
> {
>-	int oa_exponent = max_oa_exponent_for_period_lte(requested_oa_period);
>+	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, requested_oa_period);
> 	uint64_t oa_period = oa_exponent_to_ns(oa_exponent);
> 	uint64_t properties[] = {
> 		/* Include OA reports in samples */
>@@ -2162,7 +2095,7 @@ test_blocking(uint64_t requested_oa_period, bool set_kernel_hrtimer, uint64_t ke
> static void
> test_polling(uint64_t requested_oa_period, bool set_kernel_hrtimer, uint64_t kernel_hrtimer)
> {
>-	int oa_exponent = max_oa_exponent_for_period_lte(requested_oa_period);
>+	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, requested_oa_period);
> 	uint64_t oa_period = oa_exponent_to_ns(oa_exponent);
> 	uint64_t properties[] = {
> 		/* Include OA reports in samples */
>@@ -2358,7 +2291,7 @@ test_polling(uint64_t requested_oa_period, bool set_kernel_hrtimer, uint64_t ker
>
> static void test_polling_small_buf(void)
> {
>-	int oa_exponent = max_oa_exponent_for_period_lte(40 * 1000); /* 40us */
>+	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 40 * 1000); /* 40us */
> 	uint64_t properties[] = {
> 		/* Include OA reports in samples */
> 		DRM_I915_PERF_PROP_SAMPLE_OA, true,
>@@ -2461,7 +2394,7 @@ num_valid_reports_captured(struct drm_i915_perf_open_param *param,
> static void
> gen12_test_oa_tlb_invalidate(void)
> {
>-	int oa_exponent = max_oa_exponent_for_period_lte(30000000);
>+	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 30000000);
> 	uint64_t properties[] = {
> 		DRM_I915_PERF_PROP_SAMPLE_OA, true,
>
>@@ -2503,7 +2436,7 @@ static void
> test_buffer_fill(void)
> {
> 	/* ~5 micro second period */
>-	int oa_exponent = max_oa_exponent_for_period_lte(5000);
>+	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 5000);
> 	uint64_t oa_period = oa_exponent_to_ns(oa_exponent);
> 	uint64_t properties[] = {
> 		/* Include OA reports in samples */
>@@ -2651,7 +2584,7 @@ static void
> test_non_zero_reason(void)
> {
> 	/* ~20 micro second period */
>-	int oa_exponent = max_oa_exponent_for_period_lte(20000);
>+	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 20000);
> 	uint64_t properties[] = {
> 		/* Include OA reports in samples */
> 		DRM_I915_PERF_PROP_SAMPLE_OA, true,
>@@ -2734,7 +2667,7 @@ static void
> test_enable_disable(void)
> {
> 	/* ~5 micro second period */
>-	int oa_exponent = max_oa_exponent_for_period_lte(5000);
>+	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 5000);
> 	uint64_t oa_period = oa_exponent_to_ns(oa_exponent);
> 	uint64_t properties[] = {
> 		/* Include OA reports in samples */
>@@ -2885,7 +2818,7 @@ test_enable_disable(void)
> static void
> test_short_reads(void)
> {
>-	int oa_exponent = max_oa_exponent_for_period_lte(5000);
>+	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 5000);
> 	uint64_t properties[] = {
> 		/* Include OA reports in samples */
> 		DRM_I915_PERF_PROP_SAMPLE_OA, true,
>@@ -3447,8 +3380,8 @@ hsw_test_single_ctx_counters(void)
>
> 		/* sanity check that we can pass the delta to timebase_scale */
> 		igt_assert(delta_ts64 < UINT32_MAX);
>-		delta_oa32_ns = timebase_scale(delta_oa32);
>-		delta_ts64_ns = timebase_scale(delta_ts64);
>+		delta_oa32_ns = i915_perf_timebase_scale(intel_perf, delta_oa32);
>+		delta_ts64_ns = i915_perf_timebase_scale(intel_perf, delta_ts64);
>
> 		igt_debug("ts32 delta = %u, = %uns\n",
> 			  delta_oa32, (unsigned)delta_oa32_ns);
>@@ -3498,7 +3431,7 @@ hsw_test_single_ctx_counters(void)
> static void
> gen8_test_single_ctx_render_target_writes_a_counter(void)
> {
>-	int oa_exponent = max_oa_exponent_for_period_lte(1000000);
>+	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 1000000);
> 	uint64_t properties[] = {
> 		DRM_I915_PERF_PROP_CTX_HANDLE, UINT64_MAX, /* updated below */
>
>@@ -3700,8 +3633,8 @@ gen8_test_single_ctx_render_target_writes_a_counter(void)
>
> 			/* sanity check that we can pass the delta to timebase_scale */
> 			igt_assert(delta_ts64 < UINT32_MAX);
>-			delta_oa32_ns = timebase_scale(delta_oa32);
>-			delta_ts64_ns = timebase_scale(delta_ts64);
>+			delta_oa32_ns = i915_perf_timebase_scale(intel_perf, delta_oa32);
>+			delta_ts64_ns = i915_perf_timebase_scale(intel_perf, delta_ts64);
>
> 			igt_debug("oa32 delta = %u, = %uns\n",
> 				  delta_oa32, (unsigned)delta_oa32_ns);
>@@ -3783,7 +3716,8 @@ gen8_test_single_ctx_render_target_writes_a_counter(void)
> 				{
> 					uint32_t time_delta = report[1] - report0_32[1];
>
>-					if (timebase_scale(time_delta) > 1000000000) {
>+					if (i915_perf_timebase_scale(intel_perf,
>+								     time_delta) > 1000000000) {
> 						skip_reason = "prior first mi-rpc";
> 					}
> 				}
>@@ -3791,7 +3725,8 @@ gen8_test_single_ctx_render_target_writes_a_counter(void)
> 				{
> 					uint32_t time_delta = report[1] - report1_32[1];
>
>-					if (timebase_scale(time_delta) <= 1000000000) {
>+					if (i915_perf_timebase_scale(intel_perf,
>+								     time_delta) <= 1000000000) {
> 						igt_debug("    comes after last MI_RPC (%u)\n",
> 							  report1_32[1]);
> 						report = report1_32;
>@@ -4164,7 +4099,7 @@ static void gen12_single_ctx_helper(void)
>
> 	/* Sanity check that we can pass the delta to timebase_scale */
> 	igt_assert(delta_ts64 < UINT32_MAX);
>-	delta_oa32_ns = timebase_scale(delta_oa32);
>+	delta_oa32_ns = i915_perf_timebase_scale(intel_perf, delta_oa32);
> 	delta_ts64_ns = cs_timebase_scale(delta_ts64);
>
> 	igt_debug("oa32 delta = %u, = %uns\n",
>-- 
>2.25.1
>

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [igt-dev] [PATCH i-g-t] i915/perf: Make __perf_open() and friends public
  2023-02-07 19:25 ` Umesh Nerlige Ramappa
@ 2023-02-07 19:33   ` Umesh Nerlige Ramappa
  2023-02-07 20:04     ` Janusz Krzysztofik
  2023-02-08 14:35     ` Kamil Konieczny
  0 siblings, 2 replies; 18+ messages in thread
From: Umesh Nerlige Ramappa @ 2023-02-07 19:33 UTC (permalink / raw)
  To: Janusz Krzysztofik; +Cc: igt-dev, Chris Wilson

On Tue, Feb 07, 2023 at 11:25:00AM -0800, Umesh Nerlige Ramappa wrote:
>I wouldn't do this. Please keep the changes local to the specific test 
>that you implemented in your first rev. While it is a good idea to 
>have some of the perf capabilities in the library, this is way too 
>much churn to implement a specific test for the original failure. 
>Unless multiple IGT subsystems are already dependent on perf APIs to 
>implement multiple tests, let's not do this.
>

Also note that the perf library implemented in IGT is not used exclusively 
by IGT tests. The library is also linked to GPUvis software. Only 
a few pieces of reusable code in the perf library are used by IGT tests. 

>Thanks,
>Umesh
>
>On Tue, Feb 07, 2023 at 11:11:21AM +0100, Janusz Krzysztofik wrote:
>>We need new subtests that exercise interaction between i915 perf open/
>>close and other i915 subsystems from the point of view of those other
>>subsystems.  Allow other tests to reuse __perf_open/close() family of
>>functions, now inside i915/perf test, by moving (sharable parts of)
>>them to i915/perf library.
>>
>>Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik@linux.intel.com>
>>---
>>lib/i915/perf.c   | 130 ++++++++++++++++++++++++++++++++++++++++++++++
>>lib/i915/perf.h   |  15 ++++++
>>lib/meson.build   |   1 +
>>tests/i915/perf.c | 121 ++++++++++--------------------------------
>>4 files changed, 174 insertions(+), 93 deletions(-)
>>
>>diff --git a/lib/i915/perf.c b/lib/i915/perf.c
>>index 6c7a192558..e71d637eb5 100644
>>--- a/lib/i915/perf.c
>>+++ b/lib/i915/perf.c
>>@@ -39,7 +39,9 @@
>>
>>#include "i915_pciids.h"
>>
>>+#include "igt_aux.h"
>>#include "intel_chipset.h"
>>+#include "ioctl_wrappers.h"
>>#include "perf.h"
>>
>>#include "i915_perf_metrics_hsw.h"
>>@@ -1008,3 +1010,131 @@ const char *intel_perf_read_report_reason(const struct intel_perf *perf,
>>
>>	return "unknown";
>>}
>>+
>>+uint64_t i915_perf_timebase_scale(struct intel_perf *intel_perf, uint32_t u32_delta)
>>+{
>>+	return ((uint64_t)u32_delta * NSEC_PER_SEC) / intel_perf->devinfo.timestamp_frequency;
>>+}
>>+
>>+/* Returns: the largest OA exponent that will still result in a sampling period
>>+ * less than or equal to the given @period.
>>+ */
>>+int i915_perf_max_oa_exponent_for_period_lte(struct intel_perf *intel_perf, uint64_t period)
>>+{
>>+	/* NB: timebase_scale() takes a uint32_t and an exponent of 30
>>+	 * would already represent a period of ~3 minutes so there's
>>+	 * really no need to consider higher exponents.
>>+	 */
>>+	for (int i = 0; i < 30; i++) {
>>+		uint64_t oa_period = i915_perf_timebase_scale(intel_perf, 2 << i);
>>+
>>+		if (oa_period > period)
>>+			return max(0, i - 1);
>>+	}
>>+
>>+	igt_assert(!"reached");
>>+	return -1;
>>+}
>>+
>>+struct intel_perf_metric_set *i915_perf_default_set(struct intel_perf *intel_perf, uint32_t devid)
>>+{
>>+	struct intel_perf_metric_set *metric_set = NULL, *metric_set_iter;
>>+	const char *metric_set_name = NULL;
>>+
>>+	igt_assert_neq(devid, 0);
>>+
>>+	/*
>>+	 * We don't have a TestOa metric set for Haswell so use
>>+	 * RenderBasic
>>+	 */
>>+	if (IS_HASWELL(devid))
>>+		metric_set_name = "RenderBasic";
>>+	else
>>+		metric_set_name = "TestOa";
>>+
>>+	igt_list_for_each_entry(metric_set_iter, &intel_perf->metric_sets, link) {
>>+		if (strcmp(metric_set_iter->symbol_name, metric_set_name) == 0) {
>>+			metric_set = metric_set_iter;
>>+			break;
>>+		}
>>+	}
>>+
>>+	return metric_set;
>>+}
>>+
>>+struct intel_perf *i915_perf_init_sys_info(int drm_fd)
>>+{
>>+	struct intel_perf *intel_perf;
>>+
>>+	intel_perf = intel_perf_for_fd(drm_fd);
>>+	if (!intel_perf)
>>+		return NULL;
>>+
>>+	igt_debug("n_eu_slices: %"PRIu64"\n", intel_perf->devinfo.n_eu_slices);
>>+	igt_debug("n_eu_sub_slices: %"PRIu64"\n", intel_perf->devinfo.n_eu_sub_slices);
>>+	igt_debug("n_eus: %"PRIu64"\n", intel_perf->devinfo.n_eus);
>>+	igt_debug("timestamp_frequency = %"PRIu64"\n",
>>+		  intel_perf->devinfo.timestamp_frequency);
>>+	igt_assert_neq(intel_perf->devinfo.timestamp_frequency, 0);
>>+
>>+	intel_perf_load_perf_configs(intel_perf, drm_fd);
>>+
>>+	return intel_perf;
>>+}
>>+
>>+int i915_perf_open(int drm_fd, struct drm_i915_perf_open_param *param, int *pm_fd)
>>+{
>>+	int32_t pm_value = 0;
>>+	int ret;
>>+
>>+	ret = perf_ioctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, param);
>>+
>>+	igt_assert(ret >= 0);
>>+	errno = 0;
>>+
>>+	if (pm_fd) {
>>+		*pm_fd = open("/dev/cpu_dma_latency", O_RDWR);
>>+		igt_assert(*pm_fd >= 0);
>>+
>>+		igt_assert_eq(write(*pm_fd, &pm_value, sizeof(pm_value)), sizeof(pm_value));
>>+	}
>>+
>>+	return ret;
>>+}
>>+
>>+int i915_perf_open_for_devid(int drm_fd, uint32_t devid, struct intel_perf *intel_perf, int *pm_fd)
>>+{
>>+	struct intel_perf_metric_set *metric_set = i915_perf_default_set(intel_perf, devid);
>>+	uint64_t oa_exp = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 1000000);
>>+	uint64_t properties[] = {
>>+		DRM_I915_PERF_PROP_SAMPLE_OA, true,
>>+		DRM_I915_PERF_PROP_OA_METRICS_SET, 0,
>>+		DRM_I915_PERF_PROP_OA_FORMAT, 0,
>>+		DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp,
>>+	};
>>+	struct drm_i915_perf_open_param param = {
>>+		.flags = I915_PERF_FLAG_FD_CLOEXEC,
>>+		.num_properties = sizeof(properties) / 16,
>>+		.properties_ptr = to_user_pointer(properties),
>>+	};
>>+
>>+	igt_assert(metric_set);
>>+	igt_assert(metric_set->perf_oa_metrics_set);
>>+	igt_assert(oa_exp >= 0);
>>+
>>+	igt_debug("%s metric set UUID = %s\n",
>>+		  metric_set->symbol_name,
>>+		  metric_set->hw_config_guid);
>>+
>>+	properties[3] = metric_set->perf_oa_metrics_set;
>>+	properties[5] = metric_set->perf_oa_format;
>>+
>>+	return i915_perf_open(drm_fd, &param, pm_fd);
>>+}
>>+
>>+void i915_perf_close(int stream_fd, int pm_fd)
>>+{
>>+	close(stream_fd);
>>+	if (pm_fd >= 0)
>>+		close(pm_fd);
>>+}
>>diff --git a/lib/i915/perf.h b/lib/i915/perf.h
>>index e6e60dc997..c9cd28be47 100644
>>--- a/lib/i915/perf.h
>>+++ b/lib/i915/perf.h
>>@@ -351,6 +351,21 @@ uint64_t intel_perf_read_record_timestamp_raw(const struct intel_perf *perf,
>>const char *intel_perf_read_report_reason(const struct intel_perf *perf,
>>					  const struct drm_i915_perf_record_header *record);
>>
>>+uint64_t i915_perf_timebase_scale(struct intel_perf *intel_perf, uint32_t u32_delta);
>>+
>>+int i915_perf_max_oa_exponent_for_period_lte(struct intel_perf *intel_perf, uint64_t period);
>>+
>>+struct intel_perf_metric_set *i915_perf_default_set(struct intel_perf *intel_perf, uint32_t devid);
>>+
>>+struct intel_perf *i915_perf_init_sys_info(int drm_fd);
>>+
>>+struct drm_i915_perf_open_param;
>>+int i915_perf_open(int drm_fd, struct drm_i915_perf_open_param *param, int *pm_fd);
>>+
>>+int i915_perf_open_for_devid(int drm_fd, uint32_t devid, struct intel_perf *intel_perf, int *pm_fd);
>>+
>>+void i915_perf_close(int drm_fd, int pm_fd);
>>+
>>#ifdef __cplusplus
>>};
>>#endif
>>diff --git a/lib/meson.build b/lib/meson.build
>>index d49b78ca1a..e79b31090b 100644
>>--- a/lib/meson.build
>>+++ b/lib/meson.build
>>@@ -258,6 +258,7 @@ lib_igt_drm_fdinfo = declare_dependency(link_with : lib_igt_drm_fdinfo_build,
>>				  include_directories : inc)
>>i915_perf_files = [
>>  'igt_list.c',
>>+  'igt_tools_stub.c',
>>  'i915/perf.c',
>>  'i915/perf_data_reader.c',
>>]
>>diff --git a/tests/i915/perf.c b/tests/i915/perf.c
>>index dd1f1ac399..a3f59d143b 100644
>>--- a/tests/i915/perf.c
>>+++ b/tests/i915/perf.c
>>@@ -287,21 +287,16 @@ pretty_print_oa_period(uint64_t oa_period_ns)
>>static void
>>__perf_close(int fd)
>>{
>>-	close(fd);
>>+	i915_perf_close(fd, pm_fd);
>>	stream_fd = -1;
>>
>>-	if (pm_fd >= 0) {
>>-		close(pm_fd);
>>+	if (pm_fd >= 0)
>>		pm_fd = -1;
>>-	}
>>}
>>
>>static int
>>__perf_open(int fd, struct drm_i915_perf_open_param *param, bool prevent_pm)
>>{
>>-	int ret;
>>-	int32_t pm_value = 0;
>>-
>>	if (stream_fd >= 0)
>>		__perf_close(stream_fd);
>>	if (pm_fd >= 0) {
>>@@ -309,19 +304,7 @@ __perf_open(int fd, struct drm_i915_perf_open_param *param, bool prevent_pm)
>>		pm_fd = -1;
>>	}
>>
>>-	ret = igt_ioctl(fd, DRM_IOCTL_I915_PERF_OPEN, param);
>>-
>>-	igt_assert(ret >= 0);
>>-	errno = 0;
>>-
>>-	if (prevent_pm) {
>>-		pm_fd = open("/dev/cpu_dma_latency", O_RDWR);
>>-		igt_assert(pm_fd >= 0);
>>-
>>-		igt_assert_eq(write(pm_fd, &pm_value, sizeof(pm_value)), sizeof(pm_value));
>>-	}
>>-
>>-	return ret;
>>+	return i915_perf_open(fd, param, prevent_pm ? &pm_fd : NULL);
>>}
>>
>>static int
>>@@ -465,33 +448,6 @@ cs_timebase_scale(uint32_t u32_delta)
>>	return ((uint64_t)u32_delta * NSEC_PER_SEC) / cs_timestamp_frequency(drm_fd);
>>}
>>
>>-static uint64_t
>>-timebase_scale(uint32_t u32_delta)
>>-{
>>-	return ((uint64_t)u32_delta * NSEC_PER_SEC) / intel_perf->devinfo.timestamp_frequency;
>>-}
>>-
>>-/* Returns: the largest OA exponent that will still result in a sampling period
>>- * less than or equal to the given @period.
>>- */
>>-static int
>>-max_oa_exponent_for_period_lte(uint64_t period)
>>-{
>>-	/* NB: timebase_scale() takes a uint32_t and an exponent of 30
>>-	 * would already represent a period of ~3 minutes so there's
>>-	 * really no need to consider higher exponents.
>>-	 */
>>-	for (int i = 0; i < 30; i++) {
>>-		uint64_t oa_period = timebase_scale(2 << i);
>>-
>>-		if (oa_period > period)
>>-			return max(0, i - 1);
>>-	}
>>-
>>-	igt_assert(!"reached");
>>-	return -1;
>>-}
>>-
>>/* Return: the largest OA exponent that will still result in a sampling
>> * frequency greater than the given @frequency.
>> */
>>@@ -502,7 +458,7 @@ max_oa_exponent_for_freq_gt(uint64_t frequency)
>>
>>	igt_assert_neq(period, 0);
>>
>>-	return max_oa_exponent_for_period_lte(period - 1);
>>+	return i915_perf_max_oa_exponent_for_period_lte(intel_perf, period - 1);
>>}
>>
>>static uint64_t
>>@@ -626,7 +582,7 @@ hsw_sanity_check_render_basic_reports(const uint32_t *oa_report0,
>>				      const uint32_t *oa_report1,
>>				      enum drm_i915_oa_format fmt)
>>{
>>-	uint32_t time_delta = timebase_scale(oa_report1[1] - oa_report0[1]);
>>+	uint32_t time_delta = i915_perf_timebase_scale(intel_perf, oa_report1[1] - oa_report0[1]);
>>	uint32_t clock_delta;
>>	uint32_t max_delta;
>>	struct oa_format format = get_oa_format(fmt);
>>@@ -832,7 +788,7 @@ gen8_sanity_check_test_oa_reports(const uint32_t *oa_report0,
>>				  enum drm_i915_oa_format fmt)
>>{
>>	struct oa_format format = get_oa_format(fmt);
>>-	uint32_t time_delta = timebase_scale(oa_report1[1] - oa_report0[1]);
>>+	uint32_t time_delta = i915_perf_timebase_scale(intel_perf, oa_report1[1] - oa_report0[1]);
>>	uint32_t ticks0 = read_report_ticks(oa_report0, fmt);
>>	uint32_t ticks1 = read_report_ticks(oa_report1, fmt);
>>	uint32_t clock_delta = ticks1 - ticks0;
>>@@ -950,43 +906,22 @@ gen8_sanity_check_test_oa_reports(const uint32_t *oa_report0,
>>static bool
>>init_sys_info(void)
>>{
>>-	const char *test_set_name = NULL;
>>-	struct intel_perf_metric_set *metric_set_iter;
>>-
>>	igt_assert_neq(devid, 0);
>>
>>-	intel_perf = intel_perf_for_fd(drm_fd);
>>+	intel_perf = i915_perf_init_sys_info(drm_fd);
>>	igt_require(intel_perf);
>>
>>-	igt_debug("n_eu_slices: %"PRIu64"\n", intel_perf->devinfo.n_eu_slices);
>>-	igt_debug("n_eu_sub_slices: %"PRIu64"\n", intel_perf->devinfo.n_eu_sub_slices);
>>-	igt_debug("n_eus: %"PRIu64"\n", intel_perf->devinfo.n_eus);
>>-	igt_debug("timestamp_frequency = %"PRIu64"\n",
>>-		  intel_perf->devinfo.timestamp_frequency);
>>-	igt_assert_neq(intel_perf->devinfo.timestamp_frequency, 0);
>>-
>>-	/* We don't have a TestOa metric set for Haswell so use
>>-	 * RenderBasic
>>-	 */
>>	if (IS_HASWELL(devid)) {
>>-		test_set_name = "RenderBasic";
>>		read_report_ticks = hsw_read_report_ticks;
>>		sanity_check_reports = hsw_sanity_check_render_basic_reports;
>>		undefined_a_counters = hsw_undefined_a_counters;
>>	} else {
>>-		test_set_name = "TestOa";
>>		read_report_ticks = gen8_read_report_ticks;
>>		sanity_check_reports = gen8_sanity_check_test_oa_reports;
>>		undefined_a_counters = gen8_undefined_a_counters;
>>	}
>>
>>-	igt_list_for_each_entry(metric_set_iter, &intel_perf->metric_sets, link) {
>>-		if (strcmp(metric_set_iter->symbol_name, test_set_name) == 0) {
>>-			test_set = metric_set_iter;
>>-			break;
>>-		}
>>-	}
>>-
>>+	test_set = i915_perf_default_set(intel_perf, devid);
>>	if (!test_set)
>>		return false;
>>
>>@@ -994,14 +929,12 @@ init_sys_info(void)
>>		  test_set->symbol_name,
>>		  test_set->hw_config_guid);
>>
>>-	intel_perf_load_perf_configs(intel_perf, drm_fd);
>>-
>>	if (test_set->perf_oa_metrics_set == 0) {
>>		igt_debug("Unable to load configurations\n");
>>		return false;
>>	}
>>
>>-	oa_exp_1_millisec = max_oa_exponent_for_period_lte(1000000);
>>+	oa_exp_1_millisec = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 1000000);
>>
>>	return true;
>>}
>>@@ -1911,7 +1844,7 @@ test_low_oa_exponent_permissions(void)
>>
>>	igt_waitchildren();
>>
>>-	oa_period = timebase_scale(2 << ok_exponent);
>>+	oa_period = i915_perf_timebase_scale(intel_perf, 2 << ok_exponent);
>>	oa_freq = NSEC_PER_SEC / oa_period;
>>	write_u64_file("/proc/sys/dev/i915/oa_max_sample_rate", oa_freq - 100);
>>
>>@@ -2003,7 +1936,7 @@ get_time(void)
>>static void
>>test_blocking(uint64_t requested_oa_period, bool set_kernel_hrtimer, uint64_t kernel_hrtimer)
>>{
>>-	int oa_exponent = max_oa_exponent_for_period_lte(requested_oa_period);
>>+	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, requested_oa_period);
>>	uint64_t oa_period = oa_exponent_to_ns(oa_exponent);
>>	uint64_t properties[] = {
>>		/* Include OA reports in samples */
>>@@ -2162,7 +2095,7 @@ test_blocking(uint64_t requested_oa_period, bool set_kernel_hrtimer, uint64_t ke
>>static void
>>test_polling(uint64_t requested_oa_period, bool set_kernel_hrtimer, uint64_t kernel_hrtimer)
>>{
>>-	int oa_exponent = max_oa_exponent_for_period_lte(requested_oa_period);
>>+	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, requested_oa_period);
>>	uint64_t oa_period = oa_exponent_to_ns(oa_exponent);
>>	uint64_t properties[] = {
>>		/* Include OA reports in samples */
>>@@ -2358,7 +2291,7 @@ test_polling(uint64_t requested_oa_period, bool set_kernel_hrtimer, uint64_t ker
>>
>>static void test_polling_small_buf(void)
>>{
>>-	int oa_exponent = max_oa_exponent_for_period_lte(40 * 1000); /* 40us */
>>+	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 40 * 1000); /* 40us */
>>	uint64_t properties[] = {
>>		/* Include OA reports in samples */
>>		DRM_I915_PERF_PROP_SAMPLE_OA, true,
>>@@ -2461,7 +2394,7 @@ num_valid_reports_captured(struct drm_i915_perf_open_param *param,
>>static void
>>gen12_test_oa_tlb_invalidate(void)
>>{
>>-	int oa_exponent = max_oa_exponent_for_period_lte(30000000);
>>+	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 30000000);
>>	uint64_t properties[] = {
>>		DRM_I915_PERF_PROP_SAMPLE_OA, true,
>>
>>@@ -2503,7 +2436,7 @@ static void
>>test_buffer_fill(void)
>>{
>>	/* ~5 micro second period */
>>-	int oa_exponent = max_oa_exponent_for_period_lte(5000);
>>+	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 5000);
>>	uint64_t oa_period = oa_exponent_to_ns(oa_exponent);
>>	uint64_t properties[] = {
>>		/* Include OA reports in samples */
>>@@ -2651,7 +2584,7 @@ static void
>>test_non_zero_reason(void)
>>{
>>	/* ~20 micro second period */
>>-	int oa_exponent = max_oa_exponent_for_period_lte(20000);
>>+	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 20000);
>>	uint64_t properties[] = {
>>		/* Include OA reports in samples */
>>		DRM_I915_PERF_PROP_SAMPLE_OA, true,
>>@@ -2734,7 +2667,7 @@ static void
>>test_enable_disable(void)
>>{
>>	/* ~5 micro second period */
>>-	int oa_exponent = max_oa_exponent_for_period_lte(5000);
>>+	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 5000);
>>	uint64_t oa_period = oa_exponent_to_ns(oa_exponent);
>>	uint64_t properties[] = {
>>		/* Include OA reports in samples */
>>@@ -2885,7 +2818,7 @@ test_enable_disable(void)
>>static void
>>test_short_reads(void)
>>{
>>-	int oa_exponent = max_oa_exponent_for_period_lte(5000);
>>+	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 5000);
>>	uint64_t properties[] = {
>>		/* Include OA reports in samples */
>>		DRM_I915_PERF_PROP_SAMPLE_OA, true,
>>@@ -3447,8 +3380,8 @@ hsw_test_single_ctx_counters(void)
>>
>>		/* sanity check that we can pass the delta to timebase_scale */
>>		igt_assert(delta_ts64 < UINT32_MAX);
>>-		delta_oa32_ns = timebase_scale(delta_oa32);
>>-		delta_ts64_ns = timebase_scale(delta_ts64);
>>+		delta_oa32_ns = i915_perf_timebase_scale(intel_perf, delta_oa32);
>>+		delta_ts64_ns = i915_perf_timebase_scale(intel_perf, delta_ts64);
>>
>>		igt_debug("ts32 delta = %u, = %uns\n",
>>			  delta_oa32, (unsigned)delta_oa32_ns);
>>@@ -3498,7 +3431,7 @@ hsw_test_single_ctx_counters(void)
>>static void
>>gen8_test_single_ctx_render_target_writes_a_counter(void)
>>{
>>-	int oa_exponent = max_oa_exponent_for_period_lte(1000000);
>>+	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 1000000);
>>	uint64_t properties[] = {
>>		DRM_I915_PERF_PROP_CTX_HANDLE, UINT64_MAX, /* updated below */
>>
>>@@ -3700,8 +3633,8 @@ gen8_test_single_ctx_render_target_writes_a_counter(void)
>>
>>			/* sanity check that we can pass the delta to timebase_scale */
>>			igt_assert(delta_ts64 < UINT32_MAX);
>>-			delta_oa32_ns = timebase_scale(delta_oa32);
>>-			delta_ts64_ns = timebase_scale(delta_ts64);
>>+			delta_oa32_ns = i915_perf_timebase_scale(intel_perf, delta_oa32);
>>+			delta_ts64_ns = i915_perf_timebase_scale(intel_perf, delta_ts64);
>>
>>			igt_debug("oa32 delta = %u, = %uns\n",
>>				  delta_oa32, (unsigned)delta_oa32_ns);
>>@@ -3783,7 +3716,8 @@ gen8_test_single_ctx_render_target_writes_a_counter(void)
>>				{
>>					uint32_t time_delta = report[1] - report0_32[1];
>>
>>-					if (timebase_scale(time_delta) > 1000000000) {
>>+					if (i915_perf_timebase_scale(intel_perf,
>>+								     time_delta) > 1000000000) {
>>						skip_reason = "prior first mi-rpc";
>>					}
>>				}
>>@@ -3791,7 +3725,8 @@ gen8_test_single_ctx_render_target_writes_a_counter(void)
>>				{
>>					uint32_t time_delta = report[1] - report1_32[1];
>>
>>-					if (timebase_scale(time_delta) <= 1000000000) {
>>+					if (i915_perf_timebase_scale(intel_perf,
>>+								     time_delta) <= 1000000000) {
>>						igt_debug("    comes after last MI_RPC (%u)\n",
>>							  report1_32[1]);
>>						report = report1_32;
>>@@ -4164,7 +4099,7 @@ static void gen12_single_ctx_helper(void)
>>
>>	/* Sanity check that we can pass the delta to timebase_scale */
>>	igt_assert(delta_ts64 < UINT32_MAX);
>>-	delta_oa32_ns = timebase_scale(delta_oa32);
>>+	delta_oa32_ns = i915_perf_timebase_scale(intel_perf, delta_oa32);
>>	delta_ts64_ns = cs_timebase_scale(delta_ts64);
>>
>>	igt_debug("oa32 delta = %u, = %uns\n",
>>-- 
>>2.25.1
>>

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [igt-dev] [PATCH i-g-t] i915/perf: Make __perf_open() and friends public
  2023-02-07 18:16 ` [igt-dev] [PATCH i-g-t] " Kamil Konieczny
@ 2023-02-07 19:45   ` Janusz Krzysztofik
  0 siblings, 0 replies; 18+ messages in thread
From: Janusz Krzysztofik @ 2023-02-07 19:45 UTC (permalink / raw)
  To: Kamil Konieczny, igt-dev, Petri Latvala, Arkadiusz Hiler,
	Chris Wilson, Ashutosh Dixit, Umesh Nerlige Ramappa,
	John Harrison, Vinay Belgaumkar, Janusz Krzysztofik,
	Lionel Landwerlin

Hi Kamil,

Thanks for review.

On Tuesday, 7 February 2023 19:16:56 CET Kamil Konieczny wrote:
> Hi Janusz,
> 
> On 2023-02-07 at 11:11:21 +0100, Janusz Krzysztofik wrote:
> > We need new subtests that exercise interaction between i915 perf open/
> ----------------------------------------------------------------- ^
> put it on one line as open/close, but imho you can omit those two names;
> the main focus is on interaction, these two functions are just useful
> now.

OK, I can drop open/close from commit description, though those are just the 
two main functions that have been actually missing from i915/perf library.

> 
> > close and other i915 subsystems from the point of view of those other
> > subsystems.  Allow other tests to reuse __perf_open/close() family of
> > functions, now inside i915/perf test, by moving (sharable parts of)
> > them to i915/perf library.
> > 
> > Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik@linux.intel.com>
> > ---
> >  lib/i915/perf.c   | 130 ++++++++++++++++++++++++++++++++++++++++++++++
> >  lib/i915/perf.h   |  15 ++++++
> >  lib/meson.build   |   1 +
> >  tests/i915/perf.c | 121 ++++++++++--------------------------------
> >  4 files changed, 174 insertions(+), 93 deletions(-)
> > 
> > diff --git a/lib/i915/perf.c b/lib/i915/perf.c
> > index 6c7a192558..e71d637eb5 100644
> > --- a/lib/i915/perf.c
> > +++ b/lib/i915/perf.c
> > @@ -39,7 +39,9 @@
> >  
> >  #include "i915_pciids.h"
> >  
> > +#include "igt_aux.h"
> >  #include "intel_chipset.h"
> > +#include "ioctl_wrappers.h"
> >  #include "perf.h"
> >  
> >  #include "i915_perf_metrics_hsw.h"
> > @@ -1008,3 +1010,131 @@ const char *intel_perf_read_report_reason(const struct intel_perf *perf,
> >  
> >  	return "unknown";
> >  }
> 
> All public functions need to be described (here and below),
> maybe someone who worked on perf test could help ?
> +cc Lionel Landwerlin <lionel.g.landwerlin@intel.com>

Yes, I'd appreciate some help and/or suggestions from the authors of i915/perf 
library and test, especially where documentation is insufficient.

> 
> > +
> > +uint64_t i915_perf_timebase_scale(struct intel_perf *intel_perf, uint32_t u32_delta)
> > +{
> > +	return ((uint64_t)u32_delta * NSEC_PER_SEC) / intel_perf->devinfo.timestamp_frequency;
> > +}
> > +
> 
> A description starts with /** and also describes the parameters.

OK (I've fixed similar patterns in some places while moving the code, but 
apparently missed the one here).

> 
> > +/* Returns: the largest OA exponent that will still result in a sampling period
> > + * less than or equal to the given @period.
> > + */
> > +int i915_perf_max_oa_exponent_for_period_lte(struct intel_perf *intel_perf, uint64_t period)
> > +{
> > +	/* NB: timebase_scale() takes a uint32_t and an exponent of 30
> > +	 * would already represent a period of ~3 minutes so there's
> > +	 * really no need to consider higher exponents.
> > +	 */
> > +	for (int i = 0; i < 30; i++) {
> > +		uint64_t oa_period = i915_perf_timebase_scale(intel_perf, 2 << i);
> > +
> > +		if (oa_period > period)
> > +			return max(0, i - 1);
> > +	}
> > +
> > +	igt_assert(!"reached");
> 
> imho asserts in lib functions should be limited to very few cases.

OK, since I share your point, I'll review all igt_asserts() in the moved code 
and convert them to return err where applicable.

> 
> > +	return -1;
> > +}
> > +
> > +struct intel_perf_metric_set *i915_perf_default_set(struct intel_perf *intel_perf, uint32_t devid)
> > +{
> > +	struct intel_perf_metric_set *metric_set = NULL, *metric_set_iter;
> > +	const char *metric_set_name = NULL;
> > +
> > +	igt_assert_neq(devid, 0);
> > +
> > +	/*
> > +	 * We don't have a TestOa metric set for Haswell so use
> > +	 * RenderBasic
> > +	 */
> > +	if (IS_HASWELL(devid))
> > +		metric_set_name = "RenderBasic";
> > +	else
> > +		metric_set_name = "TestOa";
> > +
> > +	igt_list_for_each_entry(metric_set_iter, &intel_perf->metric_sets, link) {
> > +		if (strcmp(metric_set_iter->symbol_name, metric_set_name) == 0) {
> > +			metric_set = metric_set_iter;
> > +			break;
> > +		}
> > +	}
> > +
> > +	return metric_set;
> > +}
> > +
> > +struct intel_perf *i915_perf_init_sys_info(int drm_fd)
> > +{
> > +	struct intel_perf *intel_perf;
> > +
> > +	intel_perf = intel_perf_for_fd(drm_fd);
> > +	if (!intel_perf)
> > +		return NULL;
> > +
> > +	igt_debug("n_eu_slices: %"PRIu64"\n", intel_perf->devinfo.n_eu_slices);
> > +	igt_debug("n_eu_sub_slices: %"PRIu64"\n", intel_perf->devinfo.n_eu_sub_slices);
> > +	igt_debug("n_eus: %"PRIu64"\n", intel_perf->devinfo.n_eus);
> > +	igt_debug("timestamp_frequency = %"PRIu64"\n",
> > +		  intel_perf->devinfo.timestamp_frequency);
> > +	igt_assert_neq(intel_perf->devinfo.timestamp_frequency, 0);
> > +
> > +	intel_perf_load_perf_configs(intel_perf, drm_fd);
> > +
> > +	return intel_perf;
> > +}
> > +
> > +int i915_perf_open(int drm_fd, struct drm_i915_perf_open_param *param, int *pm_fd)
> > +{
> > +	int32_t pm_value = 0;
> > +	int ret;
> > +
> > +	ret = perf_ioctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, param);
> > +
> > +	igt_assert(ret >= 0);
> ------- ^
> imho here we should return error if ret < 0
> Checks for errors should be done in tests.

OK.

> 
> > +	errno = 0;
> > +
> > +	if (pm_fd) {
> ----------- !
> But imho this should be done by a separate function, like
> i915_perf_disable

Please note that throughout the i915/perf test, pm_fd is either not touched, 
or handled only together with drm_fd.  What users of a separate 
i915_perf_disable() can you see?

If you really don't like this solution, I propose to remove pm_fd handling 
from the library and let the helpers inside the i915/perf test take care of it 
where needed.

Please expect v2 soon, I'm going to submit it in series with one new subtest 
outside i915/perf that makes use of these new library functions.

Thanks,
Janusz

> 
> Regards,
> Kamil
> 
> > +		*pm_fd = open("/dev/cpu_dma_latency", O_RDWR);
> > +		igt_assert(*pm_fd >= 0);
> > +
> > +		igt_assert_eq(write(*pm_fd, &pm_value, sizeof(pm_value)), sizeof(pm_value));
> > +	}
> > +
> > +	return ret;
> > +}
> > +
> > +int i915_perf_open_for_devid(int drm_fd, uint32_t devid, struct intel_perf *intel_perf, int *pm_fd)
> > +{
> > +	struct intel_perf_metric_set *metric_set = i915_perf_default_set(intel_perf, devid);
> > +	uint64_t oa_exp = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 1000000);
> > +	uint64_t properties[] = {
> > +		DRM_I915_PERF_PROP_SAMPLE_OA, true,
> > +		DRM_I915_PERF_PROP_OA_METRICS_SET, 0,
> > +		DRM_I915_PERF_PROP_OA_FORMAT, 0,
> > +		DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp,
> > +	};
> > +	struct drm_i915_perf_open_param param = {
> > +		.flags = I915_PERF_FLAG_FD_CLOEXEC,
> > +		.num_properties = sizeof(properties) / 16,
> > +		.properties_ptr = to_user_pointer(properties),
> > +	};
> > +
> > +	igt_assert(metric_set);
> > +	igt_assert(metric_set->perf_oa_metrics_set);
> > +	igt_assert(oa_exp >= 0);
> > +
> > +	igt_debug("%s metric set UUID = %s\n",
> > +		  metric_set->symbol_name,
> > +		  metric_set->hw_config_guid);
> > +
> > +	properties[3] = metric_set->perf_oa_metrics_set;
> > +	properties[5] = metric_set->perf_oa_format;
> > +
> > +	return i915_perf_open(drm_fd, &param, pm_fd);
> > +}
> > +
> > +void i915_perf_close(int stream_fd, int pm_fd)
> > +{
> > +	close(stream_fd);
> > +	if (pm_fd >= 0)
> > +		close(pm_fd);
> > +}
> > diff --git a/lib/i915/perf.h b/lib/i915/perf.h
> > index e6e60dc997..c9cd28be47 100644
> > --- a/lib/i915/perf.h
> > +++ b/lib/i915/perf.h
> > @@ -351,6 +351,21 @@ uint64_t intel_perf_read_record_timestamp_raw(const struct intel_perf *perf,
> >  const char *intel_perf_read_report_reason(const struct intel_perf *perf,
> >  					  const struct drm_i915_perf_record_header *record);
> >  
> > +uint64_t i915_perf_timebase_scale(struct intel_perf *intel_perf, uint32_t u32_delta);
> > +
> > +int i915_perf_max_oa_exponent_for_period_lte(struct intel_perf *intel_perf, uint64_t period);
> > +
> > +struct intel_perf_metric_set *i915_perf_default_set(struct intel_perf *intel_perf, uint32_t devid);
> > +
> > +struct intel_perf *i915_perf_init_sys_info(int drm_fd);
> > +
> > +struct drm_i915_perf_open_param;
> > +int i915_perf_open(int drm_fd, struct drm_i915_perf_open_param *param, int *pm_fd);
> > +
> > +int i915_perf_open_for_devid(int drm_fd, uint32_t devid, struct intel_perf *intel_perf, int *pm_fd);
> > +
> > +void i915_perf_close(int drm_fd, int pm_fd);
> > +
> >  #ifdef __cplusplus
> >  };
> >  #endif
> > diff --git a/lib/meson.build b/lib/meson.build
> > index d49b78ca1a..e79b31090b 100644
> > --- a/lib/meson.build
> > +++ b/lib/meson.build
> > @@ -258,6 +258,7 @@ lib_igt_drm_fdinfo = declare_dependency(link_with : lib_igt_drm_fdinfo_build,
> >  				  include_directories : inc)
> >  i915_perf_files = [
> >    'igt_list.c',
> > +  'igt_tools_stub.c',
> >    'i915/perf.c',
> >    'i915/perf_data_reader.c',
> >  ]
> > diff --git a/tests/i915/perf.c b/tests/i915/perf.c
> > index dd1f1ac399..a3f59d143b 100644
> > --- a/tests/i915/perf.c
> > +++ b/tests/i915/perf.c
> > @@ -287,21 +287,16 @@ pretty_print_oa_period(uint64_t oa_period_ns)
> >  static void
> >  __perf_close(int fd)
> >  {
> > -	close(fd);
> > +	i915_perf_close(fd, pm_fd);
> >  	stream_fd = -1;
> >  
> > -	if (pm_fd >= 0) {
> > -		close(pm_fd);
> > +	if (pm_fd >= 0)
> >  		pm_fd = -1;
> > -	}
> >  }
> >  
> >  static int
> >  __perf_open(int fd, struct drm_i915_perf_open_param *param, bool prevent_pm)
> >  {
> > -	int ret;
> > -	int32_t pm_value = 0;
> > -
> >  	if (stream_fd >= 0)
> >  		__perf_close(stream_fd);
> >  	if (pm_fd >= 0) {
> > @@ -309,19 +304,7 @@ __perf_open(int fd, struct drm_i915_perf_open_param *param, bool prevent_pm)
> >  		pm_fd = -1;
> >  	}
> >  
> > -	ret = igt_ioctl(fd, DRM_IOCTL_I915_PERF_OPEN, param);
> > -
> > -	igt_assert(ret >= 0);
> > -	errno = 0;
> > -
> > -	if (prevent_pm) {
> > -		pm_fd = open("/dev/cpu_dma_latency", O_RDWR);
> > -		igt_assert(pm_fd >= 0);
> > -
> > -		igt_assert_eq(write(pm_fd, &pm_value, sizeof(pm_value)), sizeof(pm_value));
> > -	}
> > -
> > -	return ret;
> > +	return i915_perf_open(fd, param, prevent_pm ? &pm_fd : NULL);
> >  }
> >  
> >  static int
> > @@ -465,33 +448,6 @@ cs_timebase_scale(uint32_t u32_delta)
> >  	return ((uint64_t)u32_delta * NSEC_PER_SEC) / cs_timestamp_frequency(drm_fd);
> >  }
> >  
> > -static uint64_t
> > -timebase_scale(uint32_t u32_delta)
> > -{
> > -	return ((uint64_t)u32_delta * NSEC_PER_SEC) / intel_perf->devinfo.timestamp_frequency;
> > -}
> > -
> > -/* Returns: the largest OA exponent that will still result in a sampling period
> > - * less than or equal to the given @period.
> > - */
> > -static int
> > -max_oa_exponent_for_period_lte(uint64_t period)
> > -{
> > -	/* NB: timebase_scale() takes a uint32_t and an exponent of 30
> > -	 * would already represent a period of ~3 minutes so there's
> > -	 * really no need to consider higher exponents.
> > -	 */
> > -	for (int i = 0; i < 30; i++) {
> > -		uint64_t oa_period = timebase_scale(2 << i);
> > -
> > -		if (oa_period > period)
> > -			return max(0, i - 1);
> > -	}
> > -
> > -	igt_assert(!"reached");
> > -	return -1;
> > -}
> > -
> >  /* Return: the largest OA exponent that will still result in a sampling
> >   * frequency greater than the given @frequency.
> >   */
> > @@ -502,7 +458,7 @@ max_oa_exponent_for_freq_gt(uint64_t frequency)
> >  
> >  	igt_assert_neq(period, 0);
> >  
> > -	return max_oa_exponent_for_period_lte(period - 1);
> > +	return i915_perf_max_oa_exponent_for_period_lte(intel_perf, period - 1);
> >  }
> >  
> >  static uint64_t
> > @@ -626,7 +582,7 @@ hsw_sanity_check_render_basic_reports(const uint32_t *oa_report0,
> >  				      const uint32_t *oa_report1,
> >  				      enum drm_i915_oa_format fmt)
> >  {
> > -	uint32_t time_delta = timebase_scale(oa_report1[1] - oa_report0[1]);
> > +	uint32_t time_delta = i915_perf_timebase_scale(intel_perf, oa_report1[1] - oa_report0[1]);
> >  	uint32_t clock_delta;
> >  	uint32_t max_delta;
> >  	struct oa_format format = get_oa_format(fmt);
> > @@ -832,7 +788,7 @@ gen8_sanity_check_test_oa_reports(const uint32_t *oa_report0,
> >  				  enum drm_i915_oa_format fmt)
> >  {
> >  	struct oa_format format = get_oa_format(fmt);
> > -	uint32_t time_delta = timebase_scale(oa_report1[1] - oa_report0[1]);
> > +	uint32_t time_delta = i915_perf_timebase_scale(intel_perf, oa_report1[1] - oa_report0[1]);
> >  	uint32_t ticks0 = read_report_ticks(oa_report0, fmt);
> >  	uint32_t ticks1 = read_report_ticks(oa_report1, fmt);
> >  	uint32_t clock_delta = ticks1 - ticks0;
> > @@ -950,43 +906,22 @@ gen8_sanity_check_test_oa_reports(const uint32_t *oa_report0,
> >  static bool
> >  init_sys_info(void)
> >  {
> > -	const char *test_set_name = NULL;
> > -	struct intel_perf_metric_set *metric_set_iter;
> > -
> >  	igt_assert_neq(devid, 0);
> >  
> > -	intel_perf = intel_perf_for_fd(drm_fd);
> > +	intel_perf = i915_perf_init_sys_info(drm_fd);
> >  	igt_require(intel_perf);
> >  
> > -	igt_debug("n_eu_slices: %"PRIu64"\n", intel_perf->devinfo.n_eu_slices);
> > -	igt_debug("n_eu_sub_slices: %"PRIu64"\n", intel_perf->devinfo.n_eu_sub_slices);
> > -	igt_debug("n_eus: %"PRIu64"\n", intel_perf->devinfo.n_eus);
> > -	igt_debug("timestamp_frequency = %"PRIu64"\n",
> > -		  intel_perf->devinfo.timestamp_frequency);
> > -	igt_assert_neq(intel_perf->devinfo.timestamp_frequency, 0);
> > -
> > -	/* We don't have a TestOa metric set for Haswell so use
> > -	 * RenderBasic
> > -	 */
> >  	if (IS_HASWELL(devid)) {
> > -		test_set_name = "RenderBasic";
> >  		read_report_ticks = hsw_read_report_ticks;
> >  		sanity_check_reports = hsw_sanity_check_render_basic_reports;
> >  		undefined_a_counters = hsw_undefined_a_counters;
> >  	} else {
> > -		test_set_name = "TestOa";
> >  		read_report_ticks = gen8_read_report_ticks;
> >  		sanity_check_reports = gen8_sanity_check_test_oa_reports;
> >  		undefined_a_counters = gen8_undefined_a_counters;
> >  	}
> >  
> > -	igt_list_for_each_entry(metric_set_iter, &intel_perf->metric_sets, link) {
> > -		if (strcmp(metric_set_iter->symbol_name, test_set_name) == 0) {
> > -			test_set = metric_set_iter;
> > -			break;
> > -		}
> > -	}
> > -
> > +	test_set = i915_perf_default_set(intel_perf, devid);
> >  	if (!test_set)
> >  		return false;
> >  
> > @@ -994,14 +929,12 @@ init_sys_info(void)
> >  		  test_set->symbol_name,
> >  		  test_set->hw_config_guid);
> >  
> > -	intel_perf_load_perf_configs(intel_perf, drm_fd);
> > -
> >  	if (test_set->perf_oa_metrics_set == 0) {
> >  		igt_debug("Unable to load configurations\n");
> >  		return false;
> >  	}
> >  
> > -	oa_exp_1_millisec = max_oa_exponent_for_period_lte(1000000);
> > +	oa_exp_1_millisec = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 1000000);
> >  
> >  	return true;
> >  }
> > @@ -1911,7 +1844,7 @@ test_low_oa_exponent_permissions(void)
> >  
> >  	igt_waitchildren();
> >  
> > -	oa_period = timebase_scale(2 << ok_exponent);
> > +	oa_period = i915_perf_timebase_scale(intel_perf, 2 << ok_exponent);
> >  	oa_freq = NSEC_PER_SEC / oa_period;
> >  	write_u64_file("/proc/sys/dev/i915/oa_max_sample_rate", oa_freq - 100);
> >  
> > @@ -2003,7 +1936,7 @@ get_time(void)
> >  static void
> >  test_blocking(uint64_t requested_oa_period, bool set_kernel_hrtimer, uint64_t kernel_hrtimer)
> >  {
> > -	int oa_exponent = max_oa_exponent_for_period_lte(requested_oa_period);
> > +	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, requested_oa_period);
> >  	uint64_t oa_period = oa_exponent_to_ns(oa_exponent);
> >  	uint64_t properties[] = {
> >  		/* Include OA reports in samples */
> > @@ -2162,7 +2095,7 @@ test_blocking(uint64_t requested_oa_period, bool set_kernel_hrtimer, uint64_t ke
> >  static void
> >  test_polling(uint64_t requested_oa_period, bool set_kernel_hrtimer, uint64_t kernel_hrtimer)
> >  {
> > -	int oa_exponent = max_oa_exponent_for_period_lte(requested_oa_period);
> > +	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, requested_oa_period);
> >  	uint64_t oa_period = oa_exponent_to_ns(oa_exponent);
> >  	uint64_t properties[] = {
> >  		/* Include OA reports in samples */
> > @@ -2358,7 +2291,7 @@ test_polling(uint64_t requested_oa_period, bool set_kernel_hrtimer, uint64_t ker
> >  
> >  static void test_polling_small_buf(void)
> >  {
> > -	int oa_exponent = max_oa_exponent_for_period_lte(40 * 1000); /* 40us */
> > +	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 40 * 1000); /* 40us */
> >  	uint64_t properties[] = {
> >  		/* Include OA reports in samples */
> >  		DRM_I915_PERF_PROP_SAMPLE_OA, true,
> > @@ -2461,7 +2394,7 @@ num_valid_reports_captured(struct drm_i915_perf_open_param *param,
> >  static void
> >  gen12_test_oa_tlb_invalidate(void)
> >  {
> > -	int oa_exponent = max_oa_exponent_for_period_lte(30000000);
> > +	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 30000000);
> >  	uint64_t properties[] = {
> >  		DRM_I915_PERF_PROP_SAMPLE_OA, true,
> >  
> > @@ -2503,7 +2436,7 @@ static void
> >  test_buffer_fill(void)
> >  {
> >  	/* ~5 micro second period */
> > -	int oa_exponent = max_oa_exponent_for_period_lte(5000);
> > +	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 5000);
> >  	uint64_t oa_period = oa_exponent_to_ns(oa_exponent);
> >  	uint64_t properties[] = {
> >  		/* Include OA reports in samples */
> > @@ -2651,7 +2584,7 @@ static void
> >  test_non_zero_reason(void)
> >  {
> >  	/* ~20 micro second period */
> > -	int oa_exponent = max_oa_exponent_for_period_lte(20000);
> > +	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 20000);
> >  	uint64_t properties[] = {
> >  		/* Include OA reports in samples */
> >  		DRM_I915_PERF_PROP_SAMPLE_OA, true,
> > @@ -2734,7 +2667,7 @@ static void
> >  test_enable_disable(void)
> >  {
> >  	/* ~5 micro second period */
> > -	int oa_exponent = max_oa_exponent_for_period_lte(5000);
> > +	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 5000);
> >  	uint64_t oa_period = oa_exponent_to_ns(oa_exponent);
> >  	uint64_t properties[] = {
> >  		/* Include OA reports in samples */
> > @@ -2885,7 +2818,7 @@ test_enable_disable(void)
> >  static void
> >  test_short_reads(void)
> >  {
> > -	int oa_exponent = max_oa_exponent_for_period_lte(5000);
> > +	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 5000);
> >  	uint64_t properties[] = {
> >  		/* Include OA reports in samples */
> >  		DRM_I915_PERF_PROP_SAMPLE_OA, true,
> > @@ -3447,8 +3380,8 @@ hsw_test_single_ctx_counters(void)
> >  
> >  		/* sanity check that we can pass the delta to timebase_scale */
> >  		igt_assert(delta_ts64 < UINT32_MAX);
> > -		delta_oa32_ns = timebase_scale(delta_oa32);
> > -		delta_ts64_ns = timebase_scale(delta_ts64);
> > +		delta_oa32_ns = i915_perf_timebase_scale(intel_perf, delta_oa32);
> > +		delta_ts64_ns = i915_perf_timebase_scale(intel_perf, delta_ts64);
> >  
> >  		igt_debug("ts32 delta = %u, = %uns\n",
> >  			  delta_oa32, (unsigned)delta_oa32_ns);
> > @@ -3498,7 +3431,7 @@ hsw_test_single_ctx_counters(void)
> >  static void
> >  gen8_test_single_ctx_render_target_writes_a_counter(void)
> >  {
> > -	int oa_exponent = max_oa_exponent_for_period_lte(1000000);
> > +	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 1000000);
> >  	uint64_t properties[] = {
> >  		DRM_I915_PERF_PROP_CTX_HANDLE, UINT64_MAX, /* updated below */
> >  
> > @@ -3700,8 +3633,8 @@ gen8_test_single_ctx_render_target_writes_a_counter(void)
> >  
> >  			/* sanity check that we can pass the delta to timebase_scale */
> >  			igt_assert(delta_ts64 < UINT32_MAX);
> > -			delta_oa32_ns = timebase_scale(delta_oa32);
> > -			delta_ts64_ns = timebase_scale(delta_ts64);
> > +			delta_oa32_ns = i915_perf_timebase_scale(intel_perf, delta_oa32);
> > +			delta_ts64_ns = i915_perf_timebase_scale(intel_perf, delta_ts64);
> >  
> >  			igt_debug("oa32 delta = %u, = %uns\n",
> >  				  delta_oa32, (unsigned)delta_oa32_ns);
> > @@ -3783,7 +3716,8 @@ gen8_test_single_ctx_render_target_writes_a_counter(void)
> >  				{
> >  					uint32_t time_delta = report[1] - report0_32[1];
> >  
> > -					if (timebase_scale(time_delta) > 1000000000) {
> > +					if (i915_perf_timebase_scale(intel_perf,
> > +								     time_delta) > 1000000000) {
> >  						skip_reason = "prior first mi-rpc";
> >  					}
> >  				}
> > @@ -3791,7 +3725,8 @@ gen8_test_single_ctx_render_target_writes_a_counter(void)
> >  				{
> >  					uint32_t time_delta = report[1] - report1_32[1];
> >  
> > -					if (timebase_scale(time_delta) <= 1000000000) {
> > +					if (i915_perf_timebase_scale(intel_perf,
> > +								     time_delta) <= 1000000000) {
> >  						igt_debug("    comes after last MI_RPC (%u)\n",
> >  							  report1_32[1]);
> >  						report = report1_32;
> > @@ -4164,7 +4099,7 @@ static void gen12_single_ctx_helper(void)
> >  
> >  	/* Sanity check that we can pass the delta to timebase_scale */
> >  	igt_assert(delta_ts64 < UINT32_MAX);
> > -	delta_oa32_ns = timebase_scale(delta_oa32);
> > +	delta_oa32_ns = i915_perf_timebase_scale(intel_perf, delta_oa32);
> >  	delta_ts64_ns = cs_timebase_scale(delta_ts64);
> >  
> >  	igt_debug("oa32 delta = %u, = %uns\n",
> 




^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [igt-dev] [PATCH i-g-t] i915/perf: Make __perf_open() and friends public
  2023-02-07 19:33   ` Umesh Nerlige Ramappa
@ 2023-02-07 20:04     ` Janusz Krzysztofik
  2023-02-07 20:15       ` Dixit, Ashutosh
  2023-02-08 14:35     ` Kamil Konieczny
  1 sibling, 1 reply; 18+ messages in thread
From: Janusz Krzysztofik @ 2023-02-07 20:04 UTC (permalink / raw)
  To: Umesh Nerlige Ramappa; +Cc: igt-dev, Chris Wilson

Hi Umesh,

On Tuesday, 7 February 2023 20:33:50 CET Umesh Nerlige Ramappa wrote:
> On Tue, Feb 07, 2023 at 11:25:00AM -0800, Umesh Nerlige Ramappa wrote:
> >I wouldn't do this. Please keep the changes local to the specific test 
> >that you implemented in your first rev. While it is a good idea to 
> >have some of the perf capabilities in the library, this is way too 
> >much churn to implement a specific test for the original failure. 
> >Unless multiple IGT subsystems are already dependent on perf APIs to 
> >implement multiple tests, let's not do this.
> >
> 
> Also note that the perf library implemented in IGT is not entirely used 
> by IGT tests alone. The library is also linked to GPUvis software. Only 
> a few pieces of reusable code in the perf library are used by IGT tests.

Do you think that my changes will break other users?  How?

Also, it looks like there are somehow conflicting expectations from different 
reviewers.  Ashutosh wanted the new subtest to be implemented outside of i915/
perf test.  That's why I proposed to extend the library with open/close and 
related helpers, just to avoid code duplication, and I'm about to resend it in 
series with the new subtest implemented inside gem_ctx_exec.  Now, after I 
submitted this patch for initial review, you say that a specific test is not 
the way to go.  What are you afraid of?

Whose expectations should I try to satisfy in order to have a subtest accepted 
and merged?  Or should I just give up and duplicate the code from i915/perf in 
another test?  Or maybe you can have a look at the whole series before you 
decide?

Thanks,
Janusz

> 
> >Thanks,
> >Umesh
> >
> >On Tue, Feb 07, 2023 at 11:11:21AM +0100, Janusz Krzysztofik wrote:
> >>We need new subtests that exercise interaction between i915 perf open/
> >>close and other i915 subsystems from the point of view of those other
> >>subsystems.  Allow other tests to reuse __perf_open/close() family of
> >>functions, now inside i915/perf test, by moving (sharable parts of)
> >>them to i915/perf library.
> >>
> >>Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik@linux.intel.com>
> >>---
> >>lib/i915/perf.c   | 130 ++++++++++++++++++++++++++++++++++++++++++++++
> >>lib/i915/perf.h   |  15 ++++++
> >>lib/meson.build   |   1 +
> >>tests/i915/perf.c | 121 ++++++++++--------------------------------
> >>4 files changed, 174 insertions(+), 93 deletions(-)
> >>
> >>diff --git a/lib/i915/perf.c b/lib/i915/perf.c
> >>index 6c7a192558..e71d637eb5 100644
> >>--- a/lib/i915/perf.c
> >>+++ b/lib/i915/perf.c
> >>@@ -39,7 +39,9 @@
> >>
> >>#include "i915_pciids.h"
> >>
> >>+#include "igt_aux.h"
> >>#include "intel_chipset.h"
> >>+#include "ioctl_wrappers.h"
> >>#include "perf.h"
> >>
> >>#include "i915_perf_metrics_hsw.h"
> >>@@ -1008,3 +1010,131 @@ const char *intel_perf_read_report_reason(const struct intel_perf *perf,
> >>
> >>	return "unknown";
> >>}
> >>+
> >>+uint64_t i915_perf_timebase_scale(struct intel_perf *intel_perf, uint32_t u32_delta)
> >>+{
> >>+	return ((uint64_t)u32_delta * NSEC_PER_SEC) / intel_perf->devinfo.timestamp_frequency;
> >>+}
> >>+
> >>+/* Returns: the largest OA exponent that will still result in a sampling period
> >>+ * less than or equal to the given @period.
> >>+ */
> >>+int i915_perf_max_oa_exponent_for_period_lte(struct intel_perf *intel_perf, uint64_t period)
> >>+{
> >>+	/* NB: timebase_scale() takes a uint32_t and an exponent of 30
> >>+	 * would already represent a period of ~3 minutes so there's
> >>+	 * really no need to consider higher exponents.
> >>+	 */
> >>+	for (int i = 0; i < 30; i++) {
> >>+		uint64_t oa_period = i915_perf_timebase_scale(intel_perf, 2 << i);
> >>+
> >>+		if (oa_period > period)
> >>+			return max(0, i - 1);
> >>+	}
> >>+
> >>+	igt_assert(!"reached");
> >>+	return -1;
> >>+}
> >>+
> >>+struct intel_perf_metric_set *i915_perf_default_set(struct intel_perf *intel_perf, uint32_t devid)
> >>+{
> >>+	struct intel_perf_metric_set *metric_set = NULL, *metric_set_iter;
> >>+	const char *metric_set_name = NULL;
> >>+
> >>+	igt_assert_neq(devid, 0);
> >>+
> >>+	/*
> >>+	 * We don't have a TestOa metric set for Haswell so use
> >>+	 * RenderBasic
> >>+	 */
> >>+	if (IS_HASWELL(devid))
> >>+		metric_set_name = "RenderBasic";
> >>+	else
> >>+		metric_set_name = "TestOa";
> >>+
> >>+	igt_list_for_each_entry(metric_set_iter, &intel_perf->metric_sets, link) {
> >>+		if (strcmp(metric_set_iter->symbol_name, metric_set_name) == 0) {
> >>+			metric_set = metric_set_iter;
> >>+			break;
> >>+		}
> >>+	}
> >>+
> >>+	return metric_set;
> >>+}
> >>+
> >>+struct intel_perf *i915_perf_init_sys_info(int drm_fd)
> >>+{
> >>+	struct intel_perf *intel_perf;
> >>+
> >>+	intel_perf = intel_perf_for_fd(drm_fd);
> >>+	if (!intel_perf)
> >>+		return NULL;
> >>+
> >>+	igt_debug("n_eu_slices: %"PRIu64"\n", intel_perf->devinfo.n_eu_slices);
> >>+	igt_debug("n_eu_sub_slices: %"PRIu64"\n", intel_perf->devinfo.n_eu_sub_slices);
> >>+	igt_debug("n_eus: %"PRIu64"\n", intel_perf->devinfo.n_eus);
> >>+	igt_debug("timestamp_frequency = %"PRIu64"\n",
> >>+		  intel_perf->devinfo.timestamp_frequency);
> >>+	igt_assert_neq(intel_perf->devinfo.timestamp_frequency, 0);
> >>+
> >>+	intel_perf_load_perf_configs(intel_perf, drm_fd);
> >>+
> >>+	return intel_perf;
> >>+}
> >>+
> >>+int i915_perf_open(int drm_fd, struct drm_i915_perf_open_param *param, int *pm_fd)
> >>+{
> >>+	int32_t pm_value = 0;
> >>+	int ret;
> >>+
> >>+	ret = perf_ioctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, param);
> >>+
> >>+	igt_assert(ret >= 0);
> >>+	errno = 0;
> >>+
> >>+	if (pm_fd) {
> >>+		*pm_fd = open("/dev/cpu_dma_latency", O_RDWR);
> >>+		igt_assert(*pm_fd >= 0);
> >>+
> >>+		igt_assert_eq(write(*pm_fd, &pm_value, sizeof(pm_value)), sizeof(pm_value));
> >>+	}
> >>+
> >>+	return ret;
> >>+}
> >>+
> >>+int i915_perf_open_for_devid(int drm_fd, uint32_t devid, struct intel_perf *intel_perf, int *pm_fd)
> >>+{
> >>+	struct intel_perf_metric_set *metric_set = i915_perf_default_set(intel_perf, devid);
> >>+	uint64_t oa_exp = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 1000000);
> >>+	uint64_t properties[] = {
> >>+		DRM_I915_PERF_PROP_SAMPLE_OA, true,
> >>+		DRM_I915_PERF_PROP_OA_METRICS_SET, 0,
> >>+		DRM_I915_PERF_PROP_OA_FORMAT, 0,
> >>+		DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp,
> >>+	};
> >>+	struct drm_i915_perf_open_param param = {
> >>+		.flags = I915_PERF_FLAG_FD_CLOEXEC,
> >>+		.num_properties = sizeof(properties) / 16,
> >>+		.properties_ptr = to_user_pointer(properties),
> >>+	};
> >>+
> >>+	igt_assert(metric_set);
> >>+	igt_assert(metric_set->perf_oa_metrics_set);
> >>+	igt_assert(oa_exp >= 0);
> >>+
> >>+	igt_debug("%s metric set UUID = %s\n",
> >>+		  metric_set->symbol_name,
> >>+		  metric_set->hw_config_guid);
> >>+
> >>+	properties[3] = metric_set->perf_oa_metrics_set;
> >>+	properties[5] = metric_set->perf_oa_format;
> >>+
> >>+	return i915_perf_open(drm_fd, &param, pm_fd);
> >>+}
> >>+
> >>+void i915_perf_close(int stream_fd, int pm_fd)
> >>+{
> >>+	close(stream_fd);
> >>+	if (pm_fd >= 0)
> >>+		close(pm_fd);
> >>+}
> >>diff --git a/lib/i915/perf.h b/lib/i915/perf.h
> >>index e6e60dc997..c9cd28be47 100644
> >>--- a/lib/i915/perf.h
> >>+++ b/lib/i915/perf.h
> >>@@ -351,6 +351,21 @@ uint64_t intel_perf_read_record_timestamp_raw(const struct intel_perf *perf,
> >>const char *intel_perf_read_report_reason(const struct intel_perf *perf,
> >>					  const struct drm_i915_perf_record_header *record);
> >>
> >>+uint64_t i915_perf_timebase_scale(struct intel_perf *intel_perf, uint32_t u32_delta);
> >>+
> >>+int i915_perf_max_oa_exponent_for_period_lte(struct intel_perf *intel_perf, uint64_t period);
> >>+
> >>+struct intel_perf_metric_set *i915_perf_default_set(struct intel_perf *intel_perf, uint32_t devid);
> >>+
> >>+struct intel_perf *i915_perf_init_sys_info(int drm_fd);
> >>+
> >>+struct drm_i915_perf_open_param;
> >>+int i915_perf_open(int drm_fd, struct drm_i915_perf_open_param *param, int *pm_fd);
> >>+
> >>+int i915_perf_open_for_devid(int drm_fd, uint32_t devid, struct intel_perf *intel_perf, int *pm_fd);
> >>+
> >>+void i915_perf_close(int drm_fd, int pm_fd);
> >>+
> >>#ifdef __cplusplus
> >>};
> >>#endif
> >>diff --git a/lib/meson.build b/lib/meson.build
> >>index d49b78ca1a..e79b31090b 100644
> >>--- a/lib/meson.build
> >>+++ b/lib/meson.build
> >>@@ -258,6 +258,7 @@ lib_igt_drm_fdinfo = declare_dependency(link_with : lib_igt_drm_fdinfo_build,
> >>				  include_directories : inc)
> >>i915_perf_files = [
> >>  'igt_list.c',
> >>+  'igt_tools_stub.c',
> >>  'i915/perf.c',
> >>  'i915/perf_data_reader.c',
> >>]
> >>diff --git a/tests/i915/perf.c b/tests/i915/perf.c
> >>index dd1f1ac399..a3f59d143b 100644
> >>--- a/tests/i915/perf.c
> >>+++ b/tests/i915/perf.c
> >>@@ -287,21 +287,16 @@ pretty_print_oa_period(uint64_t oa_period_ns)
> >>static void
> >>__perf_close(int fd)
> >>{
> >>-	close(fd);
> >>+	i915_perf_close(fd, pm_fd);
> >>	stream_fd = -1;
> >>
> >>-	if (pm_fd >= 0) {
> >>-		close(pm_fd);
> >>+	if (pm_fd >= 0)
> >>		pm_fd = -1;
> >>-	}
> >>}
> >>
> >>static int
> >>__perf_open(int fd, struct drm_i915_perf_open_param *param, bool prevent_pm)
> >>{
> >>-	int ret;
> >>-	int32_t pm_value = 0;
> >>-
> >>	if (stream_fd >= 0)
> >>		__perf_close(stream_fd);
> >>	if (pm_fd >= 0) {
> >>@@ -309,19 +304,7 @@ __perf_open(int fd, struct drm_i915_perf_open_param *param, bool prevent_pm)
> >>		pm_fd = -1;
> >>	}
> >>
> >>-	ret = igt_ioctl(fd, DRM_IOCTL_I915_PERF_OPEN, param);
> >>-
> >>-	igt_assert(ret >= 0);
> >>-	errno = 0;
> >>-
> >>-	if (prevent_pm) {
> >>-		pm_fd = open("/dev/cpu_dma_latency", O_RDWR);
> >>-		igt_assert(pm_fd >= 0);
> >>-
> >>-		igt_assert_eq(write(pm_fd, &pm_value, sizeof(pm_value)), sizeof(pm_value));
> >>-	}
> >>-
> >>-	return ret;
> >>+	return i915_perf_open(fd, param, prevent_pm ? &pm_fd : NULL);
> >>}
> >>
> >>static int
> >>@@ -465,33 +448,6 @@ cs_timebase_scale(uint32_t u32_delta)
> >>	return ((uint64_t)u32_delta * NSEC_PER_SEC) / cs_timestamp_frequency(drm_fd);
> >>}
> >>
> >>-static uint64_t
> >>-timebase_scale(uint32_t u32_delta)
> >>-{
> >>-	return ((uint64_t)u32_delta * NSEC_PER_SEC) / intel_perf->devinfo.timestamp_frequency;
> >>-}
> >>-
> >>-/* Returns: the largest OA exponent that will still result in a sampling period
> >>- * less than or equal to the given @period.
> >>- */
> >>-static int
> >>-max_oa_exponent_for_period_lte(uint64_t period)
> >>-{
> >>-	/* NB: timebase_scale() takes a uint32_t and an exponent of 30
> >>-	 * would already represent a period of ~3 minutes so there's
> >>-	 * really no need to consider higher exponents.
> >>-	 */
> >>-	for (int i = 0; i < 30; i++) {
> >>-		uint64_t oa_period = timebase_scale(2 << i);
> >>-
> >>-		if (oa_period > period)
> >>-			return max(0, i - 1);
> >>-	}
> >>-
> >>-	igt_assert(!"reached");
> >>-	return -1;
> >>-}
> >>-
> >>/* Return: the largest OA exponent that will still result in a sampling
> >> * frequency greater than the given @frequency.
> >> */
> >>@@ -502,7 +458,7 @@ max_oa_exponent_for_freq_gt(uint64_t frequency)
> >>
> >>	igt_assert_neq(period, 0);
> >>
> >>-	return max_oa_exponent_for_period_lte(period - 1);
> >>+	return i915_perf_max_oa_exponent_for_period_lte(intel_perf, period - 1);
> >>}
> >>
> >>static uint64_t
> >>@@ -626,7 +582,7 @@ hsw_sanity_check_render_basic_reports(const uint32_t *oa_report0,
> >>				      const uint32_t *oa_report1,
> >>				      enum drm_i915_oa_format fmt)
> >>{
> >>-	uint32_t time_delta = timebase_scale(oa_report1[1] - oa_report0[1]);
> >>+	uint32_t time_delta = i915_perf_timebase_scale(intel_perf, oa_report1[1] - oa_report0[1]);
> >>	uint32_t clock_delta;
> >>	uint32_t max_delta;
> >>	struct oa_format format = get_oa_format(fmt);
> >>@@ -832,7 +788,7 @@ gen8_sanity_check_test_oa_reports(const uint32_t *oa_report0,
> >>				  enum drm_i915_oa_format fmt)
> >>{
> >>	struct oa_format format = get_oa_format(fmt);
> >>-	uint32_t time_delta = timebase_scale(oa_report1[1] - oa_report0[1]);
> >>+	uint32_t time_delta = i915_perf_timebase_scale(intel_perf, oa_report1[1] - oa_report0[1]);
> >>	uint32_t ticks0 = read_report_ticks(oa_report0, fmt);
> >>	uint32_t ticks1 = read_report_ticks(oa_report1, fmt);
> >>	uint32_t clock_delta = ticks1 - ticks0;
> >>@@ -950,43 +906,22 @@ gen8_sanity_check_test_oa_reports(const uint32_t *oa_report0,
> >>static bool
> >>init_sys_info(void)
> >>{
> >>-	const char *test_set_name = NULL;
> >>-	struct intel_perf_metric_set *metric_set_iter;
> >>-
> >>	igt_assert_neq(devid, 0);
> >>
> >>-	intel_perf = intel_perf_for_fd(drm_fd);
> >>+	intel_perf = i915_perf_init_sys_info(drm_fd);
> >>	igt_require(intel_perf);
> >>
> >>-	igt_debug("n_eu_slices: %"PRIu64"\n", intel_perf->devinfo.n_eu_slices);
> >>-	igt_debug("n_eu_sub_slices: %"PRIu64"\n", intel_perf->devinfo.n_eu_sub_slices);
> >>-	igt_debug("n_eus: %"PRIu64"\n", intel_perf->devinfo.n_eus);
> >>-	igt_debug("timestamp_frequency = %"PRIu64"\n",
> >>-		  intel_perf->devinfo.timestamp_frequency);
> >>-	igt_assert_neq(intel_perf->devinfo.timestamp_frequency, 0);
> >>-
> >>-	/* We don't have a TestOa metric set for Haswell so use
> >>-	 * RenderBasic
> >>-	 */
> >>	if (IS_HASWELL(devid)) {
> >>-		test_set_name = "RenderBasic";
> >>		read_report_ticks = hsw_read_report_ticks;
> >>		sanity_check_reports = hsw_sanity_check_render_basic_reports;
> >>		undefined_a_counters = hsw_undefined_a_counters;
> >>	} else {
> >>-		test_set_name = "TestOa";
> >>		read_report_ticks = gen8_read_report_ticks;
> >>		sanity_check_reports = gen8_sanity_check_test_oa_reports;
> >>		undefined_a_counters = gen8_undefined_a_counters;
> >>	}
> >>
> >>-	igt_list_for_each_entry(metric_set_iter, &intel_perf->metric_sets, link) {
> >>-		if (strcmp(metric_set_iter->symbol_name, test_set_name) == 0) {
> >>-			test_set = metric_set_iter;
> >>-			break;
> >>-		}
> >>-	}
> >>-
> >>+	test_set = i915_perf_default_set(intel_perf, devid);
> >>	if (!test_set)
> >>		return false;
> >>
> >>@@ -994,14 +929,12 @@ init_sys_info(void)
> >>		  test_set->symbol_name,
> >>		  test_set->hw_config_guid);
> >>
> >>-	intel_perf_load_perf_configs(intel_perf, drm_fd);
> >>-
> >>	if (test_set->perf_oa_metrics_set == 0) {
> >>		igt_debug("Unable to load configurations\n");
> >>		return false;
> >>	}
> >>
> >>-	oa_exp_1_millisec = max_oa_exponent_for_period_lte(1000000);
> >>+	oa_exp_1_millisec = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 1000000);
> >>
> >>	return true;
> >>}
> >>@@ -1911,7 +1844,7 @@ test_low_oa_exponent_permissions(void)
> >>
> >>	igt_waitchildren();
> >>
> >>-	oa_period = timebase_scale(2 << ok_exponent);
> >>+	oa_period = i915_perf_timebase_scale(intel_perf, 2 << ok_exponent);
> >>	oa_freq = NSEC_PER_SEC / oa_period;
> >>	write_u64_file("/proc/sys/dev/i915/oa_max_sample_rate", oa_freq - 100);
> >>
> >>@@ -2003,7 +1936,7 @@ get_time(void)
> >>static void
> >>test_blocking(uint64_t requested_oa_period, bool set_kernel_hrtimer, uint64_t kernel_hrtimer)
> >>{
> >>-	int oa_exponent = max_oa_exponent_for_period_lte(requested_oa_period);
> >>+	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, requested_oa_period);
> >>	uint64_t oa_period = oa_exponent_to_ns(oa_exponent);
> >>	uint64_t properties[] = {
> >>		/* Include OA reports in samples */
> >>@@ -2162,7 +2095,7 @@ test_blocking(uint64_t requested_oa_period, bool set_kernel_hrtimer, uint64_t ke
> >>static void
> >>test_polling(uint64_t requested_oa_period, bool set_kernel_hrtimer, uint64_t kernel_hrtimer)
> >>{
> >>-	int oa_exponent = max_oa_exponent_for_period_lte(requested_oa_period);
> >>+	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, requested_oa_period);
> >>	uint64_t oa_period = oa_exponent_to_ns(oa_exponent);
> >>	uint64_t properties[] = {
> >>		/* Include OA reports in samples */
> >>@@ -2358,7 +2291,7 @@ test_polling(uint64_t requested_oa_period, bool set_kernel_hrtimer, uint64_t ker
> >>
> >>static void test_polling_small_buf(void)
> >>{
> >>-	int oa_exponent = max_oa_exponent_for_period_lte(40 * 1000); /* 40us */
> >>+	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 40 * 1000); /* 40us */
> >>	uint64_t properties[] = {
> >>		/* Include OA reports in samples */
> >>		DRM_I915_PERF_PROP_SAMPLE_OA, true,
> >>@@ -2461,7 +2394,7 @@ num_valid_reports_captured(struct drm_i915_perf_open_param *param,
> >>static void
> >>gen12_test_oa_tlb_invalidate(void)
> >>{
> >>-	int oa_exponent = max_oa_exponent_for_period_lte(30000000);
> >>+	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 30000000);
> >>	uint64_t properties[] = {
> >>		DRM_I915_PERF_PROP_SAMPLE_OA, true,
> >>
> >>@@ -2503,7 +2436,7 @@ static void
> >>test_buffer_fill(void)
> >>{
> >>	/* ~5 micro second period */
> >>-	int oa_exponent = max_oa_exponent_for_period_lte(5000);
> >>+	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 5000);
> >>	uint64_t oa_period = oa_exponent_to_ns(oa_exponent);
> >>	uint64_t properties[] = {
> >>		/* Include OA reports in samples */
> >>@@ -2651,7 +2584,7 @@ static void
> >>test_non_zero_reason(void)
> >>{
> >>	/* ~20 micro second period */
> >>-	int oa_exponent = max_oa_exponent_for_period_lte(20000);
> >>+	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 20000);
> >>	uint64_t properties[] = {
> >>		/* Include OA reports in samples */
> >>		DRM_I915_PERF_PROP_SAMPLE_OA, true,
> >>@@ -2734,7 +2667,7 @@ static void
> >>test_enable_disable(void)
> >>{
> >>	/* ~5 micro second period */
> >>-	int oa_exponent = max_oa_exponent_for_period_lte(5000);
> >>+	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 5000);
> >>	uint64_t oa_period = oa_exponent_to_ns(oa_exponent);
> >>	uint64_t properties[] = {
> >>		/* Include OA reports in samples */
> >>@@ -2885,7 +2818,7 @@ test_enable_disable(void)
> >>static void
> >>test_short_reads(void)
> >>{
> >>-	int oa_exponent = max_oa_exponent_for_period_lte(5000);
> >>+	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 5000);
> >>	uint64_t properties[] = {
> >>		/* Include OA reports in samples */
> >>		DRM_I915_PERF_PROP_SAMPLE_OA, true,
> >>@@ -3447,8 +3380,8 @@ hsw_test_single_ctx_counters(void)
> >>
> >>		/* sanity check that we can pass the delta to timebase_scale */
> >>		igt_assert(delta_ts64 < UINT32_MAX);
> >>-		delta_oa32_ns = timebase_scale(delta_oa32);
> >>-		delta_ts64_ns = timebase_scale(delta_ts64);
> >>+		delta_oa32_ns = i915_perf_timebase_scale(intel_perf, delta_oa32);
> >>+		delta_ts64_ns = i915_perf_timebase_scale(intel_perf, delta_ts64);
> >>
> >>		igt_debug("ts32 delta = %u, = %uns\n",
> >>			  delta_oa32, (unsigned)delta_oa32_ns);
> >>@@ -3498,7 +3431,7 @@ hsw_test_single_ctx_counters(void)
> >>static void
> >>gen8_test_single_ctx_render_target_writes_a_counter(void)
> >>{
> >>-	int oa_exponent = max_oa_exponent_for_period_lte(1000000);
> >>+	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 1000000);
> >>	uint64_t properties[] = {
> >>		DRM_I915_PERF_PROP_CTX_HANDLE, UINT64_MAX, /* updated below */
> >>
> >>@@ -3700,8 +3633,8 @@ gen8_test_single_ctx_render_target_writes_a_counter(void)
> >>
> >>			/* sanity check that we can pass the delta to timebase_scale */
> >>			igt_assert(delta_ts64 < UINT32_MAX);
> >>-			delta_oa32_ns = timebase_scale(delta_oa32);
> >>-			delta_ts64_ns = timebase_scale(delta_ts64);
> >>+			delta_oa32_ns = i915_perf_timebase_scale(intel_perf, delta_oa32);
> >>+			delta_ts64_ns = i915_perf_timebase_scale(intel_perf, delta_ts64);
> >>
> >>			igt_debug("oa32 delta = %u, = %uns\n",
> >>				  delta_oa32, (unsigned)delta_oa32_ns);
> >>@@ -3783,7 +3716,8 @@ gen8_test_single_ctx_render_target_writes_a_counter(void)
> >>				{
> >>					uint32_t time_delta = report[1] - report0_32[1];
> >>
> >>-					if (timebase_scale(time_delta) > 1000000000) {
> >>+					if (i915_perf_timebase_scale(intel_perf,
> >>+								     time_delta) > 1000000000) {
> >>						skip_reason = "prior first mi-rpc";
> >>					}
> >>				}
> >>@@ -3791,7 +3725,8 @@ gen8_test_single_ctx_render_target_writes_a_counter(void)
> >>				{
> >>					uint32_t time_delta = report[1] - report1_32[1];
> >>
> >>-					if (timebase_scale(time_delta) <= 1000000000) {
> >>+					if (i915_perf_timebase_scale(intel_perf,
> >>+								     time_delta) <= 1000000000) {
> >>						igt_debug("    comes after last MI_RPC (%u)\n",
> >>							  report1_32[1]);
> >>						report = report1_32;
> >>@@ -4164,7 +4099,7 @@ static void gen12_single_ctx_helper(void)
> >>
> >>	/* Sanity check that we can pass the delta to timebase_scale */
> >>	igt_assert(delta_ts64 < UINT32_MAX);
> >>-	delta_oa32_ns = timebase_scale(delta_oa32);
> >>+	delta_oa32_ns = i915_perf_timebase_scale(intel_perf, delta_oa32);
> >>	delta_ts64_ns = cs_timebase_scale(delta_ts64);
> >>
> >>	igt_debug("oa32 delta = %u, = %uns\n",
> 




^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [igt-dev] [PATCH i-g-t] i915/perf: Make __perf_open() and friends public
  2023-02-07 20:04     ` Janusz Krzysztofik
@ 2023-02-07 20:15       ` Dixit, Ashutosh
  2023-02-08 10:09         ` Janusz Krzysztofik
  0 siblings, 1 reply; 18+ messages in thread
From: Dixit, Ashutosh @ 2023-02-07 20:15 UTC (permalink / raw)
  To: Janusz Krzysztofik; +Cc: igt-dev, Chris Wilson

On Tue, 07 Feb 2023 12:04:17 -0800, Janusz Krzysztofik wrote:
>
> Hi Umesh,
>
> On Tuesday, 7 February 2023 20:33:50 CET Umesh Nerlige Ramappa wrote:
> > On Tue, Feb 07, 2023 at 11:25:00AM -0800, Umesh Nerlige Ramappa wrote:
> > >I wouldn't do this. Please keep the changes local to the specific test
> > >that you implemented in your first rev. While it is a good idea to
> > >have some of the perf capabilities in the library, this is way too
> > >much churn to implement a specific test for the original failure.
> > >Unless multiple IGT subsystems are already dependent on perf APIs to
> > >implement multiple tests, let's not do this.
> > >
> >
> > Also note that the perf library implemented in IGT is not entirely used
> > by IGT tests alone. The library is also linked to GPUvis software. Only
> > a few pieces of reusable code in the perf library are used by IGT tests.
>
> Do you think that my changes will break other users?  How?
>
> Also, it looks like there are somehow conflicting expectations from different
> reviewers.  Ashutosh wanted the new subtest to be implemented outside of i915/
> perf test.  That's why I proposed to extend the library with open/close and
> related helpers, just to avoid code duplication, and I'm about to resend it in
> series with the new subtest implemented inside gem_ctx_exec.  Now, after I
> submitted this patch for initial review, you say that a specific test is not
> the way to go.  What are you afraid of?
>
> Whose expectations should I try to satisfy in order to have a subtest accepted
> and merged?  Or should I just give up and duplicate the code from i915/perf in
> another test?  Or maybe you can have a look at the whole series before you
> decide?

Hi Janusz,

I agree with Umesh. Given that here perf is just being used as a 'dummy
workload' let's just duplicate the minimal code required for perf
open/close wherever we are adding the new test. This will keep the real
perf functionality undisturbed for reasons Umesh cited.

Thanks.
--
Ashutosh

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [igt-dev] [PATCH i-g-t] i915/perf: Make __perf_open() and friends public
  2023-02-07 20:15       ` Dixit, Ashutosh
@ 2023-02-08 10:09         ` Janusz Krzysztofik
  2023-02-08 19:34           ` Umesh Nerlige Ramappa
  0 siblings, 1 reply; 18+ messages in thread
From: Janusz Krzysztofik @ 2023-02-08 10:09 UTC (permalink / raw)
  To: Dixit, Ashutosh; +Cc: igt-dev, Chris Wilson

On Tuesday, 7 February 2023 21:15:26 CET Dixit, Ashutosh wrote:
> On Tue, 07 Feb 2023 12:04:17 -0800, Janusz Krzysztofik wrote:
> >
> > Hi Umesh,
> >
> > On Tuesday, 7 February 2023 20:33:50 CET Umesh Nerlige Ramappa wrote:
> > > On Tue, Feb 07, 2023 at 11:25:00AM -0800, Umesh Nerlige Ramappa wrote:
> > > >I wouldn't do this. Please keep the changes local to the specific test
> > > >that you implemented in your first rev. While it is a good idea to
> > > > >have some of the perf capabilities in the library, this is way too
> > > > >much churn to implement a specific test for the original failure.
> > > > >Unless multiple IGT subsystems are already dependent on perf APIs to
> > > >implement multiple tests, let's not do this.
> > > >
> > >
> > > Also note that the perf library implemented in IGT is not entirely used
> > > by IGT tests alone. The library is also linked to GPUvis software. Only
> > > a few pieces of reusable code in the perf library is used by IGT tests.
> >
> > Do you think that my changes will break other users?  How?
> >
> > Also, it looks like there are somehow conflicting expectations from different
> > reviewers.  Ashutosh wanted the new subtest to be implemented outside of i915/
> > perf test.  That's why I proposed to extend the library with open/close and
> > related helpers, just to avoid code duplication, and I'm about to resend it in
> > series with the new subtest implemented inside gem_ctx_exec.  Now, after I
> > submitted this patch for initial review, you say that a specific test is not
> > the way to go.  What are you afraid of?
> >
> > Whose expectations should I try to satisfy in order to have a subtest accepted
> > and merged?  Or should I just give up and duplicate the code from i915/perf in
> > another test?  Or maybe you can have a look at the whole series before you
> > decide?
> 
> Hi Janusz,
> 
> I agree with Umesh. Given that here perf is just being used as a 'dummy
> workload' let's just duplicate the minimal code required for perf
> open/close wherever we are adding the new test. This will keep the real
> perf functionality undisturbed for reasons Umesh cited.

TBH, I can't see any good justification among those reasons mentioned: "too 
much churn", "unless ... already dependent", "not entirely used by IGT tests", 
"linked to GPUvis software", "only a few pieces of reusable code ... used by 
IGT" -- which of those justifies duplication of i915 perf code in IGT tests?  
Again, do you think that my changes can break other (non-IGT) users?  How?

Anyway, assuming you are the "owner" of lib/i915/perf.c, in order to satisfy
your (still unclear to me) requirements I'm already working on a new
version of my patch, with the i915 perf code duplicated as needed.

Thanks,
Janusz

> 
> Thanks.
> --
> Ashutosh
> 




^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [igt-dev] [PATCH i-g-t] i915/perf: Make __perf_open() and friends public
  2023-02-07 19:33   ` Umesh Nerlige Ramappa
  2023-02-07 20:04     ` Janusz Krzysztofik
@ 2023-02-08 14:35     ` Kamil Konieczny
  2023-02-08 17:53       ` Dixit, Ashutosh
  1 sibling, 1 reply; 18+ messages in thread
From: Kamil Konieczny @ 2023-02-08 14:35 UTC (permalink / raw)
  To: igt-dev; +Cc: Chris Wilson

Hi Umesh,

On 2023-02-07 at 11:33:50 -0800, Umesh Nerlige Ramappa wrote:
> On Tue, Feb 07, 2023 at 11:25:00AM -0800, Umesh Nerlige Ramappa wrote:
> > I wouldn't do this. Please keep the changes local to the specific test
> > that you implemented in your first rev. While it is a good idea to have
> > some of the perf capabilities in the library, this is way too much
> > churn to implement a specific test for the original failure. Unless
> > multiple IGT subsystems are already dependent on perf APIs to implement
> > multiple tests, let's not do this.
> > 
> 
> Also note that the perf library implemented in IGT is not entirely used by
> IGT tests alone. The library is also linked to GPUvis software. Only a few
> pieces of reusable code in the perf library is used by IGT tests.

Could you give http(s) link(s) to this software?

I checked https://github.com/mikesart/gpuvis
and there is no mention of an Intel IGT dependency.

IMHO we can have a separate i915_perf lib with the functions needed by
the new test, but if you are concerned about it we can start with code
duplication and refactor later.

Regards,
Kamil

> 
> > Thanks,
> > Umesh
> > 
> > On Tue, Feb 07, 2023 at 11:11:21AM +0100, Janusz Krzysztofik wrote:
> > > We need new subtests that exercise interaction between i915 perf open/
> > > close and other i915 subsystems from the point of view of those other
> > > subsystems.  Allow other tests to reuse __perf_open/close() family of
> > > functions, now inside i915/perf test, by moving (sharable parts of)
> > > them to i915/perf library.
> > > 
> > > Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik@linux.intel.com>
> > > ---
> > > lib/i915/perf.c   | 130 ++++++++++++++++++++++++++++++++++++++++++++++
> > > lib/i915/perf.h   |  15 ++++++
> > > lib/meson.build   |   1 +
> > > tests/i915/perf.c | 121 ++++++++++--------------------------------
> > > 4 files changed, 174 insertions(+), 93 deletions(-)
> > > 
> > > diff --git a/lib/i915/perf.c b/lib/i915/perf.c
> > > index 6c7a192558..e71d637eb5 100644
> > > --- a/lib/i915/perf.c
> > > +++ b/lib/i915/perf.c
> > > @@ -39,7 +39,9 @@
> > > 
> > > #include "i915_pciids.h"
> > > 
> > > +#include "igt_aux.h"
> > > #include "intel_chipset.h"
> > > +#include "ioctl_wrappers.h"
> > > #include "perf.h"
> > > 
> > > #include "i915_perf_metrics_hsw.h"
> > > @@ -1008,3 +1010,131 @@ const char *intel_perf_read_report_reason(const struct intel_perf *perf,
> > > 
> > > 	return "unknown";
> > > }
> > > +
> > > +uint64_t i915_perf_timebase_scale(struct intel_perf *intel_perf, uint32_t u32_delta)
> > > +{
> > > +	return ((uint64_t)u32_delta * NSEC_PER_SEC) / intel_perf->devinfo.timestamp_frequency;
> > > +}
> > > +
> > > +/* Returns: the largest OA exponent that will still result in a sampling period
> > > + * less than or equal to the given @period.
> > > + */
> > > +int i915_perf_max_oa_exponent_for_period_lte(struct intel_perf *intel_perf, uint64_t period)
> > > +{
> > > +	/* NB: timebase_scale() takes a uint32_t and an exponent of 30
> > > +	 * would already represent a period of ~3 minutes so there's
> > > +	 * really no need to consider higher exponents.
> > > +	 */
> > > +	for (int i = 0; i < 30; i++) {
> > > +		uint64_t oa_period = i915_perf_timebase_scale(intel_perf, 2 << i);
> > > +
> > > +		if (oa_period > period)
> > > +			return max(0, i - 1);
> > > +	}
> > > +
> > > +	igt_assert(!"reached");
> > > +	return -1;
> > > +}
> > > +
> > > +struct intel_perf_metric_set *i915_perf_default_set(struct intel_perf *intel_perf, uint32_t devid)
> > > +{
> > > +	struct intel_perf_metric_set *metric_set = NULL, *metric_set_iter;
> > > +	const char *metric_set_name = NULL;
> > > +
> > > +	igt_assert_neq(devid, 0);
> > > +
> > > +	/*
> > > +	 * We don't have a TestOa metric set for Haswell so use
> > > +	 * RenderBasic
> > > +	 */
> > > +	if (IS_HASWELL(devid))
> > > +		metric_set_name = "RenderBasic";
> > > +	else
> > > +		metric_set_name = "TestOa";
> > > +
> > > +	igt_list_for_each_entry(metric_set_iter, &intel_perf->metric_sets, link) {
> > > +		if (strcmp(metric_set_iter->symbol_name, metric_set_name) == 0) {
> > > +			metric_set = metric_set_iter;
> > > +			break;
> > > +		}
> > > +	}
> > > +
> > > +	return metric_set;
> > > +}
> > > +
> > > +struct intel_perf *i915_perf_init_sys_info(int drm_fd)
> > > +{
> > > +	struct intel_perf *intel_perf;
> > > +
> > > +	intel_perf = intel_perf_for_fd(drm_fd);
> > > +	if (!intel_perf)
> > > +		return NULL;
> > > +
> > > +	igt_debug("n_eu_slices: %"PRIu64"\n", intel_perf->devinfo.n_eu_slices);
> > > +	igt_debug("n_eu_sub_slices: %"PRIu64"\n", intel_perf->devinfo.n_eu_sub_slices);
> > > +	igt_debug("n_eus: %"PRIu64"\n", intel_perf->devinfo.n_eus);
> > > +	igt_debug("timestamp_frequency = %"PRIu64"\n",
> > > +		  intel_perf->devinfo.timestamp_frequency);
> > > +	igt_assert_neq(intel_perf->devinfo.timestamp_frequency, 0);
> > > +
> > > +	intel_perf_load_perf_configs(intel_perf, drm_fd);
> > > +
> > > +	return intel_perf;
> > > +}
> > > +
> > > +int i915_perf_open(int drm_fd, struct drm_i915_perf_open_param *param, int *pm_fd)
> > > +{
> > > +	int32_t pm_value = 0;
> > > +	int ret;
> > > +
> > > +	ret = perf_ioctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, param);
> > > +
> > > +	igt_assert(ret >= 0);
> > > +	errno = 0;
> > > +
> > > +	if (pm_fd) {
> > > +		*pm_fd = open("/dev/cpu_dma_latency", O_RDWR);
> > > +		igt_assert(*pm_fd >= 0);
> > > +
> > > +		igt_assert_eq(write(*pm_fd, &pm_value, sizeof(pm_value)), sizeof(pm_value));
> > > +	}
> > > +
> > > +	return ret;
> > > +}
> > > +
> > > +int i915_perf_open_for_devid(int drm_fd, uint32_t devid, struct intel_perf *intel_perf, int *pm_fd)
> > > +{
> > > +	struct intel_perf_metric_set *metric_set = i915_perf_default_set(intel_perf, devid);
> > > +	uint64_t oa_exp = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 1000000);
> > > +	uint64_t properties[] = {
> > > +		DRM_I915_PERF_PROP_SAMPLE_OA, true,
> > > +		DRM_I915_PERF_PROP_OA_METRICS_SET, 0,
> > > +		DRM_I915_PERF_PROP_OA_FORMAT, 0,
> > > +		DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp,
> > > +	};
> > > +	struct drm_i915_perf_open_param param = {
> > > +		.flags = I915_PERF_FLAG_FD_CLOEXEC,
> > > +		.num_properties = sizeof(properties) / 16,
> > > +		.properties_ptr = to_user_pointer(properties),
> > > +	};
> > > +
> > > +	igt_assert(metric_set);
> > > +	igt_assert(metric_set->perf_oa_metrics_set);
> > > +	igt_assert(oa_exp >= 0);
> > > +
> > > +	igt_debug("%s metric set UUID = %s\n",
> > > +		  metric_set->symbol_name,
> > > +		  metric_set->hw_config_guid);
> > > +
> > > +	properties[3] = metric_set->perf_oa_metrics_set;
> > > +	properties[5] = metric_set->perf_oa_format;
> > > +
> > > +	return i915_perf_open(drm_fd, &param, pm_fd);
> > > +}
> > > +
> > > +void i915_perf_close(int stream_fd, int pm_fd)
> > > +{
> > > +	close(stream_fd);
> > > +	if (pm_fd >= 0)
> > > +		close(pm_fd);
> > > +}
> > > diff --git a/lib/i915/perf.h b/lib/i915/perf.h
> > > index e6e60dc997..c9cd28be47 100644
> > > --- a/lib/i915/perf.h
> > > +++ b/lib/i915/perf.h
> > > @@ -351,6 +351,21 @@ uint64_t intel_perf_read_record_timestamp_raw(const struct intel_perf *perf,
> > > const char *intel_perf_read_report_reason(const struct intel_perf *perf,
> > > 					  const struct drm_i915_perf_record_header *record);
> > > 
> > > +uint64_t i915_perf_timebase_scale(struct intel_perf *intel_perf, uint32_t u32_delta);
> > > +
> > > +int i915_perf_max_oa_exponent_for_period_lte(struct intel_perf *intel_perf, uint64_t period);
> > > +
> > > +struct intel_perf_metric_set *i915_perf_default_set(struct intel_perf *intel_perf, uint32_t devid);
> > > +
> > > +struct intel_perf *i915_perf_init_sys_info(int drm_fd);
> > > +
> > > +struct drm_i915_perf_open_param;
> > > +int i915_perf_open(int drm_fd, struct drm_i915_perf_open_param *param, int *pm_fd);
> > > +
> > > +int i915_perf_open_for_devid(int drm_fd, uint32_t devid, struct intel_perf *intel_perf, int *pm_fd);
> > > +
> > > +void i915_perf_close(int drm_fd, int pm_fd);
> > > +
> > > #ifdef __cplusplus
> > > };
> > > #endif
> > > diff --git a/lib/meson.build b/lib/meson.build
> > > index d49b78ca1a..e79b31090b 100644
> > > --- a/lib/meson.build
> > > +++ b/lib/meson.build
> > > @@ -258,6 +258,7 @@ lib_igt_drm_fdinfo = declare_dependency(link_with : lib_igt_drm_fdinfo_build,
> > > 				  include_directories : inc)
> > > i915_perf_files = [
> > >  'igt_list.c',
> > > +  'igt_tools_stub.c',
> > >  'i915/perf.c',
> > >  'i915/perf_data_reader.c',
> > > ]
> > > diff --git a/tests/i915/perf.c b/tests/i915/perf.c
> > > index dd1f1ac399..a3f59d143b 100644
> > > --- a/tests/i915/perf.c
> > > +++ b/tests/i915/perf.c
> > > @@ -287,21 +287,16 @@ pretty_print_oa_period(uint64_t oa_period_ns)
> > > static void
> > > __perf_close(int fd)
> > > {
> > > -	close(fd);
> > > +	i915_perf_close(fd, pm_fd);
> > > 	stream_fd = -1;
> > > 
> > > -	if (pm_fd >= 0) {
> > > -		close(pm_fd);
> > > +	if (pm_fd >= 0)
> > > 		pm_fd = -1;
> > > -	}
> > > }
> > > 
> > > static int
> > > __perf_open(int fd, struct drm_i915_perf_open_param *param, bool prevent_pm)
> > > {
> > > -	int ret;
> > > -	int32_t pm_value = 0;
> > > -
> > > 	if (stream_fd >= 0)
> > > 		__perf_close(stream_fd);
> > > 	if (pm_fd >= 0) {
> > > @@ -309,19 +304,7 @@ __perf_open(int fd, struct drm_i915_perf_open_param *param, bool prevent_pm)
> > > 		pm_fd = -1;
> > > 	}
> > > 
> > > -	ret = igt_ioctl(fd, DRM_IOCTL_I915_PERF_OPEN, param);
> > > -
> > > -	igt_assert(ret >= 0);
> > > -	errno = 0;
> > > -
> > > -	if (prevent_pm) {
> > > -		pm_fd = open("/dev/cpu_dma_latency", O_RDWR);
> > > -		igt_assert(pm_fd >= 0);
> > > -
> > > -		igt_assert_eq(write(pm_fd, &pm_value, sizeof(pm_value)), sizeof(pm_value));
> > > -	}
> > > -
> > > -	return ret;
> > > +	return i915_perf_open(fd, param, prevent_pm ? &pm_fd : NULL);
> > > }
> > > 
> > > static int
> > > @@ -465,33 +448,6 @@ cs_timebase_scale(uint32_t u32_delta)
> > > 	return ((uint64_t)u32_delta * NSEC_PER_SEC) / cs_timestamp_frequency(drm_fd);
> > > }
> > > 
> > > -static uint64_t
> > > -timebase_scale(uint32_t u32_delta)
> > > -{
> > > -	return ((uint64_t)u32_delta * NSEC_PER_SEC) / intel_perf->devinfo.timestamp_frequency;
> > > -}
> > > -
> > > -/* Returns: the largest OA exponent that will still result in a sampling period
> > > - * less than or equal to the given @period.
> > > - */
> > > -static int
> > > -max_oa_exponent_for_period_lte(uint64_t period)
> > > -{
> > > -	/* NB: timebase_scale() takes a uint32_t and an exponent of 30
> > > -	 * would already represent a period of ~3 minutes so there's
> > > -	 * really no need to consider higher exponents.
> > > -	 */
> > > -	for (int i = 0; i < 30; i++) {
> > > -		uint64_t oa_period = timebase_scale(2 << i);
> > > -
> > > -		if (oa_period > period)
> > > -			return max(0, i - 1);
> > > -	}
> > > -
> > > -	igt_assert(!"reached");
> > > -	return -1;
> > > -}
> > > -
> > > /* Return: the largest OA exponent that will still result in a sampling
> > > * frequency greater than the given @frequency.
> > > */
> > > @@ -502,7 +458,7 @@ max_oa_exponent_for_freq_gt(uint64_t frequency)
> > > 
> > > 	igt_assert_neq(period, 0);
> > > 
> > > -	return max_oa_exponent_for_period_lte(period - 1);
> > > +	return i915_perf_max_oa_exponent_for_period_lte(intel_perf, period - 1);
> > > }
> > > 
> > > static uint64_t
> > > @@ -626,7 +582,7 @@ hsw_sanity_check_render_basic_reports(const uint32_t *oa_report0,
> > > 				      const uint32_t *oa_report1,
> > > 				      enum drm_i915_oa_format fmt)
> > > {
> > > -	uint32_t time_delta = timebase_scale(oa_report1[1] - oa_report0[1]);
> > > +	uint32_t time_delta = i915_perf_timebase_scale(intel_perf, oa_report1[1] - oa_report0[1]);
> > > 	uint32_t clock_delta;
> > > 	uint32_t max_delta;
> > > 	struct oa_format format = get_oa_format(fmt);
> > > @@ -832,7 +788,7 @@ gen8_sanity_check_test_oa_reports(const uint32_t *oa_report0,
> > > 				  enum drm_i915_oa_format fmt)
> > > {
> > > 	struct oa_format format = get_oa_format(fmt);
> > > -	uint32_t time_delta = timebase_scale(oa_report1[1] - oa_report0[1]);
> > > +	uint32_t time_delta = i915_perf_timebase_scale(intel_perf, oa_report1[1] - oa_report0[1]);
> > > 	uint32_t ticks0 = read_report_ticks(oa_report0, fmt);
> > > 	uint32_t ticks1 = read_report_ticks(oa_report1, fmt);
> > > 	uint32_t clock_delta = ticks1 - ticks0;
> > > @@ -950,43 +906,22 @@ gen8_sanity_check_test_oa_reports(const uint32_t *oa_report0,
> > > static bool
> > > init_sys_info(void)
> > > {
> > > -	const char *test_set_name = NULL;
> > > -	struct intel_perf_metric_set *metric_set_iter;
> > > -
> > > 	igt_assert_neq(devid, 0);
> > > 
> > > -	intel_perf = intel_perf_for_fd(drm_fd);
> > > +	intel_perf = i915_perf_init_sys_info(drm_fd);
> > > 	igt_require(intel_perf);
> > > 
> > > -	igt_debug("n_eu_slices: %"PRIu64"\n", intel_perf->devinfo.n_eu_slices);
> > > -	igt_debug("n_eu_sub_slices: %"PRIu64"\n", intel_perf->devinfo.n_eu_sub_slices);
> > > -	igt_debug("n_eus: %"PRIu64"\n", intel_perf->devinfo.n_eus);
> > > -	igt_debug("timestamp_frequency = %"PRIu64"\n",
> > > -		  intel_perf->devinfo.timestamp_frequency);
> > > -	igt_assert_neq(intel_perf->devinfo.timestamp_frequency, 0);
> > > -
> > > -	/* We don't have a TestOa metric set for Haswell so use
> > > -	 * RenderBasic
> > > -	 */
> > > 	if (IS_HASWELL(devid)) {
> > > -		test_set_name = "RenderBasic";
> > > 		read_report_ticks = hsw_read_report_ticks;
> > > 		sanity_check_reports = hsw_sanity_check_render_basic_reports;
> > > 		undefined_a_counters = hsw_undefined_a_counters;
> > > 	} else {
> > > -		test_set_name = "TestOa";
> > > 		read_report_ticks = gen8_read_report_ticks;
> > > 		sanity_check_reports = gen8_sanity_check_test_oa_reports;
> > > 		undefined_a_counters = gen8_undefined_a_counters;
> > > 	}
> > > 
> > > -	igt_list_for_each_entry(metric_set_iter, &intel_perf->metric_sets, link) {
> > > -		if (strcmp(metric_set_iter->symbol_name, test_set_name) == 0) {
> > > -			test_set = metric_set_iter;
> > > -			break;
> > > -		}
> > > -	}
> > > -
> > > +	test_set = i915_perf_default_set(intel_perf, devid);
> > > 	if (!test_set)
> > > 		return false;
> > > 
> > > @@ -994,14 +929,12 @@ init_sys_info(void)
> > > 		  test_set->symbol_name,
> > > 		  test_set->hw_config_guid);
> > > 
> > > -	intel_perf_load_perf_configs(intel_perf, drm_fd);
> > > -
> > > 	if (test_set->perf_oa_metrics_set == 0) {
> > > 		igt_debug("Unable to load configurations\n");
> > > 		return false;
> > > 	}
> > > 
> > > -	oa_exp_1_millisec = max_oa_exponent_for_period_lte(1000000);
> > > +	oa_exp_1_millisec = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 1000000);
> > > 
> > > 	return true;
> > > }
> > > @@ -1911,7 +1844,7 @@ test_low_oa_exponent_permissions(void)
> > > 
> > > 	igt_waitchildren();
> > > 
> > > -	oa_period = timebase_scale(2 << ok_exponent);
> > > +	oa_period = i915_perf_timebase_scale(intel_perf, 2 << ok_exponent);
> > > 	oa_freq = NSEC_PER_SEC / oa_period;
> > > 	write_u64_file("/proc/sys/dev/i915/oa_max_sample_rate", oa_freq - 100);
> > > 
> > > @@ -2003,7 +1936,7 @@ get_time(void)
> > > static void
> > > test_blocking(uint64_t requested_oa_period, bool set_kernel_hrtimer, uint64_t kernel_hrtimer)
> > > {
> > > -	int oa_exponent = max_oa_exponent_for_period_lte(requested_oa_period);
> > > +	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, requested_oa_period);
> > > 	uint64_t oa_period = oa_exponent_to_ns(oa_exponent);
> > > 	uint64_t properties[] = {
> > > 		/* Include OA reports in samples */
> > > @@ -2162,7 +2095,7 @@ test_blocking(uint64_t requested_oa_period, bool set_kernel_hrtimer, uint64_t ke
> > > static void
> > > test_polling(uint64_t requested_oa_period, bool set_kernel_hrtimer, uint64_t kernel_hrtimer)
> > > {
> > > -	int oa_exponent = max_oa_exponent_for_period_lte(requested_oa_period);
> > > +	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, requested_oa_period);
> > > 	uint64_t oa_period = oa_exponent_to_ns(oa_exponent);
> > > 	uint64_t properties[] = {
> > > 		/* Include OA reports in samples */
> > > @@ -2358,7 +2291,7 @@ test_polling(uint64_t requested_oa_period, bool set_kernel_hrtimer, uint64_t ker
> > > 
> > > static void test_polling_small_buf(void)
> > > {
> > > -	int oa_exponent = max_oa_exponent_for_period_lte(40 * 1000); /* 40us */
> > > +	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 40 * 1000); /* 40us */
> > > 	uint64_t properties[] = {
> > > 		/* Include OA reports in samples */
> > > 		DRM_I915_PERF_PROP_SAMPLE_OA, true,
> > > @@ -2461,7 +2394,7 @@ num_valid_reports_captured(struct drm_i915_perf_open_param *param,
> > > static void
> > > gen12_test_oa_tlb_invalidate(void)
> > > {
> > > -	int oa_exponent = max_oa_exponent_for_period_lte(30000000);
> > > +	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 30000000);
> > > 	uint64_t properties[] = {
> > > 		DRM_I915_PERF_PROP_SAMPLE_OA, true,
> > > 
> > > @@ -2503,7 +2436,7 @@ static void
> > > test_buffer_fill(void)
> > > {
> > > 	/* ~5 micro second period */
> > > -	int oa_exponent = max_oa_exponent_for_period_lte(5000);
> > > +	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 5000);
> > > 	uint64_t oa_period = oa_exponent_to_ns(oa_exponent);
> > > 	uint64_t properties[] = {
> > > 		/* Include OA reports in samples */
> > > @@ -2651,7 +2584,7 @@ static void
> > > test_non_zero_reason(void)
> > > {
> > > 	/* ~20 micro second period */
> > > -	int oa_exponent = max_oa_exponent_for_period_lte(20000);
> > > +	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 20000);
> > > 	uint64_t properties[] = {
> > > 		/* Include OA reports in samples */
> > > 		DRM_I915_PERF_PROP_SAMPLE_OA, true,
> > > @@ -2734,7 +2667,7 @@ static void
> > > test_enable_disable(void)
> > > {
> > > 	/* ~5 micro second period */
> > > -	int oa_exponent = max_oa_exponent_for_period_lte(5000);
> > > +	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 5000);
> > > 	uint64_t oa_period = oa_exponent_to_ns(oa_exponent);
> > > 	uint64_t properties[] = {
> > > 		/* Include OA reports in samples */
> > > @@ -2885,7 +2818,7 @@ test_enable_disable(void)
> > > static void
> > > test_short_reads(void)
> > > {
> > > -	int oa_exponent = max_oa_exponent_for_period_lte(5000);
> > > +	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 5000);
> > > 	uint64_t properties[] = {
> > > 		/* Include OA reports in samples */
> > > 		DRM_I915_PERF_PROP_SAMPLE_OA, true,
> > > @@ -3447,8 +3380,8 @@ hsw_test_single_ctx_counters(void)
> > > 
> > > 		/* sanity check that we can pass the delta to timebase_scale */
> > > 		igt_assert(delta_ts64 < UINT32_MAX);
> > > -		delta_oa32_ns = timebase_scale(delta_oa32);
> > > -		delta_ts64_ns = timebase_scale(delta_ts64);
> > > +		delta_oa32_ns = i915_perf_timebase_scale(intel_perf, delta_oa32);
> > > +		delta_ts64_ns = i915_perf_timebase_scale(intel_perf, delta_ts64);
> > > 
> > > 		igt_debug("ts32 delta = %u, = %uns\n",
> > > 			  delta_oa32, (unsigned)delta_oa32_ns);
> > > @@ -3498,7 +3431,7 @@ hsw_test_single_ctx_counters(void)
> > > static void
> > > gen8_test_single_ctx_render_target_writes_a_counter(void)
> > > {
> > > -	int oa_exponent = max_oa_exponent_for_period_lte(1000000);
> > > +	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 1000000);
> > > 	uint64_t properties[] = {
> > > 		DRM_I915_PERF_PROP_CTX_HANDLE, UINT64_MAX, /* updated below */
> > > 
> > > @@ -3700,8 +3633,8 @@ gen8_test_single_ctx_render_target_writes_a_counter(void)
> > > 
> > > 			/* sanity check that we can pass the delta to timebase_scale */
> > > 			igt_assert(delta_ts64 < UINT32_MAX);
> > > -			delta_oa32_ns = timebase_scale(delta_oa32);
> > > -			delta_ts64_ns = timebase_scale(delta_ts64);
> > > +			delta_oa32_ns = i915_perf_timebase_scale(intel_perf, delta_oa32);
> > > +			delta_ts64_ns = i915_perf_timebase_scale(intel_perf, delta_ts64);
> > > 
> > > 			igt_debug("oa32 delta = %u, = %uns\n",
> > > 				  delta_oa32, (unsigned)delta_oa32_ns);
> > > @@ -3783,7 +3716,8 @@ gen8_test_single_ctx_render_target_writes_a_counter(void)
> > > 				{
> > > 					uint32_t time_delta = report[1] - report0_32[1];
> > > 
> > > -					if (timebase_scale(time_delta) > 1000000000) {
> > > +					if (i915_perf_timebase_scale(intel_perf,
> > > +								     time_delta) > 1000000000) {
> > > 						skip_reason = "prior first mi-rpc";
> > > 					}
> > > 				}
> > > @@ -3791,7 +3725,8 @@ gen8_test_single_ctx_render_target_writes_a_counter(void)
> > > 				{
> > > 					uint32_t time_delta = report[1] - report1_32[1];
> > > 
> > > -					if (timebase_scale(time_delta) <= 1000000000) {
> > > +					if (i915_perf_timebase_scale(intel_perf,
> > > +								     time_delta) <= 1000000000) {
> > > 						igt_debug("    comes after last MI_RPC (%u)\n",
> > > 							  report1_32[1]);
> > > 						report = report1_32;
> > > @@ -4164,7 +4099,7 @@ static void gen12_single_ctx_helper(void)
> > > 
> > > 	/* Sanity check that we can pass the delta to timebase_scale */
> > > 	igt_assert(delta_ts64 < UINT32_MAX);
> > > -	delta_oa32_ns = timebase_scale(delta_oa32);
> > > +	delta_oa32_ns = i915_perf_timebase_scale(intel_perf, delta_oa32);
> > > 	delta_ts64_ns = cs_timebase_scale(delta_ts64);
> > > 
> > > 	igt_debug("oa32 delta = %u, = %uns\n",
> > > -- 
> > > 2.25.1
> > > 

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [igt-dev] [PATCH i-g-t] i915/perf: Make __perf_open() and friends public
  2023-02-08 14:35     ` Kamil Konieczny
@ 2023-02-08 17:53       ` Dixit, Ashutosh
  2023-02-08 18:23         ` Janusz Krzysztofik
  0 siblings, 1 reply; 18+ messages in thread
From: Dixit, Ashutosh @ 2023-02-08 17:53 UTC (permalink / raw)
  To: Kamil Konieczny, igt-dev, Janusz Krzysztofik,
	Umesh Nerlige Ramappa, Chris Wilson, Ashutosh Dixit,
	Zbigniew Kempczyński

On Wed, 08 Feb 2023 06:35:47 -0800, Kamil Konieczny wrote:
>
> Hi Umesh,
>
> On 2023-02-07 at 11:33:50 -0800, Umesh Nerlige Ramappa wrote:
> > On Tue, Feb 07, 2023 at 11:25:00AM -0800, Umesh Nerlige Ramappa wrote:
> > > I wouldn't do this. Please keep the changes local to the specific test
> > > that you implemented in your first rev. While it is a good idea to have
> > > some of the perf capabilities in the library, this is way too much
> > > churn to implement a specific test for the original failure. Unless
> > > multiple IGT subsystems are already dependent on perf APIs to implement
> > > multiple tests, let's not do this.
> > >
> >
> > Also note that the perf library implemented in IGT is not entirely used by
> > IGT tests alone. The library is also linked to GPUvis software. Only a few
> > pieces of reusable code in the perf library is used by IGT tests.
>
> Could you give http(s) link(s) to this software?
>
> I checked https://github.com/mikesart/gpuvis
> and there is no mention of an Intel IGT dependency.

Hi Kamil,

The connection between IGT and gpuvis is via this:

tools/i915-perf/i915_perf_recorder.c

So the recorder records the metrics/counters in a file and these are then
fed to gpuvis.
>
> IMHO we can have a separate i915_perf lib with the functions needed by
> the new test, but if you are concerned about it we can start with code
> duplication and refactor later.

Thanks Kamil and Janusz!

>
> Regards,
> Kamil
>
> >
> > > Thanks,
> > > Umesh
> > >
> > > On Tue, Feb 07, 2023 at 11:11:21AM +0100, Janusz Krzysztofik wrote:
> > > > We need new subtests that exercise interaction between i915 perf open/
> > > > close and other i915 subsystems from the point of view of those other
> > > > subsystems.  Allow other tests to reuse __perf_open/close() family of
> > > > functions, now inside i915/perf test, by moving (sharable parts of)
> > > > them to i915/perf library.
> > > >
> > > > Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik@linux.intel.com>
> > > > ---
> > > > lib/i915/perf.c   | 130 ++++++++++++++++++++++++++++++++++++++++++++++
> > > > lib/i915/perf.h   |  15 ++++++
> > > > lib/meson.build   |   1 +
> > > > tests/i915/perf.c | 121 ++++++++++--------------------------------
> > > > 4 files changed, 174 insertions(+), 93 deletions(-)
> > > >
> > > > diff --git a/lib/i915/perf.c b/lib/i915/perf.c
> > > > index 6c7a192558..e71d637eb5 100644
> > > > --- a/lib/i915/perf.c
> > > > +++ b/lib/i915/perf.c
> > > > @@ -39,7 +39,9 @@
> > > >
> > > > #include "i915_pciids.h"
> > > >
> > > > +#include "igt_aux.h"
> > > > #include "intel_chipset.h"
> > > > +#include "ioctl_wrappers.h"
> > > > #include "perf.h"
> > > >
> > > > #include "i915_perf_metrics_hsw.h"
> > > > @@ -1008,3 +1010,131 @@ const char *intel_perf_read_report_reason(const struct intel_perf *perf,
> > > >
> > > >		return "unknown";
> > > > }
> > > > +
> > > > +uint64_t i915_perf_timebase_scale(struct intel_perf *intel_perf, uint32_t u32_delta)
> > > > +{
> > > > +	return ((uint64_t)u32_delta * NSEC_PER_SEC) / intel_perf->devinfo.timestamp_frequency;
> > > > +}
> > > > +
> > > > +/* Returns: the largest OA exponent that will still result in a sampling period
> > > > + * less than or equal to the given @period.
> > > > + */
> > > > +int i915_perf_max_oa_exponent_for_period_lte(struct intel_perf *intel_perf, uint64_t period)
> > > > +{
> > > > +	/* NB: timebase_scale() takes a uint32_t and an exponent of 30
> > > > +	 * would already represent a period of ~3 minutes so there's
> > > > +	 * really no need to consider higher exponents.
> > > > +	 */
> > > > +	for (int i = 0; i < 30; i++) {
> > > > +		uint64_t oa_period = i915_perf_timebase_scale(intel_perf, 2 << i);
> > > > +
> > > > +		if (oa_period > period)
> > > > +			return max(0, i - 1);
> > > > +	}
> > > > +
> > > > +	igt_assert(!"reached");
> > > > +	return -1;
> > > > +}
> > > > +
> > > > +struct intel_perf_metric_set *i915_perf_default_set(struct intel_perf *intel_perf, uint32_t devid)
> > > > +{
> > > > +	struct intel_perf_metric_set *metric_set = NULL, *metric_set_iter;
> > > > +	const char *metric_set_name = NULL;
> > > > +
> > > > +	igt_assert_neq(devid, 0);
> > > > +
> > > > +	/*
> > > > +	 * We don't have a TestOa metric set for Haswell so use
> > > > +	 * RenderBasic
> > > > +	 */
> > > > +	if (IS_HASWELL(devid))
> > > > +		metric_set_name = "RenderBasic";
> > > > +	else
> > > > +		metric_set_name = "TestOa";
> > > > +
> > > > +	igt_list_for_each_entry(metric_set_iter, &intel_perf->metric_sets, link) {
> > > > +		if (strcmp(metric_set_iter->symbol_name, metric_set_name) == 0) {
> > > > +			metric_set = metric_set_iter;
> > > > +			break;
> > > > +		}
> > > > +	}
> > > > +
> > > > +	return metric_set;
> > > > +}
> > > > +
> > > > +struct intel_perf *i915_perf_init_sys_info(int drm_fd)
> > > > +{
> > > > +	struct intel_perf *intel_perf;
> > > > +
> > > > +	intel_perf = intel_perf_for_fd(drm_fd);
> > > > +	if (!intel_perf)
> > > > +		return NULL;
> > > > +
> > > > +	igt_debug("n_eu_slices: %"PRIu64"\n", intel_perf->devinfo.n_eu_slices);
> > > > +	igt_debug("n_eu_sub_slices: %"PRIu64"\n", intel_perf->devinfo.n_eu_sub_slices);
> > > > +	igt_debug("n_eus: %"PRIu64"\n", intel_perf->devinfo.n_eus);
> > > > +	igt_debug("timestamp_frequency = %"PRIu64"\n",
> > > > +		  intel_perf->devinfo.timestamp_frequency);
> > > > +	igt_assert_neq(intel_perf->devinfo.timestamp_frequency, 0);
> > > > +
> > > > +	intel_perf_load_perf_configs(intel_perf, drm_fd);
> > > > +
> > > > +	return intel_perf;
> > > > +}
> > > > +
> > > > +int i915_perf_open(int drm_fd, struct drm_i915_perf_open_param *param, int *pm_fd)
> > > > +{
> > > > +	int32_t pm_value = 0;
> > > > +	int ret;
> > > > +
> > > > +	ret = perf_ioctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, param);
> > > > +
> > > > +	igt_assert(ret >= 0);
> > > > +	errno = 0;
> > > > +
> > > > +	if (pm_fd) {
> > > > +		*pm_fd = open("/dev/cpu_dma_latency", O_RDWR);
> > > > +		igt_assert(*pm_fd >= 0);
> > > > +
> > > > +		igt_assert_eq(write(*pm_fd, &pm_value, sizeof(pm_value)), sizeof(pm_value));
> > > > +	}
> > > > +
> > > > +	return ret;
> > > > +}
> > > > +
> > > > +int i915_perf_open_for_devid(int drm_fd, uint32_t devid, struct intel_perf *intel_perf, int *pm_fd)
> > > > +{
> > > > +	struct intel_perf_metric_set *metric_set = i915_perf_default_set(intel_perf, devid);
> > > > +	uint64_t oa_exp = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 1000000);
> > > > +	uint64_t properties[] = {
> > > > +		DRM_I915_PERF_PROP_SAMPLE_OA, true,
> > > > +		DRM_I915_PERF_PROP_OA_METRICS_SET, 0,
> > > > +		DRM_I915_PERF_PROP_OA_FORMAT, 0,
> > > > +		DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp,
> > > > +	};
> > > > +	struct drm_i915_perf_open_param param = {
> > > > +		.flags = I915_PERF_FLAG_FD_CLOEXEC,
> > > > +		.num_properties = sizeof(properties) / 16,
> > > > +		.properties_ptr = to_user_pointer(properties),
> > > > +	};
> > > > +
> > > > +	igt_assert(metric_set);
> > > > +	igt_assert(metric_set->perf_oa_metrics_set);
> > > > +	igt_assert(oa_exp >= 0);
> > > > +
> > > > +	igt_debug("%s metric set UUID = %s\n",
> > > > +		  metric_set->symbol_name,
> > > > +		  metric_set->hw_config_guid);
> > > > +
> > > > +	properties[3] = metric_set->perf_oa_metrics_set;
> > > > +	properties[5] = metric_set->perf_oa_format;
> > > > +
> > > > +	return i915_perf_open(drm_fd, &param, pm_fd);
> > > > +}
> > > > +
> > > > +void i915_perf_close(int stream_fd, int pm_fd)
> > > > +{
> > > > +	close(stream_fd);
> > > > +	if (pm_fd >= 0)
> > > > +		close(pm_fd);
> > > > +}
> > > > diff --git a/lib/i915/perf.h b/lib/i915/perf.h
> > > > index e6e60dc997..c9cd28be47 100644
> > > > --- a/lib/i915/perf.h
> > > > +++ b/lib/i915/perf.h
> > > > @@ -351,6 +351,21 @@ uint64_t intel_perf_read_record_timestamp_raw(const struct intel_perf *perf,
> > > > const char *intel_perf_read_report_reason(const struct intel_perf *perf,
> > > >						  const struct drm_i915_perf_record_header *record);
> > > >
> > > > +uint64_t i915_perf_timebase_scale(struct intel_perf *intel_perf, uint32_t u32_delta);
> > > > +
> > > > +int i915_perf_max_oa_exponent_for_period_lte(struct intel_perf *intel_perf, uint64_t period);
> > > > +
> > > > +struct intel_perf_metric_set *i915_perf_default_set(struct intel_perf *intel_perf, uint32_t devid);
> > > > +
> > > > +struct intel_perf *i915_perf_init_sys_info(int drm_fd);
> > > > +
> > > > +struct drm_i915_perf_open_param;
> > > > +int i915_perf_open(int drm_fd, struct drm_i915_perf_open_param *param, int *pm_fd);
> > > > +
> > > > +int i915_perf_open_for_devid(int drm_fd, uint32_t devid, struct intel_perf *intel_perf, int *pm_fd);
> > > > +
> > > > +void i915_perf_close(int drm_fd, int pm_fd);
> > > > +
> > > > #ifdef __cplusplus
> > > > };
> > > > #endif
> > > > diff --git a/lib/meson.build b/lib/meson.build
> > > > index d49b78ca1a..e79b31090b 100644
> > > > --- a/lib/meson.build
> > > > +++ b/lib/meson.build
> > > > @@ -258,6 +258,7 @@ lib_igt_drm_fdinfo = declare_dependency(link_with : lib_igt_drm_fdinfo_build,
> > > >					  include_directories : inc)
> > > > i915_perf_files = [
> > > >  'igt_list.c',
> > > > +  'igt_tools_stub.c',
> > > >  'i915/perf.c',
> > > >  'i915/perf_data_reader.c',
> > > > ]
> > > > diff --git a/tests/i915/perf.c b/tests/i915/perf.c
> > > > index dd1f1ac399..a3f59d143b 100644
> > > > --- a/tests/i915/perf.c
> > > > +++ b/tests/i915/perf.c
> > > > @@ -287,21 +287,16 @@ pretty_print_oa_period(uint64_t oa_period_ns)
> > > > static void
> > > > __perf_close(int fd)
> > > > {
> > > > -	close(fd);
> > > > +	i915_perf_close(fd, pm_fd);
> > > >		stream_fd = -1;
> > > >
> > > > -	if (pm_fd >= 0) {
> > > > -		close(pm_fd);
> > > > +	if (pm_fd >= 0)
> > > >			pm_fd = -1;
> > > > -	}
> > > > }
> > > >
> > > > static int
> > > > __perf_open(int fd, struct drm_i915_perf_open_param *param, bool prevent_pm)
> > > > {
> > > > -	int ret;
> > > > -	int32_t pm_value = 0;
> > > > -
> > > >		if (stream_fd >= 0)
> > > >			__perf_close(stream_fd);
> > > >		if (pm_fd >= 0) {
> > > > @@ -309,19 +304,7 @@ __perf_open(int fd, struct drm_i915_perf_open_param *param, bool prevent_pm)
> > > >			pm_fd = -1;
> > > >		}
> > > >
> > > > -	ret = igt_ioctl(fd, DRM_IOCTL_I915_PERF_OPEN, param);
> > > > -
> > > > -	igt_assert(ret >= 0);
> > > > -	errno = 0;
> > > > -
> > > > -	if (prevent_pm) {
> > > > -		pm_fd = open("/dev/cpu_dma_latency", O_RDWR);
> > > > -		igt_assert(pm_fd >= 0);
> > > > -
> > > > -		igt_assert_eq(write(pm_fd, &pm_value, sizeof(pm_value)), sizeof(pm_value));
> > > > -	}
> > > > -
> > > > -	return ret;
> > > > +	return i915_perf_open(fd, param, prevent_pm ? &pm_fd : NULL);
> > > > }
> > > >
> > > > static int
> > > > @@ -465,33 +448,6 @@ cs_timebase_scale(uint32_t u32_delta)
> > > >		return ((uint64_t)u32_delta * NSEC_PER_SEC) / cs_timestamp_frequency(drm_fd);
> > > > }
> > > >
> > > > -static uint64_t
> > > > -timebase_scale(uint32_t u32_delta)
> > > > -{
> > > > -	return ((uint64_t)u32_delta * NSEC_PER_SEC) / intel_perf->devinfo.timestamp_frequency;
> > > > -}
> > > > -
> > > > -/* Returns: the largest OA exponent that will still result in a sampling period
> > > > - * less than or equal to the given @period.
> > > > - */
> > > > -static int
> > > > -max_oa_exponent_for_period_lte(uint64_t period)
> > > > -{
> > > > -	/* NB: timebase_scale() takes a uint32_t and an exponent of 30
> > > > -	 * would already represent a period of ~3 minutes so there's
> > > > -	 * really no need to consider higher exponents.
> > > > -	 */
> > > > -	for (int i = 0; i < 30; i++) {
> > > > -		uint64_t oa_period = timebase_scale(2 << i);
> > > > -
> > > > -		if (oa_period > period)
> > > > -			return max(0, i - 1);
> > > > -	}
> > > > -
> > > > -	igt_assert(!"reached");
> > > > -	return -1;
> > > > -}
> > > > -
> > > > /* Return: the largest OA exponent that will still result in a sampling
> > > > * frequency greater than the given @frequency.
> > > > */
> > > > @@ -502,7 +458,7 @@ max_oa_exponent_for_freq_gt(uint64_t frequency)
> > > >
> > > >		igt_assert_neq(period, 0);
> > > >
> > > > -	return max_oa_exponent_for_period_lte(period - 1);
> > > > +	return i915_perf_max_oa_exponent_for_period_lte(intel_perf, period - 1);
> > > > }
> > > >
> > > > static uint64_t
> > > > @@ -626,7 +582,7 @@ hsw_sanity_check_render_basic_reports(const uint32_t *oa_report0,
> > > >					      const uint32_t *oa_report1,
> > > >					      enum drm_i915_oa_format fmt)
> > > > {
> > > > -	uint32_t time_delta = timebase_scale(oa_report1[1] - oa_report0[1]);
> > > > +	uint32_t time_delta = i915_perf_timebase_scale(intel_perf, oa_report1[1] - oa_report0[1]);
> > > >		uint32_t clock_delta;
> > > >		uint32_t max_delta;
> > > >		struct oa_format format = get_oa_format(fmt);
> > > > @@ -832,7 +788,7 @@ gen8_sanity_check_test_oa_reports(const uint32_t *oa_report0,
> > > >					  enum drm_i915_oa_format fmt)
> > > > {
> > > >		struct oa_format format = get_oa_format(fmt);
> > > > -	uint32_t time_delta = timebase_scale(oa_report1[1] - oa_report0[1]);
> > > > +	uint32_t time_delta = i915_perf_timebase_scale(intel_perf, oa_report1[1] - oa_report0[1]);
> > > >		uint32_t ticks0 = read_report_ticks(oa_report0, fmt);
> > > >		uint32_t ticks1 = read_report_ticks(oa_report1, fmt);
> > > >		uint32_t clock_delta = ticks1 - ticks0;
> > > > @@ -950,43 +906,22 @@ gen8_sanity_check_test_oa_reports(const uint32_t *oa_report0,
> > > > static bool
> > > > init_sys_info(void)
> > > > {
> > > > -	const char *test_set_name = NULL;
> > > > -	struct intel_perf_metric_set *metric_set_iter;
> > > > -
> > > >		igt_assert_neq(devid, 0);
> > > >
> > > > -	intel_perf = intel_perf_for_fd(drm_fd);
> > > > +	intel_perf = i915_perf_init_sys_info(drm_fd);
> > > >		igt_require(intel_perf);
> > > >
> > > > -	igt_debug("n_eu_slices: %"PRIu64"\n", intel_perf->devinfo.n_eu_slices);
> > > > -	igt_debug("n_eu_sub_slices: %"PRIu64"\n", intel_perf->devinfo.n_eu_sub_slices);
> > > > -	igt_debug("n_eus: %"PRIu64"\n", intel_perf->devinfo.n_eus);
> > > > -	igt_debug("timestamp_frequency = %"PRIu64"\n",
> > > > -		  intel_perf->devinfo.timestamp_frequency);
> > > > -	igt_assert_neq(intel_perf->devinfo.timestamp_frequency, 0);
> > > > -
> > > > -	/* We don't have a TestOa metric set for Haswell so use
> > > > -	 * RenderBasic
> > > > -	 */
> > > >		if (IS_HASWELL(devid)) {
> > > > -		test_set_name = "RenderBasic";
> > > >			read_report_ticks = hsw_read_report_ticks;
> > > >			sanity_check_reports = hsw_sanity_check_render_basic_reports;
> > > >			undefined_a_counters = hsw_undefined_a_counters;
> > > >		} else {
> > > > -		test_set_name = "TestOa";
> > > >			read_report_ticks = gen8_read_report_ticks;
> > > >			sanity_check_reports = gen8_sanity_check_test_oa_reports;
> > > >			undefined_a_counters = gen8_undefined_a_counters;
> > > >		}
> > > >
> > > > -	igt_list_for_each_entry(metric_set_iter, &intel_perf->metric_sets, link) {
> > > > -		if (strcmp(metric_set_iter->symbol_name, test_set_name) == 0) {
> > > > -			test_set = metric_set_iter;
> > > > -			break;
> > > > -		}
> > > > -	}
> > > > -
> > > > +	test_set = i915_perf_default_set(intel_perf, devid);
> > > >		if (!test_set)
> > > >			return false;
> > > >
> > > > @@ -994,14 +929,12 @@ init_sys_info(void)
> > > >			  test_set->symbol_name,
> > > >			  test_set->hw_config_guid);
> > > >
> > > > -	intel_perf_load_perf_configs(intel_perf, drm_fd);
> > > > -
> > > >		if (test_set->perf_oa_metrics_set == 0) {
> > > >			igt_debug("Unable to load configurations\n");
> > > >			return false;
> > > >		}
> > > >
> > > > -	oa_exp_1_millisec = max_oa_exponent_for_period_lte(1000000);
> > > > +	oa_exp_1_millisec = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 1000000);
> > > >
> > > >		return true;
> > > > }
> > > > @@ -1911,7 +1844,7 @@ test_low_oa_exponent_permissions(void)
> > > >
> > > >		igt_waitchildren();
> > > >
> > > > -	oa_period = timebase_scale(2 << ok_exponent);
> > > > +	oa_period = i915_perf_timebase_scale(intel_perf, 2 << ok_exponent);
> > > >		oa_freq = NSEC_PER_SEC / oa_period;
> > > >		write_u64_file("/proc/sys/dev/i915/oa_max_sample_rate", oa_freq - 100);
> > > >
> > > > @@ -2003,7 +1936,7 @@ get_time(void)
> > > > static void
> > > > test_blocking(uint64_t requested_oa_period, bool set_kernel_hrtimer, uint64_t kernel_hrtimer)
> > > > {
> > > > -	int oa_exponent = max_oa_exponent_for_period_lte(requested_oa_period);
> > > > +	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, requested_oa_period);
> > > >		uint64_t oa_period = oa_exponent_to_ns(oa_exponent);
> > > >		uint64_t properties[] = {
> > > >			/* Include OA reports in samples */
> > > > @@ -2162,7 +2095,7 @@ test_blocking(uint64_t requested_oa_period, bool set_kernel_hrtimer, uint64_t ke
> > > > static void
> > > > test_polling(uint64_t requested_oa_period, bool set_kernel_hrtimer, uint64_t kernel_hrtimer)
> > > > {
> > > > -	int oa_exponent = max_oa_exponent_for_period_lte(requested_oa_period);
> > > > +	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, requested_oa_period);
> > > >		uint64_t oa_period = oa_exponent_to_ns(oa_exponent);
> > > >		uint64_t properties[] = {
> > > >			/* Include OA reports in samples */
> > > > @@ -2358,7 +2291,7 @@ test_polling(uint64_t requested_oa_period, bool set_kernel_hrtimer, uint64_t ker
> > > >
> > > > static void test_polling_small_buf(void)
> > > > {
> > > > -	int oa_exponent = max_oa_exponent_for_period_lte(40 * 1000); /* 40us */
> > > > +	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 40 * 1000); /* 40us */
> > > >		uint64_t properties[] = {
> > > >			/* Include OA reports in samples */
> > > >			DRM_I915_PERF_PROP_SAMPLE_OA, true,
> > > > @@ -2461,7 +2394,7 @@ num_valid_reports_captured(struct drm_i915_perf_open_param *param,
> > > > static void
> > > > gen12_test_oa_tlb_invalidate(void)
> > > > {
> > > > -	int oa_exponent = max_oa_exponent_for_period_lte(30000000);
> > > > +	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 30000000);
> > > >		uint64_t properties[] = {
> > > >			DRM_I915_PERF_PROP_SAMPLE_OA, true,
> > > >
> > > > @@ -2503,7 +2436,7 @@ static void
> > > > test_buffer_fill(void)
> > > > {
> > > >		/* ~5 micro second period */
> > > > -	int oa_exponent = max_oa_exponent_for_period_lte(5000);
> > > > +	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 5000);
> > > >		uint64_t oa_period = oa_exponent_to_ns(oa_exponent);
> > > >		uint64_t properties[] = {
> > > >			/* Include OA reports in samples */
> > > > @@ -2651,7 +2584,7 @@ static void
> > > > test_non_zero_reason(void)
> > > > {
> > > >		/* ~20 micro second period */
> > > > -	int oa_exponent = max_oa_exponent_for_period_lte(20000);
> > > > +	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 20000);
> > > >		uint64_t properties[] = {
> > > >			/* Include OA reports in samples */
> > > >			DRM_I915_PERF_PROP_SAMPLE_OA, true,
> > > > @@ -2734,7 +2667,7 @@ static void
> > > > test_enable_disable(void)
> > > > {
> > > >		/* ~5 micro second period */
> > > > -	int oa_exponent = max_oa_exponent_for_period_lte(5000);
> > > > +	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 5000);
> > > >		uint64_t oa_period = oa_exponent_to_ns(oa_exponent);
> > > >		uint64_t properties[] = {
> > > >			/* Include OA reports in samples */
> > > > @@ -2885,7 +2818,7 @@ test_enable_disable(void)
> > > > static void
> > > > test_short_reads(void)
> > > > {
> > > > -	int oa_exponent = max_oa_exponent_for_period_lte(5000);
> > > > +	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 5000);
> > > >		uint64_t properties[] = {
> > > >			/* Include OA reports in samples */
> > > >			DRM_I915_PERF_PROP_SAMPLE_OA, true,
> > > > @@ -3447,8 +3380,8 @@ hsw_test_single_ctx_counters(void)
> > > >
> > > >			/* sanity check that we can pass the delta to timebase_scale */
> > > >			igt_assert(delta_ts64 < UINT32_MAX);
> > > > -		delta_oa32_ns = timebase_scale(delta_oa32);
> > > > -		delta_ts64_ns = timebase_scale(delta_ts64);
> > > > +		delta_oa32_ns = i915_perf_timebase_scale(intel_perf, delta_oa32);
> > > > +		delta_ts64_ns = i915_perf_timebase_scale(intel_perf, delta_ts64);
> > > >
> > > >			igt_debug("ts32 delta = %u, = %uns\n",
> > > >				  delta_oa32, (unsigned)delta_oa32_ns);
> > > > @@ -3498,7 +3431,7 @@ hsw_test_single_ctx_counters(void)
> > > > static void
> > > > gen8_test_single_ctx_render_target_writes_a_counter(void)
> > > > {
> > > > -	int oa_exponent = max_oa_exponent_for_period_lte(1000000);
> > > > +	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 1000000);
> > > >		uint64_t properties[] = {
> > > >			DRM_I915_PERF_PROP_CTX_HANDLE, UINT64_MAX, /* updated below */
> > > >
> > > > @@ -3700,8 +3633,8 @@ gen8_test_single_ctx_render_target_writes_a_counter(void)
> > > >
> > > >				/* sanity check that we can pass the delta to timebase_scale */
> > > >				igt_assert(delta_ts64 < UINT32_MAX);
> > > > -			delta_oa32_ns = timebase_scale(delta_oa32);
> > > > -			delta_ts64_ns = timebase_scale(delta_ts64);
> > > > +			delta_oa32_ns = i915_perf_timebase_scale(intel_perf, delta_oa32);
> > > > +			delta_ts64_ns = i915_perf_timebase_scale(intel_perf, delta_ts64);
> > > >
> > > >				igt_debug("oa32 delta = %u, = %uns\n",
> > > >					  delta_oa32, (unsigned)delta_oa32_ns);
> > > > @@ -3783,7 +3716,8 @@ gen8_test_single_ctx_render_target_writes_a_counter(void)
> > > >					{
> > > >						uint32_t time_delta = report[1] - report0_32[1];
> > > >
> > > > -					if (timebase_scale(time_delta) > 1000000000) {
> > > > +					if (i915_perf_timebase_scale(intel_perf,
> > > > +								     time_delta) > 1000000000) {
> > > >							skip_reason = "prior first mi-rpc";
> > > >						}
> > > >					}
> > > > @@ -3791,7 +3725,8 @@ gen8_test_single_ctx_render_target_writes_a_counter(void)
> > > >					{
> > > >						uint32_t time_delta = report[1] - report1_32[1];
> > > >
> > > > -					if (timebase_scale(time_delta) <= 1000000000) {
> > > > +					if (i915_perf_timebase_scale(intel_perf,
> > > > +								     time_delta) <= 1000000000) {
> > > >							igt_debug("    comes after last MI_RPC (%u)\n",
> > > >								  report1_32[1]);
> > > >							report = report1_32;
> > > > @@ -4164,7 +4099,7 @@ static void gen12_single_ctx_helper(void)
> > > >
> > > >		/* Sanity check that we can pass the delta to timebase_scale */
> > > >		igt_assert(delta_ts64 < UINT32_MAX);
> > > > -	delta_oa32_ns = timebase_scale(delta_oa32);
> > > > +	delta_oa32_ns = i915_perf_timebase_scale(intel_perf, delta_oa32);
> > > >		delta_ts64_ns = cs_timebase_scale(delta_ts64);
> > > >
> > > >		igt_debug("oa32 delta = %u, = %uns\n",
> > > > --
> > > > 2.25.1
> > > >

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [igt-dev] [PATCH i-g-t] i915/perf: Make __perf_open() and friends public
  2023-02-08 17:53       ` Dixit, Ashutosh
@ 2023-02-08 18:23         ` Janusz Krzysztofik
  2023-02-09  6:30           ` Dixit, Ashutosh
  0 siblings, 1 reply; 18+ messages in thread
From: Janusz Krzysztofik @ 2023-02-08 18:23 UTC (permalink / raw)
  To: Kamil Konieczny, igt-dev, Umesh Nerlige Ramappa, Chris Wilson,
	Ashutosh Dixit, Zbigniew Kempczyński, Dixit, Ashutosh

On Wednesday, 8 February 2023 18:53:11 CET Dixit, Ashutosh wrote:
> On Wed, 08 Feb 2023 06:35:47 -0800, Kamil Konieczny wrote:
> >
> > Hi Umesh,
> >
> > On 2023-02-07 at 11:33:50 -0800, Umesh Nerlige Ramappa wrote:
> > > On Tue, Feb 07, 2023 at 11:25:00AM -0800, Umesh Nerlige Ramappa wrote:
> > > > I wouldn't do this. Please keep the changes local to the specific test
> > > > that you implemented in your first rev. While it is a good idea to have
> > > > some of the perf capabilities in the library, this is way too much
> > > > churn to implement a specific test for the original failure. Unless
> > > > multiple IGT subsystems are already dependent on perf APIs to implement
> > > > multiple tests, let's not do this.
> > > >
> > >
> > > Also note that the perf library implemented in IGT is not entirely used by
> > > IGT tests alone. The library is also linked to GPUvis software. Only a few
> > > pieces of reusable code in the perf library are used by IGT tests.
> >
> > Could you give http(s) link(s) to this software?
> >
> > I checked https://github.com/mikesart/gpuvis
> > and there is no note about intel igt dependency.
> 
> Hi Kamil,
> 
> The connection between IGT and gpuvis is via this:
> 
> tools/i915-perf/i915_perf_recorder.c
> 
> So the recorder records the metrics/counters in a file and these are then
> fed to gpuvis.

How are those few proposed functions, required by IGT tests, supposed to break 
that functionality?

> >
> > imho we can have separate i915_perf lib with functions needed by
> > new test but if you are concerned about it we can start with code
> > duplication and refactor later.
> 
> Thanks Kamil and Janusz!

Please understand that's still a negative choice, selected only because of no 
answer from you (I mean Umesh and you) to questions like the one above.  We 
simply can't afford delays in adding new required subtests because you don't 
like us touching tests/i915/perf.c and lib/i915/perf.c for some reason still 
not clear to me.  That's why we are forced to use a solution which seems sub-
optimal from our (IGT) POV.

Thanks,
Janusz

> 
> >
> > Regards,
> > Kamil
> >
> > >
> > > > Thanks,
> > > > Umesh
> > > >
> > > > On Tue, Feb 07, 2023 at 11:11:21AM +0100, Janusz Krzysztofik wrote:
> > > > > We need new subtests that exercise interaction between i915 perf open/
> > > > > close and other i915 subsystems from the point of view of those other
> > > > > subsystems.  Allow other tests to reuse __perf_open/close() family of
> > > > > functions, now inside i915/perf test, by moving (sharable parts of)
> > > > > them to i915/perf library.
> > > > >
> > > > > Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik@linux.intel.com>
> > > > > ---
> > > > > lib/i915/perf.c   | 130 ++++++++++++++++++++++++++++++++++++++++++++++
> > > > > lib/i915/perf.h   |  15 ++++++
> > > > > lib/meson.build   |   1 +
> > > > > tests/i915/perf.c | 121 ++++++++++--------------------------------
> > > > > 4 files changed, 174 insertions(+), 93 deletions(-)
> > > > >
> > > > > diff --git a/lib/i915/perf.c b/lib/i915/perf.c
> > > > > index 6c7a192558..e71d637eb5 100644
> > > > > --- a/lib/i915/perf.c
> > > > > +++ b/lib/i915/perf.c
> > > > > @@ -39,7 +39,9 @@
> > > > >
> > > > > #include "i915_pciids.h"
> > > > >
> > > > > +#include "igt_aux.h"
> > > > > #include "intel_chipset.h"
> > > > > +#include "ioctl_wrappers.h"
> > > > > #include "perf.h"
> > > > >
> > > > > #include "i915_perf_metrics_hsw.h"
> > > > > @@ -1008,3 +1010,131 @@ const char *intel_perf_read_report_reason(const struct intel_perf *perf,
> > > > >
> > > > >		return "unknown";
> > > > > }
> > > > > +
> > > > > +uint64_t i915_perf_timebase_scale(struct intel_perf *intel_perf, uint32_t u32_delta)
> > > > > +{
> > > > > +	return ((uint64_t)u32_delta * NSEC_PER_SEC) / intel_perf->devinfo.timestamp_frequency;
> > > > > +}
> > > > > +
> > > > > +/* Returns: the largest OA exponent that will still result in a sampling period
> > > > > + * less than or equal to the given @period.
> > > > > + */
> > > > > +int i915_perf_max_oa_exponent_for_period_lte(struct intel_perf *intel_perf, uint64_t period)
> > > > > +{
> > > > > +	/* NB: timebase_scale() takes a uint32_t and an exponent of 30
> > > > > +	 * would already represent a period of ~3 minutes so there's
> > > > > +	 * really no need to consider higher exponents.
> > > > > +	 */
> > > > > +	for (int i = 0; i < 30; i++) {
> > > > > +		uint64_t oa_period = i915_perf_timebase_scale(intel_perf, 2 << i);
> > > > > +
> > > > > +		if (oa_period > period)
> > > > > +			return max(0, i - 1);
> > > > > +	}
> > > > > +
> > > > > +	igt_assert(!"reached");
> > > > > +	return -1;
> > > > > +}
> > > > > +
> > > > > +struct intel_perf_metric_set *i915_perf_default_set(struct intel_perf *intel_perf, uint32_t devid)
> > > > > +{
> > > > > +	struct intel_perf_metric_set *metric_set = NULL, *metric_set_iter;
> > > > > +	const char *metric_set_name = NULL;
> > > > > +
> > > > > +	igt_assert_neq(devid, 0);
> > > > > +
> > > > > +	/*
> > > > > +	 * We don't have a TestOa metric set for Haswell so use
> > > > > +	 * RenderBasic
> > > > > +	 */
> > > > > +	if (IS_HASWELL(devid))
> > > > > +		metric_set_name = "RenderBasic";
> > > > > +	else
> > > > > +		metric_set_name = "TestOa";
> > > > > +
> > > > > +	igt_list_for_each_entry(metric_set_iter, &intel_perf->metric_sets, link) {
> > > > > +		if (strcmp(metric_set_iter->symbol_name, metric_set_name) == 0) {
> > > > > +			metric_set = metric_set_iter;
> > > > > +			break;
> > > > > +		}
> > > > > +	}
> > > > > +
> > > > > +	return metric_set;
> > > > > +}
> > > > > +
> > > > > +struct intel_perf *i915_perf_init_sys_info(int drm_fd)
> > > > > +{
> > > > > +	struct intel_perf *intel_perf;
> > > > > +
> > > > > +	intel_perf = intel_perf_for_fd(drm_fd);
> > > > > +	if (!intel_perf)
> > > > > +		return NULL;
> > > > > +
> > > > > +	igt_debug("n_eu_slices: %"PRIu64"\n", intel_perf->devinfo.n_eu_slices);
> > > > > +	igt_debug("n_eu_sub_slices: %"PRIu64"\n", intel_perf->devinfo.n_eu_sub_slices);
> > > > > +	igt_debug("n_eus: %"PRIu64"\n", intel_perf->devinfo.n_eus);
> > > > > +	igt_debug("timestamp_frequency = %"PRIu64"\n",
> > > > > +		  intel_perf->devinfo.timestamp_frequency);
> > > > > +	igt_assert_neq(intel_perf->devinfo.timestamp_frequency, 0);
> > > > > +
> > > > > +	intel_perf_load_perf_configs(intel_perf, drm_fd);
> > > > > +
> > > > > +	return intel_perf;
> > > > > +}
> > > > > +
> > > > > +int i915_perf_open(int drm_fd, struct drm_i915_perf_open_param *param, int *pm_fd)
> > > > > +{
> > > > > +	int32_t pm_value = 0;
> > > > > +	int ret;
> > > > > +
> > > > > +	ret = perf_ioctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, param);
> > > > > +
> > > > > +	igt_assert(ret >= 0);
> > > > > +	errno = 0;
> > > > > +
> > > > > +	if (pm_fd) {
> > > > > +		*pm_fd = open("/dev/cpu_dma_latency", O_RDWR);
> > > > > +		igt_assert(*pm_fd >= 0);
> > > > > +
> > > > > +		igt_assert_eq(write(*pm_fd, &pm_value, sizeof(pm_value)), sizeof(pm_value));
> > > > > +	}
> > > > > +
> > > > > +	return ret;
> > > > > +}
> > > > > +
> > > > > +int i915_perf_open_for_devid(int drm_fd, uint32_t devid, struct intel_perf *intel_perf, int *pm_fd)
> > > > > +{
> > > > > +	struct intel_perf_metric_set *metric_set = i915_perf_default_set(intel_perf, devid);
> > > > > +	uint64_t oa_exp = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 1000000);
> > > > > +	uint64_t properties[] = {
> > > > > +		DRM_I915_PERF_PROP_SAMPLE_OA, true,
> > > > > +		DRM_I915_PERF_PROP_OA_METRICS_SET, 0,
> > > > > +		DRM_I915_PERF_PROP_OA_FORMAT, 0,
> > > > > +		DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp,
> > > > > +	};
> > > > > +	struct drm_i915_perf_open_param param = {
> > > > > +		.flags = I915_PERF_FLAG_FD_CLOEXEC,
> > > > > +		.num_properties = sizeof(properties) / 16,
> > > > > +		.properties_ptr = to_user_pointer(properties),
> > > > > +	};
> > > > > +
> > > > > +	igt_assert(metric_set);
> > > > > +	igt_assert(metric_set->perf_oa_metrics_set);
> > > > > +	igt_assert(oa_exp >= 0);
> > > > > +
> > > > > +	igt_debug("%s metric set UUID = %s\n",
> > > > > +		  metric_set->symbol_name,
> > > > > +		  metric_set->hw_config_guid);
> > > > > +
> > > > > +	properties[3] = metric_set->perf_oa_metrics_set;
> > > > > +	properties[5] = metric_set->perf_oa_format;
> > > > > +
> > > > > +	return i915_perf_open(drm_fd, &param, pm_fd);
> > > > > +}
> > > > > +
> > > > > +void i915_perf_close(int stream_fd, int pm_fd)
> > > > > +{
> > > > > +	close(stream_fd);
> > > > > +	if (pm_fd >= 0)
> > > > > +		close(pm_fd);
> > > > > +}
> > > > > diff --git a/lib/i915/perf.h b/lib/i915/perf.h
> > > > > index e6e60dc997..c9cd28be47 100644
> > > > > --- a/lib/i915/perf.h
> > > > > +++ b/lib/i915/perf.h
> > > > > @@ -351,6 +351,21 @@ uint64_t intel_perf_read_record_timestamp_raw(const struct intel_perf *perf,
> > > > > const char *intel_perf_read_report_reason(const struct intel_perf *perf,
> > > > >						  const struct drm_i915_perf_record_header *record);
> > > > >
> > > > > +uint64_t i915_perf_timebase_scale(struct intel_perf *intel_perf, uint32_t u32_delta);
> > > > > +
> > > > > +int i915_perf_max_oa_exponent_for_period_lte(struct intel_perf *intel_perf, uint64_t period);
> > > > > +
> > > > > +struct intel_perf_metric_set *i915_perf_default_set(struct intel_perf *intel_perf, uint32_t devid);
> > > > > +
> > > > > +struct intel_perf *i915_perf_init_sys_info(int drm_fd);
> > > > > +
> > > > > +struct drm_i915_perf_open_param;
> > > > > +int i915_perf_open(int drm_fd, struct drm_i915_perf_open_param *param, int *pm_fd);
> > > > > +
> > > > > +int i915_perf_open_for_devid(int drm_fd, uint32_t devid, struct intel_perf *intel_perf, int *pm_fd);
> > > > > +
> > > > > +void i915_perf_close(int drm_fd, int pm_fd);
> > > > > +
> > > > > #ifdef __cplusplus
> > > > > };
> > > > > #endif
> > > > > diff --git a/lib/meson.build b/lib/meson.build
> > > > > index d49b78ca1a..e79b31090b 100644
> > > > > --- a/lib/meson.build
> > > > > +++ b/lib/meson.build
> > > > > @@ -258,6 +258,7 @@ lib_igt_drm_fdinfo = declare_dependency(link_with : lib_igt_drm_fdinfo_build,
> > > > >					  include_directories : inc)
> > > > > i915_perf_files = [
> > > > >  'igt_list.c',
> > > > > +  'igt_tools_stub.c',
> > > > >  'i915/perf.c',
> > > > >  'i915/perf_data_reader.c',
> > > > > ]
> > > > > diff --git a/tests/i915/perf.c b/tests/i915/perf.c
> > > > > index dd1f1ac399..a3f59d143b 100644
> > > > > --- a/tests/i915/perf.c
> > > > > +++ b/tests/i915/perf.c
> > > > > @@ -287,21 +287,16 @@ pretty_print_oa_period(uint64_t oa_period_ns)
> > > > > static void
> > > > > __perf_close(int fd)
> > > > > {
> > > > > -	close(fd);
> > > > > +	i915_perf_close(fd, pm_fd);
> > > > >		stream_fd = -1;
> > > > >
> > > > > -	if (pm_fd >= 0) {
> > > > > -		close(pm_fd);
> > > > > +	if (pm_fd >= 0)
> > > > >			pm_fd = -1;
> > > > > -	}
> > > > > }
> > > > >
> > > > > static int
> > > > > __perf_open(int fd, struct drm_i915_perf_open_param *param, bool prevent_pm)
> > > > > {
> > > > > -	int ret;
> > > > > -	int32_t pm_value = 0;
> > > > > -
> > > > >		if (stream_fd >= 0)
> > > > >			__perf_close(stream_fd);
> > > > >		if (pm_fd >= 0) {
> > > > > @@ -309,19 +304,7 @@ __perf_open(int fd, struct drm_i915_perf_open_param *param, bool prevent_pm)
> > > > >			pm_fd = -1;
> > > > >		}
> > > > >
> > > > > -	ret = igt_ioctl(fd, DRM_IOCTL_I915_PERF_OPEN, param);
> > > > > -
> > > > > -	igt_assert(ret >= 0);
> > > > > -	errno = 0;
> > > > > -
> > > > > -	if (prevent_pm) {
> > > > > -		pm_fd = open("/dev/cpu_dma_latency", O_RDWR);
> > > > > -		igt_assert(pm_fd >= 0);
> > > > > -
> > > > > -		igt_assert_eq(write(pm_fd, &pm_value, sizeof(pm_value)), sizeof(pm_value));
> > > > > -	}
> > > > > -
> > > > > -	return ret;
> > > > > +	return i915_perf_open(fd, param, prevent_pm ? &pm_fd : NULL);
> > > > > }
> > > > >
> > > > > static int
> > > > > @@ -465,33 +448,6 @@ cs_timebase_scale(uint32_t u32_delta)
> > > > >		return ((uint64_t)u32_delta * NSEC_PER_SEC) / cs_timestamp_frequency(drm_fd);
> > > > > }
> > > > >
> > > > > -static uint64_t
> > > > > -timebase_scale(uint32_t u32_delta)
> > > > > -{
> > > > > -	return ((uint64_t)u32_delta * NSEC_PER_SEC) / intel_perf->devinfo.timestamp_frequency;
> > > > > -}
> > > > > -
> > > > > -/* Returns: the largest OA exponent that will still result in a sampling period
> > > > > - * less than or equal to the given @period.
> > > > > - */
> > > > > -static int
> > > > > -max_oa_exponent_for_period_lte(uint64_t period)
> > > > > -{
> > > > > -	/* NB: timebase_scale() takes a uint32_t and an exponent of 30
> > > > > -	 * would already represent a period of ~3 minutes so there's
> > > > > -	 * really no need to consider higher exponents.
> > > > > -	 */
> > > > > -	for (int i = 0; i < 30; i++) {
> > > > > -		uint64_t oa_period = timebase_scale(2 << i);
> > > > > -
> > > > > -		if (oa_period > period)
> > > > > -			return max(0, i - 1);
> > > > > -	}
> > > > > -
> > > > > -	igt_assert(!"reached");
> > > > > -	return -1;
> > > > > -}
> > > > > -
> > > > > /* Return: the largest OA exponent that will still result in a sampling
> > > > > * frequency greater than the given @frequency.
> > > > > */
> > > > > @@ -502,7 +458,7 @@ max_oa_exponent_for_freq_gt(uint64_t frequency)
> > > > >
> > > > >		igt_assert_neq(period, 0);
> > > > >
> > > > > -	return max_oa_exponent_for_period_lte(period - 1);
> > > > > +	return i915_perf_max_oa_exponent_for_period_lte(intel_perf, period - 1);
> > > > > }
> > > > >
> > > > > static uint64_t
> > > > > @@ -626,7 +582,7 @@ hsw_sanity_check_render_basic_reports(const uint32_t *oa_report0,
> > > > >					      const uint32_t *oa_report1,
> > > > >					      enum drm_i915_oa_format fmt)
> > > > > {
> > > > > -	uint32_t time_delta = timebase_scale(oa_report1[1] - oa_report0[1]);
> > > > > +	uint32_t time_delta = i915_perf_timebase_scale(intel_perf, oa_report1[1] - oa_report0[1]);
> > > > >		uint32_t clock_delta;
> > > > >		uint32_t max_delta;
> > > > >		struct oa_format format = get_oa_format(fmt);
> > > > > @@ -832,7 +788,7 @@ gen8_sanity_check_test_oa_reports(const uint32_t *oa_report0,
> > > > >					  enum drm_i915_oa_format fmt)
> > > > > {
> > > > >		struct oa_format format = get_oa_format(fmt);
> > > > > -	uint32_t time_delta = timebase_scale(oa_report1[1] - oa_report0[1]);
> > > > > +	uint32_t time_delta = i915_perf_timebase_scale(intel_perf, oa_report1[1] - oa_report0[1]);
> > > > >		uint32_t ticks0 = read_report_ticks(oa_report0, fmt);
> > > > >		uint32_t ticks1 = read_report_ticks(oa_report1, fmt);
> > > > >		uint32_t clock_delta = ticks1 - ticks0;
> > > > > @@ -950,43 +906,22 @@ gen8_sanity_check_test_oa_reports(const uint32_t *oa_report0,
> > > > > static bool
> > > > > init_sys_info(void)
> > > > > {
> > > > > -	const char *test_set_name = NULL;
> > > > > -	struct intel_perf_metric_set *metric_set_iter;
> > > > > -
> > > > >		igt_assert_neq(devid, 0);
> > > > >
> > > > > -	intel_perf = intel_perf_for_fd(drm_fd);
> > > > > +	intel_perf = i915_perf_init_sys_info(drm_fd);
> > > > >		igt_require(intel_perf);
> > > > >
> > > > > -	igt_debug("n_eu_slices: %"PRIu64"\n", intel_perf->devinfo.n_eu_slices);
> > > > > -	igt_debug("n_eu_sub_slices: %"PRIu64"\n", intel_perf->devinfo.n_eu_sub_slices);
> > > > > -	igt_debug("n_eus: %"PRIu64"\n", intel_perf->devinfo.n_eus);
> > > > > -	igt_debug("timestamp_frequency = %"PRIu64"\n",
> > > > > -		  intel_perf->devinfo.timestamp_frequency);
> > > > > -	igt_assert_neq(intel_perf->devinfo.timestamp_frequency, 0);
> > > > > -
> > > > > -	/* We don't have a TestOa metric set for Haswell so use
> > > > > -	 * RenderBasic
> > > > > -	 */
> > > > >		if (IS_HASWELL(devid)) {
> > > > > -		test_set_name = "RenderBasic";
> > > > >			read_report_ticks = hsw_read_report_ticks;
> > > > >			sanity_check_reports = hsw_sanity_check_render_basic_reports;
> > > > >			undefined_a_counters = hsw_undefined_a_counters;
> > > > >		} else {
> > > > > -		test_set_name = "TestOa";
> > > > >			read_report_ticks = gen8_read_report_ticks;
> > > > >			sanity_check_reports = gen8_sanity_check_test_oa_reports;
> > > > >			undefined_a_counters = gen8_undefined_a_counters;
> > > > >		}
> > > > >
> > > > > -	igt_list_for_each_entry(metric_set_iter, &intel_perf->metric_sets, link) {
> > > > > -		if (strcmp(metric_set_iter->symbol_name, test_set_name) == 0) {
> > > > > -			test_set = metric_set_iter;
> > > > > -			break;
> > > > > -		}
> > > > > -	}
> > > > > -
> > > > > +	test_set = i915_perf_default_set(intel_perf, devid);
> > > > >		if (!test_set)
> > > > >			return false;
> > > > >
> > > > > @@ -994,14 +929,12 @@ init_sys_info(void)
> > > > >			  test_set->symbol_name,
> > > > >			  test_set->hw_config_guid);
> > > > >
> > > > > -	intel_perf_load_perf_configs(intel_perf, drm_fd);
> > > > > -
> > > > >		if (test_set->perf_oa_metrics_set == 0) {
> > > > >			igt_debug("Unable to load configurations\n");
> > > > >			return false;
> > > > >		}
> > > > >
> > > > > -	oa_exp_1_millisec = max_oa_exponent_for_period_lte(1000000);
> > > > > +	oa_exp_1_millisec = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 1000000);
> > > > >
> > > > >		return true;
> > > > > }
> > > > > @@ -1911,7 +1844,7 @@ test_low_oa_exponent_permissions(void)
> > > > >
> > > > >		igt_waitchildren();
> > > > >
> > > > > -	oa_period = timebase_scale(2 << ok_exponent);
> > > > > +	oa_period = i915_perf_timebase_scale(intel_perf, 2 << ok_exponent);
> > > > >		oa_freq = NSEC_PER_SEC / oa_period;
> > > > >		write_u64_file("/proc/sys/dev/i915/oa_max_sample_rate", oa_freq - 100);
> > > > >
> > > > > @@ -2003,7 +1936,7 @@ get_time(void)
> > > > > static void
> > > > > test_blocking(uint64_t requested_oa_period, bool set_kernel_hrtimer, uint64_t kernel_hrtimer)
> > > > > {
> > > > > -	int oa_exponent = max_oa_exponent_for_period_lte(requested_oa_period);
> > > > > +	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, requested_oa_period);
> > > > >		uint64_t oa_period = oa_exponent_to_ns(oa_exponent);
> > > > >		uint64_t properties[] = {
> > > > >			/* Include OA reports in samples */
> > > > > @@ -2162,7 +2095,7 @@ test_blocking(uint64_t requested_oa_period, bool set_kernel_hrtimer, uint64_t ke
> > > > > static void
> > > > > test_polling(uint64_t requested_oa_period, bool set_kernel_hrtimer, uint64_t kernel_hrtimer)
> > > > > {
> > > > > -	int oa_exponent = max_oa_exponent_for_period_lte(requested_oa_period);
> > > > > +	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, requested_oa_period);
> > > > >		uint64_t oa_period = oa_exponent_to_ns(oa_exponent);
> > > > >		uint64_t properties[] = {
> > > > >			/* Include OA reports in samples */
> > > > > @@ -2358,7 +2291,7 @@ test_polling(uint64_t requested_oa_period, bool set_kernel_hrtimer, uint64_t ker
> > > > >
> > > > > static void test_polling_small_buf(void)
> > > > > {
> > > > > -	int oa_exponent = max_oa_exponent_for_period_lte(40 * 1000); /* 40us */
> > > > > +	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 40 * 1000); /* 40us */
> > > > >		uint64_t properties[] = {
> > > > >			/* Include OA reports in samples */
> > > > >			DRM_I915_PERF_PROP_SAMPLE_OA, true,
> > > > > @@ -2461,7 +2394,7 @@ num_valid_reports_captured(struct drm_i915_perf_open_param *param,
> > > > > static void
> > > > > gen12_test_oa_tlb_invalidate(void)
> > > > > {
> > > > > -	int oa_exponent = max_oa_exponent_for_period_lte(30000000);
> > > > > +	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 30000000);
> > > > >		uint64_t properties[] = {
> > > > >			DRM_I915_PERF_PROP_SAMPLE_OA, true,
> > > > >
> > > > > @@ -2503,7 +2436,7 @@ static void
> > > > > test_buffer_fill(void)
> > > > > {
> > > > >		/* ~5 micro second period */
> > > > > -	int oa_exponent = max_oa_exponent_for_period_lte(5000);
> > > > > +	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 5000);
> > > > >		uint64_t oa_period = oa_exponent_to_ns(oa_exponent);
> > > > >		uint64_t properties[] = {
> > > > >			/* Include OA reports in samples */
> > > > > @@ -2651,7 +2584,7 @@ static void
> > > > > test_non_zero_reason(void)
> > > > > {
> > > > >		/* ~20 micro second period */
> > > > > -	int oa_exponent = max_oa_exponent_for_period_lte(20000);
> > > > > +	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 20000);
> > > > >		uint64_t properties[] = {
> > > > >			/* Include OA reports in samples */
> > > > >			DRM_I915_PERF_PROP_SAMPLE_OA, true,
> > > > > @@ -2734,7 +2667,7 @@ static void
> > > > > test_enable_disable(void)
> > > > > {
> > > > >		/* ~5 micro second period */
> > > > > -	int oa_exponent = max_oa_exponent_for_period_lte(5000);
> > > > > +	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 5000);
> > > > >		uint64_t oa_period = oa_exponent_to_ns(oa_exponent);
> > > > >		uint64_t properties[] = {
> > > > >			/* Include OA reports in samples */
> > > > > @@ -2885,7 +2818,7 @@ test_enable_disable(void)
> > > > > static void
> > > > > test_short_reads(void)
> > > > > {
> > > > > -	int oa_exponent = max_oa_exponent_for_period_lte(5000);
> > > > > +	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 5000);
> > > > >		uint64_t properties[] = {
> > > > >			/* Include OA reports in samples */
> > > > >			DRM_I915_PERF_PROP_SAMPLE_OA, true,
> > > > > @@ -3447,8 +3380,8 @@ hsw_test_single_ctx_counters(void)
> > > > >
> > > > >			/* sanity check that we can pass the delta to timebase_scale */
> > > > >			igt_assert(delta_ts64 < UINT32_MAX);
> > > > > -		delta_oa32_ns = timebase_scale(delta_oa32);
> > > > > -		delta_ts64_ns = timebase_scale(delta_ts64);
> > > > > +		delta_oa32_ns = i915_perf_timebase_scale(intel_perf, delta_oa32);
> > > > > +		delta_ts64_ns = i915_perf_timebase_scale(intel_perf, delta_ts64);
> > > > >
> > > > >			igt_debug("ts32 delta = %u, = %uns\n",
> > > > >				  delta_oa32, (unsigned)delta_oa32_ns);
> > > > > @@ -3498,7 +3431,7 @@ hsw_test_single_ctx_counters(void)
> > > > > static void
> > > > > gen8_test_single_ctx_render_target_writes_a_counter(void)
> > > > > {
> > > > > -	int oa_exponent = max_oa_exponent_for_period_lte(1000000);
> > > > > +	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 1000000);
> > > > >		uint64_t properties[] = {
> > > > >			DRM_I915_PERF_PROP_CTX_HANDLE, UINT64_MAX, /* updated below */
> > > > >
> > > > > @@ -3700,8 +3633,8 @@ gen8_test_single_ctx_render_target_writes_a_counter(void)
> > > > >
> > > > >				/* sanity check that we can pass the delta to timebase_scale */
> > > > >				igt_assert(delta_ts64 < UINT32_MAX);
> > > > > -			delta_oa32_ns = timebase_scale(delta_oa32);
> > > > > -			delta_ts64_ns = timebase_scale(delta_ts64);
> > > > > +			delta_oa32_ns = i915_perf_timebase_scale(intel_perf, delta_oa32);
> > > > > +			delta_ts64_ns = i915_perf_timebase_scale(intel_perf, delta_ts64);
> > > > >
> > > > >				igt_debug("oa32 delta = %u, = %uns\n",
> > > > >					  delta_oa32, (unsigned)delta_oa32_ns);
> > > > > @@ -3783,7 +3716,8 @@ gen8_test_single_ctx_render_target_writes_a_counter(void)
> > > > >					{
> > > > >						uint32_t time_delta = report[1] - report0_32[1];
> > > > >
> > > > > -					if (timebase_scale(time_delta) > 1000000000) {
> > > > > +					if (i915_perf_timebase_scale(intel_perf,
> > > > > +								     time_delta) > 1000000000) {
> > > > >							skip_reason = "prior first mi-rpc";
> > > > >						}
> > > > >					}
> > > > > @@ -3791,7 +3725,8 @@ gen8_test_single_ctx_render_target_writes_a_counter(void)
> > > > >					{
> > > > >						uint32_t time_delta = report[1] - report1_32[1];
> > > > >
> > > > > -					if (timebase_scale(time_delta) <= 1000000000) {
> > > > > +					if (i915_perf_timebase_scale(intel_perf,
> > > > > +								     time_delta) <= 1000000000) {
> > > > >							igt_debug("    comes after last MI_RPC (%u)\n",
> > > > >								  report1_32[1]);
> > > > >							report = report1_32;
> > > > > @@ -4164,7 +4099,7 @@ static void gen12_single_ctx_helper(void)
> > > > >
> > > > >		/* Sanity check that we can pass the delta to timebase_scale */
> > > > >		igt_assert(delta_ts64 < UINT32_MAX);
> > > > > -	delta_oa32_ns = timebase_scale(delta_oa32);
> > > > > +	delta_oa32_ns = i915_perf_timebase_scale(intel_perf, delta_oa32);
> > > > >		delta_ts64_ns = cs_timebase_scale(delta_ts64);
> > > > >
> > > > >		igt_debug("oa32 delta = %u, = %uns\n",
> > > > > --
> > > > > 2.25.1
> > > > >
> 




^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [igt-dev] [PATCH i-g-t] i915/perf: Make __perf_open() and friends public
  2023-02-08 10:09         ` Janusz Krzysztofik
@ 2023-02-08 19:34           ` Umesh Nerlige Ramappa
  2023-02-09  9:56             ` Janusz Krzysztofik
  0 siblings, 1 reply; 18+ messages in thread
From: Umesh Nerlige Ramappa @ 2023-02-08 19:34 UTC (permalink / raw)
  To: Janusz Krzysztofik; +Cc: igt-dev, Chris Wilson

On Wed, Feb 08, 2023 at 11:09:23AM +0100, Janusz Krzysztofik wrote:
>On Tuesday, 7 February 2023 21:15:26 CET Dixit, Ashutosh wrote:
>> On Tue, 07 Feb 2023 12:04:17 -0800, Janusz Krzysztofik wrote:
>> >
>> > Hi Umesh,
>> >
>> > On Tuesday, 7 February 2023 20:33:50 CET Umesh Nerlige Ramappa wrote:
>> > > On Tue, Feb 07, 2023 at 11:25:00AM -0800, Umesh Nerlige Ramappa wrote:
>> > > >I wouldn't do this. Please keep the changes local to the specific test
>> > > >that you implemented in your first rev. While it is a good idea to
>> > > >have some of the perf capabilities in the library, this is way too
>> > > >much churn to implement a specific test for the original failure.
>> > > >Unless multiple IGT subsystems are already dependent on perf APIs to
>> > > >implement multiple tests, let's not do this.
>> > > >
>> > >
>> > > Also note that the perf library implemented in IGT is not entirely used
>> > > by IGT tests alone. The library is also linked to GPUvis software. Only
>> > > a few pieces of reusable code in the perf library are used by IGT tests.
>> >
>> > Do you think that my changes will break other users?  How?
>> >
>> > Also, it looks like there are somehow conflicting expectations from different
>> > reviewers.  Ashutosh wanted the new subtest to be implemented outside of i915/
>> > perf test.  That's why I proposed to extend the library with open/close and
>> > related helpers, just to avoid code duplication, and I'm about to resend it in
>> > series with the new subtest implemented inside gem_ctx_exec.  Now, after I
>> > submitted this patch for initial review, you say that a specific test is not
>> > the way to go.  What are you afraid of?
>> >
>> > Whose expectations should I try to satisfy in order to have a subtest accepted
>> > and merged?  Or should I just give up and duplicate the code from i915/perf in
>> > another test?  Or maybe you can have a look at the whole series before you
>> > decide?
>>
>> Hi Janusz,
>>
>> I agree with Umesh. Given that here perf is just being used as a 'dummy
>> workload' let's just duplicate the minimal code required for perf
>> open/close wherever we are adding the new test. This will keep the real
>> perf functionality undisturbed for reasons Umesh cited.
>
>TBH, I can't see any good justification among those reasons mentioned: "too
>much churn", "unless ... already dependent", "not entirely used by IGT tests",
>"linked to GPUvis software", "only a few pieces of reusable code ... used by
>IGT" -- which of those justifies duplication of i915 perf code in IGT tests?

You yourself mentioned that this is not related to perf. It's just that 
perf uses some code in i915 that does barrier related stuff which helps 
you to reproduce the issue. Why can't that barrier-related-stuff be 
implemented in IGT without the use of perf APIs? If that's a lot of 
effort and it's quicker to reproduce this issue using perf APIs, then 
that's fine with me, but keep it outside of perf library and maybe add a 
note saying that this test can be improved by figuring out how to do 
barrier related execution in IGT in future. I don't see any 
justification to modify perf library for an issue that's not even perf 
related.

I believe you also mentioned somewhere that the issue was fixed by some 
'unknown' code changes to i915 and you are not able to reproduce
it consistently with this test now.

I have considered the above factors to suggest that this should not 
reside in perf library.

>Again, do you think that my changes can break other (non-IGT) users?  
>How?

I didn't say that your changes break anything. I was stating that the 
library code is shared across different tools.

Thanks,
Umesh

>
>Anyway, assuming you are the "owner" of lib/i915/perf.c, in order to satisfy
>your (still not clear for me) requirements I'm already working on a new
>version of my patch, with the i915 perf code duplicated as needed.
>
>Thanks,
>Janusz
>
>>
>> Thanks.
>> --
>> Ashutosh
>>
>
>
>
>

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [igt-dev] [PATCH i-g-t] i915/perf: Make __perf_open() and friends public
  2023-02-08 18:23         ` Janusz Krzysztofik
@ 2023-02-09  6:30           ` Dixit, Ashutosh
  2023-02-09 10:02             ` Janusz Krzysztofik
  0 siblings, 1 reply; 18+ messages in thread
From: Dixit, Ashutosh @ 2023-02-09  6:30 UTC (permalink / raw)
  To: Janusz Krzysztofik; +Cc: igt-dev, Chris Wilson

On Wed, 08 Feb 2023 10:23:07 -0800, Janusz Krzysztofik wrote:
>
> On Wednesday, 8 February 2023 18:53:11 CET Dixit, Ashutosh wrote:
> > On Wed, 08 Feb 2023 06:35:47 -0800, Kamil Konieczny wrote:
> > >
> > > Hi Umesh,
> > >
> > > On 2023-02-07 at 11:33:50 -0800, Umesh Nerlige Ramappa wrote:
> > > > On Tue, Feb 07, 2023 at 11:25:00AM -0800, Umesh Nerlige Ramappa wrote:
> > > > > I wouldn't do this. Please keep the changes local to the specific test
> > > > > that you implemented in your first rev. While it is a good idea to have
> > > > > some of the perf capabilities in the library, this is way too much
> > > > > churn to implement a specific test for the original failure. Unless
> > > > > multiple IGT subsystems are already dependent on perf APIs to implement
> > > > > multiple tests, let's not do this.
> > > > >
> > > >
> > > > Also note that the perf library implemented in IGT is not entirely used by
> > > > IGT tests alone. The library is also linked to GPUvis software. Only a few
> > > > pieces of reusable code in the perf library are used by IGT tests.
> > >
> > > Could you give http(s) link(s) to this software?
> > >
> > > I checked https://github.com/mikesart/gpuvis
> > > and there is no note about intel igt dependency.
> >
> > Hi Kamil,
> >
> > The connection between IGT and gpuvis is via this:
> >
> > tools/i915-perf/i915_perf_recorder.c
> >
> > So the recorder records the metrics/counters in a file and these are then
> > fed to gpuvis.
>
> How are those few proposed functions, required by IGT tests, supposed to break
> that functionality?
>
> > >
> > > imho we can have separate i915_perf lib with functions needed by
> > > new test but if you are concerned about it we can start with code
> > > duplication and refactor later.
> >
> > Thanks Kamil and Janusz!
>
> Please understand that's still a negative choice, selected only because of no
> answer from you (I mean Umesh and you) to questions like the one above.  We
> simply can't afford delays in adding new required subtests because you don't
> like us touching tests/i915/perf.c and lib/i915/perf.c for some reason still
> not clear to me.  That's why we are forced to use a solution which seems sub-
> optimal from our (IGT) POV.

Hi Janusz,

I think you are mistaken in thinking that any code which is shared in a
couple of tests can be put in the IGT library. The IGT library functions
are more than just shared code.

For example take the function below from your patch:

+int i915_perf_open_for_devid(int drm_fd, uint32_t devid, struct intel_perf *intel_perf, int *pm_fd)
+{
+       struct intel_perf_metric_set *metric_set = i915_perf_default_set(intel_perf, devid);
+       uint64_t oa_exp = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 1000000);
+       uint64_t properties[] = {
+               DRM_I915_PERF_PROP_SAMPLE_OA, true,
+               DRM_I915_PERF_PROP_OA_METRICS_SET, 0,
+               DRM_I915_PERF_PROP_OA_FORMAT, 0,
+               DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp,
+       };
+       struct drm_i915_perf_open_param param = {
+               .flags = I915_PERF_FLAG_FD_CLOEXEC,
+               .num_properties = sizeof(properties) / 16,
+               .properties_ptr = to_user_pointer(properties),
+       };

This function is setting particular perf_open properties which the perf
tests are using. This function should be in the tests, not in the library
since another client of the library might want to use different perf_open
properties.

It is for reasons such as this that we are saying unless we can demonstrate
that some functions logically belong in the perf library (which would mean
analyzing the different clients of the perf lib) we shouldn't add them to
the library. The library is not a place to add just *any* shared code.

Thanks.
--
Ashutosh



>
> Thanks,
> Janusz
>
> >
> > >
> > > Regards,
> > > Kamil
> > >
> > > >
> > > > > Thanks,
> > > > > Umesh
> > > > >
> > > > > On Tue, Feb 07, 2023 at 11:11:21AM +0100, Janusz Krzysztofik wrote:
> > > > > > We need new subtests that exercise interaction between i915 perf open/
> > > > > > close and other i915 subsystems from the point of view of those other
> > > > > > subsystems.  Allow other tests to reuse __perf_open/close() family of
> > > > > > functions, now inside i915/perf test, by moving (sharable parts of)
> > > > > > them to i915/perf library.
> > > > > >
> > > > > > Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik@linux.intel.com>
> > > > > > ---
> > > > > > lib/i915/perf.c   | 130 ++++++++++++++++++++++++++++++++++++++++++++++
> > > > > > lib/i915/perf.h   |  15 ++++++
> > > > > > lib/meson.build   |   1 +
> > > > > > tests/i915/perf.c | 121 ++++++++++--------------------------------
> > > > > > 4 files changed, 174 insertions(+), 93 deletions(-)
> > > > > >
> > > > > > diff --git a/lib/i915/perf.c b/lib/i915/perf.c
> > > > > > index 6c7a192558..e71d637eb5 100644
> > > > > > --- a/lib/i915/perf.c
> > > > > > +++ b/lib/i915/perf.c
> > > > > > @@ -39,7 +39,9 @@
> > > > > >
> > > > > > #include "i915_pciids.h"
> > > > > >
> > > > > > +#include "igt_aux.h"
> > > > > > #include "intel_chipset.h"
> > > > > > +#include "ioctl_wrappers.h"
> > > > > > #include "perf.h"
> > > > > >
> > > > > > #include "i915_perf_metrics_hsw.h"
> > > > > > @@ -1008,3 +1010,131 @@ const char *intel_perf_read_report_reason(const struct intel_perf *perf,
> > > > > >
> > > > > >		return "unknown";
> > > > > > }
> > > > > > +
> > > > > > +uint64_t i915_perf_timebase_scale(struct intel_perf *intel_perf, uint32_t u32_delta)
> > > > > > +{
> > > > > > +	return ((uint64_t)u32_delta * NSEC_PER_SEC) / intel_perf->devinfo.timestamp_frequency;
> > > > > > +}
> > > > > > +
> > > > > > +/* Returns: the largest OA exponent that will still result in a sampling period
> > > > > > + * less than or equal to the given @period.
> > > > > > + */
> > > > > > +int i915_perf_max_oa_exponent_for_period_lte(struct intel_perf *intel_perf, uint64_t period)
> > > > > > +{
> > > > > > +	/* NB: timebase_scale() takes a uint32_t and an exponent of 30
> > > > > > +	 * would already represent a period of ~3 minutes so there's
> > > > > > +	 * really no need to consider higher exponents.
> > > > > > +	 */
> > > > > > +	for (int i = 0; i < 30; i++) {
> > > > > > +		uint64_t oa_period = i915_perf_timebase_scale(intel_perf, 2 << i);
> > > > > > +
> > > > > > +		if (oa_period > period)
> > > > > > +			return max(0, i - 1);
> > > > > > +	}
> > > > > > +
> > > > > > +	igt_assert(!"reached");
> > > > > > +	return -1;
> > > > > > +}
> > > > > > +
> > > > > > +struct intel_perf_metric_set *i915_perf_default_set(struct intel_perf *intel_perf, uint32_t devid)
> > > > > > +{
> > > > > > +	struct intel_perf_metric_set *metric_set = NULL, *metric_set_iter;
> > > > > > +	const char *metric_set_name = NULL;
> > > > > > +
> > > > > > +	igt_assert_neq(devid, 0);
> > > > > > +
> > > > > > +	/*
> > > > > > +	 * We don't have a TestOa metric set for Haswell so use
> > > > > > +	 * RenderBasic
> > > > > > +	 */
> > > > > > +	if (IS_HASWELL(devid))
> > > > > > +		metric_set_name = "RenderBasic";
> > > > > > +	else
> > > > > > +		metric_set_name = "TestOa";
> > > > > > +
> > > > > > +	igt_list_for_each_entry(metric_set_iter, &intel_perf->metric_sets, link) {
> > > > > > +		if (strcmp(metric_set_iter->symbol_name, metric_set_name) == 0) {
> > > > > > +			metric_set = metric_set_iter;
> > > > > > +			break;
> > > > > > +		}
> > > > > > +	}
> > > > > > +
> > > > > > +	return metric_set;
> > > > > > +}
> > > > > > +
> > > > > > +struct intel_perf *i915_perf_init_sys_info(int drm_fd)
> > > > > > +{
> > > > > > +	struct intel_perf *intel_perf;
> > > > > > +
> > > > > > +	intel_perf = intel_perf_for_fd(drm_fd);
> > > > > > +	if (!intel_perf)
> > > > > > +		return NULL;
> > > > > > +
> > > > > > +	igt_debug("n_eu_slices: %"PRIu64"\n", intel_perf->devinfo.n_eu_slices);
> > > > > > +	igt_debug("n_eu_sub_slices: %"PRIu64"\n", intel_perf->devinfo.n_eu_sub_slices);
> > > > > > +	igt_debug("n_eus: %"PRIu64"\n", intel_perf->devinfo.n_eus);
> > > > > > +	igt_debug("timestamp_frequency = %"PRIu64"\n",
> > > > > > +		  intel_perf->devinfo.timestamp_frequency);
> > > > > > +	igt_assert_neq(intel_perf->devinfo.timestamp_frequency, 0);
> > > > > > +
> > > > > > +	intel_perf_load_perf_configs(intel_perf, drm_fd);
> > > > > > +
> > > > > > +	return intel_perf;
> > > > > > +}
> > > > > > +
> > > > > > +int i915_perf_open(int drm_fd, struct drm_i915_perf_open_param *param, int *pm_fd)
> > > > > > +{
> > > > > > +	int32_t pm_value = 0;
> > > > > > +	int ret;
> > > > > > +
> > > > > > +	ret = perf_ioctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, param);
> > > > > > +
> > > > > > +	igt_assert(ret >= 0);
> > > > > > +	errno = 0;
> > > > > > +
> > > > > > +	if (pm_fd) {
> > > > > > +		*pm_fd = open("/dev/cpu_dma_latency", O_RDWR);
> > > > > > +		igt_assert(*pm_fd >= 0);
> > > > > > +
> > > > > > +		igt_assert_eq(write(*pm_fd, &pm_value, sizeof(pm_value)), sizeof(pm_value));
> > > > > > +	}
> > > > > > +
> > > > > > +	return ret;
> > > > > > +}
> > > > > > +
> > > > > > +int i915_perf_open_for_devid(int drm_fd, uint32_t devid, struct intel_perf *intel_perf, int *pm_fd)
> > > > > > +{
> > > > > > +	struct intel_perf_metric_set *metric_set = i915_perf_default_set(intel_perf, devid);
> > > > > > +	uint64_t oa_exp = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 1000000);
> > > > > > +	uint64_t properties[] = {
> > > > > > +		DRM_I915_PERF_PROP_SAMPLE_OA, true,
> > > > > > +		DRM_I915_PERF_PROP_OA_METRICS_SET, 0,
> > > > > > +		DRM_I915_PERF_PROP_OA_FORMAT, 0,
> > > > > > +		DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp,
> > > > > > +	};
> > > > > > +	struct drm_i915_perf_open_param param = {
> > > > > > +		.flags = I915_PERF_FLAG_FD_CLOEXEC,
> > > > > > +		.num_properties = sizeof(properties) / 16,
> > > > > > +		.properties_ptr = to_user_pointer(properties),
> > > > > > +	};
> > > > > > +
> > > > > > +	igt_assert(metric_set);
> > > > > > +	igt_assert(metric_set->perf_oa_metrics_set);
> > > > > > +	igt_assert(oa_exp >= 0);
> > > > > > +
> > > > > > +	igt_debug("%s metric set UUID = %s\n",
> > > > > > +		  metric_set->symbol_name,
> > > > > > +		  metric_set->hw_config_guid);
> > > > > > +
> > > > > > +	properties[3] = metric_set->perf_oa_metrics_set;
> > > > > > +	properties[5] = metric_set->perf_oa_format;
> > > > > > +
> > > > > > +	return i915_perf_open(drm_fd, &param, pm_fd);
> > > > > > +}
> > > > > > +
> > > > > > +void i915_perf_close(int stream_fd, int pm_fd)
> > > > > > +{
> > > > > > +	close(stream_fd);
> > > > > > +	if (pm_fd >= 0)
> > > > > > +		close(pm_fd);
> > > > > > +}
> > > > > > diff --git a/lib/i915/perf.h b/lib/i915/perf.h
> > > > > > index e6e60dc997..c9cd28be47 100644
> > > > > > --- a/lib/i915/perf.h
> > > > > > +++ b/lib/i915/perf.h
> > > > > > @@ -351,6 +351,21 @@ uint64_t intel_perf_read_record_timestamp_raw(const struct intel_perf *perf,
> > > > > > const char *intel_perf_read_report_reason(const struct intel_perf *perf,
> > > > > >						  const struct drm_i915_perf_record_header *record);
> > > > > >
> > > > > > +uint64_t i915_perf_timebase_scale(struct intel_perf *intel_perf, uint32_t u32_delta);
> > > > > > +
> > > > > > +int i915_perf_max_oa_exponent_for_period_lte(struct intel_perf *intel_perf, uint64_t period);
> > > > > > +
> > > > > > +struct intel_perf_metric_set *i915_perf_default_set(struct intel_perf *intel_perf, uint32_t devid);
> > > > > > +
> > > > > > +struct intel_perf *i915_perf_init_sys_info(int drm_fd);
> > > > > > +
> > > > > > +struct drm_i915_perf_open_param;
> > > > > > +int i915_perf_open(int drm_fd, struct drm_i915_perf_open_param *param, int *pm_fd);
> > > > > > +
> > > > > > +int i915_perf_open_for_devid(int drm_fd, uint32_t devid, struct intel_perf *intel_perf, int *pm_fd);
> > > > > > +
> > > > > > +void i915_perf_close(int drm_fd, int pm_fd);
> > > > > > +
> > > > > > #ifdef __cplusplus
> > > > > > };
> > > > > > #endif
> > > > > > diff --git a/lib/meson.build b/lib/meson.build
> > > > > > index d49b78ca1a..e79b31090b 100644
> > > > > > --- a/lib/meson.build
> > > > > > +++ b/lib/meson.build
> > > > > > @@ -258,6 +258,7 @@ lib_igt_drm_fdinfo = declare_dependency(link_with : lib_igt_drm_fdinfo_build,
> > > > > >					  include_directories : inc)
> > > > > > i915_perf_files = [
> > > > > >  'igt_list.c',
> > > > > > +  'igt_tools_stub.c',
> > > > > >  'i915/perf.c',
> > > > > >  'i915/perf_data_reader.c',
> > > > > > ]
> > > > > > diff --git a/tests/i915/perf.c b/tests/i915/perf.c
> > > > > > index dd1f1ac399..a3f59d143b 100644
> > > > > > --- a/tests/i915/perf.c
> > > > > > +++ b/tests/i915/perf.c
> > > > > > @@ -287,21 +287,16 @@ pretty_print_oa_period(uint64_t oa_period_ns)
> > > > > > static void
> > > > > > __perf_close(int fd)
> > > > > > {
> > > > > > -	close(fd);
> > > > > > +	i915_perf_close(fd, pm_fd);
> > > > > >		stream_fd = -1;
> > > > > >
> > > > > > -	if (pm_fd >= 0) {
> > > > > > -		close(pm_fd);
> > > > > > +	if (pm_fd >= 0)
> > > > > >			pm_fd = -1;
> > > > > > -	}
> > > > > > }
> > > > > >
> > > > > > static int
> > > > > > __perf_open(int fd, struct drm_i915_perf_open_param *param, bool prevent_pm)
> > > > > > {
> > > > > > -	int ret;
> > > > > > -	int32_t pm_value = 0;
> > > > > > -
> > > > > >		if (stream_fd >= 0)
> > > > > >			__perf_close(stream_fd);
> > > > > >		if (pm_fd >= 0) {
> > > > > > @@ -309,19 +304,7 @@ __perf_open(int fd, struct drm_i915_perf_open_param *param, bool prevent_pm)
> > > > > >			pm_fd = -1;
> > > > > >		}
> > > > > >
> > > > > > -	ret = igt_ioctl(fd, DRM_IOCTL_I915_PERF_OPEN, param);
> > > > > > -
> > > > > > -	igt_assert(ret >= 0);
> > > > > > -	errno = 0;
> > > > > > -
> > > > > > -	if (prevent_pm) {
> > > > > > -		pm_fd = open("/dev/cpu_dma_latency", O_RDWR);
> > > > > > -		igt_assert(pm_fd >= 0);
> > > > > > -
> > > > > > -		igt_assert_eq(write(pm_fd, &pm_value, sizeof(pm_value)), sizeof(pm_value));
> > > > > > -	}
> > > > > > -
> > > > > > -	return ret;
> > > > > > +	return i915_perf_open(fd, param, prevent_pm ? &pm_fd : NULL);
> > > > > > }
> > > > > >
> > > > > > static int
> > > > > > @@ -465,33 +448,6 @@ cs_timebase_scale(uint32_t u32_delta)
> > > > > >		return ((uint64_t)u32_delta * NSEC_PER_SEC) / cs_timestamp_frequency(drm_fd);
> > > > > > }
> > > > > >
> > > > > > -static uint64_t
> > > > > > -timebase_scale(uint32_t u32_delta)
> > > > > > -{
> > > > > > -	return ((uint64_t)u32_delta * NSEC_PER_SEC) / intel_perf->devinfo.timestamp_frequency;
> > > > > > -}
> > > > > > -
> > > > > > -/* Returns: the largest OA exponent that will still result in a sampling period
> > > > > > - * less than or equal to the given @period.
> > > > > > - */
> > > > > > -static int
> > > > > > -max_oa_exponent_for_period_lte(uint64_t period)
> > > > > > -{
> > > > > > -	/* NB: timebase_scale() takes a uint32_t and an exponent of 30
> > > > > > -	 * would already represent a period of ~3 minutes so there's
> > > > > > -	 * really no need to consider higher exponents.
> > > > > > -	 */
> > > > > > -	for (int i = 0; i < 30; i++) {
> > > > > > -		uint64_t oa_period = timebase_scale(2 << i);
> > > > > > -
> > > > > > -		if (oa_period > period)
> > > > > > -			return max(0, i - 1);
> > > > > > -	}
> > > > > > -
> > > > > > -	igt_assert(!"reached");
> > > > > > -	return -1;
> > > > > > -}
> > > > > > -
> > > > > > /* Return: the largest OA exponent that will still result in a sampling
> > > > > > * frequency greater than the given @frequency.
> > > > > > */
> > > > > > @@ -502,7 +458,7 @@ max_oa_exponent_for_freq_gt(uint64_t frequency)
> > > > > >
> > > > > >		igt_assert_neq(period, 0);
> > > > > >
> > > > > > -	return max_oa_exponent_for_period_lte(period - 1);
> > > > > > +	return i915_perf_max_oa_exponent_for_period_lte(intel_perf, period - 1);
> > > > > > }
> > > > > >
> > > > > > static uint64_t
> > > > > > @@ -626,7 +582,7 @@ hsw_sanity_check_render_basic_reports(const uint32_t *oa_report0,
> > > > > >					      const uint32_t *oa_report1,
> > > > > >					      enum drm_i915_oa_format fmt)
> > > > > > {
> > > > > > -	uint32_t time_delta = timebase_scale(oa_report1[1] - oa_report0[1]);
> > > > > > +	uint32_t time_delta = i915_perf_timebase_scale(intel_perf, oa_report1[1] - oa_report0[1]);
> > > > > >		uint32_t clock_delta;
> > > > > >		uint32_t max_delta;
> > > > > >		struct oa_format format = get_oa_format(fmt);
> > > > > > @@ -832,7 +788,7 @@ gen8_sanity_check_test_oa_reports(const uint32_t *oa_report0,
> > > > > >					  enum drm_i915_oa_format fmt)
> > > > > > {
> > > > > >		struct oa_format format = get_oa_format(fmt);
> > > > > > -	uint32_t time_delta = timebase_scale(oa_report1[1] - oa_report0[1]);
> > > > > > +	uint32_t time_delta = i915_perf_timebase_scale(intel_perf, oa_report1[1] - oa_report0[1]);
> > > > > >		uint32_t ticks0 = read_report_ticks(oa_report0, fmt);
> > > > > >		uint32_t ticks1 = read_report_ticks(oa_report1, fmt);
> > > > > >		uint32_t clock_delta = ticks1 - ticks0;
> > > > > > @@ -950,43 +906,22 @@ gen8_sanity_check_test_oa_reports(const uint32_t *oa_report0,
> > > > > > static bool
> > > > > > init_sys_info(void)
> > > > > > {
> > > > > > -	const char *test_set_name = NULL;
> > > > > > -	struct intel_perf_metric_set *metric_set_iter;
> > > > > > -
> > > > > >		igt_assert_neq(devid, 0);
> > > > > >
> > > > > > -	intel_perf = intel_perf_for_fd(drm_fd);
> > > > > > +	intel_perf = i915_perf_init_sys_info(drm_fd);
> > > > > >		igt_require(intel_perf);
> > > > > >
> > > > > > -	igt_debug("n_eu_slices: %"PRIu64"\n", intel_perf->devinfo.n_eu_slices);
> > > > > > -	igt_debug("n_eu_sub_slices: %"PRIu64"\n", intel_perf->devinfo.n_eu_sub_slices);
> > > > > > -	igt_debug("n_eus: %"PRIu64"\n", intel_perf->devinfo.n_eus);
> > > > > > -	igt_debug("timestamp_frequency = %"PRIu64"\n",
> > > > > > -		  intel_perf->devinfo.timestamp_frequency);
> > > > > > -	igt_assert_neq(intel_perf->devinfo.timestamp_frequency, 0);
> > > > > > -
> > > > > > -	/* We don't have a TestOa metric set for Haswell so use
> > > > > > -	 * RenderBasic
> > > > > > -	 */
> > > > > >		if (IS_HASWELL(devid)) {
> > > > > > -		test_set_name = "RenderBasic";
> > > > > >			read_report_ticks = hsw_read_report_ticks;
> > > > > >			sanity_check_reports = hsw_sanity_check_render_basic_reports;
> > > > > >			undefined_a_counters = hsw_undefined_a_counters;
> > > > > >		} else {
> > > > > > -		test_set_name = "TestOa";
> > > > > >			read_report_ticks = gen8_read_report_ticks;
> > > > > >			sanity_check_reports = gen8_sanity_check_test_oa_reports;
> > > > > >			undefined_a_counters = gen8_undefined_a_counters;
> > > > > >		}
> > > > > >
> > > > > > -	igt_list_for_each_entry(metric_set_iter, &intel_perf->metric_sets, link) {
> > > > > > -		if (strcmp(metric_set_iter->symbol_name, test_set_name) == 0) {
> > > > > > -			test_set = metric_set_iter;
> > > > > > -			break;
> > > > > > -		}
> > > > > > -	}
> > > > > > -
> > > > > > +	test_set = i915_perf_default_set(intel_perf, devid);
> > > > > >		if (!test_set)
> > > > > >			return false;
> > > > > >
> > > > > > @@ -994,14 +929,12 @@ init_sys_info(void)
> > > > > >			  test_set->symbol_name,
> > > > > >			  test_set->hw_config_guid);
> > > > > >
> > > > > > -	intel_perf_load_perf_configs(intel_perf, drm_fd);
> > > > > > -
> > > > > >		if (test_set->perf_oa_metrics_set == 0) {
> > > > > >			igt_debug("Unable to load configurations\n");
> > > > > >			return false;
> > > > > >		}
> > > > > >
> > > > > > -	oa_exp_1_millisec = max_oa_exponent_for_period_lte(1000000);
> > > > > > +	oa_exp_1_millisec = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 1000000);
> > > > > >
> > > > > >		return true;
> > > > > > }
> > > > > > @@ -1911,7 +1844,7 @@ test_low_oa_exponent_permissions(void)
> > > > > >
> > > > > >		igt_waitchildren();
> > > > > >
> > > > > > -	oa_period = timebase_scale(2 << ok_exponent);
> > > > > > +	oa_period = i915_perf_timebase_scale(intel_perf, 2 << ok_exponent);
> > > > > >		oa_freq = NSEC_PER_SEC / oa_period;
> > > > > >		write_u64_file("/proc/sys/dev/i915/oa_max_sample_rate", oa_freq - 100);
> > > > > >
> > > > > > @@ -2003,7 +1936,7 @@ get_time(void)
> > > > > > static void
> > > > > > test_blocking(uint64_t requested_oa_period, bool set_kernel_hrtimer, uint64_t kernel_hrtimer)
> > > > > > {
> > > > > > -	int oa_exponent = max_oa_exponent_for_period_lte(requested_oa_period);
> > > > > > +	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, requested_oa_period);
> > > > > >		uint64_t oa_period = oa_exponent_to_ns(oa_exponent);
> > > > > >		uint64_t properties[] = {
> > > > > >			/* Include OA reports in samples */
> > > > > > @@ -2162,7 +2095,7 @@ test_blocking(uint64_t requested_oa_period, bool set_kernel_hrtimer, uint64_t ke
> > > > > > static void
> > > > > > test_polling(uint64_t requested_oa_period, bool set_kernel_hrtimer, uint64_t kernel_hrtimer)
> > > > > > {
> > > > > > -	int oa_exponent = max_oa_exponent_for_period_lte(requested_oa_period);
> > > > > > +	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, requested_oa_period);
> > > > > >		uint64_t oa_period = oa_exponent_to_ns(oa_exponent);
> > > > > >		uint64_t properties[] = {
> > > > > >			/* Include OA reports in samples */
> > > > > > @@ -2358,7 +2291,7 @@ test_polling(uint64_t requested_oa_period, bool set_kernel_hrtimer, uint64_t ker
> > > > > >
> > > > > > static void test_polling_small_buf(void)
> > > > > > {
> > > > > > -	int oa_exponent = max_oa_exponent_for_period_lte(40 * 1000); /* 40us */
> > > > > > +	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 40 * 1000); /* 40us */
> > > > > >		uint64_t properties[] = {
> > > > > >			/* Include OA reports in samples */
> > > > > >			DRM_I915_PERF_PROP_SAMPLE_OA, true,
> > > > > > @@ -2461,7 +2394,7 @@ num_valid_reports_captured(struct drm_i915_perf_open_param *param,
> > > > > > static void
> > > > > > gen12_test_oa_tlb_invalidate(void)
> > > > > > {
> > > > > > -	int oa_exponent = max_oa_exponent_for_period_lte(30000000);
> > > > > > +	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 30000000);
> > > > > >		uint64_t properties[] = {
> > > > > >			DRM_I915_PERF_PROP_SAMPLE_OA, true,
> > > > > >
> > > > > > @@ -2503,7 +2436,7 @@ static void
> > > > > > test_buffer_fill(void)
> > > > > > {
> > > > > >		/* ~5 micro second period */
> > > > > > -	int oa_exponent = max_oa_exponent_for_period_lte(5000);
> > > > > > +	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 5000);
> > > > > >		uint64_t oa_period = oa_exponent_to_ns(oa_exponent);
> > > > > >		uint64_t properties[] = {
> > > > > >			/* Include OA reports in samples */
> > > > > > @@ -2651,7 +2584,7 @@ static void
> > > > > > test_non_zero_reason(void)
> > > > > > {
> > > > > >		/* ~20 micro second period */
> > > > > > -	int oa_exponent = max_oa_exponent_for_period_lte(20000);
> > > > > > +	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 20000);
> > > > > >		uint64_t properties[] = {
> > > > > >			/* Include OA reports in samples */
> > > > > >			DRM_I915_PERF_PROP_SAMPLE_OA, true,
> > > > > > @@ -2734,7 +2667,7 @@ static void
> > > > > > test_enable_disable(void)
> > > > > > {
> > > > > >		/* ~5 micro second period */
> > > > > > -	int oa_exponent = max_oa_exponent_for_period_lte(5000);
> > > > > > +	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 5000);
> > > > > >		uint64_t oa_period = oa_exponent_to_ns(oa_exponent);
> > > > > >		uint64_t properties[] = {
> > > > > >			/* Include OA reports in samples */
> > > > > > @@ -2885,7 +2818,7 @@ test_enable_disable(void)
> > > > > > static void
> > > > > > test_short_reads(void)
> > > > > > {
> > > > > > -	int oa_exponent = max_oa_exponent_for_period_lte(5000);
> > > > > > +	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 5000);
> > > > > >		uint64_t properties[] = {
> > > > > >			/* Include OA reports in samples */
> > > > > >			DRM_I915_PERF_PROP_SAMPLE_OA, true,
> > > > > > @@ -3447,8 +3380,8 @@ hsw_test_single_ctx_counters(void)
> > > > > >
> > > > > >			/* sanity check that we can pass the delta to timebase_scale */
> > > > > >			igt_assert(delta_ts64 < UINT32_MAX);
> > > > > > -		delta_oa32_ns = timebase_scale(delta_oa32);
> > > > > > -		delta_ts64_ns = timebase_scale(delta_ts64);
> > > > > > +		delta_oa32_ns = i915_perf_timebase_scale(intel_perf, delta_oa32);
> > > > > > +		delta_ts64_ns = i915_perf_timebase_scale(intel_perf, delta_ts64);
> > > > > >
> > > > > >			igt_debug("ts32 delta = %u, = %uns\n",
> > > > > >				  delta_oa32, (unsigned)delta_oa32_ns);
> > > > > > @@ -3498,7 +3431,7 @@ hsw_test_single_ctx_counters(void)
> > > > > > static void
> > > > > > gen8_test_single_ctx_render_target_writes_a_counter(void)
> > > > > > {
> > > > > > -	int oa_exponent = max_oa_exponent_for_period_lte(1000000);
> > > > > > +	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 1000000);
> > > > > >		uint64_t properties[] = {
> > > > > >			DRM_I915_PERF_PROP_CTX_HANDLE, UINT64_MAX, /* updated below */
> > > > > >
> > > > > > @@ -3700,8 +3633,8 @@ gen8_test_single_ctx_render_target_writes_a_counter(void)
> > > > > >
> > > > > >				/* sanity check that we can pass the delta to timebase_scale */
> > > > > >				igt_assert(delta_ts64 < UINT32_MAX);
> > > > > > -			delta_oa32_ns = timebase_scale(delta_oa32);
> > > > > > -			delta_ts64_ns = timebase_scale(delta_ts64);
> > > > > > +			delta_oa32_ns = i915_perf_timebase_scale(intel_perf, delta_oa32);
> > > > > > +			delta_ts64_ns = i915_perf_timebase_scale(intel_perf, delta_ts64);
> > > > > >
> > > > > >				igt_debug("oa32 delta = %u, = %uns\n",
> > > > > >					  delta_oa32, (unsigned)delta_oa32_ns);
> > > > > > @@ -3783,7 +3716,8 @@ gen8_test_single_ctx_render_target_writes_a_counter(void)
> > > > > >					{
> > > > > >						uint32_t time_delta = report[1] - report0_32[1];
> > > > > >
> > > > > > -					if (timebase_scale(time_delta) > 1000000000) {
> > > > > > +					if (i915_perf_timebase_scale(intel_perf,
> > > > > > +								     time_delta) > 1000000000) {
> > > > > >							skip_reason = "prior first mi-rpc";
> > > > > >						}
> > > > > >					}
> > > > > > @@ -3791,7 +3725,8 @@ gen8_test_single_ctx_render_target_writes_a_counter(void)
> > > > > >					{
> > > > > >						uint32_t time_delta = report[1] - report1_32[1];
> > > > > >
> > > > > > -					if (timebase_scale(time_delta) <= 1000000000) {
> > > > > > +					if (i915_perf_timebase_scale(intel_perf,
> > > > > > +								     time_delta) <= 1000000000) {
> > > > > >							igt_debug("    comes after last MI_RPC (%u)\n",
> > > > > >								  report1_32[1]);
> > > > > >							report = report1_32;
> > > > > > @@ -4164,7 +4099,7 @@ static void gen12_single_ctx_helper(void)
> > > > > >
> > > > > >		/* Sanity check that we can pass the delta to timebase_scale */
> > > > > >		igt_assert(delta_ts64 < UINT32_MAX);
> > > > > > -	delta_oa32_ns = timebase_scale(delta_oa32);
> > > > > > +	delta_oa32_ns = i915_perf_timebase_scale(intel_perf, delta_oa32);
> > > > > >		delta_ts64_ns = cs_timebase_scale(delta_ts64);
> > > > > >
> > > > > >		igt_debug("oa32 delta = %u, = %uns\n",
> > > > > > --
> > > > > > 2.25.1
> > > > > >
> >
>
>
>
>

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [igt-dev] [PATCH i-g-t] i915/perf: Make __perf_open() and friends public
  2023-02-08 19:34           ` Umesh Nerlige Ramappa
@ 2023-02-09  9:56             ` Janusz Krzysztofik
  2023-02-09 18:46               ` Umesh Nerlige Ramappa
  0 siblings, 1 reply; 18+ messages in thread
From: Janusz Krzysztofik @ 2023-02-09  9:56 UTC (permalink / raw)
  To: Umesh Nerlige Ramappa; +Cc: igt-dev, Chris Wilson

On Wednesday, 8 February 2023 20:34:06 CET Umesh Nerlige Ramappa wrote:
> On Wed, Feb 08, 2023 at 11:09:23AM +0100, Janusz Krzysztofik wrote:
> >On Tuesday, 7 February 2023 21:15:26 CET Dixit, Ashutosh wrote:
> >> On Tue, 07 Feb 2023 12:04:17 -0800, Janusz Krzysztofik wrote:
> >> >
> >> > Hi Umesh,
> >> >
> >> > On Tuesday, 7 February 2023 20:33:50 CET Umesh Nerlige Ramappa wrote:
> >> > > On Tue, Feb 07, 2023 at 11:25:00AM -0800, Umesh Nerlige Ramappa 
wrote:
> >> > > >I wouldn't do this. Please keep the changes local to the specific 
test
> >> > > >that you implemented in your first rev. While it is a good idea to
> >> > > >have some of the perf capabilities in the library, this is way too
> >> > > >much churn to implement a specific test for the original failure.
> >> > > >Unless multiple IGT subsystems are already dependent on perf APIs to
> >> > > >implement multiple tests, let's not do this.
> >> > > >
> >> > >
> >> > > Also note that the perf library implemented in IGT is not entirely 
used
> >> > > by IGT tests alone. The library is also linked to GPUvis software. 
Only
> >> > > a few pieces of reusable code in the perf library are used by IGT tests.
> >> >
> >> > Do you think that my changes will break other users?  How?
> >> >
> >> > Also, it looks like there are somehow conflicting expectations from 
different
> >> > reviewers.  Ashutosh wanted the new subtest to be implemented outside 
of i915/
> >> > perf test.  That's why I proposed to extend the library with open/close 
and
> >> > related helpers, just to avoid code duplication, and I'm about to 
resend it in
> >> > series with the new subtest implemented inside gem_ctx_exec.  Now, 
after I
> >> > submitted this patch for initial review, you say that a specific test 
is not
> >> > the way to go.  What are you afraid of?
> >> >
> >> > Whose expectations should I try to satisfy in order to have a subtest 
accepted
> >> > and merged?  Or should I just give up and duplicate the code from i915/
perf in
> >> > another test?  Or maybe you can have a look at the whole series before 
you
> >> > decide?
> >>
> >> Hi Janusz,
> >>
> >> I agree with Umesh. Given that here perf is just being used as a 'dummy
> >> workload' let's just duplicate the minimal code required for perf
> >> open/close wherever we are adding the new test. This will keep the real
> >> perf functionality undisturbed for reasons Umesh cited.
> >
> >TBH, I can't see any good justification among those reasons mentioned: "too
> >much churn", "unless ... already dependent", "not entirely used by IGT 
tests",
> >"linked to GPUvis software", "only a few pieces of reusable code ... used 
by
> >IGT" -- which of those justifies duplication of i915 perf code in IGT 
tests?
> 
> You yourself mentioned that this is not related to perf. It's just that 
> perf uses some code in i915 that does barrier related stuff which helps 
> you to reproduce the issue. Why can't that barrier-related-stuff be 
> implemented in IGT without the use of perf APIs?

Because inside the driver I found no ways to trigger the issue (within a time 
period reasonable from CI perspective) other than calling
intel_context_prepare_remote_request().  Only perf and gen8 sseu call that 
function.  Out of the two, perf was my choice because:
- perf matched the user scenario reported as the one that could trigger the 
  bug,
- we already had a work-in-progress subtest added to tests/i915/perf.c 
  even before my root cause analysis was completed.

> If that's a lot of 
> effort and it's quicker to reproduce this issue using perf APIs, then 
> that's fine with me, but keep it outside of perf library 

Why?
(assuming by "it" you mean some useful functions now in tests/i915/perf.c)

> and maybe add a 
> note saying that this test can be improved by figuring out how to do 
> barrier related execution in IGT in future. 

I don't understand what you mean by "in IGT" here.  Isn't lib/i915/perf.c "in 
IGT" (a part of IGT)?

> I don't see any 
> justification to modify perf library for an issue that's not even perf 
> related.

Not modify, only extend with a wrapper around DRM_IOCTL_I915_PERF_OPEN and 
helpers it depends on.

Justification why we need to call DRM_IOCTL_I915_PERF_OPEN from some new 
subtests: extend CI coverage over some rarely used processing paths.

Justification why add reusable code to a library: avoid code duplication.

Justification why add it to perf library: no doubt DRM_IOCTL_I915_PERF_OPEN is 
perf related, I believe.

> I believe you also mentioned somewhere that the issue was fixed by some 
> 'unknown' code changes to i915 and you are not able to reproduce
> it consistently with this test now.

No, I must have missed my point while clarifying things if that's how you've 
read them, sorry.  The issue is reproducible.  CI results from my trybot 
attempt clearly confirm that:
https://intel-gfx-ci.01.org/tree/drm-tip/TrybotIGT_699/bat-all.html?
testfilter=barrier-race

Results from preliminary fixes tested with the new IGT subtest on trybot:
https://intel-gfx-ci.01.org/tree/drm-tip/Trybot_113662v1/bat-all.html?
testfilter=barrier-race

> 
> I have considered the above factors to suggest that this should not 
> reside in perf library.
> 
> >Again, do you think that my changes can break other (non-IGT) users?  
> >How?
> 
> I didn't say that your changes break anything. I was stating that the 
> library code is shared across different tools.

But why do you think that an IGT library shared across different tools can't 
be extended with a few functions needed by IGT tests?

Thanks,
Janusz


> 
> Thanks,
> Umesh
> 
> >
> >Anyway, assuming you are the "owner" of lib/i915/perf.c, in order to 
satisfy
> >your (still not clear for me) requirements I'm already working on a new
> >version of my patch, with the i915 perf code duplicated as needed.
> >
> >Thanks,
> >Janusz
> >
> >>
> >> Thanks.
> >> --
> >> Ashutosh
> >>
> >
> >
> >
> >
> 




^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [igt-dev] [PATCH i-g-t] i915/perf: Make __perf_open() and friends public
  2023-02-09  6:30           ` Dixit, Ashutosh
@ 2023-02-09 10:02             ` Janusz Krzysztofik
  0 siblings, 0 replies; 18+ messages in thread
From: Janusz Krzysztofik @ 2023-02-09 10:02 UTC (permalink / raw)
  To: Dixit, Ashutosh; +Cc: igt-dev, Chris Wilson

On Thursday, 9 February 2023 07:30:23 CET Dixit, Ashutosh wrote:
> On Wed, 08 Feb 2023 10:23:07 -0800, Janusz Krzysztofik wrote:
> >
> > On Wednesday, 8 February 2023 18:53:11 CET Dixit, Ashutosh wrote:
> > > On Wed, 08 Feb 2023 06:35:47 -0800, Kamil Konieczny wrote:
> > > >
> > > > Hi Umesh,
> > > >
> > > > On 2023-02-07 at 11:33:50 -0800, Umesh Nerlige Ramappa wrote:
> > > > > On Tue, Feb 07, 2023 at 11:25:00AM -0800, Umesh Nerlige Ramappa wrote:
> > > > > > I wouldn't do this. Please keep the changes local to the specific test
> > > > > > that you implemented in your first rev. While it is a good idea to have
> > > > > > some of the perf capabilities in the library, this is way too much
> > > > > > churn to implement a specific test for the original failure. Unless
> > > > > > multiple IGT subsystems are already dependent on perf APIs to implement
> > > > > > multiple tests, let's not do this.
> > > > > >
> > > > >
> > > > > Also note that the perf library implemented in IGT is not entirely used by
> > > > > IGT tests alone. The library is also linked to GPUvis software. Only a few
> > > > > pieces of reusable code in the perf library are used by IGT tests.
> > > >
> > > > Could you give http(s) link(s) to this software?
> > > >
> > > > I checked https://github.com/mikesart/gpuvis
> > > > and there is no note about intel igt dependency.
> > >
> > > Hi Kamil,
> > >
> > > The connection between IGT and gpuvis is via this:
> > >
> > > tools/i915-perf/i915_perf_recorder.c
> > >
> > > So the recorder records the metrics/counters in a file and these are then
> > > fed to gpuvis.
> >
> > How are those few proposed functions, required by IGT tests, supposed to break
> > that functionality?
> >
> > > >
> > > > imho we can have separate i915_perf lib with functions needed by
> > > > new test but if you are concerned about it we can start with code
> > > > duplication and refactor later.
> > >
> > > Thanks Kamil and Janusz!
> >
> > Please understand that's still a negative choice, selected only because of no
> > answer from you (I mean Umesh and you) to questions like the one above.  We
> > simply can't afford delays in adding new required subtests because you don't
> > like us touching tests/i915/perf.c and lib/i915/perf.c for some reason still
> > not clear to me.  That's why we are forced to use a solution which seems sub-
> > optimal from our (IGT) POV.
> 
> Hi Janusz,
> 
> I think you are mistaken in thinking that any code

Let's focus on my proposed code.

> which is shared in a
> couple of tests can be put in the IGT library. The IGT library functions
> are more than just shared code.
> 
> For example take the function below from your patch:
> 
> +int i915_perf_open_for_devid(int drm_fd, uint32_t devid, struct intel_perf *intel_perf, int *pm_fd)
> +{
> +       struct intel_perf_metric_set *metric_set = i915_perf_default_set(intel_perf, devid);
> +       uint64_t oa_exp = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 1000000);
> +       uint64_t properties[] = {
> +               DRM_I915_PERF_PROP_SAMPLE_OA, true,
> +               DRM_I915_PERF_PROP_OA_METRICS_SET, 0,
> +               DRM_I915_PERF_PROP_OA_FORMAT, 0,
> +               DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp,
> +       };
> +       struct drm_i915_perf_open_param param = {
> +               .flags = I915_PERF_FLAG_FD_CLOEXEC,
> +               .num_properties = sizeof(properties) / 16,
> +               .properties_ptr = to_user_pointer(properties),
> +       };
> 
> This function is setting particular perf_open properties which the perf
> tests are using. This function should be in the tests, not in the library
> since another client of the library might want to use different perf_open
> properties.

Unless there are a couple of clients which don't care much, just expect some 
reasonable defaults.

I think it's possible to identify a default set of parameters, applicable in 
cases like use of DRM_IOCTL_I915_PERF_OPEN + close in a loop as a dumb 
workload.  The point here is that such otherwise useless workload, when added 
to some IGT tests, could extend their coverage over some otherwise rarely used 
processing paths inside the driver.

But anyway, why should we duplicate code of functions from tests/i915/perf.c 
like timebase_scale(), max_oa_exponent_for_period_lte() or (parts of) 
init_sys_info() in other tests that need them?

Please also note that we wouldn't be discussing what is a good candidate for a 
library and what is not if I hadn't attempted to move the code from 
tests/i915/perf.c to lib/i915/perf.c in response to your request not to add my 
new subtest to the former, which was the most straightforward approach, I believe.

> It is for reasons such as this that we are saying unless we can demonstrate
> that some functions logically belong in the perf library (which would mean
> analyzing the different clients of the perf lib) we shouldn't add them to
> the library. The library is not a place to add just *any* shared code.

Where does a reusable wrapper around DRM_IOCTL_I915_PERF_OPEN and helpers it 
depends on belong?

Thanks,
Janusz

> 
> Thanks.
> --
> Ashutosh
> 
> 
> 
> >
> > Thanks,
> > Janusz
> >
> > >
> > > >
> > > > Regards,
> > > > Kamil
> > > >
> > > > >
> > > > > > Thanks,
> > > > > > Umesh
> > > > > >
> > > > > > On Tue, Feb 07, 2023 at 11:11:21AM +0100, Janusz Krzysztofik wrote:
> > > > > > > We need new subtests that exercise interaction between i915 perf open/
> > > > > > > close and other i915 subsystems from the point of view of those other
> > > > > > > subsystems.  Allow other tests to reuse __perf_open/close() family of
> > > > > > > functions, now inside i915/perf test, by moving (sharable parts of)
> > > > > > > them to i915/perf library.
> > > > > > >
> > > > > > > Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik@linux.intel.com>
> > > > > > > ---
> > > > > > > lib/i915/perf.c   | 130 ++++++++++++++++++++++++++++++++++++++++++++++
> > > > > > > lib/i915/perf.h   |  15 ++++++
> > > > > > > lib/meson.build   |   1 +
> > > > > > > tests/i915/perf.c | 121 ++++++++++--------------------------------
> > > > > > > 4 files changed, 174 insertions(+), 93 deletions(-)
> > > > > > >
> > > > > > > diff --git a/lib/i915/perf.c b/lib/i915/perf.c
> > > > > > > index 6c7a192558..e71d637eb5 100644
> > > > > > > --- a/lib/i915/perf.c
> > > > > > > +++ b/lib/i915/perf.c
> > > > > > > @@ -39,7 +39,9 @@
> > > > > > >
> > > > > > > #include "i915_pciids.h"
> > > > > > >
> > > > > > > +#include "igt_aux.h"
> > > > > > > #include "intel_chipset.h"
> > > > > > > +#include "ioctl_wrappers.h"
> > > > > > > #include "perf.h"
> > > > > > >
> > > > > > > #include "i915_perf_metrics_hsw.h"
> > > > > > > @@ -1008,3 +1010,131 @@ const char *intel_perf_read_report_reason(const struct intel_perf *perf,
> > > > > > >
> > > > > > >		return "unknown";
> > > > > > > }
> > > > > > > +
> > > > > > > +uint64_t i915_perf_timebase_scale(struct intel_perf *intel_perf, uint32_t u32_delta)
> > > > > > > +{
> > > > > > > +	return ((uint64_t)u32_delta * NSEC_PER_SEC) / intel_perf->devinfo.timestamp_frequency;
> > > > > > > +}
> > > > > > > +
> > > > > > > +/* Returns: the largest OA exponent that will still result in a sampling period
> > > > > > > + * less than or equal to the given @period.
> > > > > > > + */
> > > > > > > +int i915_perf_max_oa_exponent_for_period_lte(struct intel_perf *intel_perf, uint64_t period)
> > > > > > > +{
> > > > > > > +	/* NB: timebase_scale() takes a uint32_t and an exponent of 30
> > > > > > > +	 * would already represent a period of ~3 minutes so there's
> > > > > > > +	 * really no need to consider higher exponents.
> > > > > > > +	 */
> > > > > > > +	for (int i = 0; i < 30; i++) {
> > > > > > > +		uint64_t oa_period = i915_perf_timebase_scale(intel_perf, 2 << i);
> > > > > > > +
> > > > > > > +		if (oa_period > period)
> > > > > > > +			return max(0, i - 1);
> > > > > > > +	}
> > > > > > > +
> > > > > > > +	igt_assert(!"reached");
> > > > > > > +	return -1;
> > > > > > > +}
> > > > > > > +
> > > > > > > +struct intel_perf_metric_set *i915_perf_default_set(struct intel_perf *intel_perf, uint32_t devid)
> > > > > > > +{
> > > > > > > +	struct intel_perf_metric_set *metric_set = NULL, *metric_set_iter;
> > > > > > > +	const char *metric_set_name = NULL;
> > > > > > > +
> > > > > > > +	igt_assert_neq(devid, 0);
> > > > > > > +
> > > > > > > +	/*
> > > > > > > +	 * We don't have a TestOa metric set for Haswell so use
> > > > > > > +	 * RenderBasic
> > > > > > > +	 */
> > > > > > > +	if (IS_HASWELL(devid))
> > > > > > > +		metric_set_name = "RenderBasic";
> > > > > > > +	else
> > > > > > > +		metric_set_name = "TestOa";
> > > > > > > +
> > > > > > > +	igt_list_for_each_entry(metric_set_iter, &intel_perf->metric_sets, link) {
> > > > > > > +		if (strcmp(metric_set_iter->symbol_name, metric_set_name) == 0) {
> > > > > > > +			metric_set = metric_set_iter;
> > > > > > > +			break;
> > > > > > > +		}
> > > > > > > +	}
> > > > > > > +
> > > > > > > +	return metric_set;
> > > > > > > +}
> > > > > > > +
> > > > > > > +struct intel_perf *i915_perf_init_sys_info(int drm_fd)
> > > > > > > +{
> > > > > > > +	struct intel_perf *intel_perf;
> > > > > > > +
> > > > > > > +	intel_perf = intel_perf_for_fd(drm_fd);
> > > > > > > +	if (!intel_perf)
> > > > > > > +		return NULL;
> > > > > > > +
> > > > > > > +	igt_debug("n_eu_slices: %"PRIu64"\n", intel_perf->devinfo.n_eu_slices);
> > > > > > > +	igt_debug("n_eu_sub_slices: %"PRIu64"\n", intel_perf->devinfo.n_eu_sub_slices);
> > > > > > > +	igt_debug("n_eus: %"PRIu64"\n", intel_perf->devinfo.n_eus);
> > > > > > > +	igt_debug("timestamp_frequency = %"PRIu64"\n",
> > > > > > > +		  intel_perf->devinfo.timestamp_frequency);
> > > > > > > +	igt_assert_neq(intel_perf->devinfo.timestamp_frequency, 0);
> > > > > > > +
> > > > > > > +	intel_perf_load_perf_configs(intel_perf, drm_fd);
> > > > > > > +
> > > > > > > +	return intel_perf;
> > > > > > > +}
> > > > > > > +
> > > > > > > +int i915_perf_open(int drm_fd, struct drm_i915_perf_open_param *param, int *pm_fd)
> > > > > > > +{
> > > > > > > +	int32_t pm_value = 0;
> > > > > > > +	int ret;
> > > > > > > +
> > > > > > > +	ret = perf_ioctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, param);
> > > > > > > +
> > > > > > > +	igt_assert(ret >= 0);
> > > > > > > +	errno = 0;
> > > > > > > +
> > > > > > > +	if (pm_fd) {
> > > > > > > +		*pm_fd = open("/dev/cpu_dma_latency", O_RDWR);
> > > > > > > +		igt_assert(*pm_fd >= 0);
> > > > > > > +
> > > > > > > +		igt_assert_eq(write(*pm_fd, &pm_value, sizeof(pm_value)), sizeof(pm_value));
> > > > > > > +	}
> > > > > > > +
> > > > > > > +	return ret;
> > > > > > > +}
> > > > > > > +
> > > > > > > +int i915_perf_open_for_devid(int drm_fd, uint32_t devid, struct intel_perf *intel_perf, int *pm_fd)
> > > > > > > +{
> > > > > > > +	struct intel_perf_metric_set *metric_set = i915_perf_default_set(intel_perf, devid);
> > > > > > > +	uint64_t oa_exp = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 1000000);
> > > > > > > +	uint64_t properties[] = {
> > > > > > > +		DRM_I915_PERF_PROP_SAMPLE_OA, true,
> > > > > > > +		DRM_I915_PERF_PROP_OA_METRICS_SET, 0,
> > > > > > > +		DRM_I915_PERF_PROP_OA_FORMAT, 0,
> > > > > > > +		DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp,
> > > > > > > +	};
> > > > > > > +	struct drm_i915_perf_open_param param = {
> > > > > > > +		.flags = I915_PERF_FLAG_FD_CLOEXEC,
> > > > > > > +		.num_properties = sizeof(properties) / 16,
> > > > > > > +		.properties_ptr = to_user_pointer(properties),
> > > > > > > +	};
> > > > > > > +
> > > > > > > +	igt_assert(metric_set);
> > > > > > > +	igt_assert(metric_set->perf_oa_metrics_set);
> > > > > > > +	igt_assert(oa_exp >= 0);
> > > > > > > +
> > > > > > > +	igt_debug("%s metric set UUID = %s\n",
> > > > > > > +		  metric_set->symbol_name,
> > > > > > > +		  metric_set->hw_config_guid);
> > > > > > > +
> > > > > > > +	properties[3] = metric_set->perf_oa_metrics_set;
> > > > > > > +	properties[5] = metric_set->perf_oa_format;
> > > > > > > +
> > > > > > > +	return i915_perf_open(drm_fd, &param, pm_fd);
> > > > > > > +}
> > > > > > > +
> > > > > > > +void i915_perf_close(int stream_fd, int pm_fd)
> > > > > > > +{
> > > > > > > +	close(stream_fd);
> > > > > > > +	if (pm_fd >= 0)
> > > > > > > +		close(pm_fd);
> > > > > > > +}
> > > > > > > diff --git a/lib/i915/perf.h b/lib/i915/perf.h
> > > > > > > index e6e60dc997..c9cd28be47 100644
> > > > > > > --- a/lib/i915/perf.h
> > > > > > > +++ b/lib/i915/perf.h
> > > > > > > @@ -351,6 +351,21 @@ uint64_t intel_perf_read_record_timestamp_raw(const struct intel_perf *perf,
> > > > > > > const char *intel_perf_read_report_reason(const struct intel_perf *perf,
> > > > > > >						  const struct drm_i915_perf_record_header *record);
> > > > > > >
> > > > > > > +uint64_t i915_perf_timebase_scale(struct intel_perf *intel_perf, uint32_t u32_delta);
> > > > > > > +
> > > > > > > +int i915_perf_max_oa_exponent_for_period_lte(struct intel_perf *intel_perf, uint64_t period);
> > > > > > > +
> > > > > > > +struct intel_perf_metric_set *i915_perf_default_set(struct intel_perf *intel_perf, uint32_t devid);
> > > > > > > +
> > > > > > > +struct intel_perf *i915_perf_init_sys_info(int drm_fd);
> > > > > > > +
> > > > > > > +struct drm_i915_perf_open_param;
> > > > > > > +int i915_perf_open(int drm_fd, struct drm_i915_perf_open_param *param, int *pm_fd);
> > > > > > > +
> > > > > > > +int i915_perf_open_for_devid(int drm_fd, uint32_t devid, struct intel_perf *intel_perf, int *pm_fd);
> > > > > > > +
> > > > > > > +void i915_perf_close(int drm_fd, int pm_fd);
> > > > > > > +
> > > > > > > #ifdef __cplusplus
> > > > > > > };
> > > > > > > #endif
> > > > > > > diff --git a/lib/meson.build b/lib/meson.build
> > > > > > > index d49b78ca1a..e79b31090b 100644
> > > > > > > --- a/lib/meson.build
> > > > > > > +++ b/lib/meson.build
> > > > > > > @@ -258,6 +258,7 @@ lib_igt_drm_fdinfo = declare_dependency(link_with : lib_igt_drm_fdinfo_build,
> > > > > > >					  include_directories : inc)
> > > > > > > i915_perf_files = [
> > > > > > >  'igt_list.c',
> > > > > > > +  'igt_tools_stub.c',
> > > > > > >  'i915/perf.c',
> > > > > > >  'i915/perf_data_reader.c',
> > > > > > > ]
> > > > > > > diff --git a/tests/i915/perf.c b/tests/i915/perf.c
> > > > > > > index dd1f1ac399..a3f59d143b 100644
> > > > > > > --- a/tests/i915/perf.c
> > > > > > > +++ b/tests/i915/perf.c
> > > > > > > @@ -287,21 +287,16 @@ pretty_print_oa_period(uint64_t oa_period_ns)
> > > > > > > static void
> > > > > > > __perf_close(int fd)
> > > > > > > {
> > > > > > > -	close(fd);
> > > > > > > +	i915_perf_close(fd, pm_fd);
> > > > > > >		stream_fd = -1;
> > > > > > >
> > > > > > > -	if (pm_fd >= 0) {
> > > > > > > -		close(pm_fd);
> > > > > > > +	if (pm_fd >= 0)
> > > > > > >			pm_fd = -1;
> > > > > > > -	}
> > > > > > > }
> > > > > > >
> > > > > > > static int
> > > > > > > __perf_open(int fd, struct drm_i915_perf_open_param *param, bool prevent_pm)
> > > > > > > {
> > > > > > > -	int ret;
> > > > > > > -	int32_t pm_value = 0;
> > > > > > > -
> > > > > > >		if (stream_fd >= 0)
> > > > > > >			__perf_close(stream_fd);
> > > > > > >		if (pm_fd >= 0) {
> > > > > > > @@ -309,19 +304,7 @@ __perf_open(int fd, struct drm_i915_perf_open_param *param, bool prevent_pm)
> > > > > > >			pm_fd = -1;
> > > > > > >		}
> > > > > > >
> > > > > > > -	ret = igt_ioctl(fd, DRM_IOCTL_I915_PERF_OPEN, param);
> > > > > > > -
> > > > > > > -	igt_assert(ret >= 0);
> > > > > > > -	errno = 0;
> > > > > > > -
> > > > > > > -	if (prevent_pm) {
> > > > > > > -		pm_fd = open("/dev/cpu_dma_latency", O_RDWR);
> > > > > > > -		igt_assert(pm_fd >= 0);
> > > > > > > -
> > > > > > > -		igt_assert_eq(write(pm_fd, &pm_value, sizeof(pm_value)), sizeof(pm_value));
> > > > > > > -	}
> > > > > > > -
> > > > > > > -	return ret;
> > > > > > > +	return i915_perf_open(fd, param, prevent_pm ? &pm_fd : NULL);
> > > > > > > }
> > > > > > >
> > > > > > > static int
> > > > > > > @@ -465,33 +448,6 @@ cs_timebase_scale(uint32_t u32_delta)
> > > > > > >		return ((uint64_t)u32_delta * NSEC_PER_SEC) / cs_timestamp_frequency(drm_fd);
> > > > > > > }
> > > > > > >
> > > > > > > -static uint64_t
> > > > > > > -timebase_scale(uint32_t u32_delta)
> > > > > > > -{
> > > > > > > -	return ((uint64_t)u32_delta * NSEC_PER_SEC) / intel_perf->devinfo.timestamp_frequency;
> > > > > > > -}
> > > > > > > -
> > > > > > > -/* Returns: the largest OA exponent that will still result in a sampling period
> > > > > > > - * less than or equal to the given @period.
> > > > > > > - */
> > > > > > > -static int
> > > > > > > -max_oa_exponent_for_period_lte(uint64_t period)
> > > > > > > -{
> > > > > > > -	/* NB: timebase_scale() takes a uint32_t and an exponent of 30
> > > > > > > -	 * would already represent a period of ~3 minutes so there's
> > > > > > > -	 * really no need to consider higher exponents.
> > > > > > > -	 */
> > > > > > > -	for (int i = 0; i < 30; i++) {
> > > > > > > -		uint64_t oa_period = timebase_scale(2 << i);
> > > > > > > -
> > > > > > > -		if (oa_period > period)
> > > > > > > -			return max(0, i - 1);
> > > > > > > -	}
> > > > > > > -
> > > > > > > -	igt_assert(!"reached");
> > > > > > > -	return -1;
> > > > > > > -}
> > > > > > > -
> > > > > > > /* Return: the largest OA exponent that will still result in a sampling
> > > > > > > * frequency greater than the given @frequency.
> > > > > > > */
> > > > > > > @@ -502,7 +458,7 @@ max_oa_exponent_for_freq_gt(uint64_t frequency)
> > > > > > >
> > > > > > >		igt_assert_neq(period, 0);
> > > > > > >
> > > > > > > -	return max_oa_exponent_for_period_lte(period - 1);
> > > > > > > +	return i915_perf_max_oa_exponent_for_period_lte(intel_perf, period - 1);
> > > > > > > }
> > > > > > >
> > > > > > > static uint64_t
> > > > > > > @@ -626,7 +582,7 @@ hsw_sanity_check_render_basic_reports(const uint32_t *oa_report0,
> > > > > > >					      const uint32_t *oa_report1,
> > > > > > >					      enum drm_i915_oa_format fmt)
> > > > > > > {
> > > > > > > -	uint32_t time_delta = timebase_scale(oa_report1[1] - oa_report0[1]);
> > > > > > > +	uint32_t time_delta = i915_perf_timebase_scale(intel_perf, oa_report1[1] - oa_report0[1]);
> > > > > > >		uint32_t clock_delta;
> > > > > > >		uint32_t max_delta;
> > > > > > >		struct oa_format format = get_oa_format(fmt);
> > > > > > > @@ -832,7 +788,7 @@ gen8_sanity_check_test_oa_reports(const uint32_t *oa_report0,
> > > > > > >					  enum drm_i915_oa_format fmt)
> > > > > > > {
> > > > > > >		struct oa_format format = get_oa_format(fmt);
> > > > > > > -	uint32_t time_delta = timebase_scale(oa_report1[1] - oa_report0[1]);
> > > > > > > +	uint32_t time_delta = i915_perf_timebase_scale(intel_perf, oa_report1[1] - oa_report0[1]);
> > > > > > >		uint32_t ticks0 = read_report_ticks(oa_report0, fmt);
> > > > > > >		uint32_t ticks1 = read_report_ticks(oa_report1, fmt);
> > > > > > >		uint32_t clock_delta = ticks1 - ticks0;
> > > > > > > @@ -950,43 +906,22 @@ gen8_sanity_check_test_oa_reports(const uint32_t *oa_report0,
> > > > > > > static bool
> > > > > > > init_sys_info(void)
> > > > > > > {
> > > > > > > -	const char *test_set_name = NULL;
> > > > > > > -	struct intel_perf_metric_set *metric_set_iter;
> > > > > > > -
> > > > > > >		igt_assert_neq(devid, 0);
> > > > > > >
> > > > > > > -	intel_perf = intel_perf_for_fd(drm_fd);
> > > > > > > +	intel_perf = i915_perf_init_sys_info(drm_fd);
> > > > > > >		igt_require(intel_perf);
> > > > > > >
> > > > > > > -	igt_debug("n_eu_slices: %"PRIu64"\n", intel_perf->devinfo.n_eu_slices);
> > > > > > > -	igt_debug("n_eu_sub_slices: %"PRIu64"\n", intel_perf->devinfo.n_eu_sub_slices);
> > > > > > > -	igt_debug("n_eus: %"PRIu64"\n", intel_perf->devinfo.n_eus);
> > > > > > > -	igt_debug("timestamp_frequency = %"PRIu64"\n",
> > > > > > > -		  intel_perf->devinfo.timestamp_frequency);
> > > > > > > -	igt_assert_neq(intel_perf->devinfo.timestamp_frequency, 0);
> > > > > > > -
> > > > > > > -	/* We don't have a TestOa metric set for Haswell so use
> > > > > > > -	 * RenderBasic
> > > > > > > -	 */
> > > > > > >		if (IS_HASWELL(devid)) {
> > > > > > > -		test_set_name = "RenderBasic";
> > > > > > >			read_report_ticks = hsw_read_report_ticks;
> > > > > > >			sanity_check_reports = hsw_sanity_check_render_basic_reports;
> > > > > > >			undefined_a_counters = hsw_undefined_a_counters;
> > > > > > >		} else {
> > > > > > > -		test_set_name = "TestOa";
> > > > > > >			read_report_ticks = gen8_read_report_ticks;
> > > > > > >			sanity_check_reports = gen8_sanity_check_test_oa_reports;
> > > > > > >			undefined_a_counters = gen8_undefined_a_counters;
> > > > > > >		}
> > > > > > >
> > > > > > > -	igt_list_for_each_entry(metric_set_iter, &intel_perf->metric_sets, link) {
> > > > > > > -		if (strcmp(metric_set_iter->symbol_name, test_set_name) == 0) {
> > > > > > > -			test_set = metric_set_iter;
> > > > > > > -			break;
> > > > > > > -		}
> > > > > > > -	}
> > > > > > > -
> > > > > > > +	test_set = i915_perf_default_set(intel_perf, devid);
> > > > > > >		if (!test_set)
> > > > > > >			return false;
> > > > > > >
> > > > > > > @@ -994,14 +929,12 @@ init_sys_info(void)
> > > > > > >			  test_set->symbol_name,
> > > > > > >			  test_set->hw_config_guid);
> > > > > > >
> > > > > > > -	intel_perf_load_perf_configs(intel_perf, drm_fd);
> > > > > > > -
> > > > > > >		if (test_set->perf_oa_metrics_set == 0) {
> > > > > > >			igt_debug("Unable to load configurations\n");
> > > > > > >			return false;
> > > > > > >		}
> > > > > > >
> > > > > > > -	oa_exp_1_millisec = max_oa_exponent_for_period_lte(1000000);
> > > > > > > +	oa_exp_1_millisec = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 1000000);
> > > > > > >
> > > > > > >		return true;
> > > > > > > }
> > > > > > > @@ -1911,7 +1844,7 @@ test_low_oa_exponent_permissions(void)
> > > > > > >
> > > > > > >		igt_waitchildren();
> > > > > > >
> > > > > > > -	oa_period = timebase_scale(2 << ok_exponent);
> > > > > > > +	oa_period = i915_perf_timebase_scale(intel_perf, 2 << ok_exponent);
> > > > > > >		oa_freq = NSEC_PER_SEC / oa_period;
> > > > > > >		write_u64_file("/proc/sys/dev/i915/oa_max_sample_rate", oa_freq - 100);
> > > > > > >
> > > > > > > @@ -2003,7 +1936,7 @@ get_time(void)
> > > > > > > static void
> > > > > > > test_blocking(uint64_t requested_oa_period, bool set_kernel_hrtimer, uint64_t kernel_hrtimer)
> > > > > > > {
> > > > > > > -	int oa_exponent = max_oa_exponent_for_period_lte(requested_oa_period);
> > > > > > > +	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, requested_oa_period);
> > > > > > >		uint64_t oa_period = oa_exponent_to_ns(oa_exponent);
> > > > > > >		uint64_t properties[] = {
> > > > > > >			/* Include OA reports in samples */
> > > > > > > @@ -2162,7 +2095,7 @@ test_blocking(uint64_t requested_oa_period, bool set_kernel_hrtimer, uint64_t ke
> > > > > > > static void
> > > > > > > test_polling(uint64_t requested_oa_period, bool set_kernel_hrtimer, uint64_t kernel_hrtimer)
> > > > > > > {
> > > > > > > -	int oa_exponent = max_oa_exponent_for_period_lte(requested_oa_period);
> > > > > > > +	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, requested_oa_period);
> > > > > > >		uint64_t oa_period = oa_exponent_to_ns(oa_exponent);
> > > > > > >		uint64_t properties[] = {
> > > > > > >			/* Include OA reports in samples */
> > > > > > > @@ -2358,7 +2291,7 @@ test_polling(uint64_t requested_oa_period, bool set_kernel_hrtimer, uint64_t ker
> > > > > > >
> > > > > > > static void test_polling_small_buf(void)
> > > > > > > {
> > > > > > > -	int oa_exponent = max_oa_exponent_for_period_lte(40 * 1000); /* 40us */
> > > > > > > +	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 40 * 1000); /* 40us */
> > > > > > >		uint64_t properties[] = {
> > > > > > >			/* Include OA reports in samples */
> > > > > > >			DRM_I915_PERF_PROP_SAMPLE_OA, true,
> > > > > > > @@ -2461,7 +2394,7 @@ num_valid_reports_captured(struct drm_i915_perf_open_param *param,
> > > > > > > static void
> > > > > > > gen12_test_oa_tlb_invalidate(void)
> > > > > > > {
> > > > > > > -	int oa_exponent = max_oa_exponent_for_period_lte(30000000);
> > > > > > > +	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 30000000);
> > > > > > >		uint64_t properties[] = {
> > > > > > >			DRM_I915_PERF_PROP_SAMPLE_OA, true,
> > > > > > >
> > > > > > > @@ -2503,7 +2436,7 @@ static void
> > > > > > > test_buffer_fill(void)
> > > > > > > {
> > > > > > >		/* ~5 micro second period */
> > > > > > > -	int oa_exponent = max_oa_exponent_for_period_lte(5000);
> > > > > > > +	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 5000);
> > > > > > >		uint64_t oa_period = oa_exponent_to_ns(oa_exponent);
> > > > > > >		uint64_t properties[] = {
> > > > > > >			/* Include OA reports in samples */
> > > > > > > @@ -2651,7 +2584,7 @@ static void
> > > > > > > test_non_zero_reason(void)
> > > > > > > {
> > > > > > >		/* ~20 micro second period */
> > > > > > > -	int oa_exponent = max_oa_exponent_for_period_lte(20000);
> > > > > > > +	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 20000);
> > > > > > >		uint64_t properties[] = {
> > > > > > >			/* Include OA reports in samples */
> > > > > > >			DRM_I915_PERF_PROP_SAMPLE_OA, true,
> > > > > > > @@ -2734,7 +2667,7 @@ static void
> > > > > > > test_enable_disable(void)
> > > > > > > {
> > > > > > >		/* ~5 micro second period */
> > > > > > > -	int oa_exponent = max_oa_exponent_for_period_lte(5000);
> > > > > > > +	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 5000);
> > > > > > >		uint64_t oa_period = oa_exponent_to_ns(oa_exponent);
> > > > > > >		uint64_t properties[] = {
> > > > > > >			/* Include OA reports in samples */
> > > > > > > @@ -2885,7 +2818,7 @@ test_enable_disable(void)
> > > > > > > static void
> > > > > > > test_short_reads(void)
> > > > > > > {
> > > > > > > -	int oa_exponent = max_oa_exponent_for_period_lte(5000);
> > > > > > > +	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 5000);
> > > > > > >		uint64_t properties[] = {
> > > > > > >			/* Include OA reports in samples */
> > > > > > >			DRM_I915_PERF_PROP_SAMPLE_OA, true,
> > > > > > > @@ -3447,8 +3380,8 @@ hsw_test_single_ctx_counters(void)
> > > > > > >
> > > > > > >			/* sanity check that we can pass the delta to timebase_scale */
> > > > > > >			igt_assert(delta_ts64 < UINT32_MAX);
> > > > > > > -		delta_oa32_ns = timebase_scale(delta_oa32);
> > > > > > > -		delta_ts64_ns = timebase_scale(delta_ts64);
> > > > > > > +		delta_oa32_ns = i915_perf_timebase_scale(intel_perf, delta_oa32);
> > > > > > > +		delta_ts64_ns = i915_perf_timebase_scale(intel_perf, delta_ts64);
> > > > > > >
> > > > > > >			igt_debug("ts32 delta = %u, = %uns\n",
> > > > > > >				  delta_oa32, (unsigned)delta_oa32_ns);
> > > > > > > @@ -3498,7 +3431,7 @@ hsw_test_single_ctx_counters(void)
> > > > > > > static void
> > > > > > > gen8_test_single_ctx_render_target_writes_a_counter(void)
> > > > > > > {
> > > > > > > -	int oa_exponent = max_oa_exponent_for_period_lte(1000000);
> > > > > > > +	int oa_exponent = i915_perf_max_oa_exponent_for_period_lte(intel_perf, 1000000);
> > > > > > >		uint64_t properties[] = {
> > > > > > >			DRM_I915_PERF_PROP_CTX_HANDLE, UINT64_MAX, /* updated below */
> > > > > > >
> > > > > > > @@ -3700,8 +3633,8 @@ gen8_test_single_ctx_render_target_writes_a_counter(void)
> > > > > > >
> > > > > > >				/* sanity check that we can pass the delta to timebase_scale */
> > > > > > >				igt_assert(delta_ts64 < UINT32_MAX);
> > > > > > > -			delta_oa32_ns = timebase_scale(delta_oa32);
> > > > > > > -			delta_ts64_ns = timebase_scale(delta_ts64);
> > > > > > > +			delta_oa32_ns = i915_perf_timebase_scale(intel_perf, delta_oa32);
> > > > > > > +			delta_ts64_ns = i915_perf_timebase_scale(intel_perf, delta_ts64);
> > > > > > >
> > > > > > >				igt_debug("oa32 delta = %u, = %uns\n",
> > > > > > >					  delta_oa32, (unsigned)delta_oa32_ns);
> > > > > > > @@ -3783,7 +3716,8 @@ gen8_test_single_ctx_render_target_writes_a_counter(void)
> > > > > > >					{
> > > > > > >						uint32_t time_delta = report[1] - report0_32[1];
> > > > > > >
> > > > > > > -					if (timebase_scale(time_delta) > 1000000000) {
> > > > > > > +					if (i915_perf_timebase_scale(intel_perf,
> > > > > > > +								     time_delta) > 1000000000) {
> > > > > > >							skip_reason = "prior first mi-rpc";
> > > > > > >						}
> > > > > > >					}
> > > > > > > @@ -3791,7 +3725,8 @@ gen8_test_single_ctx_render_target_writes_a_counter(void)
> > > > > > >					{
> > > > > > >						uint32_t time_delta = report[1] - report1_32[1];
> > > > > > >
> > > > > > > -					if (timebase_scale(time_delta) <= 1000000000) {
> > > > > > > +					if (i915_perf_timebase_scale(intel_perf,
> > > > > > > +								     time_delta) <= 1000000000) {
> > > > > > >							igt_debug("    comes after last MI_RPC (%u)\n",
> > > > > > >								  report1_32[1]);
> > > > > > >							report = report1_32;
> > > > > > > @@ -4164,7 +4099,7 @@ static void gen12_single_ctx_helper(void)
> > > > > > >
> > > > > > >		/* Sanity check that we can pass the delta to timebase_scale */
> > > > > > >		igt_assert(delta_ts64 < UINT32_MAX);
> > > > > > > -	delta_oa32_ns = timebase_scale(delta_oa32);
> > > > > > > +	delta_oa32_ns = i915_perf_timebase_scale(intel_perf, delta_oa32);
> > > > > > >		delta_ts64_ns = cs_timebase_scale(delta_ts64);
> > > > > > >
> > > > > > >		igt_debug("oa32 delta = %u, = %uns\n",
> > > > > > > --
> > > > > > > 2.25.1
> > > > > > >
> > >
> >
> >
> >
> >
> 




^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [igt-dev] [PATCH i-g-t] i915/perf: Make __perf_open() and friends public
  2023-02-09  9:56             ` Janusz Krzysztofik
@ 2023-02-09 18:46               ` Umesh Nerlige Ramappa
  2023-02-09 20:23                 ` Janusz Krzysztofik
  0 siblings, 1 reply; 18+ messages in thread
From: Umesh Nerlige Ramappa @ 2023-02-09 18:46 UTC (permalink / raw)
  To: Janusz Krzysztofik; +Cc: igt-dev, Chris Wilson

On Thu, Feb 09, 2023 at 10:56:04AM +0100, Janusz Krzysztofik wrote:
>On Wednesday, 8 February 2023 20:34:06 CET Umesh Nerlige Ramappa wrote:
>> On Wed, Feb 08, 2023 at 11:09:23AM +0100, Janusz Krzysztofik wrote:
>> >On Tuesday, 7 February 2023 21:15:26 CET Dixit, Ashutosh wrote:
>> >> On Tue, 07 Feb 2023 12:04:17 -0800, Janusz Krzysztofik wrote:
>> >> >
>> >> > Hi Umesh,
>> >> >
>> >> > On Tuesday, 7 February 2023 20:33:50 CET Umesh Nerlige Ramappa wrote:
>> >> > > On Tue, Feb 07, 2023 at 11:25:00AM -0800, Umesh Nerlige Ramappa
>wrote:
>> >> > > >I wouldn't do this. Please keep the changes local to the specific
>test
>> >> > > >that you implemented in your first rev. While it is a good idea to
>> >> > > >have some of the perf capabilities in the library, this is way
>too
>> >> > > >much churn to implement a specific test for the original failure.
>> >> > > >Unless multiple IGT subsystems are already dependent on perf APIs to
>> >> > > >implement multiple tests, let's not do this.
>> >> > > >
>> >> > >
>> >> > > Also note that the perf library implemented in IGT is not entirely
>used
>> >> > > by IGT tests alone. The library is also linked to GPUvis software.
>Only
>> >> > > a few pieces of reusable code in the perf library are used by IGT
>tests.
>> >> >
>> >> > Do you think that my changes will break other users?  How?
>> >> >
>> >> > Also, it looks like there are somehow conflicting expectations from
>different
>> >> > reviewers.  Ashutosh wanted the new subtest to be implemented outside
>of i915/
>> >> > perf test.  That's why I proposed to extend the library with open/close
>and
>> >> > related helpers, just to avoid code duplication, and I'm about to
>resend it in
>> >> > series with the new subtest implemented inside gem_ctx_exec.  Now,
>after I
>> >> > submitted this patch for initial review, you say that a specific test
>is not
>> >> > the way to go.  What are you afraid of?
>> >> >
>> >> > Whose expectations should I try to satisfy in order to have a subtest
>accepted
>> >> > and merged?  Or should I just give up and duplicate the code from i915/
>perf in
>> >> > another test?  Or maybe you can have a look at the whole series before
>you
>> >> > decide?
>> >>
>> >> Hi Janusz,
>> >>
>> >> I agree with Umesh. Given that here perf is just being used as a 'dummy
>> >> workload' let's just duplicate the minimal code required for perf
>> >> open/close wherever we are adding the new test. This will keep the real
>> >> perf functionality undisturbed for reasons Umesh cited.
>> >
>> >TBH, I can't see any good justification among those reasons mentioned: "too
>> >much churn", "unless ... already dependent", "not entirely used by IGT
>tests",
>> >"linked to GPUvis software", "only a few pieces of reusable code ... used
>by
>> >IGT" -- which of those justifies duplication of i915 perf code in IGT
>tests?
>>
>> You yourself mentioned that this is not related to perf. It's just that
>> perf uses some code in i915 that does barrier related stuff which helps
>> you to reproduce the issue. Why can't that barrier-related-stuff be
>> implemented in IGT without the use of perf APIs?
>
>Because inside the driver I found no ways to trigger the issue (within a time
>period reasonable from CI perspective) other than calling
>intel_context_prepare_remote_request().  Only perf and gen8 sseu call that
>function.  Out of the two, perf was my choice because:
>- perf matched the user scenario reported as the one that could trigger the
>  bug,
>- we already had some work in progress subtest added to tests/i915/perf.c
>  still before my root cause analysis was completed.

If you know the sequence in perf code that uncovers this issue, why 
not just add a selftest for this?

Umesh

>
>> If that's a lot of
>> effort and it's quicker to reproduce this issue using perf APIs, then
>> that's fine with me, but keep it outside of perf library
>
>Why?
>(assuming by "it" you mean some useful functions now in tests/i915/perf.c)
>
>> and maybe add a
>> note saying that this test can be improved by figuring out how to do
>> barrier related execution in IGT in future.
>
>I don't understand what you mean by "in IGT" here.  Isn't lib/i915/perf.c "in
>IGT" (a part of IGT)?
>
>> I don't see any
>> justification to modify perf library for an issue that's not even perf
>> related.
>
>Not modify, only extend with a wrapper around DRM_IOCTL_I915_PERF_OPEN and
>helpers it depends on.
>
>Justification why we need to call DRM_IOCTL_I915_PERF_OPEN from some new
>subtests: extend CI coverage over some rarely used processing paths.
>
>Justification why add reusable code to a library: avoid code duplication.
>
>Justification why add it to perf library: no doubt DRM_IOCTL_I915_PERF_OPEN is
>perf related, I believe.
>
>> I believe you also mentioned somewhere that the issue was fixed by some
>> 'unknown' code changes to i915 and you are not able to reproduce
>> it consistently with this test now.
>
>No, I must have missed my point while clarifying things if that's how you've
>read them, sorry.  The issue is reproducible.  CI results from my trybot
>attempt clearly confirm that:
>https://intel-gfx-ci.01.org/tree/drm-tip/TrybotIGT_699/bat-all.html?
>testfilter=barrier-race
>
>Results from preliminary fixes tested with the new IGT subtest on trybot:
>https://intel-gfx-ci.01.org/tree/drm-tip/Trybot_113662v1/bat-all.html?
>testfilter=barrier-race
>
>>
>> I have considered the above factors to suggest that this should not
>> reside in perf library.
>>
>> >Again, do you think that my changes can break other (non-IGT) users?
>> >How?
>>
>> I didn't say that your changes break anything. I was stating that the
>> library code is shared across different tools.
>
>But why do you think that an IGT library shared across different tools can't
>be extended with a few functions needed by IGT tests?
>
>Thanks,
>Janusz
>
>
>>
>> Thanks,
>> Umesh
>>
>> >
>> >Anyway, assuming you are the "owner" of lib/i915/perf.c, in order to
>satisfy
>> >your (still not clear for me) requirements I'm already working on a new
>> >version of my patch, with the i915 perf code duplicated as needed.
>> >
>> >Thanks,
>> >Janusz
>> >
>> >>
>> >> Thanks.
>> >> --
>> >> Ashutosh
>> >>
>> >
>> >
>> >
>> >
>>
>
>
>
>

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [igt-dev] [PATCH i-g-t] i915/perf: Make __perf_open() and friends public
  2023-02-09 18:46               ` Umesh Nerlige Ramappa
@ 2023-02-09 20:23                 ` Janusz Krzysztofik
  0 siblings, 0 replies; 18+ messages in thread
From: Janusz Krzysztofik @ 2023-02-09 20:23 UTC (permalink / raw)
  To: Umesh Nerlige Ramappa; +Cc: igt-dev, Chris Wilson

On Thursday, 9 February 2023 19:46:02 CET Umesh Nerlige Ramappa wrote:
> On Thu, Feb 09, 2023 at 10:56:04AM +0100, Janusz Krzysztofik wrote:
> >On Wednesday, 8 February 2023 20:34:06 CET Umesh Nerlige Ramappa wrote:
> >> On Wed, Feb 08, 2023 at 11:09:23AM +0100, Janusz Krzysztofik wrote:
> >> >On Tuesday, 7 February 2023 21:15:26 CET Dixit, Ashutosh wrote:
> >> >> On Tue, 07 Feb 2023 12:04:17 -0800, Janusz Krzysztofik wrote:
> >> >> >
> >> >> > Hi Umesh,
> >> >> >
> >> >> > On Tuesday, 7 February 2023 20:33:50 CET Umesh Nerlige Ramappa wrote:
> >> >> > > On Tue, Feb 07, 2023 at 11:25:00AM -0800, Umesh Nerlige Ramappa
> >wrote:
> >> >> > > >I wouldn't do this. Please keep the changes local to the specific
> >test
> >> >> > > >that you implemented in your first rev. While it is a good idea to
> >> >> > > >have some of the perf capabilities in the library, this is way
> >too
> >> >> > > >much churn to implement a specific test for the original failure.
> >> >> > > >Unless multiple IGT subsystems are already dependent on perf APIs to
> >> >> > > >implement multiple tests, let's not do this.
> >> >> > > >
> >> >> > >
> >> >> > > Also note that the perf library implemented in IGT is not entirely
> >used
> >> >> > > by IGT tests alone. The library is also linked to GPUvis software.
> >Only
> >> >> > > a few pieces of reusable code in the perf library are used by IGT
> >tests.
> >> >> >
> >> >> > Do you think that my changes will break other users?  How?
> >> >> >
> >> >> > Also, it looks like there are somehow conflicting expectations from
> >different
> >> >> > reviewers.  Ashutosh wanted the new subtest to be implemented outside
> >of i915/
> >> >> > perf test.  That's why I proposed to extend the library with open/close
> >and
> >> >> > related helpers, just to avoid code duplication, and I'm about to
> >resend it in
> >> >> > series with the new subtest implemented inside gem_ctx_exec.  Now,
> >after I
> >> >> > submitted this patch for initial review, you say that a specific test
> >is not
> >> >> > the way to go.  What are you afraid of?
> >> >> >
> >> >> > Whose expectations should I try to satisfy in order to have a subtest
> >accepted
> >> >> > and merged?  Or should I just give up and duplicate the code from i915/
> >perf in
> >> >> > another test?  Or maybe you can have a look at the whole series before
> >you
> >> >> > decide?
> >> >>
> >> >> Hi Janusz,
> >> >>
> >> >> I agree with Umesh. Given that here perf is just being used as a 'dummy
> >> >> workload' let's just duplicate the minimal code required for perf
> >> >> open/close wherever we are adding the new test. This will keep the real
> >> >> perf functionality undisturbed for reasons Umesh cited.
> >> >
> >> >TBH, I can't see any good justification among those reasons mentioned: "too
> >> >much churn", "unless ... already dependent", "not entirely used by IGT
> >tests",
> >> >"linked to GPUvis software", "only a few pieces of reusable code ... used
> >by
> >> >IGT" -- which of those justifies duplication of i915 perf code in IGT
> >tests?
> >>
> >> You yourself mentioned that this is not related to perf. It's just that
> >> perf uses some code in i915 that does barrier related stuff which helps
> >> you to reproduce the issue. Why can't that barrier-related-stuff be
> >> implemented in IGT without the use of perf APIs?
> >
> >Because inside the driver I found no ways to trigger the issue (within a time
> >period reasonable from CI perspective) other than calling
> >intel_context_prepare_remote_request().  Only perf and gen8 sseu call that
> >function.  Out of the two, perf was my choice because:
> >- perf matched the user scenario reported as the one that could trigger the
> >  bug,
> >- we already had some work in progress subtest added to tests/i915/perf.c
> >  still before my root cause analysis was completed.
> 
> If you know the sequence in perf code that uncovers this issue, why 
> not just add a selftest for this?

Hmm, because it would take some significant time for me to learn if and how 
a comparable scenario could be successfully implemented in a selftest.  Any 
help is appreciated.

Thanks,
Janusz


> 
> Umesh
> 
> >
> >> If that's a lot of
> >> effort and it's quicker to reproduce this issue using perf APIs, then
> >> that's fine with me, but keep it outside of perf library
> >
> >Why?
> >(assuming by "it" you mean some useful functions now in tests/i915/perf.c)
> >
> >> and maybe add a
> >> note saying that this test can be improved by figuring out how to do
> >> barrier related execution in IGT in future.
> >
> >I don't understand what you mean by "in IGT" here.  Isn't lib/i915/perf.c "in
> >IGT" (a part of IGT)?
> >
> >> I don't see any
> >> justification to modify perf library for an issue that's not even perf
> >> related.
> >
> >Not modify, only extend with a wrapper around DRM_IOCTL_I915_PERF_OPEN and
> >helpers it depends on.
> >
> >Justification why we need to call DRM_IOCTL_I915_PERF_OPEN from some new
> >subtests: extend CI coverage over some rarely used processing paths.
> >
> >Justification why add reusable code to a library: avoid code duplication.
> >
> >Justification why add it to perf library: no doubt DRM_IOCTL_I915_PERF_OPEN is
> >perf related, I believe.
> >
> >> I believe you also mentioned somewhere that the issue was fixed by some
> >> 'unknown' code changes to i915 and you are not able to reproduce
> >> it consistently with this test now.
> >
> >No, I must have missed my point while clarifying things if that's how you've
> >read them, sorry.  The issue is reproducible.  CI results from my trybot
> >attempt clearly confirm that:
> >https://intel-gfx-ci.01.org/tree/drm-tip/TrybotIGT_699/bat-all.html?
> >testfilter=barrier-race
> >
> >Results from preliminary fixes tested with the new IGT subtest on trybot:
> >https://intel-gfx-ci.01.org/tree/drm-tip/Trybot_113662v1/bat-all.html?
> >testfilter=barrier-race
> >
> >>
> >> I have considered the above factors to suggest that this should not
> >> reside in perf library.
> >>
> >> >Again, do you think that my changes can break other (non-IGT) users?
> >> >How?
> >>
> >> I didn't say that your changes break anything. I was stating that the
> >> library code is shared across different tools.
> >
> >But why do you think that an IGT library shared across different tools can't
> >be extended with a few functions needed by IGT tests?
> >
> >Thanks,
> >Janusz
> >
> >
> >>
> >> Thanks,
> >> Umesh
> >>
> >> >
> >> >Anyway, assuming you are the "owner" of lib/i915/perf.c, in order to
> >satisfy
> >> >your (still not clear for me) requirements I'm already working on a new
> >> >version of my patch, with the i915 perf code duplicated as needed.
> >> >
> >> >Thanks,
> >> >Janusz
> >> >
> >> >>
> >> >> Thanks.
> >> >> --
> >> >> Ashutosh
> >> >>
> >> >
> >> >
> >> >
> >> >
> >>
> >
> >
> >
> >
> 




^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2023-02-09 20:23 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-02-07 10:11 [igt-dev] [PATCH i-g-t] i915/perf: Make __perf_open() and friends public Janusz Krzysztofik
2023-02-07 11:38 ` [igt-dev] ✗ Fi.CI.BAT: failure for " Patchwork
2023-02-07 18:16 ` [igt-dev] [PATCH i-g-t] " Kamil Konieczny
2023-02-07 19:45   ` Janusz Krzysztofik
2023-02-07 19:25 ` Umesh Nerlige Ramappa
2023-02-07 19:33   ` Umesh Nerlige Ramappa
2023-02-07 20:04     ` Janusz Krzysztofik
2023-02-07 20:15       ` Dixit, Ashutosh
2023-02-08 10:09         ` Janusz Krzysztofik
2023-02-08 19:34           ` Umesh Nerlige Ramappa
2023-02-09  9:56             ` Janusz Krzysztofik
2023-02-09 18:46               ` Umesh Nerlige Ramappa
2023-02-09 20:23                 ` Janusz Krzysztofik
2023-02-08 14:35     ` Kamil Konieczny
2023-02-08 17:53       ` Dixit, Ashutosh
2023-02-08 18:23         ` Janusz Krzysztofik
2023-02-09  6:30           ` Dixit, Ashutosh
2023-02-09 10:02             ` Janusz Krzysztofik

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.