All of lore.kernel.org
 help / color / mirror / Atom feed
From: Tvrtko Ursulin <tursulin@ursulin.net>
To: Intel-gfx@lists.freedesktop.org
Subject: [PATCH i-g-t 5/7] tests/perf_pmu: Tests for i915 PMU API
Date: Fri, 29 Sep 2017 13:39:37 +0100	[thread overview]
Message-ID: <20170929123939.3312-6-tvrtko.ursulin@linux.intel.com> (raw)
In-Reply-To: <20170929123939.3312-1-tvrtko.ursulin@linux.intel.com>

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

A bunch of tests for the new i915 PMU feature.

Parts of the code were initialy sketched by Dmitry Rogozhkin.

v2: (Most suggestions by Chris Wilson)
 * Add new class/instance based engine list.
 * Add gem_has_engine/gem_require_engine to work with class/instance.
 * Use the above two throughout the test.
 * Shorten tests to 100ms busy batches, seems enough.
 * Add queued counter sanity checks.
 * Use igt_nsec_elapsed.
 * Skip on perf -ENODEV in some tests instead of embedding knowledge locally.
 * Fix multi ordering for busy accounting.
 * Use new guranteed_usleep when sleep time is asserted on.
 * Check for no queued when idle/busy.
 * Add queued counter init test.
 * Add queued tests.
 * Consolidate and increase multiple busy engines tests to most-busy and
   all-busy tests.
 * Guarantte interrupts by using fences.
 * Test RC6 via forcewake.

v3:
 * Tweak assert in interrupts subtest.
 * Sprinkle of comments.
 * Fix multi-client test which got broken in v2.

v4:
 * Measured instead of guaranteed sleep.
 * Missing sync in no_sema.
 * Log busyness before asserts for debug.
 * access(2) instead of open(2) to determine if cpu0 is hotpluggable.
 * Test frequency reporting via min/max setting instead assuming.
   ^^ All above suggested by Chris Wilson. ^^
 * Drop queued subtests to match i915.
 * Use long batches with fences to ensure interrupts.
 * Test render node as well.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
---
 lib/igt_gt.c           |  50 +++
 lib/igt_gt.h           |  38 ++
 lib/igt_perf.h         |   9 +-
 tests/Makefile.am      |   1 +
 tests/Makefile.sources |   1 +
 tests/perf_pmu.c       | 957 +++++++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 1048 insertions(+), 8 deletions(-)
 create mode 100644 tests/perf_pmu.c

diff --git a/lib/igt_gt.c b/lib/igt_gt.c
index b3f3b3809eee..4c75811fb1b3 100644
--- a/lib/igt_gt.c
+++ b/lib/igt_gt.c
@@ -568,3 +568,53 @@ bool gem_can_store_dword(int fd, unsigned int engine)
 
 	return true;
 }
+
+const struct intel_execution_engine2 intel_execution_engines2[] = {
+	{ "rcs0", I915_ENGINE_CLASS_RENDER, 0 },
+	{ "bcs0", I915_ENGINE_CLASS_COPY, 0 },
+	{ "vcs0", I915_ENGINE_CLASS_VIDEO, 0 },
+	{ "vcs1", I915_ENGINE_CLASS_VIDEO, 1 },
+	{ "vecs0", I915_ENGINE_CLASS_VIDEO_ENHANCE, 0 },
+};
+
+unsigned int
+gem_class_instance_to_eb_flags(int gem_fd,
+			       enum drm_i915_gem_engine_class class,
+			       unsigned int instance)
+{
+	if (class != I915_ENGINE_CLASS_VIDEO)
+		igt_assert(instance == 0);
+	else
+		igt_assert(instance >= 0 && instance <= 1);
+
+	switch (class) {
+	case I915_ENGINE_CLASS_RENDER:
+		return I915_EXEC_RENDER;
+	case I915_ENGINE_CLASS_COPY:
+		return I915_EXEC_BLT;
+	case I915_ENGINE_CLASS_VIDEO:
+		if (instance == 0) {
+			if (gem_has_bsd2(gem_fd))
+				return I915_EXEC_BSD | I915_EXEC_BSD_RING1;
+			else
+				return I915_EXEC_BSD;
+
+		} else {
+			return I915_EXEC_BSD | I915_EXEC_BSD_RING2;
+		}
+	case I915_ENGINE_CLASS_VIDEO_ENHANCE:
+		return I915_EXEC_VEBOX;
+	case I915_ENGINE_CLASS_OTHER:
+	default:
+		igt_assert(0);
+	};
+}
+
+bool gem_has_engine(int gem_fd,
+		    enum drm_i915_gem_engine_class class,
+		    unsigned int instance)
+{
+	return gem_has_ring(gem_fd,
+			    gem_class_instance_to_eb_flags(gem_fd, class,
+							   instance));
+}
diff --git a/lib/igt_gt.h b/lib/igt_gt.h
index 2579cbd37be7..fb67ae1a7d1f 100644
--- a/lib/igt_gt.h
+++ b/lib/igt_gt.h
@@ -25,6 +25,7 @@
 #define IGT_GT_H
 
 #include "igt_debugfs.h"
+#include "igt_core.h"
 
 void igt_require_hang_ring(int fd, int ring);
 
@@ -80,4 +81,41 @@ extern const struct intel_execution_engine {
 
 bool gem_can_store_dword(int fd, unsigned int engine);
 
+extern const struct intel_execution_engine2 {
+	const char *name;
+	int class;
+	int instance;
+} intel_execution_engines2[];
+
+#define for_each_engine_class_instance(fd__, e__) \
+	for ((e__) = intel_execution_engines2;\
+	     (e__)->name; \
+	     (e__)++)
+
+enum drm_i915_gem_engine_class {
+	I915_ENGINE_CLASS_OTHER = 0,
+	I915_ENGINE_CLASS_RENDER = 1,
+	I915_ENGINE_CLASS_COPY = 2,
+	I915_ENGINE_CLASS_VIDEO = 3,
+	I915_ENGINE_CLASS_VIDEO_ENHANCE = 4,
+	I915_ENGINE_CLASS_MAX /* non-ABI */
+};
+
+unsigned int
+gem_class_instance_to_eb_flags(int gem_fd,
+			       enum drm_i915_gem_engine_class class,
+			       unsigned int instance);
+
+bool gem_has_engine(int gem_fd,
+		    enum drm_i915_gem_engine_class class,
+		    unsigned int instance);
+
+static inline
+void gem_require_engine(int gem_fd,
+			enum drm_i915_gem_engine_class class,
+			unsigned int instance)
+{
+	igt_require(gem_has_engine(gem_fd, class, instance));
+}
+
 #endif /* IGT_GT_H */
diff --git a/lib/igt_perf.h b/lib/igt_perf.h
index e38171da5261..dc4df760f531 100644
--- a/lib/igt_perf.h
+++ b/lib/igt_perf.h
@@ -29,14 +29,7 @@
 
 #include <linux/perf_event.h>
 
-enum drm_i915_gem_engine_class {
-	I915_ENGINE_CLASS_OTHER = 0,
-	I915_ENGINE_CLASS_RENDER = 1,
-	I915_ENGINE_CLASS_COPY = 2,
-	I915_ENGINE_CLASS_VIDEO = 3,
-	I915_ENGINE_CLASS_VIDEO_ENHANCE = 4,
-	I915_ENGINE_CLASS_MAX /* non-ABI */
-};
+#include "igt_gt.h"
 
 enum drm_i915_pmu_engine_sample {
 	I915_SAMPLE_BUSY = 0,
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 1cea4036d00f..2819df381df7 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -126,6 +126,7 @@ gen7_forcewake_mt_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
 gen7_forcewake_mt_LDADD = $(LDADD) -lpthread
 gem_userptr_blits_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
 gem_userptr_blits_LDADD = $(LDADD) -lpthread
+perf_pmu_LDADD = $(LDADD) $(top_builddir)/lib/libigt_perf.la
 
 gem_wait_LDADD = $(LDADD) -lrt
 kms_flip_LDADD = $(LDADD) -lrt -lpthread
diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index 0adc28a014d2..7d1fdf16892d 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -217,6 +217,7 @@ TESTS_progs = \
 	kms_vblank \
 	meta_test \
 	perf \
+	perf_pmu \
 	pm_backlight \
 	pm_lpsp \
 	pm_rc6_residency \
diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
new file mode 100644
index 000000000000..70d435e953ea
--- /dev/null
+++ b/tests/perf_pmu.c
@@ -0,0 +1,957 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/times.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <time.h>
+#include <poll.h>
+
+#include "igt.h"
+#include "igt_core.h"
+#include "igt_perf.h"
+#include "igt_sysfs.h"
+
+IGT_TEST_DESCRIPTION("Test the i915 pmu perf interface");
+
+const double tolerance = 0.02f;
+const unsigned long batch_duration_ns = 100 * 1000 * 1000;
+
+static int open_pmu(uint64_t config)
+{
+	int fd;
+
+	fd = perf_i915_open(config);
+	igt_require(fd >= 0 || (fd < 0 && errno != ENODEV));
+	igt_assert(fd >= 0);
+
+	return fd;
+}
+
+static int open_group(uint64_t config, int group)
+{
+	int fd;
+
+	fd = perf_i915_open_group(config, group);
+	igt_require(fd >= 0 || (fd < 0 && errno != ENODEV));
+	igt_assert(fd >= 0);
+
+	return fd;
+}
+
+static void
+init(int gem_fd, const struct intel_execution_engine2 *e, uint8_t sample)
+{
+	int fd;
+
+	fd = open_pmu(__I915_PMU_ENGINE(e->class, e->instance, sample));
+
+	close(fd);
+}
+
+static uint64_t pmu_read_single(int fd)
+{
+	uint64_t data[2];
+
+	igt_assert_eq(read(fd, data, sizeof(data)), sizeof(data));
+
+	return data[0];
+}
+
+static void pmu_read_multi(int fd, unsigned int num, uint64_t *val)
+{
+	uint64_t buf[2 + num];
+	unsigned int i;
+
+	igt_assert_eq(read(fd, buf, sizeof(buf)), sizeof(buf));
+
+	for (i = 0; i < num; i++)
+		val[i] = buf[2 + i];
+}
+
+#define assert_within_epsilon(x, ref, tolerance) \
+	igt_assert_f((double)(x) <= (1.0 + tolerance) * (double)ref && \
+		     (double)(x) >= (1.0 - tolerance) * (double)ref, \
+		     "'%s' != '%s' (%f not within %f%% tolerance of %f)\n",\
+		     #x, #ref, (double)x, tolerance * 100.0, (double)ref)
+
+/*
+ * Helper for cases where we assert on time spent sleeping (directly or
+ * indirectly), so make it more robust by ensuring the system sleep time
+ * is within test tolerance to start with.
+ */
+static unsigned int measured_usleep(unsigned int usec)
+{
+	uint64_t slept = 0;
+
+	while (usec > 0) {
+		struct timespec start = { };
+		uint64_t this_sleep;
+
+		igt_nsec_elapsed(&start);
+		usleep(usec);
+		this_sleep = igt_nsec_elapsed(&start);
+		slept += this_sleep;
+		if (this_sleep > usec * 1000)
+			break;
+		usec -= this_sleep;
+	}
+
+	return slept;
+}
+
+static unsigned int e2ring(int gem_fd, const struct intel_execution_engine2 *e)
+{
+	return gem_class_instance_to_eb_flags(gem_fd, e->class, e->instance);
+}
+
+static void
+single(int gem_fd, const struct intel_execution_engine2 *e, bool busy)
+{
+	double ref = busy ? batch_duration_ns : 0.0f;
+	igt_spin_t *spin;
+	uint64_t val;
+	int fd;
+
+	fd = open_pmu(I915_PMU_ENGINE_BUSY(e->class, e->instance));
+
+	if (busy) {
+		spin = igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e), 0);
+		igt_spin_batch_set_timeout(spin, batch_duration_ns);
+	} else {
+		usleep(batch_duration_ns / 1000);
+	}
+
+	if (busy)
+		gem_sync(gem_fd, spin->handle);
+
+	val = pmu_read_single(fd);
+
+	assert_within_epsilon(val, ref, tolerance);
+
+	if (busy)
+		igt_spin_batch_free(gem_fd, spin);
+	close(fd);
+}
+
+static void log_busy(int fd, unsigned int num_engines, uint64_t *val)
+{
+	char buf[1024];
+	int rem = sizeof(buf);
+	unsigned int i;
+	char *p = buf;
+
+	for (i = 0; i < num_engines; i++) {
+		int len;
+
+		len = snprintf(p, rem, "%u=%" PRIu64 "\n",  i, val[i]);
+		igt_assert(len > 0);
+		rem -= len;
+		p += len;
+	}
+
+	igt_info("%s", buf);
+}
+
+static void
+busy_check_all(int gem_fd, const struct intel_execution_engine2 *e,
+	       const unsigned int num_engines)
+{
+	const struct intel_execution_engine2 *e_;
+	uint64_t val[num_engines];
+	int fd[num_engines];
+	igt_spin_t *spin;
+	unsigned int busy_idx, i;
+
+	i = 0;
+	fd[0] = -1;
+	for_each_engine_class_instance(fd, e_) {
+		if (!gem_has_engine(gem_fd, e_->class, e_->instance))
+			continue;
+		else if (e == e_)
+			busy_idx = i;
+
+		fd[i++] = open_group(I915_PMU_ENGINE_BUSY(e_->class,
+							  e_->instance),
+				     fd[0]);
+	}
+
+	spin = igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e), 0);
+	igt_spin_batch_set_timeout(spin, batch_duration_ns);
+
+	gem_sync(gem_fd, spin->handle);
+
+	pmu_read_multi(fd[0], num_engines, val);
+	log_busy(fd[0], num_engines, val);
+
+	assert_within_epsilon(val[busy_idx], batch_duration_ns, tolerance);
+	for (i = 0; i < num_engines; i++) {
+		if (i == busy_idx)
+			continue;
+		assert_within_epsilon(val[i], 0.0f, tolerance);
+	}
+
+	igt_spin_batch_free(gem_fd, spin);
+	close(fd[0]);
+}
+
+static void
+most_busy_check_all(int gem_fd, const struct intel_execution_engine2 *e,
+		    const unsigned int num_engines)
+{
+	const struct intel_execution_engine2 *e_;
+	uint64_t val[num_engines];
+	int fd[num_engines];
+	igt_spin_t *spin[num_engines];
+	unsigned int idle_idx, i;
+
+	gem_require_engine(gem_fd, e->class, e->instance);
+
+	i = 0;
+	fd[0] = -1;
+	for_each_engine_class_instance(fd, e_) {
+		if (!gem_has_engine(gem_fd, e_->class, e_->instance))
+			continue;
+
+		fd[i] = open_group(I915_PMU_ENGINE_BUSY(e_->class,
+							e_->instance),
+				   fd[0]);
+
+		if (e == e_) {
+			idle_idx = i;
+		} else {
+			spin[i] = igt_spin_batch_new(gem_fd, 0,
+						     e2ring(gem_fd, e_), 0);
+			igt_spin_batch_set_timeout(spin[i], batch_duration_ns);
+		}
+
+		i++;
+	}
+
+	for (i = 0; i < num_engines; i++) {
+		if (i != idle_idx)
+			gem_sync(gem_fd, spin[i]->handle);
+	}
+
+	pmu_read_multi(fd[0], num_engines, val);
+	log_busy(fd[0], num_engines, val);
+
+	for (i = 0; i < num_engines; i++) {
+		if (i == idle_idx)
+			assert_within_epsilon(val[i], 0.0f, tolerance);
+		else
+			assert_within_epsilon(val[i], batch_duration_ns,
+					      tolerance);
+	}
+
+	for (i = 0; i < num_engines; i++) {
+		if (i != idle_idx)
+			igt_spin_batch_free(gem_fd, spin[i]);
+	}
+	close(fd[0]);
+}
+
+static void
+all_busy_check_all(int gem_fd, const unsigned int num_engines)
+{
+	const struct intel_execution_engine2 *e;
+	uint64_t val[num_engines];
+	int fd[num_engines];
+	igt_spin_t *spin[num_engines];
+	unsigned int i;
+
+	i = 0;
+	fd[0] = -1;
+	for_each_engine_class_instance(fd, e) {
+		if (!gem_has_engine(gem_fd, e->class, e->instance))
+			continue;
+
+		fd[i] = open_group(I915_PMU_ENGINE_BUSY(e->class, e->instance),
+				   fd[0]);
+
+		spin[i] = igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e), 0);
+		igt_spin_batch_set_timeout(spin[i], batch_duration_ns);
+
+		i++;
+	}
+
+	for (i = 0; i < num_engines; i++)
+		gem_sync(gem_fd, spin[i]->handle);
+
+	pmu_read_multi(fd[0], num_engines, val);
+	log_busy(fd[0], num_engines, val);
+
+	for (i = 0; i < num_engines; i++)
+		assert_within_epsilon(val[i], batch_duration_ns, tolerance);
+
+	for (i = 0; i < num_engines; i++)
+		igt_spin_batch_free(gem_fd, spin[i]);
+	close(fd[0]);
+}
+
+static void
+no_sema(int gem_fd, const struct intel_execution_engine2 *e, bool busy)
+{
+	igt_spin_t *spin;
+	uint64_t val[2];
+	int fd;
+
+	fd = open_group(I915_PMU_ENGINE_SEMA(e->class, e->instance), -1);
+	open_group(I915_PMU_ENGINE_WAIT(e->class, e->instance), fd);
+
+	if (busy) {
+		spin = igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e), 0);
+		igt_spin_batch_set_timeout(spin, batch_duration_ns);
+	} else {
+		usleep(batch_duration_ns / 1000);
+	}
+
+	if (busy)
+		gem_sync(gem_fd, spin->handle);
+
+	pmu_read_multi(fd, 2, val);
+
+	assert_within_epsilon(val[0], 0.0f, tolerance);
+	assert_within_epsilon(val[1], 0.0f, tolerance);
+
+	if (busy)
+		igt_spin_batch_free(gem_fd, spin);
+	close(fd);
+}
+
+static void
+multi_client(int gem_fd, const struct intel_execution_engine2 *e)
+{
+	uint64_t config = I915_PMU_ENGINE_BUSY(e->class, e->instance);
+	unsigned int slept;
+	igt_spin_t *spin;
+	uint64_t val[2];
+	int fd[2];
+
+	fd[0] = open_pmu(config);
+
+	spin = igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e), 0);
+	igt_spin_batch_set_timeout(spin, batch_duration_ns);
+
+	usleep(batch_duration_ns / 3000);
+
+	/*
+	 * Second PMU client which is initialized after the first one,
+	 * and exists before it, should not affect accounting as reported
+	 * in the first client.
+	 */
+	fd[1] = open_pmu(config);
+	slept = measured_usleep(batch_duration_ns / 3000);
+	val[1] = pmu_read_single(fd[1]);
+	close(fd[1]);
+
+	gem_sync(gem_fd, spin->handle);
+
+	val[0] = pmu_read_single(fd[0]);
+
+	assert_within_epsilon(val[0], batch_duration_ns, tolerance);
+	assert_within_epsilon(val[1], slept, tolerance);
+
+	igt_spin_batch_free(gem_fd, spin);
+	close(fd[0]);
+}
+
+/**
+ * Tests that i915 PMU corectly errors out in invalid initialization.
+ * i915 PMU is uncore PMU, thus:
+ *  - sampling period is not supported
+ *  - pid > 0 is not supported since we can't count per-process (we count
+ *    per whole system)
+ *  - cpu != 0 is not supported since i915 PMU exposes cpumask for CPU0
+ */
+static void invalid_init(void)
+{
+	struct perf_event_attr attr;
+	int pid, cpu;
+
+#define ATTR_INIT() \
+do { \
+	memset(&attr, 0, sizeof (attr)); \
+	attr.config = I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0); \
+	attr.type = i915_type_id(); \
+	igt_assert(attr.type != 0); \
+} while(0)
+
+	ATTR_INIT();
+	attr.sample_period = 100;
+	pid = -1;
+	cpu = 0;
+	igt_assert_eq(perf_event_open(&attr, pid, cpu, -1, 0), -1);
+	igt_assert_eq(errno, EINVAL);
+
+	ATTR_INIT();
+	pid = 0;
+	cpu = 0;
+	igt_assert_eq(perf_event_open(&attr, pid, cpu, -1, 0), -1);
+	igt_assert_eq(errno, EINVAL);
+
+	ATTR_INIT();
+	pid = -1;
+	cpu = 1;
+	igt_assert_eq(perf_event_open(&attr, pid, cpu, -1, 0), -1);
+	igt_assert_eq(errno, ENODEV);
+}
+
+static void init_other(unsigned int i, bool valid)
+{
+	int fd;
+
+	fd = perf_i915_open(__I915_PMU_OTHER(i));
+	igt_require(!(fd < 0 && errno == ENODEV));
+	if (valid) {
+		igt_assert(fd >= 0);
+	} else {
+		igt_assert(fd < 0);
+		return;
+	}
+
+	close(fd);
+}
+
+static void read_other(unsigned int i, bool valid)
+{
+	int fd;
+
+	fd = perf_i915_open(__I915_PMU_OTHER(i));
+	igt_require(!(fd < 0 && errno == ENODEV));
+	if (valid) {
+		igt_assert(fd >= 0);
+	} else {
+		igt_assert(fd < 0);
+		return;
+	}
+
+	(void)pmu_read_single(fd);
+
+	close(fd);
+}
+
+static bool cpu0_hotplug_support(void)
+{
+	return access("/sys/devices/system/cpu/cpu0/online", W_OK) == 0;
+}
+
+static void cpu_hotplug(int gem_fd)
+{
+	struct timespec start = { };
+	igt_spin_t *spin;
+	uint64_t val, ref;
+	int fd;
+
+	igt_require(cpu0_hotplug_support());
+
+	spin = igt_spin_batch_new(gem_fd, 0, I915_EXEC_RENDER, 0);
+	fd = perf_i915_open(I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0));
+	igt_assert(fd >= 0);
+
+	igt_nsec_elapsed(&start);
+
+	/*
+	 * Toggle online status of all the CPUs in a child process and ensure
+	 * this has not affected busyness stats in the parent.
+	 */
+	igt_fork(child, 1) {
+		int cpu = 0;
+
+		for (;;) {
+			char name[128];
+			int cpufd;
+
+			sprintf(name, "/sys/devices/system/cpu/cpu%d/online",
+				cpu);
+			cpufd = open(name, O_WRONLY);
+			if (cpufd == -1) {
+				igt_assert(cpu > 0);
+				break;
+			}
+			igt_assert_eq(write(cpufd, "0", 2), 2);
+
+			usleep(1000 * 1000);
+
+			igt_assert_eq(write(cpufd, "1", 2), 2);
+
+			close(cpufd);
+			cpu++;
+		}
+	}
+
+	igt_waitchildren();
+
+	igt_spin_batch_end(spin);
+	gem_sync(gem_fd, spin->handle);
+
+	ref = igt_nsec_elapsed(&start);
+	val = pmu_read_single(fd);
+
+	assert_within_epsilon(val, ref, tolerance);
+
+	igt_spin_batch_free(gem_fd, spin);
+	close(fd);
+}
+
+static unsigned long calibrate_nop(int fd, const unsigned int calibration_us)
+{
+	const unsigned int cal_min_us = calibration_us * 3;
+	const unsigned int tolerance_pct = 10;
+	const uint32_t bbe = 0xa << 23;
+	const unsigned int loops = 17;
+	struct drm_i915_gem_exec_object2 obj = {};
+	struct drm_i915_gem_execbuffer2 eb =
+		{ .buffer_count = 1, .buffers_ptr = (uintptr_t)&obj};
+	struct timespec t_begin = { };
+	long size, last_size;
+	unsigned long ns;
+
+	igt_nsec_elapsed(&t_begin);
+
+	size = 256 * 1024;
+	do {
+		struct timespec t_start = { };
+
+		obj.handle = gem_create(fd, size);
+		gem_write(fd, obj.handle, size - sizeof(bbe), &bbe,
+			  sizeof(bbe));
+		gem_execbuf(fd, &eb);
+		gem_sync(fd, obj.handle);
+
+		igt_nsec_elapsed(&t_start);
+
+		for (int loop = 0; loop < loops; loop++)
+			gem_execbuf(fd, &eb);
+		gem_sync(fd, obj.handle);
+
+		ns = igt_nsec_elapsed(&t_start);
+
+		gem_close(fd, obj.handle);
+
+		last_size = size;
+		size = calibration_us * 1000 * size * loops / ns;
+		size = ALIGN(size, sizeof(uint32_t));
+	} while (igt_nsec_elapsed(&t_begin) / 1000 < cal_min_us ||
+		 abs(size - last_size) > (size * tolerance_pct / 100));
+
+	return size / sizeof(uint32_t);
+}
+
+static int chain_nop(int gem_fd, unsigned long sz, int in_fence, bool sync)
+{
+	struct drm_i915_gem_exec_object2 obj = {};
+	struct drm_i915_gem_execbuffer2 eb =
+		{ .buffer_count = 1, .buffers_ptr = (uintptr_t)&obj};
+	const uint32_t bbe = 0xa << 23;
+
+	sz = ALIGN(sz, sizeof(uint32_t));
+
+	obj.handle = gem_create(gem_fd, sz);
+	gem_write(gem_fd, obj.handle, sz - sizeof(bbe), &bbe, sizeof(bbe));
+
+	eb.flags = I915_EXEC_RENDER | I915_EXEC_FENCE_OUT;
+
+	if (in_fence >= 0) {
+		eb.flags |= I915_EXEC_FENCE_IN;
+		eb.rsvd2 = in_fence;
+	}
+
+	gem_execbuf_wr(gem_fd, &eb);
+
+	if (sync)
+		gem_sync(gem_fd, obj.handle);
+
+	gem_close(gem_fd, obj.handle);
+	if (in_fence >= 0)
+		close(in_fence);
+
+	return eb.rsvd2 >> 32;
+}
+
+static void
+test_interrupts(int gem_fd)
+{
+	const unsigned int calibration_us = 250000;
+	const unsigned int batch_len_us = 100000;
+	const unsigned int batch_count = 3000000 / batch_len_us;
+	uint64_t idle, busy, prev;
+	unsigned long cal, sz;
+	int fd, fence = -1;
+	unsigned int i;
+
+	cal = calibrate_nop(gem_fd, calibration_us);
+	sz = batch_len_us * cal / calibration_us;
+
+	fd = open_pmu(I915_PMU_INTERRUPTS);
+
+	gem_quiescent_gpu(gem_fd);
+
+	/* Wait for idle state. */
+	prev = pmu_read_single(fd);
+	idle = prev + 1;
+	while (idle != prev) {
+		usleep(100000);
+		prev = idle;
+		idle = pmu_read_single(fd);
+	}
+
+	igt_assert_eq(idle - prev, 0);
+
+	/* Send some no-op batches with chained fences to ensure interrupts. */
+	for (i = 1; i <= batch_count; i++)
+		fence = chain_nop(gem_fd, sz, fence,
+				  i < batch_count ? false : true);
+
+	close(fence);
+
+	/* Check at least as many interrupts has been generated. */
+	busy = pmu_read_single(fd);
+	igt_assert(busy >= batch_count);
+
+	close(fd);
+}
+
+static void
+test_frequency(int gem_fd)
+{
+	const uint64_t duration_ns = 2000000000;
+	uint32_t min_freq, max_freq, boost_freq;
+	uint64_t min[2], max[2], start[2];
+	igt_spin_t *spin;
+	int fd, sysfs;
+
+	sysfs = igt_sysfs_open(gem_fd, NULL);
+	igt_require(sysfs >= 0);
+
+	min_freq = igt_sysfs_get_u32(sysfs, "gt_RPn_freq_mhz");
+	max_freq = igt_sysfs_get_u32(sysfs, "gt_RP0_freq_mhz");
+	boost_freq = igt_sysfs_get_u32(sysfs, "gt_boost_freq_mhz");
+	igt_require(min_freq > 0 && max_freq > 0 && boost_freq > 0);
+	igt_require(max_freq > min_freq);
+	igt_require(boost_freq > min_freq);
+
+	fd = open_group(I915_PMU_REQUESTED_FREQUENCY, -1);
+	open_group(I915_PMU_ACTUAL_FREQUENCY, fd);
+
+	/*
+	 * Set GPU to min frequency and read PMU counters.
+	 */
+	igt_require(igt_sysfs_set_u32(sysfs, "gt_max_freq_mhz", min_freq));
+	igt_require(igt_sysfs_get_u32(sysfs, "gt_max_freq_mhz") == min_freq);
+	igt_require(igt_sysfs_set_u32(sysfs, "gt_boost_freq_mhz", min_freq));
+	igt_require(igt_sysfs_get_u32(sysfs, "gt_boost_freq_mhz") == min_freq);
+
+	pmu_read_multi(fd, 2, start);
+
+	spin = igt_spin_batch_new(gem_fd, 0, I915_EXEC_RENDER, 0);
+	igt_spin_batch_set_timeout(spin, duration_ns);
+	gem_sync(gem_fd, spin->handle);
+
+	pmu_read_multi(fd, 2, min);
+	min[0] -= start[0];
+	min[1] -= start[1];
+
+	igt_spin_batch_free(gem_fd, spin);
+
+	usleep(1000000);
+
+	/*
+	 * Set GPU to max frequency and read PMU counters.
+	 */
+	igt_require(igt_sysfs_set_u32(sysfs, "gt_max_freq_mhz", max_freq));
+	igt_require(igt_sysfs_get_u32(sysfs, "gt_max_freq_mhz") == max_freq);
+	igt_require(igt_sysfs_set_u32(sysfs, "gt_boost_freq_mhz", boost_freq));
+	igt_require(igt_sysfs_get_u32(sysfs, "gt_boost_freq_mhz") == boost_freq);
+
+	igt_require(igt_sysfs_set_u32(sysfs, "gt_min_freq_mhz", max_freq));
+	igt_require(igt_sysfs_get_u32(sysfs, "gt_min_freq_mhz") == max_freq);
+
+	pmu_read_multi(fd, 2, start);
+
+	spin = igt_spin_batch_new(gem_fd, 0, I915_EXEC_RENDER, 0);
+	igt_spin_batch_set_timeout(spin, duration_ns);
+	gem_sync(gem_fd, spin->handle);
+
+	pmu_read_multi(fd, 2, max);
+	max[0] -= start[0];
+	max[1] -= start[1];
+
+	igt_spin_batch_free(gem_fd, spin);
+
+	/*
+	 * Restore min/max.
+	 */
+	igt_require(igt_sysfs_set_u32(sysfs, "gt_min_freq_mhz", min_freq));
+	igt_require(igt_sysfs_get_u32(sysfs, "gt_min_freq_mhz") == min_freq);
+
+	close(fd);
+
+	igt_assert(min[0] < max[0]);
+	igt_assert(min[1] < max[1]);
+}
+
+static void
+test_rc6(int gem_fd)
+{
+	int64_t duration_ns = 2 * 1000 * 1000 * 1000;
+	uint64_t idle, busy, prev;
+	unsigned int slept;
+	int fd, fw;
+
+	fd = open_pmu(I915_PMU_RC6_RESIDENCY);
+
+	gem_quiescent_gpu(gem_fd);
+	usleep(1000000);
+
+	/* Go idle and check full RC6. */
+	prev = pmu_read_single(fd);
+	slept = measured_usleep(duration_ns / 1000);
+	idle = pmu_read_single(fd);
+
+	assert_within_epsilon(idle - prev, slept, tolerance);
+
+	/* Wake up device and check no RC6. */
+	fw = igt_open_forcewake_handle(gem_fd);
+	igt_assert(fw >= 0);
+
+	prev = pmu_read_single(fd);
+	usleep(duration_ns / 1000);
+	busy = pmu_read_single(fd);
+
+	assert_within_epsilon(busy - prev, 0.0, tolerance);
+
+	close(fw);
+	close(fd);
+}
+
+static void
+test_rc6p(int gem_fd)
+{
+	int64_t duration_ns = 2 * 1000 * 1000 * 1000;
+	unsigned int num_pmu = 1;
+	uint64_t idle[3], busy[3], prev[3];
+	unsigned int slept, i;
+	int fd, ret, fw;
+
+	fd = open_group(I915_PMU_RC6_RESIDENCY, -1);
+	ret = perf_i915_open_group(I915_PMU_RC6p_RESIDENCY, fd);
+	if (ret > 0) {
+		num_pmu++;
+		ret = perf_i915_open_group(I915_PMU_RC6pp_RESIDENCY, fd);
+		if (ret > 0)
+			num_pmu++;
+	}
+
+	igt_require(num_pmu == 3);
+
+	gem_quiescent_gpu(gem_fd);
+	usleep(1000000);
+
+	/* Go idle and check full RC6. */
+	pmu_read_multi(fd, num_pmu, prev);
+	slept = measured_usleep(duration_ns / 1000);
+	pmu_read_multi(fd, num_pmu, idle);
+
+	for (i = 0; i < num_pmu; i++)
+		assert_within_epsilon(idle[i] - prev[i], slept, tolerance);
+
+	/* Wake up device and check no RC6. */
+	fw = igt_open_forcewake_handle(gem_fd);
+	igt_assert(fw >= 0);
+
+	pmu_read_multi(fd, num_pmu, prev);
+	usleep(duration_ns / 1000);
+	pmu_read_multi(fd, num_pmu, busy);
+
+	for (i = 0; i < num_pmu; i++)
+		assert_within_epsilon(busy[i] - prev[i], 0.0, tolerance);
+
+	close(fw);
+	close(fd);
+}
+
+igt_main
+{
+	const unsigned int num_other_metrics =
+				I915_PMU_LAST - __I915_PMU_OTHER(0) + 1;
+	unsigned int num_engines = 0;
+	int fd = -1;
+	const struct intel_execution_engine2 *e;
+	unsigned int i;
+
+	igt_fixture {
+		fd = drm_open_driver_master(DRIVER_INTEL);
+
+		igt_require_gem(fd);
+		igt_require(i915_type_id() > 0);
+
+		for_each_engine_class_instance(fd, e) {
+			if (gem_has_engine(fd, e->class, e->instance))
+				num_engines++;
+		}
+	}
+
+	/**
+	 * Test invalid access via perf API is rejected.
+	 */
+	igt_subtest("invalid-init")
+		invalid_init();
+
+	for_each_engine_class_instance(fd, e) {
+		/**
+		 * Test that a single engine metric can be initialized.
+		 */
+		igt_subtest_f("init-busy-%s", e->name)
+			init(fd, e, I915_SAMPLE_BUSY);
+
+		igt_subtest_f("init-wait-%s", e->name)
+			init(fd, e, I915_SAMPLE_WAIT);
+
+		igt_subtest_f("init-sema-%s", e->name)
+			init(fd, e, I915_SAMPLE_SEMA);
+
+		/**
+		 * Test that engines show no load when idle.
+		 */
+		igt_subtest_f("idle-%s", e->name)
+			single(fd, e, false);
+
+		/**
+		 * Test that a single engine reports load correctly.
+		 */
+		igt_subtest_f("busy-%s", e->name)
+			single(fd, e, true);
+
+		/**
+		 * Test that when one engine is loaded other report no load.
+		 */
+		igt_subtest_f("busy-check-all-%s", e->name)
+			busy_check_all(fd, e, num_engines);
+
+		/**
+		 * Test that when all except one engine are loaded all loads
+		 * are correctly reported.
+		 */
+		igt_subtest_f("most-busy-check-all-%s", e->name)
+			most_busy_check_all(fd, e, num_engines);
+
+		/**
+		 * Test that semphore counters report no activity on idle
+		 * or busy engines.
+		 */
+		igt_subtest_f("idle-no-semaphores-%s", e->name)
+			no_sema(fd, e, false);
+
+		igt_subtest_f("busy-no-semaphores-%s", e->name)
+			no_sema(fd, e, true);
+
+		/**
+		 * Check that two perf clients do not influence each others
+		 * observations.
+		 */
+		igt_subtest_f("multi-client-%s", e->name)
+			multi_client(fd, e);
+	}
+
+	/**
+	 * Test that when all engines are loaded all loads are
+	 * correctly reported.
+	 */
+	igt_subtest("all-busy-check-all")
+		all_busy_check_all(fd, num_engines);
+
+	/**
+	 * Test that non-engine counters can be initialized and read. Apart
+	 * from the invalid metric which should fail.
+	 */
+	for (i = 0; i < num_other_metrics + 1; i++) {
+		igt_subtest_f("other-init-%u", i)
+			init_other(i, i < num_other_metrics);
+
+		igt_subtest_f("other-read-%u", i)
+			read_other(i, i < num_other_metrics);
+	}
+
+	/**
+	 * Test counters are not affected by CPU offline/online events.
+	 */
+	igt_subtest("cpu-hotplug")
+		cpu_hotplug(fd);
+
+	/**
+	 * Test GPU frequency.
+	 */
+	igt_subtest("frequency")
+		test_frequency(fd);
+
+	/**
+	 * Test interrupt count reporting.
+	 */
+	igt_subtest("interrupts")
+		test_interrupts(fd);
+
+	/**
+	 * Test RC6 residency reporting.
+	 */
+	igt_subtest("rc6")
+		test_rc6(fd);
+
+	/**
+	 * Test RC6p residency reporting.
+	 */
+	igt_subtest("rc6p")
+		test_rc6p(fd);
+
+	/**
+	 * Check render nodes are counted.
+	 */
+	igt_subtest_group {
+		int render_fd;
+
+		igt_fixture {
+			render_fd = drm_open_driver_render(DRIVER_INTEL);
+			igt_require_gem(render_fd);
+
+			gem_quiescent_gpu(fd);
+		}
+
+		for_each_engine_class_instance(fd, e) {
+			igt_subtest_f("render-node-busy-%s", e->name)
+				single(fd, e, true);
+		}
+
+		igt_fixture {
+			close(render_fd);
+		}
+	}
+}
-- 
2.9.5

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  parent reply	other threads:[~2017-09-29 12:39 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-09-29 12:39 [PATCH v3 i-g-t 0/7] IGT PMU support Tvrtko Ursulin
2017-09-29 12:39 ` [PATCH i-g-t 1/7] intel-gpu-overlay: Move local perf implementation to a library Tvrtko Ursulin
2017-09-29 13:43   ` Petri Latvala
2017-10-06 15:25     ` Tvrtko Ursulin
2017-10-09  9:22       ` Petri Latvala
2017-10-09  9:54         ` Tvrtko Ursulin
2017-10-09 10:25           ` Petri Latvala
2017-09-29 12:39 ` [PATCH i-g-t 2/7] intel-gpu-overlay: Consolidate perf PMU access to library Tvrtko Ursulin
2017-09-29 12:39 ` [PATCH i-g-t 3/7] intel-gpu-overlay: Fix interrupts PMU readout Tvrtko Ursulin
2017-09-29 12:39 ` [PATCH i-g-t 4/7] intel-gpu-overlay: Catch-up to new i915 PMU Tvrtko Ursulin
2017-09-29 13:04   ` Chris Wilson
2017-09-29 12:39 ` Tvrtko Ursulin [this message]
2017-09-29 12:58   ` [PATCH i-g-t 5/7] tests/perf_pmu: Tests for i915 PMU API Petri Latvala
2017-09-29 12:39 ` [PATCH i-g-t 6/7] gem_wsim: Busy stats balancers Tvrtko Ursulin
2017-09-29 12:39 ` [PATCH i-g-t 7/7] media-bench.pl: Add busy balancers to the list Tvrtko Ursulin
2017-09-29 14:09 ` ✓ Fi.CI.BAT: success for IGT PMU support (rev5) Patchwork
2017-09-29 17:02 ` ✗ Fi.CI.IGT: warning " Patchwork
  -- strict thread matches above, loose matches on Subject: below --
2017-09-25 15:14 [PATCH i-g-t v2 0/7] IGT PMU support Tvrtko Ursulin
2017-09-25 15:15 ` [PATCH i-g-t 5/7] tests/perf_pmu: Tests for i915 PMU API Tvrtko Ursulin
2017-09-25 16:21   ` Chris Wilson
2017-09-26 11:19     ` Tvrtko Ursulin
2017-09-26 11:42       ` Chris Wilson
2017-10-09 10:32         ` Tvrtko Ursulin
2017-10-09 10:55           ` Chris Wilson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170929123939.3312-6-tvrtko.ursulin@linux.intel.com \
    --to=tursulin@ursulin.net \
    --cc=Intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.