All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC i-g-t v1] tests/perf_pmu: test i915 RFC PMU
@ 2017-09-01 15:57 Dmitry Rogozhkin
  2017-09-01 15:57 ` [RFC i-g-t v2] " Dmitry Rogozhkin
  2017-09-02  0:19 ` ✓ Fi.CI.BAT: success for tests/perf_pmu: test i915 RFC PMU (rev4) Patchwork
  0 siblings, 2 replies; 3+ messages in thread
From: Dmitry Rogozhkin @ 2017-09-01 15:57 UTC (permalink / raw)
  To: intel-gfx

i915 RFC PMU:
* https://patchwork.freedesktop.org/series/27488/
* https://patchwork.freedesktop.org/series/28842/

Tests:
* init: try to initialize all possible metrics exposed in i915 PMU
  (limit to 0-instance of engines)
* invalid_init: verify that i915 PMU correctly errors out on invalid
  initialization
* single: verify that BUSY metrics work for each engine
* parallel: verify that parallel requests for metrics do not conflict
* cpu_online: verify PMU context migration on CPUs going online/offline

v1: add cpu_online test

Signed-off-by: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
---
 tests/Makefile.sources |   1 +
 tests/perf_pmu.c       | 629 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 630 insertions(+)
 create mode 100644 tests/perf_pmu.c

diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index bb013c7..51b684b 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -215,6 +215,7 @@ TESTS_progs = \
 	kms_vblank \
 	meta_test \
 	perf \
+	perf_pmu \
 	pm_backlight \
 	pm_lpsp \
 	pm_rc6_residency \
diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
new file mode 100644
index 0000000..428b2f9
--- /dev/null
+++ b/tests/perf_pmu.c
@@ -0,0 +1,629 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "igt.h"
+#include "igt_sysfs.h"
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/poll.h>
+#include <sys/time.h>
+#include <time.h>
+#include "drm.h"
+
+#define LOCAL_I915_EXEC_NO_RELOC (1<<11)
+#define LOCAL_I915_EXEC_HANDLE_LUT (1<<12)
+
+#define USAGE_TOLERANCE 0.02
+
+////////////////////////////////////////////////////////////////////////
+// This is a copy of perf.h from intel-gpu-tools/overlay
+// because I am too lazy to move it to some common library
+////////////////////////////////////////////////////////////////////////
+
+#include <linux/perf_event.h>
+
+enum drm_i915_gem_engine_class {
+	I915_ENGINE_CLASS_OTHER = 0,
+	I915_ENGINE_CLASS_RENDER = 1,
+	I915_ENGINE_CLASS_COPY = 2,
+	I915_ENGINE_CLASS_VIDEO = 3,
+	I915_ENGINE_CLASS_VIDEO_ENHANCE = 4,
+	I915_ENGINE_CLASS_MAX /* non-ABI */
+};
+
+enum drm_i915_pmu_engine_sample {
+	I915_SAMPLE_QUEUED = 0,
+	I915_SAMPLE_BUSY = 1,
+	I915_SAMPLE_WAIT = 2,
+	I915_SAMPLE_SEMA = 3
+};
+
+#define I915_PMU_SAMPLE_BITS (4)
+#define I915_PMU_SAMPLE_MASK (0xf)
+#define I915_PMU_SAMPLE_INSTANCE_BITS (8)
+#define I915_PMU_CLASS_SHIFT \
+	(I915_PMU_SAMPLE_BITS + I915_PMU_SAMPLE_INSTANCE_BITS)
+
+#define __I915_PMU_ENGINE(class, instance, sample) \
+	((class) << I915_PMU_CLASS_SHIFT | \
+	(instance) << I915_PMU_SAMPLE_BITS | \
+	(sample))
+
+#define I915_PMU_ENGINE_QUEUED(class, instance) \
+	__I915_PMU_ENGINE(class, instance, I915_SAMPLE_QUEUED)
+
+#define I915_PMU_ENGINE_BUSY(class, instance) \
+	__I915_PMU_ENGINE(class, instance, I915_SAMPLE_BUSY)
+
+#define I915_PMU_ENGINE_WAIT(class, instance) \
+	__I915_PMU_ENGINE(class, instance, I915_SAMPLE_WAIT)
+
+#define I915_PMU_ENGINE_SEMA(class, instance) \
+	__I915_PMU_ENGINE(class, instance, I915_SAMPLE_SEMA)
+
+#define __I915_PMU_OTHER(x) (__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x))
+
+#define I915_PMU_ACTUAL_FREQUENCY 	__I915_PMU_OTHER(0)
+#define I915_PMU_REQUESTED_FREQUENCY	__I915_PMU_OTHER(1)
+#define I915_PMU_ENERGY			__I915_PMU_OTHER(2)
+#define I915_PMU_INTERRUPTS		__I915_PMU_OTHER(3)
+
+#define I915_PMU_RC6_RESIDENCY		__I915_PMU_OTHER(4)
+#define I915_PMU_RC6p_RESIDENCY		__I915_PMU_OTHER(5)
+#define I915_PMU_RC6pp_RESIDENCY	__I915_PMU_OTHER(6)
+
+static inline int
+perf_event_open(struct perf_event_attr *attr,
+		pid_t pid,
+		int cpu,
+		int group_fd,
+		unsigned long flags)
+{
+#ifndef __NR_perf_event_open
+#if defined(__i386__)
+#define __NR_perf_event_open 336
+#elif defined(__x86_64__)
+#define __NR_perf_event_open 298
+#else
+#define __NR_perf_event_open 0
+#endif
+#endif
+    attr->size = sizeof(*attr);
+    return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
+}
+
+static uint64_t i915_type_id(void)
+{
+	char buf[1024];
+	int fd, n;
+
+	fd = open("/sys/bus/event_source/devices/i915/type", 0);
+	if (fd < 0) {
+		n = -1;
+	} else {
+		n = read(fd, buf, sizeof(buf)-1);
+		close(fd);
+	}
+	if (n < 0)
+		return 0;
+
+	buf[n] = '\0';
+	return strtoull(buf, 0, 0);
+}
+
+////////////////////////////////////////////////////////////////////////
+
+static double elapsed(const struct timespec *start, const struct timespec *end)
+{
+	return ((end->tv_sec - start->tv_sec) +
+		(end->tv_nsec - start->tv_nsec)*1e-9);
+}
+
+static uint64_t elapsed_ns(const struct timespec *start, const struct timespec *end)
+{
+	/* Nanoseconds from *start to *end in exact integer math (no round-trip through double). */
+	return (int64_t)(end->tv_sec - start->tv_sec) * 1000000000LL + (end->tv_nsec - start->tv_nsec);
+}
+
+static void nop_on_ring(int fd, uint32_t handle, unsigned ring_id, int timeout)
+{
+	struct drm_i915_gem_execbuffer2 execbuf;
+	struct drm_i915_gem_exec_object2 obj;
+	struct timespec start, now;
+
+	gem_require_ring(fd, ring_id);
+
+	memset(&obj, 0, sizeof(obj));
+	obj.handle = handle;
+
+	memset(&execbuf, 0, sizeof(execbuf));
+	execbuf.buffers_ptr = to_user_pointer(&obj);
+	execbuf.buffer_count = 1;
+	execbuf.flags = ring_id;
+	execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT;
+	execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;
+
+	clock_gettime(CLOCK_MONOTONIC, &start);
+	if (__gem_execbuf(fd, &execbuf)) {
+		execbuf.flags = ring_id;
+		gem_execbuf(fd, &execbuf);
+	}
+
+	do {
+		for (int loop = 0; loop < 1024; loop++) {
+			gem_execbuf(fd, &execbuf);
+		}
+		clock_gettime(CLOCK_MONOTONIC, &now);
+	} while (elapsed(&start, &now) < timeout);
+	gem_sync(fd, handle);
+}
+
+static int perf_i915_open(int config, int group, int read_format)
+{
+	struct perf_event_attr attr;
+
+	memset(&attr, 0, sizeof (attr));
+
+	attr.type = i915_type_id();
+	if (attr.type == 0)
+		return -ENOENT;
+	attr.config = config;
+
+	attr.read_format = read_format;
+	if (group != -1)
+		attr.read_format &= ~PERF_FORMAT_GROUP;
+
+	return perf_event_open(&attr, -1, 0, group, 0);
+}
+
+struct metric {
+	int config;
+	uint64_t old_value;
+	uint64_t value;
+};
+
+struct pmu_metrics {
+	int fd;
+	int read_format;
+	int num_metrics;
+	struct metric* metrics;
+};
+
+static int perf_init(struct pmu_metrics *pm, int num_configs, int* configs)
+{
+	int i, res;
+
+	memset(pm, 0, sizeof(struct pmu_metrics));
+	pm->fd = -1;
+	pm->read_format =
+		PERF_FORMAT_TOTAL_TIME_ENABLED |
+		PERF_FORMAT_GROUP;
+	pm->metrics = (struct metric*)calloc(num_configs, sizeof(struct metric));
+	if (!pm->metrics)
+		return -1;
+
+	for (i = 0; i < num_configs; ++i) {
+		if (pm->fd < 0)
+			res = pm->fd = perf_i915_open(configs[i], -1, pm->read_format);
+		else
+			res = perf_i915_open(configs[i], pm->fd, pm->read_format);
+		if (res >= 0) {
+			pm->metrics[pm->num_metrics++].config = configs[i];
+		}
+	}
+
+	igt_info("perf_init: enabled %d metrics from %d requested\n",
+		pm->num_metrics, num_configs);
+
+	return 0;
+}
+
+static void perf_close(struct pmu_metrics *pm)
+{
+	if (pm->fd >= 0) { close(pm->fd); } /* a failed open leaves -1 or -ENOENT here; never close() those */
+	pm->fd = -1; free(pm->metrics); pm->metrics = NULL; /* free(NULL) is a no-op, no guard needed */
+}
+
+/* see 'man 2 perf_event_open' */
+struct perf_read_format {
+	uint64_t nr_values;     /* The number of events */
+	uint64_t time_enabled;  /* if PERF_FORMAT_TOTAL_TIME_ENABLED */
+	struct {
+		uint64_t value;     /* The value of the event */
+	} values[1024];
+};
+
+static int perf_read(struct pmu_metrics *pm)
+{
+	int read_format =
+		PERF_FORMAT_TOTAL_TIME_ENABLED |
+		PERF_FORMAT_GROUP;
+	struct perf_read_format data;
+	ssize_t len;
+	int i;
+	/* Read one group sample into pm->metrics[]; returns 0 on success, -1 on failure. */
+	if (pm->fd < 0)
+		return -1;
+	/* struct perf_read_format only describes this exact read_format layout. */
+	if (pm->read_format != read_format)
+		return -1;
+
+	len = read(pm->fd, &data, sizeof(data));
+	/* Error or short read: the header (hence nr_values) would be uninitialised. */
+	if (len < (ssize_t)(sizeof(data.nr_values) + sizeof(data.time_enabled)))
+		return -1;
+	/* The group must report exactly the events perf_init() managed to open. */
+	if (data.nr_values != (uint64_t)pm->num_metrics)
+		return -1;
+	/* Shift the previous sample to old_value and store the new one. */
+	for (i = 0; i < pm->num_metrics; ++i) {
+		pm->metrics[i].old_value = pm->metrics[i].value;
+		pm->metrics[i].value = data.values[i].value;
+	}
+
+	return 0;
+}
+
+static const char* perf_get_metric_name(int config)
+{
+	switch (config) {
+		case I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0):
+			return "i915/rcs0-busy/";
+		case I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO, 0):
+			return "i915/vcs0-busy/";
+		case I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO, 1):
+			return "i915/vcs1-busy/";
+		case I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_COPY, 0):
+			return "i915/bcs0-busy/";
+		case I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO_ENHANCE, 0):
+			return "i915/vecs0-busy/";
+		default:
+			return "i915/unknown/";
+	}
+}
+
+static uint64_t perf_elapsed(struct metric* m)
+{
+	return m->value - m->old_value;
+}
+
+static void test_init(void)
+{
+	struct pmu_metrics pm;
+	unsigned int class[] =
+	{
+		I915_ENGINE_CLASS_RENDER,
+		I915_ENGINE_CLASS_VIDEO,
+		I915_ENGINE_CLASS_VIDEO,
+		I915_ENGINE_CLASS_COPY,
+		I915_ENGINE_CLASS_VIDEO_ENHANCE,
+	};
+	int* configs = malloc(1024 * sizeof(int));
+	int num_configs = 0;
+
+	igt_assert(configs != NULL);
+
+	for (int i=0; i < sizeof(class)/sizeof(class[0]); ++i) {
+		/* TODO Adding metrics for 0-instances only. Would be nice
+		 * to get everything, but for that we either need to add
+		 * check for different platforms here or use upcoming
+		 * engines discover API.
+		 */
+		configs[num_configs++] = I915_PMU_ENGINE_BUSY(class[i], 0);
+		configs[num_configs++] = I915_PMU_ENGINE_QUEUED(class[i], 0);
+		configs[num_configs++] = I915_PMU_ENGINE_WAIT(class[i], 0);
+		configs[num_configs++] = I915_PMU_ENGINE_SEMA(class[i], 0);
+	}
+	configs[num_configs++] = I915_PMU_ACTUAL_FREQUENCY;
+	configs[num_configs++] = I915_PMU_REQUESTED_FREQUENCY;
+	configs[num_configs++] = I915_PMU_ENERGY;
+	configs[num_configs++] = I915_PMU_RC6_RESIDENCY;
+	configs[num_configs++] = I915_PMU_RC6p_RESIDENCY;
+	configs[num_configs++] = I915_PMU_RC6pp_RESIDENCY;
+
+	igt_assert_eq(perf_init(&pm, num_configs, configs), 0);
+	igt_assert_eq(perf_read(&pm), 0);
+	igt_assert_eq(pm.num_metrics, num_configs);
+
+	perf_close(&pm);
+}
+
+/* Tests that i915 PMU correctly errors out on invalid initialization.
+ * i915 PMU is an uncore PMU, thus:
+ *  - sampling period is not supported
+ *  - pid >= 0 is not supported since we can't count per-process (we count
+ *    for the whole system)
+ *  - cpu != 0 is not supported since i915 PMU exposes cpumask for CPU0
+ */
+static void test_invalid_init(void)
+{
+	struct perf_event_attr attr;
+	int pid, cpu;
+
+#define ATTR_INIT() \
+	do { \
+		memset(&attr, 0, sizeof (attr)); \
+		attr.config = I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0); \
+		attr.type = i915_type_id(); \
+		igt_assert(attr.type != 0); \
+	} while(0)
+
+	ATTR_INIT();
+	attr.sample_period = 100;
+	pid = -1;
+	cpu = 0;
+	igt_assert_eq(perf_event_open(&attr, pid, cpu, -1, 0), -1);
+	igt_assert_eq(errno, EINVAL);
+	
+	ATTR_INIT();
+	pid = 0;
+	cpu = 0;
+	igt_assert_eq(perf_event_open(&attr, pid, cpu, -1, 0), -1);
+	igt_assert_eq(errno, EINVAL);
+
+	ATTR_INIT();
+	pid = -1;
+	cpu = 1;
+	igt_assert_eq(perf_event_open(&attr, pid, cpu, -1, 0), -1);
+	igt_assert_eq(errno, ENODEV);
+}
+
+static int test_single(int fd, uint32_t handle)
+{
+	struct {
+		const char* engine_name;
+		unsigned int class;
+		unsigned int instance;
+		unsigned int ring_id;
+	} engines[] = {
+		{ "rcs0", I915_ENGINE_CLASS_RENDER, 0, I915_EXEC_RENDER },
+		{ "vcs0", I915_ENGINE_CLASS_VIDEO, 0, I915_EXEC_BSD | I915_EXEC_BSD_RING1 },
+		{ "vcs1", I915_ENGINE_CLASS_VIDEO, 1, I915_EXEC_BSD | I915_EXEC_BSD_RING2 },
+		{ "bcs0", I915_ENGINE_CLASS_COPY, 0, I915_EXEC_BLT },
+		{ "vecs0", I915_ENGINE_CLASS_VIDEO_ENHANCE, 0, I915_EXEC_VEBOX },
+	};
+	struct pmu_metrics pm;
+	int configs[] = {
+		I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0),
+		I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO, 0),
+		I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO, 1),
+		I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_COPY, 0),
+		I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO_ENHANCE, 0),
+	};
+	int num_configs = sizeof(configs)/sizeof(configs[0]);
+	struct timespec start, now;
+	/* Load each engine in turn and check the BUSY counters; returns the number of engines tried. */
+	igt_assert_eq(perf_init(&pm, num_configs, configs), 0);
+	igt_assert_eq(pm.num_metrics, num_configs);
+
+	for (int i = 0; i < sizeof(engines)/sizeof(engines[0]); ++i) {
+		clock_gettime(CLOCK_MONOTONIC, &start);
+		igt_assert_eq(perf_read(&pm), 0);
+
+		/* Create almost 100% load on the examined engine for specified time. */
+		nop_on_ring(fd, handle, engines[i].ring_id, 20);
+
+		igt_assert_eq(perf_read(&pm), 0);
+		clock_gettime(CLOCK_MONOTONIC, &now);
+
+		igt_info("Executed on %s for %" PRIu64 "ns\n", engines[i].engine_name, elapsed_ns(&start, &now));
+		for (int j = 0; j < num_configs; ++j) {
+			igt_info("  %s: %" PRIu64 "ns\n", perf_get_metric_name(pm.metrics[j].config), perf_elapsed(&pm.metrics[j]));
+
+			igt_assert(perf_elapsed(&pm.metrics[j]) < elapsed_ns(&start, &now));
+
+			if (configs[j] == I915_PMU_ENGINE_BUSY(engines[i].class, engines[i].instance)) {
+				/* Check that the loaded engine had almost 100% load. */
+				igt_assert(perf_elapsed(&pm.metrics[j]) > (1 - USAGE_TOLERANCE) * elapsed_ns(&start, &now));
+			} else if (configs[j] == I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_COPY, 0)) {
+				/* Check that BCS engine had just tiny load.
+				 * NOTE Some load on BCS is unavoidable if you run under any graphical server,
+				 * so we can't check for zero.
+				 */
+				igt_assert(perf_elapsed(&pm.metrics[j]) < USAGE_TOLERANCE * elapsed_ns(&start, &now));
+			} else {
+				/* Check that other engines did not have any load (64-bit compare, no truncation).
+				 * NOTE This may fail if you have any other workload running in parallel to this test.
+				 */
+				igt_assert_eq_u64(perf_elapsed(&pm.metrics[j]), 0);
+			}
+		}
+	}
+	perf_close(&pm);
+
+	/* Return how many engines we have tried. */
+	return sizeof(engines)/sizeof(engines[0]);
+}
+
+static void test_parallel(int fd, uint32_t handle)
+{
+	struct pmu_metrics pm;
+	int configs[] = {
+		I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0),
+		I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO, 0),
+		I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO, 1),
+		I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_COPY, 0),
+		I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO_ENHANCE, 0),
+	};
+	int num_configs = sizeof(configs)/sizeof(configs[0]);
+	int num_engines;
+	struct timespec start, now;
+	/* Keep a second PMU consumer open across test_single() and check its totals. */
+	igt_assert_eq(perf_init(&pm, num_configs, configs), 0);
+	igt_assert_eq(pm.num_metrics, num_configs);
+
+	clock_gettime(CLOCK_MONOTONIC, &start);
+	igt_assert_eq(perf_read(&pm), 0);
+
+	/* Create almost 100% load on the engines one by one, we will get back
+	 * how many engines were tried.
+	 */
+	num_engines = test_single(fd, handle);
+
+	igt_assert_eq(perf_read(&pm), 0);
+	clock_gettime(CLOCK_MONOTONIC, &now);
+
+	igt_info("Executed on %d engines for %" PRIu64 "ns\n", num_engines, elapsed_ns(&start, &now));
+	for (int j = 0; j < num_configs; ++j) {
+		igt_info("  %s: %" PRIu64 "ns\n", perf_get_metric_name(pm.metrics[j].config), perf_elapsed(&pm.metrics[j]));
+
+		/* Since engines were loaded in turn, one by one, for roughly the same time,
+		 * each should have produced roughly the same load, inversely proportional
+		 * to the number of engines.
+		 */
+		igt_assert(perf_elapsed(&pm.metrics[j]) * num_engines > (1-USAGE_TOLERANCE) * elapsed_ns(&start, &now));
+		igt_assert(perf_elapsed(&pm.metrics[j]) * num_engines < (1+USAGE_TOLERANCE) * elapsed_ns(&start, &now));
+	}
+	perf_close(&pm);
+}
+
+static bool is_hotplug_cpu0(void)
+{
+	int fd = open("/sys/devices/system/cpu/cpu0/online", O_WRONLY);
+	if (fd == -1)
+		return false;
+	close(fd);
+	return true;
+}
+
+static void test_cpu_online(int fd, uint32_t handle)
+{
+	struct pmu_metrics pm;
+	int config = I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0);
+	struct timespec start, now;
+	int timeout = 32; // seconds
+	/* Check rcs0 BUSY keeps counting while a forked child cycles each CPU offline and back online. */
+	igt_require(is_hotplug_cpu0());
+
+	igt_assert_eq(perf_init(&pm, 1, &config), 0);
+	igt_assert_eq(pm.num_metrics, 1);
+
+	igt_fork(child, 1) {
+		int cpu_fd;
+		char cpu_name[64];
+		char online[] = "1";
+		char offline[] = "0";
+		useconds_t offline_time= 1000*1000; // 1 second
+
+		igt_info("attempting to put each CPU offline for 1 second:\n");
+		clock_gettime(CLOCK_MONOTONIC, &start);
+		do {
+			int cpu = 0;
+			do {
+				snprintf(cpu_name, sizeof(cpu_name), "/sys/devices/system/cpu/cpu%d/online", cpu);
+				cpu_fd = open(cpu_name, O_WRONLY);
+				if (cpu_fd == -1) {
+					igt_info("  no more CPUs, starting over...\n");
+					break;
+				}
+
+				igt_info("  CPU#%d: ", cpu);
+				if (-1 == write(cpu_fd, &offline, sizeof(offline))) {
+					igt_info("failed to put offline: ");
+				}
+				usleep(offline_time);
+				if (-1 == write(cpu_fd, &online, sizeof(online))) {
+					igt_info("failed to put it back online: ");
+				}
+				close(cpu_fd);
+				igt_info("done\n");
+				++cpu;
+			} while(1);
+			clock_gettime(CLOCK_MONOTONIC, &now);
+		} while(elapsed(&start, &now) < timeout);
+	}
+
+	clock_gettime(CLOCK_MONOTONIC, &start);
+	igt_assert_eq(perf_read(&pm), 0);
+
+	/* Create almost 100% load on the examined engine for specified time. */
+	nop_on_ring(fd, handle, I915_EXEC_RENDER, timeout);
+
+	igt_assert_eq(perf_read(&pm), 0);
+	clock_gettime(CLOCK_MONOTONIC, &now);
+
+	igt_waitchildren();
+
+	igt_info("Executed on rcs0 for %" PRIu64 "ns\n",elapsed_ns(&start, &now));
+	igt_info("  %s: %" PRIu64 "ns\n", perf_get_metric_name(pm.metrics[0].config), perf_elapsed(&pm.metrics[0]));
+
+	/* Check that the loaded engine had almost 100% load. */
+	igt_assert(perf_elapsed(&pm.metrics[0]) < elapsed_ns(&start, &now));
+	igt_assert(perf_elapsed(&pm.metrics[0]) > (1-USAGE_TOLERANCE) * elapsed_ns(&start, &now));
+
+	perf_close(&pm);
+}
+
+igt_main
+{
+	uint32_t handle = 0;
+	int device = -1;
+
+	igt_fixture {
+		const uint32_t bbe = MI_BATCH_BUFFER_END;
+
+		device = drm_open_driver(DRIVER_INTEL);
+		igt_require_gem(device);
+
+		handle = gem_create(device, 4096);
+		gem_write(device, handle, 0, &bbe, sizeof(bbe));
+
+		igt_fork_hang_detector(device);
+	}
+
+	/* Test that we can initialize all the metrics. */
+	igt_subtest_f("init")
+		test_init();
+
+	/* Test that invalid perf_event_open() parameters are rejected. */
+	igt_subtest_f("invalid_init")
+		test_invalid_init();
+
+	/* Test a single metrics consumer. */
+	igt_subtest_f("single")
+		test_single(device, handle);
+
+	/* Test parallel metrics consumers. */
+	igt_subtest_f("parallel")
+		test_parallel(device, handle);
+
+	/* Test PMU context migration to another CPU when CPUs go online/offline. */
+	igt_subtest_f("cpu_online")
+		test_cpu_online(device, handle);
+
+	igt_fixture {
+		igt_stop_hang_detector();
+		gem_close(device, handle);
+		close(device);
+	}
+}
-- 
1.8.3.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [RFC i-g-t v2] tests/perf_pmu: test i915 RFC PMU
  2017-09-01 15:57 [RFC i-g-t v1] tests/perf_pmu: test i915 RFC PMU Dmitry Rogozhkin
@ 2017-09-01 15:57 ` Dmitry Rogozhkin
  2017-09-02  0:19 ` ✓ Fi.CI.BAT: success for tests/perf_pmu: test i915 RFC PMU (rev4) Patchwork
  1 sibling, 0 replies; 3+ messages in thread
From: Dmitry Rogozhkin @ 2017-09-01 15:57 UTC (permalink / raw)
  To: intel-gfx

i915 RFC PMU:
* https://patchwork.freedesktop.org/series/27488/
* https://patchwork.freedesktop.org/series/28842/

Tests:
* init: try to initialize all possible metrics exposed in i915 PMU
  (limit to 0-instance of engines)
* invalid_init: verify that i915 PMU correctly errors out on invalid
  initialization
* enable: verify that events from parallel consumers can be disabled
  without affecting another consumer
* frequency: verify that actual frequency metric works as expected
* busy: verify that BUSY metrics work for each engine
* busy_parallel: verify that parallel requests for BUSY metrics do
  not conflict
* cpu_online: verify PMU context migration on CPUs going online/offline

v1: add cpu_online test

v2: add enable and frequency tests

Signed-off-by: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
---
 tests/Makefile.sources |   1 +
 tests/perf_pmu.c       | 824 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 825 insertions(+)
 create mode 100644 tests/perf_pmu.c

diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index bb013c7..51b684b 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -215,6 +215,7 @@ TESTS_progs = \
 	kms_vblank \
 	meta_test \
 	perf \
+	perf_pmu \
 	pm_backlight \
 	pm_lpsp \
 	pm_rc6_residency \
diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
new file mode 100644
index 0000000..f7b0904
--- /dev/null
+++ b/tests/perf_pmu.c
@@ -0,0 +1,824 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "igt.h"
+#include "igt_sysfs.h"
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/poll.h>
+#include <sys/time.h>
+#include <time.h>
+#include "drm.h"
+
+#define LOCAL_I915_EXEC_NO_RELOC (1<<11)
+#define LOCAL_I915_EXEC_HANDLE_LUT (1<<12)
+
+#define USAGE_TOLERANCE 0.02
+
+////////////////////////////////////////////////////////////////////////
+// This is a copy of perf.h from intel-gpu-tools/overlay
+// because I am too lazy to move it to some common library
+////////////////////////////////////////////////////////////////////////
+
+#include <linux/perf_event.h>
+
+enum drm_i915_gem_engine_class {
+	I915_ENGINE_CLASS_OTHER = 0,
+	I915_ENGINE_CLASS_RENDER = 1,
+	I915_ENGINE_CLASS_COPY = 2,
+	I915_ENGINE_CLASS_VIDEO = 3,
+	I915_ENGINE_CLASS_VIDEO_ENHANCE = 4,
+	I915_ENGINE_CLASS_MAX /* non-ABI */
+};
+
+enum drm_i915_pmu_engine_sample {
+	I915_SAMPLE_QUEUED = 0,
+	I915_SAMPLE_BUSY = 1,
+	I915_SAMPLE_WAIT = 2,
+	I915_SAMPLE_SEMA = 3
+};
+
+#define I915_PMU_SAMPLE_BITS (4)
+#define I915_PMU_SAMPLE_MASK (0xf)
+#define I915_PMU_SAMPLE_INSTANCE_BITS (8)
+#define I915_PMU_CLASS_SHIFT \
+	(I915_PMU_SAMPLE_BITS + I915_PMU_SAMPLE_INSTANCE_BITS)
+
+#define __I915_PMU_ENGINE(class, instance, sample) \
+	((class) << I915_PMU_CLASS_SHIFT | \
+	(instance) << I915_PMU_SAMPLE_BITS | \
+	(sample))
+
+#define I915_PMU_ENGINE_QUEUED(class, instance) \
+	__I915_PMU_ENGINE(class, instance, I915_SAMPLE_QUEUED)
+
+#define I915_PMU_ENGINE_BUSY(class, instance) \
+	__I915_PMU_ENGINE(class, instance, I915_SAMPLE_BUSY)
+
+#define I915_PMU_ENGINE_WAIT(class, instance) \
+	__I915_PMU_ENGINE(class, instance, I915_SAMPLE_WAIT)
+
+#define I915_PMU_ENGINE_SEMA(class, instance) \
+	__I915_PMU_ENGINE(class, instance, I915_SAMPLE_SEMA)
+
+#define __I915_PMU_OTHER(x) (__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x))
+
+#define I915_PMU_ACTUAL_FREQUENCY 	__I915_PMU_OTHER(0)
+#define I915_PMU_REQUESTED_FREQUENCY	__I915_PMU_OTHER(1)
+#define I915_PMU_ENERGY			__I915_PMU_OTHER(2)
+#define I915_PMU_INTERRUPTS		__I915_PMU_OTHER(3)
+
+#define I915_PMU_RC6_RESIDENCY		__I915_PMU_OTHER(4)
+#define I915_PMU_RC6p_RESIDENCY		__I915_PMU_OTHER(5)
+#define I915_PMU_RC6pp_RESIDENCY	__I915_PMU_OTHER(6)
+
+static inline int
+perf_event_open(struct perf_event_attr *attr,
+		pid_t pid,
+		int cpu,
+		int group_fd,
+		unsigned long flags)
+{
+#ifndef __NR_perf_event_open
+#if defined(__i386__)
+#define __NR_perf_event_open 336
+#elif defined(__x86_64__)
+#define __NR_perf_event_open 298
+#else
+#define __NR_perf_event_open 0
+#endif
+#endif
+    attr->size = sizeof(*attr);
+    return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
+}
+
+static uint64_t i915_type_id(void)
+{
+	char buf[1024];
+	int fd, n;
+
+	fd = open("/sys/bus/event_source/devices/i915/type", 0);
+	if (fd < 0) {
+		n = -1;
+	} else {
+		n = read(fd, buf, sizeof(buf)-1);
+		close(fd);
+	}
+	if (n < 0)
+		return 0;
+
+	buf[n] = '\0';
+	return strtoull(buf, 0, 0);
+}
+
+////////////////////////////////////////////////////////////////////////
+
+static double elapsed(const struct timespec *start, const struct timespec *end)
+{
+	return ((end->tv_sec - start->tv_sec) +
+		(end->tv_nsec - start->tv_nsec)*1e-9);
+}
+
+static uint64_t elapsed_ns(const struct timespec *start, const struct timespec *end)
+{
+	/* Nanoseconds from *start to *end in exact integer math (no round-trip through double). */
+	return (int64_t)(end->tv_sec - start->tv_sec) * 1000000000LL + (end->tv_nsec - start->tv_nsec);
+}
+
+static void nop_on_ring(int fd, uint32_t handle, unsigned ring_id, int timeout)
+{
+	struct drm_i915_gem_execbuffer2 execbuf;
+	struct drm_i915_gem_exec_object2 obj;
+	struct timespec start, now;
+
+	gem_require_ring(fd, ring_id);
+
+	memset(&obj, 0, sizeof(obj));
+	obj.handle = handle;
+
+	memset(&execbuf, 0, sizeof(execbuf));
+	execbuf.buffers_ptr = to_user_pointer(&obj);
+	execbuf.buffer_count = 1;
+	execbuf.flags = ring_id;
+	execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT;
+	execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;
+
+	clock_gettime(CLOCK_MONOTONIC, &start);
+	if (__gem_execbuf(fd, &execbuf)) {
+		execbuf.flags = ring_id;
+		gem_execbuf(fd, &execbuf);
+	}
+
+	do {
+		for (int loop = 0; loop < 1024; loop++) {
+			gem_execbuf(fd, &execbuf);
+		}
+		clock_gettime(CLOCK_MONOTONIC, &now);
+	} while (elapsed(&start, &now) < timeout);
+	gem_sync(fd, handle);
+}
+
+static int perf_i915_open(int config, int group, int read_format)
+{
+	struct perf_event_attr attr;
+
+	memset(&attr, 0, sizeof (attr));
+
+	attr.type = i915_type_id();
+	if (attr.type == 0)
+		return -ENOENT;
+	attr.config = config;
+
+	attr.read_format = read_format;
+	if (group != -1)
+		attr.read_format &= ~PERF_FORMAT_GROUP;
+
+	return perf_event_open(&attr, -1, 0, group, 0);
+}
+
+struct metric {
+	int config;
+	struct {
+		uint64_t value;
+		uint64_t time;
+	} start;
+	struct {
+		uint64_t value;
+		uint64_t time;
+	} end;
+};
+
+struct pmu_metrics {
+	int fd;
+	int read_format;
+	int num_metrics;
+	struct metric* metrics;
+};
+
+static int perf_init(struct pmu_metrics *pm, int num_configs, int* configs)
+{
+	int i, res;
+
+	memset(pm, 0, sizeof(struct pmu_metrics));
+	pm->fd = -1;
+	pm->read_format =
+		PERF_FORMAT_TOTAL_TIME_ENABLED |
+		PERF_FORMAT_GROUP;
+	pm->metrics = (struct metric*)calloc(num_configs, sizeof(struct metric));
+	if (!pm->metrics)
+		return -1;
+
+	for (i = 0; i < num_configs; ++i) {
+		if (pm->fd < 0)
+			res = pm->fd = perf_i915_open(configs[i], -1, pm->read_format);
+		else
+			res = perf_i915_open(configs[i], pm->fd, pm->read_format);
+		if (res >= 0) {
+			pm->metrics[pm->num_metrics++].config = configs[i];
+		}
+	}
+
+	igt_info("perf_init: enabled %d metrics from %d requested\n",
+		pm->num_metrics, num_configs);
+
+	return 0;
+}
+
+static void perf_close(struct pmu_metrics *pm)
+{
+	if (pm->fd >= 0) { close(pm->fd); } /* a failed open leaves -1 or -ENOENT here; never close() those */
+	pm->fd = -1; free(pm->metrics); pm->metrics = NULL; /* free(NULL) is a no-op, no guard needed */
+}
+
+/* see 'man 2 perf_event_open' */
+struct perf_read_format {
+	uint64_t nr_values;     /* The number of events */
+	uint64_t timestamp;     /* if PERF_FORMAT_TOTAL_TIME_ENABLED */
+	struct {
+		uint64_t value;     /* The value of the event */
+	} values[1024];
+};
+
+static int perf_read(struct pmu_metrics *pm)
+{
+	int read_format =
+		PERF_FORMAT_TOTAL_TIME_ENABLED |
+		PERF_FORMAT_GROUP;
+	struct perf_read_format data;
+	ssize_t len;
+	int i;
+	/* Read one group sample into pm->metrics[]; returns 0 on success, -1 on failure. */
+	if (pm->fd < 0)
+		return -1;
+	/* struct perf_read_format only describes this exact read_format layout. */
+	if (pm->read_format != read_format)
+		return -1;
+
+	len = read(pm->fd, &data, sizeof(data));
+	/* Error or short read: the header (hence nr_values) would be uninitialised. */
+	if (len < (ssize_t)(sizeof(data.nr_values) + sizeof(data.timestamp)))
+		return -1;
+	/* The group must report exactly the events perf_init() managed to open. */
+	if (data.nr_values != (uint64_t)pm->num_metrics)
+		return -1;
+	/* Shift the previous sample into .start and store the new one in .end. */
+	for (i = 0; i < pm->num_metrics; ++i) {
+		pm->metrics[i].start.value = pm->metrics[i].end.value;
+		pm->metrics[i].end.value = data.values[i].value;
+		if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
+			pm->metrics[i].start.time = pm->metrics[i].end.time;
+			pm->metrics[i].end.time = data.timestamp;
+		}
+	}
+
+	return 0;
+}
+
+static const char* perf_get_metric_name(int config)
+{
+	switch (config) {
+		case I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0):
+			return "i915/rcs0-busy/";
+		case I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO, 0):
+			return "i915/vcs0-busy/";
+		case I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO, 1):
+			return "i915/vcs1-busy/";
+		case I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_COPY, 0):
+			return "i915/bcs0-busy/";
+		case I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO_ENHANCE, 0):
+			return "i915/vecs0-busy/";
+		case I915_PMU_ACTUAL_FREQUENCY:
+			return "i915/actual-frequency/";
+		default:
+			return "i915/unknown/";
+	}
+}
+
+static uint64_t perf_elapsed(struct metric* m)
+{
+	return m->end.value - m->start.value;
+}
+
+static uint64_t perf_avg_freq(struct metric* m)
+{	/* Counter delta per unit of enabled time between the last two reads; 0 if no time elapsed. */
+	return m->end.time == m->start.time ? 0 : (m->end.value - m->start.value)/(m->end.time - m->start.time);
+}
+
+static int sysfs_read_i32(const char *name)
+{
+	char buf[4096];
+	int sysfd;
+	int len;
+
+	sprintf(buf, "/sys/class/drm/card%d/%s",
+		drm_get_card(), name);
+	sysfd = open(buf, O_RDONLY);
+	if (sysfd < 0)
+		return -1;
+
+	len = read(sysfd, buf, sizeof(buf)-1);
+	close(sysfd);
+	if (len < 0)
+		return -1;
+
+	buf[len] = '\0';
+	return atoi(buf);
+}
+
+static uint64_t debugfs_read_u64_16(const char *name)
+{
+	char buf[4096];
+	int sysfd;
+	int len;
+	/* Parse debugfs file `name` of the current DRM card as hex; returns 0 if it cannot be read. */
+	snprintf(buf, sizeof(buf), "/sys/kernel/debug/dri/%d/%s",
+		drm_get_card(), name);
+	sysfd = open(buf, O_RDONLY);
+	if (sysfd < 0)
+		return 0;
+
+	len = read(sysfd, buf, sizeof(buf)-1);
+	close(sysfd);
+	if (len < 0)
+		return 0;
+
+	buf[len] = '\0';
+	/* Unsigned parse: strtoll() would saturate at LLONG_MAX once bit 63 is set. */
+	return strtoull(buf, NULL, 16);
+}
+
+/* A config is treated as an engine event when it is numerically below
+ * __I915_PMU_OTHER(0), the first of the "other" configs.
+ */
+static bool is_engine_config(uint64_t config)
+{
+	const uint64_t first_other = __I915_PMU_OTHER(0);
+
+	return config < first_other;
+}
+
+#define ENGINE_SAMPLE_BITS (16)
+#define BIT_ULL(nr) (1ULL << (nr))
+
+/* Compute the enable-mask bit that corresponds to a single PMU config.
+ *
+ * Engine configs map to the bit indexed by (config & I915_PMU_SAMPLE_MASK).
+ * All other configs map to the bit indexed by their offset from
+ * __I915_PMU_OTHER(0), shifted up by ENGINE_SAMPLE_BITS.
+ */
+static uint64_t event_enabled_mask(uint64_t config)
+{
+	if (!is_engine_config(config))
+		return BIT_ULL(config - __I915_PMU_OTHER(0)) <<
+		       ENGINE_SAMPLE_BITS;
+
+	return BIT_ULL(config & I915_PMU_SAMPLE_MASK);
+}
+
+/* Build the list of PMU configs used by the init/enable tests: the busy,
+ * queued, wait and sema configs of instance 0 of each engine class, followed
+ * by the frequency, energy and RC6 residency configs.
+ *
+ * Each engine class appears exactly once. Instances are hard-coded to 0, so
+ * listing a class a second time would only add the same configs again.
+ *
+ * @configs: out parameter, receives a malloc()ed array that the caller must
+ *           release with free().
+ *
+ * Returns the number of entries stored in *configs.
+ */
+static int configure_all(int** configs)
+{
+	const unsigned int class[] =
+	{
+		I915_ENGINE_CLASS_RENDER,
+		I915_ENGINE_CLASS_VIDEO,
+		I915_ENGINE_CLASS_COPY,
+		I915_ENGINE_CLASS_VIDEO_ENHANCE,
+	};
+	const int other[] =
+	{
+		I915_PMU_ACTUAL_FREQUENCY,
+		I915_PMU_REQUESTED_FREQUENCY,
+		I915_PMU_ENERGY,
+		I915_PMU_RC6_RESIDENCY,
+		I915_PMU_RC6p_RESIDENCY,
+		I915_PMU_RC6pp_RESIDENCY,
+	};
+	/* busy, queued, wait and sema */
+	enum { CONFIGS_PER_ENGINE = 4 };
+	const size_t num_classes = sizeof(class)/sizeof(class[0]);
+	const size_t num_others = sizeof(other)/sizeof(other[0]);
+	/* Size the array from the tables so it cannot be outgrown. */
+	int* c = malloc((CONFIGS_PER_ENGINE * num_classes + num_others) * sizeof(*c));
+	int n = 0;
+
+	igt_assert(c != NULL);
+
+	for (size_t i = 0; i < num_classes; ++i) {
+		/* TODO Adding metrics for 0-instances only. Would be nice
+		 * to get everything, but for that we either need to add
+		 * check for different platforms here or use upcoming
+		 * engines discover API.
+		 */
+		c[n++] = I915_PMU_ENGINE_BUSY(class[i], 0);
+		c[n++] = I915_PMU_ENGINE_QUEUED(class[i], 0);
+		c[n++] = I915_PMU_ENGINE_WAIT(class[i], 0);
+		c[n++] = I915_PMU_ENGINE_SEMA(class[i], 0);
+	}
+	for (size_t i = 0; i < num_others; ++i)
+		c[n++] = other[i];
+
+	*configs = c;
+	return n;
+}
+
+/* OR together the enable-mask bits of the first @num_configs entries of
+ * @configs and return the combined mask.
+ */
+static uint64_t get_enabled_mask(int num_configs, int* configs)
+{
+	uint64_t combined = 0;
+	int idx = 0;
+
+	while (idx < num_configs) {
+		combined |= event_enabled_mask(configs[idx]);
+		idx++;
+	}
+
+	return combined;
+}
+
+/* Initialize every config returned by configure_all() through perf_init()
+ * and check the i915_pmu_enable_info debugfs file at each step: it must read
+ * 0 before initialization, the expected enable mask afterwards, and 0 again
+ * once the group has been disabled.
+ */
+static void test_init(void)
+{
+	struct pmu_metrics pm;
+	int* configs;
+	int num_configs = configure_all(&configs);
+	uint64_t enabled = get_enabled_mask(num_configs, configs);
+
+	/* Cast so the conversion specifier matches uint64_t on every ABI. */
+	igt_info("expected pmu enable mask: 0x%llx\n", (unsigned long long)enabled);
+
+	/* The masks are 64 bit wide: igt_assert_eq() would compare them as int. */
+	igt_assert_eq_u64(0, debugfs_read_u64_16("i915_pmu_enable_info"));
+
+	igt_assert_eq(perf_init(&pm, num_configs, configs), 0);
+	igt_assert_eq(pm.num_metrics, num_configs);
+
+	igt_assert_eq_u64(enabled, debugfs_read_u64_16("i915_pmu_enable_info"));
+	igt_assert_eq(perf_read(&pm), 0);
+
+	/* Fail here, with a clear message, if the group cannot be disabled. */
+	igt_assert_eq(ioctl(pm.fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP), 0);
+	igt_assert_eq_u64(0, debugfs_read_u64_16("i915_pmu_enable_info"));
+
+	perf_close(&pm);
+	free(configs);
+}
+
+/* Tests that i915 PMU correctly errors out on invalid initialization.
+ * i915 PMU is an uncore PMU, thus:
+ *  - sampling period is not supported
+ *  - pid != -1 is not supported since we can't count per-process (we count
+ *    for the whole system)
+ *  - cpu != 0 is not supported since i915 PMU exposes cpumask for CPU0
+ *
+ * Each case starts from a freshly reset attribute and changes exactly one
+ * parameter, then checks both the -1 return value and the errno.
+ */
+static void test_invalid_init(void)
+{
+	struct perf_event_attr attr;
+	int pid, cpu;
+
+#define ATTR_INIT() \
+	do { \
+		memset(&attr, 0, sizeof (attr)); \
+		attr.config = I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0); \
+		attr.type = i915_type_id(); \
+		igt_assert(attr.type != 0); \
+	} while(0)
+
+	/* Sampling period set: expect EINVAL. */
+	ATTR_INIT();
+	attr.sample_period = 100;
+	pid = -1;
+	cpu = 0;
+	igt_assert_eq(perf_event_open(&attr, pid, cpu, -1, 0), -1);
+	igt_assert_eq(errno, EINVAL);
+
+	/* pid 0 instead of -1: expect EINVAL. */
+	ATTR_INIT();
+	pid = 0;
+	cpu = 0;
+	igt_assert_eq(perf_event_open(&attr, pid, cpu, -1, 0), -1);
+	igt_assert_eq(errno, EINVAL);
+
+	/* CPU other than 0: expect ENODEV. */
+	ATTR_INIT();
+	pid = -1;
+	cpu = 1;
+	igt_assert_eq(perf_event_open(&attr, pid, cpu, -1, 0), -1);
+	igt_assert_eq(errno, ENODEV);
+
+	/* The helper is local to this test; do not leak it to later code. */
+#undef ATTR_INIT
+}
+
+/* Initialize the same set of configs twice (pm1 and pm2) and check through
+ * the i915_pmu_enable_info debugfs file that the enable mask stays set while
+ * at least one of the two is still enabled, and reads 0 only after both have
+ * been disabled.
+ */
+static void test_enable(void)
+{
+	struct pmu_metrics pm1, pm2;
+	int* configs;
+	int num_configs = configure_all(&configs);
+	uint64_t enabled = get_enabled_mask(num_configs, configs);
+
+	/* Cast so the conversion specifier matches uint64_t on every ABI. */
+	igt_info("expected pmu enable mask: 0x%llx\n", (unsigned long long)enabled);
+
+	/* The masks are 64 bit wide: igt_assert_eq() would compare them as int. */
+	igt_assert_eq_u64(0, debugfs_read_u64_16("i915_pmu_enable_info"));
+
+	igt_assert_eq(perf_init(&pm1, num_configs, configs), 0);
+	igt_assert_eq(pm1.num_metrics, num_configs);
+
+	igt_assert_eq_u64(enabled, debugfs_read_u64_16("i915_pmu_enable_info"));
+
+	igt_assert_eq(perf_init(&pm2, num_configs, configs), 0);
+	igt_assert_eq(pm2.num_metrics, num_configs);
+	igt_assert_eq_u64(enabled, debugfs_read_u64_16("i915_pmu_enable_info"));
+
+	/* Disabling the second user must leave the events enabled for the first. */
+	igt_assert_eq(ioctl(pm2.fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP), 0);
+	igt_assert_eq_u64(enabled, debugfs_read_u64_16("i915_pmu_enable_info"));
+
+	/* Disabling the last user must clear the mask. */
+	igt_assert_eq(ioctl(pm1.fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP), 0);
+	igt_assert_eq_u64(0, debugfs_read_u64_16("i915_pmu_enable_info"));
+
+	perf_close(&pm2);
+	perf_close(&pm1);
+	free(configs);
+}
+
+/* Load the engines one at a time and compare the engine busy counters with
+ * the wall-clock time of each run.
+ *
+ * For every run the loaded engine must report almost the whole interval as
+ * busy, bcs0 may report a small load, and the remaining engines must report
+ * none.
+ *
+ * @fd:     DRM device file descriptor passed to nop_on_ring()
+ * @handle: batch buffer handle passed to nop_on_ring()
+ *
+ * Returns the number of engines that were exercised.
+ */
+static int test_single(int fd, uint32_t handle)
+{
+	struct {
+		const char* engine_name;
+		unsigned int class;
+		unsigned int instance;
+		unsigned int ring_id;
+	} engines[] = {
+		{ "rcs0", I915_ENGINE_CLASS_RENDER, 0, I915_EXEC_RENDER },
+		{ "vcs0", I915_ENGINE_CLASS_VIDEO, 0, I915_EXEC_BSD | I915_EXEC_BSD_RING1 },
+		{ "vcs1", I915_ENGINE_CLASS_VIDEO, 1, I915_EXEC_BSD | I915_EXEC_BSD_RING2 },
+		{ "bcs0", I915_ENGINE_CLASS_COPY, 0, I915_EXEC_BLT },
+		{ "vecs0", I915_ENGINE_CLASS_VIDEO_ENHANCE, 0, I915_EXEC_VEBOX },
+	};
+	struct pmu_metrics pm;
+	int configs[] = {
+		I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0),
+		I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO, 0),
+		I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO, 1),
+		I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_COPY, 0),
+		I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO_ENHANCE, 0),
+	};
+	int num_configs = sizeof(configs)/sizeof(configs[0]);
+	int num_engines = sizeof(engines)/sizeof(engines[0]);
+	struct timespec start, now;
+
+	igt_assert_eq(perf_init(&pm, num_configs, configs), 0);
+	igt_assert_eq(pm.num_metrics, num_configs);
+
+	for (int i = 0; i < num_engines; ++i) {
+		clock_gettime(CLOCK_MONOTONIC, &start);
+		igt_assert_eq(perf_read(&pm), 0);
+
+		/* Create almost 100% load on the examined engine for specified time. */
+		nop_on_ring(fd, handle, engines[i].ring_id, 20);
+
+		igt_assert_eq(perf_read(&pm), 0);
+		clock_gettime(CLOCK_MONOTONIC, &now);
+
+		/* Both values are compared against elapsed_ns(), so log them as ns. */
+		igt_info("Executed on %s for %ldns\n", engines[i].engine_name, elapsed_ns(&start, &now));
+		for (int j = 0; j < num_configs; ++j) {
+			igt_info("  %s: %lluns\n", perf_get_metric_name(pm.metrics[j].config),
+				 (unsigned long long)perf_elapsed(&pm.metrics[j]));
+
+			/* No engine can be busy for longer than the wall-clock interval. */
+			igt_assert(perf_elapsed(&pm.metrics[j]) < elapsed_ns(&start, &now));
+
+			if (configs[j] == I915_PMU_ENGINE_BUSY(engines[i].class, engines[i].instance)) {
+				/* Check that the loaded engine had almost 100% load. */
+				igt_assert(perf_elapsed(&pm.metrics[j]) > (1 - USAGE_TOLERANCE) * elapsed_ns(&start, &now));
+			} else if (configs[j] == I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_COPY, 0)) {
+				/* Check that BCS engine had just tiny load.
+				 * NOTE Some load on BCS is non-avoidable if you run under any graphical server,
+				 * so we can't check for zero.
+				 */
+				igt_assert(perf_elapsed(&pm.metrics[j]) < USAGE_TOLERANCE * elapsed_ns(&start, &now));
+			} else {
+				/* Check that other engines did not have any load.
+				 * NOTE This may fail if you have any other workload running in parallel to this test.
+				 * The counter is 64 bit wide, so it is compared as such.
+				 */
+				igt_assert_eq_u64(perf_elapsed(&pm.metrics[j]), 0);
+			}
+		}
+	}
+	perf_close(&pm);
+
+	/* Return how many engines we have tried. */
+	return num_engines;
+}
+
+/* Keep one set of engine busy counters open while test_single() runs with a
+ * second set of its own, then check that the outer counters saw each engine
+ * busy for its share of the total time.
+ *
+ * @fd:     DRM device file descriptor forwarded to test_single()
+ * @handle: batch buffer handle forwarded to test_single()
+ */
+static void test_parallel(int fd, uint32_t handle)
+{
+	struct pmu_metrics pm;
+	int configs[] = {
+		I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0),
+		I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO, 0),
+		I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO, 1),
+		I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_COPY, 0),
+		I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO_ENHANCE, 0),
+	};
+	int num_configs = sizeof(configs)/sizeof(configs[0]);
+	int num_engines;
+	struct timespec start, now;
+
+	igt_assert_eq(perf_init(&pm, num_configs, configs), 0);
+	igt_assert_eq(pm.num_metrics, num_configs);
+
+	clock_gettime(CLOCK_MONOTONIC, &start);
+	igt_assert_eq(perf_read(&pm), 0);
+
+	/* Create almost 100% load on the engines one by one, we will get back
+	 * how many engines were tried.
+	 */
+	num_engines = test_single(fd, handle);
+
+	igt_assert_eq(perf_read(&pm), 0);
+	clock_gettime(CLOCK_MONOTONIC, &now);
+
+	/* Both values are compared against elapsed_ns(), so log them as ns. */
+	igt_info("Executed on %d engines for %ldns\n", num_engines, elapsed_ns(&start, &now));
+	for (int j = 0; j < num_configs; ++j) {
+		igt_info("  %s: %lluns\n", perf_get_metric_name(pm.metrics[j].config),
+			 (unsigned long long)perf_elapsed(&pm.metrics[j]));
+
+		/* Since the engines were loaded in turns, one by one, for roughly
+		 * the same time, each should account for roughly 1/num_engines of
+		 * the whole interval.
+		 */
+		igt_assert(perf_elapsed(&pm.metrics[j]) * num_engines > (1-USAGE_TOLERANCE) * elapsed_ns(&start, &now));
+		igt_assert(perf_elapsed(&pm.metrics[j]) * num_engines < (1+USAGE_TOLERANCE) * elapsed_ns(&start, &now));
+	}
+	perf_close(&pm);
+}
+
+/* Report whether the sysfs "online" control file of CPU0 can be opened for
+ * writing. The file is only probed; nothing is written to it.
+ */
+static bool is_hotplug_cpu0(void)
+{
+	const int probe = open("/sys/devices/system/cpu/cpu0/online", O_WRONLY);
+	const bool writable = (probe != -1);
+
+	if (writable)
+		close(probe);
+
+	return writable;
+}
+
+/* Check that the rcs0 busy counter keeps counting while CPUs go offline and
+ * come back online.
+ *
+ * A forked child walks the CPUs through their sysfs "online" files, putting
+ * each one offline for a second, and starts over until the timeout expires.
+ * Meanwhile the parent loads the render engine and finally checks that the
+ * counter reports almost 100% load for the interval.
+ *
+ * Skipped unless the CPU0 "online" file can be opened for writing.
+ *
+ * @fd:     DRM device file descriptor passed to nop_on_ring()
+ * @handle: batch buffer handle passed to nop_on_ring()
+ */
+static void test_cpu_online(int fd, uint32_t handle)
+{
+	struct pmu_metrics pm;
+	int config = I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0);
+	struct timespec start, now;
+	int timeout = 32; // seconds
+
+	igt_require(is_hotplug_cpu0());
+
+	igt_assert_eq(perf_init(&pm, 1, &config), 0);
+	igt_assert_eq(pm.num_metrics, 1);
+
+	igt_fork(child, 1) {
+		int cpu_fd;
+		char cpu_name[64];
+		const char online[] = "1";
+		const char offline[] = "0";
+		useconds_t offline_time= 1000*1000; // 1 second
+
+		igt_info("attempting to put each CPU offline for 1 second:\n");
+		clock_gettime(CLOCK_MONOTONIC, &start);
+		do {
+			int cpu = 0;
+			do {
+				snprintf(cpu_name, sizeof(cpu_name),
+					 "/sys/devices/system/cpu/cpu%d/online", cpu);
+				cpu_fd = open(cpu_name, O_WRONLY);
+				if (cpu_fd == -1) {
+					igt_info("  no more CPUs, starting over...\n");
+					break;
+				}
+
+				igt_info("  CPU#%d: ", cpu);
+				/* Write the digit only, without the terminating NUL. */
+				if (-1 == write(cpu_fd, offline, sizeof(offline) - 1)) {
+					igt_info("failed to put offline: ");
+				}
+				usleep(offline_time);
+				if (-1 == write(cpu_fd, online, sizeof(online) - 1)) {
+					igt_info("failed to put it back online: ");
+				}
+				close(cpu_fd);
+				igt_info("done\n");
+				++cpu;
+			} while(1);
+			clock_gettime(CLOCK_MONOTONIC, &now);
+		} while(elapsed(&start, &now) < timeout);
+	}
+
+	clock_gettime(CLOCK_MONOTONIC, &start);
+	igt_assert_eq(perf_read(&pm), 0);
+
+	/* Create almost 100% load on the examined engine for specified time. */
+	nop_on_ring(fd, handle, I915_EXEC_RENDER, timeout);
+
+	igt_assert_eq(perf_read(&pm), 0);
+	clock_gettime(CLOCK_MONOTONIC, &now);
+
+	igt_waitchildren();
+
+	/* Both values are compared against elapsed_ns(), so log them as ns. */
+	igt_info("Executed on rcs0 for %ldns\n",elapsed_ns(&start, &now));
+	igt_info("  %s: %lluns\n", perf_get_metric_name(pm.metrics[0].config),
+		 (unsigned long long)perf_elapsed(&pm.metrics[0]));
+
+	/* Check that the loaded engine had almost 100% load. */
+	igt_assert(perf_elapsed(&pm.metrics[0]) < elapsed_ns(&start, &now));
+	igt_assert(perf_elapsed(&pm.metrics[0]) > (1-USAGE_TOLERANCE) * elapsed_ns(&start, &now));
+
+	perf_close(&pm);
+}
+
+/* Load the render engine and check that the average of the actual-frequency
+ * metric over the run lies between the gt_min_freq_mhz and gt_max_freq_mhz
+ * values read from sysfs.
+ *
+ * @fd:     DRM device file descriptor passed to nop_on_ring()
+ * @handle: batch buffer handle passed to nop_on_ring()
+ */
+static void test_frequency(int fd, uint32_t handle)
+{
+	struct pmu_metrics pm;
+	int configs[] = {
+		I915_PMU_ACTUAL_FREQUENCY
+	};
+	int num_configs = sizeof(configs)/sizeof(configs[0]);
+	struct timespec start, now;
+
+	igt_assert_eq(perf_init(&pm, num_configs, configs), 0);
+	igt_assert_eq(pm.num_metrics, num_configs);
+
+	clock_gettime(CLOCK_MONOTONIC, &start);
+	igt_assert_eq(perf_read(&pm), 0);
+
+	/* Create almost 100% load on rcs0 for the specified time. */
+	nop_on_ring(fd, handle, I915_EXEC_RENDER, 20);
+
+	igt_assert_eq(perf_read(&pm), 0);
+	clock_gettime(CLOCK_MONOTONIC, &now);
+
+	/* This test forks no children, so there is nothing to wait for here. */
+
+	igt_info("Executed on rcs0 for %ldns\n", elapsed_ns(&start, &now));
+	for (int j = 0; j < num_configs; ++j) {
+		if (pm.metrics[j].config == I915_PMU_ACTUAL_FREQUENCY) {
+			int min = sysfs_read_i32("gt_min_freq_mhz");
+			int max = sysfs_read_i32("gt_max_freq_mhz");
+			uint64_t avg = perf_avg_freq(&pm.metrics[j]);
+
+			/* sysfs_read_i32() returns -1 on failure. Compared with
+			 * an unsigned average, -1 would turn into a huge bound
+			 * and make the upper check pass unconditionally.
+			 */
+			igt_assert(min > 0);
+			igt_assert(max > 0);
+
+			igt_info("  %s: %llu MHz\n", perf_get_metric_name(pm.metrics[j].config),
+				 (unsigned long long)avg);
+
+			igt_assert(avg >= (uint64_t)min);
+			igt_assert(avg <= (uint64_t)max);
+		} else if (pm.metrics[j].config == I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0)) {
+			igt_info("  %s: %lluns\n", perf_get_metric_name(pm.metrics[j].config),
+				 (unsigned long long)perf_elapsed(&pm.metrics[j]));
+
+			/* rcs0 was loaded for the whole interval, so its busy
+			 * time should match the wall-clock time within the
+			 * tolerance.
+			 */
+			igt_assert(perf_elapsed(&pm.metrics[j]) > (1-USAGE_TOLERANCE) * elapsed_ns(&start, &now));
+			igt_assert(perf_elapsed(&pm.metrics[j]) < (1+USAGE_TOLERANCE) * elapsed_ns(&start, &now));
+		} else {
+			igt_assert(!"buggy test");
+		}
+	}
+	perf_close(&pm);
+}
+
+/* Test entry point: the first fixture opens the Intel DRM device, creates a
+ * 4096 byte buffer holding a single MI_BATCH_BUFFER_END and starts the hang
+ * detector; the subtests follow; the last fixture undoes the setup.
+ */
+igt_main
+{
+	uint32_t handle = 0;
+	int device = -1;
+
+	igt_fixture {
+		const uint32_t bbe = MI_BATCH_BUFFER_END;
+
+		device = drm_open_driver(DRIVER_INTEL);
+		igt_require_gem(device);
+
+		/* Batch buffer handed to the subtests that submit work. */
+		handle = gem_create(device, 4096);
+		gem_write(device, handle, 0, &bbe, sizeof(bbe));
+
+		igt_fork_hang_detector(device);
+	}
+
+	/* Test that we can initialize all the metrics. */
+	igt_subtest_f("init")
+		test_init();
+
+	/* Test that invalid initialization attempts are rejected. */
+	igt_subtest_f("invalid_init")
+		test_invalid_init();
+
+	/* Test concurrent events enable/disable. */
+	igt_subtest_f("enable")
+		test_enable();
+
+	/* Test frequency metrics consumer. */
+	igt_subtest_f("frequency")
+		test_frequency(device, handle);
+
+	/* Test single engines busy metrics consumer. */
+	igt_subtest_f("busy")
+		test_single(device, handle);
+
+	/* Test parallel engines busy metrics consumers. */
+	igt_subtest_f("busy_parallel")
+		test_parallel(device, handle);
+
+	/* Test pmu context migration to another CPU on cpu getting online/offline. */
+	igt_subtest_f("cpu_online")
+		test_cpu_online(device, handle);
+
+
+	/* Tear down in reverse order of the setup fixture. */
+	igt_fixture {
+		igt_stop_hang_detector();
+		gem_close(device, handle);
+		close(device);
+	}
+}
-- 
1.8.3.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* ✓ Fi.CI.BAT: success for tests/perf_pmu: test i915 RFC PMU (rev4)
  2017-09-01 15:57 [RFC i-g-t v1] tests/perf_pmu: test i915 RFC PMU Dmitry Rogozhkin
  2017-09-01 15:57 ` [RFC i-g-t v2] " Dmitry Rogozhkin
@ 2017-09-02  0:19 ` Patchwork
  1 sibling, 0 replies; 3+ messages in thread
From: Patchwork @ 2017-09-02  0:19 UTC (permalink / raw)
  To: Dmitry Rogozhkin; +Cc: intel-gfx

== Series Details ==

Series: tests/perf_pmu: test i915 RFC PMU (rev4)
URL   : https://patchwork.freedesktop.org/series/29313/
State : success

== Summary ==

IGT patchset tested on top of latest successful build
5ce65a9a51f17e0183e3e4f8943981ee7b96cadd pm_rps: Changes in waitboost scenario

with latest DRM-Tip kernel build CI_DRM_3028
5f8335a1a74c drm-tip: 2017y-09m-01d-19h-10m-49s UTC integration manifest

Test kms_cursor_legacy:
        Subgroup basic-busy-flip-before-cursor-atomic:
                pass       -> FAIL       (fi-snb-2600) fdo#100215 +1

fdo#100215 https://bugs.freedesktop.org/show_bug.cgi?id=100215

fi-bdw-5557u     total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:455s
fi-bdw-gvtdvm    total:288  pass:265  dwarn:0   dfail:0   fail:0   skip:23  time:442s
fi-blb-e6850     total:288  pass:224  dwarn:1   dfail:0   fail:0   skip:63  time:366s
fi-bsw-n3050     total:288  pass:243  dwarn:0   dfail:0   fail:0   skip:45  time:574s
fi-bwr-2160      total:288  pass:184  dwarn:0   dfail:0   fail:0   skip:104 time:253s
fi-bxt-j4205     total:288  pass:260  dwarn:0   dfail:0   fail:0   skip:28  time:527s
fi-byt-j1900     total:288  pass:254  dwarn:1   dfail:0   fail:0   skip:33  time:535s
fi-byt-n2820     total:288  pass:251  dwarn:0   dfail:0   fail:0   skip:37  time:520s
fi-elk-e7500     total:288  pass:230  dwarn:0   dfail:0   fail:0   skip:58  time:443s
fi-glk-2a        total:288  pass:260  dwarn:0   dfail:0   fail:0   skip:28  time:617s
fi-hsw-4770      total:288  pass:263  dwarn:0   dfail:0   fail:0   skip:25  time:451s
fi-hsw-4770r     total:288  pass:263  dwarn:0   dfail:0   fail:0   skip:25  time:435s
fi-ilk-650       total:288  pass:229  dwarn:0   dfail:0   fail:0   skip:59  time:421s
fi-ivb-3520m     total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:506s
fi-ivb-3770      total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:480s
fi-kbl-7500u     total:288  pass:264  dwarn:1   dfail:0   fail:0   skip:23  time:512s
fi-kbl-7560u     total:288  pass:269  dwarn:0   dfail:0   fail:0   skip:19  time:602s
fi-kbl-r         total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:592s
fi-skl-6260u     total:288  pass:269  dwarn:0   dfail:0   fail:0   skip:19  time:470s
fi-skl-6700k     total:288  pass:265  dwarn:0   dfail:0   fail:0   skip:23  time:536s
fi-skl-6770hq    total:288  pass:269  dwarn:0   dfail:0   fail:0   skip:19  time:491s
fi-skl-gvtdvm    total:288  pass:266  dwarn:0   dfail:0   fail:0   skip:22  time:444s
fi-skl-x1585l    total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:496s
fi-snb-2520m     total:288  pass:251  dwarn:0   dfail:0   fail:0   skip:37  time:551s
fi-snb-2600      total:288  pass:249  dwarn:0   dfail:0   fail:1   skip:38  time:404s

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_138/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2017-09-02  0:19 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-09-01 15:57 [RFC i-g-t v1] tests/perf_pmu: test i915 RFC PMU Dmitry Rogozhkin
2017-09-01 15:57 ` [RFC i-g-t v2] " Dmitry Rogozhkin
2017-09-02  0:19 ` ✓ Fi.CI.BAT: success for tests/perf_pmu: test i915 RFC PMU (rev4) Patchwork

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.