* [RFC i-g-t v1] tests/perf_pmu: test i915 RFC PMU
@ 2017-09-01 15:57 Dmitry Rogozhkin
2017-09-01 15:57 ` [RFC i-g-t v2] " Dmitry Rogozhkin
2017-09-02 0:19 ` ✓ Fi.CI.BAT: success for tests/perf_pmu: test i915 RFC PMU (rev4) Patchwork
0 siblings, 2 replies; 3+ messages in thread
From: Dmitry Rogozhkin @ 2017-09-01 15:57 UTC (permalink / raw)
To: intel-gfx
i915 RFC PMU:
* https://patchwork.freedesktop.org/series/27488/
* https://patchwork.freedesktop.org/series/28842/
Tests:
* init: try to initialize all possible metrics exposed in i915 PMU
(limit to 0-instance of engines)
* invalid_init: verify that i915 PMU correctly errors out on invalid
initialization
* single: verify that BUSY metrics work for each engine
* parallel: verify that parallel requests for metrics do not conflict
* cpu_online: verify PMU context migration on CPUs going online/offline
v1: add cpu_online test
Signed-off-by: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
---
tests/Makefile.sources | 1 +
tests/perf_pmu.c | 629 +++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 630 insertions(+)
create mode 100644 tests/perf_pmu.c
diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index bb013c7..51b684b 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -215,6 +215,7 @@ TESTS_progs = \
kms_vblank \
meta_test \
perf \
+ perf_pmu \
pm_backlight \
pm_lpsp \
pm_rc6_residency \
diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
new file mode 100644
index 0000000..428b2f9
--- /dev/null
+++ b/tests/perf_pmu.c
@@ -0,0 +1,629 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "igt.h"
+#include "igt_sysfs.h"
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/poll.h>
+#include <sys/time.h>
+#include <time.h>
+#include "drm.h"
+
+#define LOCAL_I915_EXEC_NO_RELOC (1<<11)
+#define LOCAL_I915_EXEC_HANDLE_LUT (1<<12)
+
+#define USAGE_TOLERANCE 0.02
+
+////////////////////////////////////////////////////////////////////////
+// This is a copy of perf.h from intel-gpu-tools/overlay,
+// duplicated here because I was too lazy to move it to a common library
+////////////////////////////////////////////////////////////////////////
+
+#include <linux/perf_event.h>
+
+/* Engine class identifiers; used as the "class" field of an engine PMU config. */
+enum drm_i915_gem_engine_class {
+ I915_ENGINE_CLASS_OTHER = 0,
+ I915_ENGINE_CLASS_RENDER = 1,
+ I915_ENGINE_CLASS_COPY = 2,
+ I915_ENGINE_CLASS_VIDEO = 3,
+ I915_ENGINE_CLASS_VIDEO_ENHANCE = 4,
+ I915_ENGINE_CLASS_MAX /* non-ABI */
+};
+
+/* Per-engine sample kinds; used as the "sample" field of an engine PMU config. */
+enum drm_i915_pmu_engine_sample {
+ I915_SAMPLE_QUEUED = 0,
+ I915_SAMPLE_BUSY = 1,
+ I915_SAMPLE_WAIT = 2,
+ I915_SAMPLE_SEMA = 3
+};
+
+/*
+ * Engine config layout built by __I915_PMU_ENGINE():
+ *   bits 0..3   sample   (I915_PMU_SAMPLE_BITS wide)
+ *   bits 4..11  instance (I915_PMU_SAMPLE_INSTANCE_BITS wide)
+ *   bits 12..   class    (shifted by I915_PMU_CLASS_SHIFT)
+ */
+#define I915_PMU_SAMPLE_BITS (4)
+#define I915_PMU_SAMPLE_MASK (0xf)
+#define I915_PMU_SAMPLE_INSTANCE_BITS (8)
+#define I915_PMU_CLASS_SHIFT \
+ (I915_PMU_SAMPLE_BITS + I915_PMU_SAMPLE_INSTANCE_BITS)
+
+/* Pack (class, instance, sample) into a single engine event config value. */
+#define __I915_PMU_ENGINE(class, instance, sample) \
+ ((class) << I915_PMU_CLASS_SHIFT | \
+ (instance) << I915_PMU_SAMPLE_BITS | \
+ (sample))
+
+/* Convenience wrappers, one per sample kind. */
+#define I915_PMU_ENGINE_QUEUED(class, instance) \
+ __I915_PMU_ENGINE(class, instance, I915_SAMPLE_QUEUED)
+
+#define I915_PMU_ENGINE_BUSY(class, instance) \
+ __I915_PMU_ENGINE(class, instance, I915_SAMPLE_BUSY)
+
+#define I915_PMU_ENGINE_WAIT(class, instance) \
+ __I915_PMU_ENGINE(class, instance, I915_SAMPLE_WAIT)
+
+#define I915_PMU_ENGINE_SEMA(class, instance) \
+ __I915_PMU_ENGINE(class, instance, I915_SAMPLE_SEMA)
+
+/*
+ * Non-engine configs are numbered starting right after the largest value
+ * __I915_PMU_ENGINE() yields for 0xff/0xff/0xf, so they cannot collide with
+ * engine configs built from fields within those limits.
+ */
+#define __I915_PMU_OTHER(x) (__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x))
+
+#define I915_PMU_ACTUAL_FREQUENCY __I915_PMU_OTHER(0)
+#define I915_PMU_REQUESTED_FREQUENCY __I915_PMU_OTHER(1)
+#define I915_PMU_ENERGY __I915_PMU_OTHER(2)
+#define I915_PMU_INTERRUPTS __I915_PMU_OTHER(3)
+
+#define I915_PMU_RC6_RESIDENCY __I915_PMU_OTHER(4)
+#define I915_PMU_RC6p_RESIDENCY __I915_PMU_OTHER(5)
+#define I915_PMU_RC6pp_RESIDENCY __I915_PMU_OTHER(6)
+
+/*
+ * Thin wrapper around the perf_event_open system call.
+ *
+ * Fills in attr->size with sizeof(*attr) and forwards all arguments to
+ * syscall(); the return value is whatever syscall() returns.
+ */
+static inline int
+perf_event_open(struct perf_event_attr *attr,
+ pid_t pid,
+ int cpu,
+ int group_fd,
+ unsigned long flags)
+{
+/* Supply a syscall number only when the system headers did not define one. */
+#ifndef __NR_perf_event_open
+#if defined(__i386__)
+#define __NR_perf_event_open 336
+#elif defined(__x86_64__)
+#define __NR_perf_event_open 298
+#else
+/* NOTE(review): on other architectures this falls back to syscall number 0. */
+#define __NR_perf_event_open 0
+#endif
+#endif
+ attr->size = sizeof(*attr);
+ return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
+}
+
+/*
+ * Read the perf event-source type id of the i915 PMU from sysfs.
+ *
+ * Returns the parsed number, or 0 when the sysfs file cannot be opened
+ * or read.
+ */
+static uint64_t i915_type_id(void)
+{
+	char text[1024];
+	int got;
+	int sysfs;
+
+	sysfs = open("/sys/bus/event_source/devices/i915/type", 0);
+	if (sysfs < 0)
+		return 0;
+
+	/* Leave one byte spare for the terminator added below. */
+	got = read(sysfs, text, sizeof(text)-1);
+	close(sysfs);
+	if (got < 0)
+		return 0;
+
+	text[got] = '\0';
+	return strtoull(text, 0, 0);
+}
+
+////////////////////////////////////////////////////////////////////////
+
+/* Time from *start to *end, in seconds, as a floating-point value. */
+static double elapsed(const struct timespec *start, const struct timespec *end)
+{
+	const double whole = end->tv_sec - start->tv_sec;
+	const double fraction = (end->tv_nsec - start->tv_nsec)*1e-9;
+
+	return whole + fraction;
+}
+
+/*
+ * Time from *start to *end, in nanoseconds.
+ *
+ * Computed entirely in 64-bit integers so that long intervals are not
+ * rounded by an intermediate conversion to double. *end is expected to be
+ * no earlier than *start.
+ */
+static uint64_t elapsed_ns(const struct timespec *start, const struct timespec *end)
+{
+	int64_t sec = (int64_t)end->tv_sec - (int64_t)start->tv_sec;
+	int64_t nsec = (int64_t)end->tv_nsec - (int64_t)start->tv_nsec;
+
+	return (uint64_t)(sec * INT64_C(1000000000) + nsec);
+}
+
+/*
+ * Keep submitting the batch in @handle to engine @ring_id on DRM fd @fd
+ * until at least @timeout seconds have passed, then gem_sync() on the handle.
+ *
+ * gem_require_ring() is called first for the requested ring.
+ */
+static void nop_on_ring(int fd, uint32_t handle, unsigned ring_id, int timeout)
+{
+	struct drm_i915_gem_exec_object2 batch;
+	struct drm_i915_gem_execbuffer2 eb;
+	struct timespec began, current;
+
+	gem_require_ring(fd, ring_id);
+
+	memset(&batch, 0, sizeof(batch));
+	batch.handle = handle;
+
+	memset(&eb, 0, sizeof(eb));
+	eb.buffers_ptr = to_user_pointer(&batch);
+	eb.buffer_count = 1;
+	eb.flags = ring_id |
+		LOCAL_I915_EXEC_HANDLE_LUT |
+		LOCAL_I915_EXEC_NO_RELOC;
+
+	clock_gettime(CLOCK_MONOTONIC, &began);
+
+	/* If the first submission is rejected, retry with only the ring id. */
+	if (__gem_execbuf(fd, &eb) != 0) {
+		eb.flags = ring_id;
+		gem_execbuf(fd, &eb);
+	}
+
+	for (;;) {
+		int remaining = 1024;
+
+		/* Submit in bursts so the clock is only sampled once per 1024. */
+		while (remaining-- > 0)
+			gem_execbuf(fd, &eb);
+
+		clock_gettime(CLOCK_MONOTONIC, &current);
+		if (!(elapsed(&began, &current) < timeout))
+			break;
+	}
+
+	gem_sync(fd, handle);
+}
+
+/*
+ * Open one i915 PMU event.
+ *
+ * @config:      event config value
+ * @group:       fd of the group leader, or -1 to open a new leader
+ * @read_format: PERF_FORMAT_* flags; PERF_FORMAT_GROUP is cleared when
+ *               joining an existing group
+ *
+ * Returns the result of perf_event_open() (pid -1, cpu 0), or -ENOENT when
+ * the i915 PMU type id cannot be determined.
+ */
+static int perf_i915_open(int config, int group, int read_format)
+{
+	struct perf_event_attr attr;
+
+	memset(&attr, 0, sizeof (attr));
+
+	attr.type = i915_type_id();
+	if (!attr.type)
+		return -ENOENT;
+
+	attr.config = config;
+	attr.read_format = (group == -1) ?
+		read_format : (read_format & ~PERF_FORMAT_GROUP);
+
+	return perf_event_open(&attr, -1, 0, group, 0);
+}
+
+/* One opened PMU event together with its two most recent counter readings. */
+struct metric {
+ int config; /* event config the counter was opened with */
+ uint64_t old_value; /* reading before the latest perf_read() */
+ uint64_t value; /* reading from the latest perf_read() */
+};
+
+/* A group of PMU events that is read through a single group-leader fd. */
+struct pmu_metrics {
+ int fd; /* group leader fd; set to -1 by perf_init()/perf_close() */
+ int read_format; /* PERF_FORMAT_* flags the group was opened with */
+ int num_metrics; /* number of valid entries in metrics[] */
+ struct metric* metrics; /* calloc'ed by perf_init(), freed by perf_close() */
+};
+
+/*
+ * Open a group of i915 PMU events.
+ *
+ * The first config that opens successfully becomes the group leader; the
+ * remaining ones are opened as members of that group. Configs that fail to
+ * open are skipped, so callers must compare pm->num_metrics with
+ * @num_configs to find out whether everything was enabled.
+ *
+ * pm->fd is only ever assigned a valid descriptor: it stays at -1 when no
+ * event could be opened, so perf_close() never sees another negative value.
+ *
+ * NOTE: descriptors of non-leader events are not stored anywhere and thus
+ * are not closed by perf_close().
+ *
+ * Returns 0 on success (even when some events failed to open), -1 when
+ * the metrics array cannot be allocated.
+ */
+static int perf_init(struct pmu_metrics *pm, int num_configs, int* configs)
+{
+	memset(pm, 0, sizeof(*pm));
+	pm->fd = -1;
+	pm->read_format =
+		PERF_FORMAT_TOTAL_TIME_ENABLED |
+		PERF_FORMAT_GROUP;
+	pm->metrics = calloc(num_configs, sizeof(*pm->metrics));
+	if (!pm->metrics)
+		return -1;
+
+	for (int i = 0; i < num_configs; ++i) {
+		/* pm->fd is -1 until a leader exists, which requests a new group. */
+		int res = perf_i915_open(configs[i], pm->fd, pm->read_format);
+
+		if (res < 0)
+			continue;
+
+		if (pm->fd < 0)
+			pm->fd = res;
+		pm->metrics[pm->num_metrics++].config = configs[i];
+	}
+
+	igt_info("perf_init: enabled %d metrics from %d requested\n",
+		 pm->num_metrics, num_configs);
+
+	return 0;
+}
+
+/*
+ * Release what perf_init() set up: close the group leader fd and free the
+ * metrics array. Safe to call more than once on the same structure.
+ */
+static void perf_close(struct pmu_metrics *pm)
+{
+	/* perf_init() may leave any negative value here, not only -1. */
+	if (pm->fd >= 0)
+		close(pm->fd);
+	pm->fd = -1;
+
+	free(pm->metrics);
+	pm->metrics = NULL;
+}
+
+/*
+ * Buffer layout used by perf_read() for a group read with
+ * PERF_FORMAT_GROUP | PERF_FORMAT_TOTAL_TIME_ENABLED;
+ * see 'man 2 perf_event_open'.
+ */
+struct perf_read_format {
+ uint64_t nr_values; /* The number of events */
+ uint64_t time_enabled; /* if PERF_FORMAT_TOTAL_TIME_ENABLED */
+ struct {
+ uint64_t value; /* The value of the event */
+ } values[1024];
+};
+
+/*
+ * Read the current values of every event in the group.
+ *
+ * For each metric the previous reading is moved to old_value and the new
+ * reading is stored in value.
+ *
+ * Returns 0 on success and -1 when the group is not open, was opened with
+ * an unexpected read_format, the read fails or is too short, or the kernel
+ * reports a different number of events than were opened.
+ */
+static int perf_read(struct pmu_metrics *pm)
+{
+	const int read_format =
+		PERF_FORMAT_TOTAL_TIME_ENABLED |
+		PERF_FORMAT_GROUP;
+	struct perf_read_format data;
+	const size_t header = sizeof(data.nr_values) + sizeof(data.time_enabled);
+	ssize_t len;
+
+	if (pm->fd < 0)
+		return -1;
+
+	if (pm->read_format != read_format)
+		return -1;
+
+	len = read(pm->fd, &data, sizeof(data));
+	if (len < 0)
+		return -1;
+
+	/* The header must be fully present before nr_values can be trusted. */
+	if ((size_t)len < header)
+		return -1;
+
+	if (pm->num_metrics != data.nr_values)
+		return -1;
+
+	/* Every value copied below must actually have been read. */
+	if ((size_t)len < header + data.nr_values * sizeof(data.values[0]))
+		return -1;
+
+	for (uint64_t i = 0; i < data.nr_values; ++i) {
+		pm->metrics[i].old_value = pm->metrics[i].value;
+		pm->metrics[i].value = data.values[i].value;
+	}
+
+	return 0;
+}
+
+/*
+ * Map an event config to a printable event name.
+ * Configs without an entry in the table yield "i915/unknown/".
+ */
+static const char* perf_get_metric_name(int config)
+{
+	static const struct {
+		int config;
+		const char *name;
+	} known[] = {
+		{ I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0), "i915/rcs0-busy/" },
+		{ I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO, 0), "i915/vcs0-busy/" },
+		{ I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO, 1), "i915/vcs1-busy/" },
+		{ I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_COPY, 0), "i915/bcs0-busy/" },
+		{ I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO_ENHANCE, 0), "i915/vecs0-busy/" },
+	};
+
+	for (size_t k = 0; k < sizeof(known)/sizeof(known[0]); ++k) {
+		if (known[k].config == config)
+			return known[k].name;
+	}
+
+	return "i915/unknown/";
+}
+
+/* Counter increase between the two most recent perf_read() calls. */
+static uint64_t perf_elapsed(struct metric* m)
+{
+	const uint64_t before = m->old_value;
+	const uint64_t after = m->value;
+
+	return after - before;
+}
+
+/*
+ * Subtest "init": open every metric listed below in one group, read the
+ * group once and check that none of the events was skipped.
+ *
+ * NOTE(review): I915_ENGINE_CLASS_VIDEO is listed twice and only instance 0
+ * is used, so the VIDEO configs are requested twice.
+ */
+static void test_init(void)
+{
+	struct pmu_metrics pm;
+	unsigned int class[] =
+	{
+		I915_ENGINE_CLASS_RENDER,
+		I915_ENGINE_CLASS_VIDEO,
+		I915_ENGINE_CLASS_VIDEO,
+		I915_ENGINE_CLASS_COPY,
+		I915_ENGINE_CLASS_VIDEO_ENHANCE,
+	};
+	int* configs = malloc(1024 * sizeof(int));
+	int num_configs = 0;
+
+	igt_assert(configs != NULL);
+
+	for (size_t i = 0; i < sizeof(class)/sizeof(class[0]); ++i) {
+		/* TODO Adding metrics for 0-instances only. Would be nice
+		 * to get everything, but for that we either need to add
+		 * check for different platforms here or use upcoming
+		 * engines discover API.
+		 */
+		configs[num_configs++] = I915_PMU_ENGINE_BUSY(class[i], 0);
+		configs[num_configs++] = I915_PMU_ENGINE_QUEUED(class[i], 0);
+		configs[num_configs++] = I915_PMU_ENGINE_WAIT(class[i], 0);
+		configs[num_configs++] = I915_PMU_ENGINE_SEMA(class[i], 0);
+	}
+	configs[num_configs++] = I915_PMU_ACTUAL_FREQUENCY;
+	configs[num_configs++] = I915_PMU_REQUESTED_FREQUENCY;
+	configs[num_configs++] = I915_PMU_ENERGY;
+	configs[num_configs++] = I915_PMU_RC6_RESIDENCY;
+	configs[num_configs++] = I915_PMU_RC6p_RESIDENCY;
+	configs[num_configs++] = I915_PMU_RC6pp_RESIDENCY;
+
+	igt_assert_eq(perf_init(&pm, num_configs, configs), 0);
+	igt_assert_eq(perf_read(&pm), 0);
+	igt_assert_eq(pm.num_metrics, num_configs);
+
+	perf_close(&pm);
+	/* The config list is owned by this function; release it. */
+	free(configs);
+}
+
+/* Tests that i915 PMU correctly errors out on invalid initialization.
+ * i915 PMU is an uncore PMU, thus:
+ * - sampling period is not supported
+ * - pid > 0 is not supported since we can't count per-process (we count
+ * for the whole system)
+ * - cpu != 0 is not supported since i915 PMU exposes cpumask for CPU0
+ */
+/*
+ * Subtest "invalid_init": each perf_event_open() call below is expected to
+ * fail with a specific errno:
+ *   - non-zero sample_period      -> EINVAL
+ *   - pid 0 (instead of -1)       -> EINVAL
+ *   - cpu 1 (instead of 0)        -> ENODEV
+ */
+static void test_invalid_init(void)
+{
+	struct perf_event_attr attr;
+	int pid, cpu;
+
+/* Reset attr to a valid rcs0-busy request before each negative case. */
+#define ATTR_INIT() \
+	do { \
+		memset(&attr, 0, sizeof (attr)); \
+		attr.config = I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0); \
+		attr.type = i915_type_id(); \
+		igt_assert(attr.type != 0); \
+	} while(0)
+
+	ATTR_INIT();
+	attr.sample_period = 100;
+	pid = -1;
+	cpu = 0;
+	igt_assert_eq(perf_event_open(&attr, pid, cpu, -1, 0), -1);
+	igt_assert_eq(errno, EINVAL);
+
+	ATTR_INIT();
+	pid = 0;
+	cpu = 0;
+	igt_assert_eq(perf_event_open(&attr, pid, cpu, -1, 0), -1);
+	igt_assert_eq(errno, EINVAL);
+
+	ATTR_INIT();
+	pid = -1;
+	cpu = 1;
+	igt_assert_eq(perf_event_open(&attr, pid, cpu, -1, 0), -1);
+	igt_assert_eq(errno, ENODEV);
+
+/* Keep the helper macro local to this function. */
+#undef ATTR_INIT
+}
+
+/*
+ * Subtest "single": load each engine in turn for 20 seconds and check the
+ * BUSY counters of all engines against wall-clock time.
+ *
+ * Both the wall-clock interval and the values compared with it are handled
+ * as nanoseconds (elapsed_ns()).
+ *
+ * Returns the number of engines that were exercised.
+ */
+static int test_single(int fd, uint32_t handle)
+{
+	struct {
+		const char* engine_name;
+		unsigned int class;
+		unsigned int instance;
+		unsigned int ring_id;
+	} engines[] = {
+		{ "rcs0", I915_ENGINE_CLASS_RENDER, 0, I915_EXEC_RENDER },
+		{ "vcs0", I915_ENGINE_CLASS_VIDEO, 0, I915_EXEC_BSD | I915_EXEC_BSD_RING1 },
+		{ "vcs1", I915_ENGINE_CLASS_VIDEO, 1, I915_EXEC_BSD | I915_EXEC_BSD_RING2 },
+		{ "bcs0", I915_ENGINE_CLASS_COPY, 0, I915_EXEC_BLT },
+		{ "vecs0", I915_ENGINE_CLASS_VIDEO_ENHANCE, 0, I915_EXEC_VEBOX },
+	};
+	struct pmu_metrics pm;
+	int configs[] = {
+		I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0),
+		I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO, 0),
+		I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO, 1),
+		I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_COPY, 0),
+		I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO_ENHANCE, 0),
+	};
+	int num_configs = sizeof(configs)/sizeof(configs[0]);
+	struct timespec start, now;
+
+	igt_assert_eq(perf_init(&pm, num_configs, configs), 0);
+	igt_assert_eq(pm.num_metrics, num_configs);
+
+	for (size_t i = 0; i < sizeof(engines)/sizeof(engines[0]); ++i) {
+		uint64_t wall;
+
+		clock_gettime(CLOCK_MONOTONIC, &start);
+		igt_assert_eq(perf_read(&pm), 0);
+
+		/* Create almost 100% load on the examined engine for specified time. */
+		nop_on_ring(fd, handle, engines[i].ring_id, 20);
+
+		igt_assert_eq(perf_read(&pm), 0);
+		clock_gettime(CLOCK_MONOTONIC, &now);
+		wall = elapsed_ns(&start, &now);
+
+		igt_info("Executed on %s for %" PRIu64 "ns\n", engines[i].engine_name, wall);
+		for (int j = 0; j < num_configs; ++j) {
+			uint64_t busy = perf_elapsed(&pm.metrics[j]);
+
+			igt_info(" %s: %" PRIu64 "ns\n", perf_get_metric_name(pm.metrics[j].config), busy);
+
+			igt_assert(busy < wall);
+
+			if (configs[j] == I915_PMU_ENGINE_BUSY(engines[i].class, engines[i].instance)) {
+				/* Check that the loaded engine had almost 100% load. */
+				igt_assert(busy > (1 - USAGE_TOLERANCE) * wall);
+			} else if (configs[j] == I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_COPY, 0)) {
+				/* Check that BCS engine had just tiny load.
+				 * NOTE Some load on BCS is non-avoidable if you run under any graphical server,
+				 * so we can't check for zero.
+				 */
+				igt_assert(busy < USAGE_TOLERANCE * wall);
+			} else {
+				/* Check that other engines did not have any load.
+				 * NOTE This may fail if you have any other workload running in parallel to this test.
+				 * Compared as a full 64-bit value so nothing is truncated.
+				 */
+				igt_assert(busy == 0);
+			}
+		}
+	}
+	perf_close(&pm);
+
+	/* Return how many engines we have tried. */
+	return sizeof(engines)/sizeof(engines[0]);
+}
+
+/*
+ * Subtest "parallel": open a second, independent group of BUSY counters
+ * while test_single() runs with its own group, then check that every
+ * engine accumulated roughly 1/num_engines of the total wall-clock time.
+ */
+static void test_parallel(int fd, uint32_t handle)
+{
+	struct pmu_metrics pm;
+	int configs[] = {
+		I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0),
+		I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO, 0),
+		I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO, 1),
+		I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_COPY, 0),
+		I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO_ENHANCE, 0),
+	};
+	int num_configs = sizeof(configs)/sizeof(configs[0]);
+	int num_engines;
+	struct timespec start, now;
+	uint64_t wall;
+
+	igt_assert_eq(perf_init(&pm, num_configs, configs), 0);
+	igt_assert_eq(pm.num_metrics, num_configs);
+
+	clock_gettime(CLOCK_MONOTONIC, &start);
+	igt_assert_eq(perf_read(&pm), 0);
+
+	/* Create almost 100% load on the engines one by one, we will get back
+	 * how many engines were tried.
+	 */
+	num_engines = test_single(fd, handle);
+
+	igt_assert_eq(perf_read(&pm), 0);
+	clock_gettime(CLOCK_MONOTONIC, &now);
+	wall = elapsed_ns(&start, &now);
+
+	igt_info("Executed on %d engines for %" PRIu64 "ns\n", num_engines, wall);
+	for (int j = 0; j < num_configs; ++j) {
+		uint64_t busy = perf_elapsed(&pm.metrics[j]);
+
+		igt_info(" %s: %" PRIu64 "ns\n", perf_get_metric_name(pm.metrics[j].config), busy);
+
+		/* Since the engines were loaded in turn, one by one, for roughly
+		 * the same time, each should have produced roughly the same load,
+		 * i.e. the total time divided by the number of engines.
+		 */
+		igt_assert(busy * num_engines > (1-USAGE_TOLERANCE) * wall);
+		igt_assert(busy * num_engines < (1+USAGE_TOLERANCE) * wall);
+	}
+	perf_close(&pm);
+}
+
+/*
+ * Report whether the sysfs "online" control file of cpu0 can be opened
+ * for writing.
+ */
+static bool is_hotplug_cpu0(void)
+{
+	int probe = open("/sys/devices/system/cpu/cpu0/online", O_WRONLY);
+	bool writable = (probe != -1);
+
+	if (writable)
+		close(probe);
+
+	return writable;
+}
+
+/*
+ * Subtest "cpu_online": keep rcs0 busy for `timeout` seconds while a forked
+ * child repeatedly takes each CPU offline for one second and brings it
+ * back, then check that the rcs0 BUSY counter still covers almost the
+ * whole wall-clock interval.
+ *
+ * Skipped (igt_require) when cpu0's "online" file cannot be opened for
+ * writing.
+ */
+static void test_cpu_online(int fd, uint32_t handle)
+{
+	struct pmu_metrics pm;
+	int config = I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0);
+	struct timespec start, now;
+	int timeout = 32; // seconds
+	uint64_t wall, busy;
+
+	igt_require(is_hotplug_cpu0());
+
+	igt_assert_eq(perf_init(&pm, 1, &config), 0);
+	igt_assert_eq(pm.num_metrics, 1);
+
+	igt_fork(child, 1) {
+		int cpu_fd;
+		char cpu_name[64];
+		char online[] = "1";
+		char offline[] = "0";
+		useconds_t offline_time= 1000*1000; // 1 second
+
+		igt_info("attempting to put each CPU offline for 1 second:\n");
+		clock_gettime(CLOCK_MONOTONIC, &start);
+		do {
+			int cpu = 0;
+			do {
+				snprintf(cpu_name, sizeof(cpu_name),
+					 "/sys/devices/system/cpu/cpu%d/online", cpu);
+				cpu_fd = open(cpu_name, O_WRONLY);
+				if (cpu_fd == -1) {
+					/* First CPU index without a control file: wrap around. */
+					igt_info(" no more CPUs, starting over...\n");
+					break;
+				}
+
+				igt_info(" CPU#%d: ", cpu);
+				if (-1 == write(cpu_fd, &offline, sizeof(offline))) {
+					igt_info("failed to put offline: ");
+				}
+				usleep(offline_time);
+				if (-1 == write(cpu_fd, &online, sizeof(online))) {
+					igt_info("failed to put it back online: ");
+				}
+				close(cpu_fd);
+				igt_info("done\n");
+				++cpu;
+			} while(1);
+			clock_gettime(CLOCK_MONOTONIC, &now);
+		} while(elapsed(&start, &now) < timeout);
+	}
+
+	clock_gettime(CLOCK_MONOTONIC, &start);
+	igt_assert_eq(perf_read(&pm), 0);
+
+	/* Create almost 100% load on the examined engine for specified time. */
+	nop_on_ring(fd, handle, I915_EXEC_RENDER, timeout);
+
+	igt_assert_eq(perf_read(&pm), 0);
+	clock_gettime(CLOCK_MONOTONIC, &now);
+
+	igt_waitchildren();
+
+	wall = elapsed_ns(&start, &now);
+	busy = perf_elapsed(&pm.metrics[0]);
+
+	igt_info("Executed on rcs0 for %" PRIu64 "ns\n", wall);
+	igt_info(" %s: %" PRIu64 "ns\n", perf_get_metric_name(pm.metrics[0].config), busy);
+
+	/* Check that the loaded engine had almost 100% load. */
+	igt_assert(busy < wall);
+	igt_assert(busy > (1-USAGE_TOLERANCE) * wall);
+
+	perf_close(&pm);
+}
+
+/* Test entry point: shared setup, the five subtests, shared teardown. */
+igt_main
+{
+ uint32_t handle = 0;
+ int device = -1;
+
+ igt_fixture {
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+
+ device = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(device);
+
+ /* One 4096-byte object holding only MI_BATCH_BUFFER_END, shared by all subtests. */
+ handle = gem_create(device, 4096);
+ gem_write(device, handle, 0, &bbe, sizeof(bbe));
+
+ igt_fork_hang_detector(device);
+ }
+
+ /* Test that we can initialize all the metrics. */
+ igt_subtest_f("init")
+ test_init();
+
+ /* Test that invalid initialization attempts are rejected. */
+ igt_subtest_f("invalid_init")
+ test_invalid_init();
+
+ /* Test a single metrics consumer. */
+ igt_subtest_f("single")
+ test_single(device, handle);
+
+ /* Test parallel metrics consumers. */
+ igt_subtest_f("parallel")
+ test_parallel(device, handle);
+
+ /* Test pmu context migration to another CPU on cpu getting online/offline. */
+ igt_subtest_f("cpu_online")
+ test_cpu_online(device, handle);
+
+ igt_fixture {
+ igt_stop_hang_detector();
+ gem_close(device, handle);
+ close(device);
+ }
+}
--
1.8.3.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 3+ messages in thread
* [RFC i-g-t v2] tests/perf_pmu: test i915 RFC PMU
2017-09-01 15:57 [RFC i-g-t v1] tests/perf_pmu: test i915 RFC PMU Dmitry Rogozhkin
@ 2017-09-01 15:57 ` Dmitry Rogozhkin
2017-09-02 0:19 ` ✓ Fi.CI.BAT: success for tests/perf_pmu: test i915 RFC PMU (rev4) Patchwork
1 sibling, 0 replies; 3+ messages in thread
From: Dmitry Rogozhkin @ 2017-09-01 15:57 UTC (permalink / raw)
To: intel-gfx
i915 RFC PMU:
* https://patchwork.freedesktop.org/series/27488/
* https://patchwork.freedesktop.org/series/28842/
Tests:
* init: try to initialize all possible metrics exposed in i915 PMU
(limit to 0-instance of engines)
* invalid_init: verify that i915 PMU correctly errors out on invalid
initialization
* enable: verify that events from parallel consumers can be disabled
without effect on another consumer
* frequency: verify that actual frequency metric works as expected
* busy: verify that BUSY metrics work for each engine
* busy_parallel: verify that parallel requests for BUSY metrics do
not conflict
* cpu_online: verify PMU context migration on CPUs going online/offline
v1: add cpu_online test
v2: add enable and frequency tests
Signed-off-by: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
---
tests/Makefile.sources | 1 +
tests/perf_pmu.c | 824 +++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 825 insertions(+)
create mode 100644 tests/perf_pmu.c
diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index bb013c7..51b684b 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -215,6 +215,7 @@ TESTS_progs = \
kms_vblank \
meta_test \
perf \
+ perf_pmu \
pm_backlight \
pm_lpsp \
pm_rc6_residency \
diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
new file mode 100644
index 0000000..f7b0904
--- /dev/null
+++ b/tests/perf_pmu.c
@@ -0,0 +1,824 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "igt.h"
+#include "igt_sysfs.h"
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/poll.h>
+#include <sys/time.h>
+#include <time.h>
+#include "drm.h"
+
+#define LOCAL_I915_EXEC_NO_RELOC (1<<11)
+#define LOCAL_I915_EXEC_HANDLE_LUT (1<<12)
+
+#define USAGE_TOLERANCE 0.02
+
+////////////////////////////////////////////////////////////////////////
+// This is a copy of perf.h from intel-gpu-tools/overlay,
+// duplicated here because I was too lazy to move it to a common library
+////////////////////////////////////////////////////////////////////////
+
+#include <linux/perf_event.h>
+
+/* Engine class identifiers; used as the "class" field of an engine PMU config. */
+enum drm_i915_gem_engine_class {
+ I915_ENGINE_CLASS_OTHER = 0,
+ I915_ENGINE_CLASS_RENDER = 1,
+ I915_ENGINE_CLASS_COPY = 2,
+ I915_ENGINE_CLASS_VIDEO = 3,
+ I915_ENGINE_CLASS_VIDEO_ENHANCE = 4,
+ I915_ENGINE_CLASS_MAX /* non-ABI */
+};
+
+/* Per-engine sample kinds; used as the "sample" field of an engine PMU config. */
+enum drm_i915_pmu_engine_sample {
+ I915_SAMPLE_QUEUED = 0,
+ I915_SAMPLE_BUSY = 1,
+ I915_SAMPLE_WAIT = 2,
+ I915_SAMPLE_SEMA = 3
+};
+
+/*
+ * Engine config layout built by __I915_PMU_ENGINE():
+ *   bits 0..3   sample   (I915_PMU_SAMPLE_BITS wide)
+ *   bits 4..11  instance (I915_PMU_SAMPLE_INSTANCE_BITS wide)
+ *   bits 12..   class    (shifted by I915_PMU_CLASS_SHIFT)
+ */
+#define I915_PMU_SAMPLE_BITS (4)
+#define I915_PMU_SAMPLE_MASK (0xf)
+#define I915_PMU_SAMPLE_INSTANCE_BITS (8)
+#define I915_PMU_CLASS_SHIFT \
+ (I915_PMU_SAMPLE_BITS + I915_PMU_SAMPLE_INSTANCE_BITS)
+
+/* Pack (class, instance, sample) into a single engine event config value. */
+#define __I915_PMU_ENGINE(class, instance, sample) \
+ ((class) << I915_PMU_CLASS_SHIFT | \
+ (instance) << I915_PMU_SAMPLE_BITS | \
+ (sample))
+
+/* Convenience wrappers, one per sample kind. */
+#define I915_PMU_ENGINE_QUEUED(class, instance) \
+ __I915_PMU_ENGINE(class, instance, I915_SAMPLE_QUEUED)
+
+#define I915_PMU_ENGINE_BUSY(class, instance) \
+ __I915_PMU_ENGINE(class, instance, I915_SAMPLE_BUSY)
+
+#define I915_PMU_ENGINE_WAIT(class, instance) \
+ __I915_PMU_ENGINE(class, instance, I915_SAMPLE_WAIT)
+
+#define I915_PMU_ENGINE_SEMA(class, instance) \
+ __I915_PMU_ENGINE(class, instance, I915_SAMPLE_SEMA)
+
+/*
+ * Non-engine configs are numbered starting right after the largest value
+ * __I915_PMU_ENGINE() yields for 0xff/0xff/0xf, so they cannot collide with
+ * engine configs built from fields within those limits.
+ */
+#define __I915_PMU_OTHER(x) (__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x))
+
+#define I915_PMU_ACTUAL_FREQUENCY __I915_PMU_OTHER(0)
+#define I915_PMU_REQUESTED_FREQUENCY __I915_PMU_OTHER(1)
+#define I915_PMU_ENERGY __I915_PMU_OTHER(2)
+#define I915_PMU_INTERRUPTS __I915_PMU_OTHER(3)
+
+#define I915_PMU_RC6_RESIDENCY __I915_PMU_OTHER(4)
+#define I915_PMU_RC6p_RESIDENCY __I915_PMU_OTHER(5)
+#define I915_PMU_RC6pp_RESIDENCY __I915_PMU_OTHER(6)
+
+/*
+ * Thin wrapper around the perf_event_open system call.
+ *
+ * Fills in attr->size with sizeof(*attr) and forwards all arguments to
+ * syscall(); the return value is whatever syscall() returns.
+ */
+static inline int
+perf_event_open(struct perf_event_attr *attr,
+ pid_t pid,
+ int cpu,
+ int group_fd,
+ unsigned long flags)
+{
+/* Supply a syscall number only when the system headers did not define one. */
+#ifndef __NR_perf_event_open
+#if defined(__i386__)
+#define __NR_perf_event_open 336
+#elif defined(__x86_64__)
+#define __NR_perf_event_open 298
+#else
+/* NOTE(review): on other architectures this falls back to syscall number 0. */
+#define __NR_perf_event_open 0
+#endif
+#endif
+ attr->size = sizeof(*attr);
+ return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
+}
+
+/*
+ * Read the perf event-source type id of the i915 PMU from sysfs.
+ *
+ * Returns the parsed number, or 0 when the sysfs file cannot be opened
+ * or read.
+ */
+static uint64_t i915_type_id(void)
+{
+	char text[1024];
+	int got;
+	int sysfs;
+
+	sysfs = open("/sys/bus/event_source/devices/i915/type", 0);
+	if (sysfs < 0)
+		return 0;
+
+	/* Leave one byte spare for the terminator added below. */
+	got = read(sysfs, text, sizeof(text)-1);
+	close(sysfs);
+	if (got < 0)
+		return 0;
+
+	text[got] = '\0';
+	return strtoull(text, 0, 0);
+}
+
+////////////////////////////////////////////////////////////////////////
+
+/* Time from *start to *end, in seconds, as a floating-point value. */
+static double elapsed(const struct timespec *start, const struct timespec *end)
+{
+	const double whole = end->tv_sec - start->tv_sec;
+	const double fraction = (end->tv_nsec - start->tv_nsec)*1e-9;
+
+	return whole + fraction;
+}
+
+/*
+ * Time from *start to *end, in nanoseconds.
+ *
+ * Computed entirely in 64-bit integers so that long intervals are not
+ * rounded by an intermediate conversion to double. *end is expected to be
+ * no earlier than *start.
+ */
+static uint64_t elapsed_ns(const struct timespec *start, const struct timespec *end)
+{
+	int64_t sec = (int64_t)end->tv_sec - (int64_t)start->tv_sec;
+	int64_t nsec = (int64_t)end->tv_nsec - (int64_t)start->tv_nsec;
+
+	return (uint64_t)(sec * INT64_C(1000000000) + nsec);
+}
+
+/*
+ * Keep submitting the batch in @handle to engine @ring_id on DRM fd @fd
+ * until at least @timeout seconds have passed, then gem_sync() on the handle.
+ *
+ * gem_require_ring() is called first for the requested ring.
+ */
+static void nop_on_ring(int fd, uint32_t handle, unsigned ring_id, int timeout)
+{
+	struct drm_i915_gem_exec_object2 batch;
+	struct drm_i915_gem_execbuffer2 eb;
+	struct timespec began, current;
+
+	gem_require_ring(fd, ring_id);
+
+	memset(&batch, 0, sizeof(batch));
+	batch.handle = handle;
+
+	memset(&eb, 0, sizeof(eb));
+	eb.buffers_ptr = to_user_pointer(&batch);
+	eb.buffer_count = 1;
+	eb.flags = ring_id |
+		LOCAL_I915_EXEC_HANDLE_LUT |
+		LOCAL_I915_EXEC_NO_RELOC;
+
+	clock_gettime(CLOCK_MONOTONIC, &began);
+
+	/* If the first submission is rejected, retry with only the ring id. */
+	if (__gem_execbuf(fd, &eb) != 0) {
+		eb.flags = ring_id;
+		gem_execbuf(fd, &eb);
+	}
+
+	for (;;) {
+		int remaining = 1024;
+
+		/* Submit in bursts so the clock is only sampled once per 1024. */
+		while (remaining-- > 0)
+			gem_execbuf(fd, &eb);
+
+		clock_gettime(CLOCK_MONOTONIC, &current);
+		if (!(elapsed(&began, &current) < timeout))
+			break;
+	}
+
+	gem_sync(fd, handle);
+}
+
+/*
+ * Open one i915 PMU event.
+ *
+ * @config:      event config value
+ * @group:       fd of the group leader, or -1 to open a new leader
+ * @read_format: PERF_FORMAT_* flags; PERF_FORMAT_GROUP is cleared when
+ *               joining an existing group
+ *
+ * Returns the result of perf_event_open() (pid -1, cpu 0), or -ENOENT when
+ * the i915 PMU type id cannot be determined.
+ */
+static int perf_i915_open(int config, int group, int read_format)
+{
+	struct perf_event_attr attr;
+
+	memset(&attr, 0, sizeof (attr));
+
+	attr.type = i915_type_id();
+	if (!attr.type)
+		return -ENOENT;
+
+	attr.config = config;
+	attr.read_format = (group == -1) ?
+		read_format : (read_format & ~PERF_FORMAT_GROUP);
+
+	return perf_event_open(&attr, -1, 0, group, 0);
+}
+
+/*
+ * One opened PMU event together with its two most recent samples.
+ * perf_read() moves "end" into "start" and stores the new sample in "end".
+ */
+struct metric {
+ int config; /* event config the counter was opened with */
+ struct {
+ uint64_t value; /* counter value of the previous sample */
+ uint64_t time; /* time field reported with the previous sample */
+ } start;
+ struct {
+ uint64_t value; /* counter value of the latest sample */
+ uint64_t time; /* time field reported with the latest sample */
+ } end;
+};
+
+/* A group of PMU events that is read through a single group-leader fd. */
+struct pmu_metrics {
+ int fd; /* group leader fd; set to -1 by perf_init()/perf_close() */
+ int read_format; /* PERF_FORMAT_* flags the group was opened with */
+ int num_metrics; /* number of valid entries in metrics[] */
+ struct metric* metrics; /* calloc'ed by perf_init(), freed by perf_close() */
+};
+
+/*
+ * Open a group of i915 PMU events.
+ *
+ * The first config that opens successfully becomes the group leader; the
+ * remaining ones are opened as members of that group. Configs that fail to
+ * open are skipped, so callers must compare pm->num_metrics with
+ * @num_configs to find out whether everything was enabled.
+ *
+ * pm->fd is only ever assigned a valid descriptor: it stays at -1 when no
+ * event could be opened, so perf_close() never sees another negative value.
+ *
+ * NOTE: descriptors of non-leader events are not stored anywhere and thus
+ * are not closed by perf_close().
+ *
+ * Returns 0 on success (even when some events failed to open), -1 when
+ * the metrics array cannot be allocated.
+ */
+static int perf_init(struct pmu_metrics *pm, int num_configs, int* configs)
+{
+	memset(pm, 0, sizeof(*pm));
+	pm->fd = -1;
+	pm->read_format =
+		PERF_FORMAT_TOTAL_TIME_ENABLED |
+		PERF_FORMAT_GROUP;
+	pm->metrics = calloc(num_configs, sizeof(*pm->metrics));
+	if (!pm->metrics)
+		return -1;
+
+	for (int i = 0; i < num_configs; ++i) {
+		/* pm->fd is -1 until a leader exists, which requests a new group. */
+		int res = perf_i915_open(configs[i], pm->fd, pm->read_format);
+
+		if (res < 0)
+			continue;
+
+		if (pm->fd < 0)
+			pm->fd = res;
+		pm->metrics[pm->num_metrics++].config = configs[i];
+	}
+
+	igt_info("perf_init: enabled %d metrics from %d requested\n",
+		 pm->num_metrics, num_configs);
+
+	return 0;
+}
+
+/*
+ * Release what perf_init() set up: close the group leader fd and free the
+ * metrics array. Safe to call more than once on the same structure.
+ */
+static void perf_close(struct pmu_metrics *pm)
+{
+	/* perf_init() may leave any negative value here, not only -1. */
+	if (pm->fd >= 0)
+		close(pm->fd);
+	pm->fd = -1;
+
+	free(pm->metrics);
+	pm->metrics = NULL;
+}
+
+/*
+ * Buffer layout used by perf_read() for a group read with
+ * PERF_FORMAT_GROUP | PERF_FORMAT_TOTAL_TIME_ENABLED;
+ * see 'man 2 perf_event_open'.
+ */
+struct perf_read_format {
+ uint64_t nr_values; /* The number of events */
+ uint64_t timestamp; /* if PERF_FORMAT_TOTAL_TIME_ENABLED */
+ struct {
+ uint64_t value; /* The value of the event */
+ } values[1024];
+};
+
+/*
+ * Read the current values of every event in the group.
+ *
+ * For each metric the previous sample ("end") is moved to "start" and the
+ * new counter value and the time field of the read are stored in "end".
+ *
+ * Returns 0 on success and -1 when the group is not open, was opened with
+ * an unexpected read_format, the read fails or is too short, or the kernel
+ * reports a different number of events than were opened.
+ */
+static int perf_read(struct pmu_metrics *pm)
+{
+	const int read_format =
+		PERF_FORMAT_TOTAL_TIME_ENABLED |
+		PERF_FORMAT_GROUP;
+	struct perf_read_format data;
+	const size_t header = sizeof(data.nr_values) + sizeof(data.timestamp);
+	ssize_t len;
+
+	if (pm->fd < 0)
+		return -1;
+
+	if (pm->read_format != read_format)
+		return -1;
+
+	len = read(pm->fd, &data, sizeof(data));
+	if (len < 0)
+		return -1;
+
+	/* The header must be fully present before nr_values can be trusted. */
+	if ((size_t)len < header)
+		return -1;
+
+	if (pm->num_metrics != data.nr_values)
+		return -1;
+
+	/* Every value copied below must actually have been read. */
+	if ((size_t)len < header + data.nr_values * sizeof(data.values[0]))
+		return -1;
+
+	for (uint64_t i = 0; i < data.nr_values; ++i) {
+		pm->metrics[i].start.value = pm->metrics[i].end.value;
+		pm->metrics[i].end.value = data.values[i].value;
+		if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
+			pm->metrics[i].start.time = pm->metrics[i].end.time;
+			pm->metrics[i].end.time = data.timestamp;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Map an event config to a printable event name.
+ * Configs without an entry in the table yield "i915/unknown/".
+ */
+static const char* perf_get_metric_name(int config)
+{
+	static const struct {
+		int config;
+		const char *name;
+	} known[] = {
+		{ I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0), "i915/rcs0-busy/" },
+		{ I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO, 0), "i915/vcs0-busy/" },
+		{ I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO, 1), "i915/vcs1-busy/" },
+		{ I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_COPY, 0), "i915/bcs0-busy/" },
+		{ I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO_ENHANCE, 0), "i915/vecs0-busy/" },
+		{ I915_PMU_ACTUAL_FREQUENCY, "i915/actual-frequency/" },
+	};
+
+	for (size_t k = 0; k < sizeof(known)/sizeof(known[0]); ++k) {
+		if (known[k].config == config)
+			return known[k].name;
+	}
+
+	return "i915/unknown/";
+}
+
+/* Counter increase between the two most recent perf_read() calls. */
+static uint64_t perf_elapsed(struct metric* m)
+{
+	const uint64_t before = m->start.value;
+	const uint64_t after = m->end.value;
+
+	return after - before;
+}
+
+/*
+ * Counter increase divided by the increase of the time field between the
+ * two most recent samples.
+ *
+ * Returns 0 when both samples carry the same time (for example when fewer
+ * than two reads have been done), instead of dividing by zero.
+ */
+static uint64_t perf_avg_freq(struct metric* m)
+{
+	const uint64_t interval = m->end.time - m->start.time;
+
+	if (interval == 0)
+		return 0;
+
+	return (m->end.value - m->start.value) / interval;
+}
+
+/*
+ * Read /sys/class/drm/card<N>/<name>, with N taken from drm_get_card(),
+ * and parse its content with atoi().
+ *
+ * Returns the parsed value, or -1 when the file cannot be opened or read.
+ */
+static int sysfs_read_i32(const char *name)
+{
+	char scratch[4096];
+	int node;
+	int got;
+
+	/* The same buffer holds first the path and then the file content. */
+	sprintf(scratch, "/sys/class/drm/card%d/%s",
+		drm_get_card(), name);
+
+	node = open(scratch, O_RDONLY);
+	if (node < 0)
+		return -1;
+
+	got = read(node, scratch, sizeof(scratch)-1);
+	close(node);
+
+	if (got >= 0) {
+		scratch[got] = '\0';
+		return atoi(scratch);
+	}
+
+	return -1;
+}
+
+/*
+ * Read /sys/kernel/debug/dri/<N>/<name>, with N taken from drm_get_card(),
+ * and parse its content as an unsigned base-16 number.
+ *
+ * Returns the parsed value, or 0 when the file cannot be opened or read.
+ */
+static uint64_t debugfs_read_u64_16(const char *name)
+{
+	char buf[4096];
+	int sysfd;
+	int len;
+
+	/* The same buffer holds first the path and then the file content. */
+	sprintf(buf, "/sys/kernel/debug/dri/%d/%s",
+		drm_get_card(), name);
+	sysfd = open(buf, O_RDONLY);
+	if (sysfd < 0)
+		return 0;
+
+	len = read(sysfd, buf, sizeof(buf)-1);
+	close(sysfd);
+	if (len < 0)
+		return 0;
+
+	buf[len] = '\0';
+
+	/* Unsigned parse: a signed one would saturate values with bit 63 set. */
+	return strtoull(buf, NULL, 16);
+}
+
+/* True when @config lies below the first non-engine ("other") config. */
+static bool is_engine_config(uint64_t config)
+{
+	const uint64_t first_other = __I915_PMU_OTHER(0);
+
+	return !(config >= first_other);
+}
+
+/* Shift applied to non-engine event bits in the mask built by event_enabled_mask(). */
+#define ENGINE_SAMPLE_BITS (16)
+/* 64-bit value with only bit @nr set. */
+#define BIT_ULL(nr) (1ULL << (nr))
+
+/*
+ * Bit that represents @config in the expected enable mask:
+ * engine configs map to bit <sample>, other configs map to bit
+ * ENGINE_SAMPLE_BITS + <index relative to __I915_PMU_OTHER(0)>.
+ */
+static uint64_t event_enabled_mask(uint64_t config)
+{
+	if (!is_engine_config(config))
+		return BIT_ULL(config - __I915_PMU_OTHER(0)) <<
+			ENGINE_SAMPLE_BITS;
+
+	return BIT_ULL(config & I915_PMU_SAMPLE_MASK);
+}
+
+/* Build the list of every event config the tests try to open.
+ *
+ * For each listed engine class the busy/queued/wait/sema events of
+ * instance 0 are added, followed by the frequency, energy and RC6
+ * residency events.
+ *
+ * On return *configs points to a malloc'ed array owned by the caller (to be
+ * released with free()); the return value is the number of entries filled.
+ */
+static int configure_all(int** configs)
+{
+	/* NOTE(review): I915_ENGINE_CLASS_VIDEO is listed twice, so the
+	 * instance-0 video events are requested twice - confirm this is
+	 * intended.
+	 */
+	const unsigned int engine_classes[] = {
+		I915_ENGINE_CLASS_RENDER,
+		I915_ENGINE_CLASS_VIDEO,
+		I915_ENGINE_CLASS_VIDEO,
+		I915_ENGINE_CLASS_COPY,
+		I915_ENGINE_CLASS_VIDEO_ENHANCE,
+	};
+	const int num_classes =
+		sizeof(engine_classes)/sizeof(engine_classes[0]);
+	int* list = malloc(1024 * sizeof(int));
+	int count = 0;
+
+	igt_assert(list != NULL);
+
+	/* TODO Only instance 0 of each engine class is covered. Covering
+	 * everything needs either per-platform checks here or the upcoming
+	 * engines discovery API.
+	 */
+	for (int k = 0; k != num_classes; k++) {
+		const unsigned int cls = engine_classes[k];
+
+		list[count + 0] = I915_PMU_ENGINE_BUSY(cls, 0);
+		list[count + 1] = I915_PMU_ENGINE_QUEUED(cls, 0);
+		list[count + 2] = I915_PMU_ENGINE_WAIT(cls, 0);
+		list[count + 3] = I915_PMU_ENGINE_SEMA(cls, 0);
+		count += 4;
+	}
+
+	/* Non-engine events. */
+	list[count++] = I915_PMU_ACTUAL_FREQUENCY;
+	list[count++] = I915_PMU_REQUESTED_FREQUENCY;
+	list[count++] = I915_PMU_ENERGY;
+	list[count++] = I915_PMU_RC6_RESIDENCY;
+	list[count++] = I915_PMU_RC6p_RESIDENCY;
+	list[count++] = I915_PMU_RC6pp_RESIDENCY;
+
+	*configs = list;
+	return count;
+}
+
+/* OR together the expected enable-mask bits of the first num_configs
+ * entries of configs.
+ */
+static uint64_t get_enabled_mask(int num_configs, int* configs)
+{
+	uint64_t combined = 0;
+	int idx = 0;
+
+	while (idx < num_configs) {
+		combined |= event_enabled_mask(configs[idx]);
+		idx++;
+	}
+
+	return combined;
+}
+
+/* Open every event returned by configure_all() and check the value reported
+ * by the i915_pmu_enable_info debugfs file: 0 before the events are opened,
+ * the expected mask while they are open, and 0 again once the group has been
+ * disabled.
+ */
+static void test_init(void)
+{
+	struct pmu_metrics pm;
+	int* configs;
+	int num_configs = configure_all(&configs);
+	uint64_t enabled = get_enabled_mask(num_configs, configs);
+
+	igt_info("expected pmu enable mask: 0x%llx\n",
+		 (unsigned long long)enabled);
+
+	/* The masks are 64 bits wide, so compare them with the u64 assert
+	 * rather than the int one, which would truncate.
+	 */
+	igt_assert_eq_u64(0, debugfs_read_u64_16("i915_pmu_enable_info"));
+
+	igt_assert_eq(perf_init(&pm, num_configs, configs), 0);
+	igt_assert_eq(pm.num_metrics, num_configs);
+
+	igt_assert_eq_u64(enabled, debugfs_read_u64_16("i915_pmu_enable_info"));
+	igt_assert_eq(perf_read(&pm), 0);
+
+	/* A failed disable would make the check below meaningless. */
+	igt_assert_eq(ioctl(pm.fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP), 0);
+	igt_assert_eq_u64(0, debugfs_read_u64_16("i915_pmu_enable_info"));
+
+	perf_close(&pm);
+	free(configs);
+}
+
+/* Tests that i915 PMU correctly errors out on invalid initialization.
+ * i915 PMU is an uncore PMU, thus:
+ *  - a sampling period is not supported
+ *  - pid > 0 is not supported since we can't count per-process (we count
+ *    for the whole system)
+ *  - cpu != 0 is not supported since i915 PMU exposes cpumask for CPU0
+ *
+ * Each case opens the rcs0 busy event with one invalid parameter and expects
+ * perf_event_open() to return -1 with the listed errno.
+ */
+static void test_invalid_init(void)
+{
+	const struct {
+		uint64_t sample_period;
+		int pid;
+		int cpu;
+		int expected_errno;
+	} cases[] = {
+		{ 100, -1, 0, EINVAL },	/* sampling period requested */
+		{   0,  0, 0, EINVAL },	/* bound to a process */
+		{   0, -1, 1, ENODEV },	/* cpu other than 0 */
+	};
+	struct perf_event_attr attr;
+
+	for (size_t k = 0; k < sizeof(cases)/sizeof(cases[0]); ++k) {
+		/* Start every case from a freshly initialized attribute. */
+		memset(&attr, 0, sizeof (attr));
+		attr.config = I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0);
+		attr.type = i915_type_id();
+		igt_assert(attr.type != 0);
+		attr.sample_period = cases[k].sample_period;
+
+		igt_assert_eq(perf_event_open(&attr, cases[k].pid, cases[k].cpu, -1, 0), -1);
+		igt_assert_eq(errno, cases[k].expected_errno);
+	}
+}
+
+/* Open the full event set twice and check the value reported by the
+ * i915_pmu_enable_info debugfs file as the two groups come and go: the mask
+ * must stay set while at least one group is still enabled and must drop to 0
+ * only after both groups have been disabled.
+ */
+static void test_enable(void)
+{
+	struct pmu_metrics pm1, pm2;
+	int* configs;
+	int num_configs = configure_all(&configs);
+	uint64_t enabled = get_enabled_mask(num_configs, configs);
+
+	igt_info("expected pmu enable mask: 0x%llx\n",
+		 (unsigned long long)enabled);
+
+	/* The masks are 64 bits wide, so compare them with the u64 assert
+	 * rather than the int one, which would truncate.
+	 */
+	igt_assert_eq_u64(0, debugfs_read_u64_16("i915_pmu_enable_info"));
+
+	igt_assert_eq(perf_init(&pm1, num_configs, configs), 0);
+	igt_assert_eq(pm1.num_metrics, num_configs);
+
+	igt_assert_eq_u64(enabled, debugfs_read_u64_16("i915_pmu_enable_info"));
+
+	igt_assert_eq(perf_init(&pm2, num_configs, configs), 0);
+	igt_assert_eq(pm2.num_metrics, num_configs);
+	igt_assert_eq_u64(enabled, debugfs_read_u64_16("i915_pmu_enable_info"));
+
+	/* Disabling the second group must leave the first one's events on. */
+	igt_assert_eq(ioctl(pm2.fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP), 0);
+	igt_assert_eq_u64(enabled, debugfs_read_u64_16("i915_pmu_enable_info"));
+
+	igt_assert_eq(ioctl(pm1.fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP), 0);
+	igt_assert_eq_u64(0, debugfs_read_u64_16("i915_pmu_enable_info"));
+
+	perf_close(&pm2);
+	perf_close(&pm1);
+	free(configs);
+}
+
+/* Load each engine in turn through nop_on_ring() and check the engine-busy
+ * metrics sampled around the load: the loaded engine must report close to
+ * the full wall-clock time, bcs0 at most a small fraction of it and every
+ * other engine nothing at all.
+ *
+ * fd and handle are passed through unchanged to nop_on_ring().
+ *
+ * Returns the number of engines that were exercised.
+ */
+static int test_single(int fd, uint32_t handle)
+{
+	struct {
+		const char* engine_name;
+		unsigned int class;
+		unsigned int instance;
+		unsigned int ring_id;
+	} engines[] = {
+		{ "rcs0", I915_ENGINE_CLASS_RENDER, 0, I915_EXEC_RENDER },
+		{ "vcs0", I915_ENGINE_CLASS_VIDEO, 0, I915_EXEC_BSD | I915_EXEC_BSD_RING1 },
+		{ "vcs1", I915_ENGINE_CLASS_VIDEO, 1, I915_EXEC_BSD | I915_EXEC_BSD_RING2 },
+		{ "bcs0", I915_ENGINE_CLASS_COPY, 0, I915_EXEC_BLT },
+		{ "vecs0", I915_ENGINE_CLASS_VIDEO_ENHANCE, 0, I915_EXEC_VEBOX },
+	};
+	struct pmu_metrics pm;
+	int configs[] = {
+		I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0),
+		I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO, 0),
+		I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO, 1),
+		I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_COPY, 0),
+		I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO_ENHANCE, 0),
+	};
+	int num_configs = sizeof(configs)/sizeof(configs[0]);
+	struct timespec start, now;
+
+	igt_assert_eq(perf_init(&pm, num_configs, configs), 0);
+	igt_assert_eq(pm.num_metrics, num_configs);
+
+	for (size_t i = 0; i < sizeof(engines)/sizeof(engines[0]); ++i) {
+		clock_gettime(CLOCK_MONOTONIC, &start);
+		igt_assert_eq(perf_read(&pm), 0);
+
+		/* Create almost 100% load on the examined engine for specified time. */
+		nop_on_ring(fd, handle, engines[i].ring_id, 20);
+
+		igt_assert_eq(perf_read(&pm), 0);
+		clock_gettime(CLOCK_MONOTONIC, &now);
+
+		/* Both the wall-clock interval and the busy counters are
+		 * compared as nanoseconds below, so log them as such.
+		 */
+		igt_info("Executed on %s for %ldns\n", engines[i].engine_name, elapsed_ns(&start, &now));
+		for (int j = 0; j < num_configs; ++j) {
+			igt_info(" %s: %ldns\n", perf_get_metric_name(pm.metrics[j].config), perf_elapsed(&pm.metrics[j]));
+
+			/* No engine can be busy for longer than the wall-clock time. */
+			igt_assert(perf_elapsed(&pm.metrics[j]) < elapsed_ns(&start, &now));
+
+			if (configs[j] == I915_PMU_ENGINE_BUSY(engines[i].class, engines[i].instance)) {
+				/* Check that the loaded engine had almost 100% load. */
+				igt_assert(perf_elapsed(&pm.metrics[j]) > (1 - USAGE_TOLERANCE) * elapsed_ns(&start, &now));
+			} else if (configs[j] == I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_COPY, 0)) {
+				/* Check that BCS engine had just tiny load.
+				 * NOTE Some load on BCS is unavoidable if you run under any graphical server,
+				 * so we can't check for zero.
+				 */
+				igt_assert(perf_elapsed(&pm.metrics[j]) < USAGE_TOLERANCE * elapsed_ns(&start, &now));
+			} else {
+				/* Check that other engines did not have any load.
+				 * NOTE This may fail if you have any other workload running in parallel to this test.
+				 */
+				igt_assert_eq(perf_elapsed(&pm.metrics[j]), 0);
+			}
+		}
+	}
+	perf_close(&pm);
+
+	/* Return how many engines we have tried. */
+	return sizeof(engines)/sizeof(engines[0]);
+}
+
+/* Check that two independent consumers of the engine-busy metrics do not
+ * disturb each other: this function opens its own set of events and samples
+ * it around a complete test_single() run, which opens, reads and validates
+ * a second set of the same events.
+ *
+ * fd and handle are passed through unchanged to test_single().
+ */
+static void test_parallel(int fd, uint32_t handle)
+{
+	struct pmu_metrics pm;
+	int configs[] = {
+		I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0),
+		I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO, 0),
+		I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO, 1),
+		I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_COPY, 0),
+		I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO_ENHANCE, 0),
+	};
+	int num_configs = sizeof(configs)/sizeof(configs[0]);
+	int num_engines;
+	struct timespec start, now;
+
+	igt_assert_eq(perf_init(&pm, num_configs, configs), 0);
+	igt_assert_eq(pm.num_metrics, num_configs);
+
+	clock_gettime(CLOCK_MONOTONIC, &start);
+	igt_assert_eq(perf_read(&pm), 0);
+
+	/* Create almost 100% load on the engines one by one, we will get back
+	 * how many engines were tried.
+	 */
+	num_engines = test_single(fd, handle);
+
+	igt_assert_eq(perf_read(&pm), 0);
+	clock_gettime(CLOCK_MONOTONIC, &now);
+
+	/* Both the wall-clock interval and the busy counters are compared as
+	 * nanoseconds below, so log them as such.
+	 */
+	igt_info("Executed on %d engines for %ldns\n", num_engines, elapsed_ns(&start, &now));
+	for (int j = 0; j < num_configs; ++j) {
+		igt_info(" %s: %ldns\n", perf_get_metric_name(pm.metrics[j].config), perf_elapsed(&pm.metrics[j]));
+
+		/* Since the engines were loaded in turns, one by one, for nearly the
+		 * same time, each of them should account for nearly the same share of
+		 * the total time: 1/num_engines of it, within USAGE_TOLERANCE.
+		 */
+		igt_assert(perf_elapsed(&pm.metrics[j]) * num_engines > (1-USAGE_TOLERANCE) * elapsed_ns(&start, &now));
+		igt_assert(perf_elapsed(&pm.metrics[j]) * num_engines < (1+USAGE_TOLERANCE) * elapsed_ns(&start, &now));
+	}
+	perf_close(&pm);
+}
+
+/* Report whether the sysfs "online" control file of CPU0 can be opened for
+ * writing; the descriptor is closed again right away.
+ */
+static bool is_hotplug_cpu0(void)
+{
+	const int probe = open("/sys/devices/system/cpu/cpu0/online", O_WRONLY);
+
+	if (probe != -1) {
+		close(probe);
+		return true;
+	}
+
+	return false;
+}
+
+/* Check that the rcs0 busy metric keeps counting while CPUs go offline and
+ * come back online.
+ *
+ * A forked child walks over the CPUs, writing "0" and then "1" to each CPU's
+ * sysfs "online" file with a one second pause in between, and repeats that
+ * until the timeout has passed. Meanwhile the parent loads rcs0 through
+ * nop_on_ring() and verifies that the busy metric covers almost the whole
+ * wall-clock time.
+ *
+ * The test is skipped when CPU0's "online" file is not writable.
+ * fd and handle are passed through unchanged to nop_on_ring().
+ */
+static void test_cpu_online(int fd, uint32_t handle)
+{
+	struct pmu_metrics pm;
+	int config = I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0);
+	struct timespec start, now;
+	int timeout = 32; // seconds
+
+	igt_require(is_hotplug_cpu0());
+
+	igt_assert_eq(perf_init(&pm, 1, &config), 0);
+	igt_assert_eq(pm.num_metrics, 1);
+
+	igt_fork(child, 1) {
+		int cpu_fd;
+		char cpu_name[64];
+		const char online[] = "1";
+		const char offline[] = "0";
+		useconds_t offline_time= 1000*1000; // 1 second
+
+		igt_info("attempting to put each CPU offline for 1 second:\n");
+		clock_gettime(CLOCK_MONOTONIC, &start);
+		do {
+			int cpu = 0;
+			do {
+				snprintf(cpu_name, sizeof(cpu_name),
+					 "/sys/devices/system/cpu/cpu%d/online", cpu);
+				cpu_fd = open(cpu_name, O_WRONLY);
+				if (cpu_fd == -1) {
+					/* First CPU index without a control file. */
+					igt_info(" no more CPUs, starting over...\n");
+					break;
+				}
+
+				igt_info(" CPU#%d: ", cpu);
+				/* Write only the digit, not the string's NUL terminator. */
+				if (-1 == write(cpu_fd, offline, sizeof(offline) - 1)) {
+					igt_info("failed to put offline: ");
+				}
+				usleep(offline_time);
+				if (-1 == write(cpu_fd, online, sizeof(online) - 1)) {
+					igt_info("failed to put it back online: ");
+				}
+				close(cpu_fd);
+				igt_info("done\n");
+				++cpu;
+			} while(1);
+			clock_gettime(CLOCK_MONOTONIC, &now);
+		} while(elapsed(&start, &now) < timeout);
+	}
+
+	clock_gettime(CLOCK_MONOTONIC, &start);
+	igt_assert_eq(perf_read(&pm), 0);
+
+	/* Create almost 100% load on the examined engine for specified time. */
+	nop_on_ring(fd, handle, I915_EXEC_RENDER, timeout);
+
+	igt_assert_eq(perf_read(&pm), 0);
+	clock_gettime(CLOCK_MONOTONIC, &now);
+
+	igt_waitchildren();
+
+	/* Both the wall-clock interval and the busy counter are compared as
+	 * nanoseconds below, so log them as such.
+	 */
+	igt_info("Executed on rcs0 for %ldns\n",elapsed_ns(&start, &now));
+	igt_info(" %s: %ldns\n", perf_get_metric_name(pm.metrics[0].config), perf_elapsed(&pm.metrics[0]));
+
+	/* Check that the loaded engine had almost 100% load. */
+	igt_assert(perf_elapsed(&pm.metrics[0]) < elapsed_ns(&start, &now));
+	igt_assert(perf_elapsed(&pm.metrics[0]) > (1-USAGE_TOLERANCE) * elapsed_ns(&start, &now));
+
+	perf_close(&pm);
+}
+
+/* Load rcs0 through nop_on_ring() and check that the average of the actual
+ * frequency metric over the load interval lies within the limits read from
+ * the gt_min_freq_mhz and gt_max_freq_mhz sysfs files.
+ *
+ * fd and handle are passed through unchanged to nop_on_ring().
+ */
+static void test_frequency(int fd, uint32_t handle)
+{
+	struct pmu_metrics pm;
+	int configs[] = {
+		I915_PMU_ACTUAL_FREQUENCY
+	};
+	int num_configs = sizeof(configs)/sizeof(configs[0]);
+	struct timespec start, now;
+
+	igt_assert_eq(perf_init(&pm, num_configs, configs), 0);
+	igt_assert_eq(pm.num_metrics, num_configs);
+
+	clock_gettime(CLOCK_MONOTONIC, &start);
+	igt_assert_eq(perf_read(&pm), 0);
+
+	/* Create almost 100% load on rcs0 for the specified time. */
+	nop_on_ring(fd, handle, I915_EXEC_RENDER, 20);
+
+	igt_assert_eq(perf_read(&pm), 0);
+	clock_gettime(CLOCK_MONOTONIC, &now);
+
+	igt_info("Executed on rcs0 for %ldns\n", elapsed_ns(&start, &now));
+	for (int j = 0; j < num_configs; ++j) {
+		if (pm.metrics[j].config == I915_PMU_ACTUAL_FREQUENCY) {
+			int min = sysfs_read_i32("gt_min_freq_mhz");
+			int max = sysfs_read_i32("gt_max_freq_mhz");
+			uint64_t freq = perf_avg_freq(&pm.metrics[j]);
+
+			/* A failed read returns -1; as an unsigned bound that
+			 * would make the upper limit check below always pass.
+			 */
+			igt_assert(min >= 0);
+			igt_assert(max >= 0);
+
+			igt_info(" %s: %ld MHz\n", perf_get_metric_name(pm.metrics[j].config), freq);
+
+			igt_assert(freq >= (uint64_t)min);
+			igt_assert(freq <= (uint64_t)max);
+		} else if (pm.metrics[j].config == I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0)) {
+			igt_info(" %s: %ldns\n", perf_get_metric_name(pm.metrics[j].config), perf_elapsed(&pm.metrics[j]));
+
+			/* rcs0 was loaded for the whole interval, so its busy time
+			 * should match the wall-clock time within USAGE_TOLERANCE.
+			 */
+			igt_assert(perf_elapsed(&pm.metrics[j]) > (1-USAGE_TOLERANCE) * elapsed_ns(&start, &now));
+			igt_assert(perf_elapsed(&pm.metrics[j]) < (1+USAGE_TOLERANCE) * elapsed_ns(&start, &now));
+		} else {
+			/* configs[] holds an event this loop does not handle. */
+			igt_assert(!"buggy test");
+		}
+	}
+	perf_close(&pm);
+}
+
+/* Test entry point: opens the Intel DRM device, prepares a buffer object
+ * holding a single MI_BATCH_BUFFER_END, starts the hang detector, runs the
+ * subtests and finally releases everything again.
+ */
+igt_main
+{
+ uint32_t handle = 0;
+ int device = -1;
+
+ igt_fixture {
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+
+ device = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(device);
+
+ /* Buffer object handed to the subtests that take a handle. */
+ handle = gem_create(device, 4096);
+ gem_write(device, handle, 0, &bbe, sizeof(bbe));
+
+ igt_fork_hang_detector(device);
+ }
+
+ /* Test that we can initialize all the metrics. */
+ igt_subtest_f("init")
+ test_init();
+
+ /* Test that invalid initialization attempts are rejected. */
+ igt_subtest_f("invalid_init")
+ test_invalid_init();
+
+ /* Test concurrent events enable/disable. */
+ igt_subtest_f("enable")
+ test_enable();
+
+ /* Test frequency metrics consumer. */
+ igt_subtest_f("frequency")
+ test_frequency(device, handle);
+
+ /* Test single engines busy metrics consumer. */
+ igt_subtest_f("busy")
+ test_single(device, handle);
+
+ /* Test parallel engines busy metrics consumers. */
+ igt_subtest_f("busy_parallel")
+ test_parallel(device, handle);
+
+ /* Test pmu context migration to another CPU on cpu getting online/offline. */
+ igt_subtest_f("cpu_online")
+ test_cpu_online(device, handle);
+
+
+ /* Undo the setup fixture: stop the detector, drop the buffer, close the device. */
+ igt_fixture {
+ igt_stop_hang_detector();
+ gem_close(device, handle);
+ close(device);
+ }
+}
--
1.8.3.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 3+ messages in thread
* ✓ Fi.CI.BAT: success for tests/perf_pmu: test i915 RFC PMU (rev4)
2017-09-01 15:57 [RFC i-g-t v1] tests/perf_pmu: test i915 RFC PMU Dmitry Rogozhkin
2017-09-01 15:57 ` [RFC i-g-t v2] " Dmitry Rogozhkin
@ 2017-09-02 0:19 ` Patchwork
1 sibling, 0 replies; 3+ messages in thread
From: Patchwork @ 2017-09-02 0:19 UTC (permalink / raw)
To: Dmitry Rogozhkin; +Cc: intel-gfx
== Series Details ==
Series: tests/perf_pmu: test i915 RFC PMU (rev4)
URL : https://patchwork.freedesktop.org/series/29313/
State : success
== Summary ==
IGT patchset tested on top of latest successful build
5ce65a9a51f17e0183e3e4f8943981ee7b96cadd pm_rps: Changes in waitboost scenario
with latest DRM-Tip kernel build CI_DRM_3028
5f8335a1a74c drm-tip: 2017y-09m-01d-19h-10m-49s UTC integration manifest
Test kms_cursor_legacy:
Subgroup basic-busy-flip-before-cursor-atomic:
pass -> FAIL (fi-snb-2600) fdo#100215 +1
fdo#100215 https://bugs.freedesktop.org/show_bug.cgi?id=100215
fi-bdw-5557u total:288 pass:268 dwarn:0 dfail:0 fail:0 skip:20 time:455s
fi-bdw-gvtdvm total:288 pass:265 dwarn:0 dfail:0 fail:0 skip:23 time:442s
fi-blb-e6850 total:288 pass:224 dwarn:1 dfail:0 fail:0 skip:63 time:366s
fi-bsw-n3050 total:288 pass:243 dwarn:0 dfail:0 fail:0 skip:45 time:574s
fi-bwr-2160 total:288 pass:184 dwarn:0 dfail:0 fail:0 skip:104 time:253s
fi-bxt-j4205 total:288 pass:260 dwarn:0 dfail:0 fail:0 skip:28 time:527s
fi-byt-j1900 total:288 pass:254 dwarn:1 dfail:0 fail:0 skip:33 time:535s
fi-byt-n2820 total:288 pass:251 dwarn:0 dfail:0 fail:0 skip:37 time:520s
fi-elk-e7500 total:288 pass:230 dwarn:0 dfail:0 fail:0 skip:58 time:443s
fi-glk-2a total:288 pass:260 dwarn:0 dfail:0 fail:0 skip:28 time:617s
fi-hsw-4770 total:288 pass:263 dwarn:0 dfail:0 fail:0 skip:25 time:451s
fi-hsw-4770r total:288 pass:263 dwarn:0 dfail:0 fail:0 skip:25 time:435s
fi-ilk-650 total:288 pass:229 dwarn:0 dfail:0 fail:0 skip:59 time:421s
fi-ivb-3520m total:288 pass:261 dwarn:0 dfail:0 fail:0 skip:27 time:506s
fi-ivb-3770 total:288 pass:261 dwarn:0 dfail:0 fail:0 skip:27 time:480s
fi-kbl-7500u total:288 pass:264 dwarn:1 dfail:0 fail:0 skip:23 time:512s
fi-kbl-7560u total:288 pass:269 dwarn:0 dfail:0 fail:0 skip:19 time:602s
fi-kbl-r total:288 pass:261 dwarn:0 dfail:0 fail:0 skip:27 time:592s
fi-skl-6260u total:288 pass:269 dwarn:0 dfail:0 fail:0 skip:19 time:470s
fi-skl-6700k total:288 pass:265 dwarn:0 dfail:0 fail:0 skip:23 time:536s
fi-skl-6770hq total:288 pass:269 dwarn:0 dfail:0 fail:0 skip:19 time:491s
fi-skl-gvtdvm total:288 pass:266 dwarn:0 dfail:0 fail:0 skip:22 time:444s
fi-skl-x1585l total:288 pass:268 dwarn:0 dfail:0 fail:0 skip:20 time:496s
fi-snb-2520m total:288 pass:251 dwarn:0 dfail:0 fail:0 skip:37 time:551s
fi-snb-2600 total:288 pass:249 dwarn:0 dfail:0 fail:1 skip:38 time:404s
== Logs ==
For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_138/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2017-09-02 0:19 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-09-01 15:57 [RFC i-g-t v1] tests/perf_pmu: test i915 RFC PMU Dmitry Rogozhkin
2017-09-01 15:57 ` [RFC i-g-t v2] " Dmitry Rogozhkin
2017-09-02 0:19 ` ✓ Fi.CI.BAT: success for tests/perf_pmu: test i915 RFC PMU (rev4) Patchwork
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.