All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH i-g-t 0/5] IGT PMU support
@ 2017-09-18 11:38 Tvrtko Ursulin
  2017-09-18 11:38 ` [PATCH i-g-t 1/5] intel-gpu-overlay: Move local perf implementation to a library Tvrtko Ursulin
                   ` (7 more replies)
  0 siblings, 8 replies; 15+ messages in thread
From: Tvrtko Ursulin @ 2017-09-18 11:38 UTC (permalink / raw)
  To: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

1.
Fixes for intel-gpu-overlay to work on top of the proposed i915 PMU perf API.

2.
New test to exercise the same API.

Tvrtko Ursulin (5):
  intel-gpu-overlay: Move local perf implementation to a library
  intel-gpu-overlay: Consolidate perf PMU access to library
  intel-gpu-overlay: Fix interrupts PMU readout
  intel-gpu-overlay: Catch-up to new i915 PMU
  tests/perf_pmu: Tests for i915 PMU API

 lib/Makefile.sources     |   2 +
 lib/igt_gt.c             |  23 +-
 lib/igt_gt.h             |   8 +
 lib/igt_perf.c           |  59 ++++
 lib/igt_perf.h           | 107 +++++++
 overlay/Makefile.am      |   6 +-
 overlay/gem-interrupts.c |  25 +-
 overlay/gpu-freq.c       |  25 +-
 overlay/gpu-perf.c       |   3 +-
 overlay/gpu-top.c        |  87 +++---
 overlay/perf.c           |  26 --
 overlay/perf.h           |  64 -----
 overlay/power.c          |  22 +-
 overlay/rc6.c            |  27 +-
 tests/Makefile.sources   |   1 +
 tests/perf_pmu.c         | 713 +++++++++++++++++++++++++++++++++++++++++++++++
 16 files changed, 970 insertions(+), 228 deletions(-)
 create mode 100644 lib/igt_perf.c
 create mode 100644 lib/igt_perf.h
 delete mode 100644 overlay/perf.c
 delete mode 100644 overlay/perf.h
 create mode 100644 tests/perf_pmu.c

-- 
2.9.5

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH i-g-t 1/5] intel-gpu-overlay: Move local perf implementation to a library
  2017-09-18 11:38 [PATCH i-g-t 0/5] IGT PMU support Tvrtko Ursulin
@ 2017-09-18 11:38 ` Tvrtko Ursulin
  2017-09-18 11:38 ` [PATCH i-g-t 2/5] intel-gpu-overlay: Consolidate perf PMU access to library Tvrtko Ursulin
                   ` (6 subsequent siblings)
  7 siblings, 0 replies; 15+ messages in thread
From: Tvrtko Ursulin @ 2017-09-18 11:38 UTC (permalink / raw)
  To: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 lib/Makefile.sources             | 2 ++
 overlay/perf.c => lib/igt_perf.c | 2 +-
 overlay/perf.h => lib/igt_perf.h | 2 ++
 overlay/Makefile.am              | 6 ++----
 overlay/gem-interrupts.c         | 3 ++-
 overlay/gpu-freq.c               | 3 ++-
 overlay/gpu-perf.c               | 3 ++-
 overlay/gpu-top.c                | 3 ++-
 overlay/power.c                  | 3 ++-
 overlay/rc6.c                    | 3 ++-
 10 files changed, 19 insertions(+), 11 deletions(-)
 rename overlay/perf.c => lib/igt_perf.c (94%)
 rename overlay/perf.h => lib/igt_perf.h (99%)

diff --git a/lib/Makefile.sources b/lib/Makefile.sources
index 53fdb54cbfa5..c031cb502469 100644
--- a/lib/Makefile.sources
+++ b/lib/Makefile.sources
@@ -16,6 +16,8 @@ lib_source_list =	 	\
 	igt_gt.h		\
 	igt_gvt.c		\
 	igt_gvt.h		\
+	igt_perf.c		\
+	igt_perf.h		\
 	igt_primes.c		\
 	igt_primes.h		\
 	igt_rand.c		\
diff --git a/overlay/perf.c b/lib/igt_perf.c
similarity index 94%
rename from overlay/perf.c
rename to lib/igt_perf.c
index b8fdc675c587..45cccff0ae53 100644
--- a/overlay/perf.c
+++ b/lib/igt_perf.c
@@ -3,7 +3,7 @@
 #include <unistd.h>
 #include <stdlib.h>
 
-#include "perf.h"
+#include "igt_perf.h"
 
 uint64_t i915_type_id(void)
 {
diff --git a/overlay/perf.h b/lib/igt_perf.h
similarity index 99%
rename from overlay/perf.h
rename to lib/igt_perf.h
index c44e65f9734c..a80b311cd1d1 100644
--- a/overlay/perf.h
+++ b/lib/igt_perf.h
@@ -1,6 +1,8 @@
 #ifndef I915_PERF_H
 #define I915_PERF_H
 
+#include <stdint.h>
+
 #include <linux/perf_event.h>
 
 #define I915_SAMPLE_BUSY	0
diff --git a/overlay/Makefile.am b/overlay/Makefile.am
index 5472514efc16..c66a80f4e571 100644
--- a/overlay/Makefile.am
+++ b/overlay/Makefile.am
@@ -4,8 +4,8 @@ endif
 
 AM_CPPFLAGS = -I.
 AM_CFLAGS = $(DRM_CFLAGS) $(PCIACCESS_CFLAGS) $(CWARNFLAGS) \
-	$(CAIRO_CFLAGS) $(OVERLAY_CFLAGS) $(WERROR_CFLAGS)
-LDADD = $(DRM_LIBS) $(PCIACCESS_LIBS) $(CAIRO_LIBS) $(OVERLAY_LIBS)
+	$(CAIRO_CFLAGS) $(OVERLAY_CFLAGS) $(WERROR_CFLAGS) -I$(srcdir)/../lib
+LDADD = $(DRM_LIBS) $(PCIACCESS_LIBS) $(CAIRO_LIBS) $(OVERLAY_LIBS) $(top_builddir)/lib/libintel_tools.la
 
 intel_gpu_overlay_SOURCES = \
 	chart.h \
@@ -29,8 +29,6 @@ intel_gpu_overlay_SOURCES = \
 	igfx.c \
 	overlay.h \
 	overlay.c \
-	perf.h \
-	perf.c \
 	power.h \
 	power.c \
 	rc6.h \
diff --git a/overlay/gem-interrupts.c b/overlay/gem-interrupts.c
index 0150a1d03825..7ba54fcd487d 100644
--- a/overlay/gem-interrupts.c
+++ b/overlay/gem-interrupts.c
@@ -31,9 +31,10 @@
 #include <string.h>
 #include <ctype.h>
 
+#include "igt_perf.h"
+
 #include "gem-interrupts.h"
 #include "debugfs.h"
-#include "perf.h"
 
 static int perf_open(void)
 {
diff --git a/overlay/gpu-freq.c b/overlay/gpu-freq.c
index 321c93882238..7f29b1aa986e 100644
--- a/overlay/gpu-freq.c
+++ b/overlay/gpu-freq.c
@@ -28,9 +28,10 @@
 #include <string.h>
 #include <stdio.h>
 
+#include "igt_perf.h"
+
 #include "gpu-freq.h"
 #include "debugfs.h"
-#include "perf.h"
 
 static int perf_i915_open(int config, int group)
 {
diff --git a/overlay/gpu-perf.c b/overlay/gpu-perf.c
index f557b9f06a17..3d4a9be91a94 100644
--- a/overlay/gpu-perf.c
+++ b/overlay/gpu-perf.c
@@ -34,7 +34,8 @@
 #include <fcntl.h>
 #include <errno.h>
 
-#include "perf.h"
+#include "igt_perf.h"
+
 #include "gpu-perf.h"
 #include "debugfs.h"
 
diff --git a/overlay/gpu-top.c b/overlay/gpu-top.c
index 891a7ea7c0b1..06f489dfdc83 100644
--- a/overlay/gpu-top.c
+++ b/overlay/gpu-top.c
@@ -31,7 +31,8 @@
 #include <errno.h>
 #include <assert.h>
 
-#include "perf.h"
+#include "igt_perf.h"
+
 #include "igfx.h"
 #include "gpu-top.h"
 
diff --git a/overlay/power.c b/overlay/power.c
index 2f1521b82cd6..84d860cae40c 100644
--- a/overlay/power.c
+++ b/overlay/power.c
@@ -31,7 +31,8 @@
 #include <time.h>
 #include <errno.h>
 
-#include "perf.h"
+#include "igt_perf.h"
+
 #include "power.h"
 #include "debugfs.h"
 
diff --git a/overlay/rc6.c b/overlay/rc6.c
index d7047c2f4880..3175bb22308f 100644
--- a/overlay/rc6.c
+++ b/overlay/rc6.c
@@ -31,8 +31,9 @@
 #include <time.h>
 #include <errno.h>
 
+#include "igt_perf.h"
+
 #include "rc6.h"
-#include "perf.h"
 
 static int perf_i915_open(int config, int group)
 {
-- 
2.9.5

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH i-g-t 2/5] intel-gpu-overlay: Consolidate perf PMU access to library
  2017-09-18 11:38 [PATCH i-g-t 0/5] IGT PMU support Tvrtko Ursulin
  2017-09-18 11:38 ` [PATCH i-g-t 1/5] intel-gpu-overlay: Move local perf implementation to a library Tvrtko Ursulin
@ 2017-09-18 11:38 ` Tvrtko Ursulin
  2017-09-18 11:38 ` [PATCH i-g-t 3/5] intel-gpu-overlay: Fix interrupts PMU readout Tvrtko Ursulin
                   ` (5 subsequent siblings)
  7 siblings, 0 replies; 15+ messages in thread
From: Tvrtko Ursulin @ 2017-09-18 11:38 UTC (permalink / raw)
  To: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 lib/igt_perf.c           | 33 +++++++++++++++++++++++++++++++++
 lib/igt_perf.h           |  2 ++
 overlay/gem-interrupts.c | 16 +---------------
 overlay/gpu-freq.c       | 22 ++--------------------
 overlay/gpu-top.c        | 32 ++++++++------------------------
 overlay/power.c          | 17 +----------------
 overlay/rc6.c            | 24 +++---------------------
 7 files changed, 50 insertions(+), 96 deletions(-)

diff --git a/lib/igt_perf.c b/lib/igt_perf.c
index 45cccff0ae53..0fa5ae3acb66 100644
--- a/lib/igt_perf.c
+++ b/lib/igt_perf.c
@@ -2,6 +2,8 @@
 #include <fcntl.h>
 #include <unistd.h>
 #include <stdlib.h>
+#include <string.h>
+#include <errno.h>
 
 #include "igt_perf.h"
 
@@ -24,3 +26,34 @@ uint64_t i915_type_id(void)
 	return strtoull(buf, 0, 0);
 }
 
+static int _perf_open(int config, int group, int format)
+{
+	struct perf_event_attr attr;
+
+	memset(&attr, 0, sizeof (attr));
+
+	attr.type = i915_type_id();
+	if (attr.type == 0)
+		return -ENOENT;
+
+	attr.config = config;
+
+	if (group >= 0)
+		format &= ~PERF_FORMAT_GROUP;
+
+	attr.read_format = format;
+
+	return perf_event_open(&attr, -1, 0, group, 0);
+
+}
+
+int perf_i915_open(int config)
+{
+	return _perf_open(config, -1, PERF_FORMAT_TOTAL_TIME_ENABLED);
+}
+
+int perf_i915_open_group(int config, int group)
+{
+	return _perf_open(config, group,
+			  PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_GROUP);
+}
diff --git a/lib/igt_perf.h b/lib/igt_perf.h
index a80b311cd1d1..8e674c3a3755 100644
--- a/lib/igt_perf.h
+++ b/lib/igt_perf.h
@@ -62,5 +62,7 @@ perf_event_open(struct perf_event_attr *attr,
 }
 
 uint64_t i915_type_id(void);
+int perf_i915_open(int config);
+int perf_i915_open_group(int config, int group);
 
 #endif /* I915_PERF_H */
diff --git a/overlay/gem-interrupts.c b/overlay/gem-interrupts.c
index 7ba54fcd487d..a84aef0398a7 100644
--- a/overlay/gem-interrupts.c
+++ b/overlay/gem-interrupts.c
@@ -36,20 +36,6 @@
 #include "gem-interrupts.h"
 #include "debugfs.h"
 
-static int perf_open(void)
-{
-	struct perf_event_attr attr;
-
-	memset(&attr, 0, sizeof (attr));
-
-	attr.type = i915_type_id();
-	if (attr.type == 0)
-		return -ENOENT;
-	attr.config = I915_PERF_INTERRUPTS;
-
-	return perf_event_open(&attr, -1, 0, -1, 0);
-}
-
 static long long debugfs_read(void)
 {
 	char buf[8192], *b;
@@ -127,7 +113,7 @@ int gem_interrupts_init(struct gem_interrupts *irqs)
 {
 	memset(irqs, 0, sizeof(*irqs));
 
-	irqs->fd = perf_open();
+	irqs->fd = perf_i915_open(I915_PERF_INTERRUPTS);
 	if (irqs->fd < 0 && interrupts_read() < 0)
 		irqs->error = ENODEV;
 
diff --git a/overlay/gpu-freq.c b/overlay/gpu-freq.c
index 7f29b1aa986e..76c5ed9acfd1 100644
--- a/overlay/gpu-freq.c
+++ b/overlay/gpu-freq.c
@@ -33,30 +33,12 @@
 #include "gpu-freq.h"
 #include "debugfs.h"
 
-static int perf_i915_open(int config, int group)
-{
-	struct perf_event_attr attr;
-
-	memset(&attr, 0, sizeof (attr));
-
-	attr.type = i915_type_id();
-	if (attr.type == 0)
-		return -ENOENT;
-	attr.config = config;
-
-	attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED;
-	if (group == -1)
-		attr.read_format |= PERF_FORMAT_GROUP;
-
-	return perf_event_open(&attr, -1, 0, group, 0);
-}
-
 static int perf_open(void)
 {
 	int fd;
 
-	fd = perf_i915_open(I915_PERF_ACTUAL_FREQUENCY, -1);
-	if (perf_i915_open(I915_PERF_REQUESTED_FREQUENCY, fd) < 0) {
+	fd = perf_i915_open_group(I915_PERF_ACTUAL_FREQUENCY, -1);
+	if (perf_i915_open_group(I915_PERF_REQUESTED_FREQUENCY, fd) < 0) {
 		close(fd);
 		fd = -1;
 	}
diff --git a/overlay/gpu-top.c b/overlay/gpu-top.c
index 06f489dfdc83..812f47d5aced 100644
--- a/overlay/gpu-top.c
+++ b/overlay/gpu-top.c
@@ -48,24 +48,6 @@
 #define I915_PERF_RING_WAIT(n) (__I915_PERF_RING(n) + 1)
 #define I915_PERF_RING_SEMA(n) (__I915_PERF_RING(n) + 2)
 
-static int perf_i915_open(int config, int group)
-{
-	struct perf_event_attr attr;
-
-	memset(&attr, 0, sizeof (attr));
-
-	attr.type = i915_type_id();
-	if (attr.type == 0)
-		return -ENOENT;
-	attr.config = config;
-
-	attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED;
-	if (group == -1)
-		attr.read_format |= PERF_FORMAT_GROUP;
-
-	return perf_event_open(&attr, -1, 0, group, 0);
-}
-
 static int perf_init(struct gpu_top *gt)
 {
 	const char *names[] = {
@@ -77,27 +59,29 @@ static int perf_init(struct gpu_top *gt)
 	};
 	int n;
 
-	gt->fd = perf_i915_open(I915_PERF_RING_BUSY(0), -1);
+	gt->fd = perf_i915_open_group(I915_PERF_RING_BUSY(0), -1);
 	if (gt->fd < 0)
 		return -1;
 
-	if (perf_i915_open(I915_PERF_RING_WAIT(0), gt->fd) >= 0)
+	if (perf_i915_open_group(I915_PERF_RING_WAIT(0), gt->fd) >= 0)
 		gt->have_wait = 1;
 
-	if (perf_i915_open(I915_PERF_RING_SEMA(0), gt->fd) >= 0)
+	if (perf_i915_open_group(I915_PERF_RING_SEMA(0), gt->fd) >= 0)
 		gt->have_sema = 1;
 
 	gt->ring[0].name = names[0];
 	gt->num_rings = 1;
 
 	for (n = 1; names[n]; n++) {
-		if (perf_i915_open(I915_PERF_RING_BUSY(n), gt->fd) >= 0) {
+		if (perf_i915_open_group(I915_PERF_RING_BUSY(n), gt->fd) >= 0) {
 			if (gt->have_wait &&
-			    perf_i915_open(I915_PERF_RING_WAIT(n), gt->fd) < 0)
+			    perf_i915_open_group(I915_PERF_RING_WAIT(n),
+						 gt->fd) < 0)
 				return -1;
 
 			if (gt->have_sema &&
-			    perf_i915_open(I915_PERF_RING_SEMA(n), gt->fd) < 0)
+			    perf_i915_open_group(I915_PERF_RING_SEMA(n),
+						 gt->fd) < 0)
 				return -1;
 
 			gt->ring[gt->num_rings++].name = names[n];
diff --git a/overlay/power.c b/overlay/power.c
index 84d860cae40c..dd4aec6bffd9 100644
--- a/overlay/power.c
+++ b/overlay/power.c
@@ -38,21 +38,6 @@
 
 /* XXX Is this exposed through RAPL? */
 
-static int perf_open(void)
-{
-	struct perf_event_attr attr;
-
-	memset(&attr, 0, sizeof (attr));
-
-	attr.type = i915_type_id();
-	if (attr.type == 0)
-		return -1;
-	attr.config = I915_PERF_ENERGY;
-
-	attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED;
-	return perf_event_open(&attr, -1, 0, -1, 0);
-}
-
 int power_init(struct power *power)
 {
 	char buf[4096];
@@ -60,7 +45,7 @@ int power_init(struct power *power)
 
 	memset(power, 0, sizeof(*power));
 
-	power->fd = perf_open();
+	power->fd = perf_i915_open(I915_PERF_ENERGY);
 	if (power->fd != -1)
 		return 0;
 
diff --git a/overlay/rc6.c b/overlay/rc6.c
index 3175bb22308f..46c975a557ff 100644
--- a/overlay/rc6.c
+++ b/overlay/rc6.c
@@ -35,24 +35,6 @@
 
 #include "rc6.h"
 
-static int perf_i915_open(int config, int group)
-{
-	struct perf_event_attr attr;
-
-	memset(&attr, 0, sizeof (attr));
-
-	attr.type = i915_type_id();
-	if (attr.type == 0)
-		return -ENOENT;
-	attr.config = config;
-
-	attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED;
-	if (group == -1)
-		attr.read_format |= PERF_FORMAT_GROUP;
-
-	return perf_event_open(&attr, -1, 0, group, 0);
-}
-
 #define RC6	(1<<0)
 #define RC6p	(1<<1)
 #define RC6pp	(1<<2)
@@ -61,15 +43,15 @@ static int perf_open(unsigned *flags)
 {
 	int fd;
 
-	fd = perf_i915_open(I915_PERF_RC6_RESIDENCY, -1);
+	fd = perf_i915_open_group(I915_PERF_RC6_RESIDENCY, -1);
 	if (fd < 0)
 		return -1;
 
 	*flags |= RC6;
-	if (perf_i915_open(I915_PERF_RC6p_RESIDENCY, fd) >= 0)
+	if (perf_i915_open_group(I915_PERF_RC6p_RESIDENCY, fd) >= 0)
 		*flags |= RC6p;
 
-	if (perf_i915_open(I915_PERF_RC6pp_RESIDENCY, fd) >= 0)
+	if (perf_i915_open_group(I915_PERF_RC6pp_RESIDENCY, fd) >= 0)
 		*flags |= RC6pp;
 
 	return fd;
-- 
2.9.5

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH i-g-t 3/5] intel-gpu-overlay: Fix interrupts PMU readout
  2017-09-18 11:38 [PATCH i-g-t 0/5] IGT PMU support Tvrtko Ursulin
  2017-09-18 11:38 ` [PATCH i-g-t 1/5] intel-gpu-overlay: Move local perf implementation to a library Tvrtko Ursulin
  2017-09-18 11:38 ` [PATCH i-g-t 2/5] intel-gpu-overlay: Consolidate perf PMU access to library Tvrtko Ursulin
@ 2017-09-18 11:38 ` Tvrtko Ursulin
  2017-09-18 11:38 ` [PATCH i-g-t 4/5] intel-gpu-overlay: Catch-up to new i915 PMU Tvrtko Ursulin
                   ` (4 subsequent siblings)
  7 siblings, 0 replies; 15+ messages in thread
From: Tvrtko Ursulin @ 2017-09-18 11:38 UTC (permalink / raw)
  To: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 overlay/gem-interrupts.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/overlay/gem-interrupts.c b/overlay/gem-interrupts.c
index a84aef0398a7..3eda24f4d7eb 100644
--- a/overlay/gem-interrupts.c
+++ b/overlay/gem-interrupts.c
@@ -136,8 +136,12 @@ int gem_interrupts_update(struct gem_interrupts *irqs)
 		else
 			val = ret;
 	} else {
-		if (read(irqs->fd, &val, sizeof(val)) < 0)
+		uint64_t data[2];
+
+		if (read(irqs->fd, &data, sizeof(data)) < 0)
 			return irqs->error = errno;
+
+		val = data[0];
 	}
 
 	update = irqs->last_count == 0;
-- 
2.9.5

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH i-g-t 4/5] intel-gpu-overlay: Catch-up to new i915 PMU
  2017-09-18 11:38 [PATCH i-g-t 0/5] IGT PMU support Tvrtko Ursulin
                   ` (2 preceding siblings ...)
  2017-09-18 11:38 ` [PATCH i-g-t 3/5] intel-gpu-overlay: Fix interrupts PMU readout Tvrtko Ursulin
@ 2017-09-18 11:38 ` Tvrtko Ursulin
  2017-09-18 11:38 ` [PATCH i-g-t 5/5] tests/perf_pmu: Tests for i915 PMU API Tvrtko Ursulin
                   ` (3 subsequent siblings)
  7 siblings, 0 replies; 15+ messages in thread
From: Tvrtko Ursulin @ 2017-09-18 11:38 UTC (permalink / raw)
  To: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 lib/igt_perf.h           | 93 ++++++++++++++++++++++++++++++++++--------------
 overlay/gem-interrupts.c |  2 +-
 overlay/gpu-freq.c       |  4 +--
 overlay/gpu-top.c        | 68 +++++++++++++++++++----------------
 overlay/power.c          |  4 +--
 overlay/rc6.c            |  6 ++--
 6 files changed, 111 insertions(+), 66 deletions(-)

diff --git a/lib/igt_perf.h b/lib/igt_perf.h
index 8e674c3a3755..e29216f0500a 100644
--- a/lib/igt_perf.h
+++ b/lib/igt_perf.h
@@ -1,3 +1,27 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
 #ifndef I915_PERF_H
 #define I915_PERF_H
 
@@ -5,41 +29,56 @@
 
 #include <linux/perf_event.h>
 
-#define I915_SAMPLE_BUSY	0
-#define I915_SAMPLE_WAIT	1
-#define I915_SAMPLE_SEMA	2
+enum drm_i915_gem_engine_class {
+	I915_ENGINE_CLASS_OTHER = 0,
+	I915_ENGINE_CLASS_RENDER = 1,
+	I915_ENGINE_CLASS_COPY = 2,
+	I915_ENGINE_CLASS_VIDEO = 3,
+	I915_ENGINE_CLASS_VIDEO_ENHANCE = 4,
+	I915_ENGINE_CLASS_MAX /* non-ABI */
+};
+
+enum drm_i915_pmu_engine_sample {
+	I915_SAMPLE_QUEUED = 0,
+	I915_SAMPLE_BUSY = 1,
+	I915_SAMPLE_WAIT = 2,
+	I915_SAMPLE_SEMA = 3,
+	I915_ENGINE_SAMPLE_MAX /* non-ABI */
+};
+
+#define I915_PMU_SAMPLE_BITS (4)
+#define I915_PMU_SAMPLE_MASK (0xf)
+#define I915_PMU_SAMPLE_INSTANCE_BITS (8)
+#define I915_PMU_CLASS_SHIFT \
+	(I915_PMU_SAMPLE_BITS + I915_PMU_SAMPLE_INSTANCE_BITS)
 
-#define I915_SAMPLE_RCS		0
-#define I915_SAMPLE_VCS		1
-#define I915_SAMPLE_BCS		2
-#define I915_SAMPLE_VECS	3
+#define __I915_PMU_ENGINE(class, instance, sample) \
+	((class) << I915_PMU_CLASS_SHIFT | \
+	(instance) << I915_PMU_SAMPLE_BITS | \
+	(sample))
 
-#define __I915_PERF_COUNT(ring, id) ((ring) << 4 | (id))
+#define I915_PMU_ENGINE_QUEUED(class, instance) \
+	__I915_PMU_ENGINE(class, instance, I915_SAMPLE_QUEUED)
 
-#define I915_PERF_COUNT_RCS_BUSY __I915_PERF_COUNT(I915_SAMPLE_RCS, I915_SAMPLE_BUSY)
-#define I915_PERF_COUNT_RCS_WAIT __I915_PERF_COUNT(I915_SAMPLE_RCS, I915_SAMPLE_WAIT)
-#define I915_PERF_COUNT_RCS_SEMA __I915_PERF_COUNT(I915_SAMPLE_RCS, I915_SAMPLE_SEMA)
+#define I915_PMU_ENGINE_BUSY(class, instance) \
+	__I915_PMU_ENGINE(class, instance, I915_SAMPLE_BUSY)
 
-#define I915_PERF_COUNT_VCS_BUSY __I915_PERF_COUNT(I915_SAMPLE_VCS, I915_SAMPLE_BUSY)
-#define I915_PERF_COUNT_VCS_WAIT __I915_PERF_COUNT(I915_SAMPLE_VCS, I915_SAMPLE_WAIT)
-#define I915_PERF_COUNT_VCS_SEMA __I915_PERF_COUNT(I915_SAMPLE_VCS, I915_SAMPLE_SEMA)
+#define I915_PMU_ENGINE_WAIT(class, instance) \
+	__I915_PMU_ENGINE(class, instance, I915_SAMPLE_WAIT)
 
-#define I915_PERF_COUNT_BCS_BUSY __I915_PERF_COUNT(I915_SAMPLE_BCS, I915_SAMPLE_BUSY)
-#define I915_PERF_COUNT_BCS_WAIT __I915_PERF_COUNT(I915_SAMPLE_BCS, I915_SAMPLE_WAIT)
-#define I915_PERF_COUNT_BCS_SEMA __I915_PERF_COUNT(I915_SAMPLE_BCS, I915_SAMPLE_SEMA)
+#define I915_PMU_ENGINE_SEMA(class, instance) \
+	__I915_PMU_ENGINE(class, instance, I915_SAMPLE_SEMA)
 
-#define I915_PERF_COUNT_VECS_BUSY __I915_PERF_COUNT(I915_SAMPLE_VECS, I915_SAMPLE_BUSY)
-#define I915_PERF_COUNT_VECS_WAIT __I915_PERF_COUNT(I915_SAMPLE_VECS, I915_SAMPLE_WAIT)
-#define I915_PERF_COUNT_VECS_SEMA __I915_PERF_COUNT(I915_SAMPLE_VECS, I915_SAMPLE_SEMA)
+#define __I915_PMU_OTHER(x) (__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x))
 
-#define I915_PERF_ACTUAL_FREQUENCY 32
-#define I915_PERF_REQUESTED_FREQUENCY 33
-#define I915_PERF_ENERGY 34
-#define I915_PERF_INTERRUPTS 35
+#define I915_PMU_ACTUAL_FREQUENCY	__I915_PMU_OTHER(0)
+#define I915_PMU_REQUESTED_FREQUENCY	__I915_PMU_OTHER(1)
+#define I915_PMU_INTERRUPTS		__I915_PMU_OTHER(2)
+#define I915_PMU_RC6_RESIDENCY		__I915_PMU_OTHER(3)
+#define I915_PMU_RC6p_RESIDENCY		__I915_PMU_OTHER(4)
+#define I915_PMU_RC6pp_RESIDENCY	__I915_PMU_OTHER(5)
 
-#define I915_PERF_RC6_RESIDENCY		40
-#define I915_PERF_RC6p_RESIDENCY	41
-#define I915_PERF_RC6pp_RESIDENCY	42
+#define I915_PMU_LAST I915_PMU_RC6pp_RESIDENCY
 
 static inline int
 perf_event_open(struct perf_event_attr *attr,
diff --git a/overlay/gem-interrupts.c b/overlay/gem-interrupts.c
index 3eda24f4d7eb..add4a9dfd725 100644
--- a/overlay/gem-interrupts.c
+++ b/overlay/gem-interrupts.c
@@ -113,7 +113,7 @@ int gem_interrupts_init(struct gem_interrupts *irqs)
 {
 	memset(irqs, 0, sizeof(*irqs));
 
-	irqs->fd = perf_i915_open(I915_PERF_INTERRUPTS);
+	irqs->fd = perf_i915_open(I915_PMU_INTERRUPTS);
 	if (irqs->fd < 0 && interrupts_read() < 0)
 		irqs->error = ENODEV;
 
diff --git a/overlay/gpu-freq.c b/overlay/gpu-freq.c
index 76c5ed9acfd1..c4619b87242a 100644
--- a/overlay/gpu-freq.c
+++ b/overlay/gpu-freq.c
@@ -37,8 +37,8 @@ static int perf_open(void)
 {
 	int fd;
 
-	fd = perf_i915_open_group(I915_PERF_ACTUAL_FREQUENCY, -1);
-	if (perf_i915_open_group(I915_PERF_REQUESTED_FREQUENCY, fd) < 0) {
+	fd = perf_i915_open_group(I915_PMU_ACTUAL_FREQUENCY, -1);
+	if (perf_i915_open_group(I915_PMU_REQUESTED_FREQUENCY, fd) < 0) {
 		close(fd);
 		fd = -1;
 	}
diff --git a/overlay/gpu-top.c b/overlay/gpu-top.c
index 812f47d5aced..61b8f62fd78c 100644
--- a/overlay/gpu-top.c
+++ b/overlay/gpu-top.c
@@ -43,49 +43,57 @@
 #define   RING_WAIT		(1<<11)
 #define   RING_WAIT_SEMAPHORE	(1<<10)
 
-#define __I915_PERF_RING(n) (4*n)
-#define I915_PERF_RING_BUSY(n) (__I915_PERF_RING(n) + 0)
-#define I915_PERF_RING_WAIT(n) (__I915_PERF_RING(n) + 1)
-#define I915_PERF_RING_SEMA(n) (__I915_PERF_RING(n) + 2)
-
 static int perf_init(struct gpu_top *gt)
 {
-	const char *names[] = {
-		"RCS",
-		"BCS",
-		"VCS0",
-		"VCS1",
-		NULL,
+	struct engine_desc {
+		unsigned class, inst;
+		const char *name;
+	} *d, engines[] = {
+		{ I915_ENGINE_CLASS_RENDER, 0, "rcs0" },
+		{ I915_ENGINE_CLASS_COPY, 0, "bcs0" },
+		{ I915_ENGINE_CLASS_VIDEO, 0, "vcs0" },
+		{ I915_ENGINE_CLASS_VIDEO, 1, "vcs1" },
+		{ I915_ENGINE_CLASS_VIDEO_ENHANCE, 0, "vecs0" },
+		{ 0, 0, NULL }
 	};
-	int n;
 
-	gt->fd = perf_i915_open_group(I915_PERF_RING_BUSY(0), -1);
+	d = &engines[0];
+
+	gt->fd = perf_i915_open_group(I915_PMU_ENGINE_BUSY(d->class, d->inst),
+				      -1);
 	if (gt->fd < 0)
 		return -1;
 
-	if (perf_i915_open_group(I915_PERF_RING_WAIT(0), gt->fd) >= 0)
+	if (perf_i915_open_group(I915_PMU_ENGINE_WAIT(d->class, d->inst),
+				 gt->fd) >= 0)
 		gt->have_wait = 1;
 
-	if (perf_i915_open_group(I915_PERF_RING_SEMA(0), gt->fd) >= 0)
+	if (perf_i915_open_group(I915_PMU_ENGINE_SEMA(d->class, d->inst),
+				 gt->fd) >= 0)
 		gt->have_sema = 1;
 
-	gt->ring[0].name = names[0];
+	gt->ring[0].name = d->name;
 	gt->num_rings = 1;
 
-	for (n = 1; names[n]; n++) {
-		if (perf_i915_open_group(I915_PERF_RING_BUSY(n), gt->fd) >= 0) {
-			if (gt->have_wait &&
-			    perf_i915_open_group(I915_PERF_RING_WAIT(n),
-						 gt->fd) < 0)
-				return -1;
-
-			if (gt->have_sema &&
-			    perf_i915_open_group(I915_PERF_RING_SEMA(n),
-						 gt->fd) < 0)
-				return -1;
-
-			gt->ring[gt->num_rings++].name = names[n];
-		}
+	for (d++; d->name; d++) {
+		if (perf_i915_open_group(I915_PMU_ENGINE_BUSY(d->class,
+							      d->inst),
+					gt->fd) < 0)
+			continue;
+
+		if (gt->have_wait &&
+		    perf_i915_open_group(I915_PMU_ENGINE_WAIT(d->class,
+							      d->inst),
+					 gt->fd) < 0)
+			return -1;
+
+		if (gt->have_sema &&
+		    perf_i915_open_group(I915_PMU_ENGINE_SEMA(d->class,
+							      d->inst),
+				   gt->fd) < 0)
+			return -1;
+
+		gt->ring[gt->num_rings++].name = d->name;
 	}
 
 	return 0;
diff --git a/overlay/power.c b/overlay/power.c
index dd4aec6bffd9..805f4ca7805c 100644
--- a/overlay/power.c
+++ b/overlay/power.c
@@ -45,9 +45,7 @@ int power_init(struct power *power)
 
 	memset(power, 0, sizeof(*power));
 
-	power->fd = perf_i915_open(I915_PERF_ENERGY);
-	if (power->fd != -1)
-		return 0;
+	power->fd = -1;
 
 	sprintf(buf, "%s/i915_energy_uJ", debugfs_dri_path);
 	fd = open(buf, 0);
diff --git a/overlay/rc6.c b/overlay/rc6.c
index 46c975a557ff..29aa29dbaf72 100644
--- a/overlay/rc6.c
+++ b/overlay/rc6.c
@@ -43,15 +43,15 @@ static int perf_open(unsigned *flags)
 {
 	int fd;
 
-	fd = perf_i915_open_group(I915_PERF_RC6_RESIDENCY, -1);
+	fd = perf_i915_open_group(I915_PMU_RC6_RESIDENCY, -1);
 	if (fd < 0)
 		return -1;
 
 	*flags |= RC6;
-	if (perf_i915_open_group(I915_PERF_RC6p_RESIDENCY, fd) >= 0)
+	if (perf_i915_open_group(I915_PMU_RC6p_RESIDENCY, fd) >= 0)
 		*flags |= RC6p;
 
-	if (perf_i915_open_group(I915_PERF_RC6pp_RESIDENCY, fd) >= 0)
+	if (perf_i915_open_group(I915_PMU_RC6pp_RESIDENCY, fd) >= 0)
 		*flags |= RC6pp;
 
 	return fd;
-- 
2.9.5

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH i-g-t 5/5] tests/perf_pmu: Tests for i915 PMU API
  2017-09-18 11:38 [PATCH i-g-t 0/5] IGT PMU support Tvrtko Ursulin
                   ` (3 preceding siblings ...)
  2017-09-18 11:38 ` [PATCH i-g-t 4/5] intel-gpu-overlay: Catch-up to new i915 PMU Tvrtko Ursulin
@ 2017-09-18 11:38 ` Tvrtko Ursulin
  2017-09-18 13:17   ` Chris Wilson
                     ` (2 more replies)
  2017-09-18 18:16 ` ✓ Fi.CI.BAT: success for IGT PMU support (rev2) Patchwork
                   ` (2 subsequent siblings)
  7 siblings, 3 replies; 15+ messages in thread
From: Tvrtko Ursulin @ 2017-09-18 11:38 UTC (permalink / raw)
  To: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

A bunch of tests for the new i915 PMU feature.

Parts of the code were initially sketched by Dmitry Rogozhkin.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
---
 lib/igt_gt.c           |  23 +-
 lib/igt_gt.h           |   8 +
 tests/Makefile.sources |   1 +
 tests/perf_pmu.c       | 713 +++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 738 insertions(+), 7 deletions(-)
 create mode 100644 tests/perf_pmu.c

diff --git a/lib/igt_gt.c b/lib/igt_gt.c
index b3f3b3809eee..102cc2841feb 100644
--- a/lib/igt_gt.c
+++ b/lib/igt_gt.c
@@ -537,14 +537,23 @@ unsigned intel_detect_and_clear_missed_interrupts(int fd)
 	return missed;
 }
 
+enum drm_i915_gem_engine_class {
+	I915_ENGINE_CLASS_OTHER = 0,
+	I915_ENGINE_CLASS_RENDER = 1,
+	I915_ENGINE_CLASS_COPY = 2,
+	I915_ENGINE_CLASS_VIDEO = 3,
+	I915_ENGINE_CLASS_VIDEO_ENHANCE = 4,
+	I915_ENGINE_CLASS_MAX /* non-ABI */
+};
+
 const struct intel_execution_engine intel_execution_engines[] = {
-	{ "default", NULL, 0, 0 },
-	{ "render", "rcs0", I915_EXEC_RENDER, 0 },
-	{ "bsd", "vcs0", I915_EXEC_BSD, 0 },
-	{ "bsd1", "vcs0", I915_EXEC_BSD, 1<<13 /*I915_EXEC_BSD_RING1*/ },
-	{ "bsd2", "vcs1", I915_EXEC_BSD, 2<<13 /*I915_EXEC_BSD_RING2*/ },
-	{ "blt", "bcs0", I915_EXEC_BLT, 0 },
-	{ "vebox", "vecs0", I915_EXEC_VEBOX, 0 },
+	{ "default", NULL, -1, -1, 0, 0 },
+	{ "render", "rcs0", I915_ENGINE_CLASS_RENDER, 0, I915_EXEC_RENDER, 0 },
+	{ "bsd", "vcs0", I915_ENGINE_CLASS_VIDEO, 0, I915_EXEC_BSD, 0 },
+	{ "bsd1", "vcs0", I915_ENGINE_CLASS_VIDEO, 0, I915_EXEC_BSD, 1<<13 /*I915_EXEC_BSD_RING1*/ },
+	{ "bsd2", "vcs1", I915_ENGINE_CLASS_VIDEO, 1, I915_EXEC_BSD, 2<<13 /*I915_EXEC_BSD_RING2*/ },
+	{ "blt", "bcs0", I915_ENGINE_CLASS_COPY, 0, I915_EXEC_BLT, 0 },
+	{ "vebox", "vecs0", I915_ENGINE_CLASS_VIDEO_ENHANCE, 0, I915_EXEC_VEBOX, 0 },
 	{ NULL, 0, 0 }
 };
 
diff --git a/lib/igt_gt.h b/lib/igt_gt.h
index 2579cbd37be7..436041ce9cc0 100644
--- a/lib/igt_gt.h
+++ b/lib/igt_gt.h
@@ -66,6 +66,8 @@ unsigned intel_detect_and_clear_missed_interrupts(int fd);
 extern const struct intel_execution_engine {
 	const char *name;
 	const char *full_name;
+	int class;
+	int instance;
 	unsigned exec_id;
 	unsigned flags;
 } intel_execution_engines[];
@@ -78,6 +80,12 @@ extern const struct intel_execution_engine {
 	     e__++) \
 		for_if (gem_has_ring(fd__, flags__ = e__->exec_id | e__->flags))
 
+#define for_each_engine_class_instance(fd__, e__) \
+	for ((e__) = intel_execution_engines;\
+	     (e__)->name; \
+	     (e__)++) \
+		for_if ((e__)->class > 0)
+
 bool gem_can_store_dword(int fd, unsigned int engine);
 
 #endif /* IGT_GT_H */
diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index cf542df181a8..4bab6247151c 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -217,6 +217,7 @@ TESTS_progs = \
 	kms_vblank \
 	meta_test \
 	perf \
+	perf_pmu \
 	pm_backlight \
 	pm_lpsp \
 	pm_rc6_residency \
diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
new file mode 100644
index 000000000000..2dbee586dacc
--- /dev/null
+++ b/tests/perf_pmu.c
@@ -0,0 +1,713 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/times.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <time.h>
+#include <poll.h>
+
+#include "igt.h"
+#include "igt_perf.h"
+
+IGT_TEST_DESCRIPTION("Test the i915 pmu perf interface");
+
+const double tolerance = 0.02f;
+const unsigned long batch_duration_ns = 1000 * 1000 * 1000 / 2;
+
+static void
+init(int gem_fd, const struct intel_execution_engine *e, uint8_t sample)
+{
+	uint64_t config = __I915_PMU_ENGINE(e->class, e->instance, sample);
+	int fd;
+
+	igt_require(gem_has_ring(gem_fd, e->exec_id | e->flags));
+
+	fd = perf_i915_open(config);
+	igt_assert(fd >= 0);
+
+	close(fd);
+}
+
+static uint64_t pmu_read_single(int fd)
+{
+	uint64_t data[2];
+	ssize_t len;
+
+	len = read(fd, data, sizeof(data));
+	igt_assert_eq(len, sizeof(data));
+
+	return data[0];
+}
+
+static void pmu_read_multi(int fd, unsigned int num, uint64_t *val)
+{
+	uint64_t buf[2 + num];
+	unsigned int i;
+	ssize_t len;
+
+	len = read(fd, buf, sizeof(buf));
+	igt_assert_eq(len, sizeof(buf));
+	for (i = 0; i < num; i++)
+		val[i] = buf[2 + i];
+}
+
+#define assert_within_epsilon(x, ref, tolerance) \
+	igt_assert_f((double)(x) <= (1.0 + tolerance) * (double)ref && \
+		     (double)(x) >= (1.0 - tolerance) * (double)ref, \
+		     "'%s' != '%s' (%f not within %f%% tolerance of %f)\n",\
+		     #x, #ref, (double)x, tolerance * 100.0, (double)ref)
+
+static void
+single(int gem_fd, const struct intel_execution_engine *e, bool busy)
+{
+	uint64_t config = I915_PMU_ENGINE_BUSY(e->class, e->instance);
+	double ref = busy ? batch_duration_ns : 0.0f;
+	igt_spin_t *spin;
+	uint64_t val;
+	int fd;
+
+	igt_require(gem_has_ring(gem_fd, e->exec_id | e->flags));
+
+	if (busy) {
+		spin = igt_spin_batch_new(gem_fd, 0, e->exec_id | e->flags, 0);
+		igt_spin_batch_set_timeout(spin, batch_duration_ns);
+	} else {
+		usleep(batch_duration_ns / 1000);
+	}
+
+	fd = perf_i915_open(config);
+	igt_assert(fd >= 0);
+
+	if (busy)
+		gem_sync(gem_fd, spin->handle);
+
+	val = pmu_read_single(fd);
+
+	assert_within_epsilon(val, ref, tolerance);
+
+	if (busy)
+		igt_spin_batch_free(gem_fd, spin);
+	close(fd);
+}
+
+static void
+busy_check_all(int gem_fd, const struct intel_execution_engine *e,
+	       const unsigned int num_engines)
+{
+	const struct intel_execution_engine *e_;
+	uint64_t val[num_engines];
+	int fd[2];
+	igt_spin_t *spin;
+	unsigned int busy_idx, i;
+
+	igt_require(gem_has_ring(gem_fd, e->exec_id | e->flags));
+
+	spin = igt_spin_batch_new(gem_fd, 0, e->exec_id | e->flags, 0);
+	igt_spin_batch_set_timeout(spin, batch_duration_ns);
+
+	i = 0;
+	fd[0] = -1;
+	for_each_engine_class_instance(fd, e_) {
+		if (!gem_has_ring(gem_fd, e_->exec_id | e_->flags))
+			continue;
+
+		if (e == e_)
+			busy_idx = i;
+
+		fd[i == 0 ? 0 : 1] =
+			perf_i915_open_group(I915_PMU_ENGINE_BUSY(e_->class,
+								  e_->instance),
+					     fd[0]);
+		igt_assert(fd[0] > 0);
+		igt_assert(i == 0 || fd[1] > 0);
+		i++;
+	}
+
+	gem_sync(gem_fd, spin->handle);
+
+	pmu_read_multi(fd[0], num_engines, val);
+
+	assert_within_epsilon(val[busy_idx], batch_duration_ns, tolerance);
+	for (i = 0; i < num_engines; i++) {
+		if (i == busy_idx)
+			continue;
+		assert_within_epsilon(val[i], 0.0f, tolerance);
+	}
+
+	igt_spin_batch_free(gem_fd, spin);
+	close(fd[0]);
+}
+
+static void
+two_busy_check_all(int gem_fd, const struct intel_execution_engine *e,
+		   const unsigned int num_engines)
+{
+	const struct intel_execution_engine *e_;
+	uint64_t val[num_engines];
+	int fd[2];
+	igt_spin_t *spin[2];
+	unsigned int busy_idx[2], i;
+
+	igt_require(gem_has_ring(gem_fd, e->exec_id | e->flags));
+	igt_assert(e->exec_id != 0 && e->exec_id != I915_EXEC_RENDER);
+
+	spin[0] = igt_spin_batch_new(gem_fd, 0, e->exec_id | e->flags, 0);
+	igt_spin_batch_set_timeout(spin[0], batch_duration_ns);
+
+	spin[1] = igt_spin_batch_new(gem_fd, 0, I915_EXEC_RENDER, 0);
+	igt_spin_batch_set_timeout(spin[1], batch_duration_ns);
+
+	i = 0;
+	fd[0] = -1;
+	for_each_engine_class_instance(fd, e_) {
+		unsigned int idx = i == 0 ? 0 : 1;
+
+		if (!gem_has_ring(gem_fd, e_->exec_id | e_->flags))
+			continue;
+
+		if (e_->class == I915_ENGINE_CLASS_RENDER && e_->instance == 0)
+			busy_idx[0] = i;
+		else if (e == e_)
+			busy_idx[1] = i;
+
+		fd[idx] =
+			perf_i915_open_group(I915_PMU_ENGINE_BUSY(e_->class,
+								  e_->instance),
+					     fd[0]);
+		igt_assert(fd[idx] > 0);
+		i++;
+	}
+
+	gem_sync(gem_fd, spin[0]->handle);
+	gem_sync(gem_fd, spin[1]->handle);
+
+	pmu_read_multi(fd[0], num_engines, val);
+
+	for (i = 0; i < num_engines; i++) {
+		if (i == busy_idx[0] || i == busy_idx[1])
+			assert_within_epsilon(val[i], batch_duration_ns,
+					      tolerance);
+		else
+			assert_within_epsilon(val[i], 0.0f, tolerance);
+	}
+
+	igt_spin_batch_free(gem_fd, spin[0]);
+	igt_spin_batch_free(gem_fd, spin[1]);
+	close(fd[0]);
+}
+
+static void
+no_sema(int gem_fd, const struct intel_execution_engine *e, bool busy)
+{
+	igt_spin_t *spin;
+	uint64_t val[2];
+	int fd, fd2;
+
+	igt_require(gem_has_ring(gem_fd, e->exec_id | e->flags));
+
+	if (busy) {
+		spin = igt_spin_batch_new(gem_fd, 0, e->exec_id | e->flags, 0);
+		igt_spin_batch_set_timeout(spin, batch_duration_ns);
+	} else {
+		usleep(batch_duration_ns / 1000);
+	}
+
+	fd = perf_i915_open_group(I915_PMU_ENGINE_SEMA(e->class, e->instance),
+				  -1);
+	igt_assert(fd >= 0);
+	fd2 = perf_i915_open_group(I915_PMU_ENGINE_WAIT(e->class, e->instance),
+				  fd);
+
+	pmu_read_multi(fd, 2, val);
+
+	assert_within_epsilon(val[0], 0.0f, tolerance);
+	assert_within_epsilon(val[1], 0.0f, tolerance);
+
+	if (busy)
+		igt_spin_batch_free(gem_fd, spin);
+	close(fd2);
+	close(fd);
+}
+
+static void
+multi_client(int gem_fd, const struct intel_execution_engine *e)
+{
+	uint64_t config = I915_PMU_ENGINE_BUSY(e->class, e->instance);
+	igt_spin_t *spin;
+	uint64_t val[2];
+	int fd[2];
+
+	igt_require(gem_has_ring(gem_fd, e->exec_id | e->flags));
+
+	spin = igt_spin_batch_new(gem_fd, 0, e->exec_id | e->flags, 0);
+	igt_spin_batch_set_timeout(spin, batch_duration_ns);
+
+	fd[0] = perf_i915_open(config);
+	igt_assert(fd[0] >= 0);
+
+	usleep(batch_duration_ns / 4000);
+
+	fd[1] = perf_i915_open(config);
+	igt_assert(fd[1] >= 0);
+
+	usleep(batch_duration_ns / 3000);
+
+	val[1] = pmu_read_single(fd[1]);
+	close(fd[1]);
+
+	gem_sync(gem_fd, spin->handle);
+
+	val[0] = pmu_read_single(fd[0]);
+
+	assert_within_epsilon(val[0], batch_duration_ns, tolerance);
+	assert_within_epsilon(val[1], batch_duration_ns / 3, tolerance);
+
+	igt_spin_batch_free(gem_fd, spin);
+	close(fd[0]);
+}
+
+/**
+ * Tests that i915 PMU correctly errors out on invalid initialization.
+ * i915 PMU is uncore PMU, thus:
+ *  - sampling period is not supported
+ *  - pid > 0 is not supported since we can't count per-process (we count
+ *    per whole system)
+ *  - cpu != 0 is not supported since i915 PMU exposes cpumask for CPU0
+ */
+static void invalid_init(void)
+{
+	struct perf_event_attr attr;
+	int pid, cpu;
+
+#define ATTR_INIT() \
+do { \
+	memset(&attr, 0, sizeof (attr)); \
+	attr.config = I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0); \
+	attr.type = i915_type_id(); \
+	igt_assert(attr.type != 0); \
+} while(0)
+
+	ATTR_INIT();
+	attr.sample_period = 100;
+	pid = -1;
+	cpu = 0;
+	igt_assert_eq(perf_event_open(&attr, pid, cpu, -1, 0), -1);
+	igt_assert_eq(errno, EINVAL);
+
+	ATTR_INIT();
+	pid = 0;
+	cpu = 0;
+	igt_assert_eq(perf_event_open(&attr, pid, cpu, -1, 0), -1);
+	igt_assert_eq(errno, EINVAL);
+
+	ATTR_INIT();
+	pid = -1;
+	cpu = 1;
+	igt_assert_eq(perf_event_open(&attr, pid, cpu, -1, 0), -1);
+	igt_assert_eq(errno, ENODEV);
+}
+
+static void init_other(unsigned int i, bool valid)
+{
+	int fd;
+
+	fd = perf_i915_open(__I915_PMU_OTHER(i));
+	igt_require(!(fd < 0 && errno == ENODEV));
+	if (valid) {
+		igt_assert(fd >= 0);
+	} else {
+		igt_assert(fd < 0);
+		return;
+	}
+
+	close(fd);
+}
+
+static void read_other(unsigned int i, bool valid)
+{
+	int fd;
+
+	fd = perf_i915_open(__I915_PMU_OTHER(i));
+	igt_require(!(fd < 0 && errno == ENODEV));
+	if (valid) {
+		igt_assert(fd >= 0);
+	} else {
+		igt_assert(fd < 0);
+		return;
+	}
+
+	(void)pmu_read_single(fd);
+
+	close(fd);
+}
+
+static bool cpu0_hotplug_support(void)
+{
+	int fd = open("/sys/devices/system/cpu/cpu0/online", O_WRONLY);
+
+	close(fd);
+
+	return fd > 0;
+}
+
+static uint64_t
+elapsed_ns(const struct timespec *start, const struct timespec *end)
+{
+	return ((end->tv_sec - start->tv_sec) * 1e9 +
+		(end->tv_nsec - start->tv_nsec));
+}
+
+static void cpu_hotplug(int gem_fd)
+{
+	struct timespec start, now;
+	igt_spin_t *spin;
+	uint64_t val, ref;
+	int fd;
+
+	igt_require(cpu0_hotplug_support());
+
+	spin = igt_spin_batch_new(gem_fd, 0, I915_EXEC_RENDER, 0);
+	fd = perf_i915_open(I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0));
+	igt_assert(fd >= 0);
+
+	clock_gettime(CLOCK_MONOTONIC, &start);
+
+	igt_fork(child, 1) {
+		int cpu = 0;
+
+		for (;;) {
+			char name[128];
+			int cpufd;
+
+			sprintf(name, "/sys/devices/system/cpu/cpu%d/online",
+				cpu);
+			cpufd = open(name, O_WRONLY);
+			if (cpufd == -1) {
+				igt_assert(cpu > 0);
+				break;
+			}
+			igt_assert_eq(write(cpufd, "0", 2), 2);
+
+			usleep(1000 * 1000);
+
+			igt_assert_eq(write(cpufd, "1", 2), 2);
+
+			close(cpufd);
+			cpu++;
+		}
+	}
+
+	igt_waitchildren();
+
+	igt_spin_batch_end(spin);
+	gem_sync(gem_fd, spin->handle);
+
+	clock_gettime(CLOCK_MONOTONIC, &now);
+	val = pmu_read_single(fd);
+
+	ref = elapsed_ns(&start, &now);
+
+	assert_within_epsilon(val, ref, tolerance);
+
+	igt_spin_batch_free(gem_fd, spin);
+	close(fd);
+}
+
+static void
+test_interrupts(int gem_fd)
+{
+	igt_spin_t *spin;
+	uint64_t idle, busy, prev;
+	int fd;
+
+	fd = perf_i915_open(I915_PMU_INTERRUPTS);
+	igt_assert(fd >= 0);
+
+	gem_quiescent_gpu(gem_fd);
+	sleep(2);
+	prev = pmu_read_single(fd);
+	usleep(batch_duration_ns / 1000);
+	idle = pmu_read_single(fd);
+
+	igt_assert_eq(idle - prev, 0);
+
+	spin = igt_spin_batch_new(gem_fd, 0, I915_EXEC_RENDER, 0);
+	igt_spin_batch_set_timeout(spin, batch_duration_ns);
+	gem_sync(gem_fd, spin->handle);
+
+	busy = pmu_read_single(fd);
+	igt_assert(busy > idle);
+
+	igt_spin_batch_free(gem_fd, spin);
+	close(fd);
+}
+
+static void
+test_frequency(int gem_fd)
+{
+	igt_spin_t *spin;
+	uint64_t idle[2], busy[2];
+	int fd;
+
+	fd = perf_i915_open_group(I915_PMU_REQUESTED_FREQUENCY, -1);
+	igt_assert(fd >= 0);
+	igt_assert(perf_i915_open_group(I915_PMU_ACTUAL_FREQUENCY, fd) >= 0);
+
+	gem_quiescent_gpu(gem_fd);
+	usleep(batch_duration_ns / 1000);
+
+	pmu_read_multi(fd, 2, idle);
+
+	spin = igt_spin_batch_new(gem_fd, 0, I915_EXEC_RENDER, 0);
+	igt_spin_batch_set_timeout(spin, batch_duration_ns);
+	gem_sync(gem_fd, spin->handle);
+
+	pmu_read_multi(fd, 2, busy);
+
+	igt_assert(busy[0] > idle[0]);
+	igt_assert(busy[1] > idle[1]);
+
+	igt_spin_batch_free(gem_fd, spin);
+	close(fd);
+}
+
+static void
+test_rc6(int gem_fd)
+{
+	int64_t duration_ns = 500 * 1000 * 1000;
+	igt_spin_t *spin;
+	uint64_t idle, busy, prev;
+	int fd;
+
+	fd = perf_i915_open(I915_PMU_RC6_RESIDENCY);
+	igt_assert(fd >= 0);
+
+	gem_quiescent_gpu(gem_fd);
+	sleep(2);
+
+	prev = pmu_read_single(fd);
+	usleep(duration_ns / 1000);
+	idle = pmu_read_single(fd);
+
+	assert_within_epsilon(idle - prev, duration_ns, tolerance);
+
+	spin = igt_spin_batch_new(gem_fd, 0, I915_EXEC_RENDER, 0);
+	igt_spin_batch_set_timeout(spin, duration_ns);
+
+	prev = pmu_read_single(fd);
+
+	gem_sync(gem_fd, spin->handle);
+
+	busy = pmu_read_single(fd);
+	assert_within_epsilon(busy - prev, 0.0, tolerance);
+
+	igt_spin_batch_free(gem_fd, spin);
+	close(fd);
+}
+
+static void
+test_rc6p(int gem_fd)
+{
+	const unsigned int devid = intel_get_drm_devid(gem_fd);
+	int64_t duration_ns = 2 * 1000 * 1000 * 1000;
+	unsigned int num_pmu = 1;
+	igt_spin_t *spin;
+	uint64_t idle[3], busy[3], prev[3];
+	unsigned int i;
+	int fd, ret;
+
+	igt_require(intel_gen(devid) < 8 && !IS_HASWELL(devid));
+
+	fd = perf_i915_open_group(I915_PMU_RC6_RESIDENCY, -1);
+	igt_assert(fd >= 0);
+
+	ret = perf_i915_open_group(I915_PMU_RC6p_RESIDENCY, fd);
+	if (ret > 0) {
+		num_pmu++;
+		ret = perf_i915_open_group(I915_PMU_RC6p_RESIDENCY, fd);
+		if (ret > 0)
+			num_pmu++;
+	}
+
+	gem_quiescent_gpu(gem_fd);
+	sleep(2);
+
+	pmu_read_multi(fd, num_pmu, prev);
+	usleep(duration_ns / 1000);
+	pmu_read_multi(fd, num_pmu, idle);
+
+	for (i = 0; i < num_pmu; i++)
+		assert_within_epsilon(idle[i] - prev[i], duration_ns,
+				      tolerance);
+
+	spin = igt_spin_batch_new(gem_fd, 0, I915_EXEC_RENDER, 0);
+	igt_spin_batch_set_timeout(spin, duration_ns);
+
+	pmu_read_multi(fd, num_pmu, prev);
+
+	gem_sync(gem_fd, spin->handle);
+
+	pmu_read_multi(fd, num_pmu, busy);
+
+	for (i = 0; i < num_pmu; i++)
+		assert_within_epsilon(busy[i] - prev[i], 0.0, tolerance);
+
+	igt_spin_batch_free(gem_fd, spin);
+	close(fd);
+}
+
+igt_main
+{
+	const unsigned int num_other_metrics =
+				I915_PMU_LAST - __I915_PMU_OTHER(0) + 1;
+	unsigned int num_engines = 0;
+	int fd = -1;
+	const struct intel_execution_engine *e;
+	unsigned int i;
+
+	igt_fixture {
+		fd = drm_open_driver_master(DRIVER_INTEL);
+
+		igt_require_gem(fd);
+		igt_require(i915_type_id() > 0);
+
+		for_each_engine_class_instance(fd, e) {
+			if (gem_has_ring(fd, e->exec_id | e->flags))
+				num_engines++;
+		}
+	}
+
+	/**
+	 * Test invalid access via perf API is rejected.
+	 */
+	igt_subtest("invalid-init")
+		invalid_init();
+
+	for_each_engine_class_instance(fd, e) {
+		/**
+		 * Test that a single engine metric can be initialized.
+		 */
+		igt_subtest_f("init-busy-%s", e->name)
+			init(fd, e, I915_SAMPLE_BUSY);
+
+		igt_subtest_f("init-wait-%s", e->name)
+			init(fd, e, I915_SAMPLE_WAIT);
+
+		igt_subtest_f("init-sema-%s", e->name)
+			init(fd, e, I915_SAMPLE_SEMA);
+
+		/**
+		 * Test that engines show no load when idle.
+		 */
+		igt_subtest_f("idle-%s", e->name)
+			single(fd, e, false);
+
+		/**
+		 * Test that a single engine reports load correctly.
+		 */
+		igt_subtest_f("busy-%s", e->name)
+			single(fd, e, true);
+
+		/**
+		 * Test that when one engine is loaded others report no load.
+		 */
+		igt_subtest_f("busy-check-all-%s", e->name)
+			busy_check_all(fd, e, num_engines);
+
+		/**
+		 * Test that when two engines are loaded others report no load.
+		 */
+		if (!(e->class == I915_ENGINE_CLASS_RENDER && e->instance == 0))
+			igt_subtest_f("two-busy-check-all-%s", e->name)
+				two_busy_check_all(fd, e, num_engines);
+
+		/**
+		 * Test that semaphore counters report no activity on idle
+		 * engines.
+		 */
+		igt_subtest_f("idle-no-semaphores-%s", e->name)
+			no_sema(fd, e, false);
+
+		igt_subtest_f("busy-no-semaphores-%s", e->name)
+			no_sema(fd, e, true);
+
+		/**
+		 * Check that two perf clients do not influence each other's
+		 * observations.
+		 */
+		igt_subtest_f("multi-client-%s", e->name)
+			multi_client(fd, e);
+	}
+
+	/**
+	 * Test that non-engine counters can be initialized and read. Apart
+	 * from the invalid metric which should fail.
+	 */
+	for (i = 0; i < num_other_metrics + 1; i++) {
+		igt_subtest_f("other-init-%u", i)
+			init_other(i, i < num_other_metrics);
+
+		igt_subtest_f("other-read-%u", i)
+			read_other(i, i < num_other_metrics);
+	}
+
+	/**
+	 * Test counters are not affected by CPU offline/online events.
+	 */
+	igt_subtest("cpu-hotplug")
+		cpu_hotplug(fd);
+
+	/**
+	 * Test GPU frequency.
+	 */
+	igt_subtest("frequency")
+		test_frequency(fd);
+
+	/**
+	 * Test interrupt count reporting.
+	 */
+	igt_subtest("interrupts")
+		test_interrupts(fd);
+
+	/**
+	 * Test RC6 residency reporting.
+	 */
+	igt_subtest("rc6")
+		test_rc6(fd);
+
+	/**
+	 * Test RC6p residency reporting.
+	 */
+	igt_subtest("rc6p")
+		test_rc6p(fd);
+}
-- 
2.9.5

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH i-g-t 5/5] tests/perf_pmu: Tests for i915 PMU API
  2017-09-18 11:38 ` [PATCH i-g-t 5/5] tests/perf_pmu: Tests for i915 PMU API Tvrtko Ursulin
@ 2017-09-18 13:17   ` Chris Wilson
  2017-09-19  8:37     ` Tvrtko Ursulin
  2017-09-18 21:18   ` Rogozhkin, Dmitry V
  2017-09-20 16:12   ` [PATCH v2 " Tvrtko Ursulin
  2 siblings, 1 reply; 15+ messages in thread
From: Chris Wilson @ 2017-09-18 13:17 UTC (permalink / raw)
  To: Tvrtko Ursulin, Intel-gfx

Quoting Tvrtko Ursulin (2017-09-18 12:38:40)
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> A bunch of tests for the new i915 PMU feature.
> 
> Parts of the code were initially sketched by Dmitry Rogozhkin.
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
> ---
>  lib/igt_gt.c           |  23 +-
>  lib/igt_gt.h           |   8 +
>  tests/Makefile.sources |   1 +
>  tests/perf_pmu.c       | 713 +++++++++++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 738 insertions(+), 7 deletions(-)
>  create mode 100644 tests/perf_pmu.c
> 
> diff --git a/lib/igt_gt.c b/lib/igt_gt.c
> index b3f3b3809eee..102cc2841feb 100644
> --- a/lib/igt_gt.c
> +++ b/lib/igt_gt.c
> @@ -537,14 +537,23 @@ unsigned intel_detect_and_clear_missed_interrupts(int fd)
>         return missed;
>  }
>  
> +enum drm_i915_gem_engine_class {
> +       I915_ENGINE_CLASS_OTHER = 0,
> +       I915_ENGINE_CLASS_RENDER = 1,
> +       I915_ENGINE_CLASS_COPY = 2,
> +       I915_ENGINE_CLASS_VIDEO = 3,
> +       I915_ENGINE_CLASS_VIDEO_ENHANCE = 4,
> +       I915_ENGINE_CLASS_MAX /* non-ABI */
> +};
> +
>  const struct intel_execution_engine intel_execution_engines[] = {
> -       { "default", NULL, 0, 0 },
> -       { "render", "rcs0", I915_EXEC_RENDER, 0 },
> -       { "bsd", "vcs0", I915_EXEC_BSD, 0 },
> -       { "bsd1", "vcs0", I915_EXEC_BSD, 1<<13 /*I915_EXEC_BSD_RING1*/ },
> -       { "bsd2", "vcs1", I915_EXEC_BSD, 2<<13 /*I915_EXEC_BSD_RING2*/ },
> -       { "blt", "bcs0", I915_EXEC_BLT, 0 },
> -       { "vebox", "vecs0", I915_EXEC_VEBOX, 0 },
> +       { "default", NULL, -1, -1, 0, 0 },
> +       { "render", "rcs0", I915_ENGINE_CLASS_RENDER, 0, I915_EXEC_RENDER, 0 },
> +       { "bsd", "vcs0", I915_ENGINE_CLASS_VIDEO, 0, I915_EXEC_BSD, 0 },
> +       { "bsd1", "vcs0", I915_ENGINE_CLASS_VIDEO, 0, I915_EXEC_BSD, 1<<13 /*I915_EXEC_BSD_RING1*/ },
> +       { "bsd2", "vcs1", I915_ENGINE_CLASS_VIDEO, 1, I915_EXEC_BSD, 2<<13 /*I915_EXEC_BSD_RING2*/ },
> +       { "blt", "bcs0", I915_ENGINE_CLASS_COPY, 0, I915_EXEC_BLT, 0 },
> +       { "vebox", "vecs0", I915_ENGINE_CLASS_VIDEO_ENHANCE, 0, I915_EXEC_VEBOX, 0 },
>         { NULL, 0, 0 }

I was anticipating a new struct for the explicit interface so that we
can easily phase out the old one with its aliasing.

>  };
>  
> diff --git a/lib/igt_gt.h b/lib/igt_gt.h
> index 2579cbd37be7..436041ce9cc0 100644
> --- a/lib/igt_gt.h
> +++ b/lib/igt_gt.h
> @@ -66,6 +66,8 @@ unsigned intel_detect_and_clear_missed_interrupts(int fd);
>  extern const struct intel_execution_engine {
>         const char *name;
>         const char *full_name;
> +       int class;
> +       int instance;
>         unsigned exec_id;
>         unsigned flags;
>  } intel_execution_engines[];
> @@ -78,6 +80,12 @@ extern const struct intel_execution_engine {
>              e__++) \
>                 for_if (gem_has_ring(fd__, flags__ = e__->exec_id | e__->flags))
>  
> +#define for_each_engine_class_instance(fd__, e__) \
> +       for ((e__) = intel_execution_engines;\
> +            (e__)->name; \
> +            (e__)++) \
> +               for_if ((e__)->class > 0)
> +
>  bool gem_can_store_dword(int fd, unsigned int engine);
>  
>  #endif /* IGT_GT_H */
> diff --git a/tests/Makefile.sources b/tests/Makefile.sources
> index cf542df181a8..4bab6247151c 100644
> --- a/tests/Makefile.sources
> +++ b/tests/Makefile.sources
> @@ -217,6 +217,7 @@ TESTS_progs = \
>         kms_vblank \
>         meta_test \
>         perf \
> +       perf_pmu \
>         pm_backlight \
>         pm_lpsp \
>         pm_rc6_residency \
> diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
> new file mode 100644
> index 000000000000..2dbee586dacc
> --- /dev/null
> +++ b/tests/perf_pmu.c
> @@ -0,0 +1,713 @@
> +/*
> + * Copyright © 2017 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + */
> +
> +#include <stdlib.h>
> +#include <stdio.h>
> +#include <string.h>
> +#include <fcntl.h>
> +#include <inttypes.h>
> +#include <errno.h>
> +#include <sys/stat.h>
> +#include <sys/time.h>
> +#include <sys/times.h>
> +#include <sys/types.h>
> +#include <dirent.h>
> +#include <time.h>
> +#include <poll.h>
> +
> +#include "igt.h"
> +#include "igt_perf.h"
> +
> +IGT_TEST_DESCRIPTION("Test the i915 pmu perf interface");
> +
> +const double tolerance = 0.02f;
> +const unsigned long batch_duration_ns = 1000 * 1000 * 1000 / 2;
> +
> +static void
> +init(int gem_fd, const struct intel_execution_engine *e, uint8_t sample)
> +{
> +       uint64_t config = __I915_PMU_ENGINE(e->class, e->instance, sample);
> +       int fd;
> +
> +       igt_require(gem_has_ring(gem_fd, e->exec_id | e->flags));

gem_require_ring()

> +
> +       fd = perf_i915_open(config);

Although the kernel interface is the authority.

So this should be igt_require, and igt_assert(has_ring);

> +       igt_assert(fd >= 0);
> +
> +       close(fd);
> +}
> +
> +static uint64_t pmu_read_single(int fd)
> +{
> +       uint64_t data[2];
> +       ssize_t len;
> +
> +       len = read(fd, data, sizeof(data));

Perf is a datagram api, right? A short read gives what you asked for and
discards the rest of the packet, iirc.

> +       igt_assert_eq(len, sizeof(data));
> +
> +       return data[0];
> +}
> +
> +static void pmu_read_multi(int fd, unsigned int num, uint64_t *val)
> +{
> +       uint64_t buf[2 + num];
> +       unsigned int i;
> +       ssize_t len;
> +
> +       len = read(fd, buf, sizeof(buf));
> +       igt_assert_eq(len, sizeof(buf));
> +       for (i = 0; i < num; i++)
> +               val[i] = buf[2 + i];
> +}
> +
> +#define assert_within_epsilon(x, ref, tolerance) \
> +       igt_assert_f((double)(x) <= (1.0 + tolerance) * (double)ref && \
> +                    (double)(x) >= (1.0 - tolerance) * (double)ref, \
> +                    "'%s' != '%s' (%f not within %f%% tolerance of %f)\n",\
> +                    #x, #ref, (double)x, tolerance * 100.0, (double)ref)
> +
> +static void
> +single(int gem_fd, const struct intel_execution_engine *e, bool busy)
> +{
> +       uint64_t config = I915_PMU_ENGINE_BUSY(e->class, e->instance);
> +       double ref = busy ? batch_duration_ns : 0.0f;
> +       igt_spin_t *spin;
> +       uint64_t val;
> +       int fd;
> +
> +       igt_require(gem_has_ring(gem_fd, e->exec_id | e->flags));
> +
> +       if (busy) {
> +               spin = igt_spin_batch_new(gem_fd, 0, e->exec_id | e->flags, 0);
> +               igt_spin_batch_set_timeout(spin, batch_duration_ns);
> +       } else {
> +               usleep(batch_duration_ns / 1000);
> +       }
> +
> +       fd = perf_i915_open(config);
> +       igt_assert(fd >= 0);
> +
> +       if (busy)
> +               gem_sync(gem_fd, spin->handle);
> +
> +       val = pmu_read_single(fd);
> +
> +       assert_within_epsilon(val, ref, tolerance);
> +
> +       if (busy)
> +               igt_spin_batch_free(gem_fd, spin);
> +       close(fd);
> +}
> +
> +static void
> +busy_check_all(int gem_fd, const struct intel_execution_engine *e,

busy_check_others

busy_check_all I would expect to be checking that all engines are
correctly recorded as being busy at the same time. And there should also
be permutations of (busy, idle, wait) across the engines.

> +              const unsigned int num_engines)
> +{
> +       const struct intel_execution_engine *e_;
> +       uint64_t val[num_engines];
> +       int fd[2];
> +       igt_spin_t *spin;
> +       unsigned int busy_idx, i;
> +
> +       igt_require(gem_has_ring(gem_fd, e->exec_id | e->flags));
> +
> +       spin = igt_spin_batch_new(gem_fd, 0, e->exec_id | e->flags, 0);
> +       igt_spin_batch_set_timeout(spin, batch_duration_ns);
> +
> +       i = 0;
> +       fd[0] = -1;
> +       for_each_engine_class_instance(fd, e_) {
> +               if (!gem_has_ring(gem_fd, e_->exec_id | e_->flags))
> +                       continue;
> +
> +               if (e == e_)
> +                       busy_idx = i;
> +
> +               fd[i == 0 ? 0 : 1] =
> +                       perf_i915_open_group(I915_PMU_ENGINE_BUSY(e_->class,
> +                                                                 e_->instance),
> +                                            fd[0]);
> +               igt_assert(fd[0] > 0);
> +               igt_assert(i == 0 || fd[1] > 0);
> +               i++;
> +       }
> +
> +       gem_sync(gem_fd, spin->handle);
> +
> +       pmu_read_multi(fd[0], num_engines, val);
> +
> +       assert_within_epsilon(val[busy_idx], batch_duration_ns, tolerance);
> +       for (i = 0; i < num_engines; i++) {
> +               if (i == busy_idx)
> +                       continue;
> +               assert_within_epsilon(val[i], 0.0f, tolerance);
> +       }
> +
> +       igt_spin_batch_free(gem_fd, spin);
> +       close(fd[0]);
> +}

> +static void
> +no_sema(int gem_fd, const struct intel_execution_engine *e, bool busy)

This is just the sanity check half of the sema test.

No wait, no queued?

> +static void
> +multi_client(int gem_fd, const struct intel_execution_engine *e)
> +{
> +       uint64_t config = I915_PMU_ENGINE_BUSY(e->class, e->instance);
> +       igt_spin_t *spin;
> +       uint64_t val[2];
> +       int fd[2];
> +
> +       igt_require(gem_has_ring(gem_fd, e->exec_id | e->flags));
> +
> +       spin = igt_spin_batch_new(gem_fd, 0, e->exec_id | e->flags, 0);
> +       igt_spin_batch_set_timeout(spin, batch_duration_ns);
> +
> +       fd[0] = perf_i915_open(config);
> +       igt_assert(fd[0] >= 0);
> +
> +       usleep(batch_duration_ns / 4000);
> +
> +       fd[1] = perf_i915_open(config);
> +       igt_assert(fd[1] >= 0);
> +
> +       usleep(batch_duration_ns / 3000);
> +
> +       val[1] = pmu_read_single(fd[1]);
> +       close(fd[1]);
> +
> +       gem_sync(gem_fd, spin->handle);
> +
> +       val[0] = pmu_read_single(fd[0]);
> +
> +       assert_within_epsilon(val[0], batch_duration_ns, tolerance);
> +       assert_within_epsilon(val[1], batch_duration_ns / 3, tolerance);
> +
> +       igt_spin_batch_free(gem_fd, spin);
> +       close(fd[0]);
> +}


> +static void cpu_hotplug(int gem_fd)
> +{
> +       struct timespec start, now;
> +       igt_spin_t *spin;
> +       uint64_t val, ref;
> +       int fd;
> +
> +       igt_require(cpu0_hotplug_support());
> +
> +       spin = igt_spin_batch_new(gem_fd, 0, I915_EXEC_RENDER, 0);
> +       fd = perf_i915_open(I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0));
> +       igt_assert(fd >= 0);
> +
> +       clock_gettime(CLOCK_MONOTONIC, &start);
> +
> +       igt_fork(child, 1) {
> +               int cpu = 0;
> +
> +               for (;;) {
> +                       char name[128];
> +                       int cpufd;
> +
> +                       sprintf(name, "/sys/devices/system/cpu/cpu%d/online",
> +                               cpu);
> +                       cpufd = open(name, O_WRONLY);
> +                       if (cpufd == -1) {
> +                               igt_assert(cpu > 0);
> +                               break;
> +                       }
> +                       igt_assert_eq(write(cpufd, "0", 2), 2);
> +
> +                       usleep(1000 * 1000);
> +
> +                       igt_assert_eq(write(cpufd, "1", 2), 2);
> +
> +                       close(cpufd);
> +                       cpu++;
> +               }
> +       }
> +
> +       igt_waitchildren();
> +
> +       igt_spin_batch_end(spin);
> +       gem_sync(gem_fd, spin->handle);
> +
> +       clock_gettime(CLOCK_MONOTONIC, &now);

Did we ever export the igt routines for probing supported clocks?
In this case, this fits into igt_nsec_elapsed.

> +       val = pmu_read_single(fd);
> +
> +       ref = elapsed_ns(&start, &now);
> +
> +       assert_within_epsilon(val, ref, tolerance);
> +
> +       igt_spin_batch_free(gem_fd, spin);
> +       close(fd);
> +}
> +
> +static void
> +test_interrupts(int gem_fd)
> +{
> +       igt_spin_t *spin;
> +       uint64_t idle, busy, prev;
> +       int fd;
> +
> +       fd = perf_i915_open(I915_PMU_INTERRUPTS);
> +       igt_assert(fd >= 0);
> +
> +       gem_quiescent_gpu(gem_fd);
> +       sleep(2);
> +       prev = pmu_read_single(fd);
> +       usleep(batch_duration_ns / 1000);
> +       idle = pmu_read_single(fd);
> +
> +       igt_assert_eq(idle - prev, 0);
> +
> +       spin = igt_spin_batch_new(gem_fd, 0, I915_EXEC_RENDER, 0);
> +       igt_spin_batch_set_timeout(spin, batch_duration_ns);
> +       gem_sync(gem_fd, spin->handle);

There's no guaranteed interrupt here.

> +
> +       busy = pmu_read_single(fd);
> +       igt_assert(busy > idle);
> +
> +       igt_spin_batch_free(gem_fd, spin);
> +       close(fd);
> +}
> +
> +static void
> +test_frequency(int gem_fd)
> +{
> +       igt_spin_t *spin;
> +       uint64_t idle[2], busy[2];
> +       int fd;
> +
> +       fd = perf_i915_open_group(I915_PMU_REQUESTED_FREQUENCY, -1);
> +       igt_assert(fd >= 0);

Ask the kernel if it is supported.

> +       igt_assert(perf_i915_open_group(I915_PMU_ACTUAL_FREQUENCY, fd) >= 0);
> +
> +       gem_quiescent_gpu(gem_fd);
> +       usleep(batch_duration_ns / 1000);
> +
> +       pmu_read_multi(fd, 2, idle);
> +
> +       spin = igt_spin_batch_new(gem_fd, 0, I915_EXEC_RENDER, 0);
> +       igt_spin_batch_set_timeout(spin, batch_duration_ns);
> +       gem_sync(gem_fd, spin->handle);
> +
> +       pmu_read_multi(fd, 2, busy);
> +
> +       igt_assert(busy[0] > idle[0]);
> +       igt_assert(busy[1] > idle[1]);

Nothing guarantees busy[1] changes, it is hw/fw dependent.
busy[0] depends on user config.

> +
> +       igt_spin_batch_free(gem_fd, spin);
> +       close(fd);
> +}
> +

> +static void
> +test_rc6p(int gem_fd)
> +{
> +       const unsigned int devid = intel_get_drm_devid(gem_fd);
> +       int64_t duration_ns = 2 * 1000 * 1000 * 1000;
> +       unsigned int num_pmu = 1;
> +       igt_spin_t *spin;
> +       uint64_t idle[3], busy[3], prev[3];
> +       unsigned int i;
> +       int fd, ret;
> +
> +       igt_require(intel_gen(devid) < 8 && !IS_HASWELL(devid));

Ask the kernel. (Applies equally to rc6, rc6p).

No rc6pp testing?

> +
> +       fd = perf_i915_open_group(I915_PMU_RC6_RESIDENCY, -1);
> +       igt_assert(fd >= 0);
> +
> +       ret = perf_i915_open_group(I915_PMU_RC6p_RESIDENCY, fd);
> +       if (ret > 0) {
> +               num_pmu++;
> +               ret = perf_i915_open_group(I915_PMU_RC6p_RESIDENCY, fd);
> +               if (ret > 0)
> +                       num_pmu++;
> +       }
> +
> +       gem_quiescent_gpu(gem_fd);
> +       sleep(2);
> +
> +       pmu_read_multi(fd, num_pmu, prev);
> +       usleep(duration_ns / 1000);
> +       pmu_read_multi(fd, num_pmu, idle);
> +
> +       for (i = 0; i < num_pmu; i++)
> +               assert_within_epsilon(idle[i] - prev[i], duration_ns,
> +                                     tolerance);
> +
> +       spin = igt_spin_batch_new(gem_fd, 0, I915_EXEC_RENDER, 0);
> +       igt_spin_batch_set_timeout(spin, duration_ns);

Are we sure the GPU isn't allowed to sleep? i915_user_forcewake we
expect to keep the GPU out of rc6.

> +igt_main
> +{
> +       const unsigned int num_other_metrics =
> +                               I915_PMU_LAST - __I915_PMU_OTHER(0) + 1;
> +       unsigned int num_engines = 0;
> +       int fd = -1;
> +       const struct intel_execution_engine *e;
> +       unsigned int i;
> +
> +       igt_fixture {
> +               fd = drm_open_driver_master(DRIVER_INTEL);
> +
> +               igt_require_gem(fd);
> +               igt_require(i915_type_id() > 0);
> +
> +               for_each_engine_class_instance(fd, e) {
> +                       if (gem_has_ring(fd, e->exec_id | e->flags))
> +                               num_engines++;
> +               }
> +       }
> +
> +       /**
> +        * Test invalid access via perf API is rejected.
> +        */

ARGH. No comments on the intentions of the code?
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* ✓ Fi.CI.BAT: success for IGT PMU support (rev2)
  2017-09-18 11:38 [PATCH i-g-t 0/5] IGT PMU support Tvrtko Ursulin
                   ` (4 preceding siblings ...)
  2017-09-18 11:38 ` [PATCH i-g-t 5/5] tests/perf_pmu: Tests for i915 PMU API Tvrtko Ursulin
@ 2017-09-18 18:16 ` Patchwork
  2017-09-19  9:44 ` ✓ Fi.CI.IGT: " Patchwork
  2017-09-20 16:52 ` ✗ Fi.CI.BAT: warning for IGT PMU support (rev3) Patchwork
  7 siblings, 0 replies; 15+ messages in thread
From: Patchwork @ 2017-09-18 18:16 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

== Series Details ==

Series: IGT PMU support (rev2)
URL   : https://patchwork.freedesktop.org/series/28253/
State : success

== Summary ==

IGT patchset tested on top of latest successful build
2e93946e4e15cb8e7e978334cb62677f9ead82fe meson: Fix IGT_GIT_SHA1 handling

with latest DRM-Tip kernel build CI_DRM_3102
dee3ba3a8c8b drm-tip: 2017y-09m-18d-15h-59m-43s UTC integration manifest

fi-bdw-5557u     total:289  pass:268  dwarn:0   dfail:0   fail:0   skip:21  time:448s
fi-bdw-gvtdvm    total:289  pass:265  dwarn:0   dfail:0   fail:0   skip:24  time:484s
fi-blb-e6850     total:289  pass:224  dwarn:1   dfail:0   fail:0   skip:64  time:421s
fi-bsw-n3050     total:289  pass:243  dwarn:0   dfail:0   fail:0   skip:46  time:520s
fi-bwr-2160      total:289  pass:184  dwarn:0   dfail:0   fail:0   skip:105 time:280s
fi-bxt-j4205     total:289  pass:260  dwarn:0   dfail:0   fail:0   skip:29  time:508s
fi-byt-j1900     total:289  pass:254  dwarn:1   dfail:0   fail:0   skip:34  time:504s
fi-byt-n2820     total:289  pass:250  dwarn:1   dfail:0   fail:0   skip:38  time:499s
fi-cfl-s         total:289  pass:223  dwarn:34  dfail:0   fail:0   skip:32  time:552s
fi-elk-e7500     total:289  pass:230  dwarn:0   dfail:0   fail:0   skip:59  time:429s
fi-glk-2a        total:289  pass:260  dwarn:0   dfail:0   fail:0   skip:29  time:597s
fi-hsw-4770      total:289  pass:263  dwarn:0   dfail:0   fail:0   skip:26  time:429s
fi-hsw-4770r     total:289  pass:263  dwarn:0   dfail:0   fail:0   skip:26  time:407s
fi-ilk-650       total:289  pass:229  dwarn:0   dfail:0   fail:0   skip:60  time:429s
fi-ivb-3520m     total:289  pass:261  dwarn:0   dfail:0   fail:0   skip:28  time:493s
fi-ivb-3770      total:289  pass:261  dwarn:0   dfail:0   fail:0   skip:28  time:469s
fi-kbl-7500u     total:289  pass:264  dwarn:1   dfail:0   fail:0   skip:24  time:478s
fi-kbl-7560u     total:289  pass:270  dwarn:0   dfail:0   fail:0   skip:19  time:586s
fi-kbl-r         total:289  pass:262  dwarn:0   dfail:0   fail:0   skip:27  time:597s
fi-pnv-d510      total:289  pass:223  dwarn:1   dfail:0   fail:0   skip:65  time:544s
fi-skl-6260u     total:289  pass:269  dwarn:0   dfail:0   fail:0   skip:20  time:461s
fi-skl-6700k     total:289  pass:265  dwarn:0   dfail:0   fail:0   skip:24  time:756s
fi-skl-6770hq    total:289  pass:269  dwarn:0   dfail:0   fail:0   skip:20  time:492s
fi-skl-gvtdvm    total:289  pass:266  dwarn:0   dfail:0   fail:0   skip:23  time:482s
fi-snb-2520m     total:289  pass:251  dwarn:0   dfail:0   fail:0   skip:38  time:578s
fi-snb-2600      total:289  pass:248  dwarn:0   dfail:0   fail:2   skip:39  time:428s

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_194/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH i-g-t 5/5] tests/perf_pmu: Tests for i915 PMU API
  2017-09-18 11:38 ` [PATCH i-g-t 5/5] tests/perf_pmu: Tests for i915 PMU API Tvrtko Ursulin
  2017-09-18 13:17   ` Chris Wilson
@ 2017-09-18 21:18   ` Rogozhkin, Dmitry V
  2017-09-19  8:19     ` Tvrtko Ursulin
  2017-09-20 16:12   ` [PATCH v2 " Tvrtko Ursulin
  2 siblings, 1 reply; 15+ messages in thread
From: Rogozhkin, Dmitry V @ 2017-09-18 21:18 UTC (permalink / raw)
  To: tursulin; +Cc: Intel-gfx

Did you try tests on the system with 2 VDBOX engines? On my side 2 tests
are failing on SKL GT4e NUC:

(perf_pmu:5414) CRITICAL: Test assertion failure function
busy_check_all, file perf_pmu.c:164:
(perf_pmu:5414) CRITICAL: Failed assertion: (double)(val[i]) <= (1.0 +
tolerance) * (double)0.0f && (double)(val[i]) >= (1.0 - tolerance) *
(double)0.0f
(perf_pmu:5414) CRITICAL: 'val[i]' != '0.0f' (499984960.000000 not
within 2.000000% tolerance of 0.000000)
Subtest two-busy-check-all-bsd: FAIL (0.501s)

(perf_pmu:5414) CRITICAL: Test assertion failure function
two_busy_check_all, file perf_pmu.c:221:
(perf_pmu:5414) CRITICAL: Failed assertion: (double)(val[i]) <= (1.0 +
tolerance) * (double)0.0f && (double)(val[i]) >= (1.0 - tolerance) *
(double)0.0f
(perf_pmu:5414) CRITICAL: 'val[i]' != '0.0f' (499940146.000000 not
within 2.000000% tolerance of 0.000000)
Subtest two-busy-check-all-bsd1: FAIL (0.501s)

I am trying to speculate on the reasons below.


On Mon, 2017-09-18 at 12:38 +0100, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> A bunch of tests for the new i915 PMU feature.
> 
> Parts of the code were initially sketched by Dmitry Rogozhkin.
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
> ---
>  lib/igt_gt.c           |  23 +-
>  lib/igt_gt.h           |   8 +
>  tests/Makefile.sources |   1 +
>  tests/perf_pmu.c       | 713 +++++++++++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 738 insertions(+), 7 deletions(-)
>  create mode 100644 tests/perf_pmu.c
> 
> diff --git a/lib/igt_gt.c b/lib/igt_gt.c
> index b3f3b3809eee..102cc2841feb 100644
> --- a/lib/igt_gt.c
> +++ b/lib/igt_gt.c
> @@ -537,14 +537,23 @@ unsigned intel_detect_and_clear_missed_interrupts(int fd)
>  	return missed;
>  }
>  
> +enum drm_i915_gem_engine_class {
> +	I915_ENGINE_CLASS_OTHER = 0,
> +	I915_ENGINE_CLASS_RENDER = 1,
> +	I915_ENGINE_CLASS_COPY = 2,
> +	I915_ENGINE_CLASS_VIDEO = 3,
> +	I915_ENGINE_CLASS_VIDEO_ENHANCE = 4,
> +	I915_ENGINE_CLASS_MAX /* non-ABI */
> +};
> +
>  const struct intel_execution_engine intel_execution_engines[] = {
> -	{ "default", NULL, 0, 0 },
> -	{ "render", "rcs0", I915_EXEC_RENDER, 0 },
> -	{ "bsd", "vcs0", I915_EXEC_BSD, 0 },
> -	{ "bsd1", "vcs0", I915_EXEC_BSD, 1<<13 /*I915_EXEC_BSD_RING1*/ },
> -	{ "bsd2", "vcs1", I915_EXEC_BSD, 2<<13 /*I915_EXEC_BSD_RING2*/ },
> -	{ "blt", "bcs0", I915_EXEC_BLT, 0 },
> -	{ "vebox", "vecs0", I915_EXEC_VEBOX, 0 },
> +	{ "default", NULL, -1, -1, 0, 0 },
> +	{ "render", "rcs0", I915_ENGINE_CLASS_RENDER, 0, I915_EXEC_RENDER, 0 },
> +	{ "bsd", "vcs0", I915_ENGINE_CLASS_VIDEO, 0, I915_EXEC_BSD, 0 },
With such definition, we will probably detect "bsd" as an engine (as
well as "bsd1" and "bsd2"), right? As a result, we will run
two-busy-check-all-bsd for it and according to defined flags we will
submit workloads to _both_ vcs0 and vcs1 evenly following i915 KMD
dispatching. Thus, the two-busy-check-all-bsd will fail since it will
detect a load on 3 engines (rcs0, vcs0, vcs1) instead of 2.

I am not quite sure why two-busy-check-all-bsd1 fails as well on my
side? or rather, why it did not fail on your side as well? The only
explanation I see is that the test thinks "bsd" and "bsd1" are separate
engines, and, thus, count them as 2. But that should fail on single
VDBOX system as well... hm...

> +	{ "bsd1", "vcs0", I915_ENGINE_CLASS_VIDEO, 0, I915_EXEC_BSD, 1<<13 /*I915_EXEC_BSD_RING1*/ },
> +	{ "bsd2", "vcs1", I915_ENGINE_CLASS_VIDEO, 1, I915_EXEC_BSD, 2<<13 /*I915_EXEC_BSD_RING2*/ },
> +	{ "blt", "bcs0", I915_ENGINE_CLASS_COPY, 0, I915_EXEC_BLT, 0 },
> +	{ "vebox", "vecs0", I915_ENGINE_CLASS_VIDEO_ENHANCE, 0, I915_EXEC_VEBOX, 0 },
>  	{ NULL, 0, 0 }
>  };
>  
> diff --git a/lib/igt_gt.h b/lib/igt_gt.h
> index 2579cbd37be7..436041ce9cc0 100644
> --- a/lib/igt_gt.h
> +++ b/lib/igt_gt.h
> @@ -66,6 +66,8 @@ unsigned intel_detect_and_clear_missed_interrupts(int fd);
>  extern const struct intel_execution_engine {
>  	const char *name;
>  	const char *full_name;
> +	int class;
> +	int instance;
>  	unsigned exec_id;
>  	unsigned flags;
>  } intel_execution_engines[];
> @@ -78,6 +80,12 @@ extern const struct intel_execution_engine {
>  	     e__++) \
>  		for_if (gem_has_ring(fd__, flags__ = e__->exec_id | e__->flags))
>  
> +#define for_each_engine_class_instance(fd__, e__) \
> +	for ((e__) = intel_execution_engines;\
> +	     (e__)->name; \
> +	     (e__)++) \
> +		for_if ((e__)->class > 0)
> +
>  bool gem_can_store_dword(int fd, unsigned int engine);
>  
>  #endif /* IGT_GT_H */
> diff --git a/tests/Makefile.sources b/tests/Makefile.sources
> index cf542df181a8..4bab6247151c 100644
> --- a/tests/Makefile.sources
> +++ b/tests/Makefile.sources
> @@ -217,6 +217,7 @@ TESTS_progs = \
>  	kms_vblank \
>  	meta_test \
>  	perf \
> +	perf_pmu \
>  	pm_backlight \
>  	pm_lpsp \
>  	pm_rc6_residency \
> diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
> new file mode 100644
> index 000000000000..2dbee586dacc
> --- /dev/null
> +++ b/tests/perf_pmu.c
> @@ -0,0 +1,713 @@
> +/*
> + * Copyright © 2017 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + */
> +
> +#include <stdlib.h>
> +#include <stdio.h>
> +#include <string.h>
> +#include <fcntl.h>
> +#include <inttypes.h>
> +#include <errno.h>
> +#include <sys/stat.h>
> +#include <sys/time.h>
> +#include <sys/times.h>
> +#include <sys/types.h>
> +#include <dirent.h>
> +#include <time.h>
> +#include <poll.h>
> +
> +#include "igt.h"
> +#include "igt_perf.h"
> +
> +IGT_TEST_DESCRIPTION("Test the i915 pmu perf interface");
> +
> +const double tolerance = 0.02f;
> +const unsigned long batch_duration_ns = 1000 * 1000 * 1000 / 2;
> +
> +static void
> +init(int gem_fd, const struct intel_execution_engine *e, uint8_t sample)
> +{
> +	uint64_t config = __I915_PMU_ENGINE(e->class, e->instance, sample);
> +	int fd;
> +
> +	igt_require(gem_has_ring(gem_fd, e->exec_id | e->flags));
> +
> +	fd = perf_i915_open(config);
> +	igt_assert(fd >= 0);
> +
> +	close(fd);
> +}
> +
> +static uint64_t pmu_read_single(int fd)
> +{
> +	uint64_t data[2];
> +	ssize_t len;
> +
> +	len = read(fd, data, sizeof(data));
> +	igt_assert_eq(len, sizeof(data));
> +
> +	return data[0];
> +}
> +
> +static void pmu_read_multi(int fd, unsigned int num, uint64_t *val)
> +{
> +	uint64_t buf[2 + num];
> +	unsigned int i;
> +	ssize_t len;
> +
> +	len = read(fd, buf, sizeof(buf));
> +	igt_assert_eq(len, sizeof(buf));
> +	for (i = 0; i < num; i++)
> +		val[i] = buf[2 + i];
> +}
> +
> +#define assert_within_epsilon(x, ref, tolerance) \
> +	igt_assert_f((double)(x) <= (1.0 + tolerance) * (double)ref && \
> +		     (double)(x) >= (1.0 - tolerance) * (double)ref, \
> +		     "'%s' != '%s' (%f not within %f%% tolerance of %f)\n",\
> +		     #x, #ref, (double)x, tolerance * 100.0, (double)ref)
> +
> +static void
> +single(int gem_fd, const struct intel_execution_engine *e, bool busy)
> +{
> +	uint64_t config = I915_PMU_ENGINE_BUSY(e->class, e->instance);
> +	double ref = busy ? batch_duration_ns : 0.0f;
> +	igt_spin_t *spin;
> +	uint64_t val;
> +	int fd;
> +
> +	igt_require(gem_has_ring(gem_fd, e->exec_id | e->flags));
> +
> +	if (busy) {
> +		spin = igt_spin_batch_new(gem_fd, 0, e->exec_id | e->flags, 0);
> +		igt_spin_batch_set_timeout(spin, batch_duration_ns);
> +	} else {
> +		usleep(batch_duration_ns / 1000);
> +	}
> +
> +	fd = perf_i915_open(config);
> +	igt_assert(fd >= 0);
> +
> +	if (busy)
> +		gem_sync(gem_fd, spin->handle);
> +
> +	val = pmu_read_single(fd);
> +
> +	assert_within_epsilon(val, ref, tolerance);
> +
> +	if (busy)
> +		igt_spin_batch_free(gem_fd, spin);
> +	close(fd);
> +}
> +
> +static void
> +busy_check_all(int gem_fd, const struct intel_execution_engine *e,
> +	       const unsigned int num_engines)
> +{
> +	const struct intel_execution_engine *e_;
> +	uint64_t val[num_engines];
> +	int fd[2];
> +	igt_spin_t *spin;
> +	unsigned int busy_idx, i;
> +
> +	igt_require(gem_has_ring(gem_fd, e->exec_id | e->flags));
> +
> +	spin = igt_spin_batch_new(gem_fd, 0, e->exec_id | e->flags, 0);
> +	igt_spin_batch_set_timeout(spin, batch_duration_ns);
> +
> +	i = 0;
> +	fd[0] = -1;
> +	for_each_engine_class_instance(fd, e_) {
> +		if (!gem_has_ring(gem_fd, e_->exec_id | e_->flags))
> +			continue;
> +
> +		if (e == e_)
> +			busy_idx = i;
> +
> +		fd[i == 0 ? 0 : 1] =
> +			perf_i915_open_group(I915_PMU_ENGINE_BUSY(e_->class,
> +								  e_->instance),
> +					     fd[0]);
> +		igt_assert(fd[0] > 0);
> +		igt_assert(i == 0 || fd[1] > 0);
> +		i++;
> +	}
> +
> +	gem_sync(gem_fd, spin->handle);
> +
> +	pmu_read_multi(fd[0], num_engines, val);
> +
> +	assert_within_epsilon(val[busy_idx], batch_duration_ns, tolerance);
> +	for (i = 0; i < num_engines; i++) {
> +		if (i == busy_idx)
> +			continue;
> +		assert_within_epsilon(val[i], 0.0f, tolerance);
> +	}
> +
> +	igt_spin_batch_free(gem_fd, spin);
> +	close(fd[0]);
> +}
> +
> +static void
> +two_busy_check_all(int gem_fd, const struct intel_execution_engine *e,
> +		   const unsigned int num_engines)
> +{
> +	const struct intel_execution_engine *e_;
> +	uint64_t val[num_engines];
> +	int fd[2];
> +	igt_spin_t *spin[2];
> +	unsigned int busy_idx[2], i;
> +
> +	igt_require(gem_has_ring(gem_fd, e->exec_id | e->flags));
> +	igt_assert(e->exec_id != 0 && e->exec_id != I915_EXEC_RENDER);
> +
> +	spin[0] = igt_spin_batch_new(gem_fd, 0, e->exec_id | e->flags, 0);
> +	igt_spin_batch_set_timeout(spin[0], batch_duration_ns);
> +
> +	spin[1] = igt_spin_batch_new(gem_fd, 0, I915_EXEC_RENDER, 0);
> +	igt_spin_batch_set_timeout(spin[1], batch_duration_ns);
> +
> +	i = 0;
> +	fd[0] = -1;
> +	for_each_engine_class_instance(fd, e_) {
> +		unsigned int idx = i == 0 ? 0 : 1;
> +
> +		if (!gem_has_ring(gem_fd, e_->exec_id | e_->flags))
> +			continue;
> +
> +		if (e_->class == I915_ENGINE_CLASS_RENDER && e_->instance == 0)
> +			busy_idx[0] = i;
> +		else if (e == e_)
> +			busy_idx[1] = i;
> +
> +		fd[idx] =
> +			perf_i915_open_group(I915_PMU_ENGINE_BUSY(e_->class,
> +								  e_->instance),
> +					     fd[0]);
> +		igt_assert(fd[idx] > 0);
> +		i++;
> +	}
> +
> +	gem_sync(gem_fd, spin[0]->handle);
> +	gem_sync(gem_fd, spin[1]->handle);
> +
> +	pmu_read_multi(fd[0], num_engines, val);
> +
> +	for (i = 0; i < num_engines; i++) {
> +		if (i == busy_idx[0] || i == busy_idx[1])
> +			assert_within_epsilon(val[i], batch_duration_ns,
> +					      tolerance);
> +		else
> +			assert_within_epsilon(val[i], 0.0f, tolerance);
> +	}
> +
> +	igt_spin_batch_free(gem_fd, spin[0]);
> +	igt_spin_batch_free(gem_fd, spin[1]);
> +	close(fd[0]);
> +}
> +
> +static void
> +no_sema(int gem_fd, const struct intel_execution_engine *e, bool busy)
> +{
> +	igt_spin_t *spin;
> +	uint64_t val[2];
> +	int fd, fd2;
> +
> +	igt_require(gem_has_ring(gem_fd, e->exec_id | e->flags));
> +
> +	if (busy) {
> +		spin = igt_spin_batch_new(gem_fd, 0, e->exec_id | e->flags, 0);
> +		igt_spin_batch_set_timeout(spin, batch_duration_ns);
> +	} else {
> +		usleep(batch_duration_ns / 1000);
> +	}
> +
> +	fd = perf_i915_open_group(I915_PMU_ENGINE_SEMA(e->class, e->instance),
> +				  -1);
> +	igt_assert(fd >= 0);
> +	fd2 = perf_i915_open_group(I915_PMU_ENGINE_WAIT(e->class, e->instance),
> +				  fd);
> +
> +	pmu_read_multi(fd, 2, val);
> +
> +	assert_within_epsilon(val[0], 0.0f, tolerance);
> +	assert_within_epsilon(val[1], 0.0f, tolerance);
> +
> +	if (busy)
> +		igt_spin_batch_free(gem_fd, spin);
> +	close(fd2);
> +	close(fd);
> +}
> +
> +static void
> +multi_client(int gem_fd, const struct intel_execution_engine *e)
> +{
> +	uint64_t config = I915_PMU_ENGINE_BUSY(e->class, e->instance);
> +	igt_spin_t *spin;
> +	uint64_t val[2];
> +	int fd[2];
> +
> +	igt_require(gem_has_ring(gem_fd, e->exec_id | e->flags));
> +
> +	spin = igt_spin_batch_new(gem_fd, 0, e->exec_id | e->flags, 0);
> +	igt_spin_batch_set_timeout(spin, batch_duration_ns);
> +
> +	fd[0] = perf_i915_open(config);
> +	igt_assert(fd[0] >= 0);
> +
> +	usleep(batch_duration_ns / 4000);
> +
> +	fd[1] = perf_i915_open(config);
> +	igt_assert(fd[1] >= 0);
> +
> +	usleep(batch_duration_ns / 3000);
> +
> +	val[1] = pmu_read_single(fd[1]);
> +	close(fd[1]);
> +
> +	gem_sync(gem_fd, spin->handle);
> +
> +	val[0] = pmu_read_single(fd[0]);
> +
> +	assert_within_epsilon(val[0], batch_duration_ns, tolerance);
> +	assert_within_epsilon(val[1], batch_duration_ns / 3, tolerance);
> +
> +	igt_spin_batch_free(gem_fd, spin);
> +	close(fd[0]);
> +}
> +
> +/**
> + * Tests that i915 PMU correctly errors out on invalid initialization.
> + * i915 PMU is uncore PMU, thus:
> + *  - sampling period is not supported
> + *  - pid > 0 is not supported since we can't count per-process (we count
> + *    per whole system)
> + *  - cpu != 0 is not supported since i915 PMU exposes cpumask for CPU0
> + */
> +static void invalid_init(void)
> +{
> +	struct perf_event_attr attr;
> +	int pid, cpu;
> +
> +#define ATTR_INIT() \
> +do { \
> +	memset(&attr, 0, sizeof (attr)); \
> +	attr.config = I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0); \
> +	attr.type = i915_type_id(); \
> +	igt_assert(attr.type != 0); \
> +} while(0)
> +
> +	ATTR_INIT();
> +	attr.sample_period = 100;
> +	pid = -1;
> +	cpu = 0;
> +	igt_assert_eq(perf_event_open(&attr, pid, cpu, -1, 0), -1);
> +	igt_assert_eq(errno, EINVAL);
> +
> +	ATTR_INIT();
> +	pid = 0;
> +	cpu = 0;
> +	igt_assert_eq(perf_event_open(&attr, pid, cpu, -1, 0), -1);
> +	igt_assert_eq(errno, EINVAL);
> +
> +	ATTR_INIT();
> +	pid = -1;
> +	cpu = 1;
> +	igt_assert_eq(perf_event_open(&attr, pid, cpu, -1, 0), -1);
> +	igt_assert_eq(errno, ENODEV);
> +}
> +
> +static void init_other(unsigned int i, bool valid)
> +{
> +	int fd;
> +
> +	fd = perf_i915_open(__I915_PMU_OTHER(i));
> +	igt_require(!(fd < 0 && errno == ENODEV));
> +	if (valid) {
> +		igt_assert(fd >= 0);
> +	} else {
> +		igt_assert(fd < 0);
> +		return;
> +	}
> +
> +	close(fd);
> +}
> +
> +static void read_other(unsigned int i, bool valid)
> +{
> +	int fd;
> +
> +	fd = perf_i915_open(__I915_PMU_OTHER(i));
> +	igt_require(!(fd < 0 && errno == ENODEV));
> +	if (valid) {
> +		igt_assert(fd >= 0);
> +	} else {
> +		igt_assert(fd < 0);
> +		return;
> +	}
> +
> +	(void)pmu_read_single(fd);
> +
> +	close(fd);
> +}
> +
> +static bool cpu0_hotplug_support(void)
> +{
> +	int fd = open("/sys/devices/system/cpu/cpu0/online", O_WRONLY);
> +
> +	close(fd);
> +
> +	return fd > 0;
> +}
> +
> +static uint64_t
> +elapsed_ns(const struct timespec *start, const struct timespec *end)
> +{
> +	return ((end->tv_sec - start->tv_sec) * 1e9 +
> +		(end->tv_nsec - start->tv_nsec));
> +}
> +
> +static void cpu_hotplug(int gem_fd)
> +{
> +	struct timespec start, now;
> +	igt_spin_t *spin;
> +	uint64_t val, ref;
> +	int fd;
> +
> +	igt_require(cpu0_hotplug_support());
> +
> +	spin = igt_spin_batch_new(gem_fd, 0, I915_EXEC_RENDER, 0);
> +	fd = perf_i915_open(I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0));
> +	igt_assert(fd >= 0);
> +
> +	clock_gettime(CLOCK_MONOTONIC, &start);
> +
> +	igt_fork(child, 1) {
> +		int cpu = 0;
> +
> +		for (;;) {
> +			char name[128];
> +			int cpufd;
> +
> +			sprintf(name, "/sys/devices/system/cpu/cpu%d/online",
> +				cpu);
> +			cpufd = open(name, O_WRONLY);
> +			if (cpufd == -1) {
> +				igt_assert(cpu > 0);
> +				break;
> +			}
> +			igt_assert_eq(write(cpufd, "0", 2), 2);
> +
> +			usleep(1000 * 1000);
> +
> +			igt_assert_eq(write(cpufd, "1", 2), 2);
> +
> +			close(cpufd);
> +			cpu++;
> +		}
> +	}
> +
> +	igt_waitchildren();
> +
> +	igt_spin_batch_end(spin);
> +	gem_sync(gem_fd, spin->handle);
> +
> +	clock_gettime(CLOCK_MONOTONIC, &now);
> +	val = pmu_read_single(fd);
> +
> +	ref = elapsed_ns(&start, &now);
> +
> +	assert_within_epsilon(val, ref, tolerance);
> +
> +	igt_spin_batch_free(gem_fd, spin);
> +	close(fd);
> +}
> +
> +static void
> +test_interrupts(int gem_fd)
> +{
> +	igt_spin_t *spin;
> +	uint64_t idle, busy, prev;
> +	int fd;
> +
> +	fd = perf_i915_open(I915_PMU_INTERRUPTS);
> +	igt_assert(fd >= 0);
> +
> +	gem_quiescent_gpu(gem_fd);
> +	sleep(2);
> +	prev = pmu_read_single(fd);
> +	usleep(batch_duration_ns / 1000);
> +	idle = pmu_read_single(fd);
> +
> +	igt_assert_eq(idle - prev, 0);
> +
> +	spin = igt_spin_batch_new(gem_fd, 0, I915_EXEC_RENDER, 0);
> +	igt_spin_batch_set_timeout(spin, batch_duration_ns);
> +	gem_sync(gem_fd, spin->handle);
> +
> +	busy = pmu_read_single(fd);
> +	igt_assert(busy > idle);
> +
> +	igt_spin_batch_free(gem_fd, spin);
> +	close(fd);
> +}
> +
> +static void
> +test_frequency(int gem_fd)
> +{
> +	igt_spin_t *spin;
> +	uint64_t idle[2], busy[2];
> +	int fd;
> +
> +	fd = perf_i915_open_group(I915_PMU_REQUESTED_FREQUENCY, -1);
> +	igt_assert(fd >= 0);
> +	igt_assert(perf_i915_open_group(I915_PMU_ACTUAL_FREQUENCY, fd) >= 0);
> +
> +	gem_quiescent_gpu(gem_fd);
> +	usleep(batch_duration_ns / 1000);
> +
> +	pmu_read_multi(fd, 2, idle);
> +
> +	spin = igt_spin_batch_new(gem_fd, 0, I915_EXEC_RENDER, 0);
> +	igt_spin_batch_set_timeout(spin, batch_duration_ns);
> +	gem_sync(gem_fd, spin->handle);
> +
> +	pmu_read_multi(fd, 2, busy);
> +
> +	igt_assert(busy[0] > idle[0]);
> +	igt_assert(busy[1] > idle[1]);
> +
> +	igt_spin_batch_free(gem_fd, spin);
> +	close(fd);
> +}
> +
> +static void
> +test_rc6(int gem_fd)
> +{
> +	int64_t duration_ns = 500 * 1000 * 1000;
> +	igt_spin_t *spin;
> +	uint64_t idle, busy, prev;
> +	int fd;
> +
> +	fd = perf_i915_open(I915_PMU_RC6_RESIDENCY);
> +	igt_assert(fd >= 0);
> +
> +	gem_quiescent_gpu(gem_fd);
> +	sleep(2);
> +
> +	prev = pmu_read_single(fd);
> +	usleep(duration_ns / 1000);
> +	idle = pmu_read_single(fd);
> +
> +	assert_within_epsilon(idle - prev, duration_ns, tolerance);
> +
> +	spin = igt_spin_batch_new(gem_fd, 0, I915_EXEC_RENDER, 0);
> +	igt_spin_batch_set_timeout(spin, duration_ns);
> +
> +	prev = pmu_read_single(fd);
> +
> +	gem_sync(gem_fd, spin->handle);
> +
> +	busy = pmu_read_single(fd);
> +	assert_within_epsilon(busy - prev, 0.0, tolerance);
> +
> +	igt_spin_batch_free(gem_fd, spin);
> +	close(fd);
> +}
> +
> +static void
> +test_rc6p(int gem_fd)
> +{
> +	const unsigned int devid = intel_get_drm_devid(gem_fd);
> +	int64_t duration_ns = 2 * 1000 * 1000 * 1000;
> +	unsigned int num_pmu = 1;
> +	igt_spin_t *spin;
> +	uint64_t idle[3], busy[3], prev[3];
> +	unsigned int i;
> +	int fd, ret;
> +
> +	igt_require(intel_gen(devid) < 8 && !IS_HASWELL(devid));
> +
> +	fd = perf_i915_open_group(I915_PMU_RC6_RESIDENCY, -1);
> +	igt_assert(fd >= 0);
> +
> +	ret = perf_i915_open_group(I915_PMU_RC6p_RESIDENCY, fd);
> +	if (ret > 0) {
> +		num_pmu++;
> +		ret = perf_i915_open_group(I915_PMU_RC6p_RESIDENCY, fd);
> +		if (ret > 0)
> +			num_pmu++;
> +	}
> +
> +	gem_quiescent_gpu(gem_fd);
> +	sleep(2);
> +
> +	pmu_read_multi(fd, num_pmu, prev);
> +	usleep(duration_ns / 1000);
> +	pmu_read_multi(fd, num_pmu, idle);
> +
> +	for (i = 0; i < num_pmu; i++)
> +		assert_within_epsilon(idle[i] - prev[i], duration_ns,
> +				      tolerance);
> +
> +	spin = igt_spin_batch_new(gem_fd, 0, I915_EXEC_RENDER, 0);
> +	igt_spin_batch_set_timeout(spin, duration_ns);
> +
> +	pmu_read_multi(fd, num_pmu, prev);
> +
> +	gem_sync(gem_fd, spin->handle);
> +
> +	pmu_read_multi(fd, num_pmu, busy);
> +
> +	for (i = 0; i < num_pmu; i++)
> +		assert_within_epsilon(busy[i] - prev[i], 0.0, tolerance);
> +
> +	igt_spin_batch_free(gem_fd, spin);
> +	close(fd);
> +}
> +
> +igt_main
> +{
> +	const unsigned int num_other_metrics =
> +				I915_PMU_LAST - __I915_PMU_OTHER(0) + 1;
> +	unsigned int num_engines = 0;
> +	int fd = -1;
> +	const struct intel_execution_engine *e;
> +	unsigned int i;
> +
> +	igt_fixture {
> +		fd = drm_open_driver_master(DRIVER_INTEL);
> +
> +		igt_require_gem(fd);
> +		igt_require(i915_type_id() > 0);
> +
> +		for_each_engine_class_instance(fd, e) {
> +			if (gem_has_ring(fd, e->exec_id | e->flags))
> +				num_engines++;
> +		}
> +	}
> +
> +	/**
> +	 * Test invalid access via perf API is rejected.
> +	 */
> +	igt_subtest("invalid-init")
> +		invalid_init();
> +
> +	for_each_engine_class_instance(fd, e) {
> +		/**
> +		 * Test that a single engine metric can be initialized.
> +		 */
> +		igt_subtest_f("init-busy-%s", e->name)
> +			init(fd, e, I915_SAMPLE_BUSY);
> +
> +		igt_subtest_f("init-wait-%s", e->name)
> +			init(fd, e, I915_SAMPLE_WAIT);
> +
> +		igt_subtest_f("init-sema-%s", e->name)
> +			init(fd, e, I915_SAMPLE_SEMA);
> +
> +		/**
> +		 * Test that engines show no load when idle.
> +		 */
> +		igt_subtest_f("idle-%s", e->name)
> +			single(fd, e, false);
> +
> +		/**
> +		 * Test that a single engine reports load correctly.
> +		 */
> +		igt_subtest_f("busy-%s", e->name)
> +			single(fd, e, true);
> +
> +		/**
> +		 * Test that when one engine is loaded the others report no load.
> +		 */
> +		igt_subtest_f("busy-check-all-%s", e->name)
> +			busy_check_all(fd, e, num_engines);
> +
> +		/**
> +		 * Test that when two engines are loaded the others report no load.
> +		 */
> +		if (!(e->class == I915_ENGINE_CLASS_RENDER && e->instance == 0))
> +			igt_subtest_f("two-busy-check-all-%s", e->name)
> +				two_busy_check_all(fd, e, num_engines);
> +
> +		/**
> +		 * Test that semaphore counters report no activity on idle
> +		 * engines.
> +		 */
> +		igt_subtest_f("idle-no-semaphores-%s", e->name)
> +			no_sema(fd, e, false);
> +
> +		igt_subtest_f("busy-no-semaphores-%s", e->name)
> +			no_sema(fd, e, true);
> +
> +		/**
> +		 * Check that two perf clients do not influence each other's
> +		 * observations.
> +		 */
> +		igt_subtest_f("multi-client-%s", e->name)
> +			multi_client(fd, e);
> +	}
> +
> +	/**
> +	 * Test that non-engine counters can be initialized and read. Apart
> +	 * from the invalid metric which should fail.
> +	 */
> +	for (i = 0; i < num_other_metrics + 1; i++) {
> +		igt_subtest_f("other-init-%u", i)
> +			init_other(i, i < num_other_metrics);
> +
> +		igt_subtest_f("other-read-%u", i)
> +			read_other(i, i < num_other_metrics);
> +	}
> +
> +	/**
> +	 * Test counters are not affected by CPU offline/online events.
> +	 */
> +	igt_subtest("cpu-hotplug")
> +		cpu_hotplug(fd);
> +
> +	/**
> +	 * Test GPU frequency.
> +	 */
> +	igt_subtest("frequency")
> +		test_frequency(fd);
> +
> +	/**
> +	 * Test interrupt count reporting.
> +	 */
> +	igt_subtest("interrupts")
> +		test_interrupts(fd);
> +
> +	/**
> +	 * Test RC6 residency reporting.
> +	 */
> +	igt_subtest("rc6")
> +		test_rc6(fd);
> +
> +	/**
> +	 * Test RC6p residency reporting.
> +	 */
> +	igt_subtest("rc6p")
> +		test_rc6p(fd);
> +}

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH i-g-t 5/5] tests/perf_pmu: Tests for i915 PMU API
  2017-09-18 21:18   ` Rogozhkin, Dmitry V
@ 2017-09-19  8:19     ` Tvrtko Ursulin
  0 siblings, 0 replies; 15+ messages in thread
From: Tvrtko Ursulin @ 2017-09-19  8:19 UTC (permalink / raw)
  To: Rogozhkin, Dmitry V, tursulin; +Cc: Intel-gfx


On 18/09/2017 22:18, Rogozhkin, Dmitry V wrote:
> Did you try tests on the system with 2 VDBOX engines? On my side 2 tests
> are failing on SKL GT4e NUC:

Nope.

> (perf_pmu:5414) CRITICAL: Test assertion failure function
> busy_check_all, file perf_pmu.c:164:
> (perf_pmu:5414) CRITICAL: Failed assertion: (double)(val[i]) <= (1.0 +
> tolerance) * (double)0.0f && (double)(val[i]) >= (1.0 - tolerance) *
> (double)0.0f
> (perf_pmu:5414) CRITICAL: 'val[i]' != '0.0f' (499984960.000000 not
> within 2.000000% tolerance of 0.000000)
> Subtest two-busy-check-all-bsd: FAIL (0.501s)
> 
> (perf_pmu:5414) CRITICAL: Test assertion failure function
> two_busy_check_all, file perf_pmu.c:221:
> (perf_pmu:5414) CRITICAL: Failed assertion: (double)(val[i]) <= (1.0 +
> tolerance) * (double)0.0f && (double)(val[i]) >= (1.0 - tolerance) *
> (double)0.0f
> (perf_pmu:5414) CRITICAL: 'val[i]' != '0.0f' (499940146.000000 not
> within 2.000000% tolerance of 0.000000)
> Subtest two-busy-check-all-bsd1: FAIL (0.501s)
> 
> I am trying to speculate on the reasons below.
> 
> 
> On Mon, 2017-09-18 at 12:38 +0100, Tvrtko Ursulin wrote:
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> A bunch of tests for the new i915 PMU feature.
>>
>> Parts of the code were initially sketched by Dmitry Rogozhkin.
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>> Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
>> ---
>>   lib/igt_gt.c           |  23 +-
>>   lib/igt_gt.h           |   8 +
>>   tests/Makefile.sources |   1 +
>>   tests/perf_pmu.c       | 713 +++++++++++++++++++++++++++++++++++++++++++++++++
>>   4 files changed, 738 insertions(+), 7 deletions(-)
>>   create mode 100644 tests/perf_pmu.c
>>
>> diff --git a/lib/igt_gt.c b/lib/igt_gt.c
>> index b3f3b3809eee..102cc2841feb 100644
>> --- a/lib/igt_gt.c
>> +++ b/lib/igt_gt.c
>> @@ -537,14 +537,23 @@ unsigned intel_detect_and_clear_missed_interrupts(int fd)
>>   	return missed;
>>   }
>>   
>> +enum drm_i915_gem_engine_class {
>> +	I915_ENGINE_CLASS_OTHER = 0,
>> +	I915_ENGINE_CLASS_RENDER = 1,
>> +	I915_ENGINE_CLASS_COPY = 2,
>> +	I915_ENGINE_CLASS_VIDEO = 3,
>> +	I915_ENGINE_CLASS_VIDEO_ENHANCE = 4,
>> +	I915_ENGINE_CLASS_MAX /* non-ABI */
>> +};
>> +
>>   const struct intel_execution_engine intel_execution_engines[] = {
>> -	{ "default", NULL, 0, 0 },
>> -	{ "render", "rcs0", I915_EXEC_RENDER, 0 },
>> -	{ "bsd", "vcs0", I915_EXEC_BSD, 0 },
>> -	{ "bsd1", "vcs0", I915_EXEC_BSD, 1<<13 /*I915_EXEC_BSD_RING1*/ },
>> -	{ "bsd2", "vcs1", I915_EXEC_BSD, 2<<13 /*I915_EXEC_BSD_RING2*/ },
>> -	{ "blt", "bcs0", I915_EXEC_BLT, 0 },
>> -	{ "vebox", "vecs0", I915_EXEC_VEBOX, 0 },
>> +	{ "default", NULL, -1, -1, 0, 0 },
>> +	{ "render", "rcs0", I915_ENGINE_CLASS_RENDER, 0, I915_EXEC_RENDER, 0 },
>> +	{ "bsd", "vcs0", I915_ENGINE_CLASS_VIDEO, 0, I915_EXEC_BSD, 0 },
> With such definition, we will probably detect "bsd" as an engine (as
> well as "bsd1" and "bsd2"), right? As a result, we will run
> two-busy-check-all-bsd for it and according to defined flags we will
> submit workloads to _both_ vcs0 and vcs1 evenly following i915 KMD
> dispatching. Thus, the two-busy-check-all-bsd will fail since it will
> detect a load on 3 engines (rcs0, vcs0, vcs1) instead of 2.
> 
> I am not quite sure why two-busy-check-all-bsd1 fails as well on my
> side? or rather, why it did not fail on your side as well? The only
> explanation I see is that the test thinks "bsd" and "bsd1" are separate
> engines, and, thus, count them as 2. But that should fail on single
> VDBOX system as well... hm...

What makes it fail on GT3+ parts is the fact I915_EXEC_BSD gets 
round-robinned per context.

I need to come up with an elegant solution in the world where we started 
using engine class/instance concepts but don't yet have the 
class-instance execbuf...

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH i-g-t 5/5] tests/perf_pmu: Tests for i915 PMU API
  2017-09-18 13:17   ` Chris Wilson
@ 2017-09-19  8:37     ` Tvrtko Ursulin
  2017-09-19  9:58       ` Chris Wilson
  0 siblings, 1 reply; 15+ messages in thread
From: Tvrtko Ursulin @ 2017-09-19  8:37 UTC (permalink / raw)
  To: Chris Wilson, Tvrtko Ursulin, Intel-gfx


On 18/09/2017 14:17, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2017-09-18 12:38:40)
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> A bunch of tests for the new i915 PMU feature.
>>
>> Parts of the code were initialy sketched by Dmitry Rogozhkin.
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>> Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
>> ---
>>   lib/igt_gt.c           |  23 +-
>>   lib/igt_gt.h           |   8 +
>>   tests/Makefile.sources |   1 +
>>   tests/perf_pmu.c       | 713 +++++++++++++++++++++++++++++++++++++++++++++++++
>>   4 files changed, 738 insertions(+), 7 deletions(-)
>>   create mode 100644 tests/perf_pmu.c
>>
>> diff --git a/lib/igt_gt.c b/lib/igt_gt.c
>> index b3f3b3809eee..102cc2841feb 100644
>> --- a/lib/igt_gt.c
>> +++ b/lib/igt_gt.c
>> @@ -537,14 +537,23 @@ unsigned intel_detect_and_clear_missed_interrupts(int fd)
>>          return missed;
>>   }
>>   
>> +enum drm_i915_gem_engine_class {
>> +       I915_ENGINE_CLASS_OTHER = 0,
>> +       I915_ENGINE_CLASS_RENDER = 1,
>> +       I915_ENGINE_CLASS_COPY = 2,
>> +       I915_ENGINE_CLASS_VIDEO = 3,
>> +       I915_ENGINE_CLASS_VIDEO_ENHANCE = 4,
>> +       I915_ENGINE_CLASS_MAX /* non-ABI */
>> +};
>> +
>>   const struct intel_execution_engine intel_execution_engines[] = {
>> -       { "default", NULL, 0, 0 },
>> -       { "render", "rcs0", I915_EXEC_RENDER, 0 },
>> -       { "bsd", "vcs0", I915_EXEC_BSD, 0 },
>> -       { "bsd1", "vcs0", I915_EXEC_BSD, 1<<13 /*I915_EXEC_BSD_RING1*/ },
>> -       { "bsd2", "vcs1", I915_EXEC_BSD, 2<<13 /*I915_EXEC_BSD_RING2*/ },
>> -       { "blt", "bcs0", I915_EXEC_BLT, 0 },
>> -       { "vebox", "vecs0", I915_EXEC_VEBOX, 0 },
>> +       { "default", NULL, -1, -1, 0, 0 },
>> +       { "render", "rcs0", I915_ENGINE_CLASS_RENDER, 0, I915_EXEC_RENDER, 0 },
>> +       { "bsd", "vcs0", I915_ENGINE_CLASS_VIDEO, 0, I915_EXEC_BSD, 0 },
>> +       { "bsd1", "vcs0", I915_ENGINE_CLASS_VIDEO, 0, I915_EXEC_BSD, 1<<13 /*I915_EXEC_BSD_RING1*/ },
>> +       { "bsd2", "vcs1", I915_ENGINE_CLASS_VIDEO, 1, I915_EXEC_BSD, 2<<13 /*I915_EXEC_BSD_RING2*/ },
>> +       { "blt", "bcs0", I915_ENGINE_CLASS_COPY, 0, I915_EXEC_BLT, 0 },
>> +       { "vebox", "vecs0", I915_ENGINE_CLASS_VIDEO_ENHANCE, 0, I915_EXEC_VEBOX, 0 },
>>          { NULL, 0, 0 }
> 
> I was anticipating a new struct for the explicit interface so that we
> can easily phase out the out with its aliasing.

It's definitely buggy as it is, as Dmitry has discovered. I'll have a 
think on how to do it elegantly. Too bad we can't piggy back the 
class-instance execbuf to this..

>>   };
>>   
>> diff --git a/lib/igt_gt.h b/lib/igt_gt.h
>> index 2579cbd37be7..436041ce9cc0 100644
>> --- a/lib/igt_gt.h
>> +++ b/lib/igt_gt.h
>> @@ -66,6 +66,8 @@ unsigned intel_detect_and_clear_missed_interrupts(int fd);
>>   extern const struct intel_execution_engine {
>>          const char *name;
>>          const char *full_name;
>> +       int class;
>> +       int instance;
>>          unsigned exec_id;
>>          unsigned flags;
>>   } intel_execution_engines[];
>> @@ -78,6 +80,12 @@ extern const struct intel_execution_engine {
>>               e__++) \
>>                  for_if (gem_has_ring(fd__, flags__ = e__->exec_id | e__->flags))
>>   
>> +#define for_each_engine_class_instance(fd__, e__) \
>> +       for ((e__) = intel_execution_engines;\
>> +            (e__)->name; \
>> +            (e__)++) \
>> +               for_if ((e__)->class > 0)
>> +
>>   bool gem_can_store_dword(int fd, unsigned int engine);
>>   
>>   #endif /* IGT_GT_H */
>> diff --git a/tests/Makefile.sources b/tests/Makefile.sources
>> index cf542df181a8..4bab6247151c 100644
>> --- a/tests/Makefile.sources
>> +++ b/tests/Makefile.sources
>> @@ -217,6 +217,7 @@ TESTS_progs = \
>>          kms_vblank \
>>          meta_test \
>>          perf \
>> +       perf_pmu \
>>          pm_backlight \
>>          pm_lpsp \
>>          pm_rc6_residency \
>> diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
>> new file mode 100644
>> index 000000000000..2dbee586dacc
>> --- /dev/null
>> +++ b/tests/perf_pmu.c
>> @@ -0,0 +1,713 @@
>> +/*
>> + * Copyright © 2017 Intel Corporation
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a
>> + * copy of this software and associated documentation files (the "Software"),
>> + * to deal in the Software without restriction, including without limitation
>> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
>> + * and/or sell copies of the Software, and to permit persons to whom the
>> + * Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice (including the next
>> + * paragraph) shall be included in all copies or substantial portions of the
>> + * Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
>> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
>> + * IN THE SOFTWARE.
>> + *
>> + */
>> +
>> +#include <stdlib.h>
>> +#include <stdio.h>
>> +#include <string.h>
>> +#include <fcntl.h>
>> +#include <inttypes.h>
>> +#include <errno.h>
>> +#include <sys/stat.h>
>> +#include <sys/time.h>
>> +#include <sys/times.h>
>> +#include <sys/types.h>
>> +#include <dirent.h>
>> +#include <time.h>
>> +#include <poll.h>
>> +
>> +#include "igt.h"
>> +#include "igt_perf.h"
>> +
>> +IGT_TEST_DESCRIPTION("Test the i915 pmu perf interface");
>> +
>> +const double tolerance = 0.02f;
>> +const unsigned long batch_duration_ns = 1000 * 1000 * 1000 / 2;
>> +
>> +static void
>> +init(int gem_fd, const struct intel_execution_engine *e, uint8_t sample)
>> +{
>> +       uint64_t config = __I915_PMU_ENGINE(e->class, e->instance, sample);
>> +       int fd;
>> +
>> +       igt_require(gem_has_ring(gem_fd, e->exec_id | e->flags));
> 
> gem_require_ring()

Missed the existence of it.


> 
>> +
>> +       fd = perf_i915_open(config);
> 
> Although the kernel interface is the authority.
> 
> So this should be igt_require, and igt_assert(has_ring);

Don't get what you are saying, igt_require(has_ring) followed by 
igt_assert(has_ring)??

> 
>> +       igt_assert(fd >= 0);
>> +
>> +       close(fd);
>> +}
>> +
>> +static uint64_t pmu_read_single(int fd)
>> +{
>> +       uint64_t data[2];
>> +       ssize_t len;
>> +
>> +       len = read(fd, data, sizeof(data));
> 
> Perf is a datagram api, right? A short read gives what you asked for and
> discards the rest of the packet, iirc.

Nope, I've noticed overlay was failing due to that assumption and even 
traced the code in core perf which fails short reads. Hence the patch in 
this series to fix overlay in that respect.

> 
>> +       igt_assert_eq(len, sizeof(data));
>> +
>> +       return data[0];
>> +}
>> +
>> +static void pmu_read_multi(int fd, unsigned int num, uint64_t *val)
>> +{
>> +       uint64_t buf[2 + num];
>> +       unsigned int i;
>> +       ssize_t len;
>> +
>> +       len = read(fd, buf, sizeof(buf));
>> +       igt_assert_eq(len, sizeof(buf));
>> +       for (i = 0; i < num; i++)
>> +               val[i] = buf[2 + i];
>> +}
>> +
>> +#define assert_within_epsilon(x, ref, tolerance) \
>> +       igt_assert_f((double)(x) <= (1.0 + tolerance) * (double)ref && \
>> +                    (double)(x) >= (1.0 - tolerance) * (double)ref, \
>> +                    "'%s' != '%s' (%f not within %f%% tolerance of %f)\n",\
>> +                    #x, #ref, (double)x, tolerance * 100.0, (double)ref)
>> +
>> +static void
>> +single(int gem_fd, const struct intel_execution_engine *e, bool busy)
>> +{
>> +       uint64_t config = I915_PMU_ENGINE_BUSY(e->class, e->instance);
>> +       double ref = busy ? batch_duration_ns : 0.0f;
>> +       igt_spin_t *spin;
>> +       uint64_t val;
>> +       int fd;
>> +
>> +       igt_require(gem_has_ring(gem_fd, e->exec_id | e->flags));
>> +
>> +       if (busy) {
>> +               spin = igt_spin_batch_new(gem_fd, 0, e->exec_id | e->flags, 0);
>> +               igt_spin_batch_set_timeout(spin, batch_duration_ns);
>> +       } else {
>> +               usleep(batch_duration_ns / 1000);
>> +       }
>> +
>> +       fd = perf_i915_open(config);
>> +       igt_assert(fd >= 0);
>> +
>> +       if (busy)
>> +               gem_sync(gem_fd, spin->handle);
>> +
>> +       val = pmu_read_single(fd);
>> +
>> +       assert_within_epsilon(val, ref, tolerance);
>> +
>> +       if (busy)
>> +               igt_spin_batch_free(gem_fd, spin);
>> +       close(fd);
>> +}
>> +
>> +static void
>> +busy_check_all(int gem_fd, const struct intel_execution_engine *e,
> 
> busy_check_others
> 
> busy_check_all I would expect to be checking that all engines are
> correctly recorded as being busy at the same time. And there should also
> be permutations of (busy, idle, wait) across the engines.

I can do that, sure. But it is checking all engines, just some for 100% 
busy, and some for 100% idle. :) Naming it "others" would then not be 
correct either.

> 
>> +              const unsigned int num_engines)
>> +{
>> +       const struct intel_execution_engine *e_;
>> +       uint64_t val[num_engines];
>> +       int fd[2];
>> +       igt_spin_t *spin;
>> +       unsigned int busy_idx, i;
>> +
>> +       igt_require(gem_has_ring(gem_fd, e->exec_id | e->flags));
>> +
>> +       spin = igt_spin_batch_new(gem_fd, 0, e->exec_id | e->flags, 0);
>> +       igt_spin_batch_set_timeout(spin, batch_duration_ns);
>> +
>> +       i = 0;
>> +       fd[0] = -1;
>> +       for_each_engine_class_instance(fd, e_) {
>> +               if (!gem_has_ring(gem_fd, e_->exec_id | e_->flags))
>> +                       continue;
>> +
>> +               if (e == e_)
>> +                       busy_idx = i;
>> +
>> +               fd[i == 0 ? 0 : 1] =
>> +                       perf_i915_open_group(I915_PMU_ENGINE_BUSY(e_->class,
>> +                                                                 e_->instance),
>> +                                            fd[0]);
>> +               igt_assert(fd[0] > 0);
>> +               igt_assert(i == 0 || fd[1] > 0);
>> +               i++;
>> +       }
>> +
>> +       gem_sync(gem_fd, spin->handle);
>> +
>> +       pmu_read_multi(fd[0], num_engines, val);
>> +
>> +       assert_within_epsilon(val[busy_idx], batch_duration_ns, tolerance);
>> +       for (i = 0; i < num_engines; i++) {
>> +               if (i == busy_idx)
>> +                       continue;
>> +               assert_within_epsilon(val[i], 0.0f, tolerance);
>> +       }
>> +
>> +       igt_spin_batch_free(gem_fd, spin);
>> +       close(fd[0]);
>> +}
> 
>> +static void
>> +no_sema(int gem_fd, const struct intel_execution_engine *e, bool busy)
> 
> This is just the sanity check half of the sema test.
> 
> No wait, no queued?

Forgot about queued completely!

And semaphores I left for later. I don't have any <gen9 machines to play 
with them locally.

> 
>> +static void
>> +multi_client(int gem_fd, const struct intel_execution_engine *e)
>> +{
>> +       uint64_t config = I915_PMU_ENGINE_BUSY(e->class, e->instance);
>> +       igt_spin_t *spin;
>> +       uint64_t val[2];
>> +       int fd[2];
>> +
>> +       igt_require(gem_has_ring(gem_fd, e->exec_id | e->flags));
>> +
>> +       spin = igt_spin_batch_new(gem_fd, 0, e->exec_id | e->flags, 0);
>> +       igt_spin_batch_set_timeout(spin, batch_duration_ns);
>> +
>> +       fd[0] = perf_i915_open(config);
>> +       igt_assert(fd[0] >= 0);
>> +
>> +       usleep(batch_duration_ns / 4000);
>> +
>> +       fd[1] = perf_i915_open(config);
>> +       igt_assert(fd[1] >= 0);
>> +
>> +       usleep(batch_duration_ns / 3000);
>> +
>> +       val[1] = pmu_read_single(fd[1]);
>> +       close(fd[1]);
>> +
>> +       gem_sync(gem_fd, spin->handle);
>> +
>> +       val[0] = pmu_read_single(fd[0]);
>> +
>> +       assert_within_epsilon(val[0], batch_duration_ns, tolerance);
>> +       assert_within_epsilon(val[1], batch_duration_ns / 3, tolerance);
>> +
>> +       igt_spin_batch_free(gem_fd, spin);
>> +       close(fd[0]);
>> +}
> 

Forgot to comment or changed your mind?

>> +static void cpu_hotplug(int gem_fd)
>> +{
>> +       struct timespec start, now;
>> +       igt_spin_t *spin;
>> +       uint64_t val, ref;
>> +       int fd;
>> +
>> +       igt_require(cpu0_hotplug_support());
>> +
>> +       spin = igt_spin_batch_new(gem_fd, 0, I915_EXEC_RENDER, 0);
>> +       fd = perf_i915_open(I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0));
>> +       igt_assert(fd >= 0);
>> +
>> +       clock_gettime(CLOCK_MONOTONIC, &start);
>> +
>> +       igt_fork(child, 1) {
>> +               int cpu = 0;
>> +
>> +               for (;;) {
>> +                       char name[128];
>> +                       int cpufd;
>> +
>> +                       sprintf(name, "/sys/devices/system/cpu/cpu%d/online",
>> +                               cpu);
>> +                       cpufd = open(name, O_WRONLY);
>> +                       if (cpufd == -1) {
>> +                               igt_assert(cpu > 0);
>> +                               break;
>> +                       }
>> +                       igt_assert_eq(write(cpufd, "0", 2), 2);
>> +
>> +                       usleep(1000 * 1000);
>> +
>> +                       igt_assert_eq(write(cpufd, "1", 2), 2);
>> +
>> +                       close(cpufd);
>> +                       cpu++;
>> +               }
>> +       }
>> +
>> +       igt_waitchildren();
>> +
>> +       igt_spin_batch_end(spin);
>> +       gem_sync(gem_fd, spin->handle);
>> +
>> +       clock_gettime(CLOCK_MONOTONIC, &now);
> 
> Did we ever export the igt routines for probing supported clocks?
> In this case, this fits into igt_nsec_elapsed.

Did not spot this one either.

> 
>> +       val = pmu_read_single(fd);
>> +
>> +       ref = elapsed_ns(&start, &now);
>> +
>> +       assert_within_epsilon(val, ref, tolerance);
>> +
>> +       igt_spin_batch_free(gem_fd, spin);
>> +       close(fd);
>> +}
>> +
>> +static void
>> +test_interrupts(int gem_fd)
>> +{
>> +       igt_spin_t *spin;
>> +       uint64_t idle, busy, prev;
>> +       int fd;
>> +
>> +       fd = perf_i915_open(I915_PMU_INTERRUPTS);
>> +       igt_assert(fd >= 0);
>> +
>> +       gem_quiescent_gpu(gem_fd);
>> +       sleep(2);
>> +       prev = pmu_read_single(fd);
>> +       usleep(batch_duration_ns / 1000);
>> +       idle = pmu_read_single(fd);
>> +
>> +       igt_assert_eq(idle - prev, 0);
>> +
>> +       spin = igt_spin_batch_new(gem_fd, 0, I915_EXEC_RENDER, 0);
>> +       igt_spin_batch_set_timeout(spin, batch_duration_ns);
>> +       gem_sync(gem_fd, spin->handle);
> 
> There's no guaranteed interrupt here.

Hm yes.. bugger. Use fences instead of spin batch to ensure some? Or 
extend spin batch API to support fences?

> 
>> +
>> +       busy = pmu_read_single(fd);
>> +       igt_assert(busy > idle);
>> +
>> +       igt_spin_batch_free(gem_fd, spin);
>> +       close(fd);
>> +}
>> +
>> +static void
>> +test_frequency(int gem_fd)
>> +{
>> +       igt_spin_t *spin;
>> +       uint64_t idle[2], busy[2];
>> +       int fd;
>> +
>> +       fd = perf_i915_open_group(I915_PMU_REQUESTED_FREQUENCY, -1);
>> +       igt_assert(fd >= 0);
> 
> Ask the kernel if it is supported.

Yep.

> 
>> +       igt_assert(perf_i915_open_group(I915_PMU_ACTUAL_FREQUENCY, fd) >= 0);
>> +
>> +       gem_quiescent_gpu(gem_fd);
>> +       usleep(batch_duration_ns / 1000);
>> +
>> +       pmu_read_multi(fd, 2, idle);
>> +
>> +       spin = igt_spin_batch_new(gem_fd, 0, I915_EXEC_RENDER, 0);
>> +       igt_spin_batch_set_timeout(spin, batch_duration_ns);
>> +       gem_sync(gem_fd, spin->handle);
>> +
>> +       pmu_read_multi(fd, 2, busy);
>> +
>> +       igt_assert(busy[0] > idle[0]);
>> +       igt_assert(busy[1] > idle[1]);
> 
> Nothing guarantees busy[1] changes, it is hw/fw dependent.
> busy[0] depends on user config.

Do we reasonably expect IGT to be run in such environments? Or change 
this to not expect a change but just compare against debugfs?

> 
>> +
>> +       igt_spin_batch_free(gem_fd, spin);
>> +       close(fd);
>> +}
>> +
> 
>> +static void
>> +test_rc6p(int gem_fd)
>> +{
>> +       const unsigned int devid = intel_get_drm_devid(gem_fd);
>> +       int64_t duration_ns = 2 * 1000 * 1000 * 1000;
>> +       unsigned int num_pmu = 1;
>> +       igt_spin_t *spin;
>> +       uint64_t idle[3], busy[3], prev[3];
>> +       unsigned int i;
>> +       int fd, ret;
>> +
>> +       igt_require(intel_gen(devid) < 8 && !IS_HASWELL(devid));
> 
> Ask the kernel. (Applies equally to rc6, rc6p).

What is the way to do this? Don't see these in get_param.

> No rc6pp testing?

Copy and paste error.

>> +
>> +       fd = perf_i915_open_group(I915_PMU_RC6_RESIDENCY, -1);
>> +       igt_assert(fd >= 0);
>> +
>> +       ret = perf_i915_open_group(I915_PMU_RC6p_RESIDENCY, fd);
>> +       if (ret > 0) {
>> +               num_pmu++;
>> +               ret = perf_i915_open_group(I915_PMU_RC6p_RESIDENCY, fd);
>> +               if (ret > 0)
>> +                       num_pmu++;
>> +       }
>> +
>> +       gem_quiescent_gpu(gem_fd);
>> +       sleep(2);
>> +
>> +       pmu_read_multi(fd, num_pmu, prev);
>> +       usleep(duration_ns / 1000);
>> +       pmu_read_multi(fd, num_pmu, idle);
>> +
>> +       for (i = 0; i < num_pmu; i++)
>> +               assert_within_epsilon(idle[i] - prev[i], duration_ns,
>> +                                     tolerance);
>> +
>> +       spin = igt_spin_batch_new(gem_fd, 0, I915_EXEC_RENDER, 0);
>> +       igt_spin_batch_set_timeout(spin, duration_ns);
> 
> Are we sure the GPU isn't allowed to sleep? i915_user_forcewake we
> expect to keep the GPU out of rc6.

I was sure, but was I wrong? :)

Regardless, replacing spin batch with a forcewake sounds simpler so I 
can do that.

> 
>> +igt_main
>> +{
>> +       const unsigned int num_other_metrics =
>> +                               I915_PMU_LAST - __I915_PMU_OTHER(0) + 1;
>> +       unsigned int num_engines = 0;
>> +       int fd = -1;
>> +       const struct intel_execution_engine *e;
>> +       unsigned int i;
>> +
>> +       igt_fixture {
>> +               fd = drm_open_driver_master(DRIVER_INTEL);
>> +
>> +               igt_require_gem(fd);
>> +               igt_require(i915_type_id() > 0);
>> +
>> +               for_each_engine_class_instance(fd, e) {
>> +                       if (gem_has_ring(fd, e->exec_id | e->flags))
>> +                               num_engines++;
>> +               }
>> +       }
>> +
>> +       /**
>> +        * Test invalid access via perf API is rejected.
>> +        */
> 
> ARGH. No comments on the intentions of the code?

Will add.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* ✓ Fi.CI.IGT: success for IGT PMU support (rev2)
  2017-09-18 11:38 [PATCH i-g-t 0/5] IGT PMU support Tvrtko Ursulin
                   ` (5 preceding siblings ...)
  2017-09-18 18:16 ` ✓ Fi.CI.BAT: success for IGT PMU support (rev2) Patchwork
@ 2017-09-19  9:44 ` Patchwork
  2017-09-20 16:52 ` ✗ Fi.CI.BAT: warning for IGT PMU support (rev3) Patchwork
  7 siblings, 0 replies; 15+ messages in thread
From: Patchwork @ 2017-09-19  9:44 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

== Series Details ==

Series: IGT PMU support (rev2)
URL   : https://patchwork.freedesktop.org/series/28253/
State : success

== Summary ==

Test kms_flip:
        Subgroup plain-flip-fb-recreate:
                fail       -> PASS       (shard-hsw) fdo#102504
Test gem_eio:
        Subgroup in-flight:
                pass       -> DMESG-FAIL (shard-hsw) fdo#102616
Test drv_module_reload:
        Subgroup basic-reload-inject:
                dmesg-warn -> PASS       (shard-hsw) fdo#102707

fdo#102504 https://bugs.freedesktop.org/show_bug.cgi?id=102504
fdo#102616 https://bugs.freedesktop.org/show_bug.cgi?id=102616
fdo#102707 https://bugs.freedesktop.org/show_bug.cgi?id=102707

shard-hsw        total:2392 pass:1244 dwarn:0   dfail:1   fail:13  skip:1134 time:9506s

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_194/shards.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH i-g-t 5/5] tests/perf_pmu: Tests for i915 PMU API
  2017-09-19  8:37     ` Tvrtko Ursulin
@ 2017-09-19  9:58       ` Chris Wilson
  0 siblings, 0 replies; 15+ messages in thread
From: Chris Wilson @ 2017-09-19  9:58 UTC (permalink / raw)
  To: Tvrtko Ursulin, Tvrtko Ursulin, Intel-gfx

Quoting Tvrtko Ursulin (2017-09-19 09:37:35)
> 
> On 18/09/2017 14:17, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2017-09-18 12:38:40)
> >> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>
> >> A bunch of tests for the new i915 PMU feature.
> >>
> >> Parts of the code were initialy sketched by Dmitry Rogozhkin.
> >>
> >> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> >> Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
> >> ---
> >>   lib/igt_gt.c           |  23 +-
> >>   lib/igt_gt.h           |   8 +
> >>   tests/Makefile.sources |   1 +
> >>   tests/perf_pmu.c       | 713 +++++++++++++++++++++++++++++++++++++++++++++++++
> >>   4 files changed, 738 insertions(+), 7 deletions(-)
> >>   create mode 100644 tests/perf_pmu.c
> >>
> >> diff --git a/lib/igt_gt.c b/lib/igt_gt.c
> >> index b3f3b3809eee..102cc2841feb 100644
> >> --- a/lib/igt_gt.c
> >> +++ b/lib/igt_gt.c
> >> @@ -537,14 +537,23 @@ unsigned intel_detect_and_clear_missed_interrupts(int fd)
> >>          return missed;
> >>   }
> >>   
> >> +enum drm_i915_gem_engine_class {
> >> +       I915_ENGINE_CLASS_OTHER = 0,
> >> +       I915_ENGINE_CLASS_RENDER = 1,
> >> +       I915_ENGINE_CLASS_COPY = 2,
> >> +       I915_ENGINE_CLASS_VIDEO = 3,
> >> +       I915_ENGINE_CLASS_VIDEO_ENHANCE = 4,
> >> +       I915_ENGINE_CLASS_MAX /* non-ABI */
> >> +};
> >> +
> >>   const struct intel_execution_engine intel_execution_engines[] = {
> >> -       { "default", NULL, 0, 0 },
> >> -       { "render", "rcs0", I915_EXEC_RENDER, 0 },
> >> -       { "bsd", "vcs0", I915_EXEC_BSD, 0 },
> >> -       { "bsd1", "vcs0", I915_EXEC_BSD, 1<<13 /*I915_EXEC_BSD_RING1*/ },
> >> -       { "bsd2", "vcs1", I915_EXEC_BSD, 2<<13 /*I915_EXEC_BSD_RING2*/ },
> >> -       { "blt", "bcs0", I915_EXEC_BLT, 0 },
> >> -       { "vebox", "vecs0", I915_EXEC_VEBOX, 0 },
> >> +       { "default", NULL, -1, -1, 0, 0 },
> >> +       { "render", "rcs0", I915_ENGINE_CLASS_RENDER, 0, I915_EXEC_RENDER, 0 },
> >> +       { "bsd", "vcs0", I915_ENGINE_CLASS_VIDEO, 0, I915_EXEC_BSD, 0 },
> >> +       { "bsd1", "vcs0", I915_ENGINE_CLASS_VIDEO, 0, I915_EXEC_BSD, 1<<13 /*I915_EXEC_BSD_RING1*/ },
> >> +       { "bsd2", "vcs1", I915_ENGINE_CLASS_VIDEO, 1, I915_EXEC_BSD, 2<<13 /*I915_EXEC_BSD_RING2*/ },
> >> +       { "blt", "bcs0", I915_ENGINE_CLASS_COPY, 0, I915_EXEC_BLT, 0 },
> >> +       { "vebox", "vecs0", I915_ENGINE_CLASS_VIDEO_ENHANCE, 0, I915_EXEC_VEBOX, 0 },
> >>          { NULL, 0, 0 }
> > 
> > I was anticipating a new struct for the explicit interface so that we
> > can easily phase out the out with its aliasing.
> 
> It's definitely buggy as it is as Dmitry has discovered. I'll have a 
> think on how to do it elegantly. Too bad we can't piggy back the 
> class-instance execbuf to this..

Well, I am hoping that it will be designed to slot into that interface. :)
Then we start going through the tests deciding which are covering ABI
and so need exercise on both, and which are designed to exercise
internal/hw paths and so only need to be run on specific engines and not
all aliases.
 
> >>   };
> >>   
> >> diff --git a/lib/igt_gt.h b/lib/igt_gt.h
> >> index 2579cbd37be7..436041ce9cc0 100644
> >> --- a/lib/igt_gt.h
> >> +++ b/lib/igt_gt.h
> >> @@ -66,6 +66,8 @@ unsigned intel_detect_and_clear_missed_interrupts(int fd);
> >>   extern const struct intel_execution_engine {
> >>          const char *name;
> >>          const char *full_name;
> >> +       int class;
> >> +       int instance;
> >>          unsigned exec_id;
> >>          unsigned flags;
> >>   } intel_execution_engines[];
> >> @@ -78,6 +80,12 @@ extern const struct intel_execution_engine {
> >>               e__++) \
> >>                  for_if (gem_has_ring(fd__, flags__ = e__->exec_id | e__->flags))
> >>   
> >> +#define for_each_engine_class_instance(fd__, e__) \
> >> +       for ((e__) = intel_execution_engines;\
> >> +            (e__)->name; \
> >> +            (e__)++) \
> >> +               for_if ((e__)->class > 0)
> >> +
> >>   bool gem_can_store_dword(int fd, unsigned int engine);
> >>   
> >>   #endif /* IGT_GT_H */
> >> diff --git a/tests/Makefile.sources b/tests/Makefile.sources
> >> index cf542df181a8..4bab6247151c 100644
> >> --- a/tests/Makefile.sources
> >> +++ b/tests/Makefile.sources
> >> @@ -217,6 +217,7 @@ TESTS_progs = \
> >>          kms_vblank \
> >>          meta_test \
> >>          perf \
> >> +       perf_pmu \
> >>          pm_backlight \
> >>          pm_lpsp \
> >>          pm_rc6_residency \
> >> diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
> >> new file mode 100644
> >> index 000000000000..2dbee586dacc
> >> --- /dev/null
> >> +++ b/tests/perf_pmu.c
> >> @@ -0,0 +1,713 @@
> >> +/*
> >> + * Copyright © 2017 Intel Corporation
> >> + *
> >> + * Permission is hereby granted, free of charge, to any person obtaining a
> >> + * copy of this software and associated documentation files (the "Software"),
> >> + * to deal in the Software without restriction, including without limitation
> >> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> >> + * and/or sell copies of the Software, and to permit persons to whom the
> >> + * Software is furnished to do so, subject to the following conditions:
> >> + *
> >> + * The above copyright notice and this permission notice (including the next
> >> + * paragraph) shall be included in all copies or substantial portions of the
> >> + * Software.
> >> + *
> >> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> >> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> >> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> >> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> >> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> >> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> >> + * IN THE SOFTWARE.
> >> + *
> >> + */
> >> +
> >> +#include <stdlib.h>
> >> +#include <stdio.h>
> >> +#include <string.h>
> >> +#include <fcntl.h>
> >> +#include <inttypes.h>
> >> +#include <errno.h>
> >> +#include <sys/stat.h>
> >> +#include <sys/time.h>
> >> +#include <sys/times.h>
> >> +#include <sys/types.h>
> >> +#include <dirent.h>
> >> +#include <time.h>
> >> +#include <poll.h>
> >> +
> >> +#include "igt.h"
> >> +#include "igt_perf.h"
> >> +
> >> +IGT_TEST_DESCRIPTION("Test the i915 pmu perf interface");
> >> +
> >> +const double tolerance = 0.02f;
> >> +const unsigned long batch_duration_ns = 1000 * 1000 * 1000 / 2;
> >> +
> >> +static void
> >> +init(int gem_fd, const struct intel_execution_engine *e, uint8_t sample)
> >> +{
> >> +       uint64_t config = __I915_PMU_ENGINE(e->class, e->instance, sample);
> >> +       int fd;
> >> +
> >> +       igt_require(gem_has_ring(gem_fd, e->exec_id | e->flags));
> > 
> > gem_require_ring()
> 
> Missed the existance of it.
> 
> 
> > 
> >> +
> >> +       fd = perf_i915_open(config);
> > 
> > Although the kernel interface is the authority.
> > 
> > So this should be igt_require, and igt_assert(has_ring);
> 
> Don't get what you are saying, igt_require(has_ring) followed by 
> igt_assert(has_ring)??

I'm saying the perf_i915_open() knows better than the test when it is
allowed to be run. i.e. don't second guess by preceding it with
gem_require_ring(), but assert afterwards that the result matches
expectation.

> > 
> >> +       igt_assert(fd >= 0);
> >> +
> >> +       close(fd);
> >> +}
> >> +
> >> +static uint64_t pmu_read_single(int fd)
> >> +{
> >> +       uint64_t data[2];
> >> +       ssize_t len;
> >> +
> >> +       len = read(fd, data, sizeof(data));
> > 
> > Perf is a datagram api, right? A short read gives what you asked for and
> > discards the rest of the packet, iirc.
> 
> Nope, I've noticed overlay was failing due that assumption and even 
> traced the code in core perf which fails short reads. Hence the patch in 
> this series to fix overlay in that respect.

You now understand why that bug exists ;) Ta.

> 
> > 
> >> +       igt_assert_eq(len, sizeof(data));
> >> +
> >> +       return data[0];
> >> +}
> >> +
> >> +static void pmu_read_multi(int fd, unsigned int num, uint64_t *val)
> >> +{
> >> +       uint64_t buf[2 + num];
> >> +       unsigned int i;
> >> +       ssize_t len;
> >> +
> >> +       len = read(fd, buf, sizeof(buf));
> >> +       igt_assert_eq(len, sizeof(buf));
> >> +       for (i = 0; i < num; i++)
> >> +               val[i] = buf[2 + i];
> >> +}
> >> +
> >> +#define assert_within_epsilon(x, ref, tolerance) \
> >> +       igt_assert_f((double)(x) <= (1.0 + tolerance) * (double)ref && \
> >> +                    (double)(x) >= (1.0 - tolerance) * (double)ref, \
> >> +                    "'%s' != '%s' (%f not within %f%% tolerance of %f)\n",\
> >> +                    #x, #ref, (double)x, tolerance * 100.0, (double)ref)
> >> +
> >> +static void
> >> +single(int gem_fd, const struct intel_execution_engine *e, bool busy)
> >> +{
> >> +       uint64_t config = I915_PMU_ENGINE_BUSY(e->class, e->instance);
> >> +       double ref = busy ? batch_duration_ns : 0.0f;
> >> +       igt_spin_t *spin;
> >> +       uint64_t val;
> >> +       int fd;
> >> +
> >> +       igt_require(gem_has_ring(gem_fd, e->exec_id | e->flags));
> >> +
> >> +       if (busy) {
> >> +               spin = igt_spin_batch_new(gem_fd, 0, e->exec_id | e->flags, 0);
> >> +               igt_spin_batch_set_timeout(spin, batch_duration_ns);
> >> +       } else {
> >> +               usleep(batch_duration_ns / 1000);
> >> +       }
> >> +
> >> +       fd = perf_i915_open(config);
> >> +       igt_assert(fd >= 0);
> >> +
> >> +       if (busy)
> >> +               gem_sync(gem_fd, spin->handle);
> >> +
> >> +       val = pmu_read_single(fd);
> >> +
> >> +       assert_within_epsilon(val, ref, tolerance);
> >> +
> >> +       if (busy)
> >> +               igt_spin_batch_free(gem_fd, spin);
> >> +       close(fd);
> >> +}
> >> +
> >> +static void
> >> +busy_check_all(int gem_fd, const struct intel_execution_engine *e,
> > 
> > busy_check_others
> > 
> > busy_check_all I would expect to be checking that all engines are
> > correctly recorded as being busy at the same time. And there should also
> > be permutations of (busy, idle, wait) across the engines.
> 
> I can do that, sure. But it is checking all engines, just some for 100% 
> busy, and some for 100% idle. :) Naming it other would then be not 
> correct either.

Definitely like the idea of doing odd-one-out testing for both busy and
idle (1 busy, N-1 idle; 1 idle, N-1 busy) across all engines. I don't
expect to get any more insight from the other permutations. 2N passes
with say 100us batches isn't going to be an issue.

Do we have a resolution test? With your execlists interface, you should
get precise timings for even nop batches. But we don't expose the lower
limit on accuracy for legacy, do we?

> >> +              const unsigned int num_engines)
> >> +{
> >> +       const struct intel_execution_engine *e_;
> >> +       uint64_t val[num_engines];
> >> +       int fd[2];
> >> +       igt_spin_t *spin;
> >> +       unsigned int busy_idx, i;
> >> +
> >> +       igt_require(gem_has_ring(gem_fd, e->exec_id | e->flags));
> >> +
> >> +       spin = igt_spin_batch_new(gem_fd, 0, e->exec_id | e->flags, 0);
> >> +       igt_spin_batch_set_timeout(spin, batch_duration_ns);
> >> +
> >> +       i = 0;
> >> +       fd[0] = -1;
> >> +       for_each_engine_class_instance(fd, e_) {
> >> +               if (!gem_has_ring(gem_fd, e_->exec_id | e_->flags))
> >> +                       continue;
> >> +
> >> +               if (e == e_)
> >> +                       busy_idx = i;
> >> +
> >> +               fd[i == 0 ? 0 : 1] =
> >> +                       perf_i915_open_group(I915_PMU_ENGINE_BUSY(e_->class,
> >> +                                                                 e_->instance),
> >> +                                            fd[0]);
> >> +               igt_assert(fd[0] > 0);
> >> +               igt_assert(i == 0 || fd[1] > 0);
> >> +               i++;
> >> +       }
> >> +
> >> +       gem_sync(gem_fd, spin->handle);
> >> +
> >> +       pmu_read_multi(fd[0], num_engines, val);
> >> +
> >> +       assert_within_epsilon(val[busy_idx], batch_duration_ns, tolerance);
> >> +       for (i = 0; i < num_engines; i++) {
> >> +               if (i == busy_idx)
> >> +                       continue;
> >> +               assert_within_epsilon(val[i], 0.0f, tolerance);
> >> +       }
> >> +
> >> +       igt_spin_batch_free(gem_fd, spin);
> >> +       close(fd[0]);
> >> +}
> > 
> >> +static void
> >> +no_sema(int gem_fd, const struct intel_execution_engine *e, bool busy)
> > 
> > This is just the sanity check half of the sema test.
> > 
> > No wait, no queued?
> 
> Forgot about queued completely!
> 
> And semaphores I left for later. I don't have any <gen9 machines to play 
> with them locally.

You can have an ivb celery with my sympathies!

> >> +static void
> >> +multi_client(int gem_fd, const struct intel_execution_engine *e)
> >> +{
> >> +       uint64_t config = I915_PMU_ENGINE_BUSY(e->class, e->instance);
> >> +       igt_spin_t *spin;
> >> +       uint64_t val[2];
> >> +       int fd[2];
> >> +
> >> +       igt_require(gem_has_ring(gem_fd, e->exec_id | e->flags));
> >> +
> >> +       spin = igt_spin_batch_new(gem_fd, 0, e->exec_id | e->flags, 0);
> >> +       igt_spin_batch_set_timeout(spin, batch_duration_ns);
> >> +
> >> +       fd[0] = perf_i915_open(config);
> >> +       igt_assert(fd[0] >= 0);
> >> +
> >> +       usleep(batch_duration_ns / 4000);
> >> +
> >> +       fd[1] = perf_i915_open(config);
> >> +       igt_assert(fd[1] >= 0);
> >> +
> >> +       usleep(batch_duration_ns / 3000);
> >> +
> >> +       val[1] = pmu_read_single(fd[1]);
> >> +       close(fd[1]);
> >> +
> >> +       gem_sync(gem_fd, spin->handle);
> >> +
> >> +       val[0] = pmu_read_single(fd[0]);
> >> +
> >> +       assert_within_epsilon(val[0], batch_duration_ns, tolerance);
> >> +       assert_within_epsilon(val[1], batch_duration_ns / 3, tolerance);
> >> +
> >> +       igt_spin_batch_free(gem_fd, spin);
> >> +       close(fd[0]);
> >> +}
> > 
> 
> Forgot to comment or changed your mind?

Partly, I think I was reading backwards and was going to comment on
igt_require(gem_has_ring()), and the other part thinks I left it here to
read again afterwards.

Hmm. I don't trust usleep() to be accurate. Tolerance is 2%. I'd feel safer
if you wrapped usleep() with clock_gettime / igt_nsec_elapsed.

> >> +static void
> >> +test_interrupts(int gem_fd)
> >> +{
> >> +       igt_spin_t *spin;
> >> +       uint64_t idle, busy, prev;
> >> +       int fd;
> >> +
> >> +       fd = perf_i915_open(I915_PMU_INTERRUPTS);
> >> +       igt_assert(fd >= 0);
> >> +
> >> +       gem_quiescent_gpu(gem_fd);
> >> +       sleep(2);
> >> +       prev = pmu_read_single(fd);
> >> +       usleep(batch_duration_ns / 1000);
> >> +       idle = pmu_read_single(fd);
> >> +
> >> +       igt_assert_eq(idle - prev, 0);
> >> +
> >> +       spin = igt_spin_batch_new(gem_fd, 0, I915_EXEC_RENDER, 0);
> >> +       igt_spin_batch_set_timeout(spin, batch_duration_ns);
> >> +       gem_sync(gem_fd, spin->handle);
> > 
> > There's no guaranteed interrupt here.
> 
> Hm yes.. bugger. Use fences instead of spin batch to ensure some? Or 
> extend spin batch API to support fences?

Limit to execlists and do a context-switch? Oh, that reminds me, knowing
context-switch per engine per second would also be useful.

(As would isolating the wait times for flips.)

Oh, and if we really wanted to be nasty, MI_USER_INTERRUPT from a batch.
It wouldn't break the kernel, but I expect we might need a secure batch.

> >> +
> >> +       busy = pmu_read_single(fd);
> >> +       igt_assert(busy > idle);
> >> +
> >> +       igt_spin_batch_free(gem_fd, spin);
> >> +       close(fd);
> >> +}
> >> +
> >> +static void
> >> +test_frequency(int gem_fd)
> >> +{
> >> +       igt_spin_t *spin;
> >> +       uint64_t idle[2], busy[2];
> >> +       int fd;
> >> +
> >> +       fd = perf_i915_open_group(I915_PMU_REQUESTED_FREQUENCY, -1);
> >> +       igt_assert(fd >= 0);
> > 
> > Ask the kernel if it is supported.
> 
> Yep.
> 
> > 
> >> +       igt_assert(perf_i915_open_group(I915_PMU_ACTUAL_FREQUENCY, fd) >= 0);
> >> +
> >> +       gem_quiescent_gpu(gem_fd);
> >> +       usleep(batch_duration_ns / 1000);
> >> +
> >> +       pmu_read_multi(fd, 2, idle);
> >> +
> >> +       spin = igt_spin_batch_new(gem_fd, 0, I915_EXEC_RENDER, 0);
> >> +       igt_spin_batch_set_timeout(spin, batch_duration_ns);
> >> +       gem_sync(gem_fd, spin->handle);
> >> +
> >> +       pmu_read_multi(fd, 2, busy);
> >> +
> >> +       igt_assert(busy[0] > idle[0]);
> >> +       igt_assert(busy[1] > idle[1]);
> > 
> > Nothing guarantees busy[1] changes, it is hw/fw dependent.
> > busy[0] depends on user config.
> 
> Do we reasonably expect IGT to be run in such environments? Or change 
> this to not expect a change but just compare against debugfs?
> 
> > 
> >> +
> >> +       igt_spin_batch_free(gem_fd, spin);
> >> +       close(fd);
> >> +}
> >> +
> > 
> >> +static void
> >> +test_rc6p(int gem_fd)
> >> +{
> >> +       const unsigned int devid = intel_get_drm_devid(gem_fd);
> >> +       int64_t duration_ns = 2 * 1000 * 1000 * 1000;
> >> +       unsigned int num_pmu = 1;
> >> +       igt_spin_t *spin;
> >> +       uint64_t idle[3], busy[3], prev[3];
> >> +       unsigned int i;
> >> +       int fd, ret;
> >> +
> >> +       igt_require(intel_gen(devid) < 8 && !IS_HASWELL(devid));
> > 
> > Ask the kernel. (Applies equally to rc6, rc6p).
> 
> What is the way to do this? Don't see these in get_param.

Perf knows! I just don't like the test second guessing the kernel. The
kernel has both the list of supported hw, along with the user
restrictions and also what is allowed through the perf interface.
I expect the tests to be fairly agnostic, if the kernel says it has the
counter, then it should comply with our expectations on rc6 behaviour.
If the kernel doesn't say it has the counter, then it may not have for
any number of good reasons.

> > No rc6pp testing?
> 
> Copy and paste error.

Lack of hw would be a fine answer ;)

It's more of a question as to whether we should make sure the ABI covers
all rc6 possibilities, even though not all are currently implemented.

> >> +
> >> +       fd = perf_i915_open_group(I915_PMU_RC6_RESIDENCY, -1);
> >> +       igt_assert(fd >= 0);
> >> +
> >> +       ret = perf_i915_open_group(I915_PMU_RC6p_RESIDENCY, fd);
> >> +       if (ret > 0) {
> >> +               num_pmu++;
> >> +               ret = perf_i915_open_group(I915_PMU_RC6p_RESIDENCY, fd);
> >> +               if (ret > 0)
> >> +                       num_pmu++;
> >> +       }
> >> +
> >> +       gem_quiescent_gpu(gem_fd);
> >> +       sleep(2);
> >> +
> >> +       pmu_read_multi(fd, num_pmu, prev);
> >> +       usleep(duration_ns / 1000);
> >> +       pmu_read_multi(fd, num_pmu, idle);
> >> +
> >> +       for (i = 0; i < num_pmu; i++)
> >> +               assert_within_epsilon(idle[i] - prev[i], duration_ns,
> >> +                                     tolerance);
> >> +
> >> +       spin = igt_spin_batch_new(gem_fd, 0, I915_EXEC_RENDER, 0);
> >> +       igt_spin_batch_set_timeout(spin, duration_ns);
> > 
> > Are we sure the GPU isn't allowed to sleep? i915_user_forcewake we
> > expect to keep the GPU out of rc6.
> 
> I was sure, but was I wrong? :)
> 
> Regardless, replacing spin batch with a forcewake sounds simpler so I 
> can do that.
> 
> > 
> >> +igt_main
> >> +{
> >> +       const unsigned int num_other_metrics =
> >> +                               I915_PMU_LAST - __I915_PMU_OTHER(0) + 1;
> >> +       unsigned int num_engines = 0;
> >> +       int fd = -1;
> >> +       const struct intel_execution_engine *e;
> >> +       unsigned int i;
> >> +
> >> +       igt_fixture {
> >> +               fd = drm_open_driver_master(DRIVER_INTEL);
> >> +
> >> +               igt_require_gem(fd);
> >> +               igt_require(i915_type_id() > 0);
> >> +
> >> +               for_each_engine_class_instance(fd, e) {
> >> +                       if (gem_has_ring(fd, e->exec_id | e->flags))
> >> +                               num_engines++;
> >> +               }
> >> +       }
> >> +
> >> +       /**
> >> +        * Test invalid access via perf API is rejected.
> >> +        */
> > 
> > ARGH. No comments on the intentions of the code?
> 
> Will add.

Pet peeve, adding comments for a hypothetical end user (to do what?)
rather than explain the purpose and subtleties of the code (esp. things
like using a context-switch to trigger an interrupt, that's the inside
knowledge that perhaps not everyone will know first hand).
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH v2 i-g-t 5/5] tests/perf_pmu: Tests for i915 PMU API
  2017-09-18 11:38 ` [PATCH i-g-t 5/5] tests/perf_pmu: Tests for i915 PMU API Tvrtko Ursulin
  2017-09-18 13:17   ` Chris Wilson
  2017-09-18 21:18   ` Rogozhkin, Dmitry V
@ 2017-09-20 16:12   ` Tvrtko Ursulin
  2 siblings, 0 replies; 15+ messages in thread
From: Tvrtko Ursulin @ 2017-09-20 16:12 UTC (permalink / raw)
  To: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

A bunch of tests for the new i915 PMU feature.

Parts of the code were initially sketched by Dmitry Rogozhkin.

v2: (Most suggestions by Chris Wilson)
 * Add new class/instance based engine list.
 * Add gem_has_engine/gem_require_engine to work with class/instance.
 * Use the above two throughout the test.
 * Shorten tests to 100ms busy batches, seems enough.
 * Add queued counter sanity checks.
 * Use igt_nsec_elapsed.
 * Skip on perf -ENODEV in some tests instead of embedding knowledge locally.
 * Fix multi ordering for busy accounting.
 * Use new guaranteed_usleep when sleep time is asserted on.
 * Check for no queued when idle/busy.
 * Add queued counter init test.
 * Add queued tests.
 * Consolidate and increase multiple busy engines tests to most-busy and
   all-busy tests.
 * Guarantee interrupts by using fences.
 * Test RC6 via forcewake.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
---
 lib/igt_gt.c           |  50 +++
 lib/igt_gt.h           |  38 +++
 lib/igt_perf.h         |   9 +-
 tests/Makefile.sources |   1 +
 tests/perf_pmu.c       | 840 +++++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 930 insertions(+), 8 deletions(-)
 create mode 100644 tests/perf_pmu.c

diff --git a/lib/igt_gt.c b/lib/igt_gt.c
index b3f3b3809eee..4c75811fb1b3 100644
--- a/lib/igt_gt.c
+++ b/lib/igt_gt.c
@@ -568,3 +568,53 @@ bool gem_can_store_dword(int fd, unsigned int engine)
 
 	return true;
 }
+
+const struct intel_execution_engine2 intel_execution_engines2[] = {
+	{ "rcs0", I915_ENGINE_CLASS_RENDER, 0 },
+	{ "bcs0", I915_ENGINE_CLASS_COPY, 0 },
+	{ "vcs0", I915_ENGINE_CLASS_VIDEO, 0 },
+	{ "vcs1", I915_ENGINE_CLASS_VIDEO, 1 },
+	{ "vecs0", I915_ENGINE_CLASS_VIDEO_ENHANCE, 0 },
+};
+
+unsigned int
+gem_class_instance_to_eb_flags(int gem_fd,
+			       enum drm_i915_gem_engine_class class,
+			       unsigned int instance)
+{
+	if (class != I915_ENGINE_CLASS_VIDEO)
+		igt_assert(instance == 0);
+	else
+		igt_assert(instance >= 0 && instance <= 1);
+
+	switch (class) {
+	case I915_ENGINE_CLASS_RENDER:
+		return I915_EXEC_RENDER;
+	case I915_ENGINE_CLASS_COPY:
+		return I915_EXEC_BLT;
+	case I915_ENGINE_CLASS_VIDEO:
+		if (instance == 0) {
+			if (gem_has_bsd2(gem_fd))
+				return I915_EXEC_BSD | I915_EXEC_BSD_RING1;
+			else
+				return I915_EXEC_BSD;
+
+		} else {
+			return I915_EXEC_BSD | I915_EXEC_BSD_RING2;
+		}
+	case I915_ENGINE_CLASS_VIDEO_ENHANCE:
+		return I915_EXEC_VEBOX;
+	case I915_ENGINE_CLASS_OTHER:
+	default:
+		igt_assert(0);
+	};
+}
+
+bool gem_has_engine(int gem_fd,
+		    enum drm_i915_gem_engine_class class,
+		    unsigned int instance)
+{
+	return gem_has_ring(gem_fd,
+			    gem_class_instance_to_eb_flags(gem_fd, class,
+							   instance));
+}
diff --git a/lib/igt_gt.h b/lib/igt_gt.h
index 2579cbd37be7..fb67ae1a7d1f 100644
--- a/lib/igt_gt.h
+++ b/lib/igt_gt.h
@@ -25,6 +25,7 @@
 #define IGT_GT_H
 
 #include "igt_debugfs.h"
+#include "igt_core.h"
 
 void igt_require_hang_ring(int fd, int ring);
 
@@ -80,4 +81,41 @@ extern const struct intel_execution_engine {
 
 bool gem_can_store_dword(int fd, unsigned int engine);
 
+extern const struct intel_execution_engine2 {
+	const char *name;
+	int class;
+	int instance;
+} intel_execution_engines2[];
+
+#define for_each_engine_class_instance(fd__, e__) \
+	for ((e__) = intel_execution_engines2;\
+	     (e__)->name; \
+	     (e__)++)
+
+enum drm_i915_gem_engine_class {
+	I915_ENGINE_CLASS_OTHER = 0,
+	I915_ENGINE_CLASS_RENDER = 1,
+	I915_ENGINE_CLASS_COPY = 2,
+	I915_ENGINE_CLASS_VIDEO = 3,
+	I915_ENGINE_CLASS_VIDEO_ENHANCE = 4,
+	I915_ENGINE_CLASS_MAX /* non-ABI */
+};
+
+unsigned int
+gem_class_instance_to_eb_flags(int gem_fd,
+			       enum drm_i915_gem_engine_class class,
+			       unsigned int instance);
+
+bool gem_has_engine(int gem_fd,
+		    enum drm_i915_gem_engine_class class,
+		    unsigned int instance);
+
+static inline
+void gem_require_engine(int gem_fd,
+			enum drm_i915_gem_engine_class class,
+			unsigned int instance)
+{
+	igt_require(gem_has_engine(gem_fd, class, instance));
+}
+
 #endif /* IGT_GT_H */
diff --git a/lib/igt_perf.h b/lib/igt_perf.h
index e29216f0500a..d64e0bd7a06a 100644
--- a/lib/igt_perf.h
+++ b/lib/igt_perf.h
@@ -29,14 +29,7 @@
 
 #include <linux/perf_event.h>
 
-enum drm_i915_gem_engine_class {
-	I915_ENGINE_CLASS_OTHER = 0,
-	I915_ENGINE_CLASS_RENDER = 1,
-	I915_ENGINE_CLASS_COPY = 2,
-	I915_ENGINE_CLASS_VIDEO = 3,
-	I915_ENGINE_CLASS_VIDEO_ENHANCE = 4,
-	I915_ENGINE_CLASS_MAX /* non-ABI */
-};
+#include "igt_gt.h"
 
 enum drm_i915_pmu_engine_sample {
 	I915_SAMPLE_QUEUED = 0,
diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index cf542df181a8..4bab6247151c 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -217,6 +217,7 @@ TESTS_progs = \
 	kms_vblank \
 	meta_test \
 	perf \
+	perf_pmu \
 	pm_backlight \
 	pm_lpsp \
 	pm_rc6_residency \
diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
new file mode 100644
index 000000000000..42d92b36a384
--- /dev/null
+++ b/tests/perf_pmu.c
@@ -0,0 +1,840 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/times.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <time.h>
+#include <poll.h>
+
+#include "igt.h"
+#include "igt_core.h"
+#include "igt_perf.h"
+
+IGT_TEST_DESCRIPTION("Test the i915 pmu perf interface");
+
+const double tolerance = 0.02f;
+const unsigned long batch_duration_ns = 100 * 1000 * 1000;
+
+static int open_pmu(uint64_t config)
+{
+	int fd;
+
+	fd = perf_i915_open(config);
+	igt_require(fd >= 0 || (fd < 0 && errno != ENODEV));
+	igt_assert(fd >= 0);
+
+	return fd;
+}
+
+static int open_group(uint64_t config, int group)
+{
+	int fd;
+
+	fd = perf_i915_open_group(config, group);
+	igt_require(fd >= 0 || (fd < 0 && errno != ENODEV));
+	igt_assert(fd >= 0);
+
+	return fd;
+}
+
+static void
+init(int gem_fd, const struct intel_execution_engine2 *e, uint8_t sample)
+{
+	int fd;
+
+	fd = open_pmu(__I915_PMU_ENGINE(e->class, e->instance, sample));
+
+	close(fd);
+}
+
+static uint64_t pmu_read_single(int fd)
+{
+	uint64_t data[2];
+	ssize_t len;
+
+	len = read(fd, data, sizeof(data));
+	igt_assert_eq(len, sizeof(data));
+
+	return data[0];
+}
+
+static void pmu_read_multi(int fd, unsigned int num, uint64_t *val)
+{
+	uint64_t buf[2 + num];
+	unsigned int i;
+	ssize_t len;
+
+	len = read(fd, buf, sizeof(buf));
+	igt_assert_eq(len, sizeof(buf));
+	for (i = 0; i < num; i++)
+		val[i] = buf[2 + i];
+}
+
+#define assert_within_epsilon(x, ref, tolerance) \
+	igt_assert_f((double)(x) <= (1.0 + tolerance) * (double)ref && \
+		     (double)(x) >= (1.0 - tolerance) * (double)ref, \
+		     "'%s' != '%s' (%f not within %f%% tolerance of %f)\n",\
+		     #x, #ref, (double)x, tolerance * 100.0, (double)ref)
+
+static void guaranteed_usleep(unsigned int usec)
+{
+	uint64_t slept = 0, to_sleep = usec;
+
+	while (usec > 0) {
+		struct timespec start = { };
+		uint64_t this_sleep;
+
+		igt_nsec_elapsed(&start);
+		usleep(usec);
+		this_sleep = igt_nsec_elapsed(&start) / 1000;
+		slept += this_sleep;
+		if (this_sleep > usec)
+			break;
+		usec -= this_sleep;
+	}
+
+	assert_within_epsilon(slept, to_sleep, tolerance);
+}
+
+static unsigned int e2ring(int gem_fd, const struct intel_execution_engine2 *e)
+{
+	return gem_class_instance_to_eb_flags(gem_fd, e->class, e->instance);
+}
+
+static void
+single(int gem_fd, const struct intel_execution_engine2 *e, uint8_t sample,
+       bool busy)
+{
+	double ref = busy && sample == I915_SAMPLE_BUSY ?
+		     batch_duration_ns : 0.0f;
+	igt_spin_t *spin;
+	uint64_t val;
+	int fd;
+
+	fd = open_pmu(__I915_PMU_ENGINE(e->class, e->instance, sample));
+
+	if (busy) {
+		spin = igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e), 0);
+		igt_spin_batch_set_timeout(spin, batch_duration_ns);
+	} else {
+		guaranteed_usleep(batch_duration_ns / 1000);
+	}
+
+	if (busy)
+		gem_sync(gem_fd, spin->handle);
+
+	val = pmu_read_single(fd);
+
+	assert_within_epsilon(val, ref, tolerance);
+
+	if (busy)
+		igt_spin_batch_free(gem_fd, spin);
+	close(fd);
+}
+
+static void
+queued(int gem_fd, const struct intel_execution_engine2 *e)
+{
+	igt_spin_t *spin[2];
+	uint64_t val;
+	int fd;
+
+	fd = open_pmu(I915_PMU_ENGINE_QUEUED(e->class, e->instance));
+
+	spin[0] = igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e), 0);
+	igt_spin_batch_set_timeout(spin[0], batch_duration_ns);
+
+	spin[1] = igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e), 0);
+	igt_spin_batch_set_timeout(spin[1], batch_duration_ns);
+
+	gem_sync(gem_fd, spin[0]->handle);
+
+	val = pmu_read_single(fd);
+	assert_within_epsilon(val, batch_duration_ns, tolerance);
+
+	gem_sync(gem_fd, spin[1]->handle);
+
+	igt_spin_batch_free(gem_fd, spin[0]);
+	igt_spin_batch_free(gem_fd, spin[1]);
+	close(fd);
+}
+
+static void
+busy_check_all(int gem_fd, const struct intel_execution_engine2 *e,
+	       const unsigned int num_engines)
+{
+	const struct intel_execution_engine2 *e_;
+	uint64_t val[num_engines];
+	int fd[num_engines];
+	igt_spin_t *spin;
+	unsigned int busy_idx, i;
+
+	i = 0;
+	fd[0] = -1;
+	for_each_engine_class_instance(fd, e_) {
+		if (!gem_has_engine(gem_fd, e_->class, e_->instance))
+			continue;
+		else if (e == e_)
+			busy_idx = i;
+
+		fd[i++] = open_group(I915_PMU_ENGINE_BUSY(e_->class,
+							  e_->instance),
+				     fd[0]);
+	}
+
+	spin = igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e), 0);
+	igt_spin_batch_set_timeout(spin, batch_duration_ns);
+
+	gem_sync(gem_fd, spin->handle);
+
+	pmu_read_multi(fd[0], num_engines, val);
+
+	assert_within_epsilon(val[busy_idx], batch_duration_ns, tolerance);
+	for (i = 0; i < num_engines; i++) {
+		if (i == busy_idx)
+			continue;
+		assert_within_epsilon(val[i], 0.0f, tolerance);
+	}
+
+	igt_spin_batch_free(gem_fd, spin);
+	close(fd[0]);
+}
+
+static void
+most_busy_check_all(int gem_fd, const struct intel_execution_engine2 *e,
+		    const unsigned int num_engines)
+{
+	const struct intel_execution_engine2 *e_;
+	uint64_t val[num_engines];
+	int fd[num_engines];
+	igt_spin_t *spin[num_engines];
+	unsigned int idle_idx, i;
+
+	gem_require_engine(gem_fd, e->class, e->instance);
+
+	i = 0;
+	fd[0] = -1;
+	for_each_engine_class_instance(fd, e_) {
+		if (!gem_has_engine(gem_fd, e_->class, e_->instance))
+			continue;
+
+		fd[i] = open_group(I915_PMU_ENGINE_BUSY(e_->class,
+							e_->instance),
+				   fd[0]);
+
+		if (e == e_) {
+			idle_idx = i;
+		} else {
+			spin[i] = igt_spin_batch_new(gem_fd, 0,
+						     e2ring(gem_fd, e_), 0);
+			igt_spin_batch_set_timeout(spin[i], batch_duration_ns);
+		}
+
+		i++;
+	}
+
+	for (i = 0; i < num_engines; i++) {
+		if (i != idle_idx)
+			gem_sync(gem_fd, spin[i]->handle);
+	}
+
+	pmu_read_multi(fd[0], num_engines, val);
+
+	for (i = 0; i < num_engines; i++) {
+		if (i == idle_idx)
+			assert_within_epsilon(val[i], 0.0f, tolerance);
+		else
+			assert_within_epsilon(val[i], batch_duration_ns,
+					      tolerance);
+	}
+
+	for (i = 0; i < num_engines; i++) {
+		if (i != idle_idx)
+			igt_spin_batch_free(gem_fd, spin[i]);
+	}
+	close(fd[0]);
+}
+
+static void
+all_busy_check_all(int gem_fd, const unsigned int num_engines)
+{
+	const struct intel_execution_engine2 *e;
+	uint64_t val[num_engines];
+	int fd[num_engines];
+	igt_spin_t *spin[num_engines];
+	unsigned int i;
+
+	i = 0;
+	fd[0] = -1;
+	for_each_engine_class_instance(fd, e) {
+		if (!gem_has_engine(gem_fd, e->class, e->instance))
+			continue;
+
+		fd[i] = open_group(I915_PMU_ENGINE_BUSY(e->class, e->instance),
+				   fd[0]);
+
+		spin[i] = igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e), 0);
+		igt_spin_batch_set_timeout(spin[i], batch_duration_ns);
+
+		i++;
+	}
+
+	for (i = 0; i < num_engines; i++)
+		gem_sync(gem_fd, spin[i]->handle);
+
+	pmu_read_multi(fd[0], num_engines, val);
+
+	for (i = 0; i < num_engines; i++)
+		assert_within_epsilon(val[i], batch_duration_ns, tolerance);
+
+	for (i = 0; i < num_engines; i++)
+		igt_spin_batch_free(gem_fd, spin[i]);
+	close(fd[0]);
+}
+
+static void
+no_sema(int gem_fd, const struct intel_execution_engine2 *e, bool busy)
+{
+	igt_spin_t *spin;
+	uint64_t val[2];
+	int fd;
+
+	fd = open_group(I915_PMU_ENGINE_SEMA(e->class, e->instance), -1);
+	open_group(I915_PMU_ENGINE_WAIT(e->class, e->instance), fd);
+
+	if (busy) {
+		spin = igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e), 0);
+		igt_spin_batch_set_timeout(spin, batch_duration_ns);
+	} else {
+		usleep(batch_duration_ns / 1000);
+	}
+
+	pmu_read_multi(fd, 2, val);
+
+	assert_within_epsilon(val[0], 0.0f, tolerance);
+	assert_within_epsilon(val[1], 0.0f, tolerance);
+
+	if (busy)
+		igt_spin_batch_free(gem_fd, spin);
+	close(fd);
+}
+
+static void
+multi_client(int gem_fd, const struct intel_execution_engine2 *e)
+{
+	uint64_t config = I915_PMU_ENGINE_BUSY(e->class, e->instance);
+	igt_spin_t *spin;
+	uint64_t val[2];
+	int fd[2];
+
+	fd[0] = open_pmu(config);
+
+	spin = igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e), 0);
+	igt_spin_batch_set_timeout(spin, batch_duration_ns);
+
+	guaranteed_usleep(batch_duration_ns / 2000);
+
+	fd[1] = perf_i915_open(config);
+	igt_assert(fd[1] >= 0);
+
+	gem_sync(gem_fd, spin->handle);
+
+	val[0] = pmu_read_single(fd[0]);
+	val[1] = pmu_read_single(fd[1]);
+	close(fd[1]);
+
+	assert_within_epsilon(val[0], batch_duration_ns, tolerance);
+	assert_within_epsilon(val[1], batch_duration_ns / 2, tolerance);
+
+	igt_spin_batch_free(gem_fd, spin);
+	close(fd[0]);
+}
+
+/**
+ * Tests that i915 PMU correctly errors out on invalid initialization.
+ * i915 PMU is uncore PMU, thus:
+ *  - sampling period is not supported
+ *  - pid > 0 is not supported since we can't count per-process (we count
+ *    per whole system)
+ *  - cpu != 0 is not supported since i915 PMU exposes cpumask for CPU0
+ */
+static void invalid_init(void)
+{
+	struct perf_event_attr attr;
+	int pid, cpu;
+
+#define ATTR_INIT() \
+do { \
+	memset(&attr, 0, sizeof (attr)); \
+	attr.config = I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0); \
+	attr.type = i915_type_id(); \
+	igt_assert(attr.type != 0); \
+} while(0)
+
+	ATTR_INIT();
+	attr.sample_period = 100;
+	pid = -1;
+	cpu = 0;
+	igt_assert_eq(perf_event_open(&attr, pid, cpu, -1, 0), -1);
+	igt_assert_eq(errno, EINVAL);
+
+	ATTR_INIT();
+	pid = 0;
+	cpu = 0;
+	igt_assert_eq(perf_event_open(&attr, pid, cpu, -1, 0), -1);
+	igt_assert_eq(errno, EINVAL);
+
+	ATTR_INIT();
+	pid = -1;
+	cpu = 1;
+	igt_assert_eq(perf_event_open(&attr, pid, cpu, -1, 0), -1);
+	igt_assert_eq(errno, ENODEV);
+}
+
+static void init_other(unsigned int i, bool valid)
+{
+	int fd;
+
+	fd = perf_i915_open(__I915_PMU_OTHER(i));
+	igt_require(!(fd < 0 && errno == ENODEV));
+	if (valid) {
+		igt_assert(fd >= 0);
+	} else {
+		igt_assert(fd < 0);
+		return;
+	}
+
+	close(fd);
+}
+
+static void read_other(unsigned int i, bool valid)
+{
+	int fd;
+
+	fd = perf_i915_open(__I915_PMU_OTHER(i));
+	igt_require(!(fd < 0 && errno == ENODEV));
+	if (valid) {
+		igt_assert(fd >= 0);
+	} else {
+		igt_assert(fd < 0);
+		return;
+	}
+
+	(void)pmu_read_single(fd);
+
+	close(fd);
+}
+
+static bool cpu0_hotplug_support(void)
+{
+	int fd = open("/sys/devices/system/cpu/cpu0/online", O_WRONLY);
+
+	close(fd);
+
+	return fd > 0;
+}
+
+static void cpu_hotplug(int gem_fd)
+{
+	struct timespec start = { };
+	igt_spin_t *spin;
+	uint64_t val, ref;
+	int fd;
+
+	igt_require(cpu0_hotplug_support());
+
+	spin = igt_spin_batch_new(gem_fd, 0, I915_EXEC_RENDER, 0);
+	fd = perf_i915_open(I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0));
+	igt_assert(fd >= 0);
+
+	igt_nsec_elapsed(&start);
+
+	igt_fork(child, 1) {
+		int cpu = 0;
+
+		for (;;) {
+			char name[128];
+			int cpufd;
+
+			sprintf(name, "/sys/devices/system/cpu/cpu%d/online",
+				cpu);
+			cpufd = open(name, O_WRONLY);
+			if (cpufd == -1) {
+				igt_assert(cpu > 0);
+				break;
+			}
+			igt_assert_eq(write(cpufd, "0", 2), 2);
+
+			usleep(1000 * 1000);
+
+			igt_assert_eq(write(cpufd, "1", 2), 2);
+
+			close(cpufd);
+			cpu++;
+		}
+	}
+
+	igt_waitchildren();
+
+	igt_spin_batch_end(spin);
+	gem_sync(gem_fd, spin->handle);
+
+	ref = igt_nsec_elapsed(&start);
+	val = pmu_read_single(fd);
+
+	assert_within_epsilon(val, ref, tolerance);
+
+	igt_spin_batch_free(gem_fd, spin);
+	close(fd);
+}
+
+static int chain_nop(int gem_fd, int in_fence, bool sync)
+{
+	struct drm_i915_gem_exec_object2 obj = {};
+	struct drm_i915_gem_execbuffer2 eb =
+		{ .buffer_count = 1, .buffers_ptr = (uintptr_t)&obj};
+	const uint32_t bbe = 0xa << 23;
+
+	obj.handle = gem_create(gem_fd, sizeof(bbe));
+	gem_write(gem_fd, obj.handle, 0, &bbe, sizeof(bbe));
+
+	eb.flags = I915_EXEC_RENDER | I915_EXEC_FENCE_OUT;
+
+	if (in_fence >= 0) {
+		eb.flags |= I915_EXEC_FENCE_IN;
+		eb.rsvd2 = in_fence;
+	}
+
+	gem_execbuf_wr(gem_fd, &eb);
+
+	if (sync)
+		gem_sync(gem_fd, obj.handle);
+
+	gem_close(gem_fd, obj.handle);
+	if (in_fence >= 0)
+		close(in_fence);
+
+	return eb.rsvd2 >> 32;
+}
+
+static void
+test_interrupts(int gem_fd)
+{
+	uint64_t idle, busy, prev;
+	int fd, fence = -1;
+	const unsigned int count = 1000;
+	unsigned int i;
+
+	fd = open_pmu(I915_PMU_INTERRUPTS);
+
+	gem_quiescent_gpu(gem_fd);
+
+	/* Wait for idle state. */
+	prev = pmu_read_single(fd);
+	idle = prev + 1;
+	while (idle != prev) {
+		usleep(batch_duration_ns / 1000);
+		prev = idle;
+		idle = pmu_read_single(fd);
+	}
+
+	igt_assert_eq(idle - prev, 0);
+
+	/* Send some no-op batches with chained fences to ensure interrupts. */
+	for (i = 1; i <= count; i++)
+		fence = chain_nop(gem_fd, fence, i < count ? false : true);
+	close(fence);
+
+	/* Check at least as many interrupts have been generated. */
+	busy = pmu_read_single(fd);
+	igt_assert(busy > count - 1);
+
+	close(fd);
+}
+
+static void
+test_frequency(int gem_fd)
+{
+	igt_spin_t *spin;
+	uint64_t idle[2], busy[2];
+	int fd;
+
+	fd = open_group(I915_PMU_REQUESTED_FREQUENCY, -1);
+	open_group(I915_PMU_ACTUAL_FREQUENCY, fd);
+
+	gem_quiescent_gpu(gem_fd);
+	usleep(batch_duration_ns / 1000);
+
+	pmu_read_multi(fd, 2, idle);
+
+	spin = igt_spin_batch_new(gem_fd, 0, I915_EXEC_RENDER, 0);
+	igt_spin_batch_set_timeout(spin, batch_duration_ns);
+	gem_sync(gem_fd, spin->handle);
+
+	pmu_read_multi(fd, 2, busy);
+
+	igt_assert(busy[0] > idle[0]);
+	igt_assert(busy[1] > idle[1]);
+
+	igt_spin_batch_free(gem_fd, spin);
+	close(fd);
+}
+
+static void
+test_rc6(int gem_fd)
+{
+	int64_t duration_ns = 2 * 1000 * 1000 * 1000;
+	uint64_t idle, busy, prev;
+	int fd, fw;
+
+	fd = open_pmu(I915_PMU_RC6_RESIDENCY);
+
+	gem_quiescent_gpu(gem_fd);
+
+	/* Go idle and check full RC6. */
+	prev = pmu_read_single(fd);
+	guaranteed_usleep(duration_ns / 1000);
+	idle = pmu_read_single(fd);
+
+	assert_within_epsilon(idle - prev, duration_ns, tolerance);
+
+	/* Wake up device and check no RC6. */
+	fw = igt_open_forcewake_handle(gem_fd);
+	igt_assert(fw >= 0);
+
+	prev = pmu_read_single(fd);
+	guaranteed_usleep(duration_ns / 1000);
+	busy = pmu_read_single(fd);
+
+	assert_within_epsilon(busy - prev, 0.0, tolerance);
+
+	close(fw);
+	close(fd);
+}
+
+static void
+test_rc6p(int gem_fd)
+{ /* Idle/awake residency check across RC6, RC6p and RC6pp read as one group. */
+	int64_t duration_ns = 2 * 1000 * 1000 * 1000; /* 2 seconds */
+	unsigned int num_pmu = 1; /* counters opened so far; fd itself counts as one */
+	uint64_t idle[3], busy[3], prev[3];
+	unsigned int i;
+	int fd, ret, fw;
+
+	fd = open_group(I915_PMU_RC6_RESIDENCY, -1);
+	ret = perf_i915_open_group(I915_PMU_RC6p_RESIDENCY, fd);
+	if (ret > 0) { /* RC6pp is only attempted once RC6p has opened */
+		num_pmu++;
+		ret = perf_i915_open_group(I915_PMU_RC6pp_RESIDENCY, fd);
+		if (ret > 0)
+			num_pmu++;
+	}
+
+	igt_require(num_pmu == 3); /* needs all three counters in the group */
+
+	gem_quiescent_gpu(gem_fd);
+
+	/*
+	 * Go idle and check full RC6: read every counter in the group on
+	 * either side of a duration_ns long sleep and expect each delta to
+	 * match duration_ns within tolerance.
+	 */
+	pmu_read_multi(fd, num_pmu, prev);
+	guaranteed_usleep(duration_ns / 1000);
+	pmu_read_multi(fd, num_pmu, idle);
+
+	for (i = 0; i < num_pmu; i++)
+		assert_within_epsilon(idle[i] - prev[i], duration_ns,
+				      tolerance);
+
+	/*
+	 * Wake up device and check no RC6: with the forcewake handle held
+	 * open, repeat the same measurement and expect every counter delta
+	 * to be zero.
+	 */
+	fw = igt_open_forcewake_handle(gem_fd);
+	igt_assert(fw >= 0);
+
+	pmu_read_multi(fd, num_pmu, prev);
+	guaranteed_usleep(duration_ns / 1000);
+	pmu_read_multi(fd, num_pmu, busy);
+
+	for (i = 0; i < num_pmu; i++)
+		assert_within_epsilon(busy[i] - prev[i], 0.0, tolerance);
+
+	close(fw);
+	close(fd); /* NOTE(review): the fds returned in ret are never closed here */
+}
+
+igt_main
+{
+	const unsigned int num_other_metrics =
+				I915_PMU_LAST - __I915_PMU_OTHER(0) + 1;
+	unsigned int num_engines = 0;
+	int fd = -1;
+	const struct intel_execution_engine2 *e;
+	unsigned int i;
+
+	igt_fixture { /* shared setup: open the device and count its engines */
+		fd = drm_open_driver_master(DRIVER_INTEL);
+
+		igt_require_gem(fd);
+		igt_require(i915_type_id() > 0);
+
+		for_each_engine_class_instance(fd, e) {
+			if (gem_has_engine(fd, e->class, e->instance))
+				num_engines++;
+		}
+	}
+
+	/**
+	 * Test invalid access via perf API is rejected.
+	 */
+	igt_subtest("invalid-init")
+		invalid_init();
+
+	for_each_engine_class_instance(fd, e) { /* per-engine subtests, suffixed with e->name */
+		/**
+		 * Test that a single engine metric can be initialized.
+		 */
+		igt_subtest_f("init-queued-%s", e->name)
+			init(fd, e, I915_SAMPLE_QUEUED);
+
+		igt_subtest_f("init-busy-%s", e->name)
+			init(fd, e, I915_SAMPLE_BUSY);
+
+		igt_subtest_f("init-wait-%s", e->name)
+			init(fd, e, I915_SAMPLE_WAIT);
+
+		igt_subtest_f("init-sema-%s", e->name)
+			init(fd, e, I915_SAMPLE_SEMA);
+
+		/**
+		 * Test that queued metric works.
+		 */
+		igt_subtest_f("queued-%s", e->name)
+			queued(fd, e);
+
+		/**
+		 * Test that engines show nothing queued when idle or busy.
+		 */
+		igt_subtest_f("idle-no-queued-%s", e->name)
+			single(fd, e, I915_SAMPLE_QUEUED, false);
+
+		igt_subtest_f("busy-no-queued-%s", e->name)
+			single(fd, e, I915_SAMPLE_QUEUED, true);
+
+		/**
+		 * Test that engines show no load when idle.
+		 */
+		igt_subtest_f("idle-%s", e->name)
+			single(fd, e, I915_SAMPLE_BUSY, false);
+
+		/**
+		 * Test that a single engine reports load correctly.
+		 */
+		igt_subtest_f("busy-%s", e->name)
+			single(fd, e, I915_SAMPLE_BUSY, true);
+
+		/**
+		 * Test that when one engine is loaded the others report no
+		 * load.
+		 */
+		igt_subtest_f("busy-check-all-%s", e->name)
+			busy_check_all(fd, e, num_engines);
+
+		/**
+		 * Test that when all except one engine are loaded all loads
+		 * are correctly reported.
+		 */
+		igt_subtest_f("most-busy-check-all-%s", e->name)
+			most_busy_check_all(fd, e, num_engines);
+
+		/**
+		 * Test that semaphore counters report no activity on idle
+		 * or busy engines.
+		 */
+		igt_subtest_f("idle-no-semaphores-%s", e->name)
+			no_sema(fd, e, false);
+
+		igt_subtest_f("busy-no-semaphores-%s", e->name)
+			no_sema(fd, e, true);
+
+		/**
+		 * Check that two perf clients do not influence each other's
+		 * observations.
+		 */
+		igt_subtest_f("multi-client-%s", e->name)
+			multi_client(fd, e);
+	}
+
+	/**
+	 * Test that when all engines are loaded all loads are
+	 * correctly reported.
+	 */
+	igt_subtest("all-busy-check-all")
+		all_busy_check_all(fd, num_engines);
+
+	/**
+	 * Test that non-engine counters can be initialized and read. Apart
+	 * from the invalid metric which should fail.
+	 *
+	 * The loop deliberately runs one index past num_other_metrics; for
+	 * that last iteration the second argument passed to init_other() and
+	 * read_other() is false.
+	 */
+	for (i = 0; i < num_other_metrics + 1; i++) {
+		igt_subtest_f("other-init-%u", i)
+			init_other(i, i < num_other_metrics);
+
+		igt_subtest_f("other-read-%u", i)
+			read_other(i, i < num_other_metrics);
+	}
+
+	/**
+	 * Test counters are not affected by CPU offline/online events.
+	 */
+	igt_subtest("cpu-hotplug")
+		cpu_hotplug(fd);
+
+	/**
+	 * Test GPU frequency.
+	 */
+	igt_subtest("frequency")
+		test_frequency(fd);
+
+	/**
+	 * Test interrupt count reporting.
+	 */
+	igt_subtest("interrupts")
+		test_interrupts(fd);
+
+	/**
+	 * Test RC6 residency reporting.
+	 */
+	igt_subtest("rc6")
+		test_rc6(fd);
+
+	/**
+	 * Test RC6p residency reporting.
+	 */
+	igt_subtest("rc6p")
+		test_rc6p(fd);
+}
-- 
2.9.5

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* ✗ Fi.CI.BAT: warning for IGT PMU support (rev3)
  2017-09-18 11:38 [PATCH i-g-t 0/5] IGT PMU support Tvrtko Ursulin
                   ` (6 preceding siblings ...)
  2017-09-19  9:44 ` ✓ Fi.CI.IGT: " Patchwork
@ 2017-09-20 16:52 ` Patchwork
  7 siblings, 0 replies; 15+ messages in thread
From: Patchwork @ 2017-09-20 16:52 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

== Series Details ==

Series: IGT PMU support (rev3)
URL   : https://patchwork.freedesktop.org/series/28253/
State : warning

== Summary ==

IGT patchset tested on top of latest successful build
1043c09ccbcba8e5c2ec5f2a358a442346348bd8 tests/kms_cursor_legacy: Do not start collecting CRC after making FB busy

with latest DRM-Tip kernel build CI_DRM_3113
ed7a99bf23ce drm-tip: 2017y-09m-20d-11h-03m-37s UTC integration manifest

Test gem_exec_reloc:
        Subgroup basic-write-cpu:
                pass       -> DMESG-WARN (fi-kbl-r)
        Subgroup basic-write-gtt:
                pass       -> DMESG-WARN (fi-kbl-r)
        Subgroup basic-gtt-noreloc:
                pass       -> DMESG-WARN (fi-kbl-r)
        Subgroup basic-write-gtt-noreloc:
                pass       -> DMESG-WARN (fi-kbl-r)
        Subgroup basic-write-read-noreloc:
                pass       -> DMESG-WARN (fi-kbl-r)
        Subgroup basic-cpu-active:
                pass       -> DMESG-WARN (fi-kbl-r)
        Subgroup basic-write-gtt-active:
                pass       -> DMESG-WARN (fi-kbl-r)
Test gem_exec_suspend:
        Subgroup basic-s3:
                pass       -> INCOMPLETE (fi-kbl-r) fdo#102850
Test kms_force_connector_basic:
        Subgroup force-connector-state:
                skip       -> PASS       (fi-ivb-3520m)
        Subgroup force-edid:
                skip       -> PASS       (fi-ivb-3520m)
        Subgroup force-load-detect:
                skip       -> PASS       (fi-ivb-3520m)
        Subgroup prune-stale-modes:
                skip       -> PASS       (fi-ivb-3520m)
Test pm_rpm:
        Subgroup basic-rte:
                dmesg-warn -> PASS       (fi-cfl-s) fdo#102294
Test drv_module_reload:
        Subgroup basic-reload:
                pass       -> DMESG-WARN (fi-glk-1) fdo#102777 +1

fdo#102850 https://bugs.freedesktop.org/show_bug.cgi?id=102850
fdo#102294 https://bugs.freedesktop.org/show_bug.cgi?id=102294
fdo#102777 https://bugs.freedesktop.org/show_bug.cgi?id=102777

fi-bdw-5557u     total:289  pass:268  dwarn:0   dfail:0   fail:0   skip:21  time:448s
fi-bdw-gvtdvm    total:289  pass:265  dwarn:0   dfail:0   fail:0   skip:24  time:477s
fi-blb-e6850     total:289  pass:224  dwarn:1   dfail:0   fail:0   skip:64  time:425s
fi-bsw-n3050     total:289  pass:243  dwarn:0   dfail:0   fail:0   skip:46  time:520s
fi-bwr-2160      total:289  pass:184  dwarn:0   dfail:0   fail:0   skip:105 time:277s
fi-bxt-j4205     total:289  pass:260  dwarn:0   dfail:0   fail:0   skip:29  time:507s
fi-byt-j1900     total:289  pass:254  dwarn:1   dfail:0   fail:0   skip:34  time:498s
fi-byt-n2820     total:289  pass:250  dwarn:1   dfail:0   fail:0   skip:38  time:493s
fi-cfl-s         total:289  pass:223  dwarn:34  dfail:0   fail:0   skip:32  time:545s
fi-elk-e7500     total:289  pass:230  dwarn:0   dfail:0   fail:0   skip:59  time:423s
fi-glk-1         total:289  pass:258  dwarn:2   dfail:0   fail:0   skip:29  time:569s
fi-hsw-4770      total:289  pass:263  dwarn:0   dfail:0   fail:0   skip:26  time:433s
fi-hsw-4770r     total:289  pass:263  dwarn:0   dfail:0   fail:0   skip:26  time:407s
fi-ilk-650       total:289  pass:229  dwarn:0   dfail:0   fail:0   skip:60  time:438s
fi-ivb-3520m     total:289  pass:261  dwarn:0   dfail:0   fail:0   skip:28  time:490s
fi-ivb-3770      total:289  pass:261  dwarn:0   dfail:0   fail:0   skip:28  time:466s
fi-kbl-7500u     total:118  pass:100  dwarn:1   dfail:0   fail:0   skip:16 
fi-kbl-7560u     total:289  pass:270  dwarn:0   dfail:0   fail:0   skip:19  time:577s
fi-kbl-r         total:118  pass:90   dwarn:7   dfail:0   fail:0   skip:20 
fi-pnv-d510      total:289  pass:223  dwarn:1   dfail:0   fail:0   skip:65  time:544s
fi-skl-6260u     total:289  pass:269  dwarn:0   dfail:0   fail:0   skip:20  time:451s
fi-skl-6700k     total:289  pass:265  dwarn:0   dfail:0   fail:0   skip:24  time:753s
fi-skl-6770hq    total:289  pass:269  dwarn:0   dfail:0   fail:0   skip:20  time:501s
fi-skl-gvtdvm    total:289  pass:266  dwarn:0   dfail:0   fail:0   skip:23  time:479s
fi-snb-2520m     total:289  pass:251  dwarn:0   dfail:0   fail:0   skip:38  time:573s
fi-snb-2600      total:289  pass:249  dwarn:0   dfail:0   fail:1   skip:39  time:419s

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_232/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2017-09-20 16:52 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-09-18 11:38 [PATCH i-g-t 0/5] IGT PMU support Tvrtko Ursulin
2017-09-18 11:38 ` [PATCH i-g-t 1/5] intel-gpu-overlay: Move local perf implementation to a library Tvrtko Ursulin
2017-09-18 11:38 ` [PATCH i-g-t 2/5] intel-gpu-overlay: Consolidate perf PMU access to library Tvrtko Ursulin
2017-09-18 11:38 ` [PATCH i-g-t 3/5] intel-gpu-overlay: Fix interrupts PMU readout Tvrtko Ursulin
2017-09-18 11:38 ` [PATCH i-g-t 4/5] intel-gpu-overlay: Catch-up to new i915 PMU Tvrtko Ursulin
2017-09-18 11:38 ` [PATCH i-g-t 5/5] tests/perf_pmu: Tests for i915 PMU API Tvrtko Ursulin
2017-09-18 13:17   ` Chris Wilson
2017-09-19  8:37     ` Tvrtko Ursulin
2017-09-19  9:58       ` Chris Wilson
2017-09-18 21:18   ` Rogozhkin, Dmitry V
2017-09-19  8:19     ` Tvrtko Ursulin
2017-09-20 16:12   ` [PATCH v2 " Tvrtko Ursulin
2017-09-18 18:16 ` ✓ Fi.CI.BAT: success for IGT PMU support (rev2) Patchwork
2017-09-19  9:44 ` ✓ Fi.CI.IGT: " Patchwork
2017-09-20 16:52 ` ✗ Fi.CI.BAT: warning for IGT PMU support (rev3) Patchwork

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.