All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC] benchmarks/gem_slice_shutdown: microbenchmark for slice shutdown delays
@ 2017-05-02 15:08 Oscar Mateo
  0 siblings, 0 replies; only message in thread
From: Oscar Mateo @ 2017-05-02 15:08 UTC (permalink / raw)
  To: intel-gfx

Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
---
 benchmarks/Makefile.sources     |   1 +
 benchmarks/gem_slice_shutdown.c | 295 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 296 insertions(+)
 create mode 100644 benchmarks/gem_slice_shutdown.c

diff --git a/benchmarks/Makefile.sources b/benchmarks/Makefile.sources
index 3a94115..591b5ae 100644
--- a/benchmarks/Makefile.sources
+++ b/benchmarks/Makefile.sources
@@ -13,6 +13,7 @@ benchmarks_prog_list =			\
 	gem_mmap			\
 	gem_prw				\
 	gem_set_domain			\
+	gem_slice_shutdown		\
 	gem_syslatency			\
 	gem_wsim			\
 	kms_vblank			\
diff --git a/benchmarks/gem_slice_shutdown.c b/benchmarks/gem_slice_shutdown.c
new file mode 100644
index 0000000..dcb17c1
--- /dev/null
+++ b/benchmarks/gem_slice_shutdown.c
@@ -0,0 +1,295 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Oscar Mateo <oscar.mateo@intel.com>
+ *
+ */
+
+/*
+ * This tool measures the time taken to change the number of enabled slices.
+ */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <time.h>
+#include <assert.h>
+
+#include "drm.h"
+#include "ioctl_wrappers.h"
+#include "drmtest.h"
+#include "intel_io.h"
+#include "igt_stats.h"
+#include "intel_chipset.h"
+#include "intel_bufmgr.h"
+
+/*
+ * Offset of the GEN8_R_PWR_CLK_STATE register and the bit fields of its
+ * value that this tool decodes (print_rpcs_config) or rewrites (do_test).
+ */
+#define GEN8_R_PWR_CLK_STATE	(0x20C8)
+/* Unsigned constant: shifting a signed 1 into bit 31 is undefined in C. */
+#define   GEN8_RPCS_ENABLE		(1u << 31)
+#define   GEN8_RPCS_S_CNT_ENABLE	(1 << 18)
+#define   GEN8_RPCS_S_CNT_SHIFT		15
+#define   GEN8_RPCS_S_CNT_MASK		(0x7 << GEN8_RPCS_S_CNT_SHIFT)
+#define   GEN8_RPCS_SS_CNT_ENABLE	(1 << 11)
+#define   GEN8_RPCS_SS_CNT_SHIFT	8
+#define   GEN8_RPCS_SS_CNT_MASK		(0x7 << GEN8_RPCS_SS_CNT_SHIFT)
+#define   GEN8_RPCS_EU_MAX_SHIFT	4
+#define   GEN8_RPCS_EU_MAX_MASK		(0xf << GEN8_RPCS_EU_MAX_SHIFT)
+#define   GEN8_RPCS_EU_MIN_SHIFT	0
+#define   GEN8_RPCS_EU_MIN_MASK		(0xf << GEN8_RPCS_EU_MIN_SHIFT)
+
+/* Return the string "yes" when @x is true and "no" otherwise. */
+static const char *yesno(bool x)
+{
+	if (x)
+		return "yes";
+
+	return "no";
+}
+
+/*
+ * Decode @rpcs_config, a GEN8_R_PWR_CLK_STATE value, and print its fields
+ * to stdout: the RPCS enable bit, the slice and subslice count enable bits
+ * with their counts, and the EU max/min fields.
+ */
+static void print_rpcs_config(uint32_t rpcs_config)
+{
+	/* Enable bits: any non-zero masked value converts to true. */
+	const bool rpcs_on = rpcs_config & GEN8_RPCS_ENABLE;
+	const bool slices_on = rpcs_config & GEN8_RPCS_S_CNT_ENABLE;
+	const bool subslices_on = rpcs_config & GEN8_RPCS_SS_CNT_ENABLE;
+
+	/* Count fields: mask first, then shift down to bit 0. */
+	const uint slices =
+		(rpcs_config & GEN8_RPCS_S_CNT_MASK) >> GEN8_RPCS_S_CNT_SHIFT;
+	const uint subslices =
+		(rpcs_config & GEN8_RPCS_SS_CNT_MASK) >> GEN8_RPCS_SS_CNT_SHIFT;
+	const uint max_eu =
+		(rpcs_config & GEN8_RPCS_EU_MAX_MASK) >> GEN8_RPCS_EU_MAX_SHIFT;
+	const uint min_eu =
+		(rpcs_config & GEN8_RPCS_EU_MIN_MASK) >> GEN8_RPCS_EU_MIN_SHIFT;
+
+	printf("RPCS enabled: %s\n", yesno(rpcs_on));
+	printf("Slice count enabled: %s, count: %u\n",
+	       yesno(slices_on), slices);
+	printf("Subslice count enabled: %s, count: %u\n",
+	       yesno(subslices_on), subslices);
+	printf("EU max: %u, min: %u\n", max_eu, min_eu);
+}
+
+/*
+ * Fill in @buf as an untiled (I915_TILING_NONE) buffer of @size bytes with
+ * a 4096 byte stride, backed by a buffer object newly allocated from
+ * @bufmgr.
+ *
+ * The descriptor is cleared first: slice_shutdown() passes uninitialised
+ * stack variables, so any field of struct igt_buf that is not assigned
+ * below would otherwise hold stack garbage.
+ *
+ * Exits the program if the buffer object cannot be allocated, since every
+ * later use of @buf->bo would dereference a NULL pointer.
+ */
+static void init_buffer(drm_intel_bufmgr *bufmgr,
+			struct igt_buf *buf,
+			uint32_t size)
+{
+	memset(buf, 0, sizeof(*buf));
+
+	buf->bo = drm_intel_bo_alloc(bufmgr, "", size, 4096);
+	if (!buf->bo) {
+		fprintf(stderr, "Failed to allocate a %" PRIu32 " byte buffer\n",
+			size);
+		exit(-1);
+	}
+
+	buf->size = size;
+	buf->tiling = I915_TILING_NONE;
+	buf->stride = 4096;
+}
+
+/* Return the time from @start to @end, in microseconds. */
+static double elapsed(const struct timespec *start, const struct timespec *end)
+{
+	const double usec_per_sec = 1e6;
+	const double usec_per_nsec = 1e-3;
+
+	return usec_per_sec * (end->tv_sec - start->tv_sec) +
+	       usec_per_nsec * (end->tv_nsec - start->tv_nsec);
+}
+
+#define MI_STORE_REGISTER_MEM_64_BIT_ADDR	((0x24 << 23) | 2)
+
+/*
+ * Read GEN8_R_PWR_CLK_STATE as seen from @context.
+ *
+ * A batch that stores the register into a temporary 4 byte buffer object
+ * is submitted with @context, and the stored value is then read back
+ * through a CPU mapping of that buffer.
+ *
+ * Returns the value read, or 0 if the temporary buffer could not be
+ * allocated or mapped. A value of 0 has GEN8_RPCS_ENABLE clear, so
+ * slice_shutdown() rejects it with its "already enabled" error.
+ */
+static uint32_t do_read_pwrclk_state(drm_intel_bufmgr *bufmgr,
+				     struct intel_batchbuffer *batch,
+				     drm_intel_context *context)
+{
+	uint32_t rpcs_config = 0;
+	drm_intel_bo *dst_bo;
+
+	dst_bo = drm_intel_bo_alloc(bufmgr, "dst", 4, 4096);
+	if (!dst_bo) {
+		fprintf(stderr, "Failed to allocate the register readback buffer\n");
+		return 0;
+	}
+
+	BEGIN_BATCH(3, 1);
+	OUT_BATCH(MI_STORE_REGISTER_MEM_64_BIT_ADDR);
+	OUT_BATCH(GEN8_R_PWR_CLK_STATE);
+	OUT_RELOC(dst_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
+	ADVANCE_BATCH();
+
+	intel_batchbuffer_flush_with_context(batch, context);
+
+	/* The result is only read, so a read-only mapping is sufficient. */
+	if (drm_intel_bo_map(dst_bo, 0) == 0) {
+		rpcs_config = *(const uint32_t *)dst_bo->virtual;
+		drm_intel_bo_unmap(dst_bo);
+	} else {
+		fprintf(stderr, "Failed to map the register readback buffer\n");
+	}
+
+	drm_intel_bo_unreference(dst_bo);
+
+	return rpcs_config;
+}
+
+#define LOCAL_MI_LOAD_REGISTER_IMM	(0x22 << 23)
+
+#define GFX_OP_PIPE_CONTROL(len)	((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2))
+#define   PIPE_CONTROL_CS_STALL				(1<<20)
+#define   PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH	(1<<12)
+#define   PIPE_CONTROL_FLUSH_ENABLE			(1<<7)
+#define   PIPE_CONTROL_DC_FLUSH_ENABLE			(1<<5)
+#define   PIPE_CONTROL_DEPTH_CACHE_FLUSH		(1<<0)
+
+/*
+ * Append to @batch a register load that writes @rpcs_config into
+ * GEN8_R_PWR_CLK_STATE, followed by a PIPE_CONTROL carrying cache flush
+ * and CS stall flags.
+ *
+ * The commands are only queued in @batch; nothing is submitted here and
+ * @context is not used by this function. The scratch buffer object exists
+ * only to supply the address dword(s) of the PIPE_CONTROL.
+ *
+ * NOTE(review): if OUT_RELOC emits a two-dword address on this hardware
+ * (which the extra relocation dword reserved by BEGIN_BATCH(9, 1)
+ * suggests), the PIPE_CONTROL below is followed by one zero dword beyond
+ * the length of 6 declared in its header - confirm that this padding is
+ * intended.
+ */
+static void emit_config_slice_count(drm_intel_bufmgr *bufmgr,
+				    struct intel_batchbuffer *batch,
+				    drm_intel_context *context,
+				    uint32_t rpcs_config)
+{
+	const uint32_t flush_flags = PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
+				     PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+				     PIPE_CONTROL_DC_FLUSH_ENABLE |
+				     PIPE_CONTROL_FLUSH_ENABLE |
+				     PIPE_CONTROL_CS_STALL;
+	drm_intel_bo *scratch = drm_intel_bo_alloc(bufmgr, "scratch", 4, 4096);
+
+	BEGIN_BATCH(9, 1);
+
+	/* Load the new power/clock state value into the register. */
+	OUT_BATCH(LOCAL_MI_LOAD_REGISTER_IMM | 1);
+	OUT_BATCH(GEN8_R_PWR_CLK_STATE);
+	OUT_BATCH(rpcs_config);
+
+	/* Flush and stall after the register write. */
+	OUT_BATCH(GFX_OP_PIPE_CONTROL(6));
+	OUT_BATCH(flush_flags);
+	OUT_RELOC(scratch, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+
+	ADVANCE_BATCH();
+
+	/* Drop this function's own reference to the scratch buffer. */
+	drm_intel_bo_unreference(scratch);
+}
+
+/*
+ * Run one render copy from @src to @dst with the slice count field of
+ * @rpcs_config replaced by @c.
+ *
+ * The register write is queued in @batch ahead of the copy, then the
+ * function blocks in drm_intel_bo_wait_rendering() on the destination
+ * buffer object before returning.
+ */
+static void do_test(igt_render_copyfunc_t rendercopy,
+		    drm_intel_bufmgr *bufmgr,
+		    drm_intel_context *context,
+		    struct intel_batchbuffer *batch,
+		    uint32_t rpcs_config,
+		    struct igt_buf *src,
+		    struct igt_buf *dst,
+		    uint c)
+{
+	/* Keep every other field and swap in the requested slice count. */
+	const uint32_t new_config =
+		(rpcs_config & ~GEN8_RPCS_S_CNT_MASK) |
+		(c << GEN8_RPCS_S_CNT_SHIFT);
+
+	emit_config_slice_count(bufmgr, batch, context, new_config);
+	rendercopy(batch, context, src, 0, 0, 0, 0, dst, 0, 0);
+	drm_intel_bo_wait_rendering(dst->bo);
+}
+
+/*
+ * Measure and print the cost of switching between @c1 and @c2 slices.
+ *
+ * After one warm-up run at @c1, each of LOOPS iterations switches to @c2
+ * and then back to @c1. After every switch do_test() is timed twice with
+ * the same slice count: the first run ("real") is the one that follows the
+ * change of slice count, the second ("baseline") repeats the run with the
+ * count unchanged. The printed figure for each direction is the difference
+ * between the two igt_mean values, in microseconds.
+ */
+static void do_measures(igt_render_copyfunc_t rendercopy,
+			drm_intel_bufmgr *bufmgr,
+			drm_intel_context *context,
+			struct intel_batchbuffer *batch,
+			uint32_t rpcs_config,
+			struct igt_buf *src,
+			struct igt_buf *dst,
+			uint c1, uint c2)
+{
+	/* Index 0 is the c1 -> c2 switch, index 1 the switch back to c1. */
+	const uint target[2] = { c2, c1 };
+	struct igt_mean real[2], baseline[2];
+	struct timespec t0, t1;
+	int loop, dir;
+
+	for (dir = 0; dir < 2; dir++) {
+		igt_mean_init(&real[dir]);
+		igt_mean_init(&baseline[dir]);
+	}
+
+	/* Initial config and warm up */
+	do_test(rendercopy, bufmgr, context, batch, rpcs_config, src, dst, c1);
+
+#define LOOPS 1000
+	for (loop = 0; loop < LOOPS; loop++) {
+		for (dir = 0; dir < 2; dir++) {
+			const uint c = target[dir];
+
+			/* First run: follows the change of slice count. */
+			clock_gettime(CLOCK_MONOTONIC, &t0);
+			do_test(rendercopy, bufmgr, context, batch,
+				rpcs_config, src, dst, c);
+			clock_gettime(CLOCK_MONOTONIC, &t1);
+			igt_mean_add(&real[dir], elapsed(&t0, &t1));
+
+			/* Second run: same slice count as the first. */
+			clock_gettime(CLOCK_MONOTONIC, &t0);
+			do_test(rendercopy, bufmgr, context, batch,
+				rpcs_config, src, dst, c);
+			clock_gettime(CLOCK_MONOTONIC, &t1);
+			igt_mean_add(&baseline[dir], elapsed(&t0, &t1));
+		}
+	}
+
+	printf("Slice poweron time (%u -> %u): %7.3f us\n", c1, c2,
+		igt_mean_get(&real[0]) - igt_mean_get(&baseline[0]));
+	printf("Slice shutdown time (%u -> %u): %7.3f us\n", c2, c1,
+		igt_mean_get(&real[1]) - igt_mean_get(&baseline[1]));
+
+	printf("\n");
+}
+
+/*
+ * Run the benchmark on the device open at @fd with PCI device id @devid.
+ *
+ * Reads and prints the current GEN8_R_PWR_CLK_STATE value, requires both
+ * the RPCS enable and the slice count enable bits to be set, and then
+ * measures every pair of slice counts (i, j) with 1 <= i < j <= the slice
+ * count currently programmed in the register.
+ *
+ * Exits the program with an error message if the rendering objects cannot
+ * be set up or if slice count control is not already enabled.
+ */
+static void slice_shutdown(int fd, int devid)
+{
+	igt_render_copyfunc_t rendercopy;
+	drm_intel_bufmgr *bufmgr;
+	drm_intel_context *context;
+	struct intel_batchbuffer *batch;
+	struct igt_buf src, dst;
+	uint32_t rpcs_config;
+	bool rpcs_enable, s_enable;
+	uint s_count;
+	uint i, j;
+
+	/* Each of these is dereferenced or called later: check them now. */
+	rendercopy = igt_get_render_copyfunc(devid);
+	if (!rendercopy) {
+		fprintf(stderr, "No render copy function for this device\n");
+		exit(-1);
+	}
+
+	bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+	if (!bufmgr) {
+		fprintf(stderr, "Failed to initialize the buffer manager\n");
+		exit(-1);
+	}
+
+	context = drm_intel_gem_context_create(bufmgr);
+	if (!context) {
+		fprintf(stderr, "Failed to create a hardware context\n");
+		exit(-1);
+	}
+
+	batch = intel_batchbuffer_alloc(bufmgr, devid);
+
+	init_buffer(bufmgr, &src, 10*4096*4096);
+	init_buffer(bufmgr, &dst, 10*4096*4096);
+
+	rpcs_config = do_read_pwrclk_state(bufmgr, batch, context);
+	print_rpcs_config(rpcs_config);
+
+	rpcs_enable = rpcs_config & GEN8_RPCS_ENABLE;
+	s_enable = rpcs_config & GEN8_RPCS_S_CNT_ENABLE;
+
+	if (!rpcs_enable || !s_enable) {
+		fprintf(stderr, "We need slice count to be already enabled\n");
+		exit(-1);
+	}
+
+	/* Unsigned, like the loop counters it is compared against. */
+	s_count = (rpcs_config & GEN8_RPCS_S_CNT_MASK) >> GEN8_RPCS_S_CNT_SHIFT;
+
+	for (i = 1; i <= s_count; i++)
+		for (j = i + 1; j <= s_count; j++)
+			do_measures(rendercopy, bufmgr, context, batch,
+				    rpcs_config, &src, &dst, i, j);
+
+	/* Release the copy buffers before tearing down the buffer manager. */
+	drm_intel_bo_unreference(src.bo);
+	drm_intel_bo_unreference(dst.bo);
+
+	intel_batchbuffer_free(batch);
+	drm_intel_gem_context_destroy(context);
+	drm_intel_bufmgr_destroy(bufmgr);
+}
+
+/*
+ * Open the Intel DRM device, check that it is gen8 or newer and run the
+ * slice shutdown benchmark on it. Command line arguments are not used.
+ *
+ * Returns 0 after the benchmark has run; exits with status -1 if the
+ * device is older than gen8.
+ */
+int main(int argc, char **argv)
+{
+	int fd, devid;
+
+	fd = drm_open_driver(DRIVER_INTEL);
+
+	devid = intel_get_drm_devid(fd);
+	if (intel_gen(devid) < 8) {
+		fprintf(stderr, "gen8+ required, yours is gen%u\n",
+				intel_gen(devid));
+		close(fd);
+		exit(-1);
+	}
+
+	slice_shutdown(fd, devid);
+
+	/* Release the device handle instead of relying on process exit. */
+	close(fd);
+
+	return 0;
+}
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] only message in thread

only message in thread, other threads:[~2017-05-02 22:08 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-05-02 15:08 [RFC] benchmarks/gem_slice_shutdown: microbenchmark for slice shutdown delays Oscar Mateo

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.