All of lore.kernel.org
 help / color / mirror / Atom feed
* [CI 1/9] drm/i915/perf: Replace global wakeref tracking with engine-pm
@ 2019-10-10 19:48 Chris Wilson
  2019-10-10 19:48 ` [CI 2/9] drm/i915/perf: introduce a versioning of the i915-perf uapi Chris Wilson
                   ` (14 more replies)
  0 siblings, 15 replies; 20+ messages in thread
From: Chris Wilson @ 2019-10-10 19:48 UTC (permalink / raw)
  To: intel-gfx

As we now have a specific engine to use OA on, exchange the top-level
runtime-pm wakeref with the engine-pm. This still results in the same
top-level runtime-pm, but with more nuances to keep the engine and its
gt awake.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_perf.c       | 8 ++++----
 drivers/gpu/drm/i915/i915_perf_types.h | 6 ------
 2 files changed, 4 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 77c3cef64548..c4a436dfb7db 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -196,7 +196,7 @@
 #include <linux/uuid.h>
 
 #include "gem/i915_gem_context.h"
-#include "gem/i915_gem_pm.h"
+#include "gt/intel_engine_pm.h"
 #include "gt/intel_engine_user.h"
 #include "gt/intel_lrc_reg.h"
 
@@ -1353,7 +1353,7 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
 	free_oa_buffer(stream);
 
 	intel_uncore_forcewake_put(stream->uncore, FORCEWAKE_ALL);
-	intel_runtime_pm_put(stream->uncore->rpm, stream->wakeref);
+	intel_engine_pm_put(stream->engine);
 
 	if (stream->ctx)
 		oa_put_render_ctx_id(stream);
@@ -2218,7 +2218,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 	 *   In our case we are expecting that taking pm + FORCEWAKE
 	 *   references will effectively disable RC6.
 	 */
-	stream->wakeref = intel_runtime_pm_get(stream->uncore->rpm);
+	intel_engine_pm_get(stream->engine);
 	intel_uncore_forcewake_get(stream->uncore, FORCEWAKE_ALL);
 
 	ret = alloc_oa_buffer(stream);
@@ -2252,7 +2252,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 	put_oa_config(stream->oa_config);
 
 	intel_uncore_forcewake_put(stream->uncore, FORCEWAKE_ALL);
-	intel_runtime_pm_put(stream->uncore->rpm, stream->wakeref);
+	intel_engine_pm_put(stream->engine);
 
 err_config:
 	if (stream->ctx)
diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h
index a91ae2d1a543..eb8d1ebd5095 100644
--- a/drivers/gpu/drm/i915/i915_perf_types.h
+++ b/drivers/gpu/drm/i915/i915_perf_types.h
@@ -134,12 +134,6 @@ struct i915_perf_stream {
 	 */
 	struct intel_uncore *uncore;
 
-	/**
-	 * @wakeref: As we keep the device awake while the perf stream is
-	 * active, we track our runtime pm reference for later release.
-	 */
-	intel_wakeref_t wakeref;
-
 	/**
 	 * @engine: Engine associated with this performance stream.
 	 */
-- 
2.23.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [CI 2/9] drm/i915/perf: introduce a versioning of the i915-perf uapi
  2019-10-10 19:48 [CI 1/9] drm/i915/perf: Replace global wakeref tracking with engine-pm Chris Wilson
@ 2019-10-10 19:48 ` Chris Wilson
  2019-10-10 19:48 ` [CI 3/9] drm/i915/perf: allow for CS OA configs to be created lazily Chris Wilson
                   ` (13 subsequent siblings)
  14 siblings, 0 replies; 20+ messages in thread
From: Chris Wilson @ 2019-10-10 19:48 UTC (permalink / raw)
  To: intel-gfx

From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>

Reporting this version will help applications figure out what level of
support the running kernel provides.

v2: Add i915_perf_ioctl_version() (Chris)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_getparam.c |  4 ++++
 drivers/gpu/drm/i915/i915_perf.c     | 10 ++++++++++
 drivers/gpu/drm/i915/i915_perf.h     |  1 +
 include/uapi/drm/i915_drm.h          | 21 +++++++++++++++++++++
 4 files changed, 36 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c
index f4b3cbb1adce..ad33fbe90a28 100644
--- a/drivers/gpu/drm/i915/i915_getparam.c
+++ b/drivers/gpu/drm/i915/i915_getparam.c
@@ -5,6 +5,7 @@
 #include "gt/intel_engine_user.h"
 
 #include "i915_drv.h"
+#include "i915_perf.h"
 
 int i915_getparam_ioctl(struct drm_device *dev, void *data,
 			struct drm_file *file_priv)
@@ -156,6 +157,9 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
 	case I915_PARAM_MMAP_GTT_COHERENT:
 		value = INTEL_INFO(i915)->has_coherent_ggtt;
 		break;
+	case I915_PARAM_PERF_REVISION:
+		value = i915_perf_ioctl_version();
+		break;
 	default:
 		DRM_DEBUG("Unknown parameter %d\n", param->param);
 		return -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index c4a436dfb7db..0b51ab3ab523 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -3665,3 +3665,13 @@ void i915_perf_fini(struct drm_i915_private *i915)
 	memset(&perf->ops, 0, sizeof(perf->ops));
 	perf->i915 = NULL;
 }
+
+/**
+ * i915_perf_ioctl_version - Version of the i915-perf subsystem
+ *
+ * This version number is used by userspace to detect available features.
+ */
+int i915_perf_ioctl_version(void)
+{
+	return 1;
+}
diff --git a/drivers/gpu/drm/i915/i915_perf.h b/drivers/gpu/drm/i915/i915_perf.h
index ff412fb0dbbf..295e33e8eef7 100644
--- a/drivers/gpu/drm/i915/i915_perf.h
+++ b/drivers/gpu/drm/i915/i915_perf.h
@@ -20,6 +20,7 @@ void i915_perf_init(struct drm_i915_private *i915);
 void i915_perf_fini(struct drm_i915_private *i915);
 void i915_perf_register(struct drm_i915_private *i915);
 void i915_perf_unregister(struct drm_i915_private *i915);
+int i915_perf_ioctl_version(void);
 
 int i915_perf_open_ioctl(struct drm_device *dev, void *data,
 			 struct drm_file *file);
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 30c542144016..c50c712b3771 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -611,6 +611,13 @@ typedef struct drm_i915_irq_wait {
  * See I915_EXEC_FENCE_OUT and I915_EXEC_FENCE_SUBMIT.
  */
 #define I915_PARAM_HAS_EXEC_SUBMIT_FENCE 53
+
+/*
+ * Revision of the i915-perf uAPI. The value returned helps determine what
+ * i915-perf features are available. See drm_i915_perf_property_id.
+ */
+#define I915_PARAM_PERF_REVISION	54
+
 /* Must be kept compact -- no holes and well documented */
 
 typedef struct drm_i915_getparam {
@@ -1844,23 +1851,31 @@ enum drm_i915_perf_property_id {
 	 * Open the stream for a specific context handle (as used with
 	 * execbuffer2). A stream opened for a specific context this way
 	 * won't typically require root privileges.
+	 *
+	 * This property is available in perf revision 1.
 	 */
 	DRM_I915_PERF_PROP_CTX_HANDLE = 1,
 
 	/**
 	 * A value of 1 requests the inclusion of raw OA unit reports as
 	 * part of stream samples.
+	 *
+	 * This property is available in perf revision 1.
 	 */
 	DRM_I915_PERF_PROP_SAMPLE_OA,
 
 	/**
 	 * The value specifies which set of OA unit metrics should be
 	 * be configured, defining the contents of any OA unit reports.
+	 *
+	 * This property is available in perf revision 1.
 	 */
 	DRM_I915_PERF_PROP_OA_METRICS_SET,
 
 	/**
 	 * The value specifies the size and layout of OA unit reports.
+	 *
+	 * This property is available in perf revision 1.
 	 */
 	DRM_I915_PERF_PROP_OA_FORMAT,
 
@@ -1870,6 +1885,8 @@ enum drm_i915_perf_property_id {
 	 * from this exponent as follows:
 	 *
 	 *   80ns * 2^(period_exponent + 1)
+	 *
+	 * This property is available in perf revision 1.
 	 */
 	DRM_I915_PERF_PROP_OA_EXPONENT,
 
@@ -1901,6 +1918,8 @@ struct drm_i915_perf_open_param {
  * to close and re-open a stream with the same configuration.
  *
  * It's undefined whether any pending data for the stream will be lost.
+ *
+ * This ioctl is available in perf revision 1.
  */
 #define I915_PERF_IOCTL_ENABLE	_IO('i', 0x0)
 
@@ -1908,6 +1927,8 @@ struct drm_i915_perf_open_param {
  * Disable data capture for a stream.
  *
  * It is an error to try and read a stream that is disabled.
+ *
+ * This ioctl is available in perf revision 1.
  */
 #define I915_PERF_IOCTL_DISABLE	_IO('i', 0x1)
 
-- 
2.23.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [CI 3/9] drm/i915/perf: allow for CS OA configs to be created lazily
  2019-10-10 19:48 [CI 1/9] drm/i915/perf: Replace global wakeref tracking with engine-pm Chris Wilson
  2019-10-10 19:48 ` [CI 2/9] drm/i915/perf: introduce a versioning of the i915-perf uapi Chris Wilson
@ 2019-10-10 19:48 ` Chris Wilson
  2019-10-10 19:48 ` [CI 4/9] drm/i915: add support for perf configuration queries Chris Wilson
                   ` (12 subsequent siblings)
  14 siblings, 0 replies; 20+ messages in thread
From: Chris Wilson @ 2019-10-10 19:48 UTC (permalink / raw)
  To: intel-gfx

From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>

Here we introduce a mechanism by which the execbuf part of the i915
driver will be able to request that a batch buffer containing the
programming for a particular OA config be created.

We'll execute these OA configuration buffers right before executing a
set of userspace commands so that a particular user batchbuffer is
executed with a given OA configuration.

This mechanism essentially allows the userspace driver to go through
several OA configurations without having to open/close the i915/perf
stream.

v2: No need for locking on OA config object creation (Chris)
    Flush cpu mapping of OA config (Chris)

v3: Properly deal with the perf_metric lock (Chris/Lionel)

v4: Fix oa config unref/put when not found (Lionel)

v5: Allocate BOs for configurations on the stream instead of globally
    (Lionel)

v6: Fix 64bit division (Chris)

v7: Store allocated config BOs into the stream (Lionel)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v4)
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> (v4)
---
 drivers/gpu/drm/i915/gt/intel_gpu_commands.h |   1 +
 drivers/gpu/drm/i915/i915_perf.c             | 107 +++++++++++--------
 drivers/gpu/drm/i915/i915_perf.h             |  24 +++++
 drivers/gpu/drm/i915/i915_perf_types.h       |  23 ++--
 4 files changed, 102 insertions(+), 53 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index b0227ab2fe1b..0987100c786b 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -138,6 +138,7 @@
 /* Gen11+. addr = base + (ctx_restore ? offset & GENMASK(12,2) : offset) */
 #define   MI_LRI_CS_MMIO		(1<<19)
 #define   MI_LRI_FORCE_POSTED		(1<<12)
+#define MI_LOAD_REGISTER_IMM_MAX_REGS (126)
 #define MI_STORE_REGISTER_MEM        MI_INSTR(0x24, 1)
 #define MI_STORE_REGISTER_MEM_GEN8   MI_INSTR(0x24, 2)
 #define   MI_SRM_LRM_GLOBAL_GTT		(1<<22)
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 0b51ab3ab523..5fa0df46fcc3 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -369,52 +369,52 @@ struct perf_open_properties {
 	struct intel_engine_cs *engine;
 };
 
+struct i915_oa_config_bo {
+	struct llist_node node;
+
+	struct i915_oa_config *oa_config;
+	struct i915_vma *vma;
+};
+
 static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer);
 
-static void free_oa_config(struct i915_oa_config *oa_config)
+void i915_oa_config_release(struct kref *ref)
 {
+	struct i915_oa_config *oa_config =
+		container_of(ref, typeof(*oa_config), ref);
+
 	if (!PTR_ERR(oa_config->flex_regs))
 		kfree(oa_config->flex_regs);
 	if (!PTR_ERR(oa_config->b_counter_regs))
 		kfree(oa_config->b_counter_regs);
 	if (!PTR_ERR(oa_config->mux_regs))
 		kfree(oa_config->mux_regs);
-	kfree(oa_config);
-}
-
-static void put_oa_config(struct i915_oa_config *oa_config)
-{
-	if (!atomic_dec_and_test(&oa_config->ref_count))
-		return;
 
-	free_oa_config(oa_config);
+	kfree_rcu(oa_config, rcu);
 }
 
-static int get_oa_config(struct i915_perf *perf,
-			 int metrics_set,
-			 struct i915_oa_config **out_config)
+struct i915_oa_config *
+i915_perf_get_oa_config(struct i915_perf *perf, int metrics_set)
 {
-	int ret;
-
-	if (metrics_set == 1) {
-		*out_config = &perf->test_config;
-		atomic_inc(&perf->test_config.ref_count);
-		return 0;
-	}
-
-	ret = mutex_lock_interruptible(&perf->metrics_lock);
-	if (ret)
-		return ret;
+	struct i915_oa_config *oa_config;
 
-	*out_config = idr_find(&perf->metrics_idr, metrics_set);
-	if (!*out_config)
-		ret = -EINVAL;
+	rcu_read_lock();
+	if (metrics_set == 1)
+		oa_config = &perf->test_config;
 	else
-		atomic_inc(&(*out_config)->ref_count);
+		oa_config = idr_find(&perf->metrics_idr, metrics_set);
+	if (oa_config)
+		oa_config = i915_oa_config_get(oa_config);
+	rcu_read_unlock();
 
-	mutex_unlock(&perf->metrics_lock);
+	return oa_config;
+}
 
-	return ret;
+static void free_oa_config_bo(struct i915_oa_config_bo *oa_bo)
+{
+	i915_oa_config_put(oa_bo->oa_config);
+	i915_vma_put(oa_bo->vma);
+	kfree(oa_bo);
 }
 
 static u32 gen8_oa_hw_tail_read(struct i915_perf_stream *stream)
@@ -1337,6 +1337,16 @@ free_oa_buffer(struct i915_perf_stream *stream)
 	stream->oa_buffer.vaddr = NULL;
 }
 
+static void
+free_oa_configs(struct i915_perf_stream *stream)
+{
+	struct i915_oa_config_bo *oa_bo, *tmp;
+
+	i915_oa_config_put(stream->oa_config);
+	llist_for_each_entry_safe(oa_bo, tmp, stream->oa_config_bos.first, node)
+		free_oa_config_bo(oa_bo);
+}
+
 static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
 {
 	struct i915_perf *perf = stream->perf;
@@ -1358,7 +1368,7 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
 	if (stream->ctx)
 		oa_put_render_ctx_id(stream);
 
-	put_oa_config(stream->oa_config);
+	free_oa_configs(stream);
 
 	if (perf->spurious_report_rs.missed) {
 		DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n",
@@ -1505,10 +1515,6 @@ static int alloc_oa_buffer(struct i915_perf_stream *stream)
 		goto err_unpin;
 	}
 
-	DRM_DEBUG_DRIVER("OA Buffer initialized, gtt offset = 0x%x, vaddr = %p\n",
-			 i915_ggtt_offset(stream->oa_buffer.vma),
-			 stream->oa_buffer.vaddr);
-
 	return 0;
 
 err_unpin:
@@ -2200,9 +2206,10 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 		}
 	}
 
-	ret = get_oa_config(perf, props->metrics_set, &stream->oa_config);
-	if (ret) {
+	stream->oa_config = i915_perf_get_oa_config(perf, props->metrics_set);
+	if (!stream->oa_config) {
 		DRM_DEBUG("Invalid OA config id=%i\n", props->metrics_set);
+		ret = -EINVAL;
 		goto err_config;
 	}
 
@@ -2234,6 +2241,9 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 		goto err_enable;
 	}
 
+	DRM_DEBUG("opening stream oa config uuid=%s\n",
+		  stream->oa_config->uuid);
+
 	hrtimer_init(&stream->poll_check_timer,
 		     CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	stream->poll_check_timer.function = oa_poll_check_timer_cb;
@@ -2249,7 +2259,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 	free_oa_buffer(stream);
 
 err_oa_buf_alloc:
-	put_oa_config(stream->oa_config);
+	free_oa_configs(stream);
 
 	intel_uncore_forcewake_put(stream->uncore, FORCEWAKE_ALL);
 	intel_engine_pm_put(stream->engine);
@@ -3057,7 +3067,8 @@ void i915_perf_register(struct drm_i915_private *i915)
 	if (ret)
 		goto sysfs_error;
 
-	atomic_set(&perf->test_config.ref_count, 1);
+	perf->test_config.perf = perf;
+	kref_init(&perf->test_config.ref);
 
 	goto exit;
 
@@ -3315,7 +3326,8 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
 		return -ENOMEM;
 	}
 
-	atomic_set(&oa_config->ref_count, 1);
+	oa_config->perf = perf;
+	kref_init(&oa_config->ref);
 
 	if (!uuid_is_valid(args->uuid)) {
 		DRM_DEBUG("Invalid uuid format for OA config\n");
@@ -3414,7 +3426,7 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
 sysfs_err:
 	mutex_unlock(&perf->metrics_lock);
 reg_err:
-	put_oa_config(oa_config);
+	i915_oa_config_put(oa_config);
 	DRM_DEBUG("Failed to add new OA config\n");
 	return err;
 }
@@ -3450,13 +3462,13 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
 
 	ret = mutex_lock_interruptible(&perf->metrics_lock);
 	if (ret)
-		goto lock_err;
+		return ret;
 
 	oa_config = idr_find(&perf->metrics_idr, *arg);
 	if (!oa_config) {
 		DRM_DEBUG("Failed to remove unknown OA config\n");
 		ret = -ENOENT;
-		goto config_err;
+		goto err_unlock;
 	}
 
 	GEM_BUG_ON(*arg != oa_config->id);
@@ -3466,13 +3478,16 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
 
 	idr_remove(&perf->metrics_idr, *arg);
 
+	mutex_unlock(&perf->metrics_lock);
+
 	DRM_DEBUG("Removed config %s id=%i\n", oa_config->uuid, oa_config->id);
 
-	put_oa_config(oa_config);
+	i915_oa_config_put(oa_config);
+
+	return 0;
 
-config_err:
+err_unlock:
 	mutex_unlock(&perf->metrics_lock);
-lock_err:
 	return ret;
 }
 
@@ -3642,7 +3657,7 @@ void i915_perf_init(struct drm_i915_private *i915)
 
 static int destroy_config(int id, void *p, void *data)
 {
-	put_oa_config(p);
+	i915_oa_config_put(p);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_perf.h b/drivers/gpu/drm/i915/i915_perf.h
index 295e33e8eef7..4ceebce72060 100644
--- a/drivers/gpu/drm/i915/i915_perf.h
+++ b/drivers/gpu/drm/i915/i915_perf.h
@@ -6,6 +6,7 @@
 #ifndef __I915_PERF_H__
 #define __I915_PERF_H__
 
+#include <linux/kref.h>
 #include <linux/types.h>
 
 #include "i915_perf_types.h"
@@ -13,6 +14,7 @@
 struct drm_device;
 struct drm_file;
 struct drm_i915_private;
+struct i915_oa_config;
 struct intel_context;
 struct intel_engine_cs;
 
@@ -28,7 +30,29 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
 			       struct drm_file *file);
 int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
 				  struct drm_file *file);
+
 void i915_oa_init_reg_state(const struct intel_context *ce,
 			    const struct intel_engine_cs *engine);
 
+struct i915_oa_config *
+i915_perf_get_oa_config(struct i915_perf *perf, int metrics_set);
+
+static inline struct i915_oa_config *
+i915_oa_config_get(struct i915_oa_config *oa_config)
+{
+	if (kref_get_unless_zero(&oa_config->ref))
+		return oa_config;
+	else
+		return NULL;
+}
+
+void i915_oa_config_release(struct kref *ref);
+static inline void i915_oa_config_put(struct i915_oa_config *oa_config)
+{
+	if (!oa_config)
+		return;
+
+	kref_put(&oa_config->ref, i915_oa_config_release);
+}
+
 #endif /* __I915_PERF_H__ */
diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h
index eb8d1ebd5095..337cd7d2ad77 100644
--- a/drivers/gpu/drm/i915/i915_perf_types.h
+++ b/drivers/gpu/drm/i915/i915_perf_types.h
@@ -9,7 +9,7 @@
 #include <linux/atomic.h>
 #include <linux/device.h>
 #include <linux/hrtimer.h>
-#include <linux/list.h>
+#include <linux/llist.h>
 #include <linux/poll.h>
 #include <linux/sysfs.h>
 #include <linux/types.h>
@@ -22,6 +22,7 @@
 struct drm_i915_private;
 struct file;
 struct i915_gem_context;
+struct i915_perf;
 struct i915_vma;
 struct intel_context;
 struct intel_engine_cs;
@@ -37,6 +38,8 @@ struct i915_oa_reg {
 };
 
 struct i915_oa_config {
+	struct i915_perf *perf;
+
 	char uuid[UUID_STRING_LEN + 1];
 	int id;
 
@@ -51,7 +54,8 @@ struct i915_oa_config {
 	struct attribute *attrs[2];
 	struct device_attribute sysfs_metric_id;
 
-	atomic_t ref_count;
+	struct kref ref;
+	struct rcu_head rcu;
 };
 
 struct i915_perf_stream;
@@ -177,6 +181,12 @@ struct i915_perf_stream {
 	 */
 	struct i915_oa_config *oa_config;
 
+	/**
+	 * @oa_config_bos: A list of struct i915_oa_config_bo allocated lazily
+	 * each time @oa_config changes.
+	 */
+	struct llist_head oa_config_bos;
+
 	/**
 	 * @pinned_ctx: The OA context specific information.
 	 */
@@ -331,13 +341,13 @@ struct i915_perf {
 
 	/*
 	 * Lock associated with adding/modifying/removing OA configs
-	 * in dev_priv->perf.metrics_idr.
+	 * in perf->metrics_idr.
 	 */
 	struct mutex metrics_lock;
 
 	/*
-	 * List of dynamic configurations, you need to hold
-	 * dev_priv->perf.metrics_lock to access it.
+	 * List of dynamic configurations (struct i915_oa_config), you
+	 * need to hold perf->metrics_lock to access it.
 	 */
 	struct idr metrics_idr;
 
@@ -350,8 +360,7 @@ struct i915_perf {
 	/*
 	 * The stream currently using the OA unit. If accessed
 	 * outside a syscall associated to its file
-	 * descriptor, you need to hold
-	 * dev_priv->drm.struct_mutex.
+	 * descriptor.
 	 */
 	struct i915_perf_stream *exclusive_stream;
 
-- 
2.23.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [CI 4/9] drm/i915: add support for perf configuration queries
  2019-10-10 19:48 [CI 1/9] drm/i915/perf: Replace global wakeref tracking with engine-pm Chris Wilson
  2019-10-10 19:48 ` [CI 2/9] drm/i915/perf: introduce a versioning of the i915-perf uapi Chris Wilson
  2019-10-10 19:48 ` [CI 3/9] drm/i915/perf: allow for CS OA configs to be created lazily Chris Wilson
@ 2019-10-10 19:48 ` Chris Wilson
  2019-10-10 19:48 ` [CI 5/9] drm/i915/perf: implement active wait for noa configurations Chris Wilson
                   ` (11 subsequent siblings)
  14 siblings, 0 replies; 20+ messages in thread
From: Chris Wilson @ 2019-10-10 19:48 UTC (permalink / raw)
  To: intel-gfx

From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>

Listing configurations at the moment is supported only through sysfs.
This might cause issues for applications wanting to list
configurations from a container where sysfs isn't available.

This change adds a way to query the number of configurations and their
content through the i915 query uAPI.

v2: Fix sparse warnings (Lionel)
    Add support to query configuration using uuid (Lionel)

v3: Fix some inconsistency in uapi header (Lionel)
    Fix unlocking when not locked issue (Lionel)
    Add debug messages (Lionel)

v4: Fix missing unlock (Dan)

v5: Drop lock when copying config content to userspace (Chris)

v6: Drop lock when copying config list to userspace (Chris)
    Fix deadlock when calling i915_perf_get_oa_config() under
    perf.metrics_lock (Lionel)
    Add i915_oa_config_get() (Chris)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_perf.c  |   3 +-
 drivers/gpu/drm/i915/i915_query.c | 295 ++++++++++++++++++++++++++++++
 include/uapi/drm/i915_drm.h       |  62 ++++++-
 3 files changed, 357 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 5fa0df46fcc3..7d7baee7febe 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -3473,8 +3473,7 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
 
 	GEM_BUG_ON(*arg != oa_config->id);
 
-	sysfs_remove_group(perf->metrics_kobj,
-			   &oa_config->sysfs_metric);
+	sysfs_remove_group(perf->metrics_kobj, &oa_config->sysfs_metric);
 
 	idr_remove(&perf->metrics_idr, *arg);
 
diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c
index abac5042da2b..6a68ecc7bb5f 100644
--- a/drivers/gpu/drm/i915/i915_query.c
+++ b/drivers/gpu/drm/i915/i915_query.c
@@ -7,6 +7,7 @@
 #include <linux/nospec.h>
 
 #include "i915_drv.h"
+#include "i915_perf.h"
 #include "i915_query.h"
 #include <uapi/drm/i915_drm.h>
 
@@ -140,10 +141,304 @@ query_engine_info(struct drm_i915_private *i915,
 	return len;
 }
 
+static int can_copy_perf_config_registers_or_number(u32 user_n_regs,
+						    u64 user_regs_ptr,
+						    u32 kernel_n_regs)
+{
+	/*
+	 * We'll just put the number of registers, and won't copy the
+	 * register.
+	 */
+	if (user_n_regs == 0)
+		return 0;
+
+	if (user_n_regs < kernel_n_regs)
+		return -EINVAL;
+
+	if (!access_ok(u64_to_user_ptr(user_regs_ptr),
+		       2 * sizeof(u32) * kernel_n_regs))
+		return -EFAULT;
+
+	return 0;
+}
+
+static int copy_perf_config_registers_or_number(const struct i915_oa_reg *kernel_regs,
+						u32 kernel_n_regs,
+						u64 user_regs_ptr,
+						u32 *user_n_regs)
+{
+	u32 r;
+
+	if (*user_n_regs == 0) {
+		*user_n_regs = kernel_n_regs;
+		return 0;
+	}
+
+	*user_n_regs = kernel_n_regs;
+
+	for (r = 0; r < kernel_n_regs; r++) {
+		u32 __user *user_reg_ptr =
+			u64_to_user_ptr(user_regs_ptr + sizeof(u32) * r * 2);
+		u32 __user *user_val_ptr =
+			u64_to_user_ptr(user_regs_ptr + sizeof(u32) * r * 2 +
+					sizeof(u32));
+		int ret;
+
+		ret = __put_user(i915_mmio_reg_offset(kernel_regs[r].addr),
+				 user_reg_ptr);
+		if (ret)
+			return -EFAULT;
+
+		ret = __put_user(kernel_regs[r].value, user_val_ptr);
+		if (ret)
+			return -EFAULT;
+	}
+
+	return 0;
+}
+
+static int query_perf_config_data(struct drm_i915_private *i915,
+				  struct drm_i915_query_item *query_item,
+				  bool use_uuid)
+{
+	struct drm_i915_query_perf_config __user *user_query_config_ptr =
+		u64_to_user_ptr(query_item->data_ptr);
+	struct drm_i915_perf_oa_config __user *user_config_ptr =
+		u64_to_user_ptr(query_item->data_ptr +
+				sizeof(struct drm_i915_query_perf_config));
+	struct drm_i915_perf_oa_config user_config;
+	struct i915_perf *perf = &i915->perf;
+	struct i915_oa_config *oa_config;
+	char uuid[UUID_STRING_LEN + 1];
+	u64 config_id;
+	u32 flags, total_size;
+	int ret;
+
+	if (!perf->i915)
+		return -ENODEV;
+
+	total_size =
+		sizeof(struct drm_i915_query_perf_config) +
+		sizeof(struct drm_i915_perf_oa_config);
+
+	if (query_item->length == 0)
+		return total_size;
+
+	if (query_item->length < total_size) {
+		DRM_DEBUG("Invalid query config data item size=%u expected=%u\n",
+			  query_item->length, total_size);
+		return -EINVAL;
+	}
+
+	if (!access_ok(user_query_config_ptr, total_size))
+		return -EFAULT;
+
+	if (__get_user(flags, &user_query_config_ptr->flags))
+		return -EFAULT;
+
+	if (flags != 0)
+		return -EINVAL;
+
+	if (use_uuid) {
+		struct i915_oa_config *tmp;
+		int id;
+
+		BUILD_BUG_ON(sizeof(user_query_config_ptr->uuid) >= sizeof(uuid));
+
+		memset(&uuid, 0, sizeof(uuid));
+		if (__copy_from_user(uuid, user_query_config_ptr->uuid,
+				     sizeof(user_query_config_ptr->uuid)))
+			return -EFAULT;
+
+		oa_config = NULL;
+		rcu_read_lock();
+		idr_for_each_entry(&perf->metrics_idr, tmp, id) {
+			if (!strcmp(tmp->uuid, uuid)) {
+				oa_config = i915_oa_config_get(tmp);
+				break;
+			}
+		}
+		rcu_read_unlock();
+	} else {
+		if (__get_user(config_id, &user_query_config_ptr->config))
+			return -EFAULT;
+
+		oa_config = i915_perf_get_oa_config(perf, config_id);
+	}
+	if (!oa_config)
+		return -ENOENT;
+
+	if (__copy_from_user(&user_config, user_config_ptr,
+			     sizeof(user_config))) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	ret = can_copy_perf_config_registers_or_number(user_config.n_boolean_regs,
+						       user_config.boolean_regs_ptr,
+						       oa_config->b_counter_regs_len);
+	if (ret)
+		goto out;
+
+	ret = can_copy_perf_config_registers_or_number(user_config.n_flex_regs,
+						       user_config.flex_regs_ptr,
+						       oa_config->flex_regs_len);
+	if (ret)
+		goto out;
+
+	ret = can_copy_perf_config_registers_or_number(user_config.n_mux_regs,
+						       user_config.mux_regs_ptr,
+						       oa_config->mux_regs_len);
+	if (ret)
+		goto out;
+
+	ret = copy_perf_config_registers_or_number(oa_config->b_counter_regs,
+						   oa_config->b_counter_regs_len,
+						   user_config.boolean_regs_ptr,
+						   &user_config.n_boolean_regs);
+	if (ret)
+		goto out;
+
+	ret = copy_perf_config_registers_or_number(oa_config->flex_regs,
+						   oa_config->flex_regs_len,
+						   user_config.flex_regs_ptr,
+						   &user_config.n_flex_regs);
+	if (ret)
+		goto out;
+
+	ret = copy_perf_config_registers_or_number(oa_config->mux_regs,
+						   oa_config->mux_regs_len,
+						   user_config.mux_regs_ptr,
+						   &user_config.n_mux_regs);
+	if (ret)
+		goto out;
+
+	memcpy(user_config.uuid, oa_config->uuid, sizeof(user_config.uuid));
+
+	if (__copy_to_user(user_config_ptr, &user_config,
+			   sizeof(user_config))) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	ret = total_size;
+
+out:
+	i915_oa_config_put(oa_config);
+	return ret;
+}
+
+static size_t sizeof_perf_config_list(size_t count)
+{
+	return sizeof(struct drm_i915_query_perf_config) + sizeof(u64) * count;
+}
+
+static size_t sizeof_perf_metrics(struct i915_perf *perf)
+{
+	struct i915_oa_config *tmp;
+	size_t i;
+	int id;
+
+	i = 1;
+	rcu_read_lock();
+	idr_for_each_entry(&perf->metrics_idr, tmp, id)
+		i++;
+	rcu_read_unlock();
+
+	return sizeof_perf_config_list(i);
+}
+
+static int query_perf_config_list(struct drm_i915_private *i915,
+				  struct drm_i915_query_item *query_item)
+{
+	struct drm_i915_query_perf_config __user *user_query_config_ptr =
+		u64_to_user_ptr(query_item->data_ptr);
+	struct i915_perf *perf = &i915->perf;
+	u64 *oa_config_ids = NULL;
+	int i, n_configs;
+	u32 flags;
+	int ret;
+
+	if (!perf->i915)
+		return -ENODEV;
+
+	if (query_item->length == 0)
+		return sizeof_perf_metrics(perf);
+
+	if (get_user(flags, &user_query_config_ptr->flags))
+		return -EFAULT;
+
+	if (flags != 0)
+		return -EINVAL;
+
+	n_configs = 1;
+	do {
+		struct i915_oa_config *tmp;
+		u64 *ids;
+		int id;
+
+		ids = krealloc(oa_config_ids,
+			       n_configs * sizeof(*oa_config_ids),
+			       GFP_KERNEL);
+		if (!ids)
+			return -ENOMEM;
+
+		i = 0;
+		ids[i++] = 1ull;
+		rcu_read_lock();
+		idr_for_each_entry(&perf->metrics_idr, tmp, id) {
+			if (i < n_configs)
+				ids[i] = id;
+			i++;
+		}
+		rcu_read_unlock();
+
+		oa_config_ids = ids;
+	} while (i > n_configs);
+
+	if (query_item->length < sizeof_perf_config_list(n_configs)) {
+		DRM_DEBUG("Invalid query config list item size=%u expected=%zu\n",
+			  query_item->length,
+			  sizeof_perf_config_list(n_configs));
+		kfree(oa_config_ids);
+		return -EINVAL;
+	}
+
+	if (put_user(n_configs, &user_query_config_ptr->config)) {
+		kfree(oa_config_ids);
+		return -EFAULT;
+	}
+
+	ret = copy_to_user(user_query_config_ptr + 1,
+			   oa_config_ids,
+			   n_configs * sizeof(*oa_config_ids));
+	kfree(oa_config_ids);
+	if (ret)
+		return -EFAULT;
+
+	return sizeof_perf_config_list(n_configs);
+}
+
+static int query_perf_config(struct drm_i915_private *i915,
+			     struct drm_i915_query_item *query_item)
+{
+	switch (query_item->flags) {
+	case DRM_I915_QUERY_PERF_CONFIG_LIST:
+		return query_perf_config_list(i915, query_item);
+	case DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID:
+		return query_perf_config_data(i915, query_item, true);
+	case DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_ID:
+		return query_perf_config_data(i915, query_item, false);
+	default:
+		return -EINVAL;
+	}
+}
+
 static int (* const i915_query_funcs[])(struct drm_i915_private *dev_priv,
 					struct drm_i915_query_item *query_item) = {
 	query_topology_info,
 	query_engine_info,
+	query_perf_config,
 };
 
 int i915_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index c50c712b3771..0c7b2815fbf1 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -2005,6 +2005,7 @@ struct drm_i915_query_item {
 	__u64 query_id;
 #define DRM_I915_QUERY_TOPOLOGY_INFO    1
 #define DRM_I915_QUERY_ENGINE_INFO	2
+#define DRM_I915_QUERY_PERF_CONFIG      3
 /* Must be kept compact -- no holes and well documented */
 
 	/*
@@ -2016,9 +2017,18 @@ struct drm_i915_query_item {
 	__s32 length;
 
 	/*
-	 * Unused for now. Must be cleared to zero.
+	 * When query_id == DRM_I915_QUERY_TOPOLOGY_INFO, must be 0.
+	 *
+	 * When query_id == DRM_I915_QUERY_PERF_CONFIG, must be one of the
+	 * following :
+	 *         - DRM_I915_QUERY_PERF_CONFIG_LIST
+	 *         - DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID
+	 *         - DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_ID
 	 */
 	__u32 flags;
+#define DRM_I915_QUERY_PERF_CONFIG_LIST          1
+#define DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID 2
+#define DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_ID   3
 
 	/*
 	 * Data will be written at the location pointed by data_ptr when the
@@ -2146,6 +2156,56 @@ struct drm_i915_query_engine_info {
 	struct drm_i915_engine_info engines[];
 };
 
+/*
+ * Data written by the kernel with query DRM_I915_QUERY_PERF_CONFIG.
+ */
+struct drm_i915_query_perf_config {
+	union {
+		/*
+		 * When query_item.flags == DRM_I915_QUERY_PERF_CONFIG_LIST, i915 sets
+		 * this field to the number of configurations available.
+		 */
+		__u64 n_configs;
+
+		/*
+		 * When query_item.flags == DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_ID,
+		 * i915 will use the value in this field as configuration
+		 * identifier to decide what data to write into config_ptr.
+		 */
+		__u64 config;
+
+		/*
+		 * When query_item.flags == DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID,
+		 * i915 will use the value in this field as configuration
+		 * identifier to decide what data to write into config_ptr.
+		 *
+		 * String formatted like "%08x-%04x-%04x-%04x-%012x"
+		 */
+		char uuid[36];
+	};
+
+	/*
+	 * Unused for now. Must be cleared to zero.
+	 */
+	__u32 flags;
+
+	/*
+	 * When query_item.flags == DRM_I915_QUERY_PERF_CONFIG_LIST, i915 will
+	 * write an array of __u64 of configuration identifiers.
+	 *
+	 * When query_item.flags == DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_{UUID,ID},
+	 * i915 will write a struct drm_i915_perf_oa_config. If the following
+	 * fields of drm_i915_perf_oa_config are not set to 0, i915 will write
+	 * into the associated pointers the values submitted when the
+	 * configuration was created :
+	 *
+	 *         - n_mux_regs
+	 *         - n_boolean_regs
+	 *         - n_flex_regs
+	 */
+	__u8 data[];
+};
+
 #if defined(__cplusplus)
 }
 #endif
-- 
2.23.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [CI 5/9] drm/i915/perf: implement active wait for noa configurations
  2019-10-10 19:48 [CI 1/9] drm/i915/perf: Replace global wakeref tracking with engine-pm Chris Wilson
                   ` (2 preceding siblings ...)
  2019-10-10 19:48 ` [CI 4/9] drm/i915: add support for perf configuration queries Chris Wilson
@ 2019-10-10 19:48 ` Chris Wilson
  2019-10-10 20:24   ` [PATCH] " Chris Wilson
  2019-10-10 19:48 ` [CI 6/9] drm/i915/perf: execute OA configuration from command stream Chris Wilson
                   ` (10 subsequent siblings)
  14 siblings, 1 reply; 20+ messages in thread
From: Chris Wilson @ 2019-10-10 19:48 UTC (permalink / raw)
  To: intel-gfx

From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>

NOA configurations take some amount of time to apply. That amount of
time depends on the size of the GT. There is no documented time for
this. For example, past experiments with powergating
configuration changes seem to indicate a 60~70us delay. We go with
500us as default for now which should be over the required amount of
time (according to HW architects).

v2: Don't forget to save/restore registers used for the wait (Chris)

v3: Name used CS_GPR registers (Chris)
    Fix compile issue due to rebase (Lionel)

v4: Fix save/restore helpers (Umesh)

v5: Move noa_wait from drm_i915_private to i915_perf_stream (Lionel)

v6: Add missing struct declarations in i915_perf.h

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v4)
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> (v4)
---
 drivers/gpu/drm/i915/gt/intel_gpu_commands.h  |   4 +-
 drivers/gpu/drm/i915/gt/intel_gt_types.h      |   5 +
 drivers/gpu/drm/i915/i915_debugfs.c           |  32 +++
 drivers/gpu/drm/i915/i915_perf.c              | 224 ++++++++++++++++++
 drivers/gpu/drm/i915/i915_perf_types.h        |   8 +
 drivers/gpu/drm/i915/i915_reg.h               |   4 +-
 .../drm/i915/selftests/i915_live_selftests.h  |   1 +
 drivers/gpu/drm/i915/selftests/i915_perf.c    | 216 +++++++++++++++++
 8 files changed, 492 insertions(+), 2 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/selftests/i915_perf.c

diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index 0987100c786b..8e63cffcabe0 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -163,7 +163,8 @@
 #define MI_BATCH_BUFFER_START	MI_INSTR(0x31, 0)
 #define   MI_BATCH_GTT		    (2<<6) /* aliased with (1<<7) on gen4 */
 #define MI_BATCH_BUFFER_START_GEN8	MI_INSTR(0x31, 1)
-#define   MI_BATCH_RESOURCE_STREAMER (1<<10)
+#define   MI_BATCH_RESOURCE_STREAMER REG_BIT(10)
+#define   MI_BATCH_PREDICATE         REG_BIT(15) /* HSW+ on RCS only*/
 
 /*
  * 3D instructions used by the kernel
@@ -224,6 +225,7 @@
 #define   PIPE_CONTROL_CS_STALL				(1<<20)
 #define   PIPE_CONTROL_TLB_INVALIDATE			(1<<18)
 #define   PIPE_CONTROL_MEDIA_STATE_CLEAR		(1<<16)
+#define   PIPE_CONTROL_WRITE_TIMESTAMP			(3<<14)
 #define   PIPE_CONTROL_QW_WRITE				(1<<14)
 #define   PIPE_CONTROL_POST_SYNC_OP_MASK                (3<<14)
 #define   PIPE_CONTROL_DEPTH_STALL			(1<<13)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index 802f516a3430..be4b263621c8 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -109,6 +109,11 @@ enum intel_gt_scratch_field {
 	/* 8 bytes */
 	INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA = 256,
 
+	/* 6 * 8 bytes */
+	INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR = 2048,
+
+	/* 4 bytes */
+	INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1 = 2096,
 };
 
 #endif /* __INTEL_GT_TYPES_H__ */
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 277f31297f29..d463a28b7475 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3590,6 +3590,37 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_wedged_fops,
 			i915_wedged_get, i915_wedged_set,
 			"%llu\n");
 
+static int
+i915_perf_noa_delay_set(void *data, u64 val)
+{
+	struct drm_i915_private *i915 = data;
+	const u32 clk = RUNTIME_INFO(i915)->cs_timestamp_frequency_khz;
+
+	/*
+	 * val is in ns. The CS wait compares 32bit tick deltas, so any
+	 * delay above U32_MAX ticks (val * clk / 1e6) waits forever.
+	 */
+	if (!clk || val > div_u64(mul_u32_u32(U32_MAX, 1000 * 1000), clk))
+		return -EINVAL;
+
+	atomic64_set(&i915->perf.noa_programming_delay, val);
+	return 0;
+}
+
+static int
+i915_perf_noa_delay_get(void *data, u64 *val)
+{
+	struct drm_i915_private *i915 = data;
+
+	*val = atomic64_read(&i915->perf.noa_programming_delay);
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(i915_perf_noa_delay_fops,
+			i915_perf_noa_delay_get,
+			i915_perf_noa_delay_set,
+			"%llu\n");
+
 #define DROP_UNBOUND	BIT(0)
 #define DROP_BOUND	BIT(1)
 #define DROP_RETIRE	BIT(2)
@@ -4340,6 +4371,7 @@ static const struct i915_debugfs_files {
 	const char *name;
 	const struct file_operations *fops;
 } i915_debugfs_files[] = {
+	{"i915_perf_noa_delay", &i915_perf_noa_delay_fops},
 	{"i915_wedged", &i915_wedged_fops},
 	{"i915_cache_sharing", &i915_cache_sharing_fops},
 	{"i915_gem_drop_caches", &i915_drop_caches_fops},
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 7d7baee7febe..caa4ab68cea5 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -198,6 +198,7 @@
 #include "gem/i915_gem_context.h"
 #include "gt/intel_engine_pm.h"
 #include "gt/intel_engine_user.h"
+#include "gt/intel_gt.h"
 #include "gt/intel_lrc_reg.h"
 
 #include "i915_drv.h"
@@ -1337,6 +1338,12 @@ free_oa_buffer(struct i915_perf_stream *stream)
 	stream->oa_buffer.vaddr = NULL;
 }
 
+static void
+free_noa_wait(struct i915_perf_stream *stream)
+{
+	i915_vma_unpin_and_release(&stream->noa_wait, 0);
+}
+
 static void
 free_oa_configs(struct i915_perf_stream *stream)
 {
@@ -1369,6 +1376,7 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
 		oa_put_render_ctx_id(stream);
 
 	free_oa_configs(stream);
+	free_noa_wait(stream);
 
 	if (perf->spurious_report_rs.missed) {
 		DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n",
@@ -1529,6 +1537,206 @@ static int alloc_oa_buffer(struct i915_perf_stream *stream)
 	return ret;
 }
 
+static u32 *save_restore_register(struct i915_perf_stream *stream, u32 *cs,
+				  bool save, i915_reg_t reg, u32 offset,
+				  u32 dword_count)
+{
+	u32 cmd;
+	u32 d;
+
+	cmd = save ? MI_STORE_REGISTER_MEM : MI_LOAD_REGISTER_MEM;
+	if (INTEL_GEN(stream->perf->i915) >= 8)
+		cmd++;
+
+	for (d = 0; d < dword_count; d++) {
+		*cs++ = cmd;
+		*cs++ = i915_mmio_reg_offset(reg) + 4 * d;
+		*cs++ = intel_gt_scratch_offset(stream->engine->gt,
+					       	offset) + 4 * d;
+		*cs++ = 0;
+	}
+
+	return cs;
+}
+
+static int alloc_noa_wait(struct i915_perf_stream *stream)
+{
+	struct drm_i915_private *i915 = stream->perf->i915;
+	struct drm_i915_gem_object *bo;
+	struct i915_vma *vma;
+	const u64 delay_ticks = 0xffffffffffffffff -
+		DIV64_U64_ROUND_UP(
+			atomic64_read(&stream->perf->noa_programming_delay) *
+			RUNTIME_INFO(i915)->cs_timestamp_frequency_khz,
+			1000000ull);
+	const u32 base = stream->engine->mmio_base;
+#define CS_GPR(x) GEN8_RING_CS_GPR(base, x)
+	u32 *batch, *ts0, *cs, *jump;
+	int ret, i;
+	enum {
+		START_TS,
+		NOW_TS,
+		DELTA_TS,
+		JUMP_PREDICATE,
+		DELTA_TARGET,
+		N_CS_GPR
+	};
+
+	bo = i915_gem_object_create_internal(i915, 4096);
+	if (IS_ERR(bo)) {
+		DRM_ERROR("Failed to allocate NOA wait batchbuffer\n");
+		return PTR_ERR(bo);
+	}
+
+	/*
+	 * We pin this buffer in the GGTT because multiple OA config BOs
+	 * will have a jump to this address, so it needs to stay fixed
+	 * during the lifetime of the i915/perf stream.
+	 */
+	vma = i915_gem_object_ggtt_pin(bo, NULL, 0, 0, PIN_HIGH);
+	if (IS_ERR(vma)) {
+		ret = PTR_ERR(vma);
+		goto err_unref;
+	}
+
+	batch = cs = i915_gem_object_pin_map(bo, I915_MAP_WB);
+	if (IS_ERR(batch)) {
+		ret = PTR_ERR(batch);
+		goto err_unpin;
+	}
+
+	/* Save registers. */
+	for (i = 0; i < N_CS_GPR; i++)
+		cs = save_restore_register(
+			stream, cs, true /* save */, CS_GPR(i),
+			INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR + 8 * i, 2);
+	cs = save_restore_register(
+		stream, cs, true /* save */, MI_PREDICATE_RESULT_1,
+		INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1, 1);
+
+	/* First timestamp snapshot location. */
+	ts0 = cs;
+
+	/*
+	 * Initial snapshot of the timestamp register to implement the wait.
+	 * We work with 32b values, so clear out the top 32 bits of the
+	 * register because the ALU works on 64 bits.
+	 */
+	*cs++ = MI_LOAD_REGISTER_IMM(1);
+	*cs++ = i915_mmio_reg_offset(CS_GPR(START_TS)) + 4;
+	*cs++ = 0;
+	*cs++ = MI_LOAD_REGISTER_REG | (3 - 2);
+	*cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(base));
+	*cs++ = i915_mmio_reg_offset(CS_GPR(START_TS));
+
+	/*
+	 * This is the location we're going to jump back into until the
+	 * required amount of time has passed.
+	 */
+	jump = cs;
+
+	/*
+	 * Take another snapshot of the timestamp register. Take care to clear
+	 * up the top 32bits of CS_GPR(1) as we're using it for other
+	 * operations below.
+	 */
+	*cs++ = MI_LOAD_REGISTER_IMM(1);
+	*cs++ = i915_mmio_reg_offset(CS_GPR(NOW_TS)) + 4;
+	*cs++ = 0;
+	*cs++ = MI_LOAD_REGISTER_REG | (3 - 2);
+	*cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(base));
+	*cs++ = i915_mmio_reg_offset(CS_GPR(NOW_TS));
+
+	/*
+	 * Do a diff between the 2 timestamps and store the result into
+	 * CS_GPR(DELTA_TS).
+	 */
+	*cs++ = MI_MATH(5);
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(NOW_TS));
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(START_TS));
+	*cs++ = MI_MATH_SUB;
+	*cs++ = MI_MATH_STORE(MI_MATH_REG(DELTA_TS), MI_MATH_REG_ACCU);
+	*cs++ = MI_MATH_STORE(MI_MATH_REG(JUMP_PREDICATE), MI_MATH_REG_CF);
+
+	/*
+	 * Transfer the carry flag (set to 1 if ts1 < ts0, meaning the
+	 * timestamp has rolled over the 32bits) into the predicate register
+	 * to be used for the predicated jump.
+	 */
+	*cs++ = MI_LOAD_REGISTER_REG | (3 - 2);
+	*cs++ = i915_mmio_reg_offset(CS_GPR(JUMP_PREDICATE));
+	*cs++ = i915_mmio_reg_offset(MI_PREDICATE_RESULT_1);
+
+	/* Restart from the beginning if we had timestamps roll over. */
+	*cs++ = (INTEL_GEN(i915) < 8 ?
+		 MI_BATCH_BUFFER_START :
+		 MI_BATCH_BUFFER_START_GEN8) |
+		MI_BATCH_PREDICATE;
+	*cs++ = i915_ggtt_offset(vma) + (ts0 - batch) * 4;
+	*cs++ = 0;
+
+	/*
+	 * Now add the diff between the two previous timestamps to :
+	 *      (((1 << 64) - 1) - delay_in_ticks)
+	 *
+	 * When the Carry Flag contains 1 this means the elapsed time is
+	 * longer than the expected delay, and we can exit the wait loop.
+	 */
+	*cs++ = MI_LOAD_REGISTER_IMM(2);
+	*cs++ = i915_mmio_reg_offset(CS_GPR(DELTA_TARGET));
+	*cs++ = lower_32_bits(delay_ticks);
+	*cs++ = i915_mmio_reg_offset(CS_GPR(DELTA_TARGET)) + 4;
+	*cs++ = upper_32_bits(delay_ticks);
+
+	*cs++ = MI_MATH(4);
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(DELTA_TS));
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(DELTA_TARGET));
+	*cs++ = MI_MATH_ADD;
+	*cs++ = MI_MATH_STOREINV(MI_MATH_REG(JUMP_PREDICATE), MI_MATH_REG_CF);
+
+	/*
+	 * Transfer the result into the predicate register to be used for the
+	 * predicated jump.
+	 */
+	*cs++ = MI_LOAD_REGISTER_REG | (3 - 2);
+	*cs++ = i915_mmio_reg_offset(CS_GPR(JUMP_PREDICATE));
+	*cs++ = i915_mmio_reg_offset(MI_PREDICATE_RESULT_1);
+
+	/* Predicate the jump.  */
+	*cs++ = (INTEL_GEN(i915) < 8 ?
+		 MI_BATCH_BUFFER_START :
+		 MI_BATCH_BUFFER_START_GEN8) |
+		MI_BATCH_PREDICATE;
+	*cs++ = i915_ggtt_offset(vma) + (jump - batch) * 4;
+	*cs++ = 0;
+
+	/* Restore registers. */
+	for (i = 0; i < N_CS_GPR; i++)
+		cs = save_restore_register(
+			stream, cs, false /* restore */, CS_GPR(i),
+			INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR + 8 * i, 2);
+	cs = save_restore_register(
+		stream, cs, false /* restore */, MI_PREDICATE_RESULT_1,
+		INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1, 1);
+
+	/* And return to the ring. */
+	*cs++ = MI_BATCH_BUFFER_END;
+
+	GEM_BUG_ON(cs - batch > PAGE_SIZE / sizeof(*batch));
+
+	i915_gem_object_flush_map(bo);
+	i915_gem_object_unpin_map(bo);
+
+	stream->noa_wait = vma;
+	return 0;
+
+err_unpin:
+	__i915_vma_unpin(vma);
+err_unref:
+	i915_gem_object_put(bo);
+	return ret;
+}
+
 static void config_oa_regs(struct intel_uncore *uncore,
 			   const struct i915_oa_reg *regs,
 			   u32 n_regs)
@@ -2206,6 +2414,12 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 		}
 	}
 
+	ret = alloc_noa_wait(stream);
+	if (ret) {
+		DRM_DEBUG("Unable to allocate NOA wait batch buffer\n");
+		goto err_noa_wait_alloc;
+	}
+
 	stream->oa_config = i915_perf_get_oa_config(perf, props->metrics_set);
 	if (!stream->oa_config) {
 		DRM_DEBUG("Invalid OA config id=%i\n", props->metrics_set);
@@ -2265,6 +2479,9 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 	intel_engine_pm_put(stream->engine);
 
 err_config:
+	free_noa_wait(stream);
+
+err_noa_wait_alloc:
 	if (stream->ctx)
 		oa_put_render_ctx_id(stream);
 
@@ -3650,6 +3867,9 @@ void i915_perf_init(struct drm_i915_private *i915)
 		ratelimit_set_flags(&perf->spurious_report_rs,
 				    RATELIMIT_MSG_ON_RELEASE);
 
+		atomic64_set(&perf->noa_programming_delay,
+			     500 * 1000 /* 500us */);
+
 		perf->i915 = i915;
 	}
 }
@@ -3689,3 +3909,7 @@ int i915_perf_ioctl_version(void)
 {
 	return 1;
 }
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/i915_perf.c"
+#endif
diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h
index 337cd7d2ad77..d35a3c1946c3 100644
--- a/drivers/gpu/drm/i915/i915_perf_types.h
+++ b/drivers/gpu/drm/i915/i915_perf_types.h
@@ -266,6 +266,12 @@ struct i915_perf_stream {
 		 */
 		u32 head;
 	} oa_buffer;
+
+	/**
+	 * A batch buffer doing a wait on the GPU for the NOA logic to be
+	 * reprogrammed.
+	 */
+	struct i915_vma *noa_wait;
 };
 
 /**
@@ -385,6 +391,8 @@ struct i915_perf {
 
 	struct i915_oa_ops ops;
 	const struct i915_oa_format *oa_formats;
+
+	atomic64_t noa_programming_delay;
 };
 
 #endif /* _I915_PERF_TYPES_H_ */
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 1dc067fc57ab..99f8a08dc2b2 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -545,7 +545,9 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define MI_PREDICATE_SRC0_UDW	_MMIO(0x2400 + 4)
 #define MI_PREDICATE_SRC1	_MMIO(0x2408)
 #define MI_PREDICATE_SRC1_UDW	_MMIO(0x2408 + 4)
-
+#define MI_PREDICATE_DATA       _MMIO(0x2410)
+#define MI_PREDICATE_RESULT     _MMIO(0x2418)
+#define MI_PREDICATE_RESULT_1   _MMIO(0x241c)
 #define MI_PREDICATE_RESULT_2	_MMIO(0x2214)
 #define  LOWER_SLICE_ENABLED	(1 << 0)
 #define  LOWER_SLICE_DISABLED	(0 << 0)
diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
index 6713efea350b..6daf6599ec79 100644
--- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
@@ -35,3 +35,4 @@ selftest(reset, intel_reset_live_selftests)
 selftest(hangcheck, intel_hangcheck_live_selftests)
 selftest(execlists, intel_execlists_live_selftests)
 selftest(guc, intel_guc_live_selftest)
+selftest(perf, i915_perf_live_selftests)
diff --git a/drivers/gpu/drm/i915/selftests/i915_perf.c b/drivers/gpu/drm/i915/selftests/i915_perf.c
new file mode 100644
index 000000000000..dc6d689e4251
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/i915_perf.c
@@ -0,0 +1,216 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include <linux/kref.h>
+
+#include "gem/i915_gem_pm.h"
+#include "gt/intel_gt.h"
+
+#include "i915_selftest.h"
+
+#include "igt_flush_test.h"
+#include "lib_sw_fence.h"
+
+static struct i915_perf_stream *
+test_stream(struct i915_perf *perf)
+{
+	struct drm_i915_perf_open_param param = {};
+	struct perf_open_properties props = {
+		.engine = intel_engine_lookup_user(perf->i915,
+						   I915_ENGINE_CLASS_RENDER,
+						   0),
+		.sample_flags = SAMPLE_OA_REPORT,
+		.oa_format = I915_OA_FORMAT_C4_B8,
+		.metrics_set = 1,
+	};
+	struct i915_perf_stream *stream;
+
+	stream = kzalloc(sizeof(*stream), GFP_KERNEL);
+	if (!stream)
+		return NULL;
+
+	stream->perf = perf;
+
+	mutex_lock(&perf->lock);
+	if (i915_oa_stream_init(stream, &param, &props)) {
+		kfree(stream);
+		stream =  NULL;
+	}
+	mutex_unlock(&perf->lock);
+
+	return stream;
+}
+
+static void stream_destroy(struct i915_perf_stream *stream)
+{
+	struct i915_perf *perf = stream->perf;
+
+	mutex_lock(&perf->lock);
+	i915_perf_destroy_locked(stream);
+	mutex_unlock(&perf->lock);
+}
+
+static int live_sanitycheck(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct i915_perf_stream *stream;
+
+	/* Quick check we can create a perf stream */
+
+	stream = test_stream(&i915->perf);
+	if (!stream)
+		return -EINVAL;
+
+	stream_destroy(stream);
+	return 0;
+}
+
+static int write_timestamp(struct i915_request *rq, int slot)
+{
+	u32 *cs;
+	int len;
+
+	cs = intel_ring_begin(rq, 6);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	len = 5;
+	if (INTEL_GEN(rq->i915) >= 8)
+		len++;
+
+	*cs++ = GFX_OP_PIPE_CONTROL(len);
+	*cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB |
+		PIPE_CONTROL_STORE_DATA_INDEX |
+		PIPE_CONTROL_WRITE_TIMESTAMP;
+	*cs++ = slot * sizeof(u32);
+	*cs++ = 0;
+	*cs++ = 0;
+	*cs++ = 0;
+
+	intel_ring_advance(rq, cs);
+
+	return 0;
+}
+
+static ktime_t poll_status(struct i915_request *rq, int slot)
+{
+	while (!intel_read_status_page(rq->engine, slot) &&
+	       !i915_request_completed(rq))
+		cpu_relax();
+
+	return ktime_get();
+}
+
+static int live_noa_delay(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct i915_perf_stream *stream;
+	struct i915_request *rq;
+	ktime_t t0, t1;
+	u64 expected;
+	u32 delay;
+	int err;
+	int i;
+
+	/* Check that the GPU delay matches expectations */
+
+	stream = test_stream(&i915->perf);
+	if (!stream)
+		return -ENOMEM;
+
+	expected = atomic64_read(&stream->perf->noa_programming_delay);
+
+	if (stream->engine->class != RENDER_CLASS) {
+		err = -ENODEV;
+		goto out;
+	}
+
+	for (i = 0; i < 4; i++)
+		intel_write_status_page(stream->engine, 0x100 + i, 0);
+
+	rq = i915_request_create(stream->engine->kernel_context);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto out;
+	}
+
+	if (rq->engine->emit_init_breadcrumb &&
+	    i915_request_timeline(rq)->has_initial_breadcrumb) {
+		err = rq->engine->emit_init_breadcrumb(rq);
+		if (err) {
+			i915_request_add(rq);
+			goto out;
+		}
+	}
+
+	err = write_timestamp(rq, 0x100);
+	if (err) {
+		i915_request_add(rq);
+		goto out;
+	}
+
+	err = rq->engine->emit_bb_start(rq,
+					i915_ggtt_offset(stream->noa_wait), 0,
+					I915_DISPATCH_SECURE);
+	if (err) {
+		i915_request_add(rq);
+		goto out;
+	}
+
+	err = write_timestamp(rq, 0x102);
+	if (err) {
+		i915_request_add(rq);
+		goto out;
+	}
+
+	i915_request_get(rq);
+	i915_request_add(rq);
+
+	preempt_disable();
+	t0 = poll_status(rq, 0x100);
+	t1 = poll_status(rq, 0x102);
+	preempt_enable();
+
+	pr_info("CPU delay: %lluns, expected %lluns\n",
+		ktime_sub(t1, t0), expected);
+
+	delay = intel_read_status_page(stream->engine, 0x102);
+	delay -= intel_read_status_page(stream->engine, 0x100);
+	delay = div_u64(mul_u32_u32(delay, 1000 * 1000),
+			RUNTIME_INFO(i915)->cs_timestamp_frequency_khz);
+	pr_info("GPU delay: %uns, expected %lluns\n",
+		delay, expected);
+
+	if (4 * delay < 3 * expected || 2 * delay > 3 * expected) {
+		pr_err("GPU delay [%uus] outside of expected threshold! [%lluus, %lluus]\n",
+		       delay / 1000,
+		       div_u64(3 * expected, 4000),
+		       div_u64(3 * expected, 2000));
+		err = -EINVAL;
+	}
+
+	i915_request_put(rq);
+out:
+	stream_destroy(stream);
+	return err;
+}
+
+int i915_perf_live_selftests(struct drm_i915_private *i915)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(live_sanitycheck),
+		SUBTEST(live_noa_delay),
+	};
+	struct i915_perf *perf = &i915->perf;
+
+	if (!perf->metrics_kobj || !perf->ops.enable_metric_set)
+		return 0;
+
+	if (intel_gt_is_wedged(&i915->gt))
+		return 0;
+
+	return i915_subtests(tests, i915);
+}
-- 
2.23.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [CI 6/9] drm/i915/perf: execute OA configuration from command stream
  2019-10-10 19:48 [CI 1/9] drm/i915/perf: Replace global wakeref tracking with engine-pm Chris Wilson
                   ` (3 preceding siblings ...)
  2019-10-10 19:48 ` [CI 5/9] drm/i915/perf: implement active wait for noa configurations Chris Wilson
@ 2019-10-10 19:48 ` Chris Wilson
  2019-10-10 19:48 ` [CI 7/9] drm/i915/perf: Allow dynamic reconfiguration of the OA stream Chris Wilson
                   ` (9 subsequent siblings)
  14 siblings, 0 replies; 20+ messages in thread
From: Chris Wilson @ 2019-10-10 19:48 UTC (permalink / raw)
  To: intel-gfx

From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>

We haven't run into issues with programming the global OA/NOA
registers configuration from CPU so far, but HW engineers actually
recommend doing this from the command streamer. On TGL in particular
one of the clock domains in which some of that programming goes might
not be powered when we poke things from the CPU.

Since we have a command buffer prepared for the execbuffer side of
things, we can reuse that approach here too.

This also allows us to significantly reduce the amount of time we hold
the main lock.

v2: Drop the global lock as much as possible

v3: Take global lock to pin global

v4: Create i915 request in emit_oa_config() to avoid deadlocks (Lionel)

v5: Move locking to the stream (Lionel)

v6: Move active reconfiguration request into i915_perf_stream (Lionel)

v7: Pin VMA outside request creation (Chris)
    Lock VMA before move to active (Chris)

v8: Fix double free on stream->initial_oa_config_bo (Lionel)
    Don't allow interruption when waiting on active config request
    (Lionel)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_perf.c | 199 ++++++++++++++++++++++++-------
 1 file changed, 156 insertions(+), 43 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index caa4ab68cea5..c37fe275cf33 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1731,56 +1731,181 @@ static int alloc_noa_wait(struct i915_perf_stream *stream)
 	return 0;
 
 err_unpin:
-	__i915_vma_unpin(vma);
+	i915_vma_unpin_and_release(&vma, 0);
 err_unref:
 	i915_gem_object_put(bo);
 	return ret;
 }
 
-static void config_oa_regs(struct intel_uncore *uncore,
-			   const struct i915_oa_reg *regs,
-			   u32 n_regs)
+static u32 *write_cs_mi_lri(u32 *cs,
+			    const struct i915_oa_reg *reg_data,
+			    u32 n_regs)
 {
 	u32 i;
 
 	for (i = 0; i < n_regs; i++) {
-		const struct i915_oa_reg *reg = regs + i;
+		if ((i % MI_LOAD_REGISTER_IMM_MAX_REGS) == 0) {
+			u32 n_lri = min_t(u32,
+					  n_regs - i,
+					  MI_LOAD_REGISTER_IMM_MAX_REGS);
+
+			*cs++ = MI_LOAD_REGISTER_IMM(n_lri);
+		}
+		*cs++ = i915_mmio_reg_offset(reg_data[i].addr);
+		*cs++ = reg_data[i].value;
+	}
+
+	return cs;
+}
+
+static int num_lri_dwords(int num_regs)
+{
+	int count = 0;
+
+	if (num_regs > 0) {
+		count += DIV_ROUND_UP(num_regs, MI_LOAD_REGISTER_IMM_MAX_REGS);
+		count += num_regs * 2;
+	}
+
+	return count;
+}
+
+static struct i915_oa_config_bo *
+alloc_oa_config_buffer(struct i915_perf_stream *stream,
+		       struct i915_oa_config *oa_config)
+{
+	struct drm_i915_gem_object *obj;
+	struct i915_oa_config_bo *oa_bo;
+	size_t config_length = 0;
+	u32 *cs;
+	int err;
+
+	oa_bo = kzalloc(sizeof(*oa_bo), GFP_KERNEL);
+	if (!oa_bo)
+		return ERR_PTR(-ENOMEM);
+
+	config_length += num_lri_dwords(oa_config->mux_regs_len);
+	config_length += num_lri_dwords(oa_config->b_counter_regs_len);
+	config_length += num_lri_dwords(oa_config->flex_regs_len);
+	config_length++; /* MI_BATCH_BUFFER_END */
+	config_length = ALIGN(sizeof(u32) * config_length, I915_GTT_PAGE_SIZE);
+
+	obj = i915_gem_object_create_shmem(stream->perf->i915, config_length);
+	if (IS_ERR(obj)) {
+		err = PTR_ERR(obj);
+		goto err_free;
+	}
+
+	cs = i915_gem_object_pin_map(obj, I915_MAP_WB);
+	if (IS_ERR(cs)) {
+		err = PTR_ERR(cs);
+		goto err_oa_bo;
+	}
 
-		intel_uncore_write(uncore, reg->addr, reg->value);
+	cs = write_cs_mi_lri(cs,
+			     oa_config->mux_regs,
+			     oa_config->mux_regs_len);
+	cs = write_cs_mi_lri(cs,
+			     oa_config->b_counter_regs,
+			     oa_config->b_counter_regs_len);
+	cs = write_cs_mi_lri(cs,
+			     oa_config->flex_regs,
+			     oa_config->flex_regs_len);
+
+	*cs++ = MI_BATCH_BUFFER_END;
+
+	i915_gem_object_flush_map(obj);
+	i915_gem_object_unpin_map(obj);
+
+	oa_bo->vma = i915_vma_instance(obj,
+				       &stream->engine->gt->ggtt->vm,
+				       NULL);
+	if (IS_ERR(oa_bo->vma)) {
+		err = PTR_ERR(oa_bo->vma);
+		goto err_oa_bo;
 	}
+
+	oa_bo->oa_config = i915_oa_config_get(oa_config);
+	llist_add(&oa_bo->node, &stream->oa_config_bos);
+
+	return oa_bo;
+
+err_oa_bo:
+	i915_gem_object_put(obj);
+err_free:
+	kfree(oa_bo);
+	return ERR_PTR(err);
 }
 
-static void delay_after_mux(void)
+static struct i915_vma *
+get_oa_vma(struct i915_perf_stream *stream, struct i915_oa_config *oa_config)
 {
+	struct i915_oa_config_bo *oa_bo;
+
 	/*
-	 * It apparently takes a fairly long time for a new MUX
-	 * configuration to be be applied after these register writes.
-	 * This delay duration was derived empirically based on the
-	 * render_basic config but hopefully it covers the maximum
-	 * configuration latency.
-	 *
-	 * As a fallback, the checks in _append_oa_reports() to skip
-	 * invalid OA reports do also seem to work to discard reports
-	 * generated before this config has completed - albeit not
-	 * silently.
-	 *
-	 * Unfortunately this is essentially a magic number, since we
-	 * don't currently know of a reliable mechanism for predicting
-	 * how long the MUX config will take to apply and besides
-	 * seeing invalid reports we don't know of a reliable way to
-	 * explicitly check that the MUX config has landed.
-	 *
-	 * It's even possible we've miss characterized the underlying
-	 * problem - it just seems like the simplest explanation why
-	 * a delay at this location would mitigate any invalid reports.
+	 * Look for the buffer in the already allocated BOs attached
+	 * to the stream.
 	 */
-	usleep_range(15000, 20000);
+	llist_for_each_entry(oa_bo, stream->oa_config_bos.first, node) {
+		if (oa_bo->oa_config == oa_config &&
+		    memcmp(oa_bo->oa_config->uuid,
+			   oa_config->uuid,
+			   sizeof(oa_config->uuid)) == 0)
+			goto out;
+	}
+
+	oa_bo = alloc_oa_config_buffer(stream, oa_config);
+	if (IS_ERR(oa_bo))
+		return ERR_CAST(oa_bo);
+
+out:
+	return i915_vma_get(oa_bo->vma);
+}
+
+static int emit_oa_config(struct i915_perf_stream *stream,
+			  struct intel_context *ce)
+{
+	struct i915_request *rq;
+	struct i915_vma *vma;
+	int err;
+
+	vma = get_oa_vma(stream, stream->oa_config);
+	if (IS_ERR(vma))
+		return PTR_ERR(vma);
+
+	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
+	if (err)
+		goto err_vma_put;
+
+	rq = i915_request_create(ce);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto err_vma_unpin;
+	}
+
+	i915_vma_lock(vma);
+	err = i915_request_await_object(rq, vma->obj, 0);
+	if (!err)
+		err = i915_vma_move_to_active(vma, rq, 0);
+	i915_vma_unlock(vma);
+	if (err)
+		goto err_add_request;
+
+	err = rq->engine->emit_bb_start(rq,
+					vma->node.start, 0,
+					I915_DISPATCH_SECURE);
+err_add_request:
+	i915_request_add(rq);
+err_vma_unpin:
+	i915_vma_unpin(vma);
+err_vma_put:
+	i915_vma_put(vma);
+	return err;
 }
 
 static int hsw_enable_metric_set(struct i915_perf_stream *stream)
 {
 	struct intel_uncore *uncore = stream->uncore;
-	const struct i915_oa_config *oa_config = stream->oa_config;
 
 	/*
 	 * PRM:
@@ -1797,13 +1922,7 @@ static int hsw_enable_metric_set(struct i915_perf_stream *stream)
 	intel_uncore_rmw(uncore, GEN6_UCGCTL1,
 			 0, GEN6_CSUNIT_CLOCK_GATE_DISABLE);
 
-	config_oa_regs(uncore, oa_config->mux_regs, oa_config->mux_regs_len);
-	delay_after_mux();
-
-	config_oa_regs(uncore, oa_config->b_counter_regs,
-		       oa_config->b_counter_regs_len);
-
-	return 0;
+	return emit_oa_config(stream, stream->engine->kernel_context);
 }
 
 static void hsw_disable_metric_set(struct i915_perf_stream *stream)
@@ -2167,13 +2286,7 @@ static int gen8_enable_metric_set(struct i915_perf_stream *stream)
 	if (ret)
 		return ret;
 
-	config_oa_regs(uncore, oa_config->mux_regs, oa_config->mux_regs_len);
-	delay_after_mux();
-
-	config_oa_regs(uncore, oa_config->b_counter_regs,
-		       oa_config->b_counter_regs_len);
-
-	return 0;
+	return emit_oa_config(stream, stream->engine->kernel_context);
 }
 
 static void gen8_disable_metric_set(struct i915_perf_stream *stream)
-- 
2.23.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [CI 7/9] drm/i915/perf: Allow dynamic reconfiguration of the OA stream
  2019-10-10 19:48 [CI 1/9] drm/i915/perf: Replace global wakeref tracking with engine-pm Chris Wilson
                   ` (4 preceding siblings ...)
  2019-10-10 19:48 ` [CI 6/9] drm/i915/perf: execute OA configuration from command stream Chris Wilson
@ 2019-10-10 19:48 ` Chris Wilson
  2019-10-10 20:22   ` [PATCH] " Chris Wilson
  2019-10-10 21:23   ` Chris Wilson
  2019-10-10 19:48 ` [CI 8/9] drm/i915/perf: allow holding preemption on filtered ctx Chris Wilson
                   ` (8 subsequent siblings)
  14 siblings, 2 replies; 20+ messages in thread
From: Chris Wilson @ 2019-10-10 19:48 UTC (permalink / raw)
  To: intel-gfx

Introduce a new perf_ioctl command to change the OA configuration of the
active stream. This allows the OA stream to be reconfigured between
batch buffers, giving greater flexibility in sampling. We inject a
request into the OA context to reconfigure the stream asynchronously on
the GPU in between and ordered with execbuffer calls.

Original patch for dynamic reconfiguration by Lionel Landwerlin.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 drivers/gpu/drm/i915/i915_perf.c | 34 +++++++++++++++++++++++++++++++-
 include/uapi/drm/i915_drm.h      | 10 ++++++++++
 2 files changed, 43 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index c37fe275cf33..001fb249aaec 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -2860,6 +2860,28 @@ static void i915_perf_disable_locked(struct i915_perf_stream *stream)
 		stream->ops->disable(stream);
 }
 
+static int i915_perf_config_locked(struct i915_perf_stream *stream,
+				   unsigned long metrics_set)
+{
+	struct i915_oa_config *config;
+	int err = 0;
+
+	config = i915_perf_get_oa_config(stream->perf, metrics_set);
+	if (!config)
+		return -EINVAL;
+
+	if (config != stream->oa_config) {
+		if (stream->pinned_ctx)
+			err = emit_oa_config(stream, stream->pinned_ctx);
+		if (err == 0)
+			config = xchg(&stream->oa_config, config);
+	}
+
+	i915_oa_config_put(config);
+
+	return err;
+}
+
 /**
  * i915_perf_ioctl - support ioctl() usage with i915 perf stream FDs
  * @stream: An i915 perf stream
@@ -2883,6 +2905,8 @@ static long i915_perf_ioctl_locked(struct i915_perf_stream *stream,
 	case I915_PERF_IOCTL_DISABLE:
 		i915_perf_disable_locked(stream);
 		return 0;
+	case I915_PERF_IOCTL_CONFIG:
+		return i915_perf_config_locked(stream, arg);
 	}
 
 	return -EINVAL;
@@ -4020,7 +4044,15 @@ void i915_perf_fini(struct drm_i915_private *i915)
  */
 int i915_perf_ioctl_version(void)
 {
-	return 1;
+	/*
+	 * 1: Initial version
+	 *   I915_PERF_IOCTL_ENABLE
+	 *   I915_PERF_IOCTL_DISABLE
+	 *
+	 * 2: Added runtime modification of OA config.
+	 *   I915_PERF_IOCTL_CONFIG
+	 */
+	return 2;
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 0c7b2815fbf1..5e66f7c60261 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1932,6 +1932,16 @@ struct drm_i915_perf_open_param {
  */
 #define I915_PERF_IOCTL_DISABLE	_IO('i', 0x1)
 
+/**
+ * Change metrics_set captured by a stream.
+ *
+ * Will not take effect until the stream is restarted, or upon the next
+ * execbuf when attached to a specific context.
+ *
+ * This ioctl is available in perf revision 2.
+ */
+#define I915_PERF_IOCTL_CONFIG	_IO('i', 0x2)
+
 /**
  * Common to all i915 perf records
  */
-- 
2.23.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [CI 8/9] drm/i915/perf: allow holding preemption on filtered ctx
  2019-10-10 19:48 [CI 1/9] drm/i915/perf: Replace global wakeref tracking with engine-pm Chris Wilson
                   ` (5 preceding siblings ...)
  2019-10-10 19:48 ` [CI 7/9] drm/i915/perf: Allow dynamic reconfiguration of the OA stream Chris Wilson
@ 2019-10-10 19:48 ` Chris Wilson
  2019-10-10 19:48 ` [CI 9/9] drm/i915/execlists: Prevent merging requests with conflicting flags Chris Wilson
                   ` (7 subsequent siblings)
  14 siblings, 0 replies; 20+ messages in thread
From: Chris Wilson @ 2019-10-10 19:48 UTC (permalink / raw)
  To: intel-gfx

From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>

We would like to make use of perf in Vulkan. The Vulkan API is much
lower level than OpenGL, with applications directly exposed to the
concept of command buffers (pretty much equivalent to our batch
buffers). In Vulkan, queries are always limited in scope to a command
buffer. In OpenGL, the lack of command buffer concept meant that
queries' duration could span multiple command buffers.

With that restriction gone in Vulkan, we would like to simplify
measuring performance just by measuring the deltas between the counter
snapshots written by 2 MI_RECORD_PERF_COUNT commands, rather than the
more complex scheme we currently have in the GL driver, using 2
MI_RECORD_PERF_COUNT commands and doing some post processing on the
stream of OA reports, coming from the global OA buffer, to remove any
unrelated deltas in between the 2 MI_RECORD_PERF_COUNT.

Disabling preemption only applies to the single context for which we
want to query performance counters, and is considered a privileged
operation, by default protected by CAP_SYS_ADMIN. It is possible to
enable it for a normal user by disabling the paranoid stream setting.

v2: Store preemption setting in intel_context (Chris)

v3: Use priorities to avoid preemption rather than the HW mechanism

v4: Just modify the port priority reporting function

v5: Add nopreempt flag on gem context and always flag requests
    appropriately, regardless of OA reconfiguration.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gem/i915_gem_context.h   | 18 ++++++++++
 .../gpu/drm/i915/gem/i915_gem_context_types.h |  1 +
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    |  3 ++
 drivers/gpu/drm/i915/i915_perf.c              | 34 +++++++++++++++++--
 drivers/gpu/drm/i915/i915_perf_types.h        |  8 +++++
 include/uapi/drm/i915_drm.h                   | 11 ++++++
 6 files changed, 72 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h
index 9234586830d1..cfe80590f0ed 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h
@@ -114,6 +114,24 @@ i915_gem_context_clear_user_engines(struct i915_gem_context *ctx)
 	clear_bit(CONTEXT_USER_ENGINES, &ctx->flags);
 }
 
+static inline bool
+i915_gem_context_nopreempt(const struct i915_gem_context *ctx)
+{
+	return test_bit(CONTEXT_NOPREEMPT, &ctx->flags);
+}
+
+static inline void
+i915_gem_context_set_nopreempt(struct i915_gem_context *ctx)
+{
+	set_bit(CONTEXT_NOPREEMPT, &ctx->flags);
+}
+
+static inline void
+i915_gem_context_clear_nopreempt(struct i915_gem_context *ctx)
+{
+	clear_bit(CONTEXT_NOPREEMPT, &ctx->flags);
+}
+
 static inline bool i915_gem_context_is_kernel(struct i915_gem_context *ctx)
 {
 	return !ctx->file_priv;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
index ab8e1367dfc8..fe97b8ba4fda 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -146,6 +146,7 @@ struct i915_gem_context {
 #define CONTEXT_CLOSED			1
 #define CONTEXT_FORCE_SINGLE_SUBMISSION	2
 #define CONTEXT_USER_ENGINES		3
+#define CONTEXT_NOPREEMPT		4
 
 	struct mutex mutex;
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 98816c35ffc3..e96901888323 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -2077,6 +2077,9 @@ static int eb_submit(struct i915_execbuffer *eb)
 	if (err)
 		return err;
 
+	if (i915_gem_context_nopreempt(eb->gem_context))
+		eb->request->flags |= I915_REQUEST_NOPREEMPT;
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 001fb249aaec..980cec647d2d 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -344,6 +344,8 @@ static const struct i915_oa_format gen8_plus_oa_formats[I915_OA_FORMAT_MAX] = {
  * struct perf_open_properties - for validated properties given to open a stream
  * @sample_flags: `DRM_I915_PERF_PROP_SAMPLE_*` properties are tracked as flags
  * @single_context: Whether a single or all gpu contexts should be monitored
+ * @hold_preemption: Whether the preemption is disabled for the filtered
+ *                   context
  * @ctx_handle: A gem ctx handle for use with @single_context
  * @metrics_set: An ID for an OA unit metric set advertised via sysfs
  * @oa_format: An OA unit HW report format
@@ -359,6 +361,7 @@ struct perf_open_properties {
 	u32 sample_flags;
 
 	u64 single_context:1;
+	u64 hold_preemption:1;
 	u64 ctx_handle;
 
 	/* OA sampling state */
@@ -2512,6 +2515,8 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 	if (WARN_ON(stream->oa_buffer.format_size == 0))
 		return -EINVAL;
 
+	stream->hold_preemption = props->hold_preemption;
+
 	stream->oa_buffer.format =
 		perf->oa_formats[props->oa_format].format;
 
@@ -2832,6 +2837,9 @@ static void i915_perf_enable_locked(struct i915_perf_stream *stream)
 
 	if (stream->ops->enable)
 		stream->ops->enable(stream);
+
+	if (stream->hold_preemption)
+		i915_gem_context_set_nopreempt(stream->ctx);
 }
 
 /**
@@ -2856,6 +2864,9 @@ static void i915_perf_disable_locked(struct i915_perf_stream *stream)
 	/* Allow stream->ops->disable() to refer to this */
 	stream->enabled = false;
 
+	if (stream->hold_preemption)
+		i915_gem_context_clear_nopreempt(stream->ctx);
+
 	if (stream->ops->disable)
 		stream->ops->disable(stream);
 }
@@ -3053,6 +3064,15 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf,
 		}
 	}
 
+	if (props->hold_preemption) {
+		if (!props->single_context) {
+			DRM_DEBUG("preemption disable with no context\n");
+			ret = -EINVAL;
+			goto err;
+		}
+		privileged_op = true;
+	}
+
 	/*
 	 * On Haswell the OA unit supports clock gating off for a specific
 	 * context and in this mode there's no visibility of metrics for the
@@ -3067,7 +3087,7 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf,
 	 * MI_REPORT_PERF_COUNT commands and so consider it a privileged op to
 	 * enable the OA unit by default.
 	 */
-	if (IS_HASWELL(perf->i915) && specific_ctx)
+	if (IS_HASWELL(perf->i915) && specific_ctx && !props->hold_preemption)
 		privileged_op = false;
 
 	/* Similar to perf's kernel.perf_paranoid_cpu sysctl option
@@ -3077,7 +3097,7 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf,
 	 */
 	if (privileged_op &&
 	    i915_perf_stream_paranoid && !capable(CAP_SYS_ADMIN)) {
-		DRM_DEBUG("Insufficient privileges to open system-wide i915 perf stream\n");
+		DRM_DEBUG("Insufficient privileges to open i915 perf stream\n");
 		ret = -EACCES;
 		goto err_ctx;
 	}
@@ -3279,6 +3299,9 @@ static int read_properties_unlocked(struct i915_perf *perf,
 			props->oa_periodic = true;
 			props->oa_period_exponent = value;
 			break;
+		case DRM_I915_PERF_PROP_HOLD_PREEMPTION:
+			props->hold_preemption = !!value;
+			break;
 		case DRM_I915_PERF_PROP_MAX:
 			MISSING_CASE(id);
 			return -EINVAL;
@@ -4051,8 +4074,13 @@ int i915_perf_ioctl_version(void)
 	 *
 	 * 2: Added runtime modification of OA config.
 	 *   I915_PERF_IOCTL_CONFIG
+	 *
+	 * 3: Add DRM_I915_PERF_PROP_HOLD_PREEMPTION parameter to hold
+	 *    preemption on a particular context so that performance data is
+	 *    accessible from a delta of MI_RPC reports without looking at the
+	 *    OA buffer.
 	 */
-	return 2;
+	return 3;
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h
index d35a3c1946c3..7842ea31ca5a 100644
--- a/drivers/gpu/drm/i915/i915_perf_types.h
+++ b/drivers/gpu/drm/i915/i915_perf_types.h
@@ -170,6 +170,14 @@ struct i915_perf_stream {
 	 */
 	bool enabled;
 
+	/**
+	 * @hold_preemption: Whether preemption is put on hold for command
+	 * submissions done on the @ctx. This is useful for some drivers that
+	 * cannot easily post process the OA buffer context to subtract delta
+	 * of performance counters not associated with @ctx.
+	 */
+	bool hold_preemption;
+
 	/**
 	 * @ops: The callbacks providing the implementation of this specific
 	 * type of configured stream.
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 5e66f7c60261..9f9f05a54e86 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1890,6 +1890,17 @@ enum drm_i915_perf_property_id {
 	 */
 	DRM_I915_PERF_PROP_OA_EXPONENT,
 
+	/**
+	 * Specifying this property is only valid when specifying a context to
+	 * filter with DRM_I915_PERF_PROP_CTX_HANDLE. Specifying this property
+	 * will hold preemption of the particular context we want to gather
+	 * performance data about. The execbuf2 submissions must include a
+	 * drm_i915_gem_execbuffer_ext_perf parameter for this to apply.
+	 *
+	 * This property is available in perf revision 3.
+	 */
+	DRM_I915_PERF_PROP_HOLD_PREEMPTION,
+
 	DRM_I915_PERF_PROP_MAX /* non-ABI */
 };
 
-- 
2.23.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [CI 9/9] drm/i915/execlists: Prevent merging requests with conflicting flags
  2019-10-10 19:48 [CI 1/9] drm/i915/perf: Replace global wakeref tracking with engine-pm Chris Wilson
                   ` (6 preceding siblings ...)
  2019-10-10 19:48 ` [CI 8/9] drm/i915/perf: allow holding preemption on filtered ctx Chris Wilson
@ 2019-10-10 19:48 ` Chris Wilson
  2019-10-10 20:22 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [CI,1/9] drm/i915/perf: Replace global wakeref tracking with engine-pm Patchwork
                   ` (6 subsequent siblings)
  14 siblings, 0 replies; 20+ messages in thread
From: Chris Wilson @ 2019-10-10 19:48 UTC (permalink / raw)
  To: intel-gfx

We set out-of-band parameters inside the i915_request.flags field,
such as disabling preemption or marking the end-of-context. We should
not coalesce consecutive requests if they have differing instructions
as we only inspect the last active request in a context. Thus if we
allow a later request to be merged into the same execution context, it
will mask any of the earlier flags.

References: 2a98f4e65bba ("drm/i915: add infrastructure to hold off preemption on a request")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_lrc.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 9666d51b7e97..7b43c1852776 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1184,6 +1184,9 @@ static bool can_merge_rq(const struct i915_request *prev,
 	if (i915_request_completed(next))
 		return true;
 
+	if (unlikely((prev->flags ^ next->flags) & I915_REQUEST_NOPREEMPT))
+		return false;
+
 	if (!can_merge_ctx(prev->hw_context, next->hw_context))
 		return false;
 
-- 
2.23.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* ✗ Fi.CI.CHECKPATCH: warning for series starting with [CI,1/9] drm/i915/perf: Replace global wakeref tracking with engine-pm
  2019-10-10 19:48 [CI 1/9] drm/i915/perf: Replace global wakeref tracking with engine-pm Chris Wilson
                   ` (7 preceding siblings ...)
  2019-10-10 19:48 ` [CI 9/9] drm/i915/execlists: Prevent merging requests with conflicting flags Chris Wilson
@ 2019-10-10 20:22 ` Patchwork
  2019-10-10 20:53 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [CI,1/9] drm/i915/perf: Replace global wakeref tracking with engine-pm (rev3) Patchwork
                   ` (5 subsequent siblings)
  14 siblings, 0 replies; 20+ messages in thread
From: Patchwork @ 2019-10-10 20:22 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [CI,1/9] drm/i915/perf: Replace global wakeref tracking with engine-pm
URL   : https://patchwork.freedesktop.org/series/67874/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
4aa72c4ef98d drm/i915/perf: Replace global wakeref tracking with engine-pm
de5e384f91f5 drm/i915/perf: introduce a versioning of the i915-perf uapi
ac5a1dbba730 drm/i915/perf: allow for CS OA configs to be created lazily
15679a893373 drm/i915: add support for perf configuration queries
f4fc96f15f7a drm/i915/perf: implement active wait for noa configurations
-:46: CHECK:SPACING: spaces preferred around that '<<' (ctx:VxV)
#46: FILE: drivers/gpu/drm/i915/gt/intel_gpu_commands.h:228:
+#define   PIPE_CONTROL_WRITE_TIMESTAMP			(3<<14)
                                       			  ^

-:168: ERROR:CODE_INDENT: code indent should use tabs where possible
#168: FILE: drivers/gpu/drm/i915/i915_perf.c:1555:
+^I^I^I^I^I       ^Ioffset) + 4 * d;$

-:168: WARNING:SPACE_BEFORE_TAB: please, no space before tabs
#168: FILE: drivers/gpu/drm/i915/i915_perf.c:1555:
+^I^I^I^I^I       ^Ioffset) + 4 * d;$

-:181: CHECK:OPEN_ENDED_LINE: Lines should not end with a '('
#181: FILE: drivers/gpu/drm/i915/i915_perf.c:1568:
+		DIV64_U64_ROUND_UP(

-:215: CHECK:MULTIPLE_ASSIGNMENTS: multiple assignments should be avoided
#215: FILE: drivers/gpu/drm/i915/i915_perf.c:1602:
+	batch = cs = i915_gem_object_pin_map(bo, I915_MAP_WB);

-:223: CHECK:OPEN_ENDED_LINE: Lines should not end with a '('
#223: FILE: drivers/gpu/drm/i915/i915_perf.c:1610:
+		cs = save_restore_register(

-:226: CHECK:OPEN_ENDED_LINE: Lines should not end with a '('
#226: FILE: drivers/gpu/drm/i915/i915_perf.c:1613:
+	cs = save_restore_register(

-:328: CHECK:OPEN_ENDED_LINE: Lines should not end with a '('
#328: FILE: drivers/gpu/drm/i915/i915_perf.c:1715:
+		cs = save_restore_register(

-:331: CHECK:OPEN_ENDED_LINE: Lines should not end with a '('
#331: FILE: drivers/gpu/drm/i915/i915_perf.c:1718:
+	cs = save_restore_register(

-:448: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#448: 
new file mode 100644

-:453: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#453: FILE: drivers/gpu/drm/i915/selftests/i915_perf.c:1:
+/*

-:454: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#454: FILE: drivers/gpu/drm/i915/selftests/i915_perf.c:2:
+ * SPDX-License-Identifier: MIT

total: 1 errors, 4 warnings, 7 checks, 590 lines checked
2245bacfcea6 drm/i915/perf: execute OA configuration from command stream
def0ed974f64 drm/i915/perf: Allow dynamic reconfiguration of the OA stream
c04dded670da drm/i915/perf: allow holding preemption on filtered ctx
e7a841a6b27d drm/i915/execlists: Prevent merging requests with conflicting flags
-:14: WARNING:COMMIT_LOG_LONG_LINE: Possible unwrapped commit description (prefer a maximum 75 chars per line)
#14: 
References: 2a98f4e65bba ("drm/i915: add infrastructure to hold off preemption on a request")

-:14: ERROR:GIT_COMMIT_ID: Please use git commit description style 'commit <12+ chars of sha1> ("<title line>")' - ie: 'commit 2a98f4e65bba ("drm/i915: add infrastructure to hold off preemption on a request")'
#14: 
References: 2a98f4e65bba ("drm/i915: add infrastructure to hold off preemption on a request")

total: 1 errors, 1 warnings, 0 checks, 9 lines checked

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH] drm/i915/perf: Allow dynamic reconfiguration of the OA stream
  2019-10-10 19:48 ` [CI 7/9] drm/i915/perf: Allow dynamic reconfiguration of the OA stream Chris Wilson
@ 2019-10-10 20:22   ` Chris Wilson
  2019-10-10 21:23   ` Chris Wilson
  1 sibling, 0 replies; 20+ messages in thread
From: Chris Wilson @ 2019-10-10 20:22 UTC (permalink / raw)
  To: intel-gfx

Introduce a new perf_ioctl command to change the OA configuration of the
active stream. This allows the OA stream to be reconfigured between
batch buffers, giving greater flexibility in sampling. We inject a
request into the OA context to reconfigure the stream asynchronously on
the GPU in between and ordered with execbuffer calls.

Original patch for dynamic reconfiguration by Lionel Landwerlin.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 drivers/gpu/drm/i915/i915_perf.c | 46 +++++++++++++++++++++++++++++++-
 include/uapi/drm/i915_drm.h      | 10 +++++++
 2 files changed, 55 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index c37fe275cf33..0459e66eba36 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -2860,6 +2860,40 @@ static void i915_perf_disable_locked(struct i915_perf_stream *stream)
 		stream->ops->disable(stream);
 }
 
+static long i915_perf_config_locked(struct i915_perf_stream *stream,
+				    unsigned long metrics_set)
+{
+	struct i915_oa_config *config;
+	int err = 0;
+
+	config = i915_perf_get_oa_config(stream->perf, metrics_set);
+	if (!config)
+		return -EINVAL;
+
+	if (config != stream->oa_config) {
+		struct intel_context *ce;
+
+		/*
+		 * If OA is bound to a specific context, emit the
+		 * reconfiguration inline from that context. The update
+		 * will then be ordered with respect to submission on that
+		 * context.
+		 *
+		 * When set globally, we use a low priority kernel context,
+		 * so it will effectively take effect when idle.
+		 */
+		ce = stream->pinned_ctx ?: stream->engine->kernel_context;
+
+		err = emit_oa_config(stream, ce);
+		if (err == 0)
+			config = xchg(&stream->oa_config, config);
+	}
+
+	i915_oa_config_put(config);
+
+	return err;
+}
+
 /**
  * i915_perf_ioctl - support ioctl() usage with i915 perf stream FDs
  * @stream: An i915 perf stream
@@ -2883,6 +2917,8 @@ static long i915_perf_ioctl_locked(struct i915_perf_stream *stream,
 	case I915_PERF_IOCTL_DISABLE:
 		i915_perf_disable_locked(stream);
 		return 0;
+	case I915_PERF_IOCTL_CONFIG:
+		return i915_perf_config_locked(stream, arg);
 	}
 
 	return -EINVAL;
@@ -4020,7 +4056,15 @@ void i915_perf_fini(struct drm_i915_private *i915)
  */
 int i915_perf_ioctl_version(void)
 {
-	return 1;
+	/*
+	 * 1: Initial version
+	 *   I915_PERF_IOCTL_ENABLE
+	 *   I915_PERF_IOCTL_DISABLE
+	 *
+	 * 2: Added runtime modification of OA config.
+	 *   I915_PERF_IOCTL_CONFIG
+	 */
+	return 2;
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 0c7b2815fbf1..5e66f7c60261 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1932,6 +1932,16 @@ struct drm_i915_perf_open_param {
  */
 #define I915_PERF_IOCTL_DISABLE	_IO('i', 0x1)
 
+/**
+ * Change metrics_set captured by a stream.
+ *
+ * Will not take effect until the stream is restarted, or upon the next
+ * execbuf when attached to a specific context.
+ *
+ * This ioctl is available in perf revision 2.
+ */
+#define I915_PERF_IOCTL_CONFIG	_IO('i', 0x2)
+
 /**
  * Common to all i915 perf records
  */
-- 
2.23.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH] drm/i915/perf: implement active wait for noa configurations
  2019-10-10 19:48 ` [CI 5/9] drm/i915/perf: implement active wait for noa configurations Chris Wilson
@ 2019-10-10 20:24   ` Chris Wilson
  0 siblings, 0 replies; 20+ messages in thread
From: Chris Wilson @ 2019-10-10 20:24 UTC (permalink / raw)
  To: intel-gfx

From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>

NOA configurations take some amount of time to apply. That amount of
time depends on the size of the GT. There is no documented time for
this. For example, past experimentations with powergating
configuration changes seem to indicate a 60~70us delay. We go with
500us as default for now which should be over the required amount of
time (according to HW architects).

v2: Don't forget to save/restore registers used for the wait (Chris)

v3: Name used CS_GPR registers (Chris)
    Fix compile issue due to rebase (Lionel)

v4: Fix save/restore helpers (Umesh)

v5: Move noa_wait from drm_i915_private to i915_perf_stream (Lionel)

v6: Add missing struct declarations in i915_perf.h

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v4)
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> (v4)
---
 drivers/gpu/drm/i915/gt/intel_gpu_commands.h  |   4 +-
 drivers/gpu/drm/i915/gt/intel_gt_types.h      |   5 +
 drivers/gpu/drm/i915/i915_debugfs.c           |  32 +++
 drivers/gpu/drm/i915/i915_perf.c              | 224 ++++++++++++++++++
 drivers/gpu/drm/i915/i915_perf_types.h        |   8 +
 drivers/gpu/drm/i915/i915_reg.h               |   4 +-
 .../drm/i915/selftests/i915_live_selftests.h  |   1 +
 drivers/gpu/drm/i915/selftests/i915_perf.c    | 216 +++++++++++++++++
 8 files changed, 492 insertions(+), 2 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/selftests/i915_perf.c

diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index 0987100c786b..8e63cffcabe0 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -163,7 +163,8 @@
 #define MI_BATCH_BUFFER_START	MI_INSTR(0x31, 0)
 #define   MI_BATCH_GTT		    (2<<6) /* aliased with (1<<7) on gen4 */
 #define MI_BATCH_BUFFER_START_GEN8	MI_INSTR(0x31, 1)
-#define   MI_BATCH_RESOURCE_STREAMER (1<<10)
+#define   MI_BATCH_RESOURCE_STREAMER REG_BIT(10)
+#define   MI_BATCH_PREDICATE         REG_BIT(15) /* HSW+ on RCS only*/
 
 /*
  * 3D instructions used by the kernel
@@ -224,6 +225,7 @@
 #define   PIPE_CONTROL_CS_STALL				(1<<20)
 #define   PIPE_CONTROL_TLB_INVALIDATE			(1<<18)
 #define   PIPE_CONTROL_MEDIA_STATE_CLEAR		(1<<16)
+#define   PIPE_CONTROL_WRITE_TIMESTAMP			(3<<14)
 #define   PIPE_CONTROL_QW_WRITE				(1<<14)
 #define   PIPE_CONTROL_POST_SYNC_OP_MASK                (3<<14)
 #define   PIPE_CONTROL_DEPTH_STALL			(1<<13)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index 802f516a3430..be4b263621c8 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -109,6 +109,11 @@ enum intel_gt_scratch_field {
 	/* 8 bytes */
 	INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA = 256,
 
+	/* 6 * 8 bytes */
+	INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR = 2048,
+
+	/* 4 bytes */
+	INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1 = 2096,
 };
 
 #endif /* __INTEL_GT_TYPES_H__ */
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 277f31297f29..d463a28b7475 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3590,6 +3590,37 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_wedged_fops,
 			i915_wedged_get, i915_wedged_set,
 			"%llu\n");
 
+static int
+i915_perf_noa_delay_set(void *data, u64 val)
+{
+	struct drm_i915_private *i915 = data;
+	const u32 clk = RUNTIME_INFO(i915)->cs_timestamp_frequency_khz;
+
+	/*
+	 * This would lead to infinite waits as we're doing timestamp
+	 * difference on the CS with only 32bits.
+	 */
+	if (val > mul_u32_u32(U32_MAX, clk))
+		return -EINVAL;
+
+	atomic64_set(&i915->perf.noa_programming_delay, val);
+	return 0;
+}
+
+static int
+i915_perf_noa_delay_get(void *data, u64 *val)
+{
+	struct drm_i915_private *i915 = data;
+
+	*val = atomic64_read(&i915->perf.noa_programming_delay);
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(i915_perf_noa_delay_fops,
+			i915_perf_noa_delay_get,
+			i915_perf_noa_delay_set,
+			"%llu\n");
+
 #define DROP_UNBOUND	BIT(0)
 #define DROP_BOUND	BIT(1)
 #define DROP_RETIRE	BIT(2)
@@ -4340,6 +4371,7 @@ static const struct i915_debugfs_files {
 	const char *name;
 	const struct file_operations *fops;
 } i915_debugfs_files[] = {
+	{"i915_perf_noa_delay", &i915_perf_noa_delay_fops},
 	{"i915_wedged", &i915_wedged_fops},
 	{"i915_cache_sharing", &i915_cache_sharing_fops},
 	{"i915_gem_drop_caches", &i915_drop_caches_fops},
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 7d7baee7febe..abb7a70e17ec 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -198,6 +198,7 @@
 #include "gem/i915_gem_context.h"
 #include "gt/intel_engine_pm.h"
 #include "gt/intel_engine_user.h"
+#include "gt/intel_gt.h"
 #include "gt/intel_lrc_reg.h"
 
 #include "i915_drv.h"
@@ -1337,6 +1338,12 @@ free_oa_buffer(struct i915_perf_stream *stream)
 	stream->oa_buffer.vaddr = NULL;
 }
 
+static void
+free_noa_wait(struct i915_perf_stream *stream)
+{
+	i915_vma_unpin_and_release(&stream->noa_wait, 0);
+}
+
 static void
 free_oa_configs(struct i915_perf_stream *stream)
 {
@@ -1369,6 +1376,7 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
 		oa_put_render_ctx_id(stream);
 
 	free_oa_configs(stream);
+	free_noa_wait(stream);
 
 	if (perf->spurious_report_rs.missed) {
 		DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n",
@@ -1529,6 +1537,206 @@ static int alloc_oa_buffer(struct i915_perf_stream *stream)
 	return ret;
 }
 
+static u32 *save_restore_register(struct i915_perf_stream *stream, u32 *cs,
+				  bool save, i915_reg_t reg, u32 offset,
+				  u32 dword_count)
+{
+	u32 cmd;
+	u32 d;
+
+	cmd = save ? MI_STORE_REGISTER_MEM : MI_LOAD_REGISTER_MEM;
+	if (INTEL_GEN(stream->perf->i915) >= 8)
+		cmd++;
+
+	for (d = 0; d < dword_count; d++) {
+		*cs++ = cmd;
+		*cs++ = i915_mmio_reg_offset(reg) + 4 * d;
+		*cs++ = intel_gt_scratch_offset(stream->engine->gt,
+						offset) + 4 * d;
+		*cs++ = 0;
+	}
+
+	return cs;
+}
+
+static int alloc_noa_wait(struct i915_perf_stream *stream)
+{
+	struct drm_i915_private *i915 = stream->perf->i915;
+	struct drm_i915_gem_object *bo;
+	struct i915_vma *vma;
+	const u64 delay_ticks = 0xffffffffffffffff -
+		DIV64_U64_ROUND_UP(
+			atomic64_read(&stream->perf->noa_programming_delay) *
+			RUNTIME_INFO(i915)->cs_timestamp_frequency_khz,
+			1000000ull);
+	const u32 base = stream->engine->mmio_base;
+#define CS_GPR(x) GEN8_RING_CS_GPR(base, x)
+	u32 *batch, *ts0, *cs, *jump;
+	int ret, i;
+	enum {
+		START_TS,
+		NOW_TS,
+		DELTA_TS,
+		JUMP_PREDICATE,
+		DELTA_TARGET,
+		N_CS_GPR
+	};
+
+	bo = i915_gem_object_create_internal(i915, 4096);
+	if (IS_ERR(bo)) {
+		DRM_ERROR("Failed to allocate NOA wait batchbuffer\n");
+		return PTR_ERR(bo);
+	}
+
+	/*
+	 * We pin in GGTT because multiple OA config BOs will have a jump to
+	 * this buffer, so its address needs to stay fixed during the
+	 * lifetime of the i915/perf stream.
+	 */
+	vma = i915_gem_object_ggtt_pin(bo, NULL, 0, 0, PIN_HIGH);
+	if (IS_ERR(vma)) {
+		ret = PTR_ERR(vma);
+		goto err_unref;
+	}
+
+	batch = cs = i915_gem_object_pin_map(bo, I915_MAP_WB);
+	if (IS_ERR(batch)) {
+		ret = PTR_ERR(batch);
+		goto err_unpin;
+	}
+
+	/* Save registers. */
+	for (i = 0; i < N_CS_GPR; i++)
+		cs = save_restore_register(
+			stream, cs, true /* save */, CS_GPR(i),
+			INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR + 8 * i, 2);
+	cs = save_restore_register(
+		stream, cs, true /* save */, MI_PREDICATE_RESULT_1,
+		INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1, 1);
+
+	/* First timestamp snapshot location. */
+	ts0 = cs;
+
+	/*
+	 * Initial snapshot of the timestamp register to implement the wait.
+	 * We work with 32b values, so clear out the top 32 bits of the
+	 * register because the ALU works on 64 bits.
+	 */
+	*cs++ = MI_LOAD_REGISTER_IMM(1);
+	*cs++ = i915_mmio_reg_offset(CS_GPR(START_TS)) + 4;
+	*cs++ = 0;
+	*cs++ = MI_LOAD_REGISTER_REG | (3 - 2);
+	*cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(base));
+	*cs++ = i915_mmio_reg_offset(CS_GPR(START_TS));
+
+	/*
+	 * This is the location we're going to jump back into until the
+	 * required amount of time has passed.
+	 */
+	jump = cs;
+
+	/*
+	 * Take another snapshot of the timestamp register. Take care to clear
+	 * up the top 32bits of CS_GPR(1) as we're using it for other
+	 * operations below.
+	 */
+	*cs++ = MI_LOAD_REGISTER_IMM(1);
+	*cs++ = i915_mmio_reg_offset(CS_GPR(NOW_TS)) + 4;
+	*cs++ = 0;
+	*cs++ = MI_LOAD_REGISTER_REG | (3 - 2);
+	*cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(base));
+	*cs++ = i915_mmio_reg_offset(CS_GPR(NOW_TS));
+
+	/*
+	 * Do a diff between the 2 timestamps and store the result back into
+	 * CS_GPR(1).
+	 */
+	*cs++ = MI_MATH(5);
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(NOW_TS));
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(START_TS));
+	*cs++ = MI_MATH_SUB;
+	*cs++ = MI_MATH_STORE(MI_MATH_REG(DELTA_TS), MI_MATH_REG_ACCU);
+	*cs++ = MI_MATH_STORE(MI_MATH_REG(JUMP_PREDICATE), MI_MATH_REG_CF);
+
+	/*
+	 * Transfer the carry flag (set to 1 if ts1 < ts0, meaning the
+	 * timestamp has rolled over the 32 bits) into the predicate register
+	 * to be used for the predicated jump.
+	 */
+	*cs++ = MI_LOAD_REGISTER_REG | (3 - 2);
+	*cs++ = i915_mmio_reg_offset(CS_GPR(JUMP_PREDICATE));
+	*cs++ = i915_mmio_reg_offset(MI_PREDICATE_RESULT_1);
+
+	/* Restart from the beginning if we had timestamps roll over. */
+	*cs++ = (INTEL_GEN(i915) < 8 ?
+		 MI_BATCH_BUFFER_START :
+		 MI_BATCH_BUFFER_START_GEN8) |
+		MI_BATCH_PREDICATE;
+	*cs++ = i915_ggtt_offset(vma) + (ts0 - batch) * 4;
+	*cs++ = 0;
+
+	/*
+	 * Now add the diff between the two previous timestamps to:
+	 *      (((1 << 64) - 1) - delay_ns)
+	 *
+	 * When the Carry Flag contains 1 this means the elapsed time is
+	 * longer than the expected delay, and we can exit the wait loop.
+	 */
+	*cs++ = MI_LOAD_REGISTER_IMM(2);
+	*cs++ = i915_mmio_reg_offset(CS_GPR(DELTA_TARGET));
+	*cs++ = lower_32_bits(delay_ticks);
+	*cs++ = i915_mmio_reg_offset(CS_GPR(DELTA_TARGET)) + 4;
+	*cs++ = upper_32_bits(delay_ticks);
+
+	*cs++ = MI_MATH(4);
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(DELTA_TS));
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(DELTA_TARGET));
+	*cs++ = MI_MATH_ADD;
+	*cs++ = MI_MATH_STOREINV(MI_MATH_REG(JUMP_PREDICATE), MI_MATH_REG_CF);
+
+	/*
+	 * Transfer the result into the predicate register to be used for the
+	 * predicated jump.
+	 */
+	*cs++ = MI_LOAD_REGISTER_REG | (3 - 2);
+	*cs++ = i915_mmio_reg_offset(CS_GPR(JUMP_PREDICATE));
+	*cs++ = i915_mmio_reg_offset(MI_PREDICATE_RESULT_1);
+
+	/* Predicate the jump.  */
+	*cs++ = (INTEL_GEN(i915) < 8 ?
+		 MI_BATCH_BUFFER_START :
+		 MI_BATCH_BUFFER_START_GEN8) |
+		MI_BATCH_PREDICATE;
+	*cs++ = i915_ggtt_offset(vma) + (jump - batch) * 4;
+	*cs++ = 0;
+
+	/* Restore registers. */
+	for (i = 0; i < N_CS_GPR; i++)
+		cs = save_restore_register(
+			stream, cs, false /* restore */, CS_GPR(i),
+			INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR + 8 * i, 2);
+	cs = save_restore_register(
+		stream, cs, false /* restore */, MI_PREDICATE_RESULT_1,
+		INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1, 1);
+
+	/* And return to the ring. */
+	*cs++ = MI_BATCH_BUFFER_END;
+
+	GEM_BUG_ON(cs - batch > PAGE_SIZE / sizeof(*batch));
+
+	i915_gem_object_flush_map(bo);
+	i915_gem_object_unpin_map(bo);
+
+	stream->noa_wait = vma;
+	return 0;
+
+err_unpin:
+	__i915_vma_unpin(vma);
+err_unref:
+	i915_gem_object_put(bo);
+	return ret;
+}
+
 static void config_oa_regs(struct intel_uncore *uncore,
 			   const struct i915_oa_reg *regs,
 			   u32 n_regs)
@@ -2206,6 +2414,12 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 		}
 	}
 
+	ret = alloc_noa_wait(stream);
+	if (ret) {
+		DRM_DEBUG("Unable to allocate NOA wait batch buffer\n");
+		goto err_noa_wait_alloc;
+	}
+
 	stream->oa_config = i915_perf_get_oa_config(perf, props->metrics_set);
 	if (!stream->oa_config) {
 		DRM_DEBUG("Invalid OA config id=%i\n", props->metrics_set);
@@ -2265,6 +2479,9 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 	intel_engine_pm_put(stream->engine);
 
 err_config:
+	free_noa_wait(stream);
+
+err_noa_wait_alloc:
 	if (stream->ctx)
 		oa_put_render_ctx_id(stream);
 
@@ -3650,6 +3867,9 @@ void i915_perf_init(struct drm_i915_private *i915)
 		ratelimit_set_flags(&perf->spurious_report_rs,
 				    RATELIMIT_MSG_ON_RELEASE);
 
+		atomic64_set(&perf->noa_programming_delay,
+			     500 * 1000 /* 500us */);
+
 		perf->i915 = i915;
 	}
 }
@@ -3689,3 +3909,7 @@ int i915_perf_ioctl_version(void)
 {
 	return 1;
 }
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/i915_perf.c"
+#endif
diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h
index 337cd7d2ad77..d35a3c1946c3 100644
--- a/drivers/gpu/drm/i915/i915_perf_types.h
+++ b/drivers/gpu/drm/i915/i915_perf_types.h
@@ -266,6 +266,12 @@ struct i915_perf_stream {
 		 */
 		u32 head;
 	} oa_buffer;
+
+	/**
+	 * A batch buffer doing a wait on the GPU for the NOA logic to be
+	 * reprogrammed.
+	 */
+	struct i915_vma *noa_wait;
 };
 
 /**
@@ -385,6 +391,8 @@ struct i915_perf {
 
 	struct i915_oa_ops ops;
 	const struct i915_oa_format *oa_formats;
+
+	atomic64_t noa_programming_delay;
 };
 
 #endif /* _I915_PERF_TYPES_H_ */
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 1dc067fc57ab..99f8a08dc2b2 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -545,7 +545,9 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define MI_PREDICATE_SRC0_UDW	_MMIO(0x2400 + 4)
 #define MI_PREDICATE_SRC1	_MMIO(0x2408)
 #define MI_PREDICATE_SRC1_UDW	_MMIO(0x2408 + 4)
-
+#define MI_PREDICATE_DATA       _MMIO(0x2410)
+#define MI_PREDICATE_RESULT     _MMIO(0x2418)
+#define MI_PREDICATE_RESULT_1   _MMIO(0x241c)
 #define MI_PREDICATE_RESULT_2	_MMIO(0x2214)
 #define  LOWER_SLICE_ENABLED	(1 << 0)
 #define  LOWER_SLICE_DISABLED	(0 << 0)
diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
index 6713efea350b..6daf6599ec79 100644
--- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
@@ -35,3 +35,4 @@ selftest(reset, intel_reset_live_selftests)
 selftest(hangcheck, intel_hangcheck_live_selftests)
 selftest(execlists, intel_execlists_live_selftests)
 selftest(guc, intel_guc_live_selftest)
+selftest(perf, i915_perf_live_selftests)
diff --git a/drivers/gpu/drm/i915/selftests/i915_perf.c b/drivers/gpu/drm/i915/selftests/i915_perf.c
new file mode 100644
index 000000000000..dc6d689e4251
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/i915_perf.c
@@ -0,0 +1,216 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include <linux/kref.h>
+
+#include "gem/i915_gem_pm.h"
+#include "gt/intel_gt.h"
+
+#include "i915_selftest.h"
+
+#include "igt_flush_test.h"
+#include "lib_sw_fence.h"
+
+static struct i915_perf_stream *
+test_stream(struct i915_perf *perf)
+{
+	struct drm_i915_perf_open_param param = {};
+	struct perf_open_properties props = {
+		.engine = intel_engine_lookup_user(perf->i915,
+						   I915_ENGINE_CLASS_RENDER,
+						   0),
+		.sample_flags = SAMPLE_OA_REPORT,
+		.oa_format = I915_OA_FORMAT_C4_B8,
+		.metrics_set = 1,
+	};
+	struct i915_perf_stream *stream;
+
+	stream = kzalloc(sizeof(*stream), GFP_KERNEL);
+	if (!stream)
+		return NULL;
+
+	stream->perf = perf;
+
+	mutex_lock(&perf->lock);
+	if (i915_oa_stream_init(stream, &param, &props)) {
+		kfree(stream);
+		stream =  NULL;
+	}
+	mutex_unlock(&perf->lock);
+
+	return stream;
+}
+
+static void stream_destroy(struct i915_perf_stream *stream)
+{
+	struct i915_perf *perf = stream->perf;
+
+	mutex_lock(&perf->lock);
+	i915_perf_destroy_locked(stream);
+	mutex_unlock(&perf->lock);
+}
+
+static int live_sanitycheck(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct i915_perf_stream *stream;
+
+	/* Quick check we can create a perf stream */
+
+	stream = test_stream(&i915->perf);
+	if (!stream)
+		return -EINVAL;
+
+	stream_destroy(stream);
+	return 0;
+}
+
+static int write_timestamp(struct i915_request *rq, int slot)
+{
+	u32 *cs;
+	int len;
+
+	cs = intel_ring_begin(rq, 6);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	len = 5;
+	if (INTEL_GEN(rq->i915) >= 8)
+		len++;
+
+	*cs++ = GFX_OP_PIPE_CONTROL(len);
+	*cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB |
+		PIPE_CONTROL_STORE_DATA_INDEX |
+		PIPE_CONTROL_WRITE_TIMESTAMP;
+	*cs++ = slot * sizeof(u32);
+	*cs++ = 0;
+	*cs++ = 0;
+	*cs++ = 0;
+
+	intel_ring_advance(rq, cs);
+
+	return 0;
+}
+
+static ktime_t poll_status(struct i915_request *rq, int slot)
+{
+	while (!intel_read_status_page(rq->engine, slot) &&
+	       !i915_request_completed(rq))
+		cpu_relax();
+
+	return ktime_get();
+}
+
+static int live_noa_delay(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct i915_perf_stream *stream;
+	struct i915_request *rq;
+	ktime_t t0, t1;
+	u64 expected;
+	u32 delay;
+	int err;
+	int i;
+
+	/* Check that the GPU delay matches expectations */
+
+	stream = test_stream(&i915->perf);
+	if (!stream)
+		return -ENOMEM;
+
+	expected = atomic64_read(&stream->perf->noa_programming_delay);
+
+	if (stream->engine->class != RENDER_CLASS) {
+		err = -ENODEV;
+		goto out;
+	}
+
+	for (i = 0; i < 4; i++)
+		intel_write_status_page(stream->engine, 0x100 + i, 0);
+
+	rq = i915_request_create(stream->engine->kernel_context);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto out;
+	}
+
+	if (rq->engine->emit_init_breadcrumb &&
+	    i915_request_timeline(rq)->has_initial_breadcrumb) {
+		err = rq->engine->emit_init_breadcrumb(rq);
+		if (err) {
+			i915_request_add(rq);
+			goto out;
+		}
+	}
+
+	err = write_timestamp(rq, 0x100);
+	if (err) {
+		i915_request_add(rq);
+		goto out;
+	}
+
+	err = rq->engine->emit_bb_start(rq,
+					i915_ggtt_offset(stream->noa_wait), 0,
+					I915_DISPATCH_SECURE);
+	if (err) {
+		i915_request_add(rq);
+		goto out;
+	}
+
+	err = write_timestamp(rq, 0x102);
+	if (err) {
+		i915_request_add(rq);
+		goto out;
+	}
+
+	i915_request_get(rq);
+	i915_request_add(rq);
+
+	preempt_disable();
+	t0 = poll_status(rq, 0x100);
+	t1 = poll_status(rq, 0x102);
+	preempt_enable();
+
+	pr_info("CPU delay: %lluns, expected %lluns\n",
+		ktime_sub(t1, t0), expected);
+
+	delay = intel_read_status_page(stream->engine, 0x102);
+	delay -= intel_read_status_page(stream->engine, 0x100);
+	delay = div_u64(mul_u32_u32(delay, 1000 * 1000),
+			RUNTIME_INFO(i915)->cs_timestamp_frequency_khz);
+	pr_info("GPU delay: %uns, expected %lluns\n",
+		delay, expected);
+
+	if (4 * delay < 3 * expected || 2 * delay > 3 * expected) {
+		pr_err("GPU delay [%uus] outside of expected threshold! [%lluus, %lluus]\n",
+		       delay / 1000,
+		       div_u64(3 * expected, 4000),
+		       div_u64(3 * expected, 2000));
+		err = -EINVAL;
+	}
+
+	i915_request_put(rq);
+out:
+	stream_destroy(stream);
+	return err;
+}
+
+int i915_perf_live_selftests(struct drm_i915_private *i915)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(live_sanitycheck),
+		SUBTEST(live_noa_delay),
+	};
+	struct i915_perf *perf = &i915->perf;
+
+	if (!perf->metrics_kobj || !perf->ops.enable_metric_set)
+		return 0;
+
+	if (intel_gt_is_wedged(&i915->gt))
+		return 0;
+
+	return i915_subtests(tests, i915);
+}
-- 
2.23.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* ✗ Fi.CI.CHECKPATCH: warning for series starting with [CI,1/9] drm/i915/perf: Replace global wakeref tracking with engine-pm (rev3)
  2019-10-10 19:48 [CI 1/9] drm/i915/perf: Replace global wakeref tracking with engine-pm Chris Wilson
                   ` (8 preceding siblings ...)
  2019-10-10 20:22 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [CI,1/9] drm/i915/perf: Replace global wakeref tracking with engine-pm Patchwork
@ 2019-10-10 20:53 ` Patchwork
  2019-10-10 21:03 ` ✓ Fi.CI.BAT: success for series starting with [CI,1/9] drm/i915/perf: Replace global wakeref tracking with engine-pm Patchwork
                   ` (4 subsequent siblings)
  14 siblings, 0 replies; 20+ messages in thread
From: Patchwork @ 2019-10-10 20:53 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [CI,1/9] drm/i915/perf: Replace global wakeref tracking with engine-pm (rev3)
URL   : https://patchwork.freedesktop.org/series/67874/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
e3e8e6f3790d drm/i915/perf: Replace global wakeref tracking with engine-pm
44ce06aa2ac1 drm/i915/perf: introduce a versioning of the i915-perf uapi
bb73a1117979 drm/i915/perf: allow for CS OA configs to be created lazily
5dac73e6848a drm/i915: add support for perf configuration queries
93a0ad5870f5 drm/i915/perf: implement active wait for noa configurations
-:46: CHECK:SPACING: spaces preferred around that '<<' (ctx:VxV)
#46: FILE: drivers/gpu/drm/i915/gt/intel_gpu_commands.h:228:
+#define   PIPE_CONTROL_WRITE_TIMESTAMP			(3<<14)
                                       			  ^

-:181: CHECK:OPEN_ENDED_LINE: Lines should not end with a '('
#181: FILE: drivers/gpu/drm/i915/i915_perf.c:1568:
+		DIV64_U64_ROUND_UP(

-:215: CHECK:MULTIPLE_ASSIGNMENTS: multiple assignments should be avoided
#215: FILE: drivers/gpu/drm/i915/i915_perf.c:1602:
+	batch = cs = i915_gem_object_pin_map(bo, I915_MAP_WB);

-:223: CHECK:OPEN_ENDED_LINE: Lines should not end with a '('
#223: FILE: drivers/gpu/drm/i915/i915_perf.c:1610:
+		cs = save_restore_register(

-:226: CHECK:OPEN_ENDED_LINE: Lines should not end with a '('
#226: FILE: drivers/gpu/drm/i915/i915_perf.c:1613:
+	cs = save_restore_register(

-:328: CHECK:OPEN_ENDED_LINE: Lines should not end with a '('
#328: FILE: drivers/gpu/drm/i915/i915_perf.c:1715:
+		cs = save_restore_register(

-:331: CHECK:OPEN_ENDED_LINE: Lines should not end with a '('
#331: FILE: drivers/gpu/drm/i915/i915_perf.c:1718:
+	cs = save_restore_register(

-:448: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#448: 
new file mode 100644

-:453: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#453: FILE: drivers/gpu/drm/i915/selftests/i915_perf.c:1:
+/*

-:454: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#454: FILE: drivers/gpu/drm/i915/selftests/i915_perf.c:2:
+ * SPDX-License-Identifier: MIT

total: 0 errors, 3 warnings, 7 checks, 590 lines checked
1296e9cb95c6 drm/i915/perf: execute OA configuration from command stream
db4b462f364b drm/i915/perf: Allow dynamic reconfiguration of the OA stream
1b2d6533f931 drm/i915/perf: allow holding preemption on filtered ctx
14b613a9550b drm/i915/execlists: Prevent merging requests with conflicting flags
-:14: WARNING:COMMIT_LOG_LONG_LINE: Possible unwrapped commit description (prefer a maximum 75 chars per line)
#14: 
References: 2a98f4e65bba ("drm/i915: add infrastructure to hold off preemption on a request")

-:14: ERROR:GIT_COMMIT_ID: Please use git commit description style 'commit <12+ chars of sha1> ("<title line>")' - ie: 'commit 2a98f4e65bba ("drm/i915: add infrastructure to hold off preemption on a request")'
#14: 
References: 2a98f4e65bba ("drm/i915: add infrastructure to hold off preemption on a request")

total: 1 errors, 1 warnings, 0 checks, 9 lines checked

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* ✓ Fi.CI.BAT: success for series starting with [CI,1/9] drm/i915/perf: Replace global wakeref tracking with engine-pm
  2019-10-10 19:48 [CI 1/9] drm/i915/perf: Replace global wakeref tracking with engine-pm Chris Wilson
                   ` (9 preceding siblings ...)
  2019-10-10 20:53 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [CI,1/9] drm/i915/perf: Replace global wakeref tracking with engine-pm (rev3) Patchwork
@ 2019-10-10 21:03 ` Patchwork
  2019-10-10 21:14 ` ✓ Fi.CI.BAT: success for series starting with [CI,1/9] drm/i915/perf: Replace global wakeref tracking with engine-pm (rev3) Patchwork
                   ` (3 subsequent siblings)
  14 siblings, 0 replies; 20+ messages in thread
From: Patchwork @ 2019-10-10 21:03 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [CI,1/9] drm/i915/perf: Replace global wakeref tracking with engine-pm
URL   : https://patchwork.freedesktop.org/series/67874/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_7058 -> Patchwork_14758
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14758/index.html

New tests
---------

  New tests have been introduced between CI_DRM_7058 and Patchwork_14758:

### New IGT tests (1) ###

  * igt@i915_selftest@live_perf:
    - Statuses : 44 pass(s)
    - Exec time: [0.40, 2.67] s

  

Known issues
------------

  Here are the changes found in Patchwork_14758 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_render_linear_blits@basic:
    - fi-icl-u3:          [PASS][1] -> [DMESG-WARN][2] ([fdo#107724]) +2 similar issues
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7058/fi-icl-u3/igt@gem_render_linear_blits@basic.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14758/fi-icl-u3/igt@gem_render_linear_blits@basic.html

  
#### Possible fixes ####

  * igt@gem_exec_suspend@basic:
    - {fi-icl-guc}:       [FAIL][3] ([fdo#111699]) -> [PASS][4]
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7058/fi-icl-guc/igt@gem_exec_suspend@basic.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14758/fi-icl-guc/igt@gem_exec_suspend@basic.html

  * igt@gem_mmap_gtt@basic-small-bo-tiledy:
    - {fi-icl-dsi}:       [DMESG-WARN][5] ([fdo#106107]) -> [PASS][6]
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7058/fi-icl-dsi/igt@gem_mmap_gtt@basic-small-bo-tiledy.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14758/fi-icl-dsi/igt@gem_mmap_gtt@basic-small-bo-tiledy.html

  * igt@gem_mmap_gtt@basic-write-cpu-read-gtt:
    - fi-icl-u3:          [DMESG-WARN][7] ([fdo#107724]) -> [PASS][8]
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7058/fi-icl-u3/igt@gem_mmap_gtt@basic-write-cpu-read-gtt.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14758/fi-icl-u3/igt@gem_mmap_gtt@basic-write-cpu-read-gtt.html

  * igt@i915_selftest@live_coherency:
    - {fi-kbl-soraka}:    [INCOMPLETE][9] -> [PASS][10]
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7058/fi-kbl-soraka/igt@i915_selftest@live_coherency.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14758/fi-kbl-soraka/igt@i915_selftest@live_coherency.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#106107]: https://bugs.freedesktop.org/show_bug.cgi?id=106107
  [fdo#107713]: https://bugs.freedesktop.org/show_bug.cgi?id=107713
  [fdo#107724]: https://bugs.freedesktop.org/show_bug.cgi?id=107724
  [fdo#109644]: https://bugs.freedesktop.org/show_bug.cgi?id=109644
  [fdo#110464]: https://bugs.freedesktop.org/show_bug.cgi?id=110464
  [fdo#111600]: https://bugs.freedesktop.org/show_bug.cgi?id=111600
  [fdo#111699]: https://bugs.freedesktop.org/show_bug.cgi?id=111699
  [fdo#111867]: https://bugs.freedesktop.org/show_bug.cgi?id=111867


Participating hosts (54 -> 47)
------------------------------

  Missing    (7): fi-ilk-m540 fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-icl-y fi-byt-clapper fi-bdw-samus 


Build changes
-------------

  * CI: CI-20190529 -> None
  * Linux: CI_DRM_7058 -> Patchwork_14758

  CI-20190529: 20190529
  CI_DRM_7058: ec85c0501ada08c2aea8adb7da74931a6d0ae39b @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5220: 1e38e32d721210a780198c8293a6b8c8e881df68 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_14758: e7a841a6b27d0350b1e96bbc0f210719c047cc1d @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

e7a841a6b27d drm/i915/execlists: Prevent merging requests with conflicting flags
c04dded670da drm/i915/perf: allow holding preemption on filtered ctx
def0ed974f64 drm/i915/perf: Allow dynamic reconfiguration of the OA stream
2245bacfcea6 drm/i915/perf: execute OA configuration from command stream
f4fc96f15f7a drm/i915/perf: implement active wait for noa configurations
15679a893373 drm/i915: add support for perf configuration queries
ac5a1dbba730 drm/i915/perf: allow for CS OA configs to be created lazily
de5e384f91f5 drm/i915/perf: introduce a versioning of the i915-perf uapi
4aa72c4ef98d drm/i915/perf: Replace global wakeref tracking with engine-pm

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14758/index.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* ✓ Fi.CI.BAT: success for series starting with [CI,1/9] drm/i915/perf: Replace global wakeref tracking with engine-pm (rev3)
  2019-10-10 19:48 [CI 1/9] drm/i915/perf: Replace global wakeref tracking with engine-pm Chris Wilson
                   ` (10 preceding siblings ...)
  2019-10-10 21:03 ` ✓ Fi.CI.BAT: success for series starting with [CI,1/9] drm/i915/perf: Replace global wakeref tracking with engine-pm Patchwork
@ 2019-10-10 21:14 ` Patchwork
  2019-10-10 22:14 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [CI,1/9] drm/i915/perf: Replace global wakeref tracking with engine-pm (rev4) Patchwork
                   ` (2 subsequent siblings)
  14 siblings, 0 replies; 20+ messages in thread
From: Patchwork @ 2019-10-10 21:14 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [CI,1/9] drm/i915/perf: Replace global wakeref tracking with engine-pm (rev3)
URL   : https://patchwork.freedesktop.org/series/67874/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_7058 -> Patchwork_14759
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14759/index.html

New tests
---------

  New tests have been introduced between CI_DRM_7058 and Patchwork_14759:

### New IGT tests (1) ###

  * igt@i915_selftest@live_perf:
    - Statuses : 44 pass(s)
    - Exec time: [0.39, 1.38] s

  

Known issues
------------

  Here are the changes found in Patchwork_14759 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_flink_basic@basic:
    - fi-icl-u3:          [PASS][1] -> [DMESG-WARN][2] ([fdo#107724])
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7058/fi-icl-u3/igt@gem_flink_basic@basic.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14759/fi-icl-u3/igt@gem_flink_basic@basic.html

  * igt@kms_chamelium@hdmi-edid-read:
    - fi-kbl-7500u:       [PASS][3] -> [FAIL][4] ([fdo#109483])
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7058/fi-kbl-7500u/igt@kms_chamelium@hdmi-edid-read.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14759/fi-kbl-7500u/igt@kms_chamelium@hdmi-edid-read.html

  
#### Possible fixes ####

  * igt@gem_exec_suspend@basic:
    - {fi-icl-guc}:       [FAIL][5] ([fdo#111699]) -> [PASS][6]
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7058/fi-icl-guc/igt@gem_exec_suspend@basic.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14759/fi-icl-guc/igt@gem_exec_suspend@basic.html

  * igt@gem_mmap_gtt@basic-small-bo-tiledy:
    - {fi-icl-dsi}:       [DMESG-WARN][7] ([fdo#106107]) -> [PASS][8]
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7058/fi-icl-dsi/igt@gem_mmap_gtt@basic-small-bo-tiledy.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14759/fi-icl-dsi/igt@gem_mmap_gtt@basic-small-bo-tiledy.html

  * igt@gem_mmap_gtt@basic-write-cpu-read-gtt:
    - fi-icl-u3:          [DMESG-WARN][9] ([fdo#107724]) -> [PASS][10] +1 similar issue
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7058/fi-icl-u3/igt@gem_mmap_gtt@basic-write-cpu-read-gtt.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14759/fi-icl-u3/igt@gem_mmap_gtt@basic-write-cpu-read-gtt.html

  
#### Warnings ####

  * igt@kms_chamelium@hdmi-hpd-fast:
    - fi-kbl-7500u:       [FAIL][11] ([fdo#111407]) -> [FAIL][12] ([fdo#111045] / [fdo#111096])
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7058/fi-kbl-7500u/igt@kms_chamelium@hdmi-hpd-fast.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14759/fi-kbl-7500u/igt@kms_chamelium@hdmi-hpd-fast.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#106107]: https://bugs.freedesktop.org/show_bug.cgi?id=106107
  [fdo#107724]: https://bugs.freedesktop.org/show_bug.cgi?id=107724
  [fdo#109483]: https://bugs.freedesktop.org/show_bug.cgi?id=109483
  [fdo#111045]: https://bugs.freedesktop.org/show_bug.cgi?id=111045
  [fdo#111096]: https://bugs.freedesktop.org/show_bug.cgi?id=111096
  [fdo#111407]: https://bugs.freedesktop.org/show_bug.cgi?id=111407
  [fdo#111600]: https://bugs.freedesktop.org/show_bug.cgi?id=111600
  [fdo#111699]: https://bugs.freedesktop.org/show_bug.cgi?id=111699


Participating hosts (54 -> 45)
------------------------------

  Missing    (9): fi-kbl-soraka fi-ilk-m540 fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-pnv-d510 fi-icl-y fi-byt-clapper fi-bdw-samus 


Build changes
-------------

  * CI: CI-20190529 -> None
  * Linux: CI_DRM_7058 -> Patchwork_14759

  CI-20190529: 20190529
  CI_DRM_7058: ec85c0501ada08c2aea8adb7da74931a6d0ae39b @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5220: 1e38e32d721210a780198c8293a6b8c8e881df68 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_14759: 14b613a9550b3fdb416e7bb0c5674259616b4799 @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

14b613a9550b drm/i915/execlists: Prevent merging requests with conflicting flags
1b2d6533f931 drm/i915/perf: allow holding preemption on filtered ctx
db4b462f364b drm/i915/perf: Allow dynamic reconfiguration of the OA stream
1296e9cb95c6 drm/i915/perf: execute OA configuration from command stream
93a0ad5870f5 drm/i915/perf: implement active wait for noa configurations
5dac73e6848a drm/i915: add support for perf configuration queries
bb73a1117979 drm/i915/perf: allow for CS OA configs to be created lazily
44ce06aa2ac1 drm/i915/perf: introduce a versioning of the i915-perf uapi
e3e8e6f3790d drm/i915/perf: Replace global wakeref tracking with engine-pm

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14759/index.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH] drm/i915/perf: Allow dynamic reconfiguration of the OA stream
  2019-10-10 19:48 ` [CI 7/9] drm/i915/perf: Allow dynamic reconfiguration of the OA stream Chris Wilson
  2019-10-10 20:22   ` [PATCH] " Chris Wilson
@ 2019-10-10 21:23   ` Chris Wilson
  2019-10-11 13:25     ` Lionel Landwerlin
  1 sibling, 1 reply; 20+ messages in thread
From: Chris Wilson @ 2019-10-10 21:23 UTC (permalink / raw)
  To: intel-gfx

Introduce a new perf_ioctl command to change the OA configuration of the
active stream. This allows the OA stream to be reconfigured between
batch buffers, giving greater flexibility in sampling. We inject a
request into the OA context to reconfigure the stream asynchronously on
the GPU in between and ordered with execbuffer calls.

Original patch for dynamic reconfiguration by Lionel Landwerlin.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 drivers/gpu/drm/i915/i915_perf.c | 49 +++++++++++++++++++++++++++++++-
 include/uapi/drm/i915_drm.h      |  9 ++++++
 2 files changed, 57 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index c2431b5a1f55..5daaf8d0bdc3 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -2860,6 +2860,43 @@ static void i915_perf_disable_locked(struct i915_perf_stream *stream)
 		stream->ops->disable(stream);
 }
 
+static long i915_perf_config_locked(struct i915_perf_stream *stream,
+				    unsigned long metrics_set)
+{
+	struct i915_oa_config *config;
+	long ret = stream->oa_config->id;
+
+	config = i915_perf_get_oa_config(stream->perf, metrics_set);
+	if (!config)
+		return -EINVAL;
+
+	if (config != stream->oa_config) {
+		struct intel_context *ce;
+		int err;
+
+		/*
+		 * If OA is bound to a specific context, emit the
+		 * reconfiguration inline from that context. The update
+		 * will then be ordered with respect to submission on that
+		 * context.
+		 *
+		 * When set globally, we use a low priority kernel context,
+		 * so the update will effectively only be applied when idle.
+		 */
+		ce = stream->pinned_ctx ?: stream->engine->kernel_context;
+
+		err = emit_oa_config(stream, ce);
+		if (err == 0)
+			config = xchg(&stream->oa_config, config);
+		else
+			ret = err;
+	}
+
+	i915_oa_config_put(config);
+
+	return ret;
+}
+
 /**
  * i915_perf_ioctl - support ioctl() usage with i915 perf stream FDs
  * @stream: An i915 perf stream
@@ -2883,6 +2920,8 @@ static long i915_perf_ioctl_locked(struct i915_perf_stream *stream,
 	case I915_PERF_IOCTL_DISABLE:
 		i915_perf_disable_locked(stream);
 		return 0;
+	case I915_PERF_IOCTL_CONFIG:
+		return i915_perf_config_locked(stream, arg);
 	}
 
 	return -EINVAL;
@@ -4020,7 +4059,15 @@ void i915_perf_fini(struct drm_i915_private *i915)
  */
 int i915_perf_ioctl_version(void)
 {
-	return 1;
+	/*
+	 * 1: Initial version
+	 *   I915_PERF_IOCTL_ENABLE
+	 *   I915_PERF_IOCTL_DISABLE
+	 *
+	 * 2: Added runtime modification of OA config.
+	 *   I915_PERF_IOCTL_CONFIG
+	 */
+	return 2;
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 0c7b2815fbf1..0a44438c8fbb 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1932,6 +1932,15 @@ struct drm_i915_perf_open_param {
  */
 #define I915_PERF_IOCTL_DISABLE	_IO('i', 0x1)
 
+/**
+ * Change metrics_set captured by a stream.
+ *
+ * Returns the previously bound metrics set id, or a negative error code.
+ *
+ * This ioctl is available in perf revision 2.
+ */
+#define I915_PERF_IOCTL_CONFIG	_IO('i', 0x2)
+
 /**
  * Common to all i915 perf records
  */
-- 
2.23.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* ✗ Fi.CI.CHECKPATCH: warning for series starting with [CI,1/9] drm/i915/perf: Replace global wakeref tracking with engine-pm (rev4)
  2019-10-10 19:48 [CI 1/9] drm/i915/perf: Replace global wakeref tracking with engine-pm Chris Wilson
                   ` (11 preceding siblings ...)
  2019-10-10 21:14 ` ✓ Fi.CI.BAT: success for series starting with [CI,1/9] drm/i915/perf: Replace global wakeref tracking with engine-pm (rev3) Patchwork
@ 2019-10-10 22:14 ` Patchwork
  2019-10-10 22:47 ` ✓ Fi.CI.BAT: success " Patchwork
  2019-10-11 10:54 ` ✗ Fi.CI.IGT: failure " Patchwork
  14 siblings, 0 replies; 20+ messages in thread
From: Patchwork @ 2019-10-10 22:14 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [CI,1/9] drm/i915/perf: Replace global wakeref tracking with engine-pm (rev4)
URL   : https://patchwork.freedesktop.org/series/67874/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
8bb740107b70 drm/i915/perf: Replace global wakeref tracking with engine-pm
2025bc9c090b drm/i915/perf: introduce a versioning of the i915-perf uapi
c42664d0f2c6 drm/i915/perf: allow for CS OA configs to be created lazily
4e283719a084 drm/i915: add support for perf configuration queries
b8b9dc4807cc drm/i915/perf: implement active wait for noa configurations
-:46: CHECK:SPACING: spaces preferred around that '<<' (ctx:VxV)
#46: FILE: drivers/gpu/drm/i915/gt/intel_gpu_commands.h:228:
+#define   PIPE_CONTROL_WRITE_TIMESTAMP			(3<<14)
                                       			  ^

-:181: CHECK:OPEN_ENDED_LINE: Lines should not end with a '('
#181: FILE: drivers/gpu/drm/i915/i915_perf.c:1568:
+		DIV64_U64_ROUND_UP(

-:215: CHECK:MULTIPLE_ASSIGNMENTS: multiple assignments should be avoided
#215: FILE: drivers/gpu/drm/i915/i915_perf.c:1602:
+	batch = cs = i915_gem_object_pin_map(bo, I915_MAP_WB);

-:223: CHECK:OPEN_ENDED_LINE: Lines should not end with a '('
#223: FILE: drivers/gpu/drm/i915/i915_perf.c:1610:
+		cs = save_restore_register(

-:226: CHECK:OPEN_ENDED_LINE: Lines should not end with a '('
#226: FILE: drivers/gpu/drm/i915/i915_perf.c:1613:
+	cs = save_restore_register(

-:328: CHECK:OPEN_ENDED_LINE: Lines should not end with a '('
#328: FILE: drivers/gpu/drm/i915/i915_perf.c:1715:
+		cs = save_restore_register(

-:331: CHECK:OPEN_ENDED_LINE: Lines should not end with a '('
#331: FILE: drivers/gpu/drm/i915/i915_perf.c:1718:
+	cs = save_restore_register(

-:448: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#448: 
new file mode 100644

-:453: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#453: FILE: drivers/gpu/drm/i915/selftests/i915_perf.c:1:
+/*

-:454: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#454: FILE: drivers/gpu/drm/i915/selftests/i915_perf.c:2:
+ * SPDX-License-Identifier: MIT

total: 0 errors, 3 warnings, 7 checks, 590 lines checked
729178efb761 drm/i915/perf: execute OA configuration from command stream
161ef9ac4e5a drm/i915/perf: Allow dynamic reconfiguration of the OA stream
ff1133b4b2ff drm/i915/perf: allow holding preemption on filtered ctx
a8604dba22d6 drm/i915/execlists: Prevent merging requests with conflicting flags
-:14: WARNING:COMMIT_LOG_LONG_LINE: Possible unwrapped commit description (prefer a maximum 75 chars per line)
#14: 
References: 2a98f4e65bba ("drm/i915: add infrastructure to hold off preemption on a request")

-:14: ERROR:GIT_COMMIT_ID: Please use git commit description style 'commit <12+ chars of sha1> ("<title line>")' - ie: 'commit 2a98f4e65bba ("drm/i915: add infrastructure to hold off preemption on a request")'
#14: 
References: 2a98f4e65bba ("drm/i915: add infrastructure to hold off preemption on a request")

total: 1 errors, 1 warnings, 0 checks, 9 lines checked

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* ✓ Fi.CI.BAT: success for series starting with [CI,1/9] drm/i915/perf: Replace global wakeref tracking with engine-pm (rev4)
  2019-10-10 19:48 [CI 1/9] drm/i915/perf: Replace global wakeref tracking with engine-pm Chris Wilson
                   ` (12 preceding siblings ...)
  2019-10-10 22:14 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [CI,1/9] drm/i915/perf: Replace global wakeref tracking with engine-pm (rev4) Patchwork
@ 2019-10-10 22:47 ` Patchwork
  2019-10-11 10:54 ` ✗ Fi.CI.IGT: failure " Patchwork
  14 siblings, 0 replies; 20+ messages in thread
From: Patchwork @ 2019-10-10 22:47 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [CI,1/9] drm/i915/perf: Replace global wakeref tracking with engine-pm (rev4)
URL   : https://patchwork.freedesktop.org/series/67874/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_7058 -> Patchwork_14761
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14761/index.html

New tests
---------

  New tests have been introduced between CI_DRM_7058 and Patchwork_14761:

### New IGT tests (1) ###

  * igt@i915_selftest@live_perf:
    - Statuses : 44 pass(s)
    - Exec time: [0.43, 2.56] s

  

Known issues
------------

  Here are the changes found in Patchwork_14761 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_ctx_switch@rcs0:
    - fi-bxt-dsi:         [PASS][1] -> [INCOMPLETE][2] ([fdo#103927])
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7058/fi-bxt-dsi/igt@gem_ctx_switch@rcs0.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14761/fi-bxt-dsi/igt@gem_ctx_switch@rcs0.html

  * igt@prime_vgem@basic-fence-read:
    - fi-icl-u3:          [PASS][3] -> [DMESG-WARN][4] ([fdo#107724]) +1 similar issue
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7058/fi-icl-u3/igt@prime_vgem@basic-fence-read.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14761/fi-icl-u3/igt@prime_vgem@basic-fence-read.html

  
#### Possible fixes ####

  * igt@gem_exec_suspend@basic:
    - {fi-icl-guc}:       [FAIL][5] ([fdo#111699]) -> [PASS][6]
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7058/fi-icl-guc/igt@gem_exec_suspend@basic.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14761/fi-icl-guc/igt@gem_exec_suspend@basic.html

  * igt@gem_mmap_gtt@basic-small-bo-tiledy:
    - {fi-icl-dsi}:       [DMESG-WARN][7] ([fdo#106107]) -> [PASS][8]
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7058/fi-icl-dsi/igt@gem_mmap_gtt@basic-small-bo-tiledy.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14761/fi-icl-dsi/igt@gem_mmap_gtt@basic-small-bo-tiledy.html

  * igt@gem_mmap_gtt@basic-write-cpu-read-gtt:
    - fi-icl-u3:          [DMESG-WARN][9] ([fdo#107724]) -> [PASS][10] +1 similar issue
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7058/fi-icl-u3/igt@gem_mmap_gtt@basic-write-cpu-read-gtt.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14761/fi-icl-u3/igt@gem_mmap_gtt@basic-write-cpu-read-gtt.html

  * igt@i915_selftest@live_coherency:
    - {fi-kbl-soraka}:    [INCOMPLETE][11] -> [PASS][12]
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7058/fi-kbl-soraka/igt@i915_selftest@live_coherency.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14761/fi-kbl-soraka/igt@i915_selftest@live_coherency.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#103927]: https://bugs.freedesktop.org/show_bug.cgi?id=103927
  [fdo#106107]: https://bugs.freedesktop.org/show_bug.cgi?id=106107
  [fdo#107724]: https://bugs.freedesktop.org/show_bug.cgi?id=107724
  [fdo#111045]: https://bugs.freedesktop.org/show_bug.cgi?id=111045
  [fdo#111096]: https://bugs.freedesktop.org/show_bug.cgi?id=111096
  [fdo#111699]: https://bugs.freedesktop.org/show_bug.cgi?id=111699


Participating hosts (54 -> 46)
------------------------------

  Missing    (8): fi-ilk-m540 fi-hsw-4200u fi-bsw-n3050 fi-byt-squawks fi-bsw-cyan fi-icl-y fi-byt-clapper fi-bdw-samus 


Build changes
-------------

  * CI: CI-20190529 -> None
  * Linux: CI_DRM_7058 -> Patchwork_14761

  CI-20190529: 20190529
  CI_DRM_7058: ec85c0501ada08c2aea8adb7da74931a6d0ae39b @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5220: 1e38e32d721210a780198c8293a6b8c8e881df68 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_14761: a8604dba22d680da43d834f586dba412a6b74d0f @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

a8604dba22d6 drm/i915/execlists: Prevent merging requests with conflicting flags
ff1133b4b2ff drm/i915/perf: allow holding preemption on filtered ctx
161ef9ac4e5a drm/i915/perf: Allow dynamic reconfiguration of the OA stream
729178efb761 drm/i915/perf: execute OA configuration from command stream
b8b9dc4807cc drm/i915/perf: implement active wait for noa configurations
4e283719a084 drm/i915: add support for perf configuration queries
c42664d0f2c6 drm/i915/perf: allow for CS OA configs to be created lazily
2025bc9c090b drm/i915/perf: introduce a versioning of the i915-perf uapi
8bb740107b70 drm/i915/perf: Replace global wakeref tracking with engine-pm

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14761/index.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* ✗ Fi.CI.IGT: failure for series starting with [CI,1/9] drm/i915/perf: Replace global wakeref tracking with engine-pm (rev4)
  2019-10-10 19:48 [CI 1/9] drm/i915/perf: Replace global wakeref tracking with engine-pm Chris Wilson
                   ` (13 preceding siblings ...)
  2019-10-10 22:47 ` ✓ Fi.CI.BAT: success " Patchwork
@ 2019-10-11 10:54 ` Patchwork
  14 siblings, 0 replies; 20+ messages in thread
From: Patchwork @ 2019-10-11 10:54 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [CI,1/9] drm/i915/perf: Replace global wakeref tracking with engine-pm (rev4)
URL   : https://patchwork.freedesktop.org/series/67874/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_7058_full -> Patchwork_14761_full
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with Patchwork_14761_full absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_14761_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_14761_full:

### IGT changes ###

#### Possible regressions ####

  * igt@kms_fbcon_fbt@psr-suspend:
    - shard-iclb:         [PASS][1] -> [DMESG-WARN][2]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7058/shard-iclb7/igt@kms_fbcon_fbt@psr-suspend.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14761/shard-iclb3/igt@kms_fbcon_fbt@psr-suspend.html

  
New tests
---------

  New tests have been introduced between CI_DRM_7058_full and Patchwork_14761_full:

### New IGT tests (1) ###

  * igt@i915_selftest@live_perf:
    - Statuses : 7 pass(s)
    - Exec time: [0.35, 2.54] s

  

Known issues
------------

  Here are the changes found in Patchwork_14761_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_ctx_shared@exec-shared-gtt-bsd2:
    - shard-iclb:         [PASS][3] -> [SKIP][4] ([fdo#109276]) +10 similar issues
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7058/shard-iclb4/igt@gem_ctx_shared@exec-shared-gtt-bsd2.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14761/shard-iclb8/igt@gem_ctx_shared@exec-shared-gtt-bsd2.html

  * igt@gem_ctx_shared@exec-single-timeline-bsd:
    - shard-iclb:         [PASS][5] -> [SKIP][6] ([fdo#110841])
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7058/shard-iclb6/igt@gem_ctx_shared@exec-single-timeline-bsd.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14761/shard-iclb2/igt@gem_ctx_shared@exec-single-timeline-bsd.html

  * igt@gem_eio@in-flight-contexts-immediate:
    - shard-snb:          [PASS][7] -> [FAIL][8] ([fdo#111925])
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7058/shard-snb4/igt@gem_eio@in-flight-contexts-immediate.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14761/shard-snb5/igt@gem_eio@in-flight-contexts-immediate.html

  * igt@gem_exec_schedule@preempt-other-chain-bsd:
    - shard-iclb:         [PASS][9] -> [SKIP][10] ([fdo#111325]) +7 similar issues
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7058/shard-iclb3/igt@gem_exec_schedule@preempt-other-chain-bsd.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14761/shard-iclb4/igt@gem_exec_schedule@preempt-other-chain-bsd.html

  * igt@gem_fence_thrash@bo-write-verify-threaded-none:
    - shard-hsw:          [PASS][11] -> [INCOMPLETE][12] ([fdo#103540])
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7058/shard-hsw7/igt@gem_fence_thrash@bo-write-verify-threaded-none.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14761/shard-hsw8/igt@gem_fence_thrash@bo-write-verify-threaded-none.html

  * igt@gem_softpin@noreloc-s3:
    - shard-skl:          [PASS][13] -> [INCOMPLETE][14] ([fdo#104108])
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7058/shard-skl6/igt@gem_softpin@noreloc-s3.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14761/shard-skl7/igt@gem_softpin@noreloc-s3.html

  * igt@gem_userptr_blits@sync-unmap:
    - shard-hsw:          [PASS][15] -> [DMESG-WARN][16] ([fdo#111870])
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7058/shard-hsw6/igt@gem_userptr_blits@sync-unmap.html
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14761/shard-hsw8/igt@gem_userptr_blits@sync-unmap.html

  * igt@gem_userptr_blits@sync-unmap-cycles:
    - shard-snb:          [PASS][17] -> [DMESG-WARN][18] ([fdo#111870]) +1 similar issue
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7058/shard-snb5/igt@gem_userptr_blits@sync-unmap-cycles.html
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14761/shard-snb6/igt@gem_userptr_blits@sync-unmap-cycles.html

  * igt@kms_busy@basic-flip-a:
    - shard-kbl:          [PASS][19] -> [DMESG-WARN][20] ([fdo#106107])
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7058/shard-kbl6/igt@kms_busy@basic-flip-a.html
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14761/shard-kbl7/igt@kms_busy@basic-flip-a.html

  * igt@kms_cursor_crc@pipe-b-cursor-256x256-offscreen:
    - shard-skl:          [PASS][21] -> [FAIL][22] ([fdo#103232])
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7058/shard-skl8/igt@kms_cursor_crc@pipe-b-cursor-256x256-offscreen.html
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14761/shard-skl10/igt@kms_cursor_crc@pipe-b-cursor-256x256-offscreen.html

  * igt@kms_frontbuffer_tracking@fbc-1p-primscrn-pri-indfb-draw-mmap-gtt:
    - shard-iclb:         [PASS][23] -> [FAIL][24] ([fdo#103167]) +5 similar issues
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7058/shard-iclb4/igt@kms_frontbuffer_tracking@fbc-1p-primscrn-pri-indfb-draw-mmap-gtt.html
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14761/shard-iclb4/igt@kms_frontbuffer_tracking@fbc-1p-primscrn-pri-indfb-draw-mmap-gtt.html

  * igt@kms_frontbuffer_tracking@fbc-suspend:
    - shard-apl:          [PASS][25] -> [DMESG-WARN][26] ([fdo#108566]) +5 similar issues
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7058/shard-apl8/igt@kms_frontbuffer_tracking@fbc-suspend.html
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14761/shard-apl4/igt@kms_frontbuffer_tracking@fbc-suspend.html

  * igt@kms_plane_alpha_blend@pipe-c-coverage-7efc:
    - shard-skl:          [PASS][27] -> [FAIL][28] ([fdo#108145] / [fdo#110403]) +1 similar issue
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7058/shard-skl4/igt@kms_plane_alpha_blend@pipe-c-coverage-7efc.html
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14761/shard-skl1/igt@kms_plane_alpha_blend@pipe-c-coverage-7efc.html

  * igt@kms_psr2_su@page_flip:
    - shard-iclb:         [PASS][29] -> [SKIP][30] ([fdo#109642] / [fdo#111068])
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7058/shard-iclb2/igt@kms_psr2_su@page_flip.html
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14761/shard-iclb8/igt@kms_psr2_su@page_flip.html

  * igt@kms_setmode@basic:
    - shard-apl:          [PASS][31] -> [FAIL][32] ([fdo#99912])
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7058/shard-apl1/igt@kms_setmode@basic.html
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14761/shard-apl1/igt@kms_setmode@basic.html

  
#### Possible fixes ####

  * igt@gem_exec_schedule@reorder-wide-bsd:
    - shard-iclb:         [SKIP][33] ([fdo#111325]) -> [PASS][34] +3 similar issues
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7058/shard-iclb4/igt@gem_exec_schedule@reorder-wide-bsd.html
   [34]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14761/shard-iclb7/igt@gem_exec_schedule@reorder-wide-bsd.html

  * igt@gem_sync@basic-many-each:
    - shard-iclb:         [INCOMPLETE][35] ([fdo#107713] / [fdo#109100]) -> [PASS][36]
   [35]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7058/shard-iclb1/igt@gem_sync@basic-many-each.html
   [36]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14761/shard-iclb6/igt@gem_sync@basic-many-each.html

  * igt@gem_tiled_swapping@non-threaded:
    - shard-hsw:          [INCOMPLETE][37] ([fdo#103540] / [fdo#108686]) -> [PASS][38]
   [37]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7058/shard-hsw2/igt@gem_tiled_swapping@non-threaded.html
   [38]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14761/shard-hsw2/igt@gem_tiled_swapping@non-threaded.html

  * igt@gem_userptr_blits@dmabuf-unsync:
    - shard-snb:          [DMESG-WARN][39] ([fdo#111870]) -> [PASS][40]
   [39]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7058/shard-snb7/igt@gem_userptr_blits@dmabuf-unsync.html
   [40]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14761/shard-snb1/igt@gem_userptr_blits@dmabuf-unsync.html

  * igt@kms_cursor_crc@pipe-c-cursor-suspend:
    - shard-apl:          [DMESG-WARN][41] ([fdo#108566]) -> [PASS][42] +3 similar issues
   [41]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7058/shard-apl8/igt@kms_cursor_crc@pipe-c-cursor-suspend.html
   [42]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14761/shard-apl4/igt@kms_cursor_crc@pipe-c-cursor-suspend.html

  * igt@kms_flip@flip-vs-expired-vblank-interruptible:
    - shard-skl:          [FAIL][43] ([fdo#105363]) -> [PASS][44]
   [43]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7058/shard-skl10/igt@kms_flip@flip-vs-expired-vblank-interruptible.html
   [44]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14761/shard-skl2/igt@kms_flip@flip-vs-expired-vblank-interruptible.html

  * igt@kms_flip@flip-vs-suspend:
    - shard-snb:          [DMESG-WARN][45] ([fdo#102365]) -> [PASS][46]
   [45]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7058/shard-snb4/igt@kms_flip@flip-vs-suspend.html
   [46]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14761/shard-snb7/igt@kms_flip@flip-vs-suspend.html
    - {shard-tglb}:       [INCOMPLETE][47] ([fdo#111714]) -> [PASS][48]
   [47]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7058/shard-tglb1/igt@kms_flip@flip-vs-suspend.html
   [48]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14761/shard-tglb6/igt@kms_flip@flip-vs-suspend.html

  * igt@kms_flip@flip-vs-suspend-interruptible:
    - shard-skl:          [INCOMPLETE][49] ([fdo#109507]) -> [PASS][50]
   [49]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7058/shard-skl1/igt@kms_flip@flip-vs-suspend-interruptible.html
   [50]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14761/shard-skl7/igt@kms_flip@flip-vs-suspend-interruptible.html

  * igt@kms_frontbuffer_tracking@fbc-1p-primscrn-shrfb-plflip-blt:
    - shard-iclb:         [FAIL][51] ([fdo#103167]) -> [PASS][52] +6 similar issues
   [51]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7058/shard-iclb6/igt@kms_frontbuffer_tracking@fbc-1p-primscrn-shrfb-plflip-blt.html
   [52]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14761/shard-iclb2/igt@kms_frontbuffer_tracking@fbc-1p-primscrn-shrfb-plflip-blt.html

  * igt@kms_frontbuffer_tracking@fbcpsr-farfromfence:
    - shard-iclb:         [FAIL][53] ([fdo#109247]) -> [PASS][54] +4 similar issues
   [53]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7058/shard-iclb2/igt@kms_frontbuffer_tracking@fbcpsr-farfromfence.html
   [54]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14761/shard-iclb8/igt@kms_frontbuffer_tracking@fbcpsr-farfromfence.html

  * igt@kms_psr@psr2_basic:
    - shard-iclb:         [SKIP][55] ([fdo#109441]) -> [PASS][56] +1 similar issue
   [55]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7058/shard-iclb6/igt@kms_psr@psr2_basic.html
   [56]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14761/shard-iclb2/igt@kms_psr@psr2_basic.html

  * igt@kms_setmode@basic:
    - shard-hsw:          [FAIL][57] ([fdo#99912]) -> [PASS][58]
   [57]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7058/shard-hsw4/igt@kms_setmode@basic.html
   [58]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14761/shard-hsw1/igt@kms_setmode@basic.html

  * igt@perf@blocking:
    - shard-skl:          [FAIL][59] ([fdo#110728]) -> [PASS][60]
   [59]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7058/shard-skl4/igt@perf@blocking.html
   [60]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14761/shard-skl1/igt@perf@blocking.html

  * igt@prime_vgem@fence-wait-bsd2:
    - shard-iclb:         [SKIP][61] ([fdo#109276]) -> [PASS][62] +18 similar issues
   [61]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7058/shard-iclb6/igt@prime_vgem@fence-wait-bsd2.html
   [62]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14761/shard-iclb2/igt@prime_vgem@fence-wait-bsd2.html

  
#### Warnings ####

  * igt@gem_ctx_isolation@vcs1-nonpriv:
    - shard-iclb:         [SKIP][63] ([fdo#109276]) -> [FAIL][64] ([fdo#111329])
   [63]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7058/shard-iclb6/igt@gem_ctx_isolation@vcs1-nonpriv.html
   [64]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14761/shard-iclb2/igt@gem_ctx_isolation@vcs1-nonpriv.html

  * igt@gem_mocs_settings@mocs-rc6-bsd2:
    - shard-iclb:         [FAIL][65] ([fdo#111330]) -> [SKIP][66] ([fdo#109276])
   [65]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7058/shard-iclb4/igt@gem_mocs_settings@mocs-rc6-bsd2.html
   [66]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14761/shard-iclb8/igt@gem_mocs_settings@mocs-rc6-bsd2.html

  * igt@gem_mocs_settings@mocs-reset-bsd2:
    - shard-iclb:         [SKIP][67] ([fdo#109276]) -> [FAIL][68] ([fdo#111330]) +1 similar issue
   [67]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7058/shard-iclb3/igt@gem_mocs_settings@mocs-reset-bsd2.html
   [68]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14761/shard-iclb1/igt@gem_mocs_settings@mocs-reset-bsd2.html

  * igt@gem_pwrite@huge-cpu-fbr:
    - shard-iclb:         [INCOMPLETE][69] ([fdo#107713]) -> [SKIP][70] ([fdo#109290])
   [69]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7058/shard-iclb7/igt@gem_pwrite@huge-cpu-fbr.html
   [70]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14761/shard-iclb7/igt@gem_pwrite@huge-cpu-fbr.html

  * igt@kms_psr@psr2_suspend:
    - shard-iclb:         [DMESG-WARN][71] ([fdo#107724]) -> [SKIP][72] ([fdo#109441])
   [71]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7058/shard-iclb2/igt@kms_psr@psr2_suspend.html
   [72]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14761/shard-iclb5/igt@kms_psr@psr2_suspend.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#102365]: https://bugs.freedesktop.org/show_bug.cgi?id=102365
  [fdo#103167]: https://bugs.freedesktop.org/show_bug.cgi?id=103167
  [fdo#103232]: https://bugs.freedesktop.org/show_bug.cgi?id=103232
  [fdo#103540]: https://bugs.freedesktop.org/show_bug.cgi?id=103540
  [fdo#104108]: https://bugs.freedesktop.org/show_bug.cgi?id=104108
  [fdo#105363]: https://bugs.freedesktop.org/show_bug.cgi?id=105363
  [fdo#106107]: https://bugs.freedesktop.org/show_bug.cgi?id=106107
  [fdo#107713]: https://bugs.freedesktop.org/show_bug.cgi?id=107713
  [fdo#107724]: https://bugs.freedesktop.org/show_bug.cgi?id=107724
  [fdo#108145]: https://bugs.freedesktop.org/show_bug.cgi?id=108145
  [fdo#108566]: https://bugs.freedesktop.org/show_bug.cgi?id=108566
  [fdo#108686]: https://bugs.freedesktop.org/show_bug.cgi?id=108686
  [fdo#109100]: https://bugs.freedesktop.org/show_bug.cgi?id=109100
  [fdo#109247]: https://bugs.freedesktop.org/show_bug.cgi?id=109247
  [fdo#109276]: https://bugs.freedesktop.org/show_bug.cgi?id=109276
  [fdo#109290]: https://bugs.freedesktop.org/show_bug.cgi?id=109290
  [fdo#109441]: https://bugs.freedesktop.org/show_bug.cgi?id=109441
  [fdo#109507]: https://bugs.freedesktop.org/show_bug.cgi?id=109507
  [fdo#109642]: https://bugs.freedesktop.org/show_bug.cgi?id=109642
  [fdo#110403]: https://bugs.freedesktop.org/show_bug.cgi?id=110403
  [fdo#110728]: https://bugs.freedesktop.org/show_bug.cgi?id=110728
  [fdo#110841]: https://bugs.freedesktop.org/show_bug.cgi?id=110841
  [fdo#111068]: https://bugs.freedesktop.org/show_bug.cgi?id=111068
  [fdo#111325]: https://bugs.freedesktop.org/show_bug.cgi?id=111325
  [fdo#111329]: https://bugs.freedesktop.org/show_bug.cgi?id=111329
  [fdo#111330]: https://bugs.freedesktop.org/show_bug.cgi?id=111330
  [fdo#111646]: https://bugs.freedesktop.org/show_bug.cgi?id=111646
  [fdo#111671]: https://bugs.freedesktop.org/show_bug.cgi?id=111671
  [fdo#111714]: https://bugs.freedesktop.org/show_bug.cgi?id=111714
  [fdo#111870]: https://bugs.freedesktop.org/show_bug.cgi?id=111870
  [fdo#111925]: https://bugs.freedesktop.org/show_bug.cgi?id=111925
  [fdo#99912]: https://bugs.freedesktop.org/show_bug.cgi?id=99912


Participating hosts (11 -> 11)
------------------------------

  No changes in participating hosts


Build changes
-------------

  * CI: CI-20190529 -> None
  * Linux: CI_DRM_7058 -> Patchwork_14761

  CI-20190529: 20190529
  CI_DRM_7058: ec85c0501ada08c2aea8adb7da74931a6d0ae39b @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5220: 1e38e32d721210a780198c8293a6b8c8e881df68 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_14761: a8604dba22d680da43d834f586dba412a6b74d0f @ git://anongit.freedesktop.org/gfx-ci/linux
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14761/index.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] drm/i915/perf: Allow dynamic reconfiguration of the OA stream
  2019-10-10 21:23   ` Chris Wilson
@ 2019-10-11 13:25     ` Lionel Landwerlin
  0 siblings, 0 replies; 20+ messages in thread
From: Lionel Landwerlin @ 2019-10-11 13:25 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On 11/10/2019 00:23, Chris Wilson wrote:
> Introduce a new perf_ioctl command to change the OA configuration of the
> active stream. This allows the OA stream to be reconfigured between
> batch buffers, giving greater flexibility in sampling. We inject a
> request into the OA context to reconfigure the stream asynchronously on
> the GPU in between and ordered with execbuffer calls.
>
> Original patch for dynamic reconfiguration by Lionel Landwerlin.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>

Updated https://gitlab.freedesktop.org/mesa/mesa/merge_requests/932 with
this new uAPI; it works fine :)


-Lionel

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2019-10-11 13:24 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-10-10 19:48 [CI 1/9] drm/i915/perf: Replace global wakeref tracking with engine-pm Chris Wilson
2019-10-10 19:48 ` [CI 2/9] drm/i915/perf: introduce a versioning of the i915-perf uapi Chris Wilson
2019-10-10 19:48 ` [CI 3/9] drm/i915/perf: allow for CS OA configs to be created lazily Chris Wilson
2019-10-10 19:48 ` [CI 4/9] drm/i915: add support for perf configuration queries Chris Wilson
2019-10-10 19:48 ` [CI 5/9] drm/i915/perf: implement active wait for noa configurations Chris Wilson
2019-10-10 20:24   ` [PATCH] " Chris Wilson
2019-10-10 19:48 ` [CI 6/9] drm/i915/perf: execute OA configuration from command stream Chris Wilson
2019-10-10 19:48 ` [CI 7/9] drm/i915/perf: Allow dynamic reconfiguration of the OA stream Chris Wilson
2019-10-10 20:22   ` [PATCH] " Chris Wilson
2019-10-10 21:23   ` Chris Wilson
2019-10-11 13:25     ` Lionel Landwerlin
2019-10-10 19:48 ` [CI 8/9] drm/i915/perf: allow holding preemption on filtered ctx Chris Wilson
2019-10-10 19:48 ` [CI 9/9] drm/i915/execlists: Prevent merging requests with conflicting flags Chris Wilson
2019-10-10 20:22 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [CI,1/9] drm/i915/perf: Replace global wakeref tracking with engine-pm Patchwork
2019-10-10 20:53 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [CI,1/9] drm/i915/perf: Replace global wakeref tracking with engine-pm (rev3) Patchwork
2019-10-10 21:03 ` ✓ Fi.CI.BAT: success for series starting with [CI,1/9] drm/i915/perf: Replace global wakeref tracking with engine-pm Patchwork
2019-10-10 21:14 ` ✓ Fi.CI.BAT: success for series starting with [CI,1/9] drm/i915/perf: Replace global wakeref tracking with engine-pm (rev3) Patchwork
2019-10-10 22:14 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [CI,1/9] drm/i915/perf: Replace global wakeref tracking with engine-pm (rev4) Patchwork
2019-10-10 22:47 ` ✓ Fi.CI.BAT: success " Patchwork
2019-10-11 10:54 ` ✗ Fi.CI.IGT: failure " Patchwork

This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.