All of lore.kernel.org
 help / color / mirror / Atom feed
From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
To: intel-gfx@lists.freedesktop.org
Subject: [PATCH v11 05/10] drm/i915/perf: allow for CS OA configs to be created lazily
Date: Wed, 28 Aug 2019 17:33:22 +0300	[thread overview]
Message-ID: <20190828143327.7965-6-lionel.g.landwerlin@intel.com> (raw)
In-Reply-To: <20190828143327.7965-1-lionel.g.landwerlin@intel.com>

Here we introduce a mechanism by which the execbuf part of the i915
driver will be able to request that a batch buffer containing the
programming for a particular OA config be created.

We'll execute these OA configuration buffers right before executing a
set of userspace commands so that a particular user batchbuffer is
executed with a given OA configuration.

This mechanism essentially allows the userspace driver to go through
several OA configurations without having to open/close the i915/perf
stream.

v2: No need for locking on OA config object creation (Chris)
    Flush cpu mapping of OA config (Chris)

v3: Properly deal with the perf_metric lock (Chris/Lionel)

v4: Fix oa config unref/put when not found (Lionel)

v5: Allocate BOs for configurations on the stream instead of globally
    (Lionel)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v4)
---
 drivers/gpu/drm/i915/gt/intel_gpu_commands.h |   1 +
 drivers/gpu/drm/i915/i915_drv.h              |  14 +-
 drivers/gpu/drm/i915/i915_perf.c             | 241 ++++++++++++++++---
 drivers/gpu/drm/i915/i915_perf.h             |  22 ++
 4 files changed, 237 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index 86e00a2db8a4..a7f1377a54a2 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -133,6 +133,7 @@
  */
 #define MI_LOAD_REGISTER_IMM(x)	MI_INSTR(0x22, 2*(x)-1)
 #define   MI_LRI_FORCE_POSTED		(1<<12)
+#define MI_LOAD_REGISTER_IMM_MAX_REGS (126)
 #define MI_STORE_REGISTER_MEM        MI_INSTR(0x24, 1)
 #define MI_STORE_REGISTER_MEM_GEN8   MI_INSTR(0x24, 2)
 #define   MI_SRM_LRM_GLOBAL_GTT		(1<<22)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index b42651a387d9..d8a1e842fb48 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -989,6 +989,8 @@ struct i915_oa_reg {
 };
 
 struct i915_oa_config {
+	struct drm_i915_private *i915;
+
 	char uuid[UUID_STRING_LEN + 1];
 	int id;
 
@@ -1003,7 +1005,7 @@ struct i915_oa_config {
 	struct attribute *attrs[2];
 	struct device_attribute sysfs_metric_id;
 
-	atomic_t ref_count;
+	struct kref ref;
 };
 
 struct i915_perf_stream;
@@ -1130,6 +1132,12 @@ struct i915_perf_stream {
 	 */
 	struct i915_oa_config *oa_config;
 
+	/**
+	 * @oa_config_bos: A list of struct i915_oa_config_bo allocated lazily
+	 * each time @oa_config changes.
+	 */
+	struct list_head oa_config_bos;
+
 	/**
 	 * The OA context specific information.
 	 */
@@ -1660,8 +1668,8 @@ struct drm_i915_private {
 		struct mutex metrics_lock;
 
 		/*
-		 * List of dynamic configurations, you need to hold
-		 * dev_priv->perf.metrics_lock to access it.
+		 * List of dynamic configurations (struct i915_oa_config), you
+		 * need to hold dev_priv->perf.metrics_lock to access it.
 		 */
 		struct idr metrics_idr;
 
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 47fb6f6f2065..0385abce7baa 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -365,11 +365,19 @@ struct perf_open_properties {
 	int oa_period_exponent;
 };
 
+struct i915_oa_config_bo {
+	struct list_head link;
+
+	struct i915_oa_config *oa_config;
+	struct drm_i915_gem_object *bo;
+};
+
 static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer);
 
-static void free_oa_config(struct drm_i915_private *dev_priv,
-			   struct i915_oa_config *oa_config)
+void i915_oa_config_release(struct kref *ref)
 {
+	struct i915_oa_config *oa_config = container_of(ref, typeof(*oa_config), ref);
+
 	if (!PTR_ERR(oa_config->flex_regs))
 		kfree(oa_config->flex_regs);
 	if (!PTR_ERR(oa_config->b_counter_regs))
@@ -379,40 +387,173 @@ static void free_oa_config(struct drm_i915_private *dev_priv,
 	kfree(oa_config);
 }
 
-static void put_oa_config(struct drm_i915_private *dev_priv,
-			  struct i915_oa_config *oa_config)
+static u32 *write_cs_mi_lri(u32 *cs, const struct i915_oa_reg *reg_data, u32 n_regs)
 {
-	if (!atomic_dec_and_test(&oa_config->ref_count))
-		return;
+	u32 i;
+
+	for (i = 0; i < n_regs; i++) {
+		if ((i % MI_LOAD_REGISTER_IMM_MAX_REGS) == 0) {
+			u32 n_lri = min(n_regs - i,
+					(u32) MI_LOAD_REGISTER_IMM_MAX_REGS);
+
+			*cs++ = MI_LOAD_REGISTER_IMM(n_lri);
+		}
+		*cs++ = i915_mmio_reg_offset(reg_data[i].addr);
+		*cs++ = reg_data[i].value;
+	}
 
-	free_oa_config(dev_priv, oa_config);
+	return cs;
 }
 
-static int get_oa_config(struct drm_i915_private *dev_priv,
-			 int metrics_set,
-			 struct i915_oa_config **out_config)
+static struct i915_oa_config_bo* alloc_oa_config_buffer(struct drm_i915_private *i915,
+							struct i915_oa_config *oa_config)
 {
-	int ret;
+	struct i915_oa_config_bo *oa_bo;
+	size_t config_length = 0;
+	u32 *cs;
+	int err;
+
+	oa_bo = kzalloc(sizeof(*oa_bo), GFP_KERNEL);
+	if (!oa_bo)
+		return ERR_PTR(-ENOMEM);
+
+	oa_bo->oa_config = i915_oa_config_get(oa_config);
+
+	if (oa_config->mux_regs_len > 0) {
+		config_length += DIV_ROUND_UP(oa_config->mux_regs_len,
+					      MI_LOAD_REGISTER_IMM_MAX_REGS) * 4;
+		config_length += oa_config->mux_regs_len * 8;
+	}
+	if (oa_config->b_counter_regs_len > 0) {
+		config_length += DIV_ROUND_UP(oa_config->b_counter_regs_len,
+					      MI_LOAD_REGISTER_IMM_MAX_REGS) * 4;
+		config_length += oa_config->b_counter_regs_len * 8;
+	}
+	if (oa_config->flex_regs_len > 0) {
+		config_length += DIV_ROUND_UP(oa_config->flex_regs_len,
+					      MI_LOAD_REGISTER_IMM_MAX_REGS) * 4;
+		config_length += oa_config->flex_regs_len * 8;
+	}
+	config_length += 4; /* MI_BATCH_BUFFER_END */
+	config_length = ALIGN(config_length, I915_GTT_PAGE_SIZE);
+
+	oa_bo->bo = i915_gem_object_create_shmem(i915, config_length);
+	if (IS_ERR(oa_bo->bo)) {
+		err = PTR_ERR(oa_bo->bo);
+		goto err_oa_config;
+	}
+
+	cs = i915_gem_object_pin_map(oa_bo->bo, I915_MAP_WB);
+	if (IS_ERR(cs)) {
+		err = PTR_ERR(cs);
+		goto err_oa_bo;
+	}
+
+	cs = write_cs_mi_lri(cs, oa_config->mux_regs, oa_config->mux_regs_len);
+	cs = write_cs_mi_lri(cs, oa_config->b_counter_regs, oa_config->b_counter_regs_len);
+	cs = write_cs_mi_lri(cs, oa_config->flex_regs, oa_config->flex_regs_len);
+
+	*cs++ = MI_BATCH_BUFFER_END;
+
+	i915_gem_object_flush_map(oa_bo->bo);
+	i915_gem_object_unpin_map(oa_bo->bo);
+
+	return oa_bo;
+
+err_oa_bo:
+	i915_gem_object_put(oa_bo->bo);
+err_oa_config:
+	i915_oa_config_put(oa_bo->oa_config);
+	kfree(oa_bo);
+
+	return ERR_PTR(err);
+}
+
+int i915_perf_get_oa_config(struct drm_i915_private *i915,
+			    int metrics_set,
+			    struct i915_oa_config **out_config)
+{
+	struct i915_oa_config *oa_config;
+	int err;
+
+	if (!i915->perf.initialized)
+		return -ENODEV;
+
+	err = mutex_lock_interruptible(&i915->perf.metrics_lock);
+	if (err)
+		return err;
 
 	if (metrics_set == 1) {
-		*out_config = &dev_priv->perf.test_config;
-		atomic_inc(&dev_priv->perf.test_config.ref_count);
-		return 0;
+		oa_config = &i915->perf.test_config;
+	} else {
+		oa_config = idr_find(&i915->perf.metrics_idr, metrics_set);
+		if (!oa_config)
+			err = -EINVAL;
 	}
 
-	ret = mutex_lock_interruptible(&dev_priv->perf.metrics_lock);
-	if (ret)
-		return ret;
+	if (!err)
+		*out_config = i915_oa_config_get(oa_config);
 
-	*out_config = idr_find(&dev_priv->perf.metrics_idr, metrics_set);
-	if (!*out_config)
-		ret = -EINVAL;
-	else
-		atomic_inc(&(*out_config)->ref_count);
+	mutex_unlock(&i915->perf.metrics_lock);
 
-	mutex_unlock(&dev_priv->perf.metrics_lock);
+	return err;
+}
 
-	return ret;
+int i915_perf_get_oa_config_and_bo(struct i915_perf_stream *stream,
+				   int metrics_set,
+				   struct i915_oa_config **out_config,
+				   struct drm_i915_gem_object **out_obj)
+{
+	struct drm_i915_private *i915 = stream->dev_priv;
+	struct i915_oa_config *oa_config;
+	int err = 0;
+
+	if (!i915->perf.initialized)
+		return -ENODEV;
+
+	err = i915_perf_get_oa_config(i915, metrics_set, &oa_config);
+	if (err)
+		return err;
+
+	if (out_config)
+		*out_config = oa_config;
+
+	if (out_obj) {
+		struct i915_oa_config_bo *oa_bo = NULL, *oa_bo_iter;
+
+		/* Look for the buffer in the already allocated BOs attached
+		 * to the stream.
+		 */
+		list_for_each_entry(oa_bo_iter, &stream->oa_config_bos, link) {
+			if (oa_bo_iter->oa_config == oa_config &&
+			    memcmp(oa_bo_iter->oa_config->uuid,
+				   oa_config->uuid,
+				   sizeof(oa_config->uuid)) == 0) {
+				oa_bo = oa_bo_iter;
+				break;
+			}
+		}
+
+		if (!oa_bo) {
+			oa_bo = alloc_oa_config_buffer(i915, oa_config);
+			if (IS_ERR(oa_bo)) {
+				err = PTR_ERR(oa_bo);
+				goto err;
+			}
+
+			list_add(&oa_bo->link, &stream->oa_config_bos);
+		}
+
+		*out_obj = i915_gem_object_get(oa_bo->bo);
+	}
+
+err:
+	if (err) {
+		i915_oa_config_put(oa_config);
+		*out_config = NULL;
+	}
+
+	return err;
 }
 
 static u32 gen8_oa_hw_tail_read(struct i915_perf_stream *stream)
@@ -1360,6 +1501,19 @@ free_oa_buffer(struct i915_perf_stream *stream)
 	stream->oa_buffer.vaddr = NULL;
 }
 
+static void
+free_oa_configs(struct i915_perf_stream *stream)
+{
+	struct i915_oa_config_bo *oa_bo, *tmp;
+
+	i915_oa_config_put(stream->oa_config);
+	list_for_each_entry_safe(oa_bo, tmp, &stream->oa_config_bos, link) {
+		list_del(&oa_bo->link);
+		i915_oa_config_put(oa_bo->oa_config);
+		i915_gem_object_put(oa_bo->bo);
+	}
+}
+
 static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
 {
 	struct drm_i915_private *dev_priv = stream->dev_priv;
@@ -1383,7 +1537,7 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
 	if (stream->ctx)
 		oa_put_render_ctx_id(stream);
 
-	put_oa_config(dev_priv, stream->oa_config);
+	free_oa_configs(stream);
 
 	if (dev_priv->perf.spurious_report_rs.missed) {
 		DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n",
@@ -2230,7 +2384,8 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 		}
 	}
 
-	ret = get_oa_config(dev_priv, props->metrics_set, &stream->oa_config);
+	ret = i915_perf_get_oa_config(dev_priv, props->metrics_set,
+				      &stream->oa_config);
 	if (ret) {
 		DRM_DEBUG("Invalid OA config id=%i\n", props->metrics_set);
 		goto err_config;
@@ -2268,6 +2423,8 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 		goto err_enable;
 	}
 
+	DRM_DEBUG("opening stream oa config uuid=%s\n", stream->oa_config->uuid);
+
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 
 	hrtimer_init(&stream->poll_check_timer,
@@ -2287,7 +2444,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 	free_oa_buffer(stream);
 
 err_oa_buf_alloc:
-	put_oa_config(dev_priv, stream->oa_config);
+	i915_oa_config_put(stream->oa_config);
 
 	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
 	intel_runtime_pm_put(&dev_priv->runtime_pm, stream->wakeref);
@@ -2655,7 +2812,9 @@ static int i915_perf_release(struct inode *inode, struct file *file)
 	struct drm_i915_private *dev_priv = stream->dev_priv;
 
 	mutex_lock(&dev_priv->perf.lock);
+
 	i915_perf_destroy_locked(stream);
+
 	mutex_unlock(&dev_priv->perf.lock);
 
 	/* Release the reference the perf stream kept on the driver. */
@@ -2764,6 +2923,7 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv,
 		goto err_ctx;
 	}
 
+	INIT_LIST_HEAD(&stream->oa_config_bos);
 	stream->dev_priv = dev_priv;
 	stream->ctx = specific_ctx;
 
@@ -3091,7 +3251,8 @@ void i915_perf_register(struct drm_i915_private *dev_priv)
 	if (ret)
 		goto sysfs_error;
 
-	atomic_set(&dev_priv->perf.test_config.ref_count, 1);
+	dev_priv->perf.test_config.i915 = dev_priv;
+	kref_init(&dev_priv->perf.test_config.ref);
 
 	goto exit;
 
@@ -3347,7 +3508,8 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
 		return -ENOMEM;
 	}
 
-	atomic_set(&oa_config->ref_count, 1);
+	oa_config->i915 = dev_priv;
+	kref_init(&oa_config->ref);
 
 	if (!uuid_is_valid(args->uuid)) {
 		DRM_DEBUG("Invalid uuid format for OA config\n");
@@ -3446,7 +3608,7 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
 sysfs_err:
 	mutex_unlock(&dev_priv->perf.metrics_lock);
 reg_err:
-	put_oa_config(dev_priv, oa_config);
+	i915_oa_config_put(oa_config);
 	DRM_DEBUG("Failed to add new OA config\n");
 	return err;
 }
@@ -3482,13 +3644,13 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
 
 	ret = mutex_lock_interruptible(&dev_priv->perf.metrics_lock);
 	if (ret)
-		goto lock_err;
+		return ret;
 
 	oa_config = idr_find(&dev_priv->perf.metrics_idr, *arg);
 	if (!oa_config) {
 		DRM_DEBUG("Failed to remove unknown OA config\n");
 		ret = -ENOENT;
-		goto config_err;
+		goto err_unlock;
 	}
 
 	GEM_BUG_ON(*arg != oa_config->id);
@@ -3498,13 +3660,16 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
 
 	idr_remove(&dev_priv->perf.metrics_idr, *arg);
 
+	mutex_unlock(&dev_priv->perf.metrics_lock);
+
 	DRM_DEBUG("Removed config %s id=%i\n", oa_config->uuid, oa_config->id);
 
-	put_oa_config(dev_priv, oa_config);
+	i915_oa_config_put(oa_config);
 
-config_err:
+	return 0;
+
+err_unlock:
 	mutex_unlock(&dev_priv->perf.metrics_lock);
-lock_err:
 	return ret;
 }
 
@@ -3641,6 +3806,7 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
 
 	if (dev_priv->perf.ops.enable_metric_set) {
 		INIT_LIST_HEAD(&dev_priv->perf.streams);
+
 		mutex_init(&dev_priv->perf.lock);
 
 		oa_sample_rate_hard_limit = 1000 *
@@ -3675,10 +3841,9 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
 
 static int destroy_config(int id, void *p, void *data)
 {
-	struct drm_i915_private *dev_priv = data;
 	struct i915_oa_config *oa_config = p;
 
-	put_oa_config(dev_priv, oa_config);
+	i915_oa_config_put(oa_config);
 
 	return 0;
 }
@@ -3692,7 +3857,7 @@ void i915_perf_fini(struct drm_i915_private *dev_priv)
 	if (!dev_priv->perf.initialized)
 		return;
 
-	idr_for_each(&dev_priv->perf.metrics_idr, destroy_config, dev_priv);
+	idr_for_each(&dev_priv->perf.metrics_idr, destroy_config, NULL);
 	idr_destroy(&dev_priv->perf.metrics_idr);
 
 	unregister_sysctl_table(dev_priv->perf.sysctl_header);
diff --git a/drivers/gpu/drm/i915/i915_perf.h b/drivers/gpu/drm/i915/i915_perf.h
index 95549de65212..d62980c49d42 100644
--- a/drivers/gpu/drm/i915/i915_perf.h
+++ b/drivers/gpu/drm/i915/i915_perf.h
@@ -29,5 +29,27 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
 void i915_oa_init_reg_state(struct intel_engine_cs *engine,
 			    struct intel_context *ce,
 			    u32 *reg_state);
+int i915_perf_get_oa_config(struct drm_i915_private *i915,
+			    int metrics_set,
+			    struct i915_oa_config **out_config);
+int i915_perf_get_oa_config_and_bo(struct i915_perf_stream *stream,
+				   int metrics_set,
+				   struct i915_oa_config **out_config,
+				   struct drm_i915_gem_object **out_obj);
+void i915_oa_config_release(struct kref *ref);
+
+static inline struct i915_oa_config *i915_oa_config_get(struct i915_oa_config *oa_config)
+{
+	kref_get(&oa_config->ref);
+	return oa_config;
+}
+
+static inline void i915_oa_config_put(struct i915_oa_config *oa_config)
+{
+	if (!oa_config)
+		return;
+
+	kref_put(&oa_config->ref, i915_oa_config_release);
+}
 
 #endif /* __I915_PERF_H__ */
-- 
2.23.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  parent reply	other threads:[~2019-08-28 14:33 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-08-28 14:33 [PATCH v11 00/10] drm/i915: Vulkan performance query support Lionel Landwerlin
2019-08-28 14:33 ` [PATCH v11 01/10] drm/syncobj: add sideband payload Lionel Landwerlin
2019-08-29  5:26   ` Zhou, David(ChunMing)
2019-08-29  6:40     ` Lionel Landwerlin
2019-09-04 16:08     ` Lionel Landwerlin
2019-08-28 14:33 ` [PATCH v11 02/10] drm/i915: introduce a mechanism to extend execbuf2 Lionel Landwerlin
2019-08-28 14:33 ` [PATCH v11 03/10] drm/i915: add syncobj timeline support Lionel Landwerlin
2019-08-28 14:33 ` [PATCH v11 04/10] drm/i915/perf: introduce a versioning of the i915-perf uapi Lionel Landwerlin
2019-08-28 14:33 ` Lionel Landwerlin [this message]
2019-08-29  2:41   ` [PATCH v11 05/10] drm/i915/perf: allow for CS OA configs to be created lazily kbuild test robot
2019-08-28 14:33 ` [PATCH v11 06/10] drm/i915/perf: implement active wait for noa configurations Lionel Landwerlin
2019-08-28 19:34   ` kbuild test robot
2019-08-28 19:39     ` Chris Wilson
2019-08-29  6:43       ` Lionel Landwerlin
2019-08-28 20:00   ` kbuild test robot
2019-08-28 14:33 ` [PATCH v11 07/10] drm/i915: add a new perf configuration execbuf parameter Lionel Landwerlin
2019-08-28 14:33 ` [PATCH v11 08/10] drm/i915/perf: allow holding preemption on filtered ctx Lionel Landwerlin
2019-08-28 14:33 ` [PATCH v11 09/10] drm/i915/perf: execute OA configuration from command stream Lionel Landwerlin
2019-08-28 19:41   ` Chris Wilson
2019-08-29  6:45     ` Lionel Landwerlin
2019-08-29  6:58       ` Lionel Landwerlin
2019-08-28 14:33 ` [PATCH v11 10/10] drm/i915: add support for perf configuration queries Lionel Landwerlin
2019-08-28 15:58 ` ✗ Fi.CI.BUILD: failure for drm/i915: Vulkan performance query support (rev11) Patchwork
2019-08-29  5:34 ` ✗ Fi.CI.BAT: failure for drm/i915: Vulkan performance query support (rev12) Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190828143327.7965-6-lionel.g.landwerlin@intel.com \
    --to=lionel.g.landwerlin@intel.com \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.