All of lore.kernel.org
 help / color / mirror / Atom feed
From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
To: intel-gfx@lists.freedesktop.org
Subject: [CI 11/13] drm/i915: add a new perf configuration execbuf parameter
Date: Mon,  9 Sep 2019 10:18:07 +0300	[thread overview]
Message-ID: <20190909071809.23310-11-lionel.g.landwerlin@intel.com> (raw)
In-Reply-To: <20190909071809.23310-1-lionel.g.landwerlin@intel.com>

We want the ability to dispatch a set of command buffer to the
hardware, each with a different OA configuration. To achieve this, we
reuse a couple of fields from the execbuf2 struct (I CAN HAZ
execbuf3?) to notify what OA configuration should be used for a batch
buffer. This requires the process making the execbuf with this flag to
also own the perf fd at the time of execbuf.

v2: Add a emit_oa_config() vfunc in the intel_engine_cs (Chris)
    Move oa_config vma to active (Chris)

v3: Don't drop the lock for engine lookup (Chris)
    Move OA config vma to active before writing the ringbuffer (Chris)

v4: Reuse i915_user_extension_fn
    Serialize requests with OA config updates

v5: Check that the chained extension is only present once (Chris)
    Unpin oa_vma in main path (Chris)

v6: Use BIT_ULL (Chris)

v7: Hold drm.struct_mutex when serializing the request with OA config (Chris)

v8: Remove active request from engine (Lionel)

v9: Move fetching OA configuration pass engine pinning (Lionel)
    Lock VMA before moving to active (Chris)

v10: Fix leak on perf_fd (Lionel)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 147 +++++++++++++++++-
 drivers/gpu/drm/i915/i915_getparam.c          |   4 +
 include/uapi/drm/i915_drm.h                   |  39 +++++
 3 files changed, 188 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 46ad8d9642d1..d416b60c94bb 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -24,6 +24,7 @@
 #include "i915_gem_clflush.h"
 #include "i915_gem_context.h"
 #include "i915_gem_ioctls.h"
+#include "i915_perf.h"
 #include "i915_trace.h"
 #include "i915_user_extensions.h"
 
@@ -284,7 +285,12 @@ struct i915_execbuffer {
 	struct {
 		u64 flags; /** Available extensions parameters */
 		struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences;
+		struct drm_i915_gem_execbuffer_ext_perf perf_config;
 	} extensions;
+
+	struct file *perf_file;
+	struct i915_oa_config *oa_config; /** HW configuration for OA, NULL is not needed. */
+	struct i915_vma *oa_vma;
 };
 
 #define exec_entry(EB, VMA) (&(EB)->exec[(VMA)->exec_flags - (EB)->flags])
@@ -1152,6 +1158,58 @@ static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma)
 	return err;
 }
 
+
+static int
+eb_get_oa_config(struct i915_execbuffer *eb)
+{
+	struct drm_i915_gem_object *oa_bo;
+	int err = 0;
+
+	eb->perf_file = NULL;
+	eb->oa_config = NULL;
+	eb->oa_vma = NULL;
+
+	if ((eb->extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF)) == 0)
+		return 0;
+
+	eb->perf_file = fget(eb->extensions.perf_config.perf_fd);
+	if (!eb->perf_file)
+		return -EINVAL;
+
+	err = i915_mutex_lock_interruptible(&eb->i915->drm);
+	if (err)
+		return err;
+
+	if (eb->perf_file->private_data != eb->i915->perf.exclusive_stream)
+		err = -EINVAL;
+
+	mutex_unlock(&eb->i915->drm.struct_mutex);
+
+	if (err)
+		return err;
+
+	if (eb->i915->perf.exclusive_stream->engine != eb->engine)
+		return -EINVAL;
+
+	err = i915_perf_get_oa_config_and_bo(
+		eb->i915->perf.exclusive_stream,
+		eb->extensions.perf_config.oa_config,
+		&eb->oa_config, &oa_bo);
+	if (err)
+		return err;
+
+	eb->oa_vma = i915_vma_instance(oa_bo,
+				       &eb->engine->gt->ggtt->vm, NULL);
+	i915_gem_object_put(oa_bo);
+	if (IS_ERR(eb->oa_vma)) {
+		err = PTR_ERR(eb->oa_vma);
+		eb->oa_vma = NULL;
+		return err;
+	}
+
+	return 0;
+}
+
 static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 			     struct i915_vma *vma,
 			     unsigned int len)
@@ -2051,6 +2109,54 @@ add_to_client(struct i915_request *rq, struct drm_file *file)
 	spin_unlock(&file_priv->mm.lock);
 }
 
+static int eb_oa_config(struct i915_execbuffer *eb)
+{
+	struct i915_perf_stream *perf_stream;
+	int err;
+
+	if (!eb->oa_config)
+		return 0;
+
+	perf_stream = eb->perf_file->private_data;
+
+	err = mutex_lock_interruptible(&perf_stream->config_mutex);
+	if (err)
+		return err;
+
+	err = i915_active_request_set(&perf_stream->active_config_rq,
+				      eb->request);
+	if (err)
+		goto out;
+
+	/*
+	 * If the config hasn't changed, skip reconfiguring the HW (this is
+	 * subject to a delay we want to avoid has much as possible).
+	 */
+	if (eb->oa_config == perf_stream->oa_config)
+		goto out;
+
+	i915_vma_lock(eb->oa_vma);
+	err = i915_request_await_object(eb->request, eb->oa_vma->obj, false); /* await_resv already! */
+	if (!err)
+		err = i915_vma_move_to_active(eb->oa_vma, eb->request, 0);
+	i915_vma_unlock(eb->oa_vma);
+	if (err)
+		goto out;
+
+	err = eb->engine->emit_bb_start(eb->request,
+					eb->oa_vma->node.start,
+					0, I915_DISPATCH_SECURE);
+	if (err)
+		goto out;
+
+	swap(eb->oa_config, perf_stream->oa_config);
+
+out:
+	mutex_unlock(&perf_stream->config_mutex);
+
+	return err;
+}
+
 static int eb_submit(struct i915_execbuffer *eb)
 {
 	int err;
@@ -2077,6 +2183,10 @@ static int eb_submit(struct i915_execbuffer *eb)
 			return err;
 	}
 
+	err = eb_oa_config(eb);
+	if (err)
+		return err;
+
 	err = eb->engine->emit_bb_start(eb->request,
 					eb->batch->node.start +
 					eb->batch_start_offset,
@@ -2643,8 +2753,25 @@ static int parse_timeline_fences(struct i915_user_extension __user *ext, void *d
 	return 0;
 }
 
+static int parse_perf_config(struct i915_user_extension __user *ext, void *data)
+{
+	struct i915_execbuffer *eb = data;
+
+	if (eb->extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF))
+		return -EINVAL;
+
+	if (copy_from_user(&eb->extensions.perf_config, ext,
+			   sizeof(eb->extensions.perf_config)))
+		return -EFAULT;
+
+	eb->extensions.flags |= BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF);
+
+	return 0;
+}
+
 static const i915_user_extension_fn execbuf_extensions[] = {
         [DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences,
+        [DRM_I915_GEM_EXECBUFFER_EXT_PERF] = parse_perf_config,
 };
 
 static int
@@ -2769,9 +2896,13 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	if (unlikely(err))
 		goto err_context;
 
+	err = eb_get_oa_config(&eb);
+	if (err)
+		goto err_oa_config;
+
 	err = i915_mutex_lock_interruptible(dev);
 	if (err)
-		goto err_engine;
+		goto err_oa_config;
 
 	err = eb_relocate(&eb);
 	if (err) {
@@ -2889,6 +3020,12 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 		}
 	}
 
+	if (eb.oa_vma) {
+		err = i915_vma_pin(eb.oa_vma, 0, 0, PIN_GLOBAL);
+		if (err)
+			goto err_request;
+	}
+
 	/*
 	 * Whilst this request exists, batch_obj will be on the
 	 * active_list, and so will hold the active reference. Only when this
@@ -2929,7 +3066,13 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	if (eb.exec)
 		eb_release_vmas(&eb);
 	mutex_unlock(&dev->struct_mutex);
-err_engine:
+err_oa_config:
+	if (eb.perf_file)
+		fput(eb.perf_file);
+	if (eb.oa_config)
+		i915_oa_config_put(eb.oa_config);
+	if (eb.oa_vma)
+		i915_vma_unpin(eb.oa_vma);
 	eb_unpin_engine(&eb);
 err_context:
 	i915_gem_context_put(eb.gem_context);
diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c
index bd41cc5ce906..39d4c2c2e0f4 100644
--- a/drivers/gpu/drm/i915/i915_getparam.c
+++ b/drivers/gpu/drm/i915/i915_getparam.c
@@ -161,6 +161,10 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
 	case I915_PARAM_PERF_REVISION:
 		value = i915_perf_ioctl_version();
 		break;
+	case I915_PARAM_HAS_EXEC_PERF_CONFIG:
+		/* Obviously requires perf support. */
+		value = i915->perf.initialized;
+		break;
 	default:
 		DRM_DEBUG("Unknown parameter %d\n", param->param);
 		return -EINVAL;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index e98c9a7baa91..3166c9ca85f3 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -624,6 +624,16 @@ typedef struct drm_i915_irq_wait {
  */
 #define I915_PARAM_PERF_REVISION	55
 
+/*
+ * Request an i915/perf performance configuration change before running the
+ * commands given in an execbuf.
+ *
+ * Performance configuration ID and the file descriptor of the i915 perf
+ * stream are given through drm_i915_gem_execbuffer_ext_perf. See
+ * I915_EXEC_EXT.
+ */
+#define I915_PARAM_HAS_EXEC_PERF_CONFIG 56
+
 /* Must be kept compact -- no holes and well documented */
 
 typedef struct drm_i915_getparam {
@@ -1026,6 +1036,12 @@ enum drm_i915_gem_execbuffer_ext {
 	 */
 	DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES = 1,
 
+	/**
+	 * This identifier is associated with
+	 * drm_i915_gem_execbuffer_perf_ext.
+	 */
+	DRM_I915_GEM_EXECBUFFER_EXT_PERF,
+
 	DRM_I915_GEM_EXECBUFFER_EXT_MAX /* non-ABI */
 };
 
@@ -1056,6 +1072,29 @@ struct drm_i915_gem_execbuffer_ext_timeline_fences {
 	__u64 values_ptr;
 };
 
+struct drm_i915_gem_execbuffer_ext_perf {
+	struct i915_user_extension base;
+
+	/**
+	 * Performance file descriptor returned by DRM_IOCTL_I915_PERF_OPEN.
+	 * This is used to identify that the application requesting a HW
+	 * performance configuration change actually has a right to do so
+	 * because it also has access the i915-perf stream.
+	 */
+	__s32 perf_fd;
+
+	/**
+	 * Unused for now. Must be cleared to zero.
+	 */
+	__u32 pad;
+
+	/**
+	 * OA configuration ID to switch to before executing the commands
+	 * associated to the execbuf.
+	 */
+	__u64 oa_config;
+};
+
 struct drm_i915_gem_execbuffer2 {
 	/**
 	 * List of gem_exec_object2 structs
-- 
2.23.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  parent reply	other threads:[~2019-09-09  7:18 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-09-09  7:17 [CI 01/13] drm/i915: introduce a mechanism to extend execbuf2 Lionel Landwerlin
2019-09-09  7:17 ` [CI 02/13] drm/i915: add syncobj timeline support Lionel Landwerlin
2019-09-09  7:17 ` [CI 03/13] drm/i915/perf: drop list of streams Lionel Landwerlin
2019-09-09  7:18 ` [CI 04/13] drm/i915/perf: store the associated engine of a stream Lionel Landwerlin
2019-09-09  7:18 ` [CI 05/13] drm/i915/perf: introduce a versioning of the i915-perf uapi Lionel Landwerlin
2019-09-09  7:18 ` [CI 06/13] drm/i915/perf: move perf types to their own header Lionel Landwerlin
2019-09-09  7:18 ` [CI 07/13] drm/i915/perf: allow for CS OA configs to be created lazily Lionel Landwerlin
2019-09-09  7:18 ` [CI 08/13] drm/i915/perf: implement active wait for noa configurations Lionel Landwerlin
2019-09-09  7:18 ` [CI 09/13] drm/i915: add wait flags to i915_active_request_retire Lionel Landwerlin
2019-09-09  7:18 ` [CI 10/13] drm/i915/perf: execute OA configuration from command stream Lionel Landwerlin
2019-09-09  7:18 ` Lionel Landwerlin [this message]
2019-09-09  7:18 ` [CI 12/13] drm/i915/perf: allow holding preemption on filtered ctx Lionel Landwerlin
2019-09-09  7:18 ` [CI 13/13] drm/i915: add support for perf configuration queries Lionel Landwerlin
2019-09-09  7:44 ` ✗ Fi.CI.BUILD: failure for series starting with [CI,01/13] drm/i915: introduce a mechanism to extend execbuf2 Patchwork
2019-09-09  8:22 [CI 01/13] " Lionel Landwerlin
2019-09-09  8:22 ` [CI 11/13] drm/i915: add a new perf configuration execbuf parameter Lionel Landwerlin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190909071809.23310-11-lionel.g.landwerlin@intel.com \
    --to=lionel.g.landwerlin@intel.com \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.