All of lore.kernel.org
 help / color / mirror / Atom feed
From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
To: intel-gfx@lists.freedesktop.org
Subject: [PATCH v11 07/10] drm/i915: add a new perf configuration execbuf parameter
Date: Wed, 28 Aug 2019 17:33:24 +0300	[thread overview]
Message-ID: <20190828143327.7965-8-lionel.g.landwerlin@intel.com> (raw)
In-Reply-To: <20190828143327.7965-1-lionel.g.landwerlin@intel.com>

We want the ability to dispatch a set of command buffer to the
hardware, each with a different OA configuration. To achieve this, we
reuse a couple of fields from the execbuf2 struct (I CAN HAZ
execbuf3?) to notify what OA configuration should be used for a batch
buffer. This requires the process making the execbuf with this flag to
also own the perf fd at the time of execbuf.

v2: Add a emit_oa_config() vfunc in the intel_engine_cs (Chris)
    Move oa_config vma to active (Chris)

v3: Don't drop the lock for engine lookup (Chris)
    Move OA config vma to active before writing the ringbuffer (Chris)

v4: Reuse i915_user_extension_fn
    Serialize requests with OA config updates

v5: Check that the chained extension is only present once (Chris)
    Unpin oa_vma in main path (Chris)

v6: Use BIT_ULL (Chris)

v7: Hold drm.struct_mutex when serializing the request with OA config (Chris)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 133 +++++++++++++++++-
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     |   4 +
 drivers/gpu/drm/i915/gt/intel_engine_types.h  |   9 ++
 drivers/gpu/drm/i915/gt/intel_lrc.c           |   4 +-
 drivers/gpu/drm/i915/gt/intel_ringbuffer.c    |   4 +-
 drivers/gpu/drm/i915/i915_getparam.c          |   4 +
 drivers/gpu/drm/i915/i915_perf.c              |   2 -
 include/uapi/drm/i915_drm.h                   |  39 +++++
 8 files changed, 194 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 46ad8d9642d1..bdecd893cd61 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -24,6 +24,7 @@
 #include "i915_gem_clflush.h"
 #include "i915_gem_context.h"
 #include "i915_gem_ioctls.h"
+#include "i915_perf.h"
 #include "i915_trace.h"
 #include "i915_user_extensions.h"
 
@@ -284,7 +285,12 @@ struct i915_execbuffer {
 	struct {
 		u64 flags; /** Available extensions parameters */
 		struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences;
+		struct drm_i915_gem_execbuffer_ext_perf perf_config;
 	} extensions;
+
+	struct i915_oa_config *oa_config; /** HW configuration for OA, NULL is not needed. */
+	struct drm_i915_gem_object *oa_bo;
+	struct i915_vma *oa_vma;
 };
 
 #define exec_entry(EB, VMA) (&(EB)->exec[(VMA)->exec_flags - (EB)->flags])
@@ -1152,6 +1158,42 @@ static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma)
 	return err;
 }
 
+
+static int
+get_execbuf_oa_config(struct i915_execbuffer *eb)
+{
+	struct file *perf_file;
+	int err = 0;
+
+	eb->oa_config = NULL;
+	eb->oa_vma = NULL;
+	eb->oa_bo = NULL;
+
+	if ((eb->extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF)) == 0)
+		return 0;
+
+	perf_file = fget(eb->extensions.perf_config.perf_fd);
+	if (!perf_file) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (perf_file->private_data != eb->i915->perf.exclusive_stream)
+		err = -EINVAL;
+
+	if (!err) {
+		err = i915_perf_get_oa_config_and_bo(
+			eb->i915->perf.exclusive_stream,
+			eb->extensions.perf_config.oa_config,
+			&eb->oa_config, &eb->oa_bo);
+	}
+
+	fput(perf_file);
+
+out:
+	return err;
+}
+
 static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 			     struct i915_vma *vma,
 			     unsigned int len)
@@ -2051,6 +2093,42 @@ add_to_client(struct i915_request *rq, struct drm_file *file)
 	spin_unlock(&file_priv->mm.lock);
 }
 
+static int eb_oa_config(struct i915_execbuffer *eb)
+{
+	int ret;
+
+	if (!eb->oa_config)
+		return 0;
+
+	lockdep_assert_held(&eb->i915->drm.struct_mutex); /* oa_config */
+
+	ret = i915_active_request_set(&eb->engine->last_oa_config,
+				      eb->request);
+	if (ret)
+		return ret;
+
+	/*
+	 * If the config hasn't changed, skip reconfiguring the HW (this is
+	 * subject to a delay we want to avoid has much as possible).
+	 */
+	if (eb->oa_config == eb->i915->perf.exclusive_stream->oa_config)
+		return 0;
+
+	ret = i915_vma_move_to_active(eb->oa_vma, eb->request, 0);
+	if (ret)
+		return ret;
+
+	ret = eb->engine->emit_bb_start(eb->request,
+					eb->oa_vma->node.start,
+					0, I915_DISPATCH_SECURE);
+	if (ret)
+		return ret;
+
+	swap(eb->oa_config, eb->i915->perf.exclusive_stream->oa_config);
+
+	return 0;
+}
+
 static int eb_submit(struct i915_execbuffer *eb)
 {
 	int err;
@@ -2077,6 +2155,10 @@ static int eb_submit(struct i915_execbuffer *eb)
 			return err;
 	}
 
+	err = eb_oa_config(eb);
+	if (err)
+		return err;
+
 	err = eb->engine->emit_bb_start(eb->request,
 					eb->batch->node.start +
 					eb->batch_start_offset,
@@ -2643,8 +2725,25 @@ static int parse_timeline_fences(struct i915_user_extension __user *ext, void *d
 	return 0;
 }
 
+static int parse_perf_config(struct i915_user_extension __user *ext, void *data)
+{
+	struct i915_execbuffer *eb = data;
+
+	if (eb->extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF))
+		return -EINVAL;
+
+	if (copy_from_user(&eb->extensions.perf_config, ext,
+			   sizeof(eb->extensions.perf_config)))
+		return -EFAULT;
+
+	eb->extensions.flags |= BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF);
+
+	return 0;
+}
+
 static const i915_user_extension_fn execbuf_extensions[] = {
         [DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences,
+        [DRM_I915_GEM_EXECBUFFER_EXT_PERF] = parse_perf_config,
 };
 
 static int
@@ -2755,10 +2854,14 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 		}
 	}
 
-	err = eb_create(&eb);
+	err = get_execbuf_oa_config(&eb);
 	if (err)
 		goto err_out_fence;
 
+	err = eb_create(&eb);
+	if (err)
+		goto err_oa_config;
+
 	GEM_BUG_ON(!eb.lut_size);
 
 	err = eb_select_context(&eb);
@@ -2769,6 +2872,13 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	if (unlikely(err))
 		goto err_context;
 
+	if (eb.extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF)) {
+		if (!intel_engine_has_oa(eb.engine)) {
+			err = -ENODEV;
+			goto err_engine;
+		}
+	}
+
 	err = i915_mutex_lock_interruptible(dev);
 	if (err)
 		goto err_engine;
@@ -2889,6 +2999,20 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 		}
 	}
 
+	if (eb.extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF)) {
+		eb.oa_vma = i915_vma_instance(eb.oa_bo,
+					      &eb.engine->i915->ggtt.vm, NULL);
+		if (unlikely(IS_ERR(eb.oa_vma))) {
+			err = PTR_ERR(eb.oa_vma);
+			eb.oa_vma = NULL;
+			goto err_request;
+		}
+
+		err = i915_vma_pin(eb.oa_vma, 0, 0, PIN_GLOBAL);
+		if (err)
+			goto err_request;
+	}
+
 	/*
 	 * Whilst this request exists, batch_obj will be on the
 	 * active_list, and so will hold the active reference. Only when this
@@ -2935,6 +3059,13 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	i915_gem_context_put(eb.gem_context);
 err_destroy:
 	eb_destroy(&eb);
+err_oa_config:
+	if (eb.oa_config) {
+		i915_gem_object_put(eb.oa_bo);
+		i915_oa_config_put(eb.oa_config);
+	}
+	if (eb.oa_vma)
+		i915_vma_unpin(eb.oa_vma);
 err_out_fence:
 	if (out_fence_fd != -1)
 		put_unused_fd(out_fence_fd);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 17006d50b63f..f65375a26532 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -786,6 +786,10 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
 
 	engine->emit_fini_breadcrumb_dw = ret;
 
+
+	INIT_ACTIVE_REQUEST(&engine->last_oa_config,
+			    &engine->i915->drm.struct_mutex);
+
 	return 0;
 
 err_unpin:
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 15e02cb58a67..c62bdb464a06 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -399,6 +399,8 @@ struct intel_engine_cs {
 	struct i915_wa_list wa_list;
 	struct i915_wa_list whitelist;
 
+	struct i915_active_request last_oa_config;
+
 	u32             irq_keep_mask; /* always keep these interrupts */
 	u32		irq_enable_mask; /* bitmask to enable ring interrupt */
 	void		(*irq_enable)(struct intel_engine_cs *engine);
@@ -481,6 +483,7 @@ struct intel_engine_cs {
 #define I915_ENGINE_HAS_SEMAPHORES   BIT(3)
 #define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4)
 #define I915_ENGINE_IS_VIRTUAL       BIT(5)
+#define I915_ENGINE_HAS_OA           BIT(6)
 	unsigned int flags;
 
 	/*
@@ -576,6 +579,12 @@ intel_engine_is_virtual(const struct intel_engine_cs *engine)
 	return engine->flags & I915_ENGINE_IS_VIRTUAL;
 }
 
+static inline bool
+intel_engine_has_oa(const struct intel_engine_cs *engine)
+{
+	return engine->flags & I915_ENGINE_HAS_OA;
+}
+
 #define instdone_has_slice(dev_priv___, sseu___, slice___) \
 	((IS_GEN(dev_priv___, 7) ? 1 : ((sseu___)->slice_mask)) & BIT(slice___))
 
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index a141e9e37bf7..709b08f973c5 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -3082,8 +3082,10 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
 	logical_ring_default_vfuncs(engine);
 	logical_ring_default_irqs(engine);
 
-	if (engine->class == RENDER_CLASS)
+	if (engine->class == RENDER_CLASS) {
 		rcs_submission_override(engine);
+		engine->flags |= I915_ENGINE_HAS_OA;
+	}
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
index 601c16239fdf..6b06f64ffa23 100644
--- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
@@ -2250,8 +2250,10 @@ static void setup_rcs(struct intel_engine_cs *engine)
 		engine->irq_enable_mask = I915_USER_INTERRUPT;
 	}
 
-	if (IS_HASWELL(i915))
+	if (IS_HASWELL(i915)) {
 		engine->emit_bb_start = hsw_emit_bb_start;
+		engine->flags |= I915_ENGINE_HAS_OA;
+	}
 
 	engine->resume = rcs_resume;
 }
diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c
index bd41cc5ce906..39d4c2c2e0f4 100644
--- a/drivers/gpu/drm/i915/i915_getparam.c
+++ b/drivers/gpu/drm/i915/i915_getparam.c
@@ -161,6 +161,10 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
 	case I915_PARAM_PERF_REVISION:
 		value = i915_perf_ioctl_version();
 		break;
+	case I915_PARAM_HAS_EXEC_PERF_CONFIG:
+		/* Obviously requires perf support. */
+		value = i915->perf.initialized;
+		break;
 	default:
 		DRM_DEBUG("Unknown parameter %d\n", param->param);
 		return -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index dc3f3170764b..b9e76378b0c2 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -2666,8 +2666,6 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 	free_oa_buffer(stream);
 
 err_oa_buf_alloc:
-	i915_oa_config_put(stream->oa_config);
-
 	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
 	intel_runtime_pm_put(&dev_priv->runtime_pm, stream->wakeref);
 
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index e98c9a7baa91..3166c9ca85f3 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -624,6 +624,16 @@ typedef struct drm_i915_irq_wait {
  */
 #define I915_PARAM_PERF_REVISION	55
 
+/*
+ * Request an i915/perf performance configuration change before running the
+ * commands given in an execbuf.
+ *
+ * Performance configuration ID and the file descriptor of the i915 perf
+ * stream are given through drm_i915_gem_execbuffer_ext_perf. See
+ * I915_EXEC_EXT.
+ */
+#define I915_PARAM_HAS_EXEC_PERF_CONFIG 56
+
 /* Must be kept compact -- no holes and well documented */
 
 typedef struct drm_i915_getparam {
@@ -1026,6 +1036,12 @@ enum drm_i915_gem_execbuffer_ext {
 	 */
 	DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES = 1,
 
+	/**
+	 * This identifier is associated with
+	 * drm_i915_gem_execbuffer_perf_ext.
+	 */
+	DRM_I915_GEM_EXECBUFFER_EXT_PERF,
+
 	DRM_I915_GEM_EXECBUFFER_EXT_MAX /* non-ABI */
 };
 
@@ -1056,6 +1072,29 @@ struct drm_i915_gem_execbuffer_ext_timeline_fences {
 	__u64 values_ptr;
 };
 
+struct drm_i915_gem_execbuffer_ext_perf {
+	struct i915_user_extension base;
+
+	/**
+	 * Performance file descriptor returned by DRM_IOCTL_I915_PERF_OPEN.
+	 * This is used to identify that the application requesting a HW
+	 * performance configuration change actually has a right to do so
+	 * because it also has access the i915-perf stream.
+	 */
+	__s32 perf_fd;
+
+	/**
+	 * Unused for now. Must be cleared to zero.
+	 */
+	__u32 pad;
+
+	/**
+	 * OA configuration ID to switch to before executing the commands
+	 * associated to the execbuf.
+	 */
+	__u64 oa_config;
+};
+
 struct drm_i915_gem_execbuffer2 {
 	/**
 	 * List of gem_exec_object2 structs
-- 
2.23.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  parent reply	other threads:[~2019-08-28 14:33 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-08-28 14:33 [PATCH v11 00/10] drm/i915: Vulkan performance query support Lionel Landwerlin
2019-08-28 14:33 ` [PATCH v11 01/10] drm/syncobj: add sideband payload Lionel Landwerlin
2019-08-29  5:26   ` Zhou, David(ChunMing)
2019-08-29  6:40     ` Lionel Landwerlin
2019-09-04 16:08     ` Lionel Landwerlin
2019-08-28 14:33 ` [PATCH v11 02/10] drm/i915: introduce a mechanism to extend execbuf2 Lionel Landwerlin
2019-08-28 14:33 ` [PATCH v11 03/10] drm/i915: add syncobj timeline support Lionel Landwerlin
2019-08-28 14:33 ` [PATCH v11 04/10] drm/i915/perf: introduce a versioning of the i915-perf uapi Lionel Landwerlin
2019-08-28 14:33 ` [PATCH v11 05/10] drm/i915/perf: allow for CS OA configs to be created lazily Lionel Landwerlin
2019-08-29  2:41   ` kbuild test robot
2019-08-28 14:33 ` [PATCH v11 06/10] drm/i915/perf: implement active wait for noa configurations Lionel Landwerlin
2019-08-28 19:34   ` kbuild test robot
2019-08-28 19:39     ` Chris Wilson
2019-08-29  6:43       ` Lionel Landwerlin
2019-08-28 20:00   ` kbuild test robot
2019-08-28 14:33 ` Lionel Landwerlin [this message]
2019-08-28 14:33 ` [PATCH v11 08/10] drm/i915/perf: allow holding preemption on filtered ctx Lionel Landwerlin
2019-08-28 14:33 ` [PATCH v11 09/10] drm/i915/perf: execute OA configuration from command stream Lionel Landwerlin
2019-08-28 19:41   ` Chris Wilson
2019-08-29  6:45     ` Lionel Landwerlin
2019-08-29  6:58       ` Lionel Landwerlin
2019-08-28 14:33 ` [PATCH v11 10/10] drm/i915: add support for perf configuration queries Lionel Landwerlin
2019-08-28 15:58 ` ✗ Fi.CI.BUILD: failure for drm/i915: Vulkan performance query support (rev11) Patchwork
2019-08-29  5:34 ` ✗ Fi.CI.BAT: failure for drm/i915: Vulkan performance query support (rev12) Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190828143327.7965-8-lionel.g.landwerlin@intel.com \
    --to=lionel.g.landwerlin@intel.com \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.