All of lore.kernel.org
 help / color / mirror / Atom feed
From: sourab.gupta@intel.com
To: intel-gfx@lists.freedesktop.org
Cc: Insoo Woo <insoo.woo@intel.com>,
	Peter Zijlstra <a.p.zijlstra@chello.nl>,
	Jabin Wu <jabin.wu@intel.com>,
	Sourab Gupta <sourab.gupta@intel.com>
Subject: [RFC 8/8] drm/i915: Add support to add execbuffer tags to OA counter reports
Date: Wed, 15 Jul 2015 14:17:03 +0530	[thread overview]
Message-ID: <1436950023-13940-9-git-send-email-sourab.gupta@intel.com> (raw)
In-Reply-To: <1436950023-13940-1-git-send-email-sourab.gupta@intel.com>

From: Sourab Gupta <sourab.gupta@intel.com>

This patch enables userspace to specify per-workload tags via the
execbuffer ioctl. These tags can be added to OA reports to help associate
the reports with their corresponding workloads.

From a userspace perspective, there may be multiple stages within a single
context. A mechanism is needed to associate OA reports individually with
their corresponding workloads (execbuffers), which may not be possible
with ctx_id or pid information alone. This patch provides such a mechanism.

In this patch, the rsvd2 field of the execbuffer arguments is used to pass
the tag. A new flag bit in the execbuffer flags is introduced to inform the
kernel that a tag is being passed in the execbuffer arguments.

Signed-off-by: Sourab Gupta <sourab.gupta@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h            |  9 ++++++---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  8 ++++++--
 drivers/gpu/drm/i915/i915_oa_perf.c        | 23 ++++++++++++++++++++---
 include/uapi/drm/i915_drm.h                | 21 ++++++++++++++++++---
 4 files changed, 50 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 337a721..9409b4a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1672,6 +1672,7 @@ struct i915_oa_rcs_node {
 	bool discard;
 	u32 ctx_id;
 	u32 pid;
+	u32 tag;
 };
 
 extern const struct i915_oa_reg i915_oa_3d_mux_config_hsw[];
@@ -1980,11 +1981,12 @@ struct drm_i915_private {
 		struct work_struct work_timer;
 		struct work_struct work_event_destroy;
 #define I915_OA_SAMPLE_PID		(1<<0)
+#define I915_OA_SAMPLE_TAG		(1<<1)
 		int sample_info_flags;
 	} oa_pmu;
 
 	void (*insert_profile_cmd[I915_PROFILE_MAX])
-		(struct intel_ringbuffer *ringbuf, u32 ctx_id);
+		(struct intel_ringbuffer *ringbuf, u32 ctx_id, int tag);
 #endif
 
 	/* Abstract the submission mechanism (legacy ringbuffer or execlists) away */
@@ -3165,7 +3167,8 @@ void i915_oa_context_pin_notify(struct drm_i915_private *dev_priv,
 				struct intel_context *context);
 void i915_oa_context_unpin_notify(struct drm_i915_private *dev_priv,
 				  struct intel_context *context);
-void i915_insert_profiling_cmd(struct intel_ringbuffer *ringbuf, u32 ctx_id);
+void i915_insert_profiling_cmd(struct intel_ringbuffer *ringbuf, u32 ctx_id,
+				int tag);
 #else
 static inline void
 i915_oa_context_pin_notify(struct drm_i915_private *dev_priv,
@@ -3174,7 +3177,7 @@ static inline void
 i915_oa_context_unpin_notify(struct drm_i915_private *dev_priv,
 			     struct intel_context *context) {}
 void i915_insert_profiling_cmd(struct intel_ringbuffer *ringbuf,
-				u32 ctx_id) {};
+				u32 ctx_id, int tag) {};
 #endif
 
 /* i915_gem_evict.c */
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 2f8971b..53d228c 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1203,6 +1203,7 @@ i915_gem_ringbuffer_submission(struct drm_device *dev, struct drm_file *file,
 	u64 exec_len;
 	int instp_mode;
 	u32 instp_mask;
+	u32 tag = 0;
 	int i, ret = 0;
 
 	if (args->num_cliprects != 0) {
@@ -1317,8 +1318,11 @@ i915_gem_ringbuffer_submission(struct drm_device *dev, struct drm_file *file,
 			goto error;
 	}
 
+	if (args->flags & I915_EXEC_TAG)
+		tag = i915_execbuffer2_get_tag(*args);
+
 	i915_insert_profiling_cmd(ring->buffer,
-		i915_execbuffer2_get_context_id(*args));
+		i915_execbuffer2_get_context_id(*args), tag);
 
 	exec_len = args->batch_len;
 	if (cliprects) {
@@ -1343,7 +1347,7 @@ i915_gem_ringbuffer_submission(struct drm_device *dev, struct drm_file *file,
 	}
 
 	i915_insert_profiling_cmd(ring->buffer,
-		i915_execbuffer2_get_context_id(*args));
+		i915_execbuffer2_get_context_id(*args), tag);
 
 	trace_i915_gem_ring_dispatch(intel_ring_get_request(ring), dispatch_flags);
 
diff --git a/drivers/gpu/drm/i915/i915_oa_perf.c b/drivers/gpu/drm/i915/i915_oa_perf.c
index 15920d1291..839ebb4 100644
--- a/drivers/gpu/drm/i915/i915_oa_perf.c
+++ b/drivers/gpu/drm/i915/i915_oa_perf.c
@@ -25,7 +25,8 @@ static int hsw_perf_format_sizes[] = {
 	64   /* C4_B8_HSW */
 };
 
-void i915_insert_profiling_cmd(struct intel_ringbuffer *ringbuf, u32 ctx_id)
+void i915_insert_profiling_cmd(struct intel_ringbuffer *ringbuf, u32 ctx_id,
+				int tag)
 {
 	struct intel_engine_cs *ring = ringbuf->ring;
 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
@@ -33,11 +34,11 @@ void i915_insert_profiling_cmd(struct intel_ringbuffer *ringbuf, u32 ctx_id)
 
 	for (i = I915_PROFILE_OA; i < I915_PROFILE_MAX; i++) {
 		if (dev_priv->insert_profile_cmd[i])
-			dev_priv->insert_profile_cmd[i](ringbuf, ctx_id);
+			dev_priv->insert_profile_cmd[i](ringbuf, ctx_id, tag);
 	}
 }
 
-void i915_oa_insert_cmd(struct intel_ringbuffer *ringbuf, u32 ctx_id)
+void i915_oa_insert_cmd(struct intel_ringbuffer *ringbuf, u32 ctx_id, int tag)
 {
 	struct intel_engine_cs *ring = ringbuf->ring;
 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
@@ -59,6 +60,8 @@ void i915_oa_insert_cmd(struct intel_ringbuffer *ringbuf, u32 ctx_id)
 	entry->ctx_id = ctx_id;
 	if (dev_priv->oa_pmu.sample_info_flags & I915_OA_SAMPLE_PID)
 		entry->pid = current->pid;
+	if (dev_priv->oa_pmu.sample_info_flags & I915_OA_SAMPLE_TAG)
+		entry->tag = tag;
 	i915_gem_request_assign(&entry->req, ring->outstanding_lazy_request);
 
 	spin_lock_irqsave(&dev_priv->oa_pmu.lock, lock_flags);
@@ -320,6 +323,7 @@ static void forward_one_oa_rcs_sample(struct drm_i915_private *dev_priv,
 	u8 *snapshot, *current_ptr;
 	struct drm_i915_oa_node_ctx_id *ctx_info;
 	struct drm_i915_oa_node_pid *pid_info;
+	struct drm_i915_oa_node_tag *tag_info;
 	struct perf_raw_record raw;
 	u64 snapshot_ts;
 
@@ -338,6 +342,13 @@ static void forward_one_oa_rcs_sample(struct drm_i915_private *dev_priv,
 		current_ptr = snapshot + snapshot_size;
 	}
 
+	if (dev_priv->oa_pmu.sample_info_flags & I915_OA_SAMPLE_TAG) {
+		tag_info = (struct drm_i915_oa_node_tag *)current_ptr;
+		tag_info->tag = node->tag;
+		snapshot_size += sizeof(*tag_info);
+		current_ptr = snapshot + snapshot_size;
+	}
+
 	/* Flush the periodic snapshots till the ts of this OA report */
 	snapshot_ts = *(u64 *)(snapshot + 4);
 	flush_oa_snapshots(dev_priv, true, snapshot_ts);
@@ -694,6 +705,9 @@ static int init_oa_rcs_buffer(struct perf_event *event)
 	if (dev_priv->oa_pmu.sample_info_flags & I915_OA_SAMPLE_PID)
 		node_size += sizeof(struct drm_i915_oa_node_pid);
 
+	if (dev_priv->oa_pmu.sample_info_flags & I915_OA_SAMPLE_TAG)
+		node_size += sizeof(struct drm_i915_oa_node_tag);
+
 	/* node size has to be aligned to 64 bytes, since only 64 byte aligned
 	 * addresses can be given to OA unit for dumping OA reports */
 	node_size = ALIGN(node_size, 64);
@@ -850,6 +864,9 @@ static int i915_oa_event_init(struct perf_event *event)
 		if (oa_attr.sample_pid)
 			dev_priv->oa_pmu.sample_info_flags |=
 					I915_OA_SAMPLE_PID;
+		if (oa_attr.sample_tag)
+			dev_priv->oa_pmu.sample_info_flags |=
+					I915_OA_SAMPLE_TAG;
 	}
 
 	report_format = oa_attr.format;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 65e8297..1084178 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -94,7 +94,8 @@ typedef struct _drm_i915_oa_attr {
 	__u64 single_context : 1,
 		multiple_context_mode:1,
 		sample_pid:1,
-		__reserved_1:61;
+		sample_tag:1,
+		__reserved_1:60;
 } drm_i915_oa_attr_t;
 
 /* Header for PERF_RECORD_DEVICE type events */
@@ -134,6 +135,11 @@ struct drm_i915_oa_node_pid {
 	__u32 pad;
 };
 
+struct drm_i915_oa_node_tag {
+	__u32 tag;
+	__u32 pad;
+};
+
 /* Each region is a minimum of 16k, and there are at most 255 of them.
  */
 #define I915_NR_TEX_REGIONS 255	/* table size 2k - maximum due to use
@@ -802,7 +808,7 @@ struct drm_i915_gem_execbuffer2 {
 #define I915_EXEC_CONSTANTS_REL_SURFACE (2<<6) /* gen4/5 only */
 	__u64 flags;
 	__u64 rsvd1; /* now used for context info */
-	__u64 rsvd2;
+	__u64 rsvd2; /* used for tag */
 };
 
 /** Resets the SO write offset registers for transform feedback on gen7. */
@@ -840,7 +846,12 @@ struct drm_i915_gem_execbuffer2 {
 #define I915_EXEC_BSD_RING1		(1<<13)
 #define I915_EXEC_BSD_RING2		(2<<13)
 
-#define __I915_EXEC_UNKNOWN_FLAGS -(1<<15)
+/** Inform the kernel that tag is passed through rsvd2 field of
+ * execbuffer args
+ */
+#define I915_EXEC_TAG			(1<<15)
+
+#define __I915_EXEC_UNKNOWN_FLAGS -(1<<16)
 
 #define I915_EXEC_CONTEXT_ID_MASK	(0xffffffff)
 #define i915_execbuffer2_set_context_id(eb2, context) \
@@ -848,6 +859,10 @@ struct drm_i915_gem_execbuffer2 {
 #define i915_execbuffer2_get_context_id(eb2) \
 	((eb2).rsvd1 & I915_EXEC_CONTEXT_ID_MASK)
 
+#define I915_EXEC_TAG_MASK		(0xffffffff)
+#define i915_execbuffer2_get_tag(eb2) \
+	((eb2).rsvd2 & I915_EXEC_TAG_MASK)
+
 struct drm_i915_gem_pin {
 	/** Handle of the buffer to be pinned. */
 	__u32 handle;
-- 
1.8.5.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

  parent reply	other threads:[~2015-07-15  8:45 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-07-15  8:46 [RFC 0/8] Introduce framework to forward multi context OA snapshots sourab.gupta
2015-07-15  8:46 ` [RFC 1/8] drm/i915: Have globally unique context ids, as opposed to drm file specific sourab.gupta
2015-07-15  9:54   ` Chris Wilson
2015-07-15 10:31     ` Chris Wilson
2015-07-15 12:36       ` Daniel Vetter
2015-07-15  8:46 ` [RFC 2/8] drm/i915: Introduce mode for capture of multi ctx OA reports synchronized with RCS sourab.gupta
2015-07-15  8:46 ` [RFC 3/8] drm/i915: Add mechanism for forwarding CS based OA counter snapshots through perf sourab.gupta
2015-07-15  8:46 ` [RFC 4/8] drm/i915: Forward periodic and CS based OA reports sorted acc to timestamps sourab.gupta
2015-07-15  8:47 ` [RFC 5/8] drm/i915: Handle event stop and destroy for commands in flight sourab.gupta
2015-07-15  8:47 ` [RFC 6/8] drm/i915: Insert commands for capture of OA counters in the ring sourab.gupta
2015-07-15 10:26   ` Chris Wilson
2015-07-15  8:47 ` [RFC 7/8] drm/i915: Add support for having pid output with OA report sourab.gupta
2015-07-15  8:47 ` sourab.gupta [this message]
2015-07-15 10:02   ` [RFC 8/8] drm/i915: Add support to add execbuffer tags to OA counter reports Chris Wilson
2015-07-15 10:04   ` Chris Wilson
2015-07-15 10:06   ` Chris Wilson
2015-08-05  5:52 [RFC 0/8] Introduce framework to forward multi context OA snapshots sourab.gupta
2015-08-05  5:52 ` [RFC 8/8] drm/i915: Add support to add execbuffer tags to OA counter reports sourab.gupta

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1436950023-13940-9-git-send-email-sourab.gupta@intel.com \
    --to=sourab.gupta@intel.com \
    --cc=a.p.zijlstra@chello.nl \
    --cc=insoo.woo@intel.com \
    --cc=intel-gfx@lists.freedesktop.org \
    --cc=jabin.wu@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.