All of lore.kernel.org
 help / color / mirror / Atom feed
From: sourab.gupta@intel.com
To: intel-gfx@lists.freedesktop.org
Cc: Insoo Woo <insoo.woo@intel.com>,
	Peter Zijlstra <a.p.zijlstra@chello.nl>,
	Jabin Wu <jabin.wu@intel.com>,
	Sourab Gupta <sourab.gupta@intel.com>
Subject: [RFC 8/8] drm/i915: Add perfTag support for OA counter reports
Date: Mon, 22 Jun 2015 15:20:19 +0530	[thread overview]
Message-ID: <1434966619-3979-9-git-send-email-sourab.gupta@intel.com> (raw)
In-Reply-To: <1434966619-3979-1-git-send-email-sourab.gupta@intel.com>

From: Sourab Gupta <sourab.gupta@intel.com>

This patch enables collection of perfTag in the OA reports.

PerfTag is a mechanism whereby the collected reports are marked with a
perfTag passed by userspace during the execbuffer call. This way, userspace
can associate the collected reports with particular execbuffers.
This feature is particularly useful for identifying the individual stages of
a single context and associating the reports with those stages.

In this patch, the rsvd2 field of the execbuffer arguments is used to pass
in the perfTag. A new flag bit in the execbuffer flags is introduced to
inform the kernel that a perfTag is being passed in the execbuffer arguments.

Signed-off-by: Sourab Gupta <sourab.gupta@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h            |  7 +++++--
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  6 ++++--
 drivers/gpu/drm/i915/i915_oa_perf.c        | 12 ++++++++----
 include/uapi/drm/i915_drm.h                | 15 +++++++++++++--
 4 files changed, 30 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 798da49..758d924 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1691,8 +1691,10 @@ struct drm_i915_oa_async_queue_header {
 struct drm_i915_oa_async_node_info {
 	__u32 pid;
 	__u32 ctx_id;
+	__u32 perftag;
+	__u32 padding;
 	struct drm_i915_gem_request *req;
-	__u32 pad[12];
+	__u32 pad[10];
 };
 
 struct drm_i915_oa_async_node {
@@ -3164,7 +3166,8 @@ void i915_oa_context_pin_notify(struct drm_i915_private *dev_priv,
 				struct intel_context *context);
 void i915_oa_context_unpin_notify(struct drm_i915_private *dev_priv,
 				  struct intel_context *context);
-void i915_insert_profiling_cmd(struct intel_ringbuffer *ringbuf, u32 ctx_id);
+void i915_insert_profiling_cmd(struct intel_ringbuffer *ringbuf, u32 ctx_id,
+				int perftag);
 #else
 static inline void
 i915_oa_context_pin_notify(struct drm_i915_private *dev_priv,
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index f5a2308..7be4f6a 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1320,7 +1320,8 @@ i915_gem_ringbuffer_submission(struct drm_device *dev, struct drm_file *file,
 	exec_len = args->batch_len;
 
 	i915_insert_profiling_cmd(ring->buffer,
-			i915_execbuffer2_get_context_id(*args));
+			i915_execbuffer2_get_context_id(*args),
+			i915_execbuffer2_get_perftag(*args));
 
 	if (cliprects) {
 		for (i = 0; i < args->num_cliprects; i++) {
@@ -1344,7 +1345,8 @@ i915_gem_ringbuffer_submission(struct drm_device *dev, struct drm_file *file,
 	}
 
 	i915_insert_profiling_cmd(ring->buffer,
-			i915_execbuffer2_get_context_id(*args));
+			i915_execbuffer2_get_context_id(*args),
+			i915_execbuffer2_get_perftag(*args));
 
 	trace_i915_gem_ring_dispatch(intel_ring_get_request(ring), dispatch_flags);
 
diff --git a/drivers/gpu/drm/i915/i915_oa_perf.c b/drivers/gpu/drm/i915/i915_oa_perf.c
index b02850c..ab419d9 100644
--- a/drivers/gpu/drm/i915/i915_oa_perf.c
+++ b/drivers/gpu/drm/i915/i915_oa_perf.c
@@ -27,20 +27,23 @@ static int hsw_perf_format_sizes[] = {
 
 struct drm_i915_insert_cmd {
 	struct list_head list;
-	void (*insert_cmd)(struct intel_ringbuffer *ringbuf, u32 ctx_id);
+	void (*insert_cmd)(struct intel_ringbuffer *ringbuf, u32 ctx_id,
+				int perftag);
 };
 
-void i915_insert_profiling_cmd(struct intel_ringbuffer *ringbuf, u32 ctx_id)
+void i915_insert_profiling_cmd(struct intel_ringbuffer *ringbuf, u32 ctx_id,
+				int perftag)
 {
 	struct intel_engine_cs *ring = ringbuf->ring;
 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
 	struct drm_i915_insert_cmd *entry;
 
 	list_for_each_entry(entry, &dev_priv->profile_cmd, list)
-		entry->insert_cmd(ringbuf, ctx_id);
+		entry->insert_cmd(ringbuf, ctx_id, perftag);
 }
 
-void i915_oa_insert_cmd(struct intel_ringbuffer *ringbuf, u32 ctx_id)
+void i915_oa_insert_cmd(struct intel_ringbuffer *ringbuf, u32 ctx_id,
+			int perftag)
 {
 	struct intel_engine_cs *ring = ringbuf->ring;
 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
@@ -90,6 +93,7 @@ void i915_oa_insert_cmd(struct intel_ringbuffer *ringbuf, u32 ctx_id)
 
 	node_info->pid = current->pid;
 	node_info->ctx_id = ctx_id;
+	node_info->perftag = perftag;
 	queue_hdr->node_count++;
 	if (queue_hdr->node_count > num_nodes)
 		queue_hdr->wrap_count++;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index c91b427..4d99992 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -127,6 +127,8 @@ enum drm_i915_oa_event_type {
 struct drm_i915_oa_async_node_footer {
 	__u32 pid;
 	__u32 ctx_id;
+	__u32 perftag;
+	__u32 pad;
 };
 
 /* Each region is a minimum of 16k, and there are at most 255 of them.
@@ -797,7 +799,7 @@ struct drm_i915_gem_execbuffer2 {
 #define I915_EXEC_CONSTANTS_REL_SURFACE (2<<6) /* gen4/5 only */
 	__u64 flags;
 	__u64 rsvd1; /* now used for context info */
-	__u64 rsvd2;
+	__u64 rsvd2; /* used for perftag */
 };
 
 /** Resets the SO write offset registers for transform feedback on gen7. */
@@ -835,7 +837,12 @@ struct drm_i915_gem_execbuffer2 {
 #define I915_EXEC_BSD_RING1		(1<<13)
 #define I915_EXEC_BSD_RING2		(2<<13)
 
-#define __I915_EXEC_UNKNOWN_FLAGS -(1<<15)
+/** Inform the kernel that the perftag is passed through rsvd2 field of
+ * execbuffer args
+ */
+#define I915_EXEC_PERFTAG		(1<<15)
+
+#define __I915_EXEC_UNKNOWN_FLAGS -(1<<16)
 
 #define I915_EXEC_CONTEXT_ID_MASK	(0xffffffff)
 #define i915_execbuffer2_set_context_id(eb2, context) \
@@ -843,6 +850,10 @@ struct drm_i915_gem_execbuffer2 {
 #define i915_execbuffer2_get_context_id(eb2) \
 	((eb2).rsvd1 & I915_EXEC_CONTEXT_ID_MASK)
 
+#define I915_EXEC_PERFTAG_MASK		(0xffffffff)
+#define i915_execbuffer2_get_perftag(eb2) \
+	((eb2).rsvd2 & I915_EXEC_PERFTAG_MASK)
+
 struct drm_i915_gem_pin {
 	/** Handle of the buffer to be pinned. */
 	__u32 handle;
-- 
1.8.5.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

      parent reply	other threads:[~2015-06-22  9:48 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-06-22  9:50 [RFC 0/8] Introduce framework to forward asynchronous OA counter sourab.gupta
2015-06-22  9:50 ` [RFC 1/8] drm/i915: Have globally unique context ids, as opposed to drm file specific sourab.gupta
2015-06-22  9:50 ` [RFC 2/8] drm/i915: Introduce mode for asynchronous capture of OA counters sourab.gupta
2015-06-22 15:59   ` Daniel Vetter
2015-06-22  9:50 ` [RFC 3/8] drm/i915: Add the data structures for async OA capture mode sourab.gupta
2015-06-22 16:01   ` Daniel Vetter
2015-06-22  9:50 ` [RFC 4/8] drm/i915: Add mechanism for forwarding async OA counter snapshots through perf sourab.gupta
2015-06-22  9:50 ` [RFC 5/8] drm/i915: Wait for GPU to finish before event stop, in async OA counter mode sourab.gupta
2015-06-22  9:50 ` [RFC 6/8] drm/i915: Routines for inserting OA capture commands in the ringbuffer sourab.gupta
2015-06-22 15:55   ` Daniel Vetter
2015-06-22  9:50 ` [RFC 7/8] drm/i915: Add commands in ringbuf for OA snapshot capture across Batchbuffer boundaries sourab.gupta
2015-06-22  9:50 ` sourab.gupta [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1434966619-3979-9-git-send-email-sourab.gupta@intel.com \
    --to=sourab.gupta@intel.com \
    --cc=a.p.zijlstra@chello.nl \
    --cc=insoo.woo@intel.com \
    --cc=intel-gfx@lists.freedesktop.org \
    --cc=jabin.wu@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.