All of lore.kernel.org
 help / color / mirror / Atom feed
From: sourab.gupta@intel.com
To: intel-gfx@lists.freedesktop.org
Cc: Insoo Woo <insoo.woo@intel.com>,
	Peter Zijlstra <a.p.zijlstra@chello.nl>,
	Jabin Wu <jabin.wu@intel.com>,
	Sourab Gupta <sourab.gupta@intel.com>
Subject: [RFC 6/8] drm/i915: Insert commands for capture of OA counters in the ring
Date: Wed,  5 Aug 2015 11:22:55 +0530	[thread overview]
Message-ID: <1438753977-20335-7-git-send-email-sourab.gupta@intel.com> (raw)
In-Reply-To: <1438753977-20335-1-git-send-email-sourab.gupta@intel.com>

From: Sourab Gupta <sourab.gupta@intel.com>

This patch adds the routines which insert commands for capturing OA
snapshots into the ringbuffer of RCS engine.

MI_REPORT_PERF_COUNT commands are inserted at batch buffer (BB)
boundaries to capture snapshots of the OA counters.
While inserting the commands, we take a reference on the associated
request. It is released when the samples are forwarded to userspace
(or when the event is destroyed).
An active reference on the destination buffer is also taken here, so
that the buffer is freed only after the GPU is done with it, even if the
local reference to the buffer is released.

v2: Changes (as suggested by Chris):
    - Passing in 'request' struct for emit report function
    - Removed multiple calls to i915_gem_obj_to_ggtt(). Keeping hold of
      pinned vma from start and using when required.
    - Better nomenclature, and error handling.

Signed-off-by: Sourab Gupta <sourab.gupta@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h            | 13 +++++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  4 ++
 drivers/gpu/drm/i915/i915_oa_perf.c        | 87 ++++++++++++++++++++++++++++++
 3 files changed, 104 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d355691..5c15e30 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1661,6 +1661,11 @@ enum i915_oa_event_state {
 	I915_OA_EVENT_STOPPED,
 };
 
+enum i915_profile_mode {
+	I915_PROFILE_OA = 0,	/* index of the OA perf report emitter hook */
+	I915_PROFILE_MAX,	/* number of modes; sizes emit_profiling_data[] */
+};
+
 struct i915_oa_rcs_node {
 	struct list_head head;
 	struct drm_i915_gem_request *req;
@@ -1966,6 +1971,7 @@ struct drm_i915_private {
 		struct {
 			struct drm_i915_gem_object *obj;
 			u32 gtt_offset;
+			struct i915_vma *vma;
 			u8 *addr;
 			int format;
 			int format_size;
@@ -1976,6 +1982,9 @@ struct drm_i915_private {
 		struct work_struct forward_work;
 		struct work_struct event_destroy_work;
 	} oa_pmu;
+
+	void (*emit_profiling_data[I915_PROFILE_MAX])
+		(struct drm_i915_gem_request *req, u32 global_ctx_id);
 #endif
 
 	/* Abstract the submission mechanism (legacy ringbuffer or execlists) away */
@@ -3156,6 +3165,8 @@ void i915_oa_context_pin_notify(struct drm_i915_private *dev_priv,
 				struct intel_context *context);
 void i915_oa_context_unpin_notify(struct drm_i915_private *dev_priv,
 				  struct intel_context *context);
+void i915_emit_profiling_data(struct drm_i915_gem_request *req,
+				u32 global_ctx_id);
 #else
 static inline void
 i915_oa_context_pin_notify(struct drm_i915_private *dev_priv,
@@ -3163,6 +3174,8 @@ i915_oa_context_pin_notify(struct drm_i915_private *dev_priv,
 static inline void
 i915_oa_context_unpin_notify(struct drm_i915_private *dev_priv,
 			     struct intel_context *context) {}
+static inline void i915_emit_profiling_data(struct drm_i915_gem_request *req,
+				u32 global_ctx_id) {} /* no-op stub for the #else build */
 #endif
 
 /* i915_gem_evict.c */
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 3336e1c..e58b10d 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1317,6 +1317,8 @@ i915_gem_ringbuffer_submission(struct drm_device *dev, struct drm_file *file,
 			goto error;
 	}
 
+	i915_emit_profiling_data(intel_ring_get_request(ring), ctx->global_id);
+
 	exec_len = args->batch_len;
 	if (cliprects) {
 		for (i = 0; i < args->num_cliprects; i++) {
@@ -1339,6 +1341,8 @@ i915_gem_ringbuffer_submission(struct drm_device *dev, struct drm_file *file,
 			return ret;
 	}
 
+	i915_emit_profiling_data(intel_ring_get_request(ring), ctx->global_id);
+
 	trace_i915_gem_ring_dispatch(intel_ring_get_request(ring), dispatch_flags);
 
 	i915_gem_execbuffer_move_to_active(vmas, ring);
diff --git a/drivers/gpu/drm/i915/i915_oa_perf.c b/drivers/gpu/drm/i915/i915_oa_perf.c
index 554a9fa..e3bc8e0 100644
--- a/drivers/gpu/drm/i915/i915_oa_perf.c
+++ b/drivers/gpu/drm/i915/i915_oa_perf.c
@@ -25,6 +25,86 @@ static int hsw_perf_format_sizes[] = {
 	64   /* C4_B8_HSW */
 };
 
+/* Call each registered emit_profiling_data[] hook for @req; skip NULL slots */
+void i915_emit_profiling_data(struct drm_i915_gem_request *req,
+				u32 global_ctx_id)
+{
+	struct drm_i915_private *i915 = req->ring->dev->dev_private;
+	int mode;
+
+	for (mode = I915_PROFILE_OA; mode < I915_PROFILE_MAX; mode++) {
+		if (i915->emit_profiling_data[mode])
+			i915->emit_profiling_data[mode](req, global_ctx_id);
+	}
+}
+
+/*
+ * Emit MI_REPORT_PERF_COUNT into the render ring, targeting the next slot of
+ * the OA RCS buffer, and queue a node recording @req and @global_ctx_id.
+ */
+static void i915_oa_emit_perf_report(struct drm_i915_gem_request *req,
+				u32 global_ctx_id)
+{
+	struct intel_engine_cs *ring = req->ring;
+	struct drm_i915_private *dev_priv = ring->dev->dev_private;
+	struct drm_i915_gem_object *obj = dev_priv->oa_pmu.oa_rcs_buffer.obj;
+	struct i915_oa_rcs_node *entry;
+	unsigned long lock_flags;
+	u32 addr;
+
+	/* OA counters are only supported on the render ring */
+	if (ring->id != RCS)
+		return;
+
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	if (entry == NULL) {
+		DRM_ERROR("alloc failed\n");
+		return;
+	}
+
+	/* Reserve 4 dwords; on failure drop the node and emit nothing */
+	if (intel_ring_begin(ring, 4)) {
+		kfree(entry);
+		return;
+	}
+
+	entry->ctx_id = global_ctx_id;
+	i915_gem_request_assign(&entry->req, req);
+
+	spin_lock_irqsave(&dev_priv->oa_pmu.lock, lock_flags);
+	if (list_empty(&dev_priv->oa_pmu.node_list))
+		entry->offset = 0;
+	else {
+		struct i915_oa_rcs_node *last_entry;
+		int max_offset = dev_priv->oa_pmu.oa_rcs_buffer.node_count *
+				dev_priv->oa_pmu.oa_rcs_buffer.node_size;
+
+		last_entry = list_last_entry(&dev_priv->oa_pmu.node_list,
+					struct i915_oa_rcs_node, head);
+		entry->offset = last_entry->offset +
+				dev_priv->oa_pmu.oa_rcs_buffer.node_size;
+		/* A slot starting at max_offset lies past the last node: wrap */
+		if (entry->offset >= max_offset)
+			entry->offset = 0;
+	}
+	list_add_tail(&entry->head, &dev_priv->oa_pmu.node_list);
+	spin_unlock_irqrestore(&dev_priv->oa_pmu.lock, lock_flags);
+
+	addr = dev_priv->oa_pmu.oa_rcs_buffer.gtt_offset + entry->offset;
+
+	/* addr should be 64 byte aligned */
+	BUG_ON(addr & 0x3f);
+
+	intel_ring_emit(ring, MI_REPORT_PERF_COUNT | (1<<0));
+	intel_ring_emit(ring, addr | MI_REPORT_PERF_COUNT_GGTT);
+	intel_ring_emit(ring, req->seqno);
+	intel_ring_emit(ring, MI_NOOP);
+	intel_ring_advance(ring);
+
+	obj->base.write_domain = I915_GEM_DOMAIN_RENDER;
+	i915_vma_move_to_active(dev_priv->oa_pmu.oa_rcs_buffer.vma, ring);
+}
+
 static void forward_one_oa_snapshot_to_event(struct drm_i915_private *dev_priv,
 					     u8 *snapshot,
 					     struct perf_event *event)
@@ -324,6 +404,7 @@ oa_rcs_buffer_destroy(struct drm_i915_private *i915)
 	spin_lock(&i915->oa_pmu.lock);
 	i915->oa_pmu.oa_rcs_buffer.obj = NULL;
 	i915->oa_pmu.oa_rcs_buffer.gtt_offset = 0;
+	i915->oa_pmu.oa_rcs_buffer.vma = NULL;
 	i915->oa_pmu.oa_rcs_buffer.addr = NULL;
 	spin_unlock(&i915->oa_pmu.lock);
 }
@@ -584,6 +665,7 @@ static int init_oa_rcs_buffer(struct perf_event *event)
 	dev_priv->oa_pmu.oa_rcs_buffer.obj = bo;
 	dev_priv->oa_pmu.oa_rcs_buffer.gtt_offset =
 				i915_gem_obj_ggtt_offset(bo);
+	dev_priv->oa_pmu.oa_rcs_buffer.vma = i915_gem_obj_to_ggtt(bo);
 	dev_priv->oa_pmu.oa_rcs_buffer.addr = vmap_oa_buffer(bo);
 	INIT_LIST_HEAD(&dev_priv->oa_pmu.node_list);
 
@@ -1006,6 +1088,10 @@ static void i915_oa_event_start(struct perf_event *event, int flags)
 	dev_priv->oa_pmu.event_state = I915_OA_EVENT_STARTED;
 	update_oacontrol(dev_priv);
 
+	if (dev_priv->oa_pmu.multiple_ctx_mode)
+		dev_priv->emit_profiling_data[I915_PROFILE_OA] =
+				i915_oa_emit_perf_report;
+
 	/* Reset the head ptr to ensure we don't forward reports relating
 	 * to a previous perf event */
 	oastatus1 = I915_READ(GEN7_OASTATUS1);
@@ -1042,6 +1128,7 @@ static void i915_oa_event_stop(struct perf_event *event, int flags)
 
 		spin_lock_irqsave(&dev_priv->oa_pmu.lock, lock_flags);
 
+		dev_priv->emit_profiling_data[I915_PROFILE_OA] = NULL;
 		dev_priv->oa_pmu.event_state = I915_OA_EVENT_STOP_IN_PROGRESS;
 		list_for_each_entry(entry, &dev_priv->oa_pmu.node_list, head)
 			entry->discard = true;
-- 
1.8.5.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

  parent reply	other threads:[~2015-08-05  5:51 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-08-05  5:52 [RFC 0/8] Introduce framework to forward multi context OA snapshots sourab.gupta
2015-08-05  5:52 ` [RFC 1/8] drm/i915: Introduce global id for contexts sourab.gupta
2015-08-05  5:52 ` [RFC 2/8] drm/i915: Introduce mode for capture of multi ctx OA reports synchronized with RCS sourab.gupta
2015-08-05  5:52 ` [RFC 3/8] drm/i915: Add mechanism for forwarding CS based OA counter snapshots through perf sourab.gupta
2015-08-05  5:52 ` [RFC 4/8] drm/i915: Forward periodic and CS based OA reports sorted acc to timestamps sourab.gupta
2015-08-05  5:52 ` [RFC 5/8] drm/i915: Handle event stop and destroy for commands in flight sourab.gupta
2015-08-05  5:52 ` sourab.gupta [this message]
2015-08-05  5:52 ` [RFC 7/8] drm/i915: Add support for having pid output with OA report sourab.gupta
2015-08-05  5:52 ` [RFC 8/8] drm/i915: Add support to add execbuffer tags to OA counter reports sourab.gupta
  -- strict thread matches above, loose matches on Subject: below --
2015-07-15  8:46 [RFC 0/8] Introduce framework to forward multi context OA snapshots sourab.gupta
2015-07-15  8:47 ` [RFC 6/8] drm/i915: Insert commands for capture of OA counters in the ring sourab.gupta
2015-07-15 10:26   ` Chris Wilson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1438753977-20335-7-git-send-email-sourab.gupta@intel.com \
    --to=sourab.gupta@intel.com \
    --cc=a.p.zijlstra@chello.nl \
    --cc=insoo.woo@intel.com \
    --cc=intel-gfx@lists.freedesktop.org \
    --cc=jabin.wu@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.