All of lore.kernel.org
 help / color / mirror / Atom feed
From: sourab.gupta@intel.com
To: intel-gfx@lists.freedesktop.org
Cc: Insoo Woo <insoo.woo@intel.com>,
	Peter Zijlstra <a.p.zijlstra@chello.nl>,
	Jabin Wu <jabin.wu@intel.com>,
	Sourab Gupta <sourab.gupta@intel.com>
Subject: [RFC 5/7] drm/i915: Wait for GPU to finish before event stop in Gen Perf PMU
Date: Mon, 22 Jun 2015 15:25:07 +0530	[thread overview]
Message-ID: <1434966909-4113-6-git-send-email-sourab.gupta@intel.com> (raw)
In-Reply-To: <1434966909-4113-1-git-send-email-sourab.gupta@intel.com>

From: Sourab Gupta <sourab.gupta@intel.com>

To collect timestamps around any GPU workload, we need to insert
commands into the ringbuffer to capture them. Therefore, during the stop
event call, we need to wait for the GPU to complete processing the last
request for which these commands were inserted.
We need to ensure this processing is done before the event_destroy callback,
which deallocates the buffer holding the data.

Signed-off-by: Sourab Gupta <sourab.gupta@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h     |  2 ++
 drivers/gpu/drm/i915/i915_oa_perf.c | 54 ++++++++++++++++++++++++++++++++++++-
 2 files changed, 55 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 25c0938..a0e1d17 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2022,6 +2022,8 @@ struct drm_i915_private {
 			u32 tail;
 		} buffer;
 		struct work_struct work_timer;
+		struct work_struct work_event_stop;
+		struct completion complete;
 	} gen_pmu;
 
 	struct list_head profile_cmd;
diff --git a/drivers/gpu/drm/i915/i915_oa_perf.c b/drivers/gpu/drm/i915/i915_oa_perf.c
index e3e867f..574b6d3 100644
--- a/drivers/gpu/drm/i915/i915_oa_perf.c
+++ b/drivers/gpu/drm/i915/i915_oa_perf.c
@@ -306,6 +306,9 @@ void forward_gen_pmu_snapshots_work(struct work_struct *__work)
 	int head, tail, num_nodes, ret;
 	struct drm_i915_gem_request *req;
 
+	if (dev_priv->gen_pmu.event_active == false)
+		return;
+
 	first_node = (struct drm_i915_ts_node *)
 			((char *)hdr + hdr->data_offset);
 	num_nodes = (hdr->size_in_bytes - hdr->data_offset) /
@@ -335,6 +338,50 @@ void forward_gen_pmu_snapshots_work(struct work_struct *__work)
 	mutex_unlock(&dev_priv->dev->struct_mutex);
 }
 
+void i915_gen_pmu_stop_work_fn(struct work_struct *__work)
+{
+	struct drm_i915_private *dev_priv =
+		container_of(__work, typeof(*dev_priv),
+			gen_pmu.work_event_stop);
+	struct perf_event *event = dev_priv->gen_pmu.exclusive_event;
+	struct drm_i915_ts_queue_header *hdr =
+		(struct drm_i915_ts_queue_header *)
+		dev_priv->gen_pmu.buffer.addr;
+	struct drm_i915_ts_node *first_node, *node;
+	int head, tail, num_nodes, ret;
+	struct drm_i915_gem_request *req;
+
+	first_node = (struct drm_i915_ts_node *)
+			((char *)hdr + hdr->data_offset);
+	num_nodes = (hdr->size_in_bytes - hdr->data_offset) /
+			sizeof(*node);
+
+
+	ret = i915_mutex_lock_interruptible(dev_priv->dev);
+	if (ret)
+		return;
+
+	i915_gen_pmu_wait_gpu(dev_priv);
+
+	/* Ensure that all requests are completed*/
+	tail = hdr->node_count;
+	head = dev_priv->gen_pmu.buffer.head;
+	while ((head % num_nodes) != (tail % num_nodes)) {
+		node = &first_node[head % num_nodes];
+		req = node->node_info.req;
+		if (req && !i915_gem_request_completed(req, true))
+			WARN_ON(1);
+		head++;
+	}
+
+	event->hw.state = PERF_HES_STOPPED;
+	dev_priv->gen_pmu.buffer.tail = 0;
+	dev_priv->gen_pmu.buffer.head = 0;
+
+	mutex_unlock(&dev_priv->dev->struct_mutex);
+	complete(&dev_priv->gen_pmu.complete);
+}
+
 static void gen_pmu_flush_snapshots(struct drm_i915_private *dev_priv)
 {
 	WARN_ON(!dev_priv->gen_pmu.buffer.addr);
@@ -562,6 +609,7 @@ static void i915_oa_event_destroy(struct perf_event *event)
 
 static void gen_buffer_destroy(struct drm_i915_private *i915)
 {
+	wait_for_completion(&i915->gen_pmu.complete);
 	mutex_lock(&i915->dev->struct_mutex);
 
 	vunmap(i915->gen_pmu.buffer.addr);
@@ -1409,7 +1457,7 @@ static void i915_gen_event_stop(struct perf_event *event, int flags)
 	hrtimer_cancel(&dev_priv->gen_pmu.timer);
 	gen_pmu_flush_snapshots(dev_priv);
 
-	event->hw.state = PERF_HES_STOPPED;
+	schedule_work(&dev_priv->gen_pmu.work_event_stop);
 }
 
 static int i915_gen_event_add(struct perf_event *event, int flags)
@@ -1595,6 +1643,9 @@ void i915_gen_pmu_register(struct drm_device *dev)
 	i915->gen_pmu.timer.function = hrtimer_sample_gen;
 
 	INIT_WORK(&i915->gen_pmu.work_timer, forward_gen_pmu_snapshots_work);
+	INIT_WORK(&i915->gen_pmu.work_event_stop, i915_gen_pmu_stop_work_fn);
+	init_completion(&i915->gen_pmu.complete);
+
 	spin_lock_init(&i915->gen_pmu.lock);
 
 	i915->gen_pmu.pmu.capabilities  = PERF_PMU_CAP_IS_DEVICE;
@@ -1625,6 +1676,7 @@ void i915_gen_pmu_unregister(struct drm_device *dev)
 		return;
 
 	cancel_work_sync(&i915->gen_pmu.work_timer);
+	cancel_work_sync(&i915->gen_pmu.work_event_stop);
 
 	perf_pmu_unregister(&i915->gen_pmu.pmu);
 	i915->gen_pmu.pmu.event_init = NULL;
-- 
1.8.5.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

  parent reply	other threads:[~2015-06-22  9:53 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-06-22  9:55 [RFC 0/7] Introduce framework for forwarding generic non-OA performance sourab.gupta
2015-06-22  9:55 ` [RFC 1/7] drm/i915: Add a new PMU for handling non-OA counter data profiling requests sourab.gupta
2015-06-22  9:55 ` [RFC 2/7] drm/i915: Register routines for Gen perf PMU driver sourab.gupta
2015-06-22  9:55 ` [RFC 3/7] drm/i915: Introduce timestamp node for timestamp data collection sourab.gupta
2015-06-22  9:55 ` [RFC 4/7] drm/i915: Add mechanism for forwarding the data samples to userspace through Gen PMU perf interface sourab.gupta
2015-06-22 13:21   ` Chris Wilson
2015-06-22  9:55 ` sourab.gupta [this message]
2015-06-22 13:22   ` [RFC 5/7] drm/i915: Wait for GPU to finish before event stop in Gen Perf PMU Chris Wilson
2015-06-22 16:09     ` Daniel Vetter
2015-06-25  6:02       ` Gupta, Sourab
2015-06-25  7:42         ` Daniel Vetter
2015-06-25  8:27           ` Gupta, Sourab
2015-06-25 11:47             ` Robert Bragg
2015-06-25  8:02         ` Chris Wilson
2015-06-25 17:31           ` Robert Bragg
2015-06-25 17:37             ` Chris Wilson
2015-06-25 18:20               ` Chris Wilson
2015-06-25 13:02         ` Robert Bragg
2015-06-25 13:07           ` Robert Bragg
2015-06-22  9:55 ` [RFC 6/7] drm/i915: Add routines for inserting commands in the ringbuf for capturing timestamps sourab.gupta
2015-06-22  9:55 ` [RFC 7/7] drm/i915: Add support for retrieving MMIO register values in Gen Perf PMU sourab.gupta
2015-06-22 13:29   ` Chris Wilson
2015-06-22 16:06   ` Daniel Vetter

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1434966909-4113-6-git-send-email-sourab.gupta@intel.com \
    --to=sourab.gupta@intel.com \
    --cc=a.p.zijlstra@chello.nl \
    --cc=insoo.woo@intel.com \
    --cc=intel-gfx@lists.freedesktop.org \
    --cc=jabin.wu@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.