From: sourab.gupta@intel.com
To: intel-gfx@lists.freedesktop.org
Cc: Insoo Woo <insoo.woo@intel.com>,
	Peter Zijlstra <a.p.zijlstra@chello.nl>,
	Jabin Wu <jabin.wu@intel.com>,
	Sourab Gupta <sourab.gupta@intel.com>
Subject: [RFC 3/8] drm/i915: Handle event stop and destroy for GPU commands submitted
Date: Wed,  5 Aug 2015 11:25:39 +0530
Message-ID: <1438754144-20435-4-git-send-email-sourab.gupta@intel.com>
In-Reply-To: <1438754144-20435-1-git-send-email-sourab.gupta@intel.com>

From: Sourab Gupta <sourab.gupta@intel.com>

This patch handles the event stop and destroy callbacks, taking into account
the fact that there may be commands scheduled on the GPU which still utilize
the destination buffer.

The event stop just sets the event state and stops forwarding data to
userspace. From the userspace perspective, for all practical purposes, the
event sampling is stopped. A subsequent event start (without an event
destroy) would start forwarding samples again.
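
For illustration, from userspace this maps onto the standard perf event
ioctls. A minimal sketch, assuming an already-opened event fd for this PMU
(the fd setup is not shown and is not part of this patch; needs
<sys/ioctl.h> and <linux/perf_event.h>):

	/* PERF_EVENT_IOC_DISABLE lands in the event stop path below;
	 * PERF_EVENT_IOC_ENABLE restarts sample forwarding.
	 */
	ioctl(event_fd, PERF_EVENT_IOC_DISABLE, 0);
	/* ... no samples are forwarded while stopped ... */
	ioctl(event_fd, PERF_EVENT_IOC_ENABLE, 0);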

The event destroy releases the local copy of the dest buffer. Since an
active reference to the buffer is taken while inserting commands, we can
rest assured that the buffer is freed up only after the GPU is done with it.
Still, a worker needs to be scheduled from event destroy, because further
cleanup, such as releasing the request references, has to be done (see the
sketch below).
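
Sketched, the lifetime rule this relies on looks as follows (an
illustrative fragment, not the literal patch code; 'req' stands for the
request under which the snapshot command is emitted):

	/* At command insertion: hold a request reference, so the dest
	 * buffer stays busy until the GPU has retired the request. */
	i915_gem_request_reference(req);
	entry->req = req;

	/* In the destroy worker, once the GPU is done with the request: */
	i915_gem_request_unreference__unlocked(entry->req);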

The ideal solution here would be to have a callback when the last request
has finished on the GPU, so that this cleanup can be done there (WIP: Chris'
retire-notification mechanism). Until then, a worker thread will do.

A subsequent event init would have to either wait for the previously
submitted RPC commands to complete, or return -EBUSY. Currently, for the
sake of simplicity, we return -EBUSY if such a case is detected.
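
On the caller side, the -EBUSY can be handled with a simple back-off and
retry; a hypothetical sketch (attr setup omitted; perf_event_open is
invoked via syscall(2) as it has no glibc wrapper):

	int fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
	if (fd < 0 && errno == EBUSY) {
		/* GPU commands from a previous event have not retired
		 * yet; wait and retry, or report the device as busy. */
	}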

Signed-off-by: Sourab Gupta <sourab.gupta@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h     |  2 +
 drivers/gpu/drm/i915/i915_oa_perf.c | 87 ++++++++++++++++++++++++++++++++-----
 2 files changed, 77 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 08235582..5717cb0 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1680,6 +1680,7 @@ struct i915_gen_pmu_node {
 	struct list_head head;
 	struct drm_i915_gem_request *req;
 	u32 offset;
+	bool discard;
 	u32 ctx_id;
 };
 
@@ -2012,6 +2013,7 @@ struct drm_i915_private {
 		} buffer;
 		struct list_head node_list;
 		struct work_struct forward_work;
+		struct work_struct event_destroy_work;
 	} gen_pmu;
 
 	void (*emit_profiling_data[I915_PROFILE_MAX])
diff --git a/drivers/gpu/drm/i915/i915_oa_perf.c b/drivers/gpu/drm/i915/i915_oa_perf.c
index 3add862..06645f0 100644
--- a/drivers/gpu/drm/i915/i915_oa_perf.c
+++ b/drivers/gpu/drm/i915/i915_oa_perf.c
@@ -448,6 +448,21 @@ static int i915_gen_pmu_wait_gpu(struct drm_i915_private *dev_priv)
 	return 0;
 }
 
+static void i915_gen_pmu_release_request_ref(struct drm_i915_private *dev_priv)
+{
+	struct i915_gen_pmu_node *entry, *next;
+
+	list_for_each_entry_safe(entry, next,
+			&dev_priv->gen_pmu.node_list, head) {
+		i915_gem_request_unreference__unlocked(entry->req);
+
+		spin_lock(&dev_priv->gen_pmu.lock);
+		list_del(&entry->head);
+		spin_unlock(&dev_priv->gen_pmu.lock);
+		kfree(entry);
+	}
+}
+
 static void forward_one_gen_pmu_sample(struct drm_i915_private *dev_priv,
 				struct i915_gen_pmu_node *node)
 {
@@ -498,7 +513,8 @@ static void forward_gen_pmu_snapshots(struct drm_i915_private *dev_priv)
 		if (!i915_gem_request_completed(entry->req, true))
 			break;
 
-		forward_one_gen_pmu_sample(dev_priv, entry);
+		if (!entry->discard)
+			forward_one_gen_pmu_sample(dev_priv, entry);
 
 		spin_lock(&dev_priv->gen_pmu.lock);
 		list_move_tail(&entry->head, &deferred_list_free);
@@ -523,6 +539,13 @@ static void forward_gen_pmu_work_fn(struct work_struct *__work)
 	struct drm_i915_private *dev_priv =
 		container_of(__work, typeof(*dev_priv), gen_pmu.forward_work);
 
+	spin_lock(&dev_priv->gen_pmu.lock);
+	if (!dev_priv->gen_pmu.event_active) {
+		spin_unlock(&dev_priv->gen_pmu.lock);
+		return;
+	}
+	spin_unlock(&dev_priv->gen_pmu.lock);
+
 	forward_gen_pmu_snapshots(dev_priv);
 }
 
@@ -670,12 +693,6 @@ static void gen_buffer_destroy(struct drm_i915_private *i915)
 	i915_gem_object_ggtt_unpin(i915->gen_pmu.buffer.obj);
 	drm_gem_object_unreference(&i915->gen_pmu.buffer.obj->base);
 	mutex_unlock(&i915->dev->struct_mutex);
-
-	spin_lock(&i915->gen_pmu.lock);
-	i915->gen_pmu.buffer.obj = NULL;
-	i915->gen_pmu.buffer.gtt_offset = 0;
-	i915->gen_pmu.buffer.addr = NULL;
-	spin_unlock(&i915->gen_pmu.lock);
 }
 
 static void i915_gen_event_destroy(struct perf_event *event)
@@ -685,10 +702,44 @@ static void i915_gen_event_destroy(struct perf_event *event)
 
 	WARN_ON(event->parent);
 
-	gen_buffer_destroy(i915);
+	cancel_work_sync(&i915->gen_pmu.forward_work);
 
 	BUG_ON(i915->gen_pmu.exclusive_event != event);
 	i915->gen_pmu.exclusive_event = NULL;
+
+	/* We can release our local reference to the dest buffer here,
+	 * since an active reference to the buffer is taken while
+	 * inserting commands. So the buffer is freed up only after
+	 * the GPU is done with it.
+	 */
+	gen_buffer_destroy(i915);
+
+	schedule_work(&i915->gen_pmu.event_destroy_work);
+}
+
+static void i915_gen_pmu_event_destroy_work(struct work_struct *__work)
+{
+	struct drm_i915_private *dev_priv =
+		container_of(__work, typeof(*dev_priv),
+			gen_pmu.event_destroy_work);
+	int ret;
+
+	ret = i915_gen_pmu_wait_gpu(dev_priv);
+	if (ret)
+		goto out;
+
+	i915_gen_pmu_release_request_ref(dev_priv);
+
+out:
+	/*
+	 * Done here, as this excludes a new event till we are done
+	 * processing the old one.
+	 */
+	spin_lock(&dev_priv->gen_pmu.lock);
+	dev_priv->gen_pmu.buffer.obj = NULL;
+	dev_priv->gen_pmu.buffer.gtt_offset = 0;
+	dev_priv->gen_pmu.buffer.addr = NULL;
+	spin_unlock(&dev_priv->gen_pmu.lock);
 }
 
 static int alloc_obj(struct drm_i915_private *dev_priv,
@@ -1411,6 +1462,7 @@ static int i915_gen_event_init(struct perf_event *event)
 {
 	struct drm_i915_private *dev_priv =
 		container_of(event->pmu, typeof(*dev_priv), gen_pmu.pmu);
+	unsigned long lock_flags;
 	int ret = 0;
 
 	if (event->attr.type != event->pmu->type)
@@ -1419,8 +1471,12 @@ static int i915_gen_event_init(struct perf_event *event)
 	/* To avoid the complexity of having to accurately filter
 	 * data and marshal to the appropriate client
 	 * we currently only allow exclusive access */
-	if (dev_priv->gen_pmu.buffer.obj)
+	spin_lock_irqsave(&dev_priv->gen_pmu.lock, lock_flags);
+	if (dev_priv->gen_pmu.buffer.obj) {
+		spin_unlock_irqrestore(&dev_priv->gen_pmu.lock, lock_flags);
 		return -EBUSY;
+	}
+	spin_unlock_irqrestore(&dev_priv->gen_pmu.lock, lock_flags);
 
 	/*
 	 * We need to check for CAP_SYS_ADMIN capability as we profile all
@@ -1460,14 +1516,17 @@ static void i915_gen_event_stop(struct perf_event *event, int flags)
 {
 	struct drm_i915_private *dev_priv =
 		container_of(event->pmu, typeof(*dev_priv), gen_pmu.pmu);
+	struct i915_gen_pmu_node *entry;
+
+	hrtimer_cancel(&dev_priv->gen_pmu.timer);
+	schedule_work(&dev_priv->gen_pmu.forward_work);
 
 	spin_lock(&dev_priv->gen_pmu.lock);
 	dev_priv->gen_pmu.event_active = false;
+	list_for_each_entry(entry, &dev_priv->gen_pmu.node_list, head)
+		entry->discard = true;
 	spin_unlock(&dev_priv->gen_pmu.lock);
 
-	hrtimer_cancel(&dev_priv->gen_pmu.timer);
-	schedule_work(&dev_priv->gen_pmu.forward_work);
-
 	event->hw.state = PERF_HES_STOPPED;
 }
 
@@ -1654,6 +1713,9 @@ void i915_gen_pmu_register(struct drm_device *dev)
 	i915->gen_pmu.timer.function = hrtimer_sample_gen;
 
 	INIT_WORK(&i915->gen_pmu.forward_work, forward_gen_pmu_work_fn);
+	INIT_WORK(&i915->gen_pmu.event_destroy_work,
+			i915_gen_pmu_event_destroy_work);
+
 	spin_lock_init(&i915->gen_pmu.lock);
 
 	i915->gen_pmu.pmu.capabilities  = PERF_PMU_CAP_IS_DEVICE;
@@ -1684,6 +1746,7 @@ void i915_gen_pmu_unregister(struct drm_device *dev)
 		return;
 
 	cancel_work_sync(&i915->gen_pmu.forward_work);
+	cancel_work_sync(&i915->gen_pmu.event_destroy_work);
 
 	perf_pmu_unregister(&i915->gen_pmu.pmu);
 	i915->gen_pmu.pmu.event_init = NULL;
-- 
1.8.5.1
