From: Chris Wilson <chris@chris-wilson.co.uk>
To: intel-gfx@lists.freedesktop.org
Subject: [PATCH 21/25] drm/i915: Convert trace-irq to the breadcrumb waiter
Date: Sat, 25 Jun 2016 11:13:04 +0100	[thread overview]
Message-ID: <1466849588-17558-22-git-send-email-chris@chris-wilson.co.uk> (raw)
In-Reply-To: <1466849588-17558-1-git-send-email-chris@chris-wilson.co.uk>

If we convert the tracing from direct use of ring->irq_get() over to the
breadcrumb infrastructure, we are left with a single user of
ring->irq_get() and so will be able to simplify the driver routines
(eliminating the redundant validation and irq refcounting).

Process context is preferred over softirq (or even hardirq) for a couple
of reasons:

 - we already utilize process context for the fast wakeup of a single
   client (i.e. the client waiting for the GPU inspects the seqno
   itself following an interrupt, avoiding the overhead of a context
   switch before returning to userspace)

 - engine->irq_seqno() is not suitable for use from a softirq/hardirq
   context, as we may need to wait a long time (100-250us) for the
   seqno write to be posted before we can read it from the CPU

A signaling framework is a requirement for enabling dma-fences.
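
As an illustrative sketch (not part of the patch itself), the tracepoint
side of the conversion amounts to handing the request over to the new
signaling framework rather than pinning the irq directly;
intel_engine_enable_signaling() takes its own reference on the request
and, if it becomes the oldest waiter, wakes the per-engine signaler
thread:

	/* Sketch only: previously the trace event pinned the irq itself */
	if (engine->trace_irq_req == NULL && engine->irq_get(engine))
		i915_gem_request_assign(&engine->trace_irq_req, req);

	/* With this patch it registers the request with the common
	 * signaler instead (this may fail with -ENOMEM under GFP_ATOMIC,
	 * in which case the trace event simply loses its wakeup):
	 */
	intel_engine_enable_signaling(req);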

v2: Move to a signaling framework based upon the waiter.
v3: Track the first-signal to avoid having to walk the rbtree every time.
v4: Mark the signaler thread as RT priority to reduce latency in the
indirect wakeups.
v5: Make failure to allocate the thread fatal.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h          |   8 --
 drivers/gpu/drm/i915/i915_gem.c          |   9 +-
 drivers/gpu/drm/i915/i915_trace.h        |   2 +-
 drivers/gpu/drm/i915/intel_breadcrumbs.c | 192 ++++++++++++++++++++++++++++++-
 drivers/gpu/drm/i915/intel_ringbuffer.h  |   8 +-
 5 files changed, 201 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d09b96d193a5..b7089cedb80c 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3965,14 +3965,6 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms)
 			    schedule_timeout_uninterruptible(remaining_jiffies);
 	}
 }
-
-static inline void i915_trace_irq_get(struct intel_engine_cs *engine,
-				      struct drm_i915_gem_request *req)
-{
-	if (engine->trace_irq_req == NULL && engine->irq_get(engine))
-		i915_gem_request_assign(&engine->trace_irq_req, req);
-}
-
 static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req)
 {
 	struct intel_engine_cs *engine = req->engine;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index fd59b30a024d..e6daa1ee4d2d 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2745,7 +2745,8 @@ i915_gem_init_seqno(struct drm_i915_private *dev_priv, u32 seqno)
 
 	/* If the seqno wraps around, we need to clear the breadcrumb rbtree */
 	if (!i915_seqno_passed(seqno, dev_priv->next_seqno)) {
-		while (intel_kick_waiters(dev_priv))
+		while (intel_kick_waiters(dev_priv) ||
+		       intel_kick_signalers(dev_priv))
 			yield();
 	}
 
@@ -3210,12 +3211,6 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *engine)
 		i915_gem_object_retire__read(obj, engine->id);
 	}
 
-	if (unlikely(engine->trace_irq_req &&
-		     i915_gem_request_completed(engine->trace_irq_req))) {
-		engine->irq_put(engine);
-		i915_gem_request_assign(&engine->trace_irq_req, NULL);
-	}
-
 	WARN_ON(i915_verify_lists(engine->dev));
 }
 
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index 3d13fde95fdf..f59cf07184ae 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -490,7 +490,7 @@ TRACE_EVENT(i915_gem_ring_dispatch,
 			   __entry->ring = req->engine->id;
 			   __entry->seqno = req->seqno;
 			   __entry->flags = flags;
-			   i915_trace_irq_get(req->engine, req);
+			   intel_engine_enable_signaling(req);
 			   ),
 
 	    TP_printk("dev=%u, ring=%u, seqno=%u, flags=%x",
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 84c2a449dd0e..bc0e9e8b7871 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -22,6 +22,8 @@
  *
  */
 
+#include <linux/kthread.h>
+
 #include "i915_drv.h"
 
 static void intel_breadcrumbs_fake_irq(unsigned long data)
@@ -255,6 +257,15 @@ static inline bool chain_wakeup(struct rb_node *rb, int priority)
 	return rb && to_wait(rb)->tsk->prio <= priority;
 }
 
+static inline int wakeup_priority(struct intel_breadcrumbs *b,
+				  struct task_struct *tsk)
+{
+	if (tsk == b->signaler)
+		return INT_MIN;
+	else
+		return tsk->prio;
+}
+
 void intel_engine_remove_wait(struct intel_engine_cs *engine,
 			      struct intel_wait *wait)
 {
@@ -273,8 +284,8 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine,
 		goto out_unlock;
 
 	if (b->first_wait == wait) {
+		const int priority = wakeup_priority(b, wait->tsk);
 		struct rb_node *next;
-		const int priority = wait->tsk->prio;
 
 		GEM_BUG_ON(b->tasklet != wait->tsk);
 
@@ -340,15 +351,176 @@ out_unlock:
 	spin_unlock(&b->lock);
 }
 
+struct signal {
+	struct rb_node node;
+	struct intel_wait wait;
+	struct drm_i915_gem_request *request;
+};
+
+static bool signal_complete(struct signal *signal)
+{
+	if (signal == NULL)
+		return false;
+
+	/* If another process served as the bottom-half it may have already
+	 * signalled that this wait is already completed.
+	 */
+	if (intel_wait_complete(&signal->wait))
+		return true;
+
+	/* Carefully check if the request is complete, giving time for the
+	 * seqno to be visible or if the GPU hung.
+	 */
+	if (__i915_request_irq_complete(signal->request))
+		return true;
+
+	return false;
+}
+
+static struct signal *to_signal(struct rb_node *rb)
+{
+	return container_of(rb, struct signal, node);
+}
+
+static void signaler_set_rtpriority(void)
+{
+	 struct sched_param param = { .sched_priority = 1 };
+	 sched_setscheduler_nocheck(current, SCHED_FIFO, &param);
+}
+
+static int intel_breadcrumbs_signaler(void *arg)
+{
+	struct intel_engine_cs *engine = arg;
+	struct intel_breadcrumbs *b = &engine->breadcrumbs;
+	struct signal *signal;
+
+	/* Install ourselves with high priority to reduce signalling latency */
+	signaler_set_rtpriority();
+
+	do {
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		/* We are either woken up by the interrupt bottom-half,
+		 * or by a client adding a new signaller. In both cases,
+		 * the GPU seqno may have advanced beyond our oldest signal.
+		 * If it has, propagate the signal, remove the waiter and
+		 * check again with the next oldest signal. Otherwise we
+		 * need to wait for a new interrupt from the GPU or for
+		 * a new client.
+		 */
+		signal = READ_ONCE(b->first_signal);
+		if (signal_complete(signal)) {
+			/* Wake up all other completed waiters and select the
+			 * next bottom-half for the next user interrupt.
+			 */
+			intel_engine_remove_wait(engine, &signal->wait);
+
+			i915_gem_request_unreference(signal->request);
+
+			/* Find the next oldest signal. Note that as we have
+			 * not been holding the lock, another client may
+			 * have installed an even older signal than the one
+			 * we just completed - so double check we are still
+			 * the oldest before picking the next one.
+			 */
+			spin_lock(&b->lock);
+			if (signal == b->first_signal)
+				b->first_signal = rb_next(&signal->node);
+			rb_erase(&signal->node, &b->signals);
+			spin_unlock(&b->lock);
+
+			kfree(signal);
+		} else {
+			if (kthread_should_stop())
+				break;
+
+			schedule();
+		}
+	} while (1);
+	__set_current_state(TASK_RUNNING);
+
+	return 0;
+}
+
+int intel_engine_enable_signaling(struct drm_i915_gem_request *request)
+{
+	struct intel_engine_cs *engine = request->engine;
+	struct intel_breadcrumbs *b = &engine->breadcrumbs;
+	struct rb_node *parent, **p;
+	struct signal *signal;
+	bool first, wakeup;
+
+	signal = kmalloc(sizeof(*signal), GFP_ATOMIC);
+	if (unlikely(!signal))
+		return -ENOMEM;
+
+	signal->wait.tsk = b->signaler;
+	signal->wait.seqno = request->seqno;
+
+	signal->request = i915_gem_request_reference(request);
+
+	/* First add ourselves into the list of waiters, but register our
+	 * bottom-half as the signaller thread. As per usual, only the oldest
+	 * waiter (not just signaller) is tasked as the bottom-half waking
+	 * up all completed waiters after the user interrupt.
+	 *
+	 * If we are the oldest waiter, enable the irq (after which we
+	 * must double check that the seqno did not complete).
+	 */
+	wakeup = intel_engine_add_wait(engine, &signal->wait);
+
+	/* Now insert ourselves into the retirement ordered list of signals
+	 * on this engine. We track the oldest seqno as that will be the
+	 * first signal to complete.
+	 */
+	spin_lock(&b->lock);
+	parent = NULL;
+	first = true;
+	p = &b->signals.rb_node;
+	while (*p) {
+		parent = *p;
+		if (i915_seqno_passed(signal->wait.seqno,
+				      to_signal(parent)->wait.seqno)) {
+			p = &parent->rb_right;
+			first = false;
+		} else
+			p = &parent->rb_left;
+	}
+	rb_link_node(&signal->node, parent, p);
+	rb_insert_color(&signal->node, &b->signals);
+	if (first)
+		smp_store_mb(b->first_signal, signal);
+	spin_unlock(&b->lock);
+
+	if (wakeup)
+		wake_up_process(b->signaler);
+
+	return 0;
+}
+
 int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
 {
 	struct intel_breadcrumbs *b = &engine->breadcrumbs;
+	struct task_struct *tsk;
 
 	spin_lock_init(&b->lock);
 	setup_timer(&b->fake_irq,
 		    intel_breadcrumbs_fake_irq,
 		    (unsigned long)engine);
 
+	/* Spawn a thread to provide a common bottom-half for all signals.
+	 * As this is an asynchronous interface we cannot steal the current
+	 * task for handling the bottom-half to the user interrupt, therefore
+	 * we create a thread to do the coherent seqno dance after the
+	 * interrupt and then signal the waitqueue (via the dma-buf/fence).
+	 */
+	tsk = kthread_run(intel_breadcrumbs_signaler, engine,
+			  "irq/i915:%d", engine->id);
+	if (IS_ERR(tsk))
+		return PTR_ERR(tsk);
+
+	b->signaler = tsk;
+
 	return 0;
 }
 
@@ -356,6 +528,9 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
 {
 	struct intel_breadcrumbs *b = &engine->breadcrumbs;
 
+	if (!IS_ERR_OR_NULL(b->signaler))
+		kthread_stop(b->signaler);
+
 	del_timer_sync(&b->fake_irq);
 }
 
@@ -377,3 +552,18 @@ unsigned intel_kick_waiters(struct drm_i915_private *i915)
 
 	return mask;
 }
+
+unsigned intel_kick_signalers(struct drm_i915_private *i915)
+{
+	struct intel_engine_cs *engine;
+	unsigned mask = 0;
+
+	for_each_engine(engine, i915) {
+		if (unlikely(READ_ONCE(engine->breadcrumbs.first_signal))) {
+			wake_up_process(engine->breadcrumbs.signaler);
+			mask |= intel_engine_flag(engine);
+		}
+	}
+
+	return mask;
+}
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 4d6c2b773a64..c05b45727f7d 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -141,6 +141,8 @@ struct  i915_ctx_workarounds {
 	struct drm_i915_gem_object *obj;
 };
 
+struct drm_i915_gem_request;
+
 struct intel_engine_cs {
 	struct drm_i915_private *i915;
 	const char	*name;
@@ -179,8 +181,11 @@ struct intel_engine_cs {
 	struct intel_breadcrumbs {
 		spinlock_t lock; /* protects the lists of requests */
 		struct rb_root waiters; /* sorted by retirement, priority */
+		struct rb_root signals; /* sorted by retirement */
 		struct intel_wait *first_wait; /* oldest waiter by retirement */
 		struct task_struct *tasklet; /* bh for user interrupts */
+		struct task_struct *signaler; /* used for fence signalling */
+		void *first_signal;
 		struct timer_list fake_irq; /* used after a missed interrupt */
 		bool irq_enabled;
 		bool rpm_wakelock;
@@ -199,7 +204,6 @@ struct intel_engine_cs {
 	unsigned irq_refcount; /* protected by dev_priv->irq_lock */
 	bool		irq_posted;
 	u32		irq_enable_mask;	/* bitmask to enable ring interrupt */
-	struct drm_i915_gem_request *trace_irq_req;
 	bool __must_check (*irq_get)(struct intel_engine_cs *ring);
 	void		(*irq_put)(struct intel_engine_cs *ring);
 
@@ -540,6 +544,7 @@ bool intel_engine_add_wait(struct intel_engine_cs *engine,
 			   struct intel_wait *wait);
 void intel_engine_remove_wait(struct intel_engine_cs *engine,
 			      struct intel_wait *wait);
+int intel_engine_enable_signaling(struct drm_i915_gem_request *request);
 static inline bool intel_engine_has_waiter(struct intel_engine_cs *engine)
 {
 	return READ_ONCE(engine->breadcrumbs.tasklet);
@@ -563,5 +568,6 @@ static inline bool intel_engine_wakeup(struct intel_engine_cs *engine)
 void intel_engine_enable_fake_irq(struct intel_engine_cs *engine);
 void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
 unsigned intel_kick_waiters(struct drm_i915_private *i915);
+unsigned intel_kick_signalers(struct drm_i915_private *i915);
 
 #endif /* _INTEL_RINGBUFFER_H_ */
-- 
2.8.1
