* [PATCH v3 1/5] drm/i915: Wrap tasklet_struct for abuse
@ 2018-05-09 12:04 Chris Wilson
  2018-05-09 12:04 ` [PATCH v3 2/5] drm/i915: Combine tasklet_kill and tasklet_disable Chris Wilson
                   ` (8 more replies)
  0 siblings, 9 replies; 14+ messages in thread
From: Chris Wilson @ 2018-05-09 12:04 UTC (permalink / raw)
  To: intel-gfx

In the next few patches, we want to abuse the tasklet to avoid ksoftirqd
latency along critical paths. To make that abuse easier to swallow,
first coat the tasklet in a little syntactic sugar.
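
For example, rather than poking at the underlying tasklet_struct, callers
go through the wrapper. A sketch of the intended call pattern, taken from
the conversions below:

	/* before */
	tasklet_hi_schedule(&engine->execlists.tasklet);
	tasklet_kill(&engine->execlists.tasklet);

	/* after */
	i915_tasklet_schedule(&engine->execlists.tasklet);
	i915_tasklet_flush(&engine->execlists.tasklet);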

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c             |  8 +--
 drivers/gpu/drm/i915/i915_irq.c             |  2 +-
 drivers/gpu/drm/i915/i915_tasklet.h         | 78 +++++++++++++++++++++
 drivers/gpu/drm/i915/intel_engine_cs.c      | 11 ++-
 drivers/gpu/drm/i915/intel_guc_submission.c |  8 +--
 drivers/gpu/drm/i915/intel_lrc.c            | 18 ++---
 drivers/gpu/drm/i915/intel_ringbuffer.h     |  3 +-
 7 files changed, 104 insertions(+), 24 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_tasklet.h

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 89bf5d67cb74..98481e150e61 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3043,9 +3043,9 @@ i915_gem_reset_prepare_engine(struct intel_engine_cs *engine)
 	 * common case of recursively being called from set-wedged from inside
 	 * i915_reset.
 	 */
-	if (!atomic_read(&engine->execlists.tasklet.count))
-		tasklet_kill(&engine->execlists.tasklet);
-	tasklet_disable(&engine->execlists.tasklet);
+	if (i915_tasklet_is_enabled(&engine->execlists.tasklet))
+		i915_tasklet_flush(&engine->execlists.tasklet);
+	i915_tasklet_disable(&engine->execlists.tasklet);
 
 	/*
 	 * We're using worker to queue preemption requests from the tasklet in
@@ -3265,7 +3265,7 @@ void i915_gem_reset(struct drm_i915_private *dev_priv,
 
 void i915_gem_reset_finish_engine(struct intel_engine_cs *engine)
 {
-	tasklet_enable(&engine->execlists.tasklet);
+	i915_tasklet_enable(&engine->execlists.tasklet);
 	kthread_unpark(engine->breadcrumbs.signaler);
 
 	intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index f9bc3aaa90d0..f8aff5a5aa83 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1477,7 +1477,7 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
 	}
 
 	if (tasklet)
-		tasklet_hi_schedule(&execlists->tasklet);
+		i915_tasklet_schedule(&execlists->tasklet);
 }
 
 static void gen8_gt_irq_ack(struct drm_i915_private *i915,
diff --git a/drivers/gpu/drm/i915/i915_tasklet.h b/drivers/gpu/drm/i915/i915_tasklet.h
new file mode 100644
index 000000000000..c9f41a5ebb91
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_tasklet.h
@@ -0,0 +1,78 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright © 2018 Intel Corporation
+ */
+
+#ifndef _I915_TASKLET_H_
+#define _I915_TASKLET_H_
+
+#include <linux/atomic.h>
+#include <linux/interrupt.h>
+
+/**
+ * struct i915_tasklet - wrapper around tasklet_struct
+ *
+ * We want to abuse tasklets slightly, such as calling them directly. In some
+ * cases, this requires some auxiliary tracking so subclass the tasklet_struct
+ * so that we have a central place and helpers.
+ */
+struct i915_tasklet {
+	struct tasklet_struct base;
+};
+
+static inline void i915_tasklet_init(struct i915_tasklet *t,
+				     void (*func)(unsigned long),
+				     unsigned long data)
+{
+	tasklet_init(&t->base, func, data);
+}
+
+static inline bool i915_tasklet_is_scheduled(const struct i915_tasklet *t)
+{
+	return test_bit(TASKLET_STATE_SCHED, &t->base.state);
+}
+
+static inline bool i915_tasklet_is_locked(const struct i915_tasklet *t)
+{
+	return test_bit(TASKLET_STATE_RUN, &t->base.state);
+}
+
+static inline bool i915_tasklet_is_enabled(const struct i915_tasklet *t)
+{
+	return likely(!atomic_read(&t->base.count));
+}
+
+static inline void i915_tasklet_schedule(struct i915_tasklet *t)
+{
+	tasklet_hi_schedule(&t->base);
+}
+
+static inline void i915_tasklet_flush(struct i915_tasklet *t)
+{
+	tasklet_kill(&t->base);
+}
+
+static inline void i915_tasklet_enable(struct i915_tasklet *t)
+{
+	tasklet_enable(&t->base);
+}
+
+static inline void i915_tasklet_disable(struct i915_tasklet *t)
+{
+	tasklet_disable(&t->base);
+}
+
+static inline void i915_tasklet_set_func(struct i915_tasklet *t,
+					 void (*func)(unsigned long data),
+					 unsigned long data)
+{
+	tasklet_disable(&t->base);
+
+	t->base.func = func;
+	t->base.data = data;
+
+	tasklet_enable(&t->base);
+}
+
+#endif /* _I915_TASKLET_H_ */
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 70325e0824e3..5f1118ea96d8 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1032,7 +1032,7 @@ void intel_engines_park(struct drm_i915_private *i915)
 	for_each_engine(engine, i915, id) {
 		/* Flush the residual irq tasklets first. */
 		intel_engine_disarm_breadcrumbs(engine);
-		tasklet_kill(&engine->execlists.tasklet);
+		i915_tasklet_flush(&engine->execlists.tasklet);
 
 		/*
 		 * We are committed now to parking the engines, make sure there
@@ -1249,9 +1249,8 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine,
 			   intel_read_status_page(engine, intel_hws_csb_write_index(engine->i915)),
 			   yesno(test_bit(ENGINE_IRQ_EXECLIST,
 					  &engine->irq_posted)),
-			   yesno(test_bit(TASKLET_STATE_SCHED,
-					  &engine->execlists.tasklet.state)),
-			   enableddisabled(!atomic_read(&engine->execlists.tasklet.count)));
+			   yesno(i915_tasklet_is_scheduled(&engine->execlists.tasklet)),
+			   enableddisabled(i915_tasklet_is_enabled(&engine->execlists.tasklet)));
 		if (read >= GEN8_CSB_ENTRIES)
 			read = 0;
 		if (write >= GEN8_CSB_ENTRIES)
@@ -1479,7 +1478,7 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
 	if (!intel_engine_supports_stats(engine))
 		return -ENODEV;
 
-	tasklet_disable(&execlists->tasklet);
+	i915_tasklet_disable(&execlists->tasklet);
 	write_seqlock_irqsave(&engine->stats.lock, flags);
 
 	if (unlikely(engine->stats.enabled == ~0)) {
@@ -1505,7 +1504,7 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
 
 unlock:
 	write_sequnlock_irqrestore(&engine->stats.lock, flags);
-	tasklet_enable(&execlists->tasklet);
+	i915_tasklet_enable(&execlists->tasklet);
 
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index 2feb65096966..a7afc976c3b9 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -591,7 +591,7 @@ static void inject_preempt_context(struct work_struct *work)
 	if (WARN_ON(intel_guc_send(guc, data, ARRAY_SIZE(data)))) {
 		execlists_clear_active(&engine->execlists,
 				       EXECLISTS_ACTIVE_PREEMPT);
-		tasklet_schedule(&engine->execlists.tasklet);
+		i915_tasklet_schedule(&engine->execlists.tasklet);
 	}
 }
 
@@ -1263,10 +1263,10 @@ int intel_guc_submission_enable(struct intel_guc *guc)
 	guc_interrupts_capture(dev_priv);
 
 	for_each_engine(engine, dev_priv, id) {
-		struct intel_engine_execlists * const execlists =
-			&engine->execlists;
+		i915_tasklet_set_func(&engine->execlists.tasklet,
+				      guc_submission_tasklet,
+				      (unsigned long)engine);
 
-		execlists->tasklet.func = guc_submission_tasklet;
 		engine->park = guc_submission_park;
 		engine->unpark = guc_submission_unpark;
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 7f98dda3c929..539fa03d7600 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1165,7 +1165,7 @@ static void queue_request(struct intel_engine_cs *engine,
 static void __submit_queue(struct intel_engine_cs *engine, int prio)
 {
 	engine->execlists.queue_priority = prio;
-	tasklet_hi_schedule(&engine->execlists.tasklet);
+	i915_tasklet_schedule(&engine->execlists.tasklet);
 }
 
 static void submit_queue(struct intel_engine_cs *engine, int prio)
@@ -1778,7 +1778,7 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
 
 	/* After a GPU reset, we may have requests to replay */
 	if (execlists->first)
-		tasklet_schedule(&execlists->tasklet);
+		i915_tasklet_schedule(&execlists->tasklet);
 
 	return 0;
 }
@@ -2182,9 +2182,8 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
 	 * Tasklet cannot be active at this point due intel_mark_active/idle
 	 * so this is just for documentation.
 	 */
-	if (WARN_ON(test_bit(TASKLET_STATE_SCHED,
-			     &engine->execlists.tasklet.state)))
-		tasklet_kill(&engine->execlists.tasklet);
+	if (WARN_ON(i915_tasklet_is_scheduled(&engine->execlists.tasklet)))
+		i915_tasklet_flush(&engine->execlists.tasklet);
 
 	dev_priv = engine->i915;
 
@@ -2209,7 +2208,10 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine)
 	engine->submit_request = execlists_submit_request;
 	engine->cancel_requests = execlists_cancel_requests;
 	engine->schedule = execlists_schedule;
-	engine->execlists.tasklet.func = execlists_submission_tasklet;
+
+	i915_tasklet_set_func(&engine->execlists.tasklet,
+			      execlists_submission_tasklet,
+			      (unsigned long)engine);
 
 	engine->park = NULL;
 	engine->unpark = NULL;
@@ -2303,8 +2305,8 @@ logical_ring_setup(struct intel_engine_cs *engine)
 
 	engine->execlists.fw_domains = fw_domains;
 
-	tasklet_init(&engine->execlists.tasklet,
-		     execlists_submission_tasklet, (unsigned long)engine);
+	i915_tasklet_init(&engine->execlists.tasklet,
+			  execlists_submission_tasklet, (unsigned long)engine);
 
 	logical_ring_default_vfuncs(engine);
 	logical_ring_default_irqs(engine);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 010750e8ee44..f6ba354faf89 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -11,6 +11,7 @@
 #include "i915_pmu.h"
 #include "i915_request.h"
 #include "i915_selftest.h"
+#include "i915_tasklet.h"
 #include "i915_timeline.h"
 #include "intel_gpu_commands.h"
 
@@ -202,7 +203,7 @@ struct intel_engine_execlists {
 	/**
 	 * @tasklet: softirq tasklet for bottom handler
 	 */
-	struct tasklet_struct tasklet;
+	struct i915_tasklet tasklet;
 
 	/**
 	 * @default_priolist: priority list for I915_PRIORITY_NORMAL
-- 
2.17.0


* [PATCH v3 2/5] drm/i915: Combine tasklet_kill and tasklet_disable
  2018-05-09 12:04 [PATCH v3 1/5] drm/i915: Wrap tasklet_struct for abuse Chris Wilson
@ 2018-05-09 12:04 ` Chris Wilson
  2018-05-09 13:54   ` Mika Kuoppala
  2018-05-09 12:04 ` [PATCH v3 3/5] drm/i915/execlists: Direct submit onto idle engines Chris Wilson
                   ` (7 subsequent siblings)
  8 siblings, 1 reply; 14+ messages in thread
From: Chris Wilson @ 2018-05-09 12:04 UTC (permalink / raw)
  To: intel-gfx

Ideally, we want to atomically flush and disable the tasklet before
resetting the GPU. At present, we rely on being the only party to touch
our tasklet, and on serialisation of the reset process, to ensure that we can
suspend the tasklet from the mix of reset/wedge pathways. In this patch,
we move the tasklet abuse into its own function and tweak it such that
we only do a synchronous operation the first time it is disabled around
the reset. This allows us to avoid the sync inside a softirq context in
subsequent patches.
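
A sketch of the intended nesting behaviour around reset (illustrative
only; the exact helpers are in i915_tasklet.h below):

	/* First disable: flush, then wait for a concurrently running tasklet. */
	i915_tasklet_disable(&engine->execlists.tasklet);

	/* Nested disable (e.g. set-wedged from inside i915_reset): only bumps
	 * the disable count, no synchronous wait.
	 */
	i915_tasklet_disable(&engine->execlists.tasklet);

	/* ... reset ... */

	i915_tasklet_enable(&engine->execlists.tasklet);
	i915_tasklet_enable(&engine->execlists.tasklet);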

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Michał Winiarski <michal.winiarski@intel.com>
CC: Michel Thierry <michel.thierry@intel.com>
Cc: Jeff McGee <jeff.mcgee@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c        |  2 --
 drivers/gpu/drm/i915/i915_tasklet.h    | 14 ++++++++++++++
 drivers/gpu/drm/i915/intel_engine_cs.c |  4 ++--
 3 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 98481e150e61..8d27e78b052c 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3043,8 +3043,6 @@ i915_gem_reset_prepare_engine(struct intel_engine_cs *engine)
 	 * common case of recursively being called from set-wedged from inside
 	 * i915_reset.
 	 */
-	if (i915_tasklet_is_enabled(&engine->execlists.tasklet))
-		i915_tasklet_flush(&engine->execlists.tasklet);
 	i915_tasklet_disable(&engine->execlists.tasklet);
 
 	/*
diff --git a/drivers/gpu/drm/i915/i915_tasklet.h b/drivers/gpu/drm/i915/i915_tasklet.h
index c9f41a5ebb91..d8263892f385 100644
--- a/drivers/gpu/drm/i915/i915_tasklet.h
+++ b/drivers/gpu/drm/i915/i915_tasklet.h
@@ -59,10 +59,24 @@ static inline void i915_tasklet_enable(struct i915_tasklet *t)
 }
 
 static inline void i915_tasklet_disable(struct i915_tasklet *t)
+{
+	if (i915_tasklet_is_enabled(t))
+		i915_tasklet_flush(t);
+
+	if (atomic_inc_return(&t->base.count) == 1)
+		tasklet_unlock_wait(&t->base);
+}
+
+static inline void i915_tasklet_lock(struct i915_tasklet *t)
 {
 	tasklet_disable(&t->base);
 }
 
+static inline void i915_tasklet_unlock(struct i915_tasklet *t)
+{
+	tasklet_enable(&t->base);
+}
+
 static inline void i915_tasklet_set_func(struct i915_tasklet *t,
 					 void (*func)(unsigned long data),
 					 unsigned long data)
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 5f1118ea96d8..3c8a0c3245f3 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1478,7 +1478,7 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
 	if (!intel_engine_supports_stats(engine))
 		return -ENODEV;
 
-	i915_tasklet_disable(&execlists->tasklet);
+	i915_tasklet_lock(&execlists->tasklet);
 	write_seqlock_irqsave(&engine->stats.lock, flags);
 
 	if (unlikely(engine->stats.enabled == ~0)) {
@@ -1504,7 +1504,7 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
 
 unlock:
 	write_sequnlock_irqrestore(&engine->stats.lock, flags);
-	i915_tasklet_enable(&execlists->tasklet);
+	i915_tasklet_unlock(&execlists->tasklet);
 
 	return err;
 }
-- 
2.17.0


* [PATCH v3 3/5] drm/i915/execlists: Direct submit onto idle engines
  2018-05-09 12:04 [PATCH v3 1/5] drm/i915: Wrap tasklet_struct for abuse Chris Wilson
  2018-05-09 12:04 ` [PATCH v3 2/5] drm/i915: Combine tasklet_kill and tasklet_disable Chris Wilson
@ 2018-05-09 12:04 ` Chris Wilson
  2018-05-09 12:04 ` [PATCH v3 4/5] drm/i915/execlists: Direct submission from irq handler Chris Wilson
                   ` (6 subsequent siblings)
  8 siblings, 0 replies; 14+ messages in thread
From: Chris Wilson @ 2018-05-09 12:04 UTC (permalink / raw)
  To: intel-gfx

Bypass using the tasklet to submit the first request to HW, as the
tasklet may be deferred to ksoftirqd and at a minimum will add in
excess of 10us (and maybe tens of milliseconds) to our execution
latency. This latency reduction is most notable when execution flows
between engines.

v2: Beware handling preemption completion from the direct submit path as
well.
v3: Make the abuse clear and track our extra state inside i915_tasklet.
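
The locking subtlety is that the direct path runs the tasklet body with
engine->timeline.lock already held by the submitter, which is why the
dequeue helpers skip retaking it when the DIRECT_SUBMIT flag is set. A
rough, illustrative call chain (assuming the submission tasklet ends up
in execlists_dequeue(), as it does today):

	execlists_submit_request()
	  spin_lock_irqsave(&engine->timeline.lock, flags);
	  queue_request(...);
	  submit_queue() -> __submit_queue()
	    /* only if the ELSP ports are idle and tasklet_trylock() succeeds */
	    __direct_submit()
	      t->flags |= I915_TASKLET_DIRECT_SUBMIT;
	      i915_tasklet_run(t);	/* execlists_submission_tasklet() */
	        /* execlists_dequeue() sees DIRECT_SUBMIT, skips timeline.lock */
	      t->flags &= ~I915_TASKLET_DIRECT_SUBMIT;
	  spin_unlock_irqrestore(&engine->timeline.lock, flags);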

Suggested-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_tasklet.h         | 24 +++++++
 drivers/gpu/drm/i915/intel_guc_submission.c | 10 ++-
 drivers/gpu/drm/i915/intel_lrc.c            | 71 +++++++++++++++++----
 3 files changed, 89 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_tasklet.h b/drivers/gpu/drm/i915/i915_tasklet.h
index d8263892f385..bb8f03a03b29 100644
--- a/drivers/gpu/drm/i915/i915_tasklet.h
+++ b/drivers/gpu/drm/i915/i915_tasklet.h
@@ -8,8 +8,11 @@
 #define _I915_TASKLET_H_
 
 #include <linux/atomic.h>
+#include <linux/bitops.h>
 #include <linux/interrupt.h>
 
+#include "i915_gem.h"
+
 /**
  * struct i915_tasklet - wrapper around tasklet_struct
  *
@@ -19,6 +22,8 @@
  */
 struct i915_tasklet {
 	struct tasklet_struct base;
+	unsigned long flags;
+#define I915_TASKLET_DIRECT_SUBMIT BIT(0)
 };
 
 static inline void i915_tasklet_init(struct i915_tasklet *t,
@@ -43,6 +48,14 @@ static inline bool i915_tasklet_is_enabled(const struct i915_tasklet *t)
 	return likely(!atomic_read(&t->base.count));
 }
 
+static inline bool i915_tasklet_is_direct_submit(const struct i915_tasklet *t)
+{
+	/* Only legal to be checked from inside the tasklet. */
+	GEM_BUG_ON(!i915_tasklet_is_locked(t));
+
+	return t->flags & I915_TASKLET_DIRECT_SUBMIT;
+}
+
 static inline void i915_tasklet_schedule(struct i915_tasklet *t)
 {
 	tasklet_hi_schedule(&t->base);
@@ -89,4 +102,15 @@ static inline void i915_tasklet_set_func(struct i915_tasklet *t,
 	tasklet_enable(&t->base);
 }
 
+static inline void __i915_tasklet_run(const struct i915_tasklet *t)
+{
+	t->base.func(t->base.data);
+}
+
+static inline void i915_tasklet_run(const struct i915_tasklet *t)
+{
+	GEM_BUG_ON(!i915_tasklet_is_locked(t));
+	__i915_tasklet_run(t);
+}
+
 #endif /* _I915_TASKLET_H_ */
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index a7afc976c3b9..f2ded1796523 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -754,14 +754,18 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
 
 static void guc_dequeue(struct intel_engine_cs *engine)
 {
-	unsigned long flags;
+	unsigned long uninitialized_var(flags);
 	bool submit;
 
 	local_irq_save(flags);
 
-	spin_lock(&engine->timeline.lock);
+	if (!i915_tasklet_is_direct_submit(&engine->execlists.tasklet))
+		spin_lock(&engine->timeline.lock);
+
 	submit = __guc_dequeue(engine);
-	spin_unlock(&engine->timeline.lock);
+
+	if (!i915_tasklet_is_direct_submit(&engine->execlists.tasklet))
+		spin_unlock(&engine->timeline.lock);
 
 	if (submit)
 		guc_submit(engine);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 539fa03d7600..09fded9d409f 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -356,13 +356,15 @@ execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists)
 {
 	struct intel_engine_cs *engine =
 		container_of(execlists, typeof(*engine), execlists);
-	unsigned long flags;
+	unsigned long uninitialized_var(flags);
 
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	if (!i915_tasklet_is_direct_submit(&execlists->tasklet))
+		spin_lock_irqsave(&engine->timeline.lock, flags);
 
 	__unwind_incomplete_requests(engine);
 
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	if (!i915_tasklet_is_direct_submit(&execlists->tasklet))
+		spin_unlock_irqrestore(&engine->timeline.lock, flags);
 }
 
 static inline void
@@ -601,6 +603,8 @@ static bool __execlists_dequeue(struct intel_engine_cs *engine)
 		 */
 		GEM_BUG_ON(!execlists_is_active(execlists,
 						EXECLISTS_ACTIVE_USER));
+		GEM_BUG_ON(execlists_is_active(execlists,
+					       EXECLISTS_ACTIVE_PREEMPT));
 		GEM_BUG_ON(!port_count(&port[0]));
 		if (port_count(&port[0]) > 1)
 			return false;
@@ -757,12 +761,16 @@ static bool __execlists_dequeue(struct intel_engine_cs *engine)
 static void execlists_dequeue(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
-	unsigned long flags;
+	unsigned long uninitialized_var(flags);
 	bool submit;
 
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	if (!i915_tasklet_is_direct_submit(&execlists->tasklet))
+		spin_lock_irqsave(&engine->timeline.lock, flags);
+
 	submit = __execlists_dequeue(engine);
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+
+	if (!i915_tasklet_is_direct_submit(&execlists->tasklet))
+		spin_unlock_irqrestore(&engine->timeline.lock, flags);
 
 	if (submit)
 		execlists_submit_ports(engine);
@@ -1162,16 +1170,52 @@ static void queue_request(struct intel_engine_cs *engine,
 		      &lookup_priolist(engine, prio)->requests);
 }
 
-static void __submit_queue(struct intel_engine_cs *engine, int prio)
+static void __wakeup_queue(struct intel_engine_cs *engine, int prio)
 {
 	engine->execlists.queue_priority = prio;
+}
+
+static void __schedule_queue(struct intel_engine_cs *engine)
+{
 	i915_tasklet_schedule(&engine->execlists.tasklet);
 }
 
+static bool __direct_submit(struct intel_engine_execlists *const execlists)
+{
+	struct i915_tasklet * const t = &execlists->tasklet;
+
+	if (!tasklet_trylock(&t->base))
+		return false;
+
+	t->flags |= I915_TASKLET_DIRECT_SUBMIT;
+	i915_tasklet_run(t);
+	t->flags &= ~I915_TASKLET_DIRECT_SUBMIT;
+
+	tasklet_unlock(&t->base);
+	return true;
+}
+
+static void __submit_queue(struct intel_engine_cs *engine)
+{
+	struct intel_engine_execlists * const execlists = &engine->execlists;
+
+	GEM_BUG_ON(!engine->i915->gt.awake);
+
+	/* If inside GPU reset, the tasklet will be queued later. */
+	if (!i915_tasklet_is_enabled(&execlists->tasklet))
+		return;
+
+	/* Directly submit the first request to reduce the initial latency */
+	if (port_isset(execlists->port) || !__direct_submit(execlists))
+		__schedule_queue(engine);
+}
+
 static void submit_queue(struct intel_engine_cs *engine, int prio)
 {
-	if (prio > engine->execlists.queue_priority)
-		__submit_queue(engine, prio);
+	if (prio > engine->execlists.queue_priority) {
+		__wakeup_queue(engine, prio);
+		__submit_queue(engine);
+	}
 }
 
 static void execlists_submit_request(struct i915_request *request)
@@ -1183,10 +1227,9 @@ static void execlists_submit_request(struct i915_request *request)
 	spin_lock_irqsave(&engine->timeline.lock, flags);
 
 	queue_request(engine, &request->sched, rq_prio(request));
-	submit_queue(engine, rq_prio(request));
-
 	GEM_BUG_ON(!engine->execlists.first);
 	GEM_BUG_ON(list_empty(&request->sched.link));
+	submit_queue(engine, rq_prio(request));
 
 	spin_unlock_irqrestore(&engine->timeline.lock, flags);
 }
@@ -1314,8 +1357,10 @@ static void execlists_schedule(struct i915_request *request,
 		}
 
 		if (prio > engine->execlists.queue_priority &&
-		    i915_sw_fence_done(&sched_to_request(node)->submit))
-			__submit_queue(engine, prio);
+		    i915_sw_fence_done(&sched_to_request(node)->submit)) {
+			__wakeup_queue(engine, prio);
+			__schedule_queue(engine);
+		}
 	}
 
 	spin_unlock_irq(&engine->timeline.lock);
-- 
2.17.0


* [PATCH v3 4/5] drm/i915/execlists: Direct submission from irq handler
  2018-05-09 12:04 [PATCH v3 1/5] drm/i915: Wrap tasklet_struct for abuse Chris Wilson
  2018-05-09 12:04 ` [PATCH v3 2/5] drm/i915: Combine tasklet_kill and tasklet_disable Chris Wilson
  2018-05-09 12:04 ` [PATCH v3 3/5] drm/i915/execlists: Direct submit onto idle engines Chris Wilson
@ 2018-05-09 12:04 ` Chris Wilson
  2018-05-09 12:04 ` [PATCH v3 5/5] drm/i915: Speed up idle detection by kicking the tasklets Chris Wilson
                   ` (5 subsequent siblings)
  8 siblings, 0 replies; 14+ messages in thread
From: Chris Wilson @ 2018-05-09 12:04 UTC (permalink / raw)
  To: intel-gfx

Continuing the theme of bypassing ksoftirqd latency, also try first to
submit directly from the CS interrupt handler to clear the ELSP and
queue the next request.

In the past, we have been hesitant to do this as the context switch
processing has been quite heavy, requiring forcewaked mmio. However, as
we can now read the GPU state from the cacheable HWSP, it is relatively
cheap!

v2: Explain why we test_bit(IRQ_EXECLIST) after doing notify_ring (it's
because notify_ring() may itself trigger direct submission, clearing
the bit)
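
In short, the CS interrupt handler now does the following (a condensed
view of the i915_tasklet() helper added below):

	if (tasklet)
		i915_tasklet(&execlists->tasklet);

	/* where i915_tasklet() is roughly: */
	if (!i915_tasklet_is_enabled(t))	/* GPU reset in progress */
		return;
	if (!i915_tasklet_try(t))		/* trylock and run inline */
		i915_tasklet_schedule(t);	/* fall back to the softirq */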

Suggested-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_irq.c             | 21 ++++++++++++++-------
 drivers/gpu/drm/i915/i915_tasklet.h         | 19 +++++++++++++++++++
 drivers/gpu/drm/i915/intel_guc_submission.c |  2 ++
 3 files changed, 35 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index f8aff5a5aa83..e1b3a7575fe7 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1465,19 +1465,26 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 	bool tasklet = false;
 
-	if (iir & GT_CONTEXT_SWITCH_INTERRUPT) {
-		if (READ_ONCE(engine->execlists.active))
-			tasklet = !test_and_set_bit(ENGINE_IRQ_EXECLIST,
-						    &engine->irq_posted);
-	}
+	if (iir & GT_CONTEXT_SWITCH_INTERRUPT && READ_ONCE(execlists->active))
+		tasklet = !test_and_set_bit(ENGINE_IRQ_EXECLIST,
+					    &engine->irq_posted);
 
 	if (iir & GT_RENDER_USER_INTERRUPT) {
 		notify_ring(engine);
-		tasklet |= USES_GUC_SUBMISSION(engine->i915);
+		/*
+		 * notify_ring() may trigger direct submission onto this
+		 * engine, clearing the ENGINE_IRQ_EXECLIST bit. In that
+		 * case, we don't want to resubmit and so clear the tasklet
+		 * boolean. GuC never sets the ENGINE_IRQ_EXECLIST bit and
+		 * so when using the GuC this equates to an unconditional
+		 * setting of tasklet to true.
+		 */
+		if (!test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
+			tasklet = USES_GUC_SUBMISSION(engine->i915);
 	}
 
 	if (tasklet)
-		i915_tasklet_schedule(&execlists->tasklet);
+		i915_tasklet(&execlists->tasklet);
 }
 
 static void gen8_gt_irq_ack(struct drm_i915_private *i915,
diff --git a/drivers/gpu/drm/i915/i915_tasklet.h b/drivers/gpu/drm/i915/i915_tasklet.h
index bb8f03a03b29..6fcab4ec7f35 100644
--- a/drivers/gpu/drm/i915/i915_tasklet.h
+++ b/drivers/gpu/drm/i915/i915_tasklet.h
@@ -113,4 +113,23 @@ static inline void i915_tasklet_run(const struct i915_tasklet *t)
 	__i915_tasklet_run(t);
 }
 
+static inline bool i915_tasklet_try(struct i915_tasklet *t)
+{
+	if (unlikely(!tasklet_trylock(&t->base)))
+		return false;
+
+	i915_tasklet_run(t);
+	tasklet_unlock(&t->base);
+	return true;
+}
+
+static inline void i915_tasklet(struct i915_tasklet *t)
+{
+	if (!i915_tasklet_is_enabled(t)) /* GPU reset active */
+		return;
+
+	if (!i915_tasklet_try(t))
+		i915_tasklet_schedule(t);
+}
+
 #endif /* _I915_TASKLET_H_ */
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index f2ded1796523..4e09abf7e206 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -780,6 +780,8 @@ static void guc_submission_tasklet(unsigned long data)
 	struct execlist_port *port = execlists->port;
 	struct i915_request *rq;
 
+	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
+
 	rq = port_request(port);
 	while (rq && i915_request_completed(rq)) {
 		trace_i915_request_out(rq);
-- 
2.17.0


* [PATCH v3 5/5] drm/i915: Speed up idle detection by kicking the tasklets
  2018-05-09 12:04 [PATCH v3 1/5] drm/i915: Wrap tasklet_struct for abuse Chris Wilson
                   ` (2 preceding siblings ...)
  2018-05-09 12:04 ` [PATCH v3 4/5] drm/i915/execlists: Direct submission from irq handler Chris Wilson
@ 2018-05-09 12:04 ` Chris Wilson
  2018-05-09 12:28 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [v3,1/5] drm/i915: Wrap tasklet_struct for abuse Patchwork
                   ` (4 subsequent siblings)
  8 siblings, 0 replies; 14+ messages in thread
From: Chris Wilson @ 2018-05-09 12:04 UTC (permalink / raw)
  To: intel-gfx

We rely on ksoftirqd to run in a timely fashion in order to drain the
execlists queue. Quite frequently, it does not. In some cases we may see
latencies of over 200ms, triggering our idle timeouts and forcing us to
declare the driver wedged!

Thus we can speed up idle detection by bypassing ksoftirqd in these
cases and flushing our tasklet to confirm whether we are indeed still
waiting for the ELSP to drain.

v2: Put the execlists.first check back; it is required for handling
reset!
v3: Follow Mika's suggestion to try to limit kicking the tasklet to
only when we expect it to make a difference, i.e. when catching up after
a CS interrupt, and not just execute it every time, as that is likely
just to cover over our own bugs.
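
The check added to intel_engine_is_idle() is, in essence (see the hunk
below):

	/*
	 * Expecting a context switch? Run the tasklet inline now rather
	 * than waiting on ksoftirqd.
	 */
	if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
		i915_tasklet_try(&engine->execlists.tasklet);

	/* Waiting to drain ELSP? */
	if (READ_ONCE(engine->execlists.active))
		return false;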

References: https://bugs.freedesktop.org/show_bug.cgi?id=106373
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/intel_engine_cs.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 3c8a0c3245f3..f975091c5498 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -944,11 +944,20 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine)
 	if (I915_SELFTEST_ONLY(engine->breadcrumbs.mock))
 		return true;
 
+	/*
+	 * ksoftirqd has notorious latency that may cause us to
+	 * timeout while waiting for the engine to idle as we wait for
+	 * ksoftirqd to run the execlists tasklet to drain the ELSP.
+	 * If we are expecting a context switch from the GPU, check now.
+	 */
+	if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
+		i915_tasklet_try(&engine->execlists.tasklet);
+
 	/* Waiting to drain ELSP? */
 	if (READ_ONCE(engine->execlists.active))
 		return false;
 
-	/* ELSP is empty, but there are ready requests? */
+	/* ELSP is empty, but there are ready requests? E.g. after reset */
 	if (READ_ONCE(engine->execlists.first))
 		return false;
 
-- 
2.17.0


* ✗ Fi.CI.CHECKPATCH: warning for series starting with [v3,1/5] drm/i915: Wrap tasklet_struct for abuse
  2018-05-09 12:04 [PATCH v3 1/5] drm/i915: Wrap tasklet_struct for abuse Chris Wilson
                   ` (3 preceding siblings ...)
  2018-05-09 12:04 ` [PATCH v3 5/5] drm/i915: Speed up idle detection by kicking the tasklets Chris Wilson
@ 2018-05-09 12:28 ` Patchwork
  2018-05-09 12:30 ` ✗ Fi.CI.SPARSE: " Patchwork
                   ` (3 subsequent siblings)
  8 siblings, 0 replies; 14+ messages in thread
From: Patchwork @ 2018-05-09 12:28 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [v3,1/5] drm/i915: Wrap tasklet_struct for abuse
URL   : https://patchwork.freedesktop.org/series/42942/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
949ea8da2485 drm/i915: Wrap tasklet_struct for abuse
-:53: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#53: 
new file mode 100644

-:58: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#58: FILE: drivers/gpu/drm/i915/i915_tasklet.h:1:
+/*

total: 0 errors, 2 warnings, 0 checks, 225 lines checked
48eff927a0b7 drm/i915: Combine tasklet_kill and tasklet_disable
7a3aa88dc258 drm/i915/execlists: Direct submit onto idle engines
-:85: WARNING:FUNCTION_ARGUMENTS: function definition argument 'flags' should also have an identifier name
#85: FILE: drivers/gpu/drm/i915/intel_guc_submission.c:757:
+	unsigned long uninitialized_var(flags);

-:111: WARNING:FUNCTION_ARGUMENTS: function definition argument 'flags' should also have an identifier name
#111: FILE: drivers/gpu/drm/i915/intel_lrc.c:359:
+	unsigned long uninitialized_var(flags);

-:139: WARNING:FUNCTION_ARGUMENTS: function definition argument 'flags' should also have an identifier name
#139: FILE: drivers/gpu/drm/i915/intel_lrc.c:764:
+	unsigned long uninitialized_var(flags);

total: 0 errors, 3 warnings, 0 checks, 192 lines checked
acdabab95b40 drm/i915/execlists: Direct submission from irq handler
5126eaff44f8 drm/i915: Speed up idle detection by kicking the tasklets


* ✗ Fi.CI.SPARSE: warning for series starting with [v3,1/5] drm/i915: Wrap tasklet_struct for abuse
  2018-05-09 12:04 [PATCH v3 1/5] drm/i915: Wrap tasklet_struct for abuse Chris Wilson
                   ` (4 preceding siblings ...)
  2018-05-09 12:28 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [v3,1/5] drm/i915: Wrap tasklet_struct for abuse Patchwork
@ 2018-05-09 12:30 ` Patchwork
  2018-05-09 12:44 ` ✓ Fi.CI.BAT: success " Patchwork
                   ` (2 subsequent siblings)
  8 siblings, 0 replies; 14+ messages in thread
From: Patchwork @ 2018-05-09 12:30 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [v3,1/5] drm/i915: Wrap tasklet_struct for abuse
URL   : https://patchwork.freedesktop.org/series/42942/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Commit: drm/i915: Wrap tasklet_struct for abuse
Okay!

Commit: drm/i915: Combine tasklet_kill and tasklet_disable
Okay!

Commit: drm/i915/execlists: Direct submit onto idle engines
+drivers/gpu/drm/i915/intel_guc_submission.c:768:28: warning: context imbalance in 'guc_dequeue' - unexpected unlock
+drivers/gpu/drm/i915/intel_lrc.c:367:39: warning: context imbalance in 'execlists_unwind_incomplete_requests' - unexpected unlock
+drivers/gpu/drm/i915/intel_lrc.c:773:39: warning: context imbalance in 'execlists_dequeue' - unexpected unlock

Commit: drm/i915/execlists: Direct submission from irq handler
Okay!

Commit: drm/i915: Speed up idle detection by kicking the tasklets
Okay!


* ✓ Fi.CI.BAT: success for series starting with [v3,1/5] drm/i915: Wrap tasklet_struct for abuse
  2018-05-09 12:04 [PATCH v3 1/5] drm/i915: Wrap tasklet_struct for abuse Chris Wilson
                   ` (5 preceding siblings ...)
  2018-05-09 12:30 ` ✗ Fi.CI.SPARSE: " Patchwork
@ 2018-05-09 12:44 ` Patchwork
  2018-05-09 13:44 ` [PATCH v3 1/5] " Mika Kuoppala
  2018-05-09 14:26 ` ✓ Fi.CI.IGT: success for series starting with [v3,1/5] " Patchwork
  8 siblings, 0 replies; 14+ messages in thread
From: Patchwork @ 2018-05-09 12:44 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [v3,1/5] drm/i915: Wrap tasklet_struct for abuse
URL   : https://patchwork.freedesktop.org/series/42942/
State : success

== Summary ==

= CI Bug Log - changes from CI_DRM_4162 -> Patchwork_8961 =

== Summary - SUCCESS ==

  No regressions found.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/42942/revisions/1/mbox/

== Known issues ==

  Here are the changes found in Patchwork_8961 that come from known issues:

  === IGT changes ===

    ==== Issues hit ====

    igt@prime_vgem@basic-fence-flip:
      fi-ilk-650:         PASS -> FAIL (fdo#104008)

    
    ==== Possible fixes ====

    igt@gem_mmap_gtt@basic-small-bo-tiledx:
      fi-gdg-551:         FAIL (fdo#102575) -> PASS

    
  fdo#102575 https://bugs.freedesktop.org/show_bug.cgi?id=102575
  fdo#104008 https://bugs.freedesktop.org/show_bug.cgi?id=104008


== Participating hosts (41 -> 37) ==

  Missing    (4): fi-ctg-p8600 fi-ilk-m540 fi-byt-squawks fi-skl-6700hq 


== Build changes ==

    * Linux: CI_DRM_4162 -> Patchwork_8961

  CI_DRM_4162: ea17aab5ec734a262a252758d7412c09dfa833cc @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4468: 548a894dc904c4628522dbbc77cb179a4c965ebc @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_8961: 5126eaff44f881f5deff006f2338ef87f4940bf9 @ git://anongit.freedesktop.org/gfx-ci/linux
  piglit_4468: 1e60f1499e5b71b6d5a747189d7c28f57359a87f @ git://anongit.freedesktop.org/piglit


== Linux commits ==

5126eaff44f8 drm/i915: Speed up idle detection by kicking the tasklets
acdabab95b40 drm/i915/execlists: Direct submission from irq handler
7a3aa88dc258 drm/i915/execlists: Direct submit onto idle engines
48eff927a0b7 drm/i915: Combine tasklet_kill and tasklet_disable
949ea8da2485 drm/i915: Wrap tasklet_struct for abuse

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_8961/issues.html

* Re: [PATCH v3 1/5] drm/i915: Wrap tasklet_struct for abuse
  2018-05-09 12:04 [PATCH v3 1/5] drm/i915: Wrap tasklet_struct for abuse Chris Wilson
                   ` (6 preceding siblings ...)
  2018-05-09 12:44 ` ✓ Fi.CI.BAT: success " Patchwork
@ 2018-05-09 13:44 ` Mika Kuoppala
  2018-05-09 13:51   ` Chris Wilson
  2018-05-09 14:26 ` ✓ Fi.CI.IGT: success for series starting with [v3,1/5] " Patchwork
  8 siblings, 1 reply; 14+ messages in thread
From: Mika Kuoppala @ 2018-05-09 13:44 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> In the next few patches, we want to abuse tasklet to avoid ksoftirqd
> latency along critical paths. To make that abuse easily to swallow,
> first coat the tasklet in a little syntactic sugar.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_gem.c             |  8 +--
>  drivers/gpu/drm/i915/i915_irq.c             |  2 +-
>  drivers/gpu/drm/i915/i915_tasklet.h         | 78 +++++++++++++++++++++
>  drivers/gpu/drm/i915/intel_engine_cs.c      | 11 ++-
>  drivers/gpu/drm/i915/intel_guc_submission.c |  8 +--
>  drivers/gpu/drm/i915/intel_lrc.c            | 18 ++---
>  drivers/gpu/drm/i915/intel_ringbuffer.h     |  3 +-
>  7 files changed, 104 insertions(+), 24 deletions(-)
>  create mode 100644 drivers/gpu/drm/i915/i915_tasklet.h
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 89bf5d67cb74..98481e150e61 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3043,9 +3043,9 @@ i915_gem_reset_prepare_engine(struct intel_engine_cs *engine)
>  	 * common case of recursively being called from set-wedged from inside
>  	 * i915_reset.
>  	 */
> -	if (!atomic_read(&engine->execlists.tasklet.count))
> -		tasklet_kill(&engine->execlists.tasklet);
> -	tasklet_disable(&engine->execlists.tasklet);
> +	if (i915_tasklet_is_enabled(&engine->execlists.tasklet))
> +		i915_tasklet_flush(&engine->execlists.tasklet);
> +	i915_tasklet_disable(&engine->execlists.tasklet);
>  
>  	/*
>  	 * We're using worker to queue preemption requests from the tasklet in
> @@ -3265,7 +3265,7 @@ void i915_gem_reset(struct drm_i915_private *dev_priv,
>  
>  void i915_gem_reset_finish_engine(struct intel_engine_cs *engine)
>  {
> -	tasklet_enable(&engine->execlists.tasklet);
> +	i915_tasklet_enable(&engine->execlists.tasklet);
>  	kthread_unpark(engine->breadcrumbs.signaler);
>  
>  	intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL);
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index f9bc3aaa90d0..f8aff5a5aa83 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1477,7 +1477,7 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
>  	}
>  
>  	if (tasklet)
> -		tasklet_hi_schedule(&execlists->tasklet);
> +		i915_tasklet_schedule(&execlists->tasklet);
>  }
>  
>  static void gen8_gt_irq_ack(struct drm_i915_private *i915,
> diff --git a/drivers/gpu/drm/i915/i915_tasklet.h b/drivers/gpu/drm/i915/i915_tasklet.h
> new file mode 100644
> index 000000000000..c9f41a5ebb91
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/i915_tasklet.h
> @@ -0,0 +1,78 @@
> +/*
> + * SPDX-License-Identifier: GPL-2.0
> + *
> + * Copyright © 2018 Intel Corporation
> + */
> +
> +#ifndef _I915_TASKLET_H_
> +#define _I915_TASKLET_H_
> +
> +#include <linux/atomic.h>
> +#include <linux/interrupt.h>
> +
> +/**
> + * struct i915_tasklet - wrapper around tasklet_struct
> + *
> + * We want to abuse tasklets slightly, such as calling them directly. In some
> + * cases, this requires some auxiliary tracking so subclass the tasklet_struct
> + * so that we have a central place and helpers.
> + */
> +struct i915_tasklet {
> +	struct tasklet_struct base;
> +};
> +
> +static inline void i915_tasklet_init(struct i915_tasklet *t,
> +				     void (*func)(unsigned long),
> +				     unsigned long data)
> +{
> +	tasklet_init(&t->base, func, data);
> +}
> +
> +static inline bool i915_tasklet_is_scheduled(const struct i915_tasklet *t)
> +{
> +	return test_bit(TASKLET_STATE_SCHED, &t->base.state);
> +}
> +
> +static inline bool i915_tasklet_is_locked(const struct i915_tasklet *t)

why not is_running() ?

> +{
> +	return test_bit(TASKLET_STATE_RUN, &t->base.state);
> +}
> +
> +static inline bool i915_tasklet_is_enabled(const struct i915_tasklet *t)
> +{
> +	return likely(!atomic_read(&t->base.count));

I am concerned that we bury the sign of racy nature out of sight and
then it comes and bites us.

-Mika

> +}
> +
> +static inline void i915_tasklet_schedule(struct i915_tasklet *t)
> +{
> +	tasklet_hi_schedule(&t->base);
> +}
> +
> +static inline void i915_tasklet_flush(struct i915_tasklet *t)
> +{
> +	tasklet_kill(&t->base);
> +}
> +
> +static inline void i915_tasklet_enable(struct i915_tasklet *t)
> +{
> +	tasklet_enable(&t->base);
> +}
> +
> +static inline void i915_tasklet_disable(struct i915_tasklet *t)
> +{
> +	tasklet_disable(&t->base);
> +}
> +
> +static inline void i915_tasklet_set_func(struct i915_tasklet *t,
> +					 void (*func)(unsigned long data),
> +					 unsigned long data)
> +{
> +	tasklet_disable(&t->base);

What does the disable/enable pair buy us here?

It looks that you want trylock and unlock
so that you can safely change the func/data.

-Mika

> +
> +	t->base.func = func;
> +	t->base.data = data;
> +
> +	tasklet_enable(&t->base);
> +}
> +
> +#endif /* _I915_TASKLET_H_ */
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index 70325e0824e3..5f1118ea96d8 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -1032,7 +1032,7 @@ void intel_engines_park(struct drm_i915_private *i915)
>  	for_each_engine(engine, i915, id) {
>  		/* Flush the residual irq tasklets first. */
>  		intel_engine_disarm_breadcrumbs(engine);
> -		tasklet_kill(&engine->execlists.tasklet);
> +		i915_tasklet_flush(&engine->execlists.tasklet);
>  
>  		/*
>  		 * We are committed now to parking the engines, make sure there
> @@ -1249,9 +1249,8 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine,
>  			   intel_read_status_page(engine, intel_hws_csb_write_index(engine->i915)),
>  			   yesno(test_bit(ENGINE_IRQ_EXECLIST,
>  					  &engine->irq_posted)),
> -			   yesno(test_bit(TASKLET_STATE_SCHED,
> -					  &engine->execlists.tasklet.state)),
> -			   enableddisabled(!atomic_read(&engine->execlists.tasklet.count)));
> +			   yesno(i915_tasklet_is_scheduled(&engine->execlists.tasklet)),
> +			   enableddisabled(i915_tasklet_is_enabled(&engine->execlists.tasklet)));
>  		if (read >= GEN8_CSB_ENTRIES)
>  			read = 0;
>  		if (write >= GEN8_CSB_ENTRIES)
> @@ -1479,7 +1478,7 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
>  	if (!intel_engine_supports_stats(engine))
>  		return -ENODEV;
>  
> -	tasklet_disable(&execlists->tasklet);
> +	i915_tasklet_disable(&execlists->tasklet);
>  	write_seqlock_irqsave(&engine->stats.lock, flags);
>  
>  	if (unlikely(engine->stats.enabled == ~0)) {
> @@ -1505,7 +1504,7 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
>  
>  unlock:
>  	write_sequnlock_irqrestore(&engine->stats.lock, flags);
> -	tasklet_enable(&execlists->tasklet);
> +	i915_tasklet_enable(&execlists->tasklet);
>  
>  	return err;
>  }
> diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
> index 2feb65096966..a7afc976c3b9 100644
> --- a/drivers/gpu/drm/i915/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/intel_guc_submission.c
> @@ -591,7 +591,7 @@ static void inject_preempt_context(struct work_struct *work)
>  	if (WARN_ON(intel_guc_send(guc, data, ARRAY_SIZE(data)))) {
>  		execlists_clear_active(&engine->execlists,
>  				       EXECLISTS_ACTIVE_PREEMPT);
> -		tasklet_schedule(&engine->execlists.tasklet);
> +		i915_tasklet_schedule(&engine->execlists.tasklet);
>  	}
>  }
>  
> @@ -1263,10 +1263,10 @@ int intel_guc_submission_enable(struct intel_guc *guc)
>  	guc_interrupts_capture(dev_priv);
>  
>  	for_each_engine(engine, dev_priv, id) {
> -		struct intel_engine_execlists * const execlists =
> -			&engine->execlists;
> +		i915_tasklet_set_func(&engine->execlists.tasklet,
> +				      guc_submission_tasklet,
> +				      (unsigned long)engine);
>  
> -		execlists->tasklet.func = guc_submission_tasklet;
>  		engine->park = guc_submission_park;
>  		engine->unpark = guc_submission_unpark;
>  
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 7f98dda3c929..539fa03d7600 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -1165,7 +1165,7 @@ static void queue_request(struct intel_engine_cs *engine,
>  static void __submit_queue(struct intel_engine_cs *engine, int prio)
>  {
>  	engine->execlists.queue_priority = prio;
> -	tasklet_hi_schedule(&engine->execlists.tasklet);
> +	i915_tasklet_schedule(&engine->execlists.tasklet);
>  }
>  
>  static void submit_queue(struct intel_engine_cs *engine, int prio)
> @@ -1778,7 +1778,7 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
>  
>  	/* After a GPU reset, we may have requests to replay */
>  	if (execlists->first)
> -		tasklet_schedule(&execlists->tasklet);
> +		i915_tasklet_schedule(&execlists->tasklet);
>  
>  	return 0;
>  }
> @@ -2182,9 +2182,8 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
>  	 * Tasklet cannot be active at this point due intel_mark_active/idle
>  	 * so this is just for documentation.
>  	 */
> -	if (WARN_ON(test_bit(TASKLET_STATE_SCHED,
> -			     &engine->execlists.tasklet.state)))
> -		tasklet_kill(&engine->execlists.tasklet);
> +	if (WARN_ON(i915_tasklet_is_scheduled(&engine->execlists.tasklet)))
> +		i915_tasklet_flush(&engine->execlists.tasklet);
>  
>  	dev_priv = engine->i915;
>  
> @@ -2209,7 +2208,10 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine)
>  	engine->submit_request = execlists_submit_request;
>  	engine->cancel_requests = execlists_cancel_requests;
>  	engine->schedule = execlists_schedule;
> -	engine->execlists.tasklet.func = execlists_submission_tasklet;
> +
> +	i915_tasklet_set_func(&engine->execlists.tasklet,
> +			      execlists_submission_tasklet,
> +			      (unsigned long)engine);
>  
>  	engine->park = NULL;
>  	engine->unpark = NULL;
> @@ -2303,8 +2305,8 @@ logical_ring_setup(struct intel_engine_cs *engine)
>  
>  	engine->execlists.fw_domains = fw_domains;
>  
> -	tasklet_init(&engine->execlists.tasklet,
> -		     execlists_submission_tasklet, (unsigned long)engine);
> +	i915_tasklet_init(&engine->execlists.tasklet,
> +			  execlists_submission_tasklet, (unsigned long)engine);
>  
>  	logical_ring_default_vfuncs(engine);
>  	logical_ring_default_irqs(engine);
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 010750e8ee44..f6ba354faf89 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -11,6 +11,7 @@
>  #include "i915_pmu.h"
>  #include "i915_request.h"
>  #include "i915_selftest.h"
> +#include "i915_tasklet.h"
>  #include "i915_timeline.h"
>  #include "intel_gpu_commands.h"
>  
> @@ -202,7 +203,7 @@ struct intel_engine_execlists {
>  	/**
>  	 * @tasklet: softirq tasklet for bottom handler
>  	 */
> -	struct tasklet_struct tasklet;
> +	struct i915_tasklet tasklet;
>  
>  	/**
>  	 * @default_priolist: priority list for I915_PRIORITY_NORMAL
> -- 
> 2.17.0
>

* Re: [PATCH v3 1/5] drm/i915: Wrap tasklet_struct for abuse
  2018-05-09 13:44 ` [PATCH v3 1/5] " Mika Kuoppala
@ 2018-05-09 13:51   ` Chris Wilson
  0 siblings, 0 replies; 14+ messages in thread
From: Chris Wilson @ 2018-05-09 13:51 UTC (permalink / raw)
  To: Mika Kuoppala, intel-gfx

Quoting Mika Kuoppala (2018-05-09 14:44:30)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> 
> > In the next few patches, we want to abuse tasklet to avoid ksoftirqd
> > latency along critical paths. To make that abuse easily to swallow,
> > first coat the tasklet in a little syntactic sugar.
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > ---
> >  drivers/gpu/drm/i915/i915_gem.c             |  8 +--
> >  drivers/gpu/drm/i915/i915_irq.c             |  2 +-
> >  drivers/gpu/drm/i915/i915_tasklet.h         | 78 +++++++++++++++++++++
> >  drivers/gpu/drm/i915/intel_engine_cs.c      | 11 ++-
> >  drivers/gpu/drm/i915/intel_guc_submission.c |  8 +--
> >  drivers/gpu/drm/i915/intel_lrc.c            | 18 ++---
> >  drivers/gpu/drm/i915/intel_ringbuffer.h     |  3 +-
> >  7 files changed, 104 insertions(+), 24 deletions(-)
> >  create mode 100644 drivers/gpu/drm/i915/i915_tasklet.h
> >
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index 89bf5d67cb74..98481e150e61 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -3043,9 +3043,9 @@ i915_gem_reset_prepare_engine(struct intel_engine_cs *engine)
> >        * common case of recursively being called from set-wedged from inside
> >        * i915_reset.
> >        */
> > -     if (!atomic_read(&engine->execlists.tasklet.count))
> > -             tasklet_kill(&engine->execlists.tasklet);
> > -     tasklet_disable(&engine->execlists.tasklet);
> > +     if (i915_tasklet_is_enabled(&engine->execlists.tasklet))
> > +             i915_tasklet_flush(&engine->execlists.tasklet);
> > +     i915_tasklet_disable(&engine->execlists.tasklet);
> >  
> >       /*
> >        * We're using worker to queue preemption requests from the tasklet in
> > @@ -3265,7 +3265,7 @@ void i915_gem_reset(struct drm_i915_private *dev_priv,
> >  
> >  void i915_gem_reset_finish_engine(struct intel_engine_cs *engine)
> >  {
> > -     tasklet_enable(&engine->execlists.tasklet);
> > +     i915_tasklet_enable(&engine->execlists.tasklet);
> >       kthread_unpark(engine->breadcrumbs.signaler);
> >  
> >       intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL);
> > diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> > index f9bc3aaa90d0..f8aff5a5aa83 100644
> > --- a/drivers/gpu/drm/i915/i915_irq.c
> > +++ b/drivers/gpu/drm/i915/i915_irq.c
> > @@ -1477,7 +1477,7 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
> >       }
> >  
> >       if (tasklet)
> > -             tasklet_hi_schedule(&execlists->tasklet);
> > +             i915_tasklet_schedule(&execlists->tasklet);
> >  }
> >  
> >  static void gen8_gt_irq_ack(struct drm_i915_private *i915,
> > diff --git a/drivers/gpu/drm/i915/i915_tasklet.h b/drivers/gpu/drm/i915/i915_tasklet.h
> > new file mode 100644
> > index 000000000000..c9f41a5ebb91
> > --- /dev/null
> > +++ b/drivers/gpu/drm/i915/i915_tasklet.h
> > @@ -0,0 +1,78 @@
> > +/*
> > + * SPDX-License-Identifier: GPL-2.0
> > + *
> > + * Copyright © 2018 Intel Corporation
> > + */
> > +
> > +#ifndef _I915_TASKLET_H_
> > +#define _I915_TASKLET_H_
> > +
> > +#include <linux/atomic.h>
> > +#include <linux/interrupt.h>
> > +
> > +/**
> > + * struct i915_tasklet - wrapper around tasklet_struct
> > + *
> > + * We want to abuse tasklets slightly, such as calling them directly. In some
> > + * cases, this requires some auxiliary tracking so subclass the tasklet_struct
> > + * so that we have a central place and helpers.
> > + */
> > +struct i915_tasklet {
> > +     struct tasklet_struct base;
> > +};
> > +
> > +static inline void i915_tasklet_init(struct i915_tasklet *t,
> > +                                  void (*func)(unsigned long),
> > +                                  unsigned long data)
> > +{
> > +     tasklet_init(&t->base, func, data);
> > +}
> > +
> > +static inline bool i915_tasklet_is_scheduled(const struct i915_tasklet *t)
> > +{
> > +     return test_bit(TASKLET_STATE_SCHED, &t->base.state);
> > +}
> > +
> > +static inline bool i915_tasklet_is_locked(const struct i915_tasklet *t)
> 
> why not is_running() ?

Because I didn't want to resend the series immediately. It's confusing
because it's set by tasklet_trylock and cleared by tasklet_unlock.

> > +     return test_bit(TASKLET_STATE_RUN, &t->base.state);
> > +}
> > +
> > +static inline bool i915_tasklet_is_enabled(const struct i915_tasklet *t)
> > +{
> > +     return likely(!atomic_read(&t->base.count));
> 
> I am concerned that we bury the sign of racy nature out of sight and
> then it comes and bites us.

Oh, I wouldn't worry about that, just wait until you see what comes
later :-p

> > +static inline void i915_tasklet_set_func(struct i915_tasklet *t,
> > +                                      void (*func)(unsigned long data),
> > +                                      unsigned long data)
> > +{
> > +     tasklet_disable(&t->base);
> 
> What does the disable/enable pair buy us here?
> 

I was thinking about being clear about the ordering.

> It looks that you want trylock and unlock
> so that you can safely change the func/data.

As you point out, the lock is tasklet_disable, the trylock is just a
trylock. Embrace the insanity, let it flow through you.
-Chris

* Re: [PATCH v3 2/5] drm/i915: Combine tasklet_kill and tasklet_disable
  2018-05-09 12:04 ` [PATCH v3 2/5] drm/i915: Combine tasklet_kill and tasklet_disable Chris Wilson
@ 2018-05-09 13:54   ` Mika Kuoppala
  2018-05-09 14:02     ` Chris Wilson
  0 siblings, 1 reply; 14+ messages in thread
From: Mika Kuoppala @ 2018-05-09 13:54 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Ideally, we want to atomically flush and disable the tasklet before
> resetting the GPU. At present, we rely on being the only part to touch
> our tasklet and serialisation of the reset process to ensure that we can
> suspend the tasklet from the mix of reset/wedge pathways. In this patch,
> we move the tasklet abuse into its own function and tweak it such that
> we only do a synchronous operation the first time it is disabled around
> the reset. This allows us to avoid the sync inside a softirq context in
> subsequent patches.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> Cc: Michał Winiarski <michal.winiarski@intel.com>
> CC: Michel Thierry <michel.thierry@intel.com>
> Cc: Jeff McGee <jeff.mcgee@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_gem.c        |  2 --
>  drivers/gpu/drm/i915/i915_tasklet.h    | 14 ++++++++++++++
>  drivers/gpu/drm/i915/intel_engine_cs.c |  4 ++--
>  3 files changed, 16 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 98481e150e61..8d27e78b052c 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3043,8 +3043,6 @@ i915_gem_reset_prepare_engine(struct intel_engine_cs *engine)
>  	 * common case of recursively being called from set-wedged from inside
>  	 * i915_reset.
>  	 */
> -	if (i915_tasklet_is_enabled(&engine->execlists.tasklet))
> -		i915_tasklet_flush(&engine->execlists.tasklet);
>  	i915_tasklet_disable(&engine->execlists.tasklet);
>  
>  	/*
> diff --git a/drivers/gpu/drm/i915/i915_tasklet.h b/drivers/gpu/drm/i915/i915_tasklet.h
> index c9f41a5ebb91..d8263892f385 100644
> --- a/drivers/gpu/drm/i915/i915_tasklet.h
> +++ b/drivers/gpu/drm/i915/i915_tasklet.h
> @@ -59,10 +59,24 @@ static inline void i915_tasklet_enable(struct i915_tasklet *t)
>  }
>  
>  static inline void i915_tasklet_disable(struct i915_tasklet *t)
> +{
> +	if (i915_tasklet_is_enabled(t))
> +		i915_tasklet_flush(t);

This needs a comment to explain how we can get away with
the race.

> +
> +	if (atomic_inc_return(&t->base.count) == 1)
> +		tasklet_unlock_wait(&t->base);

I would add a comment in here too.
/* No need to sync on further disables */

-Mika

> +}
> +
> +static inline void i915_tasklet_lock(struct i915_tasklet *t)
>  {
>  	tasklet_disable(&t->base);
>  }
>  
> +static inline void i915_tasklet_unlock(struct i915_tasklet *t)
> +{
> +	tasklet_enable(&t->base);
> +}
> +
>  static inline void i915_tasklet_set_func(struct i915_tasklet *t,
>  					 void (*func)(unsigned long data),
>  					 unsigned long data)
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index 5f1118ea96d8..3c8a0c3245f3 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -1478,7 +1478,7 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
>  	if (!intel_engine_supports_stats(engine))
>  		return -ENODEV;
>  
> -	i915_tasklet_disable(&execlists->tasklet);
> +	i915_tasklet_lock(&execlists->tasklet);

After the initial shock, the *lock starts to fit.
-Mika

>  	write_seqlock_irqsave(&engine->stats.lock, flags);
>  
>  	if (unlikely(engine->stats.enabled == ~0)) {
> @@ -1504,7 +1504,7 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
>  
>  unlock:
>  	write_sequnlock_irqrestore(&engine->stats.lock, flags);
> -	i915_tasklet_enable(&execlists->tasklet);
> +	i915_tasklet_unlock(&execlists->tasklet);
>  
>  	return err;
>  }
> -- 
> 2.17.0

* Re: [PATCH v3 2/5] drm/i915: Combine tasklet_kill and tasklet_disable
  2018-05-09 13:54   ` Mika Kuoppala
@ 2018-05-09 14:02     ` Chris Wilson
  2018-05-09 14:10       ` Chris Wilson
  0 siblings, 1 reply; 14+ messages in thread
From: Chris Wilson @ 2018-05-09 14:02 UTC (permalink / raw)
  To: Mika Kuoppala, intel-gfx; +Cc: Tvrtko

Quoting Mika Kuoppala (2018-05-09 14:54:04)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> 
> > Ideally, we want to atomically flush and disable the tasklet before
> > resetting the GPU. At present, we rely on being the only part to touch
> > our tasklet and serialisation of the reset process to ensure that we can
> > suspend the tasklet from the mix of reset/wedge pathways. In this patch,
> > we move the tasklet abuse into its own function and tweak it such that
> > we only do a synchronous operation the first time it is disabled around
> > the reset. This allows us to avoid the sync inside a softirq context in
> > subsequent patches.
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> > Cc: Michał Winiarski <michal.winiarski@intel.com>
> > CC: Michel Thierry <michel.thierry@intel.com>
> > Cc: Jeff McGee <jeff.mcgee@intel.com>
> > ---
> >  drivers/gpu/drm/i915/i915_gem.c        |  2 --
> >  drivers/gpu/drm/i915/i915_tasklet.h    | 14 ++++++++++++++
> >  drivers/gpu/drm/i915/intel_engine_cs.c |  4 ++--
> >  3 files changed, 16 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index 98481e150e61..8d27e78b052c 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -3043,8 +3043,6 @@ i915_gem_reset_prepare_engine(struct intel_engine_cs *engine)
> >        * common case of recursively being called from set-wedged from inside
> >        * i915_reset.
> >        */
> > -     if (i915_tasklet_is_enabled(&engine->execlists.tasklet))
> > -             i915_tasklet_flush(&engine->execlists.tasklet);
> >       i915_tasklet_disable(&engine->execlists.tasklet);
> >  
> >       /*
> > diff --git a/drivers/gpu/drm/i915/i915_tasklet.h b/drivers/gpu/drm/i915/i915_tasklet.h
> > index c9f41a5ebb91..d8263892f385 100644
> > --- a/drivers/gpu/drm/i915/i915_tasklet.h
> > +++ b/drivers/gpu/drm/i915/i915_tasklet.h
> > @@ -59,10 +59,24 @@ static inline void i915_tasklet_enable(struct i915_tasklet *t)
> >  }
> >  
> >  static inline void i915_tasklet_disable(struct i915_tasklet *t)
> > +{
> > +     if (i915_tasklet_is_enabled(t))
> > +             i915_tasklet_flush(t);
> 
> This needs a comment to explain how we can get away with
> the race.

It doesn't; we rely on the exclusivity provided by the fact that only we
control the tasklet. The rationale comes from the caller. That we call
flush here at all is just to help avoid spinning; it is not required. If
the race is too much of a worry, the next line alone is enough.

> > +     if (atomic_inc_return(&t->base.count) == 1)
> > +             tasklet_unlock_wait(&t->base);
> 
> I would add a comment in here too.
> /* No need to sync on further disables */

That doesn't help much; it is literally what the code is doing, but not
why. The why, again, comes from the caller.
-Chris
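
For reference, a commented sketch of the disable path under discussion, with
the rationale above folded into comments (the comments are illustrative
rather than part of the posted patch, and i915_tasklet_flush() is assumed to
be the flush helper added by patch 1):

static inline void i915_tasklet_disable(struct i915_tasklet *t)
{
	/*
	 * Optional: flushing first only shortens the time we may spin in
	 * tasklet_unlock_wait() below. We rely on being the only party that
	 * touches this tasklet, so the is_enabled() check is benign despite
	 * looking racy.
	 */
	if (i915_tasklet_is_enabled(t))
		i915_tasklet_flush(t);

	/*
	 * Only the first disable has to wait for a concurrently running
	 * instance; nested disables see count already raised and skip the
	 * synchronous wait.
	 */
	if (atomic_inc_return(&t->base.count) == 1)
		tasklet_unlock_wait(&t->base);
}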

* Re: [PATCH v3 2/5] drm/i915: Combine tasklet_kill and tasklet_disable
  2018-05-09 14:02     ` Chris Wilson
@ 2018-05-09 14:10       ` Chris Wilson
  0 siblings, 0 replies; 14+ messages in thread
From: Chris Wilson @ 2018-05-09 14:10 UTC (permalink / raw)
  To: Mika Kuoppala, intel-gfx

Quoting Chris Wilson (2018-05-09 15:02:12)
> Quoting Mika Kuoppala (2018-05-09 14:54:04)
> > Chris Wilson <chris@chris-wilson.co.uk> writes:
> > 
> > >  static inline void i915_tasklet_disable(struct i915_tasklet *t)
> > > +{
> > > +     if (i915_tasklet_is_enabled(t))
> > > +             i915_tasklet_flush(t);
> > 
> > This needs a comment to explain how we can get away with
> > the race.
> 
> It doesn't; we rely on the exclusivity provided by the fact that only we
> control the tasklet. The rationale comes from the caller. That we call
> flush here at all is just to help avoid spinning; it is not required. If
> the race is too much of a worry, the next line alone is enough.

Rather than ever worry, let's just kill it.
-Chris
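
If "kill it" refers to the optional flush above, the disable path reduces to
just the counted wait; a sketch of that simplification (an assumption about
the follow-up, not a posted patch):

static inline void i915_tasklet_disable(struct i915_tasklet *t)
{
	/* only the first disable synchronises against a running instance */
	if (atomic_inc_return(&t->base.count) == 1)
		tasklet_unlock_wait(&t->base);
}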

* ✓ Fi.CI.IGT: success for series starting with [v3,1/5] drm/i915: Wrap tasklet_struct for abuse
  2018-05-09 12:04 [PATCH v3 1/5] drm/i915: Wrap tasklet_struct for abuse Chris Wilson
                   ` (7 preceding siblings ...)
  2018-05-09 13:44 ` [PATCH v3 1/5] " Mika Kuoppala
@ 2018-05-09 14:26 ` Patchwork
  8 siblings, 0 replies; 14+ messages in thread
From: Patchwork @ 2018-05-09 14:26 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [v3,1/5] drm/i915: Wrap tasklet_struct for abuse
URL   : https://patchwork.freedesktop.org/series/42942/
State : success

== Summary ==

= CI Bug Log - changes from CI_DRM_4162_full -> Patchwork_8961_full =

== Summary - WARNING ==

  Minor unknown changes coming with Patchwork_8961_full need to be verified
  manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_8961_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/42942/revisions/1/mbox/

== Possible new issues ==

  Here are the unknown changes that may have been introduced in Patchwork_8961_full:

  === IGT changes ===

    ==== Warnings ====

    igt@gem_exec_schedule@deep-bsd1:
      shard-kbl:          PASS -> SKIP +2

    igt@gem_exec_schedule@deep-bsd2:
      shard-kbl:          SKIP -> PASS

    
== Known issues ==

  Here are the changes found in Patchwork_8961_full that come from known issues:

  === IGT changes ===

    ==== Issues hit ====

    igt@kms_chv_cursor_fail@pipe-c-256x256-bottom-edge:
      shard-kbl:          PASS -> DMESG-WARN (fdo#103558, fdo#105602) +33

    igt@kms_flip@wf_vblank-ts-check-interruptible:
      shard-glk:          PASS -> FAIL (fdo#100368)

    igt@kms_setmode@basic:
      shard-hsw:          PASS -> FAIL (fdo#99912)
      shard-kbl:          PASS -> FAIL (fdo#99912)

    igt@perf_pmu@enable-race-vcs0:
      shard-glk:          PASS -> INCOMPLETE (fdo#103359, k.org#198133) +2

    igt@perf_pmu@enable-race-vcs1:
      shard-kbl:          PASS -> INCOMPLETE (fdo#103665) +1

    igt@perf_pmu@enable-race-vecs0:
      shard-apl:          PASS -> INCOMPLETE (fdo#103927) +2

    
    ==== Possible fixes ====

    igt@kms_cursor_legacy@2x-flip-vs-cursor-atomic:
      shard-hsw:          FAIL (fdo#104873) -> PASS

    igt@kms_flip@flip-vs-wf_vblank-interruptible:
      shard-glk:          FAIL (fdo#100368) -> PASS

    igt@perf@blocking:
      shard-hsw:          FAIL (fdo#102252) -> PASS +1

    
  fdo#100368 https://bugs.freedesktop.org/show_bug.cgi?id=100368
  fdo#102252 https://bugs.freedesktop.org/show_bug.cgi?id=102252
  fdo#103359 https://bugs.freedesktop.org/show_bug.cgi?id=103359
  fdo#103558 https://bugs.freedesktop.org/show_bug.cgi?id=103558
  fdo#103665 https://bugs.freedesktop.org/show_bug.cgi?id=103665
  fdo#103927 https://bugs.freedesktop.org/show_bug.cgi?id=103927
  fdo#104873 https://bugs.freedesktop.org/show_bug.cgi?id=104873
  fdo#105602 https://bugs.freedesktop.org/show_bug.cgi?id=105602
  fdo#99912 https://bugs.freedesktop.org/show_bug.cgi?id=99912
  k.org#198133 https://bugzilla.kernel.org/show_bug.cgi?id=198133


== Participating hosts (9 -> 9) ==

  No changes in participating hosts


== Build changes ==

    * Linux: CI_DRM_4162 -> Patchwork_8961

  CI_DRM_4162: ea17aab5ec734a262a252758d7412c09dfa833cc @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4468: 548a894dc904c4628522dbbc77cb179a4c965ebc @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_8961: 5126eaff44f881f5deff006f2338ef87f4940bf9 @ git://anongit.freedesktop.org/gfx-ci/linux
  piglit_4468: 1e60f1499e5b71b6d5a747189d7c28f57359a87f @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_8961/shards.html

end of thread, other threads:[~2018-05-09 14:26 UTC | newest]

Thread overview: 14+ messages
-- links below jump to the message on this page --
2018-05-09 12:04 [PATCH v3 1/5] drm/i915: Wrap tasklet_struct for abuse Chris Wilson
2018-05-09 12:04 ` [PATCH v3 2/5] drm/i915: Combine tasklet_kill and tasklet_disable Chris Wilson
2018-05-09 13:54   ` Mika Kuoppala
2018-05-09 14:02     ` Chris Wilson
2018-05-09 14:10       ` Chris Wilson
2018-05-09 12:04 ` [PATCH v3 3/5] drm/i915/execlists: Direct submit onto idle engines Chris Wilson
2018-05-09 12:04 ` [PATCH v3 4/5] drm/i915/execlists: Direct submission from irq handler Chris Wilson
2018-05-09 12:04 ` [PATCH v3 5/5] drm/i915: Speed up idle detection by kicking the tasklets Chris Wilson
2018-05-09 12:28 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [v3,1/5] drm/i915: Wrap tasklet_struct for abuse Patchwork
2018-05-09 12:30 ` ✗ Fi.CI.SPARSE: " Patchwork
2018-05-09 12:44 ` ✓ Fi.CI.BAT: success " Patchwork
2018-05-09 13:44 ` [PATCH v3 1/5] " Mika Kuoppala
2018-05-09 13:51   ` Chris Wilson
2018-05-09 14:26 ` ✓ Fi.CI.IGT: success for series starting with [v3,1/5] " Patchwork
