All of lore.kernel.org
 help / color / mirror / Atom feed
* Fun with signals
@ 2018-01-05 12:37 Chris Wilson
  2018-01-05 12:37 ` [PATCH 01/14] drm/i915: Only defer freeing of fence callback when also using the timer Chris Wilson
                   ` (14 more replies)
  0 siblings, 15 replies; 16+ messages in thread
From: Chris Wilson @ 2018-01-05 12:37 UTC (permalink / raw)
  To: intel-gfx

Just some fallout with handling signals when the interrupt generation
rate is sufficient to saturate the CPU, leaving no time to clean up. In
particular, it's quite easy to cause oom on snb with a bunch of nops.
Overall though there's a small latency improvement for inter-engine
signaling.
-Chris

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH 01/14] drm/i915: Only defer freeing of fence callback when also using the timer
  2018-01-05 12:37 Fun with signals Chris Wilson
@ 2018-01-05 12:37 ` Chris Wilson
  2018-01-05 12:37 ` [PATCH 02/14] drm/i915/fence: Separate timeout mechanism for awaiting on dma-fences Chris Wilson
                   ` (13 subsequent siblings)
  14 siblings, 0 replies; 16+ messages in thread
From: Chris Wilson @ 2018-01-05 12:37 UTC (permalink / raw)
  To: intel-gfx

Without an accompanying timer (for internal fences), we can free the
fence callback immediately as we do not need to employ the RCU barrier
to serialise with the timer. By avoiding the RCU delay, we can avoid the
extra mempressure under heavy inter-engine request utilisation.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_sw_fence.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c
index 3669f5eeb91e..13021326d777 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.c
+++ b/drivers/gpu/drm/i915/i915_sw_fence.c
@@ -398,7 +398,12 @@ static void dma_i915_sw_fence_wake(struct dma_fence *dma,
 	if (fence)
 		i915_sw_fence_complete(fence);
 
-	irq_work_queue(&cb->work);
+	if (cb->dma) {
+		irq_work_queue(&cb->work);
+		return;
+	}
+
+	kfree(cb);
 }
 
 static void irq_i915_sw_fence_work(struct irq_work *wrk)
@@ -437,10 +442,12 @@ int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
 	i915_sw_fence_await(fence);
 
 	cb->dma = NULL;
-	timer_setup(&cb->timer, timer_i915_sw_fence_wake, TIMER_IRQSAFE);
-	init_irq_work(&cb->work, irq_i915_sw_fence_work);
 	if (timeout) {
 		cb->dma = dma_fence_get(dma);
+		init_irq_work(&cb->work, irq_i915_sw_fence_work);
+
+		timer_setup(&cb->timer,
+			    timer_i915_sw_fence_wake, TIMER_IRQSAFE);
 		mod_timer(&cb->timer, round_jiffies_up(jiffies + timeout));
 	}
 
-- 
2.15.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 02/14] drm/i915/fence: Separate timeout mechanism for awaiting on dma-fences
  2018-01-05 12:37 Fun with signals Chris Wilson
  2018-01-05 12:37 ` [PATCH 01/14] drm/i915: Only defer freeing of fence callback when also using the timer Chris Wilson
@ 2018-01-05 12:37 ` Chris Wilson
  2018-01-05 12:37 ` [PATCH 03/14] drm/i915: Rename some shorthand lock classes Chris Wilson
                   ` (12 subsequent siblings)
  14 siblings, 0 replies; 16+ messages in thread
From: Chris Wilson @ 2018-01-05 12:37 UTC (permalink / raw)
  To: intel-gfx

As the timeout mechanism has grown more and more complicated, using
multiple deferred tasks and more than doubling the size of our struct,
split the two implementations to streamline the simpler no-timeout
callback variant.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_sw_fence.c | 61 +++++++++++++++++++++++-------------
 1 file changed, 40 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c
index 13021326d777..1de5173e53a2 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.c
+++ b/drivers/gpu/drm/i915/i915_sw_fence.c
@@ -365,18 +365,31 @@ int i915_sw_fence_await_sw_fence_gfp(struct i915_sw_fence *fence,
 struct i915_sw_dma_fence_cb {
 	struct dma_fence_cb base;
 	struct i915_sw_fence *fence;
+};
+
+struct i915_sw_dma_fence_cb_timer {
+	struct i915_sw_dma_fence_cb base;
 	struct dma_fence *dma;
 	struct timer_list timer;
 	struct irq_work work;
 	struct rcu_head rcu;
 };
 
+static void dma_i915_sw_fence_wake(struct dma_fence *dma,
+				   struct dma_fence_cb *data)
+{
+	struct i915_sw_dma_fence_cb *cb = container_of(data, typeof(*cb), base);
+
+	i915_sw_fence_complete(cb->fence);
+	kfree(cb);
+}
+
 static void timer_i915_sw_fence_wake(struct timer_list *t)
 {
-	struct i915_sw_dma_fence_cb *cb = from_timer(cb, t, timer);
+	struct i915_sw_dma_fence_cb_timer *cb = from_timer(cb, t, timer);
 	struct i915_sw_fence *fence;
 
-	fence = xchg(&cb->fence, NULL);
+	fence = xchg(&cb->base.fence, NULL);
 	if (!fence)
 		return;
 
@@ -388,27 +401,24 @@ static void timer_i915_sw_fence_wake(struct timer_list *t)
 	i915_sw_fence_complete(fence);
 }
 
-static void dma_i915_sw_fence_wake(struct dma_fence *dma,
-				   struct dma_fence_cb *data)
+static void dma_i915_sw_fence_wake_timer(struct dma_fence *dma,
+					 struct dma_fence_cb *data)
 {
-	struct i915_sw_dma_fence_cb *cb = container_of(data, typeof(*cb), base);
+	struct i915_sw_dma_fence_cb_timer *cb =
+		container_of(data, typeof(*cb), base.base);
 	struct i915_sw_fence *fence;
 
-	fence = xchg(&cb->fence, NULL);
+	fence = xchg(&cb->base.fence, NULL);
 	if (fence)
 		i915_sw_fence_complete(fence);
 
-	if (cb->dma) {
-		irq_work_queue(&cb->work);
-		return;
-	}
-
-	kfree(cb);
+	irq_work_queue(&cb->work);
 }
 
 static void irq_i915_sw_fence_work(struct irq_work *wrk)
 {
-	struct i915_sw_dma_fence_cb *cb = container_of(wrk, typeof(*cb), work);
+	struct i915_sw_dma_fence_cb_timer *cb =
+		container_of(wrk, typeof(*cb), work);
 
 	del_timer_sync(&cb->timer);
 	dma_fence_put(cb->dma);
@@ -422,6 +432,7 @@ int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
 				  gfp_t gfp)
 {
 	struct i915_sw_dma_fence_cb *cb;
+	dma_fence_func_t func;
 	int ret;
 
 	debug_fence_assert(fence);
@@ -430,7 +441,10 @@ int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
 	if (dma_fence_is_signaled(dma))
 		return 0;
 
-	cb = kmalloc(sizeof(*cb), gfp);
+	cb = kmalloc(timeout ?
+		     sizeof(struct i915_sw_dma_fence_cb_timer) :
+		     sizeof(struct i915_sw_dma_fence_cb),
+		     gfp);
 	if (!cb) {
 		if (!gfpflags_allow_blocking(gfp))
 			return -ENOMEM;
@@ -441,21 +455,26 @@ int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
 	cb->fence = fence;
 	i915_sw_fence_await(fence);
 
-	cb->dma = NULL;
+	func = dma_i915_sw_fence_wake;
 	if (timeout) {
-		cb->dma = dma_fence_get(dma);
-		init_irq_work(&cb->work, irq_i915_sw_fence_work);
+		struct i915_sw_dma_fence_cb_timer *timer =
+			container_of(cb, typeof(*timer), base);
 
-		timer_setup(&cb->timer,
+		timer->dma = dma_fence_get(dma);
+		init_irq_work(&timer->work, irq_i915_sw_fence_work);
+
+		timer_setup(&timer->timer,
 			    timer_i915_sw_fence_wake, TIMER_IRQSAFE);
-		mod_timer(&cb->timer, round_jiffies_up(jiffies + timeout));
+		mod_timer(&timer->timer, round_jiffies_up(jiffies + timeout));
+
+		func = dma_i915_sw_fence_wake_timer;
 	}
 
-	ret = dma_fence_add_callback(dma, &cb->base, dma_i915_sw_fence_wake);
+	ret = dma_fence_add_callback(dma, &cb->base, func);
 	if (ret == 0) {
 		ret = 1;
 	} else {
-		dma_i915_sw_fence_wake(dma, &cb->base);
+		func(dma, &cb->base);
 		if (ret == -ENOENT) /* fence already signaled */
 			ret = 0;
 	}
-- 
2.15.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 03/14] drm/i915: Rename some shorthand lock classes
  2018-01-05 12:37 Fun with signals Chris Wilson
  2018-01-05 12:37 ` [PATCH 01/14] drm/i915: Only defer freeing of fence callback when also using the timer Chris Wilson
  2018-01-05 12:37 ` [PATCH 02/14] drm/i915/fence: Separate timeout mechanism for awaiting on dma-fences Chris Wilson
@ 2018-01-05 12:37 ` Chris Wilson
  2018-01-05 12:37 ` [PATCH 04/14] drm/i915: Use our singlethreaded wq for freeing objects Chris Wilson
                   ` (11 subsequent siblings)
  14 siblings, 0 replies; 16+ messages in thread
From: Chris Wilson @ 2018-01-05 12:37 UTC (permalink / raw)
  To: intel-gfx

By default, lockdep takes the stringified variable as the name for the
lock class. Quite often, these are constructed from local variables that
are chosen for their brevity resulting in less than distinct class
names. Rename some of the worst offenders encountered in recent reports.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c             | 2 ++
 drivers/gpu/drm/i915/i915_gem_request.c     | 2 ++
 drivers/gpu/drm/i915/i915_gem_timeline.c    | 4 ++--
 drivers/gpu/drm/i915/i915_gem_userptr.c     | 2 ++
 drivers/gpu/drm/i915/i915_utils.h           | 7 +++++++
 drivers/gpu/drm/i915/intel_breadcrumbs.c    | 3 +++
 drivers/gpu/drm/i915/intel_guc_submission.c | 2 ++
 7 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ba9f67c256f4..51488da373e1 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4459,6 +4459,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 			  const struct drm_i915_gem_object_ops *ops)
 {
 	mutex_init(&obj->mm.lock);
+	lockdep_set_name(&obj->mm.lock, "i915_gem_object->mm");
 
 	INIT_LIST_HEAD(&obj->vma_list);
 	INIT_LIST_HEAD(&obj->lut_list);
@@ -4475,6 +4476,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 	obj->mm.madv = I915_MADV_WILLNEED;
 	INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN);
 	mutex_init(&obj->mm.get_page.lock);
+	lockdep_set_name(&obj->mm.get_page.lock, "i915_gem_object->mm.get_page");
 
 	i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size);
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 72bdc203716f..86c86a17d558 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -707,6 +707,8 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 	GEM_BUG_ON(req->timeline == engine->timeline);
 
 	spin_lock_init(&req->lock);
+	lockdep_set_name(&req->lock, "i915_request");
+
 	dma_fence_init(&req->fence,
 		       &i915_fence_ops,
 		       &req->lock,
diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.c b/drivers/gpu/drm/i915/i915_gem_timeline.c
index e9fd87604067..82da881e52ae 100644
--- a/drivers/gpu/drm/i915/i915_gem_timeline.c
+++ b/drivers/gpu/drm/i915/i915_gem_timeline.c
@@ -90,7 +90,7 @@ int i915_gem_timeline_init(struct drm_i915_private *i915,
 	static struct lock_class_key class;
 
 	return __i915_gem_timeline_init(i915, timeline, name,
-					&class, "&timeline->lock");
+					&class, "i915_user_timeline");
 }
 
 int i915_gem_timeline_init__global(struct drm_i915_private *i915)
@@ -100,7 +100,7 @@ int i915_gem_timeline_init__global(struct drm_i915_private *i915)
 	return __i915_gem_timeline_init(i915,
 					&i915->gt.global_timeline,
 					"[execution]",
-					&class, "&global_timeline->lock");
+					&class, "i915_global_timeline");
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index 382a77a1097e..6b08ee481cea 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -170,6 +170,8 @@ i915_mmu_notifier_create(struct mm_struct *mm)
 		return ERR_PTR(-ENOMEM);
 
 	spin_lock_init(&mn->lock);
+	lockdep_set_name(&mn->lock, "i915_mmu_notifier");
+
 	mn->mn.ops = &i915_gem_userptr_notifier;
 	mn->objects = RB_ROOT_CACHED;
 	mn->wq = alloc_workqueue("i915-userptr-release",
diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
index 51dbfe5bb418..dc6deba519db 100644
--- a/drivers/gpu/drm/i915/i915_utils.h
+++ b/drivers/gpu/drm/i915/i915_utils.h
@@ -118,6 +118,13 @@ static inline u64 ptr_to_u64(const void *ptr)
 	__idx;								\
 })
 
+#ifdef CONFIG_LOCKDEP
+#define lockdep_set_name(lock, name)					\
+	lockdep_set_class_and_name(lock, (lock)->dep_map.key, name)
+#else
+#define lockdep_set_name(lock, name)	do {} while (0)
+#endif
+
 #include <linux/list.h>
 
 static inline void __list_del_many(struct list_head *head,
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 86acac010bb8..3b8fff28a7b0 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -843,7 +843,10 @@ int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
 	struct task_struct *tsk;
 
 	spin_lock_init(&b->rb_lock);
+	lockdep_set_name(&b->rb_lock, "intel_breadcrumbs->rb");
+
 	spin_lock_init(&b->irq_lock);
+	lockdep_set_name(&b->irq_lock, "intel_breadcrumbs->irq");
 
 	timer_setup(&b->fake_irq, intel_breadcrumbs_fake_irq, 0);
 	timer_setup(&b->hangcheck, intel_breadcrumbs_hangcheck, 0);
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index 1f3a8786bbdc..566dc571a81b 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -888,7 +888,9 @@ guc_client_alloc(struct drm_i915_private *dev_priv,
 	client->engines = engines;
 	client->priority = priority;
 	client->doorbell_id = GUC_DOORBELL_INVALID;
+
 	spin_lock_init(&client->wq_lock);
+	lockdep_set_name(&client->wq_lock, "intel_guc_client->wq");
 
 	ret = ida_simple_get(&guc->stage_ids, 0, GUC_MAX_STAGE_DESCRIPTORS,
 			     GFP_KERNEL);
-- 
2.15.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 04/14] drm/i915: Use our singlethreaded wq for freeing objects
  2018-01-05 12:37 Fun with signals Chris Wilson
                   ` (2 preceding siblings ...)
  2018-01-05 12:37 ` [PATCH 03/14] drm/i915: Rename some shorthand lock classes Chris Wilson
@ 2018-01-05 12:37 ` Chris Wilson
  2018-01-05 12:37 ` [PATCH 05/14] drm/i915: Only attempt to scan the requested number of shrinker slabs Chris Wilson
                   ` (10 subsequent siblings)
  14 siblings, 0 replies; 16+ messages in thread
From: Chris Wilson @ 2018-01-05 12:37 UTC (permalink / raw)
  To: intel-gfx

As freeing the objects require serialisation on struct_mutex, we should
prefer to use our singlethreaded driver wq that is dedicated to work
requiring struct_mutex (hence serialised). The benefit should be less
clutter on the system wq, allowing it to make progress even when the
driver/struct_mutex is heavily contended.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 51488da373e1..a227cc7ea952 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4734,7 +4734,7 @@ static void __i915_gem_free_object_rcu(struct rcu_head *head)
 	 * detour through a worker.
 	 */
 	if (llist_add(&obj->freed, &i915->mm.free_list))
-		schedule_work(&i915->mm.free_work);
+		queue_work(i915->wq, &i915->mm.free_work);
 }
 
 void i915_gem_free_object(struct drm_gem_object *gem_obj)
-- 
2.15.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 05/14] drm/i915: Only attempt to scan the requested number of shrinker slabs
  2018-01-05 12:37 Fun with signals Chris Wilson
                   ` (3 preceding siblings ...)
  2018-01-05 12:37 ` [PATCH 04/14] drm/i915: Use our singlethreaded wq for freeing objects Chris Wilson
@ 2018-01-05 12:37 ` Chris Wilson
  2018-01-05 12:37 ` [PATCH 06/14] drm/i915: Move i915_gem_retire_work_handler Chris Wilson
                   ` (9 subsequent siblings)
  14 siblings, 0 replies; 16+ messages in thread
From: Chris Wilson @ 2018-01-05 12:37 UTC (permalink / raw)
  To: intel-gfx

Since commit 4e773c3a8a69 ("drm/i915: Wire up shrinkctl->nr_scanned"),
we track the number of objects we scan and do not wish to exceed that as
it will overly penalise our own slabs under mempressure. Given that we
now know the target number of objects to scan, use that as our guide for
deciding to shrink as opposed to the number of objects we manage to
shrink (which doesn't correspond to the numbers we report to shrinkctl).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem_shrinker.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index 9029ed04879c..0e158f9287c4 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -363,13 +363,13 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
 				I915_SHRINK_BOUND |
 				I915_SHRINK_UNBOUND |
 				I915_SHRINK_PURGEABLE);
-	if (freed < sc->nr_to_scan)
+	if (sc->nr_scanned < sc->nr_to_scan)
 		freed += i915_gem_shrink(i915,
 					 sc->nr_to_scan - sc->nr_scanned,
 					 &sc->nr_scanned,
 					 I915_SHRINK_BOUND |
 					 I915_SHRINK_UNBOUND);
-	if (freed < sc->nr_to_scan && current_is_kswapd()) {
+	if (sc->nr_scanned < sc->nr_to_scan && current_is_kswapd()) {
 		intel_runtime_pm_get(i915);
 		freed += i915_gem_shrink(i915,
 					 sc->nr_to_scan - sc->nr_scanned,
-- 
2.15.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 06/14] drm/i915: Move i915_gem_retire_work_handler
  2018-01-05 12:37 Fun with signals Chris Wilson
                   ` (4 preceding siblings ...)
  2018-01-05 12:37 ` [PATCH 05/14] drm/i915: Only attempt to scan the requested number of shrinker slabs Chris Wilson
@ 2018-01-05 12:37 ` Chris Wilson
  2018-01-05 12:37 ` [PATCH 07/14] drm/i915: Shrink the GEM kmem_caches upon idling Chris Wilson
                   ` (8 subsequent siblings)
  14 siblings, 0 replies; 16+ messages in thread
From: Chris Wilson @ 2018-01-05 12:37 UTC (permalink / raw)
  To: intel-gfx

In preparation for the next patch, move i915_gem_retire_work_handler()
later to avoid a forward declaration.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c | 228 ++++++++++++++++++++--------------------
 1 file changed, 114 insertions(+), 114 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a227cc7ea952..5ef0a18bf52c 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3310,120 +3310,6 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
 	return true;
 }
 
-static void
-i915_gem_retire_work_handler(struct work_struct *work)
-{
-	struct drm_i915_private *dev_priv =
-		container_of(work, typeof(*dev_priv), gt.retire_work.work);
-	struct drm_device *dev = &dev_priv->drm;
-
-	/* Come back later if the device is busy... */
-	if (mutex_trylock(&dev->struct_mutex)) {
-		i915_gem_retire_requests(dev_priv);
-		mutex_unlock(&dev->struct_mutex);
-	}
-
-	/* Keep the retire handler running until we are finally idle.
-	 * We do not need to do this test under locking as in the worst-case
-	 * we queue the retire worker once too often.
-	 */
-	if (READ_ONCE(dev_priv->gt.awake)) {
-		i915_queue_hangcheck(dev_priv);
-		queue_delayed_work(dev_priv->wq,
-				   &dev_priv->gt.retire_work,
-				   round_jiffies_up_relative(HZ));
-	}
-}
-
-static inline bool
-new_requests_since_last_retire(const struct drm_i915_private *i915)
-{
-	return (READ_ONCE(i915->gt.active_requests) ||
-		work_pending(&i915->gt.idle_work.work));
-}
-
-static void
-i915_gem_idle_work_handler(struct work_struct *work)
-{
-	struct drm_i915_private *dev_priv =
-		container_of(work, typeof(*dev_priv), gt.idle_work.work);
-	bool rearm_hangcheck;
-	ktime_t end;
-
-	if (!READ_ONCE(dev_priv->gt.awake))
-		return;
-
-	/*
-	 * Wait for last execlists context complete, but bail out in case a
-	 * new request is submitted.
-	 */
-	end = ktime_add_ms(ktime_get(), I915_IDLE_ENGINES_TIMEOUT);
-	do {
-		if (new_requests_since_last_retire(dev_priv))
-			return;
-
-		if (intel_engines_are_idle(dev_priv))
-			break;
-
-		usleep_range(100, 500);
-	} while (ktime_before(ktime_get(), end));
-
-	rearm_hangcheck =
-		cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
-
-	if (!mutex_trylock(&dev_priv->drm.struct_mutex)) {
-		/* Currently busy, come back later */
-		mod_delayed_work(dev_priv->wq,
-				 &dev_priv->gt.idle_work,
-				 msecs_to_jiffies(50));
-		goto out_rearm;
-	}
-
-	/*
-	 * New request retired after this work handler started, extend active
-	 * period until next instance of the work.
-	 */
-	if (new_requests_since_last_retire(dev_priv))
-		goto out_unlock;
-
-	/*
-	 * Be paranoid and flush a concurrent interrupt to make sure
-	 * we don't reactivate any irq tasklets after parking.
-	 *
-	 * FIXME: Note that even though we have waited for execlists to be idle,
-	 * there may still be an in-flight interrupt even though the CSB
-	 * is now empty. synchronize_irq() makes sure that a residual interrupt
-	 * is completed before we continue, but it doesn't prevent the HW from
-	 * raising a spurious interrupt later. To complete the shield we should
-	 * coordinate disabling the CS irq with flushing the interrupts.
-	 */
-	synchronize_irq(dev_priv->drm.irq);
-
-	intel_engines_park(dev_priv);
-	i915_gem_timelines_park(dev_priv);
-
-	i915_pmu_gt_parked(dev_priv);
-
-	GEM_BUG_ON(!dev_priv->gt.awake);
-	dev_priv->gt.awake = false;
-	rearm_hangcheck = false;
-
-	if (INTEL_GEN(dev_priv) >= 6)
-		gen6_rps_idle(dev_priv);
-
-	intel_display_power_put(dev_priv, POWER_DOMAIN_GT_IRQ);
-
-	intel_runtime_pm_put(dev_priv);
-out_unlock:
-	mutex_unlock(&dev_priv->drm.struct_mutex);
-
-out_rearm:
-	if (rearm_hangcheck) {
-		GEM_BUG_ON(!dev_priv->gt.awake);
-		i915_queue_hangcheck(dev_priv);
-	}
-}
-
 void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
 {
 	struct drm_i915_private *i915 = to_i915(gem->dev);
@@ -4800,6 +4686,120 @@ void i915_gem_sanitize(struct drm_i915_private *i915)
 	}
 }
 
+static void
+i915_gem_retire_work_handler(struct work_struct *work)
+{
+	struct drm_i915_private *dev_priv =
+		container_of(work, typeof(*dev_priv), gt.retire_work.work);
+	struct drm_device *dev = &dev_priv->drm;
+
+	/* Come back later if the device is busy... */
+	if (mutex_trylock(&dev->struct_mutex)) {
+		i915_gem_retire_requests(dev_priv);
+		mutex_unlock(&dev->struct_mutex);
+	}
+
+	/* Keep the retire handler running until we are finally idle.
+	 * We do not need to do this test under locking as in the worst-case
+	 * we queue the retire worker once too often.
+	 */
+	if (READ_ONCE(dev_priv->gt.awake)) {
+		i915_queue_hangcheck(dev_priv);
+		queue_delayed_work(dev_priv->wq,
+				   &dev_priv->gt.retire_work,
+				   round_jiffies_up_relative(HZ));
+	}
+}
+
+static inline bool
+new_requests_since_last_retire(const struct drm_i915_private *i915)
+{
+	return (READ_ONCE(i915->gt.active_requests) ||
+		work_pending(&i915->gt.idle_work.work));
+}
+
+static void
+i915_gem_idle_work_handler(struct work_struct *work)
+{
+	struct drm_i915_private *dev_priv =
+		container_of(work, typeof(*dev_priv), gt.idle_work.work);
+	bool rearm_hangcheck;
+	ktime_t end;
+
+	if (!READ_ONCE(dev_priv->gt.awake))
+		return;
+
+	/*
+	 * Wait for last execlists context complete, but bail out in case a
+	 * new request is submitted.
+	 */
+	end = ktime_add_ms(ktime_get(), I915_IDLE_ENGINES_TIMEOUT);
+	do {
+		if (new_requests_since_last_retire(dev_priv))
+			return;
+
+		if (intel_engines_are_idle(dev_priv))
+			break;
+
+		usleep_range(100, 500);
+	} while (ktime_before(ktime_get(), end));
+
+	rearm_hangcheck =
+		cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
+
+	if (!mutex_trylock(&dev_priv->drm.struct_mutex)) {
+		/* Currently busy, come back later */
+		mod_delayed_work(dev_priv->wq,
+				 &dev_priv->gt.idle_work,
+				 msecs_to_jiffies(50));
+		goto out_rearm;
+	}
+
+	/*
+	 * New request retired after this work handler started, extend active
+	 * period until next instance of the work.
+	 */
+	if (new_requests_since_last_retire(dev_priv))
+		goto out_unlock;
+
+	/*
+	 * Be paranoid and flush a concurrent interrupt to make sure
+	 * we don't reactivate any irq tasklets after parking.
+	 *
+	 * FIXME: Note that even though we have waited for execlists to be idle,
+	 * there may still be an in-flight interrupt even though the CSB
+	 * is now empty. synchronize_irq() makes sure that a residual interrupt
+	 * is completed before we continue, but it doesn't prevent the HW from
+	 * raising a spurious interrupt later. To complete the shield we should
+	 * coordinate disabling the CS irq with flushing the interrupts.
+	 */
+	synchronize_irq(dev_priv->drm.irq);
+
+	intel_engines_park(dev_priv);
+	i915_gem_timelines_park(dev_priv);
+
+	i915_pmu_gt_parked(dev_priv);
+
+	GEM_BUG_ON(!dev_priv->gt.awake);
+	dev_priv->gt.awake = false;
+	rearm_hangcheck = false;
+
+	if (INTEL_GEN(dev_priv) >= 6)
+		gen6_rps_idle(dev_priv);
+
+	intel_display_power_put(dev_priv, POWER_DOMAIN_GT_IRQ);
+
+	intel_runtime_pm_put(dev_priv);
+out_unlock:
+	mutex_unlock(&dev_priv->drm.struct_mutex);
+
+out_rearm:
+	if (rearm_hangcheck) {
+		GEM_BUG_ON(!dev_priv->gt.awake);
+		i915_queue_hangcheck(dev_priv);
+	}
+}
+
 int i915_gem_suspend(struct drm_i915_private *dev_priv)
 {
 	struct drm_device *dev = &dev_priv->drm;
-- 
2.15.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 07/14] drm/i915: Shrink the GEM kmem_caches upon idling
  2018-01-05 12:37 Fun with signals Chris Wilson
                   ` (5 preceding siblings ...)
  2018-01-05 12:37 ` [PATCH 06/14] drm/i915: Move i915_gem_retire_work_handler Chris Wilson
@ 2018-01-05 12:37 ` Chris Wilson
  2018-01-05 12:37 ` [PATCH 08/14] drm/i915: Shrink the request kmem_cache on allocation error Chris Wilson
                   ` (7 subsequent siblings)
  14 siblings, 0 replies; 16+ messages in thread
From: Chris Wilson @ 2018-01-05 12:37 UTC (permalink / raw)
  To: intel-gfx

When we finally decide the gpu is idle, that is a good time to shrink
our kmem_caches.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 5ef0a18bf52c..8168c7c242a5 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4711,6 +4711,21 @@ i915_gem_retire_work_handler(struct work_struct *work)
 	}
 }
 
+static void shrink_caches(struct drm_i915_private *i915)
+{
+	/*
+	 * kmem_cache_shrink() discards empty slabs and reorders partially
+	 * filled slabs to prioritise allocating from the mostly full slabs,
+	 * with the aim of reducing fragmentation.
+	 */
+	kmem_cache_shrink(i915->priorities);
+	kmem_cache_shrink(i915->dependencies);
+	kmem_cache_shrink(i915->requests);
+	kmem_cache_shrink(i915->luts);
+	kmem_cache_shrink(i915->vmas);
+	kmem_cache_shrink(i915->objects);
+}
+
 static inline bool
 new_requests_since_last_retire(const struct drm_i915_private *i915)
 {
@@ -4798,6 +4813,13 @@ i915_gem_idle_work_handler(struct work_struct *work)
 		GEM_BUG_ON(!dev_priv->gt.awake);
 		i915_queue_hangcheck(dev_priv);
 	}
+
+	rcu_barrier();
+
+	if (!new_requests_since_last_retire(dev_priv)) {
+		__i915_gem_free_work(&dev_priv->mm.free_work);
+		shrink_caches(dev_priv);
+	}
 }
 
 int i915_gem_suspend(struct drm_i915_private *dev_priv)
-- 
2.15.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 08/14] drm/i915: Shrink the request kmem_cache on allocation error
  2018-01-05 12:37 Fun with signals Chris Wilson
                   ` (6 preceding siblings ...)
  2018-01-05 12:37 ` [PATCH 07/14] drm/i915: Shrink the GEM kmem_caches upon idling Chris Wilson
@ 2018-01-05 12:37 ` Chris Wilson
  2018-01-05 12:37 ` [PATCH 09/14] drm/i915: Trim the retired request queue after submitting Chris Wilson
                   ` (6 subsequent siblings)
  14 siblings, 0 replies; 16+ messages in thread
From: Chris Wilson @ 2018-01-05 12:37 UTC (permalink / raw)
  To: intel-gfx

If we fail to allocate a new request, make sure we recover the pages
that are in the process of being freed by inserting an RCU barrier.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_request.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 86c86a17d558..94429405e776 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -696,6 +696,9 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 		if (ret)
 			goto err_unreserve;
 
+		kmem_cache_shrink(dev_priv->requests);
+		rcu_barrier();
+
 		req = kmem_cache_alloc(dev_priv->requests, GFP_KERNEL);
 		if (!req) {
 			ret = -ENOMEM;
-- 
2.15.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 09/14] drm/i915: Trim the retired request queue after submitting
  2018-01-05 12:37 Fun with signals Chris Wilson
                   ` (7 preceding siblings ...)
  2018-01-05 12:37 ` [PATCH 08/14] drm/i915: Shrink the request kmem_cache on allocation error Chris Wilson
@ 2018-01-05 12:37 ` Chris Wilson
  2018-01-05 12:37 ` [PATCH 10/14] drm/i915/breadcrumbs: Drop request reference for the signaler thread Chris Wilson
                   ` (5 subsequent siblings)
  14 siblings, 0 replies; 16+ messages in thread
From: Chris Wilson @ 2018-01-05 12:37 UTC (permalink / raw)
  To: intel-gfx

If we submit a request and see that the previous request on this
timeline was already signaled, we first do not need to add the
dependency tracker for that completed request and secondly we know that
there is then a large backlog in retiring requests affecting this
timeline. Given that we just submitted more work to the HW, now would be
a good time to catch up on those retirements.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem_request.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 94429405e776..4107ee4afd83 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -1021,7 +1021,7 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 
 	prev = i915_gem_active_raw(&timeline->last_request,
 				   &request->i915->drm.struct_mutex);
-	if (prev) {
+	if (prev && !i915_gem_request_completed(prev)) {
 		i915_sw_fence_await_sw_fence(&request->submit, &prev->submit,
 					     &request->submitq);
 		if (engine->schedule)
@@ -1057,6 +1057,9 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 	local_bh_disable();
 	i915_sw_fence_commit(&request->submit);
 	local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
+
+	if (prev && i915_gem_request_completed(prev))
+		i915_gem_request_retire_upto(prev);
 }
 
 static unsigned long local_clock_us(unsigned int *cpu)
-- 
2.15.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 10/14] drm/i915/breadcrumbs: Drop request reference for the signaler thread
  2018-01-05 12:37 Fun with signals Chris Wilson
                   ` (8 preceding siblings ...)
  2018-01-05 12:37 ` [PATCH 09/14] drm/i915: Trim the retired request queue after submitting Chris Wilson
@ 2018-01-05 12:37 ` Chris Wilson
  2018-01-05 12:37 ` [PATCH 11/14] drm/i915: Reduce spinlock hold time during notify_ring() interrupt Chris Wilson
                   ` (4 subsequent siblings)
  14 siblings, 0 replies; 16+ messages in thread
From: Chris Wilson @ 2018-01-05 12:37 UTC (permalink / raw)
  To: intel-gfx

If we remember to cancel the signaler on a request when retiring it
(after we know that the request has been signaled), we do not need to
carry an additional request in the signaler itself. This prevents an
issue whereby the signaler threads may be delayed and hold on to
thousands of request references, causing severe memory fragmentation and
premature oom (most noticeable on 32b snb due to the limited GFP_KERNEL
and frequent use of inter-engine fences).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_request.c  |   6 +-
 drivers/gpu/drm/i915/intel_breadcrumbs.c | 149 +++++++++++++++++--------------
 2 files changed, 85 insertions(+), 70 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 4107ee4afd83..4f4e4f3ff56f 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -441,7 +441,10 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
 	spin_lock_irq(&request->lock);
 	if (request->waitboost)
 		atomic_dec(&request->i915->gt_pm.rps.num_waiters);
-	dma_fence_signal_locked(&request->fence);
+	if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags))
+		dma_fence_signal_locked(&request->fence);
+	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
+		intel_engine_cancel_signaling(request);
 	spin_unlock_irq(&request->lock);
 
 	i915_priotree_fini(request->i915, &request->priotree);
@@ -732,6 +735,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 
 	/* No zalloc, must clear what we need by hand */
 	req->global_seqno = 0;
+	req->signaling.wait.seqno = 0;
 	req->file_priv = NULL;
 	req->batch = NULL;
 	req->capture_list = NULL;
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 3b8fff28a7b0..4ee495f7476f 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -235,7 +235,7 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
 	struct intel_wait *wait, *n;
 
 	if (!b->irq_armed)
-		goto wakeup_signaler;
+		return;
 
 	/*
 	 * We only disarm the irq when we are idle (all requests completed),
@@ -260,14 +260,6 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
 	b->waiters = RB_ROOT;
 
 	spin_unlock_irq(&b->rb_lock);
-
-	/*
-	 * The signaling thread may be asleep holding a reference to a request,
-	 * that had its signaling cancelled prior to being preempted. We need
-	 * to kick the signaler, just in case, to release any such reference.
-	 */
-wakeup_signaler:
-	wake_up_process(b->signaler);
 }
 
 static bool use_fake_irq(const struct intel_breadcrumbs *b)
@@ -644,6 +636,62 @@ static void signaler_set_rtpriority(void)
 	 sched_setscheduler_nocheck(current, SCHED_FIFO, &param);
 }
 
+static void __intel_engine_remove_signal(struct intel_engine_cs *engine,
+					 struct drm_i915_gem_request *request)
+{
+	struct intel_breadcrumbs *b = &engine->breadcrumbs;
+
+	lockdep_assert_held(&b->rb_lock);
+
+	/*
+	 * Wake up all other completed waiters and select the
+	 * next bottom-half for the next user interrupt.
+	 */
+	__intel_engine_remove_wait(engine, &request->signaling.wait);
+
+	/*
+	 * Find the next oldest signal. Note that as we have
+	 * not been holding the lock, another client may
+	 * have installed an even older signal than the one
+	 * we just completed - so double check we are still
+	 * the oldest before picking the next one.
+	 */
+	if (request->signaling.wait.seqno) {
+		if (request == rcu_access_pointer(b->first_signal)) {
+			struct rb_node *rb = rb_next(&request->signaling.node);
+			rcu_assign_pointer(b->first_signal,
+					   rb ? to_signaler(rb) : NULL);
+		}
+
+		rb_erase(&request->signaling.node, &b->signals);
+		request->signaling.wait.seqno = 0;
+	}
+}
+
+static struct drm_i915_gem_request *first_signal(struct intel_breadcrumbs *b)
+{
+	/*
+	 * See the big warnings for i915_gem_active_get_rcu() and similarly
+	 * for dma_fence_get_rcu_safe() that explain the intricacies involved
+	 * here with defeating CPU/compiler speculation and enforcing
+	 * the required memory barriers.
+	 */
+	do {
+		struct drm_i915_gem_request *request;
+
+		request = rcu_dereference(b->first_signal);
+		if (request)
+			request = i915_gem_request_get_rcu(request);
+
+		barrier();
+
+		if (!request || request == rcu_access_pointer(b->first_signal))
+			return rcu_pointer_handoff(request);
+
+		i915_gem_request_put(request);
+	} while (1);
+}
+
 static int intel_breadcrumbs_signaler(void *arg)
 {
 	struct intel_engine_cs *engine = arg;
@@ -667,41 +715,21 @@ static int intel_breadcrumbs_signaler(void *arg)
 		 * a new client.
 		 */
 		rcu_read_lock();
-		request = rcu_dereference(b->first_signal);
-		if (request)
-			request = i915_gem_request_get_rcu(request);
+		request = first_signal(b);
 		rcu_read_unlock();
 		if (signal_complete(request)) {
-			local_bh_disable();
-			dma_fence_signal(&request->fence);
-			local_bh_enable(); /* kick start the tasklets */
-
-			spin_lock_irq(&b->rb_lock);
-
-			/* Wake up all other completed waiters and select the
-			 * next bottom-half for the next user interrupt.
-			 */
-			__intel_engine_remove_wait(engine,
-						   &request->signaling.wait);
-
-			/* Find the next oldest signal. Note that as we have
-			 * not been holding the lock, another client may
-			 * have installed an even older signal than the one
-			 * we just completed - so double check we are still
-			 * the oldest before picking the next one.
-			 */
-			if (request == rcu_access_pointer(b->first_signal)) {
-				struct rb_node *rb =
-					rb_next(&request->signaling.node);
-				rcu_assign_pointer(b->first_signal,
-						   rb ? to_signaler(rb) : NULL);
+			if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+				      &request->fence.flags)) {
+				local_bh_disable();
+				dma_fence_signal(&request->fence);
+				local_bh_enable(); /* kick start the tasklets */
 			}
-			rb_erase(&request->signaling.node, &b->signals);
-			RB_CLEAR_NODE(&request->signaling.node);
-
-			spin_unlock_irq(&b->rb_lock);
 
-			i915_gem_request_put(request);
+			if (request->signaling.wait.seqno) {
+				spin_lock_irq(&b->rb_lock);
+				__intel_engine_remove_signal(engine, request);
+				spin_unlock_irq(&b->rb_lock);
+			}
 
 			/* If the engine is saturated we may be continually
 			 * processing completed requests. This angers the
@@ -712,19 +740,17 @@ static int intel_breadcrumbs_signaler(void *arg)
 			 */
 			do_schedule = need_resched();
 		}
+		i915_gem_request_put(request);
 
 		if (unlikely(do_schedule)) {
 			if (kthread_should_park())
 				kthread_parkme();
 
-			if (unlikely(kthread_should_stop())) {
-				i915_gem_request_put(request);
+			if (unlikely(kthread_should_stop()))
 				break;
-			}
 
 			schedule();
 		}
-		i915_gem_request_put(request);
 	} while (1);
 	__set_current_state(TASK_RUNNING);
 
@@ -753,12 +779,12 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
 	if (!seqno)
 		return;
 
+	spin_lock(&b->rb_lock);
+
+	GEM_BUG_ON(request->signaling.wait.seqno);
 	request->signaling.wait.tsk = b->signaler;
 	request->signaling.wait.request = request;
 	request->signaling.wait.seqno = seqno;
-	i915_gem_request_get(request);
-
-	spin_lock(&b->rb_lock);
 
 	/* First add ourselves into the list of waiters, but register our
 	 * bottom-half as the signaller thread. As per usual, only the oldest
@@ -797,7 +823,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
 			rcu_assign_pointer(b->first_signal, request);
 	} else {
 		__intel_engine_remove_wait(engine, &request->signaling.wait);
-		i915_gem_request_put(request);
+		request->signaling.wait.seqno = 0;
 		wakeup = false;
 	}
 
@@ -809,32 +835,17 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
 
 void intel_engine_cancel_signaling(struct drm_i915_gem_request *request)
 {
-	struct intel_engine_cs *engine = request->engine;
-	struct intel_breadcrumbs *b = &engine->breadcrumbs;
-
 	GEM_BUG_ON(!irqs_disabled());
 	lockdep_assert_held(&request->lock);
-	GEM_BUG_ON(!request->signaling.wait.seqno);
 
-	spin_lock(&b->rb_lock);
+	if (request->signaling.wait.seqno) {
+		struct intel_engine_cs *engine = request->engine;
+		struct intel_breadcrumbs *b = &engine->breadcrumbs;
 
-	if (!RB_EMPTY_NODE(&request->signaling.node)) {
-		if (request == rcu_access_pointer(b->first_signal)) {
-			struct rb_node *rb =
-				rb_next(&request->signaling.node);
-			rcu_assign_pointer(b->first_signal,
-					   rb ? to_signaler(rb) : NULL);
-		}
-		rb_erase(&request->signaling.node, &b->signals);
-		RB_CLEAR_NODE(&request->signaling.node);
-		i915_gem_request_put(request);
+		spin_lock(&b->rb_lock);
+		__intel_engine_remove_signal(engine, request);
+		spin_unlock(&b->rb_lock);
 	}
-
-	__intel_engine_remove_wait(engine, &request->signaling.wait);
-
-	spin_unlock(&b->rb_lock);
-
-	request->signaling.wait.seqno = 0;
 }
 
 int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
-- 
2.15.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 11/14] drm/i915: Reduce spinlock hold time during notify_ring() interrupt
  2018-01-05 12:37 Fun with signals Chris Wilson
                   ` (9 preceding siblings ...)
  2018-01-05 12:37 ` [PATCH 10/14] drm/i915/breadcrumbs: Drop request reference for the signaler thread Chris Wilson
@ 2018-01-05 12:37 ` Chris Wilson
  2018-01-05 12:37 ` [PATCH 12/14] drm/i915: Move the irq_counter inside the spinlock Chris Wilson
                   ` (3 subsequent siblings)
  14 siblings, 0 replies; 16+ messages in thread
From: Chris Wilson @ 2018-01-05 12:37 UTC (permalink / raw)
  To: intel-gfx

By taking advantage of the RCU protection of the task struct, we can find
the appropriate signaler under the spinlock and then release the spinlock
before waking the task and signaling the fence.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_irq.c | 29 +++++++++++++++++++----------
 1 file changed, 19 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 3517c6548e2c..0b272501b738 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1065,21 +1065,24 @@ static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv)
 
 static void notify_ring(struct intel_engine_cs *engine)
 {
+	const u32 seqno = intel_engine_get_seqno(engine);
 	struct drm_i915_gem_request *rq = NULL;
+	struct task_struct *tsk = NULL;
 	struct intel_wait *wait;
 
-	if (!engine->breadcrumbs.irq_armed)
+	if (unlikely(!engine->breadcrumbs.irq_armed))
 		return;
 
 	atomic_inc(&engine->irq_count);
 	set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
 
+	rcu_read_lock();
+
 	spin_lock(&engine->breadcrumbs.irq_lock);
 	wait = engine->breadcrumbs.irq_wait;
 	if (wait) {
-		bool wakeup = engine->irq_seqno_barrier;
-
-		/* We use a callback from the dma-fence to submit
+		/*
+		 * We use a callback from the dma-fence to submit
 		 * requests after waiting on our own requests. To
 		 * ensure minimum delay in queuing the next request to
 		 * hardware, signal the fence now rather than wait for
@@ -1090,19 +1093,20 @@ static void notify_ring(struct intel_engine_cs *engine)
 		 * and to handle coalescing of multiple seqno updates
 		 * and many waiters.
 		 */
-		if (i915_seqno_passed(intel_engine_get_seqno(engine),
-				      wait->seqno)) {
+		if (i915_seqno_passed(seqno, wait->seqno)) {
 			struct drm_i915_gem_request *waiter = wait->request;
 
-			wakeup = true;
 			if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
 				      &waiter->fence.flags) &&
 			    intel_wait_check_request(wait, waiter))
 				rq = i915_gem_request_get(waiter);
-		}
 
-		if (wakeup)
-			wake_up_process(wait->tsk);
+			tsk = wait->tsk;
+		} else {
+			if (engine->irq_seqno_barrier &&
+			    i915_seqno_passed(seqno, wait->seqno - 1))
+				tsk = wait->tsk;
+		}
 	} else {
 		if (engine->breadcrumbs.irq_armed)
 			__intel_engine_disarm_breadcrumbs(engine);
@@ -1114,6 +1118,11 @@ static void notify_ring(struct intel_engine_cs *engine)
 		i915_gem_request_put(rq);
 	}
 
+	if (tsk && tsk->state != TASK_RUNNING)
+		wake_up_process(tsk);
+
+	rcu_read_unlock();
+
 	trace_intel_engine_notify(engine, wait);
 }
 
-- 
2.15.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 12/14] drm/i915: Move the irq_counter inside the spinlock
  2018-01-05 12:37 Fun with signals Chris Wilson
                   ` (10 preceding siblings ...)
  2018-01-05 12:37 ` [PATCH 11/14] drm/i915: Reduce spinlock hold time during notify_ring() interrupt Chris Wilson
@ 2018-01-05 12:37 ` Chris Wilson
  2018-01-05 12:37 ` [PATCH 13/14] drm/i915: Only signal from interrupt when requested Chris Wilson
                   ` (2 subsequent siblings)
  14 siblings, 0 replies; 16+ messages in thread
From: Chris Wilson @ 2018-01-05 12:37 UTC (permalink / raw)
  To: intel-gfx

Rather than have multiple locked instructions inside the notify_ring()
irq handler, move them inside the spinlock and reduce their intrinsic
locking.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_request.c  |  4 ++--
 drivers/gpu/drm/i915/i915_irq.c          |  6 +++---
 drivers/gpu/drm/i915/intel_breadcrumbs.c | 13 ++++++++-----
 drivers/gpu/drm/i915/intel_ringbuffer.h  |  2 +-
 4 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 4f4e4f3ff56f..fef54d9de049 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -1130,7 +1130,7 @@ static bool __i915_spin_request(const struct drm_i915_gem_request *req,
 	 * takes to sleep on a request, on the order of a microsecond.
 	 */
 
-	irq = atomic_read(&engine->irq_count);
+	irq = READ_ONCE(engine->breadcrumbs.irq_count);
 	timeout_us += local_clock_us(&cpu);
 	do {
 		if (i915_seqno_passed(intel_engine_get_seqno(engine), seqno))
@@ -1141,7 +1141,7 @@ static bool __i915_spin_request(const struct drm_i915_gem_request *req,
 		 * assume we won't see one in the near future but require
 		 * the engine->seqno_barrier() to fixup coherency.
 		 */
-		if (atomic_read(&engine->irq_count) != irq)
+		if (READ_ONCE(engine->breadcrumbs.irq_count) != irq)
 			break;
 
 		if (signal_pending_state(state, current))
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 0b272501b738..e5f76d580010 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1073,9 +1073,6 @@ static void notify_ring(struct intel_engine_cs *engine)
 	if (unlikely(!engine->breadcrumbs.irq_armed))
 		return;
 
-	atomic_inc(&engine->irq_count);
-	set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
-
 	rcu_read_lock();
 
 	spin_lock(&engine->breadcrumbs.irq_lock);
@@ -1107,6 +1104,9 @@ static void notify_ring(struct intel_engine_cs *engine)
 			    i915_seqno_passed(seqno, wait->seqno - 1))
 				tsk = wait->tsk;
 		}
+
+		engine->breadcrumbs.irq_count++;
+		__set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
 	} else {
 		if (engine->breadcrumbs.irq_armed)
 			__intel_engine_disarm_breadcrumbs(engine);
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 4ee495f7476f..d5b40208690e 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -98,12 +98,14 @@ static void intel_breadcrumbs_hangcheck(struct timer_list *t)
 	struct intel_engine_cs *engine =
 		from_timer(engine, t, breadcrumbs.hangcheck);
 	struct intel_breadcrumbs *b = &engine->breadcrumbs;
+	unsigned int irq_count;
 
 	if (!b->irq_armed)
 		return;
 
-	if (b->hangcheck_interrupts != atomic_read(&engine->irq_count)) {
-		b->hangcheck_interrupts = atomic_read(&engine->irq_count);
+	irq_count = READ_ONCE(b->irq_count);
+	if (b->hangcheck_interrupts != irq_count) {
+		b->hangcheck_interrupts = irq_count;
 		mod_timer(&b->hangcheck, wait_timeout());
 		return;
 	}
@@ -176,7 +178,7 @@ static void irq_enable(struct intel_engine_cs *engine)
 	 * we still need to force the barrier before reading the seqno,
 	 * just in case.
 	 */
-	set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
+	__set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
 
 	/* Caller disables interrupts */
 	spin_lock(&engine->i915->irq_lock);
@@ -270,13 +272,14 @@ static bool use_fake_irq(const struct intel_breadcrumbs *b)
 	if (!test_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings))
 		return false;
 
-	/* Only start with the heavy weight fake irq timer if we have not
+	/*
+	 * Only start with the heavy weight fake irq timer if we have not
 	 * seen any interrupts since enabling it the first time. If the
 	 * interrupts are still arriving, it means we made a mistake in our
 	 * engine->seqno_barrier(), a timing error that should be transient
 	 * and unlikely to reoccur.
 	 */
-	return atomic_read(&engine->irq_count) == b->hangcheck_interrupts;
+	return READ_ONCE(b->irq_count) == b->hangcheck_interrupts;
 }
 
 static void enable_fake_irq(struct intel_breadcrumbs *b)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index c5ff203e42d6..f406d0ff4612 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -305,7 +305,6 @@ struct intel_engine_cs {
 
 	struct drm_i915_gem_object *default_state;
 
-	atomic_t irq_count;
 	unsigned long irq_posted;
 #define ENGINE_IRQ_BREADCRUMB 0
 #define ENGINE_IRQ_EXECLIST 1
@@ -340,6 +339,7 @@ struct intel_engine_cs {
 
 		unsigned int hangcheck_interrupts;
 		unsigned int irq_enabled;
+		unsigned int irq_count;
 
 		bool irq_armed : 1;
 		I915_SELFTEST_DECLARE(bool mock : 1);
-- 
2.15.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 13/14] drm/i915: Only signal from interrupt when requested
  2018-01-05 12:37 Fun with signals Chris Wilson
                   ` (11 preceding siblings ...)
  2018-01-05 12:37 ` [PATCH 12/14] drm/i915: Move the irq_counter inside the spinlock Chris Wilson
@ 2018-01-05 12:37 ` Chris Wilson
  2018-01-05 12:37 ` [PATCH 14/14] drm/i915/breadcrumbs: Reduce signaler rbtree to a sorted list Chris Wilson
  2018-01-05 13:03 ` ✓ Fi.CI.BAT: success for series starting with [01/14] drm/i915: Only defer freeing of fence callback when also using the timer Patchwork
  14 siblings, 0 replies; 16+ messages in thread
From: Chris Wilson @ 2018-01-05 12:37 UTC (permalink / raw)
  To: intel-gfx

Avoid calling dma_fence_signal() from inside the interrupt if we haven't
enabled signaling on the request.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_request.c | 2 +-
 drivers/gpu/drm/i915/i915_irq.c         | 3 ++-
 drivers/gpu/drm/i915/intel_ringbuffer.h | 5 ++---
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index fef54d9de049..18b9f23dafb9 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -1216,7 +1216,7 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 	if (flags & I915_WAIT_LOCKED)
 		add_wait_queue(errq, &reset);
 
-	intel_wait_init(&wait, req);
+	intel_wait_init(&wait);
 
 restart:
 	do {
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index e5f76d580010..ea290f102784 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1093,7 +1093,8 @@ static void notify_ring(struct intel_engine_cs *engine)
 		if (i915_seqno_passed(seqno, wait->seqno)) {
 			struct drm_i915_gem_request *waiter = wait->request;
 
-			if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+			if (waiter &&
+			    !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
 				      &waiter->fence.flags) &&
 			    intel_wait_check_request(wait, waiter))
 				rq = i915_gem_request_get(waiter);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index f406d0ff4612..cea2092d25d9 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -874,11 +874,10 @@ static inline u32 intel_hws_preempt_done_address(struct intel_engine_cs *engine)
 /* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */
 int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
 
-static inline void intel_wait_init(struct intel_wait *wait,
-				   struct drm_i915_gem_request *rq)
+static inline void intel_wait_init(struct intel_wait *wait)
 {
 	wait->tsk = current;
-	wait->request = rq;
+	wait->request = NULL;
 }
 
 static inline void intel_wait_init_for_seqno(struct intel_wait *wait, u32 seqno)
-- 
2.15.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 14/14] drm/i915/breadcrumbs: Reduce signaler rbtree to a sorted list
  2018-01-05 12:37 Fun with signals Chris Wilson
                   ` (12 preceding siblings ...)
  2018-01-05 12:37 ` [PATCH 13/14] drm/i915: Only signal from interrupt when requested Chris Wilson
@ 2018-01-05 12:37 ` Chris Wilson
  2018-01-05 13:03 ` ✓ Fi.CI.BAT: success for series starting with [01/14] drm/i915: Only defer freeing of fence callback when also using the timer Patchwork
  14 siblings, 0 replies; 16+ messages in thread
From: Chris Wilson @ 2018-01-05 12:37 UTC (permalink / raw)
  To: intel-gfx

The goal here is to try and reduce the latency of signaling additional
requests following the wakeup from interrupt by reducing the list of
to-be-signaled requests from an rbtree to a sorted linked list. The
original choice of using an rbtree was to facilitate random insertions
of request into the signaler while maintaining a sorted list. However,
if we assume that most new requests are added when they are submitted,
we see those new requests in execution order making an insertion sort
fast, and the reduction in overhead of each signaler iteration
significant.

Since commit 56299fb7d904 ("drm/i915: Signal first fence from irq handler
if complete"), we signal most fences directly from notify_ring() in the
interrupt handler greatly reducing the amount of work that actually
needs to be done by the signaler kthread. All the thread is then
required to do is operate as the bottom-half, cleaning up after the
interrupt handler and preparing the next waiter. This includes signaling
all later completed fences in a saturated system, but on a mostly idle
system we only have to rebuild the wait rbtree in time for the next
interrupt. With this de-emphasis of the signaler's role, we want to
rejig its datastructures to reduce the amount of work we require to
both setup the signal tree and maintain it on every interrupt.

References: 56299fb7d904 ("drm/i915: Signal first fence from irq handler if complete")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem_request.h  |   2 +-
 drivers/gpu/drm/i915/intel_breadcrumbs.c | 277 +++++++++++++------------------
 drivers/gpu/drm/i915/intel_ringbuffer.h  |   4 +-
 3 files changed, 116 insertions(+), 167 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 6c607f8dbf92..a2dd3d71c0f8 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -44,8 +44,8 @@ struct intel_wait {
 };
 
 struct intel_signal_node {
-	struct rb_node node;
 	struct intel_wait wait;
+	struct list_head link;
 };
 
 struct i915_dependency {
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index d5b40208690e..e978f6828c04 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -350,7 +350,8 @@ static inline void __intel_breadcrumbs_finish(struct intel_breadcrumbs *b,
 	lockdep_assert_held(&b->rb_lock);
 	GEM_BUG_ON(b->irq_wait == wait);
 
-	/* This request is completed, so remove it from the tree, mark it as
+	/*
+	 * This request is completed, so remove it from the tree, mark it as
 	 * complete, and *then* wake up the associated task. N.B. when the
 	 * task wakes up, it will find the empty rb_node, discern that it
 	 * has already been removed from the tree and skip the serialisation
@@ -361,7 +362,8 @@ static inline void __intel_breadcrumbs_finish(struct intel_breadcrumbs *b,
 	rb_erase(&wait->node, &b->waiters);
 	RB_CLEAR_NODE(&wait->node);
 
-	wake_up_process(wait->tsk); /* implicit smp_wmb() */
+	if (wait->tsk->state != TASK_RUNNING)
+		wake_up_process(wait->tsk); /* implicit smp_wmb() */
 }
 
 static inline void __intel_breadcrumbs_next(struct intel_engine_cs *engine,
@@ -602,36 +604,6 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine,
 	spin_unlock_irq(&b->rb_lock);
 }
 
-static bool signal_valid(const struct drm_i915_gem_request *request)
-{
-	return intel_wait_check_request(&request->signaling.wait, request);
-}
-
-static bool signal_complete(const struct drm_i915_gem_request *request)
-{
-	if (!request)
-		return false;
-
-	/* If another process served as the bottom-half it may have already
-	 * signalled that this wait is already completed.
-	 */
-	if (intel_wait_complete(&request->signaling.wait))
-		return signal_valid(request);
-
-	/* Carefully check if the request is complete, giving time for the
-	 * seqno to be visible or if the GPU hung.
-	 */
-	if (__i915_request_irq_complete(request))
-		return true;
-
-	return false;
-}
-
-static struct drm_i915_gem_request *to_signaler(struct rb_node *rb)
-{
-	return rb_entry(rb, struct drm_i915_gem_request, signaling.node);
-}
-
 static void signaler_set_rtpriority(void)
 {
 	 struct sched_param param = { .sched_priority = 1 };
@@ -639,77 +611,25 @@ static void signaler_set_rtpriority(void)
 	 sched_setscheduler_nocheck(current, SCHED_FIFO, &param);
 }
 
-static void __intel_engine_remove_signal(struct intel_engine_cs *engine,
-					 struct drm_i915_gem_request *request)
-{
-	struct intel_breadcrumbs *b = &engine->breadcrumbs;
-
-	lockdep_assert_held(&b->rb_lock);
-
-	/*
-	 * Wake up all other completed waiters and select the
-	 * next bottom-half for the next user interrupt.
-	 */
-	__intel_engine_remove_wait(engine, &request->signaling.wait);
-
-	/*
-	 * Find the next oldest signal. Note that as we have
-	 * not been holding the lock, another client may
-	 * have installed an even older signal than the one
-	 * we just completed - so double check we are still
-	 * the oldest before picking the next one.
-	 */
-	if (request->signaling.wait.seqno) {
-		if (request == rcu_access_pointer(b->first_signal)) {
-			struct rb_node *rb = rb_next(&request->signaling.node);
-			rcu_assign_pointer(b->first_signal,
-					   rb ? to_signaler(rb) : NULL);
-		}
-
-		rb_erase(&request->signaling.node, &b->signals);
-		request->signaling.wait.seqno = 0;
-	}
-}
-
-static struct drm_i915_gem_request *first_signal(struct intel_breadcrumbs *b)
-{
-	/*
-	 * See the big warnings for i915_gem_active_get_rcu() and similarly
-	 * for dma_fence_get_rcu_safe() that explain the intricacies involved
-	 * here with defeating CPU/compiler speculation and enforcing
-	 * the required memory barriers.
-	 */
-	do {
-		struct drm_i915_gem_request *request;
-
-		request = rcu_dereference(b->first_signal);
-		if (request)
-			request = i915_gem_request_get_rcu(request);
-
-		barrier();
-
-		if (!request || request == rcu_access_pointer(b->first_signal))
-			return rcu_pointer_handoff(request);
-
-		i915_gem_request_put(request);
-	} while (1);
-}
-
 static int intel_breadcrumbs_signaler(void *arg)
 {
 	struct intel_engine_cs *engine = arg;
 	struct intel_breadcrumbs *b = &engine->breadcrumbs;
-	struct drm_i915_gem_request *request;
+	struct drm_i915_gem_request *rq, *n;
 
 	/* Install ourselves with high priority to reduce signalling latency */
 	signaler_set_rtpriority();
 
 	do {
-		bool do_schedule = true;
+		LIST_HEAD(list);
+		u32 seqno;
 
 		set_current_state(TASK_INTERRUPTIBLE);
+		if (list_empty(&b->signals))
+			goto sleep;
 
-		/* We are either woken up by the interrupt bottom-half,
+		/*
+		 * We are either woken up by the interrupt bottom-half,
 		 * or by a client adding a new signaller. In both cases,
 		 * the GPU seqno may have advanced beyond our oldest signal.
 		 * If it has, propagate the signal, remove the waiter and
@@ -717,35 +637,60 @@ static int intel_breadcrumbs_signaler(void *arg)
 		 * need to wait for a new interrupt from the GPU or for
 		 * a new client.
 		 */
-		rcu_read_lock();
-		request = first_signal(b);
-		rcu_read_unlock();
-		if (signal_complete(request)) {
-			if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
-				      &request->fence.flags)) {
-				local_bh_disable();
-				dma_fence_signal(&request->fence);
-				local_bh_enable(); /* kick start the tasklets */
+		seqno = intel_engine_get_seqno(engine);
+
+		spin_lock_irq(&b->rb_lock);
+		list_for_each_entry_safe(rq, n, &b->signals, signaling.link) {
+			u32 this = rq->signaling.wait.seqno;
+
+			GEM_BUG_ON(!rq->signaling.wait.seqno);
+
+			if (!i915_seqno_passed(seqno, this))
+				break;
+
+			if (this == i915_gem_request_global_seqno(rq)) {
+				__intel_engine_remove_wait(engine,
+							   &rq->signaling.wait);
+
+				rq->signaling.wait.seqno = 0;
+				__list_del_entry(&rq->signaling.link);
+
+				if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+					      &rq->fence.flags)) {
+					list_add_tail(&rq->signaling.link,
+						      &list);
+					i915_gem_request_get(rq);
+				}
 			}
+		}
+		spin_unlock_irq(&b->rb_lock);
 
-			if (request->signaling.wait.seqno) {
-				spin_lock_irq(&b->rb_lock);
-				__intel_engine_remove_signal(engine, request);
-				spin_unlock_irq(&b->rb_lock);
+		if (!list_empty(&list)) {
+			local_bh_disable();
+			list_for_each_entry_safe(rq, n, &list, signaling.link) {
+				dma_fence_signal(&rq->fence);
+				i915_gem_request_put(rq);
 			}
+			local_bh_enable(); /* kick start the tasklets */
+		}
 
-			/* If the engine is saturated we may be continually
-			 * processing completed requests. This angers the
-			 * NMI watchdog if we never let anything else
-			 * have access to the CPU. Let's pretend to be nice
-			 * and relinquish the CPU if we burn through the
-			 * entire RT timeslice!
-			 */
-			do_schedule = need_resched();
+		if (engine->irq_seqno_barrier &&
+		    test_and_clear_bit(ENGINE_IRQ_BREADCRUMB,
+				       &engine->irq_posted)) {
+			engine->irq_seqno_barrier(engine);
+			intel_engine_wakeup(engine);
 		}
-		i915_gem_request_put(request);
 
-		if (unlikely(do_schedule)) {
+		/*
+		 * If the engine is saturated we may be continually
+		 * processing completed requests. This angers the
+		 * NMI watchdog if we never let anything else
+		 * have access to the CPU. Let's pretend to be nice
+		 * and relinquish the CPU if we burn through the
+		 * entire RT timeslice!
+		 */
+		if (list_empty(&list) || need_resched()) {
+sleep:
 			if (kthread_should_park())
 				kthread_parkme();
 
@@ -760,6 +705,32 @@ static int intel_breadcrumbs_signaler(void *arg)
 	return 0;
 }
 
+static void insert_signal(struct intel_breadcrumbs *b,
+			  struct drm_i915_gem_request *request)
+{
+	struct drm_i915_gem_request *iter;
+
+	lockdep_assert_held(&b->rb_lock);
+
+	/*
+	 * The reasonable assumption is that we are called to add signals
+	 * in sequence, as the requests are submitted for execution and
+	 * assigned a global_seqno. This will be the case for the majority
+	 * of internally generated signals (inter-engine signaling).
+	 *
+	 * Out of order waiters triggering random signaling enabling will
+	 * be more problematic, but hopefully rare enough and the list
+	 * small enough that the O(N) insertion sort is not an issue.
+	 */
+
+	list_for_each_entry_reverse(iter, &b->signals, signaling.link)
+		if (i915_seqno_passed(request->signaling.wait.seqno,
+				      iter->signaling.wait.seqno))
+			break;
+
+	list_add(&request->signaling.link, &iter->signaling.link);
+}
+
 void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
 				   bool wakeup)
 {
@@ -767,7 +738,8 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
 	struct intel_breadcrumbs *b = &engine->breadcrumbs;
 	u32 seqno;
 
-	/* Note that we may be called from an interrupt handler on another
+	/*
+	 * Note that we may be called from an interrupt handler on another
 	 * device (e.g. nouveau signaling a fence completion causing us
 	 * to submit a request, and so enable signaling). As such,
 	 * we need to make sure that all other users of b->rb_lock protect
@@ -779,7 +751,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
 	lockdep_assert_held(&request->lock);
 
 	seqno = i915_gem_request_global_seqno(request);
-	if (!seqno)
+	if (!seqno) /* will be enabled later upon execution */
 		return;
 
 	spin_lock(&b->rb_lock);
@@ -788,8 +760,10 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
 	request->signaling.wait.tsk = b->signaler;
 	request->signaling.wait.request = request;
 	request->signaling.wait.seqno = seqno;
+	insert_signal(b, request);
 
-	/* First add ourselves into the list of waiters, but register our
+	/*
+	 * Add ourselves into the list of waiters, but registering our
 	 * bottom-half as the signaller thread. As per usual, only the oldest
 	 * waiter (not just signaller) is tasked as the bottom-half waking
 	 * up all completed waiters after the user interrupt.
@@ -799,37 +773,6 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
 	 */
 	wakeup &= __intel_engine_add_wait(engine, &request->signaling.wait);
 
-	if (!__i915_gem_request_completed(request, seqno)) {
-		struct rb_node *parent, **p;
-		bool first;
-
-		/* Now insert ourselves into the retirement ordered list of
-		 * signals on this engine. We track the oldest seqno as that
-		 * will be the first signal to complete.
-		 */
-		parent = NULL;
-		first = true;
-		p = &b->signals.rb_node;
-		while (*p) {
-			parent = *p;
-			if (i915_seqno_passed(seqno,
-					      to_signaler(parent)->signaling.wait.seqno)) {
-				p = &parent->rb_right;
-				first = false;
-			} else {
-				p = &parent->rb_left;
-			}
-		}
-		rb_link_node(&request->signaling.node, parent, p);
-		rb_insert_color(&request->signaling.node, &b->signals);
-		if (first)
-			rcu_assign_pointer(b->first_signal, request);
-	} else {
-		__intel_engine_remove_wait(engine, &request->signaling.wait);
-		request->signaling.wait.seqno = 0;
-		wakeup = false;
-	}
-
 	spin_unlock(&b->rb_lock);
 
 	if (wakeup)
@@ -838,17 +781,20 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
 
 void intel_engine_cancel_signaling(struct drm_i915_gem_request *request)
 {
+	struct intel_engine_cs *engine = request->engine;
+	struct intel_breadcrumbs *b = &engine->breadcrumbs;
+
 	GEM_BUG_ON(!irqs_disabled());
 	lockdep_assert_held(&request->lock);
 
-	if (request->signaling.wait.seqno) {
-		struct intel_engine_cs *engine = request->engine;
-		struct intel_breadcrumbs *b = &engine->breadcrumbs;
+	if (!request->signaling.wait.seqno)
+		return;
 
-		spin_lock(&b->rb_lock);
-		__intel_engine_remove_signal(engine, request);
-		spin_unlock(&b->rb_lock);
-	}
+	spin_lock(&b->rb_lock);
+	__intel_engine_remove_wait(engine, &request->signaling.wait);
+	if (fetch_and_zero(&request->signaling.wait.seqno))
+		__list_del_entry(&request->signaling.link);
+	spin_unlock(&b->rb_lock);
 }
 
 int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
@@ -865,6 +811,8 @@ int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
 	timer_setup(&b->fake_irq, intel_breadcrumbs_fake_irq, 0);
 	timer_setup(&b->hangcheck, intel_breadcrumbs_hangcheck, 0);
 
+	INIT_LIST_HEAD(&b->signals);
+
 	/* Spawn a thread to provide a common bottom-half for all signals.
 	 * As this is an asynchronous interface we cannot steal the current
 	 * task for handling the bottom-half to the user interrupt, therefore
@@ -924,8 +872,7 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
 	/* The engines should be idle and all requests accounted for! */
 	WARN_ON(READ_ONCE(b->irq_wait));
 	WARN_ON(!RB_EMPTY_ROOT(&b->waiters));
-	WARN_ON(rcu_access_pointer(b->first_signal));
-	WARN_ON(!RB_EMPTY_ROOT(&b->signals));
+	WARN_ON(!list_empty(&b->signals));
 
 	if (!IS_ERR_OR_NULL(b->signaler))
 		kthread_stop(b->signaler);
@@ -938,20 +885,22 @@ bool intel_breadcrumbs_busy(struct intel_engine_cs *engine)
 	struct intel_breadcrumbs *b = &engine->breadcrumbs;
 	bool busy = false;
 
-	spin_lock_irq(&b->rb_lock);
-
 	if (b->irq_wait) {
-		wake_up_process(b->irq_wait->tsk);
-		busy = true;
+		spin_lock_irq(&b->irq_lock);
+
+		if (b->irq_wait) {
+			wake_up_process(b->irq_wait->tsk);
+			busy = true;
+		}
+
+		spin_unlock_irq(&b->irq_lock);
 	}
 
-	if (rcu_access_pointer(b->first_signal)) {
+	if (!busy && !list_empty(&b->signals)) {
 		wake_up_process(b->signaler);
 		busy = true;
 	}
 
-	spin_unlock_irq(&b->rb_lock);
-
 	return busy;
 }
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index cea2092d25d9..fedf298de036 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -331,9 +331,9 @@ struct intel_engine_cs {
 
 		spinlock_t rb_lock; /* protects the rb and wraps irq_lock */
 		struct rb_root waiters; /* sorted by retirement, priority */
-		struct rb_root signals; /* sorted by retirement */
+		struct list_head signals; /* sorted by retirement */
 		struct task_struct *signaler; /* used for fence signalling */
-		struct drm_i915_gem_request __rcu *first_signal;
+
 		struct timer_list fake_irq; /* used after a missed interrupt */
 		struct timer_list hangcheck; /* detect missed interrupts */
 
-- 
2.15.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* ✓ Fi.CI.BAT: success for series starting with [01/14] drm/i915: Only defer freeing of fence callback when also using the timer
  2018-01-05 12:37 Fun with signals Chris Wilson
                   ` (13 preceding siblings ...)
  2018-01-05 12:37 ` [PATCH 14/14] drm/i915/breadcrumbs: Reduce signaler rbtree to a sorted list Chris Wilson
@ 2018-01-05 13:03 ` Patchwork
  14 siblings, 0 replies; 16+ messages in thread
From: Patchwork @ 2018-01-05 13:03 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/14] drm/i915: Only defer freeing of fence callback when also using the timer
URL   : https://patchwork.freedesktop.org/series/36077/
State : success

== Summary ==

Series 36077v1 series starting with [01/14] drm/i915: Only defer freeing of fence callback when also using the timer
https://patchwork.freedesktop.org/api/1.0/series/36077/revisions/1/mbox/

Test debugfs_test:
        Subgroup read_all_entries:
                dmesg-warn -> PASS       (fi-elk-e7500) fdo#103989 +2
Test gem_mmap_gtt:
        Subgroup basic-small-bo-tiledx:
                fail       -> PASS       (fi-gdg-551) fdo#102575
Test kms_pipe_crc_basic:
        Subgroup suspend-read-crc-pipe-b:
                pass       -> DMESG-WARN (fi-kbl-r) fdo#104172 +1
Test kms_psr_sink_crc:
        Subgroup psr_basic:
                dmesg-warn -> PASS       (fi-skl-6700hq) fdo#104260

fdo#103989 https://bugs.freedesktop.org/show_bug.cgi?id=103989
fdo#102575 https://bugs.freedesktop.org/show_bug.cgi?id=102575
fdo#104172 https://bugs.freedesktop.org/show_bug.cgi?id=104172
fdo#104260 https://bugs.freedesktop.org/show_bug.cgi?id=104260

fi-bdw-5557u     total:288  pass:267  dwarn:0   dfail:0   fail:0   skip:21  time:421s
fi-bdw-gvtdvm    total:288  pass:264  dwarn:0   dfail:0   fail:0   skip:24  time:433s
fi-blb-e6850     total:288  pass:223  dwarn:1   dfail:0   fail:0   skip:64  time:371s
fi-bsw-n3050     total:288  pass:242  dwarn:0   dfail:0   fail:0   skip:46  time:483s
fi-bwr-2160      total:288  pass:183  dwarn:0   dfail:0   fail:0   skip:105 time:278s
fi-bxt-dsi       total:288  pass:258  dwarn:0   dfail:0   fail:0   skip:30  time:480s
fi-bxt-j4205     total:288  pass:259  dwarn:0   dfail:0   fail:0   skip:29  time:485s
fi-byt-j1900     total:288  pass:253  dwarn:0   dfail:0   fail:0   skip:35  time:464s
fi-byt-n2820     total:288  pass:249  dwarn:0   dfail:0   fail:0   skip:39  time:460s
fi-elk-e7500     total:224  pass:168  dwarn:10  dfail:0   fail:0   skip:45 
fi-gdg-551       total:288  pass:180  dwarn:0   dfail:0   fail:0   skip:108 time:261s
fi-glk-1         total:288  pass:260  dwarn:0   dfail:0   fail:0   skip:28  time:512s
fi-hsw-4770      total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:392s
fi-hsw-4770r     total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:408s
fi-ilk-650       total:288  pass:228  dwarn:0   dfail:0   fail:0   skip:60  time:413s
fi-ivb-3520m     total:288  pass:259  dwarn:0   dfail:0   fail:0   skip:29  time:455s
fi-ivb-3770      total:288  pass:255  dwarn:0   dfail:0   fail:0   skip:33  time:415s
fi-kbl-7500u     total:288  pass:263  dwarn:1   dfail:0   fail:0   skip:24  time:467s
fi-kbl-7560u     total:288  pass:268  dwarn:1   dfail:0   fail:0   skip:19  time:501s
fi-kbl-7567u     total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:458s
fi-kbl-r         total:288  pass:260  dwarn:1   dfail:0   fail:0   skip:27  time:503s
fi-pnv-d510      total:288  pass:222  dwarn:1   dfail:0   fail:0   skip:65  time:572s
fi-skl-6260u     total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:429s
fi-skl-6600u     total:288  pass:260  dwarn:1   dfail:0   fail:0   skip:27  time:510s
fi-skl-6700hq    total:288  pass:262  dwarn:0   dfail:0   fail:0   skip:26  time:528s
fi-skl-6700k2    total:288  pass:264  dwarn:0   dfail:0   fail:0   skip:24  time:498s
fi-skl-6770hq    total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:500s
fi-skl-gvtdvm    total:288  pass:265  dwarn:0   dfail:0   fail:0   skip:23  time:432s
fi-snb-2520m     total:288  pass:248  dwarn:0   dfail:0   fail:0   skip:40  time:536s
fi-snb-2600      total:288  pass:248  dwarn:0   dfail:0   fail:0   skip:40  time:411s
Blacklisted hosts:
fi-cfl-s2        total:288  pass:262  dwarn:0   dfail:0   fail:0   skip:26  time:568s
fi-cnl-y         total:288  pass:261  dwarn:0   dfail:0   fail:1   skip:26  time:594s

914d61a8fb5fc53f6b0366167210468147495b3f drm-tip: 2018y-01m-05d-09h-12m-18s UTC integration manifest
9c5ac8c6b3a1 drm/i915/breadcrumbs: Reduce signaler rbtree to a sorted list
ef2164c7b9aa drm/i915: Only signal from interrupt when requested
c6f17f0f5777 drm/i915: Move the irq_counter inside the spinlock
ffe4588e23e2 drm/i915: Reduce spinlock hold time during notify_ring() interrupt
cf1c0797fed9 drm/i915/breadcrumbs: Drop request reference for the signaler thread
05d51ae9363c drm/i915: Trim the retired request queue after submitting
e6252640baec drm/i915: Shrink the request kmem_cache on allocation error
dfa43e787aa3 drm/i915: Shrink the GEM kmem_caches upon idling
6b10d025df62 drm/i915: Move i915_gem_retire_work_handler
7a90e2520dc4 drm/i915: Only attempt to scan the requested number of shrinker slabs
37fe06c7363f drm/i915: Use our singlethreaded wq for freeing objects
f0aafc85f53c drm/i915: Rename some shorthand lock classes
aa51bcad9865 drm/i915/fence: Separate timeout mechanism for awaiting on dma-fences
36dfbb4dd575 drm/i915: Only defer freeing of fence callback when also using the timer

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_7618/issues.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2018-01-05 13:03 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-01-05 12:37 Fun with signals Chris Wilson
2018-01-05 12:37 ` [PATCH 01/14] drm/i915: Only defer freeing of fence callback when also using the timer Chris Wilson
2018-01-05 12:37 ` [PATCH 02/14] drm/i915/fence: Separate timeout mechanism for awaiting on dma-fences Chris Wilson
2018-01-05 12:37 ` [PATCH 03/14] drm/i915: Rename some shorthand lock classes Chris Wilson
2018-01-05 12:37 ` [PATCH 04/14] drm/i915: Use our singlethreaded wq for freeing objects Chris Wilson
2018-01-05 12:37 ` [PATCH 05/14] drm/i915: Only attempt to scan the requested number of shrinker slabs Chris Wilson
2018-01-05 12:37 ` [PATCH 06/14] drm/i915: Move i915_gem_retire_work_handler Chris Wilson
2018-01-05 12:37 ` [PATCH 07/14] drm/i915: Shrink the GEM kmem_caches upon idling Chris Wilson
2018-01-05 12:37 ` [PATCH 08/14] drm/i915: Shrink the request kmem_cache on allocation error Chris Wilson
2018-01-05 12:37 ` [PATCH 09/14] drm/i915: Trim the retired request queue after submitting Chris Wilson
2018-01-05 12:37 ` [PATCH 10/14] drm/i915/breadcrumbs: Drop request reference for the signaler thread Chris Wilson
2018-01-05 12:37 ` [PATCH 11/14] drm/i915: Reduce spinlock hold time during notify_ring() interrupt Chris Wilson
2018-01-05 12:37 ` [PATCH 12/14] drm/i915: Move the irq_counter inside the spinlock Chris Wilson
2018-01-05 12:37 ` [PATCH 13/14] drm/i915: Only signal from interrupt when requested Chris Wilson
2018-01-05 12:37 ` [PATCH 14/14] drm/i915/breadcrumbs: Reduce signaler rbtree to a sorted list Chris Wilson
2018-01-05 13:03 ` ✓ Fi.CI.BAT: success for series starting with [01/14] drm/i915: Only defer freeing of fence callback when also using the timer Patchwork

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.