* Prevent trivial oom from gem_exec_nop/sequential
@ 2018-01-15 21:24 Chris Wilson
  2018-01-15 21:24 ` [PATCH 01/10] drm/i915: Only attempt to scan the requested number of shrinker slabs Chris Wilson
                   ` (15 more replies)
  0 siblings, 16 replies; 47+ messages in thread
From: Chris Wilson @ 2018-01-15 21:24 UTC (permalink / raw)
  To: intel-gfx

This is about the third resend of this series, which tries to curtail the
prolonged reference lifetimes that fences acquire via the signaler thread
when the CPUs are saturated by interrupts.
-Chris


* [PATCH 01/10] drm/i915: Only attempt to scan the requested number of shrinker slabs
  2018-01-15 21:24 Prevent trivial oom from gem_exec_nop/sequential Chris Wilson
@ 2018-01-15 21:24 ` Chris Wilson
  2018-01-17 10:29   ` Tvrtko Ursulin
  2018-01-15 21:24 ` [PATCH 02/10] drm/i915: Move i915_gem_retire_work_handler Chris Wilson
                   ` (14 subsequent siblings)
  15 siblings, 1 reply; 47+ messages in thread
From: Chris Wilson @ 2018-01-15 21:24 UTC (permalink / raw)
  To: intel-gfx

Since commit 4e773c3a8a69 ("drm/i915: Wire up shrinkctl->nr_scanned"),
we track the number of objects we scan and do not wish to exceed that, as
doing so would overly penalise our own slabs under memory pressure. Given
that we now know the target number of objects to scan, use that as our
guide for deciding whether to shrink further, as opposed to the number of
objects we managed to shrink (which does not correspond to the numbers we
report back to shrinkctl).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem_shrinker.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index 9029ed04879c..0e158f9287c4 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -363,13 +363,13 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
 				I915_SHRINK_BOUND |
 				I915_SHRINK_UNBOUND |
 				I915_SHRINK_PURGEABLE);
-	if (freed < sc->nr_to_scan)
+	if (sc->nr_scanned < sc->nr_to_scan)
 		freed += i915_gem_shrink(i915,
 					 sc->nr_to_scan - sc->nr_scanned,
 					 &sc->nr_scanned,
 					 I915_SHRINK_BOUND |
 					 I915_SHRINK_UNBOUND);
-	if (freed < sc->nr_to_scan && current_is_kswapd()) {
+	if (sc->nr_scanned < sc->nr_to_scan && current_is_kswapd()) {
 		intel_runtime_pm_get(i915);
 		freed += i915_gem_shrink(i915,
 					 sc->nr_to_scan - sc->nr_scanned,
-- 
2.15.1

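For illustration, a minimal sketch (not from this series) of the shrinker
contract the patch relies on: ->scan_objects() reports the number of
objects it examined through sc->nr_scanned, and that is what core reclaim
charges against the scan target, not the number actually freed.
example_free_one() is a hypothetical per-object reclaim helper.

#include <linux/shrinker.h>

static bool example_free_one(void); /* hypothetical: reclaim one object */

static unsigned long example_scan(struct shrinker *shrinker,
				  struct shrink_control *sc)
{
	unsigned long freed = 0;

	sc->nr_scanned = 0;
	while (sc->nr_scanned < sc->nr_to_scan) {
		sc->nr_scanned++;	/* one object examined... */
		if (example_free_one())
			freed++;	/* ...but not necessarily freed */
	}

	/* core reclaim accounts sc->nr_scanned; we return what we freed */
	return freed;
}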

* [PATCH 02/10] drm/i915: Move i915_gem_retire_work_handler
  2018-01-15 21:24 Prevent trivial oom from gem_exec_nop/sequential Chris Wilson
  2018-01-15 21:24 ` [PATCH 01/10] drm/i915: Only attempt to scan the requested number of shrinker slabs Chris Wilson
@ 2018-01-15 21:24 ` Chris Wilson
  2018-01-17 10:33   ` Tvrtko Ursulin
  2018-01-15 21:24 ` [PATCH 03/10] drm/i915: Shrink the GEM kmem_caches upon idling Chris Wilson
                   ` (13 subsequent siblings)
  15 siblings, 1 reply; 47+ messages in thread
From: Chris Wilson @ 2018-01-15 21:24 UTC (permalink / raw)
  To: intel-gfx

In preparation for the next patch, move i915_gem_retire_work_handler()
later to avoid a forward declaration.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c | 228 ++++++++++++++++++++--------------------
 1 file changed, 114 insertions(+), 114 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 87937c4f9dff..a8840a514377 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3310,120 +3310,6 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
 	return true;
 }
 
-static void
-i915_gem_retire_work_handler(struct work_struct *work)
-{
-	struct drm_i915_private *dev_priv =
-		container_of(work, typeof(*dev_priv), gt.retire_work.work);
-	struct drm_device *dev = &dev_priv->drm;
-
-	/* Come back later if the device is busy... */
-	if (mutex_trylock(&dev->struct_mutex)) {
-		i915_gem_retire_requests(dev_priv);
-		mutex_unlock(&dev->struct_mutex);
-	}
-
-	/* Keep the retire handler running until we are finally idle.
-	 * We do not need to do this test under locking as in the worst-case
-	 * we queue the retire worker once too often.
-	 */
-	if (READ_ONCE(dev_priv->gt.awake)) {
-		i915_queue_hangcheck(dev_priv);
-		queue_delayed_work(dev_priv->wq,
-				   &dev_priv->gt.retire_work,
-				   round_jiffies_up_relative(HZ));
-	}
-}
-
-static inline bool
-new_requests_since_last_retire(const struct drm_i915_private *i915)
-{
-	return (READ_ONCE(i915->gt.active_requests) ||
-		work_pending(&i915->gt.idle_work.work));
-}
-
-static void
-i915_gem_idle_work_handler(struct work_struct *work)
-{
-	struct drm_i915_private *dev_priv =
-		container_of(work, typeof(*dev_priv), gt.idle_work.work);
-	bool rearm_hangcheck;
-	ktime_t end;
-
-	if (!READ_ONCE(dev_priv->gt.awake))
-		return;
-
-	/*
-	 * Wait for last execlists context complete, but bail out in case a
-	 * new request is submitted.
-	 */
-	end = ktime_add_ms(ktime_get(), I915_IDLE_ENGINES_TIMEOUT);
-	do {
-		if (new_requests_since_last_retire(dev_priv))
-			return;
-
-		if (intel_engines_are_idle(dev_priv))
-			break;
-
-		usleep_range(100, 500);
-	} while (ktime_before(ktime_get(), end));
-
-	rearm_hangcheck =
-		cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
-
-	if (!mutex_trylock(&dev_priv->drm.struct_mutex)) {
-		/* Currently busy, come back later */
-		mod_delayed_work(dev_priv->wq,
-				 &dev_priv->gt.idle_work,
-				 msecs_to_jiffies(50));
-		goto out_rearm;
-	}
-
-	/*
-	 * New request retired after this work handler started, extend active
-	 * period until next instance of the work.
-	 */
-	if (new_requests_since_last_retire(dev_priv))
-		goto out_unlock;
-
-	/*
-	 * Be paranoid and flush a concurrent interrupt to make sure
-	 * we don't reactivate any irq tasklets after parking.
-	 *
-	 * FIXME: Note that even though we have waited for execlists to be idle,
-	 * there may still be an in-flight interrupt even though the CSB
-	 * is now empty. synchronize_irq() makes sure that a residual interrupt
-	 * is completed before we continue, but it doesn't prevent the HW from
-	 * raising a spurious interrupt later. To complete the shield we should
-	 * coordinate disabling the CS irq with flushing the interrupts.
-	 */
-	synchronize_irq(dev_priv->drm.irq);
-
-	intel_engines_park(dev_priv);
-	i915_gem_timelines_park(dev_priv);
-
-	i915_pmu_gt_parked(dev_priv);
-
-	GEM_BUG_ON(!dev_priv->gt.awake);
-	dev_priv->gt.awake = false;
-	rearm_hangcheck = false;
-
-	if (INTEL_GEN(dev_priv) >= 6)
-		gen6_rps_idle(dev_priv);
-
-	intel_display_power_put(dev_priv, POWER_DOMAIN_GT_IRQ);
-
-	intel_runtime_pm_put(dev_priv);
-out_unlock:
-	mutex_unlock(&dev_priv->drm.struct_mutex);
-
-out_rearm:
-	if (rearm_hangcheck) {
-		GEM_BUG_ON(!dev_priv->gt.awake);
-		i915_queue_hangcheck(dev_priv);
-	}
-}
-
 void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
 {
 	struct drm_i915_private *i915 = to_i915(gem->dev);
@@ -4798,6 +4684,120 @@ void i915_gem_sanitize(struct drm_i915_private *i915)
 	}
 }
 
+static void
+i915_gem_retire_work_handler(struct work_struct *work)
+{
+	struct drm_i915_private *dev_priv =
+		container_of(work, typeof(*dev_priv), gt.retire_work.work);
+	struct drm_device *dev = &dev_priv->drm;
+
+	/* Come back later if the device is busy... */
+	if (mutex_trylock(&dev->struct_mutex)) {
+		i915_gem_retire_requests(dev_priv);
+		mutex_unlock(&dev->struct_mutex);
+	}
+
+	/* Keep the retire handler running until we are finally idle.
+	 * We do not need to do this test under locking as in the worst-case
+	 * we queue the retire worker once too often.
+	 */
+	if (READ_ONCE(dev_priv->gt.awake)) {
+		i915_queue_hangcheck(dev_priv);
+		queue_delayed_work(dev_priv->wq,
+				   &dev_priv->gt.retire_work,
+				   round_jiffies_up_relative(HZ));
+	}
+}
+
+static inline bool
+new_requests_since_last_retire(const struct drm_i915_private *i915)
+{
+	return (READ_ONCE(i915->gt.active_requests) ||
+		work_pending(&i915->gt.idle_work.work));
+}
+
+static void
+i915_gem_idle_work_handler(struct work_struct *work)
+{
+	struct drm_i915_private *dev_priv =
+		container_of(work, typeof(*dev_priv), gt.idle_work.work);
+	bool rearm_hangcheck;
+	ktime_t end;
+
+	if (!READ_ONCE(dev_priv->gt.awake))
+		return;
+
+	/*
+	 * Wait for last execlists context complete, but bail out in case a
+	 * new request is submitted.
+	 */
+	end = ktime_add_ms(ktime_get(), I915_IDLE_ENGINES_TIMEOUT);
+	do {
+		if (new_requests_since_last_retire(dev_priv))
+			return;
+
+		if (intel_engines_are_idle(dev_priv))
+			break;
+
+		usleep_range(100, 500);
+	} while (ktime_before(ktime_get(), end));
+
+	rearm_hangcheck =
+		cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
+
+	if (!mutex_trylock(&dev_priv->drm.struct_mutex)) {
+		/* Currently busy, come back later */
+		mod_delayed_work(dev_priv->wq,
+				 &dev_priv->gt.idle_work,
+				 msecs_to_jiffies(50));
+		goto out_rearm;
+	}
+
+	/*
+	 * New request retired after this work handler started, extend active
+	 * period until next instance of the work.
+	 */
+	if (new_requests_since_last_retire(dev_priv))
+		goto out_unlock;
+
+	/*
+	 * Be paranoid and flush a concurrent interrupt to make sure
+	 * we don't reactivate any irq tasklets after parking.
+	 *
+	 * FIXME: Note that even though we have waited for execlists to be idle,
+	 * there may still be an in-flight interrupt even though the CSB
+	 * is now empty. synchronize_irq() makes sure that a residual interrupt
+	 * is completed before we continue, but it doesn't prevent the HW from
+	 * raising a spurious interrupt later. To complete the shield we should
+	 * coordinate disabling the CS irq with flushing the interrupts.
+	 */
+	synchronize_irq(dev_priv->drm.irq);
+
+	intel_engines_park(dev_priv);
+	i915_gem_timelines_park(dev_priv);
+
+	i915_pmu_gt_parked(dev_priv);
+
+	GEM_BUG_ON(!dev_priv->gt.awake);
+	dev_priv->gt.awake = false;
+	rearm_hangcheck = false;
+
+	if (INTEL_GEN(dev_priv) >= 6)
+		gen6_rps_idle(dev_priv);
+
+	intel_display_power_put(dev_priv, POWER_DOMAIN_GT_IRQ);
+
+	intel_runtime_pm_put(dev_priv);
+out_unlock:
+	mutex_unlock(&dev_priv->drm.struct_mutex);
+
+out_rearm:
+	if (rearm_hangcheck) {
+		GEM_BUG_ON(!dev_priv->gt.awake);
+		i915_queue_hangcheck(dev_priv);
+	}
+}
+
 int i915_gem_suspend(struct drm_i915_private *dev_priv)
 {
 	struct drm_device *dev = &dev_priv->drm;
-- 
2.15.1

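For reference, the alternative this code motion avoids would be a forward
declaration ahead of the first user, along the lines of:

static void i915_gem_retire_work_handler(struct work_struct *work);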

* [PATCH 03/10] drm/i915: Shrink the GEM kmem_caches upon idling
  2018-01-15 21:24 Prevent trivial oom from gem_exec_nop/sequential Chris Wilson
  2018-01-15 21:24 ` [PATCH 01/10] drm/i915: Only attempt to scan the requested number of shrinker slabs Chris Wilson
  2018-01-15 21:24 ` [PATCH 02/10] drm/i915: Move i915_gem_retire_work_handler Chris Wilson
@ 2018-01-15 21:24 ` Chris Wilson
  2018-01-16 10:00   ` Tvrtko Ursulin
  2018-01-15 21:24 ` [PATCH 04/10] drm/i915: Shrink the request kmem_cache on allocation error Chris Wilson
                   ` (12 subsequent siblings)
  15 siblings, 1 reply; 47+ messages in thread
From: Chris Wilson @ 2018-01-15 21:24 UTC (permalink / raw)
  To: intel-gfx

When we finally decide the gpu is idle, that is a good time to shrink
our kmem_caches.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a8840a514377..8547f5214599 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4709,6 +4709,21 @@ i915_gem_retire_work_handler(struct work_struct *work)
 	}
 }
 
+static void shrink_caches(struct drm_i915_private *i915)
+{
+	/*
+	 * kmem_cache_shrink() discards empty slabs and reorders partially
+	 * filled slabs to prioritise allocating from the mostly full slabs,
+	 * with the aim of reducing fragmentation.
+	 */
+	kmem_cache_shrink(i915->priorities);
+	kmem_cache_shrink(i915->dependencies);
+	kmem_cache_shrink(i915->requests);
+	kmem_cache_shrink(i915->luts);
+	kmem_cache_shrink(i915->vmas);
+	kmem_cache_shrink(i915->objects);
+}
+
 static inline bool
 new_requests_since_last_retire(const struct drm_i915_private *i915)
 {
@@ -4796,6 +4811,13 @@ i915_gem_idle_work_handler(struct work_struct *work)
 		GEM_BUG_ON(!dev_priv->gt.awake);
 		i915_queue_hangcheck(dev_priv);
 	}
+
+	rcu_barrier();
+
+	if (!new_requests_since_last_retire(dev_priv)) {
+		__i915_gem_free_work(&dev_priv->mm.free_work);
+		shrink_caches(dev_priv);
+	}
 }
 
 int i915_gem_suspend(struct drm_i915_private *dev_priv)
-- 
2.15.1

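To spell out the ordering the new hunk depends on, here is a hedged
sketch of the idle-time flow (assuming, as elsewhere in i915, that freed
objects travel through call_rcu() before landing on the mm.free_list):

static void idle_trim_example(struct drm_i915_private *i915)
{
	/*
	 * Wait for the deferred RCU frees to complete so that the
	 * kmem_caches can actually contain empty slabs by the time we
	 * try to shrink them.
	 */
	rcu_barrier();

	/* Only trim if the GPU stayed idle while we waited. */
	if (!new_requests_since_last_retire(i915)) {
		__i915_gem_free_work(&i915->mm.free_work); /* drain free list */
		shrink_caches(i915); /* return empty slabs to the page allocator */
	}
}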

* [PATCH 04/10] drm/i915: Shrink the request kmem_cache on allocation error
  2018-01-15 21:24 Prevent trivial oom from gem_exec_nop/sequential Chris Wilson
                   ` (2 preceding siblings ...)
  2018-01-15 21:24 ` [PATCH 03/10] drm/i915: Shrink the GEM kmem_caches upon idling Chris Wilson
@ 2018-01-15 21:24 ` Chris Wilson
  2018-01-16 10:10   ` Tvrtko Ursulin
  2018-01-15 21:24 ` [PATCH 05/10] drm/i915: Trim the retired request queue after submitting Chris Wilson
                   ` (11 subsequent siblings)
  15 siblings, 1 reply; 47+ messages in thread
From: Chris Wilson @ 2018-01-15 21:24 UTC (permalink / raw)
  To: intel-gfx

If we fail to allocate a new request, make sure we recover the pages
that are still in the process of being freed, by inserting an RCU
barrier before retrying the allocation.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_request.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 72bdc203716f..e6d4857b1f78 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -696,6 +696,9 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 		if (ret)
 			goto err_unreserve;
 
+		kmem_cache_shrink(dev_priv->requests);
+		rcu_barrier();
+
 		req = kmem_cache_alloc(dev_priv->requests, GFP_KERNEL);
 		if (!req) {
 			ret = -ENOMEM;
-- 
2.15.1

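As a hedged sketch of the general idiom (independent of i915; the cache
and function names are stand-ins), the failure path gives the slab a
chance to reclaim memory still queued behind RCU before the final,
blocking attempt:

#include <linux/slab.h>
#include <linux/rcupdate.h>

static void *alloc_with_reclaim(struct kmem_cache *cache)
{
	void *obj;

	obj = kmem_cache_alloc(cache,
			       GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
	if (obj)
		return obj;

	kmem_cache_shrink(cache);	/* discard empty slabs */
	rcu_barrier();			/* let in-flight RCU frees land */

	return kmem_cache_alloc(cache, GFP_KERNEL); /* may still fail */
}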

* [PATCH 05/10] drm/i915: Trim the retired request queue after submitting
  2018-01-15 21:24 Prevent trivial oom from gem_exec_nop/sequential Chris Wilson
                   ` (3 preceding siblings ...)
  2018-01-15 21:24 ` [PATCH 04/10] drm/i915: Shrink the request kmem_cache on allocation error Chris Wilson
@ 2018-01-15 21:24 ` Chris Wilson
  2018-01-16 10:18   ` Tvrtko Ursulin
  2018-01-15 21:24 ` [PATCH 06/10] drm/i915/breadcrumbs: Drop request reference for the signaler thread Chris Wilson
                   ` (10 subsequent siblings)
  15 siblings, 1 reply; 47+ messages in thread
From: Chris Wilson @ 2018-01-15 21:24 UTC (permalink / raw)
  To: intel-gfx

If we submit a request and see that the previous request on this
timeline was already signaled, then first we do not need to add the
dependency tracker for that completed request, and second we know that
there is a large backlog of retirements pending on this timeline. Given
that we just submitted more work to the HW, now would be a good time to
catch up on those retirements.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem_request.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index e6d4857b1f78..6a143099cea1 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -1019,7 +1019,7 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 
 	prev = i915_gem_active_raw(&timeline->last_request,
 				   &request->i915->drm.struct_mutex);
-	if (prev) {
+	if (prev && !i915_gem_request_completed(prev)) {
 		i915_sw_fence_await_sw_fence(&request->submit, &prev->submit,
 					     &request->submitq);
 		if (engine->schedule)
@@ -1055,6 +1055,9 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 	local_bh_disable();
 	i915_sw_fence_commit(&request->submit);
 	local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
+
+	if (prev && i915_gem_request_completed(prev))
+		i915_gem_request_retire_upto(prev);
 }
 
 static unsigned long local_clock_us(unsigned int *cpu)
-- 
2.15.1

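For clarity, a hedged sketch of roughly what
i915_gem_request_retire_upto() does with that backlog: it walks the
timeline's request list from the oldest entry and retires everything up
to and including the given completed request (field names illustrative):

static void retire_upto_example(struct drm_i915_gem_request *rq)
{
	struct intel_timeline *tl = rq->timeline;
	struct drm_i915_gem_request *tmp;

	GEM_BUG_ON(!i915_gem_request_completed(rq));

	do {
		tmp = list_first_entry(&tl->requests, typeof(*tmp), link);
		i915_gem_request_retire(tmp);	/* oldest first */
	} while (tmp != rq);
}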

* [PATCH 06/10] drm/i915/breadcrumbs: Drop request reference for the signaler thread
  2018-01-15 21:24 Prevent trivial oom from gem_exec_nop/sequential Chris Wilson
                   ` (4 preceding siblings ...)
  2018-01-15 21:24 ` [PATCH 05/10] drm/i915: Trim the retired request queue after submitting Chris Wilson
@ 2018-01-15 21:24 ` Chris Wilson
  2018-01-15 21:24 ` [PATCH 07/10] drm/i915: Reduce spinlock hold time during notify_ring() interrupt Chris Wilson
                   ` (9 subsequent siblings)
  15 siblings, 0 replies; 47+ messages in thread
From: Chris Wilson @ 2018-01-15 21:24 UTC (permalink / raw)
  To: intel-gfx

If we remember to cancel the signaler on a request when retiring it
(after we know that the request has been signaled), we do not need to
carry an additional reference to the request in the signaler thread
itself. This prevents an issue whereby the signaler threads may be
delayed and hold on to thousands of request references, causing severe
memory fragmentation and premature oom (most noticeable on 32b snb due
to the limited GFP_KERNEL and frequent use of inter-engine fences).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_request.c  |   6 +-
 drivers/gpu/drm/i915/intel_breadcrumbs.c | 149 +++++++++++++++++--------------
 2 files changed, 85 insertions(+), 70 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 6a143099cea1..836db90ef81b 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -441,7 +441,10 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
 	spin_lock_irq(&request->lock);
 	if (request->waitboost)
 		atomic_dec(&request->i915->gt_pm.rps.num_waiters);
-	dma_fence_signal_locked(&request->fence);
+	if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags))
+		dma_fence_signal_locked(&request->fence);
+	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
+		intel_engine_cancel_signaling(request);
 	spin_unlock_irq(&request->lock);
 
 	i915_priotree_fini(request->i915, &request->priotree);
@@ -730,6 +733,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 
 	/* No zalloc, must clear what we need by hand */
 	req->global_seqno = 0;
+	req->signaling.wait.seqno = 0;
 	req->file_priv = NULL;
 	req->batch = NULL;
 	req->capture_list = NULL;
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 86acac010bb8..e3667dc1e96d 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -235,7 +235,7 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
 	struct intel_wait *wait, *n;
 
 	if (!b->irq_armed)
-		goto wakeup_signaler;
+		return;
 
 	/*
 	 * We only disarm the irq when we are idle (all requests completed),
@@ -260,14 +260,6 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
 	b->waiters = RB_ROOT;
 
 	spin_unlock_irq(&b->rb_lock);
-
-	/*
-	 * The signaling thread may be asleep holding a reference to a request,
-	 * that had its signaling cancelled prior to being preempted. We need
-	 * to kick the signaler, just in case, to release any such reference.
-	 */
-wakeup_signaler:
-	wake_up_process(b->signaler);
 }
 
 static bool use_fake_irq(const struct intel_breadcrumbs *b)
@@ -644,6 +636,62 @@ static void signaler_set_rtpriority(void)
 	 sched_setscheduler_nocheck(current, SCHED_FIFO, &param);
 }
 
+static void __intel_engine_remove_signal(struct intel_engine_cs *engine,
+					 struct drm_i915_gem_request *request)
+{
+	struct intel_breadcrumbs *b = &engine->breadcrumbs;
+
+	lockdep_assert_held(&b->rb_lock);
+
+	/*
+	 * Wake up all other completed waiters and select the
+	 * next bottom-half for the next user interrupt.
+	 */
+	__intel_engine_remove_wait(engine, &request->signaling.wait);
+
+	/*
+	 * Find the next oldest signal. Note that as we have
+	 * not been holding the lock, another client may
+	 * have installed an even older signal than the one
+	 * we just completed - so double check we are still
+	 * the oldest before picking the next one.
+	 */
+	if (request->signaling.wait.seqno) {
+		if (request == rcu_access_pointer(b->first_signal)) {
+			struct rb_node *rb = rb_next(&request->signaling.node);
+			rcu_assign_pointer(b->first_signal,
+					   rb ? to_signaler(rb) : NULL);
+		}
+
+		rb_erase(&request->signaling.node, &b->signals);
+		request->signaling.wait.seqno = 0;
+	}
+}
+
+static struct drm_i915_gem_request *first_signal(struct intel_breadcrumbs *b)
+{
+	/*
+	 * See the big warnings for i915_gem_active_get_rcu() and similarly
+	 * for dma_fence_get_rcu_safe() that explain the intricacies involved
+	 * here with defeating CPU/compiler speculation and enforcing
+	 * the required memory barriers.
+	 */
+	do {
+		struct drm_i915_gem_request *request;
+
+		request = rcu_dereference(b->first_signal);
+		if (request)
+			request = i915_gem_request_get_rcu(request);
+
+		barrier();
+
+		if (!request || request == rcu_access_pointer(b->first_signal))
+			return rcu_pointer_handoff(request);
+
+		i915_gem_request_put(request);
+	} while (1);
+}
+
 static int intel_breadcrumbs_signaler(void *arg)
 {
 	struct intel_engine_cs *engine = arg;
@@ -667,41 +715,21 @@ static int intel_breadcrumbs_signaler(void *arg)
 		 * a new client.
 		 */
 		rcu_read_lock();
-		request = rcu_dereference(b->first_signal);
-		if (request)
-			request = i915_gem_request_get_rcu(request);
+		request = first_signal(b);
 		rcu_read_unlock();
 		if (signal_complete(request)) {
-			local_bh_disable();
-			dma_fence_signal(&request->fence);
-			local_bh_enable(); /* kick start the tasklets */
-
-			spin_lock_irq(&b->rb_lock);
-
-			/* Wake up all other completed waiters and select the
-			 * next bottom-half for the next user interrupt.
-			 */
-			__intel_engine_remove_wait(engine,
-						   &request->signaling.wait);
-
-			/* Find the next oldest signal. Note that as we have
-			 * not been holding the lock, another client may
-			 * have installed an even older signal than the one
-			 * we just completed - so double check we are still
-			 * the oldest before picking the next one.
-			 */
-			if (request == rcu_access_pointer(b->first_signal)) {
-				struct rb_node *rb =
-					rb_next(&request->signaling.node);
-				rcu_assign_pointer(b->first_signal,
-						   rb ? to_signaler(rb) : NULL);
+			if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+				      &request->fence.flags)) {
+				local_bh_disable();
+				dma_fence_signal(&request->fence);
+				local_bh_enable(); /* kick start the tasklets */
 			}
-			rb_erase(&request->signaling.node, &b->signals);
-			RB_CLEAR_NODE(&request->signaling.node);
-
-			spin_unlock_irq(&b->rb_lock);
 
-			i915_gem_request_put(request);
+			if (request->signaling.wait.seqno) {
+				spin_lock_irq(&b->rb_lock);
+				__intel_engine_remove_signal(engine, request);
+				spin_unlock_irq(&b->rb_lock);
+			}
 
 			/* If the engine is saturated we may be continually
 			 * processing completed requests. This angers the
@@ -712,19 +740,17 @@ static int intel_breadcrumbs_signaler(void *arg)
 			 */
 			do_schedule = need_resched();
 		}
+		i915_gem_request_put(request);
 
 		if (unlikely(do_schedule)) {
 			if (kthread_should_park())
 				kthread_parkme();
 
-			if (unlikely(kthread_should_stop())) {
-				i915_gem_request_put(request);
+			if (unlikely(kthread_should_stop()))
 				break;
-			}
 
 			schedule();
 		}
-		i915_gem_request_put(request);
 	} while (1);
 	__set_current_state(TASK_RUNNING);
 
@@ -753,12 +779,12 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
 	if (!seqno)
 		return;
 
+	spin_lock(&b->rb_lock);
+
+	GEM_BUG_ON(request->signaling.wait.seqno);
 	request->signaling.wait.tsk = b->signaler;
 	request->signaling.wait.request = request;
 	request->signaling.wait.seqno = seqno;
-	i915_gem_request_get(request);
-
-	spin_lock(&b->rb_lock);
 
 	/* First add ourselves into the list of waiters, but register our
 	 * bottom-half as the signaller thread. As per usual, only the oldest
@@ -797,7 +823,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
 			rcu_assign_pointer(b->first_signal, request);
 	} else {
 		__intel_engine_remove_wait(engine, &request->signaling.wait);
-		i915_gem_request_put(request);
+		request->signaling.wait.seqno = 0;
 		wakeup = false;
 	}
 
@@ -809,32 +835,17 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
 
 void intel_engine_cancel_signaling(struct drm_i915_gem_request *request)
 {
-	struct intel_engine_cs *engine = request->engine;
-	struct intel_breadcrumbs *b = &engine->breadcrumbs;
-
 	GEM_BUG_ON(!irqs_disabled());
 	lockdep_assert_held(&request->lock);
-	GEM_BUG_ON(!request->signaling.wait.seqno);
 
-	spin_lock(&b->rb_lock);
+	if (request->signaling.wait.seqno) {
+		struct intel_engine_cs *engine = request->engine;
+		struct intel_breadcrumbs *b = &engine->breadcrumbs;
 
-	if (!RB_EMPTY_NODE(&request->signaling.node)) {
-		if (request == rcu_access_pointer(b->first_signal)) {
-			struct rb_node *rb =
-				rb_next(&request->signaling.node);
-			rcu_assign_pointer(b->first_signal,
-					   rb ? to_signaler(rb) : NULL);
-		}
-		rb_erase(&request->signaling.node, &b->signals);
-		RB_CLEAR_NODE(&request->signaling.node);
-		i915_gem_request_put(request);
+		spin_lock(&b->rb_lock);
+		__intel_engine_remove_signal(engine, request);
+		spin_unlock(&b->rb_lock);
 	}
-
-	__intel_engine_remove_wait(engine, &request->signaling.wait);
-
-	spin_unlock(&b->rb_lock);
-
-	request->signaling.wait.seqno = 0;
 }
 
 int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
-- 
2.15.1

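The first_signal() helper above follows the standard RCU lookup pattern:
acquire a reference, then re-check that the slot still points at the
same object; if not, drop the reference and retry. A hedged, generic
sketch with a hypothetical kref-counted type (caller must hold
rcu_read_lock()):

struct example {
	struct kref ref;
	struct rcu_head rcu;
};

static void example_release(struct kref *ref)
{
	struct example *e = container_of(ref, struct example, ref);

	kfree_rcu(e, rcu);	/* frees are deferred past the grace period */
}

static struct example *get_first_rcu(struct example __rcu **slot)
{
	do {
		struct example *e;

		e = rcu_dereference(*slot);
		if (e && !kref_get_unless_zero(&e->ref))
			e = NULL;	/* object already on its way out */

		barrier();	/* defeat speculation before the re-read */

		if (!e || e == rcu_access_pointer(*slot))
			return rcu_pointer_handoff(e);

		/* slot was updated behind our back; drop and retry */
		kref_put(&e->ref, example_release);
	} while (1);
}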

* [PATCH 07/10] drm/i915: Reduce spinlock hold time during notify_ring() interrupt
  2018-01-15 21:24 Prevent trivial oom from gem_exec_nop/sequential Chris Wilson
                   ` (5 preceding siblings ...)
  2018-01-15 21:24 ` [PATCH 06/10] drm/i915/breadcrumbs: Drop request reference for the signaler thread Chris Wilson
@ 2018-01-15 21:24 ` Chris Wilson
  2018-01-17 10:45   ` Tvrtko Ursulin
  2018-01-15 21:24 ` [PATCH 08/10] drm/i915: Move the irq_counter inside the spinlock Chris Wilson
                   ` (8 subsequent siblings)
  15 siblings, 1 reply; 47+ messages in thread
From: Chris Wilson @ 2018-01-15 21:24 UTC (permalink / raw)
  To: intel-gfx

By taking advantage of the RCU protection of the task struct, we can find
the appropriate signaler under the spinlock and then release the spinlock
before waking the task and signaling the fence.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_irq.c | 29 +++++++++++++++++++----------
 1 file changed, 19 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 3517c6548e2c..0b272501b738 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1065,21 +1065,24 @@ static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv)
 
 static void notify_ring(struct intel_engine_cs *engine)
 {
+	const u32 seqno = intel_engine_get_seqno(engine);
 	struct drm_i915_gem_request *rq = NULL;
+	struct task_struct *tsk = NULL;
 	struct intel_wait *wait;
 
-	if (!engine->breadcrumbs.irq_armed)
+	if (unlikely(!engine->breadcrumbs.irq_armed))
 		return;
 
 	atomic_inc(&engine->irq_count);
 	set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
 
+	rcu_read_lock();
+
 	spin_lock(&engine->breadcrumbs.irq_lock);
 	wait = engine->breadcrumbs.irq_wait;
 	if (wait) {
-		bool wakeup = engine->irq_seqno_barrier;
-
-		/* We use a callback from the dma-fence to submit
+		/*
+		 * We use a callback from the dma-fence to submit
 		 * requests after waiting on our own requests. To
 		 * ensure minimum delay in queuing the next request to
 		 * hardware, signal the fence now rather than wait for
@@ -1090,19 +1093,20 @@ static void notify_ring(struct intel_engine_cs *engine)
 		 * and to handle coalescing of multiple seqno updates
 		 * and many waiters.
 		 */
-		if (i915_seqno_passed(intel_engine_get_seqno(engine),
-				      wait->seqno)) {
+		if (i915_seqno_passed(seqno, wait->seqno)) {
 			struct drm_i915_gem_request *waiter = wait->request;
 
-			wakeup = true;
 			if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
 				      &waiter->fence.flags) &&
 			    intel_wait_check_request(wait, waiter))
 				rq = i915_gem_request_get(waiter);
-		}
 
-		if (wakeup)
-			wake_up_process(wait->tsk);
+			tsk = wait->tsk;
+		} else {
+			if (engine->irq_seqno_barrier &&
+			    i915_seqno_passed(seqno, wait->seqno - 1))
+				tsk = wait->tsk;
+		}
 	} else {
 		if (engine->breadcrumbs.irq_armed)
 			__intel_engine_disarm_breadcrumbs(engine);
@@ -1114,6 +1118,11 @@ static void notify_ring(struct intel_engine_cs *engine)
 		i915_gem_request_put(rq);
 	}
 
+	if (tsk && tsk->state != TASK_RUNNING)
+		wake_up_process(tsk);
+
+	rcu_read_unlock();
+
 	trace_intel_engine_notify(engine, wait);
 }
 
-- 
2.15.1

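A hedged sketch of the locking pattern being exploited: because the
task_struct is freed via RCU, a task pointer snapshotted under the
spinlock remains safe to pass to wake_up_process() after the lock is
dropped, for as long as we remain inside the RCU read-side section:

static void wake_outside_lock(spinlock_t *lock, struct task_struct **waiter)
{
	struct task_struct *tsk;

	rcu_read_lock();

	spin_lock(lock);
	tsk = *waiter;		/* snapshot under the lock */
	spin_unlock(lock);

	/* tsk cannot be freed until rcu_read_unlock() */
	if (tsk && tsk->state != TASK_RUNNING)
		wake_up_process(tsk);

	rcu_read_unlock();
}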

* [PATCH 08/10] drm/i915: Move the irq_counter inside the spinlock
  2018-01-15 21:24 Prevent trivial oom from gem_exec_nop/sequential Chris Wilson
                   ` (6 preceding siblings ...)
  2018-01-15 21:24 ` [PATCH 07/10] drm/i915: Reduce spinlock hold time during notify_ring() interrupt Chris Wilson
@ 2018-01-15 21:24 ` Chris Wilson
  2018-01-17 12:12   ` Tvrtko Ursulin
  2018-01-15 21:24 ` [PATCH 09/10] drm/i915: Only signal from interrupt when requested Chris Wilson
                   ` (7 subsequent siblings)
  15 siblings, 1 reply; 47+ messages in thread
From: Chris Wilson @ 2018-01-15 21:24 UTC (permalink / raw)
  To: intel-gfx

Rather than execute multiple locked instructions inside the
notify_ring() irq handler, move the counter and bit updates inside the
spinlock and reduce them to their non-atomic variants.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_request.c  |  4 ++--
 drivers/gpu/drm/i915/i915_irq.c          |  6 +++---
 drivers/gpu/drm/i915/intel_breadcrumbs.c | 13 ++++++++-----
 drivers/gpu/drm/i915/intel_ringbuffer.h  |  2 +-
 4 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 836db90ef81b..08bbd56277e5 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -1128,7 +1128,7 @@ static bool __i915_spin_request(const struct drm_i915_gem_request *req,
 	 * takes to sleep on a request, on the order of a microsecond.
 	 */
 
-	irq = atomic_read(&engine->irq_count);
+	irq = READ_ONCE(engine->breadcrumbs.irq_count);
 	timeout_us += local_clock_us(&cpu);
 	do {
 		if (i915_seqno_passed(intel_engine_get_seqno(engine), seqno))
@@ -1139,7 +1139,7 @@ static bool __i915_spin_request(const struct drm_i915_gem_request *req,
 		 * assume we won't see one in the near future but require
 		 * the engine->seqno_barrier() to fixup coherency.
 		 */
-		if (atomic_read(&engine->irq_count) != irq)
+		if (READ_ONCE(engine->breadcrumbs.irq_count) != irq)
 			break;
 
 		if (signal_pending_state(state, current))
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 0b272501b738..e5f76d580010 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1073,9 +1073,6 @@ static void notify_ring(struct intel_engine_cs *engine)
 	if (unlikely(!engine->breadcrumbs.irq_armed))
 		return;
 
-	atomic_inc(&engine->irq_count);
-	set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
-
 	rcu_read_lock();
 
 	spin_lock(&engine->breadcrumbs.irq_lock);
@@ -1107,6 +1104,9 @@ static void notify_ring(struct intel_engine_cs *engine)
 			    i915_seqno_passed(seqno, wait->seqno - 1))
 				tsk = wait->tsk;
 		}
+
+		engine->breadcrumbs.irq_count++;
+		__set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
 	} else {
 		if (engine->breadcrumbs.irq_armed)
 			__intel_engine_disarm_breadcrumbs(engine);
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index e3667dc1e96d..7c82cfe23922 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -98,12 +98,14 @@ static void intel_breadcrumbs_hangcheck(struct timer_list *t)
 	struct intel_engine_cs *engine =
 		from_timer(engine, t, breadcrumbs.hangcheck);
 	struct intel_breadcrumbs *b = &engine->breadcrumbs;
+	unsigned int irq_count;
 
 	if (!b->irq_armed)
 		return;
 
-	if (b->hangcheck_interrupts != atomic_read(&engine->irq_count)) {
-		b->hangcheck_interrupts = atomic_read(&engine->irq_count);
+	irq_count = READ_ONCE(b->irq_count);
+	if (b->hangcheck_interrupts != irq_count) {
+		b->hangcheck_interrupts = irq_count;
 		mod_timer(&b->hangcheck, wait_timeout());
 		return;
 	}
@@ -176,7 +178,7 @@ static void irq_enable(struct intel_engine_cs *engine)
 	 * we still need to force the barrier before reading the seqno,
 	 * just in case.
 	 */
-	set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
+	__set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
 
 	/* Caller disables interrupts */
 	spin_lock(&engine->i915->irq_lock);
@@ -270,13 +272,14 @@ static bool use_fake_irq(const struct intel_breadcrumbs *b)
 	if (!test_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings))
 		return false;
 
-	/* Only start with the heavy weight fake irq timer if we have not
+	/*
+	 * Only start with the heavy weight fake irq timer if we have not
 	 * seen any interrupts since enabling it the first time. If the
 	 * interrupts are still arriving, it means we made a mistake in our
 	 * engine->seqno_barrier(), a timing error that should be transient
 	 * and unlikely to reoccur.
 	 */
-	return atomic_read(&engine->irq_count) == b->hangcheck_interrupts;
+	return READ_ONCE(b->irq_count) == b->hangcheck_interrupts;
 }
 
 static void enable_fake_irq(struct intel_breadcrumbs *b)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index c5ff203e42d6..f406d0ff4612 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -305,7 +305,6 @@ struct intel_engine_cs {
 
 	struct drm_i915_gem_object *default_state;
 
-	atomic_t irq_count;
 	unsigned long irq_posted;
 #define ENGINE_IRQ_BREADCRUMB 0
 #define ENGINE_IRQ_EXECLIST 1
@@ -340,6 +339,7 @@ struct intel_engine_cs {
 
 		unsigned int hangcheck_interrupts;
 		unsigned int irq_enabled;
+		unsigned int irq_count;
 
 		bool irq_armed : 1;
 		I915_SELFTEST_DECLARE(bool mock : 1);
-- 
2.15.1

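A hedged, generic sketch of the transformation: once all writers are
serialised by a spinlock, the counter can be a plain integer updated
with ordinary stores, and the lockless reader only needs READ_ONCE()
for a tear-free snapshot (names are illustrative):

struct counter_example {
	spinlock_t lock;
	unsigned int count;	/* written only under lock */
};

static void producer(struct counter_example *c)
{
	spin_lock(&c->lock);
	c->count++;		/* plain increment, no locked instruction */
	spin_unlock(&c->lock);
}

static bool has_advanced(struct counter_example *c, unsigned int snapshot)
{
	return READ_ONCE(c->count) != snapshot;	/* lockless reader */
}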

* [PATCH 09/10] drm/i915: Only signal from interrupt when requested
  2018-01-15 21:24 Prevent trivial oom from gem_exec_nop/sequential Chris Wilson
                   ` (7 preceding siblings ...)
  2018-01-15 21:24 ` [PATCH 08/10] drm/i915: Move the irq_counter inside the spinlock Chris Wilson
@ 2018-01-15 21:24 ` Chris Wilson
  2018-01-17 12:22   ` Tvrtko Ursulin
  2018-01-15 21:24 ` [PATCH 10/10] drm/i915/breadcrumbs: Reduce signaler rbtree to a sorted list Chris Wilson
                   ` (6 subsequent siblings)
  15 siblings, 1 reply; 47+ messages in thread
From: Chris Wilson @ 2018-01-15 21:24 UTC (permalink / raw)
  To: intel-gfx

Avoid calling dma_fence_signal() from inside the interrupt if we haven't
enabled signaling on the request.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_request.c | 2 +-
 drivers/gpu/drm/i915/i915_irq.c         | 3 ++-
 drivers/gpu/drm/i915/intel_ringbuffer.h | 5 ++---
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 08bbd56277e5..46848aef1648 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -1214,7 +1214,7 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 	if (flags & I915_WAIT_LOCKED)
 		add_wait_queue(errq, &reset);
 
-	intel_wait_init(&wait, req);
+	intel_wait_init(&wait);
 
 restart:
 	do {
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index e5f76d580010..ea290f102784 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1093,7 +1093,8 @@ static void notify_ring(struct intel_engine_cs *engine)
 		if (i915_seqno_passed(seqno, wait->seqno)) {
 			struct drm_i915_gem_request *waiter = wait->request;
 
-			if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+			if (waiter &&
+			    !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
 				      &waiter->fence.flags) &&
 			    intel_wait_check_request(wait, waiter))
 				rq = i915_gem_request_get(waiter);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index f406d0ff4612..cea2092d25d9 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -874,11 +874,10 @@ static inline u32 intel_hws_preempt_done_address(struct intel_engine_cs *engine)
 /* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */
 int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
 
-static inline void intel_wait_init(struct intel_wait *wait,
-				   struct drm_i915_gem_request *rq)
+static inline void intel_wait_init(struct intel_wait *wait)
 {
 	wait->tsk = current;
-	wait->request = rq;
+	wait->request = NULL;
 }
 
 static inline void intel_wait_init_for_seqno(struct intel_wait *wait, u32 seqno)
-- 
2.15.1


* [PATCH 10/10] drm/i915/breadcrumbs: Reduce signaler rbtree to a sorted list
  2018-01-15 21:24 Prevent trivial oom from gem_exec_nop/sequential Chris Wilson
                   ` (8 preceding siblings ...)
  2018-01-15 21:24 ` [PATCH 09/10] drm/i915: Only signal from interrupt when requested Chris Wilson
@ 2018-01-15 21:24 ` Chris Wilson
  2018-01-15 22:04 ` ✓ Fi.CI.BAT: success for series starting with [01/10] drm/i915: Only attempt to scan the requested number of shrinker slabs Patchwork
                   ` (5 subsequent siblings)
  15 siblings, 0 replies; 47+ messages in thread
From: Chris Wilson @ 2018-01-15 21:24 UTC (permalink / raw)
  To: intel-gfx

The goal here is to reduce the latency of signaling additional requests
following the wakeup from interrupt, by reducing the list of
to-be-signaled requests from an rbtree to a sorted linked list. The
original choice of an rbtree was to facilitate random insertions of
requests into the signaler while maintaining a sorted list. However, if
we assume that most new requests are added when they are submitted, we
see those new requests in execution order, making an insertion sort fast
and the reduction in overhead of each signaler iteration significant.

Since commit 56299fb7d904 ("drm/i915: Signal first fence from irq handler
if complete"), we signal most fences directly from notify_ring() in the
interrupt handler greatly reducing the amount of work that actually
needs to be done by the signaler kthread. All the thread is then
required to do is operate as the bottom-half, cleaning up after the
interrupt handler and preparing the next waiter. This includes signaling
all later completed fences in a saturated system, but on a mostly idle
system we only have to rebuild the wait rbtree in time for the next
interrupt. With this de-emphasis of the signaler's role, we want to
rejig its data structures to reduce the amount of work we require both
to set up the signal tree and to maintain it on every interrupt.

References: 56299fb7d904 ("drm/i915: Signal first fence from irq handler if complete")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem_request.h  |   2 +-
 drivers/gpu/drm/i915/intel_breadcrumbs.c | 281 +++++++++++++------------------
 drivers/gpu/drm/i915/intel_ringbuffer.h  |   4 +-
 3 files changed, 117 insertions(+), 170 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 6c607f8dbf92..a2dd3d71c0f8 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -44,8 +44,8 @@ struct intel_wait {
 };
 
 struct intel_signal_node {
-	struct rb_node node;
 	struct intel_wait wait;
+	struct list_head link;
 };
 
 struct i915_dependency {
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 7c82cfe23922..8a69bd69070f 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -350,7 +350,8 @@ static inline void __intel_breadcrumbs_finish(struct intel_breadcrumbs *b,
 	lockdep_assert_held(&b->rb_lock);
 	GEM_BUG_ON(b->irq_wait == wait);
 
-	/* This request is completed, so remove it from the tree, mark it as
+	/*
+	 * This request is completed, so remove it from the tree, mark it as
 	 * complete, and *then* wake up the associated task. N.B. when the
 	 * task wakes up, it will find the empty rb_node, discern that it
 	 * has already been removed from the tree and skip the serialisation
@@ -361,7 +362,8 @@ static inline void __intel_breadcrumbs_finish(struct intel_breadcrumbs *b,
 	rb_erase(&wait->node, &b->waiters);
 	RB_CLEAR_NODE(&wait->node);
 
-	wake_up_process(wait->tsk); /* implicit smp_wmb() */
+	if (wait->tsk->state != TASK_RUNNING)
+		wake_up_process(wait->tsk); /* implicit smp_wmb() */
 }
 
 static inline void __intel_breadcrumbs_next(struct intel_engine_cs *engine,
@@ -602,36 +604,6 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine,
 	spin_unlock_irq(&b->rb_lock);
 }
 
-static bool signal_valid(const struct drm_i915_gem_request *request)
-{
-	return intel_wait_check_request(&request->signaling.wait, request);
-}
-
-static bool signal_complete(const struct drm_i915_gem_request *request)
-{
-	if (!request)
-		return false;
-
-	/* If another process served as the bottom-half it may have already
-	 * signalled that this wait is already completed.
-	 */
-	if (intel_wait_complete(&request->signaling.wait))
-		return signal_valid(request);
-
-	/* Carefully check if the request is complete, giving time for the
-	 * seqno to be visible or if the GPU hung.
-	 */
-	if (__i915_request_irq_complete(request))
-		return true;
-
-	return false;
-}
-
-static struct drm_i915_gem_request *to_signaler(struct rb_node *rb)
-{
-	return rb_entry(rb, struct drm_i915_gem_request, signaling.node);
-}
-
 static void signaler_set_rtpriority(void)
 {
 	 struct sched_param param = { .sched_priority = 1 };
@@ -639,77 +611,25 @@ static void signaler_set_rtpriority(void)
 	 sched_setscheduler_nocheck(current, SCHED_FIFO, &param);
 }
 
-static void __intel_engine_remove_signal(struct intel_engine_cs *engine,
-					 struct drm_i915_gem_request *request)
-{
-	struct intel_breadcrumbs *b = &engine->breadcrumbs;
-
-	lockdep_assert_held(&b->rb_lock);
-
-	/*
-	 * Wake up all other completed waiters and select the
-	 * next bottom-half for the next user interrupt.
-	 */
-	__intel_engine_remove_wait(engine, &request->signaling.wait);
-
-	/*
-	 * Find the next oldest signal. Note that as we have
-	 * not been holding the lock, another client may
-	 * have installed an even older signal than the one
-	 * we just completed - so double check we are still
-	 * the oldest before picking the next one.
-	 */
-	if (request->signaling.wait.seqno) {
-		if (request == rcu_access_pointer(b->first_signal)) {
-			struct rb_node *rb = rb_next(&request->signaling.node);
-			rcu_assign_pointer(b->first_signal,
-					   rb ? to_signaler(rb) : NULL);
-		}
-
-		rb_erase(&request->signaling.node, &b->signals);
-		request->signaling.wait.seqno = 0;
-	}
-}
-
-static struct drm_i915_gem_request *first_signal(struct intel_breadcrumbs *b)
-{
-	/*
-	 * See the big warnings for i915_gem_active_get_rcu() and similarly
-	 * for dma_fence_get_rcu_safe() that explain the intricacies involved
-	 * here with defeating CPU/compiler speculation and enforcing
-	 * the required memory barriers.
-	 */
-	do {
-		struct drm_i915_gem_request *request;
-
-		request = rcu_dereference(b->first_signal);
-		if (request)
-			request = i915_gem_request_get_rcu(request);
-
-		barrier();
-
-		if (!request || request == rcu_access_pointer(b->first_signal))
-			return rcu_pointer_handoff(request);
-
-		i915_gem_request_put(request);
-	} while (1);
-}
-
 static int intel_breadcrumbs_signaler(void *arg)
 {
 	struct intel_engine_cs *engine = arg;
 	struct intel_breadcrumbs *b = &engine->breadcrumbs;
-	struct drm_i915_gem_request *request;
+	struct drm_i915_gem_request *rq, *n;
 
 	/* Install ourselves with high priority to reduce signalling latency */
 	signaler_set_rtpriority();
 
 	do {
-		bool do_schedule = true;
+		LIST_HEAD(list);
+		u32 seqno;
 
 		set_current_state(TASK_INTERRUPTIBLE);
+		if (list_empty(&b->signals))
+			goto sleep;
 
-		/* We are either woken up by the interrupt bottom-half,
+		/*
+		 * We are either woken up by the interrupt bottom-half,
 		 * or by a client adding a new signaller. In both cases,
 		 * the GPU seqno may have advanced beyond our oldest signal.
 		 * If it has, propagate the signal, remove the waiter and
@@ -717,35 +637,60 @@ static int intel_breadcrumbs_signaler(void *arg)
 		 * need to wait for a new interrupt from the GPU or for
 		 * a new client.
 		 */
-		rcu_read_lock();
-		request = first_signal(b);
-		rcu_read_unlock();
-		if (signal_complete(request)) {
-			if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
-				      &request->fence.flags)) {
-				local_bh_disable();
-				dma_fence_signal(&request->fence);
-				local_bh_enable(); /* kick start the tasklets */
+		seqno = intel_engine_get_seqno(engine);
+
+		spin_lock_irq(&b->rb_lock);
+		list_for_each_entry_safe(rq, n, &b->signals, signaling.link) {
+			u32 this = rq->signaling.wait.seqno;
+
+			GEM_BUG_ON(!rq->signaling.wait.seqno);
+
+			if (!i915_seqno_passed(seqno, this))
+				break;
+
+			if (this == i915_gem_request_global_seqno(rq)) {
+				__intel_engine_remove_wait(engine,
+							   &rq->signaling.wait);
+
+				rq->signaling.wait.seqno = 0;
+				__list_del_entry(&rq->signaling.link);
+
+				if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+					      &rq->fence.flags)) {
+					list_add_tail(&rq->signaling.link,
+						      &list);
+					i915_gem_request_get(rq);
+				}
 			}
+		}
+		spin_unlock_irq(&b->rb_lock);
 
-			if (request->signaling.wait.seqno) {
-				spin_lock_irq(&b->rb_lock);
-				__intel_engine_remove_signal(engine, request);
-				spin_unlock_irq(&b->rb_lock);
+		if (!list_empty(&list)) {
+			local_bh_disable();
+			list_for_each_entry_safe(rq, n, &list, signaling.link) {
+				dma_fence_signal(&rq->fence);
+				i915_gem_request_put(rq);
 			}
+			local_bh_enable(); /* kick start the tasklets */
+		}
 
-			/* If the engine is saturated we may be continually
-			 * processing completed requests. This angers the
-			 * NMI watchdog if we never let anything else
-			 * have access to the CPU. Let's pretend to be nice
-			 * and relinquish the CPU if we burn through the
-			 * entire RT timeslice!
-			 */
-			do_schedule = need_resched();
+		if (engine->irq_seqno_barrier &&
+		    test_and_clear_bit(ENGINE_IRQ_BREADCRUMB,
+				       &engine->irq_posted)) {
+			engine->irq_seqno_barrier(engine);
+			intel_engine_wakeup(engine);
 		}
-		i915_gem_request_put(request);
 
-		if (unlikely(do_schedule)) {
+		/*
+		 * If the engine is saturated we may be continually
+		 * processing completed requests. This angers the
+		 * NMI watchdog if we never let anything else
+		 * have access to the CPU. Let's pretend to be nice
+		 * and relinquish the CPU if we burn through the
+		 * entire RT timeslice!
+		 */
+		if (list_empty(&list) || need_resched()) {
+sleep:
 			if (kthread_should_park())
 				kthread_parkme();
 
@@ -760,6 +705,32 @@ static int intel_breadcrumbs_signaler(void *arg)
 	return 0;
 }
 
+static void insert_signal(struct intel_breadcrumbs *b,
+			  struct drm_i915_gem_request *request,
+			  const u32 seqno)
+{
+	struct drm_i915_gem_request *iter;
+
+	lockdep_assert_held(&b->rb_lock);
+
+	/*
+	 * The reasonable assumption is that we are called to add signals
+	 * in sequence, as the requests are submitted for execution and
+	 * assigned a global_seqno. This will be the case for the majority
+	 * of internally generated signals (inter-engine signaling).
+	 *
+	 * Out of order waiters triggering random signaling enabling will
+	 * be more problematic, but hopefully rare enough and the list
+	 * small enough that the O(N) insertion sort is not an issue.
+	 */
+
+	list_for_each_entry_reverse(iter, &b->signals, signaling.link)
+		if (i915_seqno_passed(seqno, iter->signaling.wait.seqno))
+			break;
+
+	list_add(&request->signaling.link, &iter->signaling.link);
+}
+
 void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
 				   bool wakeup)
 {
@@ -767,7 +738,8 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
 	struct intel_breadcrumbs *b = &engine->breadcrumbs;
 	u32 seqno;
 
-	/* Note that we may be called from an interrupt handler on another
+	/*
+	 * Note that we may be called from an interrupt handler on another
 	 * device (e.g. nouveau signaling a fence completion causing us
 	 * to submit a request, and so enable signaling). As such,
 	 * we need to make sure that all other users of b->rb_lock protect
@@ -779,17 +751,16 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
 	lockdep_assert_held(&request->lock);
 
 	seqno = i915_gem_request_global_seqno(request);
-	if (!seqno)
+	if (!seqno) /* will be enabled later upon execution */
 		return;
 
-	spin_lock(&b->rb_lock);
-
 	GEM_BUG_ON(request->signaling.wait.seqno);
 	request->signaling.wait.tsk = b->signaler;
 	request->signaling.wait.request = request;
 	request->signaling.wait.seqno = seqno;
 
-	/* First add ourselves into the list of waiters, but register our
+	/*
+	 * Add ourselves into the list of waiters, but registering our
 	 * bottom-half as the signaller thread. As per usual, only the oldest
 	 * waiter (not just signaller) is tasked as the bottom-half waking
 	 * up all completed waiters after the user interrupt.
@@ -797,39 +768,9 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
 	 * If we are the oldest waiter, enable the irq (after which we
 	 * must double check that the seqno did not complete).
 	 */
+	spin_lock(&b->rb_lock);
+	insert_signal(b, request, seqno);
 	wakeup &= __intel_engine_add_wait(engine, &request->signaling.wait);
-
-	if (!__i915_gem_request_completed(request, seqno)) {
-		struct rb_node *parent, **p;
-		bool first;
-
-		/* Now insert ourselves into the retirement ordered list of
-		 * signals on this engine. We track the oldest seqno as that
-		 * will be the first signal to complete.
-		 */
-		parent = NULL;
-		first = true;
-		p = &b->signals.rb_node;
-		while (*p) {
-			parent = *p;
-			if (i915_seqno_passed(seqno,
-					      to_signaler(parent)->signaling.wait.seqno)) {
-				p = &parent->rb_right;
-				first = false;
-			} else {
-				p = &parent->rb_left;
-			}
-		}
-		rb_link_node(&request->signaling.node, parent, p);
-		rb_insert_color(&request->signaling.node, &b->signals);
-		if (first)
-			rcu_assign_pointer(b->first_signal, request);
-	} else {
-		__intel_engine_remove_wait(engine, &request->signaling.wait);
-		request->signaling.wait.seqno = 0;
-		wakeup = false;
-	}
-
 	spin_unlock(&b->rb_lock);
 
 	if (wakeup)
@@ -838,17 +779,20 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
 
 void intel_engine_cancel_signaling(struct drm_i915_gem_request *request)
 {
+	struct intel_engine_cs *engine = request->engine;
+	struct intel_breadcrumbs *b = &engine->breadcrumbs;
+
 	GEM_BUG_ON(!irqs_disabled());
 	lockdep_assert_held(&request->lock);
 
-	if (request->signaling.wait.seqno) {
-		struct intel_engine_cs *engine = request->engine;
-		struct intel_breadcrumbs *b = &engine->breadcrumbs;
+	if (!request->signaling.wait.seqno)
+		return;
 
-		spin_lock(&b->rb_lock);
-		__intel_engine_remove_signal(engine, request);
-		spin_unlock(&b->rb_lock);
-	}
+	spin_lock(&b->rb_lock);
+	__intel_engine_remove_wait(engine, &request->signaling.wait);
+	if (fetch_and_zero(&request->signaling.wait.seqno))
+		__list_del_entry(&request->signaling.link);
+	spin_unlock(&b->rb_lock);
 }
 
 int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
@@ -862,6 +806,8 @@ int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
 	timer_setup(&b->fake_irq, intel_breadcrumbs_fake_irq, 0);
 	timer_setup(&b->hangcheck, intel_breadcrumbs_hangcheck, 0);
 
+	INIT_LIST_HEAD(&b->signals);
+
 	/* Spawn a thread to provide a common bottom-half for all signals.
 	 * As this is an asynchronous interface we cannot steal the current
 	 * task for handling the bottom-half to the user interrupt, therefore
@@ -921,8 +867,7 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
 	/* The engines should be idle and all requests accounted for! */
 	WARN_ON(READ_ONCE(b->irq_wait));
 	WARN_ON(!RB_EMPTY_ROOT(&b->waiters));
-	WARN_ON(rcu_access_pointer(b->first_signal));
-	WARN_ON(!RB_EMPTY_ROOT(&b->signals));
+	WARN_ON(!list_empty(&b->signals));
 
 	if (!IS_ERR_OR_NULL(b->signaler))
 		kthread_stop(b->signaler);
@@ -935,20 +880,22 @@ bool intel_breadcrumbs_busy(struct intel_engine_cs *engine)
 	struct intel_breadcrumbs *b = &engine->breadcrumbs;
 	bool busy = false;
 
-	spin_lock_irq(&b->rb_lock);
-
 	if (b->irq_wait) {
-		wake_up_process(b->irq_wait->tsk);
-		busy = true;
+		spin_lock_irq(&b->irq_lock);
+
+		if (b->irq_wait) {
+			wake_up_process(b->irq_wait->tsk);
+			busy = true;
+		}
+
+		spin_unlock_irq(&b->irq_lock);
 	}
 
-	if (rcu_access_pointer(b->first_signal)) {
+	if (!busy && !list_empty(&b->signals)) {
 		wake_up_process(b->signaler);
 		busy = true;
 	}
 
-	spin_unlock_irq(&b->rb_lock);
-
 	return busy;
 }
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index cea2092d25d9..fedf298de036 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -331,9 +331,9 @@ struct intel_engine_cs {
 
 		spinlock_t rb_lock; /* protects the rb and wraps irq_lock */
 		struct rb_root waiters; /* sorted by retirement, priority */
-		struct rb_root signals; /* sorted by retirement */
+		struct list_head signals; /* sorted by retirement */
 		struct task_struct *signaler; /* used for fence signalling */
-		struct drm_i915_gem_request __rcu *first_signal;
+
 		struct timer_list fake_irq; /* used after a missed interrupt */
 		struct timer_list hangcheck; /* detect missed interrupts */
 
-- 
2.15.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 47+ messages in thread

* ✓ Fi.CI.BAT: success for series starting with [01/10] drm/i915: Only attempt to scan the requested number of shrinker slabs
  2018-01-15 21:24 Prevent trivial oom from gem_exec_nop/sequential Chris Wilson
                   ` (9 preceding siblings ...)
  2018-01-15 21:24 ` [PATCH 10/10] drm/i915/breadcrumbs: Reduce signaler rbtree to a sorted list Chris Wilson
@ 2018-01-15 22:04 ` Patchwork
  2018-01-16  9:21 ` ✓ Fi.CI.IGT: " Patchwork
                   ` (4 subsequent siblings)
  15 siblings, 0 replies; 47+ messages in thread
From: Patchwork @ 2018-01-15 22:04 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/10] drm/i915: Only attempt to scan the requested number of shrinker slabs
URL   : https://patchwork.freedesktop.org/series/36501/
State : success

== Summary ==

Series 36501v1 series starting with [01/10] drm/i915: Only attempt to scan the requested number of shrinker slabs
https://patchwork.freedesktop.org/api/1.0/series/36501/revisions/1/mbox/

Test debugfs_test:
        Subgroup read_all_entries:
                incomplete -> PASS       (fi-snb-2520m) fdo#103713
Test gem_mmap_gtt:
        Subgroup basic-small-bo-tiledx:
                fail       -> PASS       (fi-gdg-551) fdo#102575
Test kms_pipe_crc_basic:
        Subgroup suspend-read-crc-pipe-a:
                pass       -> DMESG-WARN (fi-kbl-r) fdo#104172 +1

fdo#103713 https://bugs.freedesktop.org/show_bug.cgi?id=103713
fdo#102575 https://bugs.freedesktop.org/show_bug.cgi?id=102575
fdo#104172 https://bugs.freedesktop.org/show_bug.cgi?id=104172

fi-bdw-5557u     total:288  pass:267  dwarn:0   dfail:0   fail:0   skip:21  time:421s
fi-bdw-gvtdvm    total:288  pass:264  dwarn:0   dfail:0   fail:0   skip:24  time:422s
fi-blb-e6850     total:288  pass:223  dwarn:1   dfail:0   fail:0   skip:64  time:373s
fi-bsw-n3050     total:288  pass:242  dwarn:0   dfail:0   fail:0   skip:46  time:493s
fi-bwr-2160      total:288  pass:183  dwarn:0   dfail:0   fail:0   skip:105 time:281s
fi-bxt-dsi       total:288  pass:258  dwarn:0   dfail:0   fail:0   skip:30  time:483s
fi-bxt-j4205     total:288  pass:259  dwarn:0   dfail:0   fail:0   skip:29  time:487s
fi-byt-j1900     total:288  pass:253  dwarn:0   dfail:0   fail:0   skip:35  time:466s
fi-byt-n2820     total:288  pass:249  dwarn:0   dfail:0   fail:0   skip:39  time:462s
fi-elk-e7500     total:224  pass:168  dwarn:10  dfail:0   fail:0   skip:45 
fi-gdg-551       total:288  pass:180  dwarn:0   dfail:0   fail:0   skip:108 time:278s
fi-glk-1         total:288  pass:260  dwarn:0   dfail:0   fail:0   skip:28  time:514s
fi-hsw-4770      total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:392s
fi-hsw-4770r     total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:398s
fi-ivb-3520m     total:288  pass:259  dwarn:0   dfail:0   fail:0   skip:29  time:449s
fi-ivb-3770      total:288  pass:255  dwarn:0   dfail:0   fail:0   skip:33  time:418s
fi-kbl-7500u     total:288  pass:263  dwarn:1   dfail:0   fail:0   skip:24  time:460s
fi-kbl-7560u     total:288  pass:269  dwarn:0   dfail:0   fail:0   skip:19  time:495s
fi-kbl-7567u     total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:453s
fi-kbl-r         total:288  pass:260  dwarn:1   dfail:0   fail:0   skip:27  time:503s
fi-pnv-d510      total:288  pass:222  dwarn:1   dfail:0   fail:0   skip:65  time:581s
fi-skl-6260u     total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:433s
fi-skl-6600u     total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:513s
fi-skl-6700hq    total:288  pass:262  dwarn:0   dfail:0   fail:0   skip:26  time:530s
fi-skl-6700k2    total:288  pass:264  dwarn:0   dfail:0   fail:0   skip:24  time:491s
fi-skl-6770hq    total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:506s
fi-skl-gvtdvm    total:288  pass:265  dwarn:0   dfail:0   fail:0   skip:23  time:430s
fi-snb-2520m     total:288  pass:248  dwarn:0   dfail:0   fail:0   skip:40  time:522s
fi-snb-2600      total:288  pass:248  dwarn:0   dfail:0   fail:0   skip:40  time:394s
Blacklisted hosts:
fi-cfl-s2        total:108  pass:96   dwarn:0   dfail:0   fail:0   skip:11 

70efeb2fa45da6f29e43893bb014b3c269e84e7b drm-tip: 2018y-01m-15d-21h-15m-57s UTC integration manifest
0454ff91e41d drm/i915/breadcrumbs: Reduce signaler rbtree to a sorted list
040bcda06ffd drm/i915: Only signal from interrupt when requested
d4acba3e5db1 drm/i915: Move the irq_counter inside the spinlock
e522ee9c8c44 drm/i915: Reduce spinlock hold time during notify_ring() interrupt
c6320e8e607f drm/i915/breadcrumbs: Drop request reference for the signaler thread
2d3aa6fda366 drm/i915: Trim the retired request queue after submitting
d11ff2987aea drm/i915: Shrink the request kmem_cache on allocation error
f19add3a5592 drm/i915: Shrink the GEM kmem_caches upon idling
2f2b2617005b drm/i915: Move i915_gem_retire_work_handler
20e30f74b8be drm/i915: Only attempt to scan the requested number of shrinker slabs

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_7678/issues.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 47+ messages in thread

* ✓ Fi.CI.IGT: success for series starting with [01/10] drm/i915: Only attempt to scan the requested number of shrinker slabs
  2018-01-15 21:24 Prevent trivial oom from gem_exec_nop/sequential Chris Wilson
                   ` (10 preceding siblings ...)
  2018-01-15 22:04 ` ✓ Fi.CI.BAT: success for series starting with [01/10] drm/i915: Only attempt to scan the requested number of shrinker slabs Patchwork
@ 2018-01-16  9:21 ` Patchwork
  2018-01-16  9:52 ` Prevent trivial oom from gem_exec_nop/sequential Tvrtko Ursulin
                   ` (3 subsequent siblings)
  15 siblings, 0 replies; 47+ messages in thread
From: Patchwork @ 2018-01-16  9:21 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/10] drm/i915: Only attempt to scan the requested number of shrinker slabs
URL   : https://patchwork.freedesktop.org/series/36501/
State : success

== Summary ==

Test kms_frontbuffer_tracking:
        Subgroup fbc-rgb565-draw-render:
                fail       -> SKIP       (shard-snb) fdo#103167
Test kms_flip:
        Subgroup plain-flip-ts-check-interruptible:
                fail       -> PASS       (shard-hsw) fdo#100368
        Subgroup wf_vblank-vs-dpms-interruptible:
                pass       -> DMESG-WARN (shard-hsw) fdo#102614
        Subgroup vblank-vs-modeset-suspend:
                skip       -> PASS       (shard-snb) fdo#102365

fdo#103167 https://bugs.freedesktop.org/show_bug.cgi?id=103167
fdo#100368 https://bugs.freedesktop.org/show_bug.cgi?id=100368
fdo#102614 https://bugs.freedesktop.org/show_bug.cgi?id=102614
fdo#102365 https://bugs.freedesktop.org/show_bug.cgi?id=102365

shard-hsw        total:2729 pass:1548 dwarn:2   dfail:0   fail:10  skip:1169 time:9087s
shard-snb        total:2729 pass:1318 dwarn:1   dfail:0   fail:11  skip:1399 time:7965s
Blacklisted hosts:
shard-apl        total:2707 pass:1676 dwarn:1   dfail:0   fail:22  skip:1006 time:12409s
shard-kbl        total:2729 pass:1823 dwarn:3   dfail:0   fail:24  skip:879 time:10588s

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_7678/shards.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: Prevent trivial oom from gem_exec_nop/sequential
  2018-01-15 21:24 Prevent trivial oom from gem_exec_nop/sequential Chris Wilson
                   ` (11 preceding siblings ...)
  2018-01-16  9:21 ` ✓ Fi.CI.IGT: " Patchwork
@ 2018-01-16  9:52 ` Tvrtko Ursulin
  2018-01-16 10:02   ` Chris Wilson
  2018-01-16 13:10   ` Chris Wilson
  2018-01-16 13:42 ` ✓ Fi.CI.BAT: success for series starting with [01/10] drm/i915: Only attempt to scan the requested number of shrinker slabs (rev3) Patchwork
                   ` (2 subsequent siblings)
  15 siblings, 2 replies; 47+ messages in thread
From: Tvrtko Ursulin @ 2018-01-16  9:52 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 15/01/2018 21:24, Chris Wilson wrote:
> About the third resend of this series that tries to curtail the prolonged
> reference lifetimes of fences via the signaler thread when the CPUs are
> saturated by interrupts.

Happens on CI regularly, or just occasionally? In the real world it
shouldn't, right? Trying to prioritize, I spotted the last patch in the
series rewriting breadcrumbs, so grumble. :)

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 03/10] drm/i915: Shrink the GEM kmem_caches upon idling
  2018-01-15 21:24 ` [PATCH 03/10] drm/i915: Shrink the GEM kmem_caches upon idling Chris Wilson
@ 2018-01-16 10:00   ` Tvrtko Ursulin
  2018-01-16 10:19     ` Chris Wilson
  2018-01-16 13:05     ` [PATCH v2] " Chris Wilson
  0 siblings, 2 replies; 47+ messages in thread
From: Tvrtko Ursulin @ 2018-01-16 10:00 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 15/01/2018 21:24, Chris Wilson wrote:
> When we finally decide the gpu is idle, that is a good time to shrink
> our kmem_caches.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_gem.c | 22 ++++++++++++++++++++++
>   1 file changed, 22 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index a8840a514377..8547f5214599 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -4709,6 +4709,21 @@ i915_gem_retire_work_handler(struct work_struct *work)
>   	}
>   }
>   
> +static void shrink_caches(struct drm_i915_private *i915)
> +{
> +	/*
> +	 * kmem_cache_shrink() discards empty slabs and reorders partially
> +	 * filled slabs to prioritise allocating from the mostly full slabs,
> +	 * with the aim of reducing fragmentation.
> +	 */

This makes it sound like it would be a very good thing in general.

> +	kmem_cache_shrink(i915->priorities);
> +	kmem_cache_shrink(i915->dependencies);
> +	kmem_cache_shrink(i915->requests);
> +	kmem_cache_shrink(i915->luts);
> +	kmem_cache_shrink(i915->vmas);
> +	kmem_cache_shrink(i915->objects);
> +}
> +
>   static inline bool
>   new_requests_since_last_retire(const struct drm_i915_private *i915)
>   {
> @@ -4796,6 +4811,13 @@ i915_gem_idle_work_handler(struct work_struct *work)
>   		GEM_BUG_ON(!dev_priv->gt.awake);
>   		i915_queue_hangcheck(dev_priv);
>   	}
> +
> +	rcu_barrier();

Ugh, the more sprinkled-around complexity we add, the more difficult it 
becomes to maintain the code base for mere mortals. At the very minimum 
a comment is needed here.

What about activity other than requests? Like active mmap_gtt, which 
might at least create busyness on the vma and object caches and is not 
correlated with the idle work handler firing.

Regards,

Tvrtko

> +
> +	if (!new_requests_since_last_retire(dev_priv)) {
> +		__i915_gem_free_work(&dev_priv->mm.free_work);
> +		shrink_caches(dev_priv);
> +	}
>   }
>   
>   int i915_gem_suspend(struct drm_i915_private *dev_priv)
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: Prevent trivial oom from gem_exec_nop/sequential
  2018-01-16  9:52 ` Prevent trivial oom from gem_exec_nop/sequential Tvrtko Ursulin
@ 2018-01-16 10:02   ` Chris Wilson
  2018-01-16 13:10   ` Chris Wilson
  1 sibling, 0 replies; 47+ messages in thread
From: Chris Wilson @ 2018-01-16 10:02 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-01-16 09:52:10)
> 
> On 15/01/2018 21:24, Chris Wilson wrote:
> > About the third resend of this series that tries to curtail the prolonged
> > reference lifetimes of fences via the signaler thread when the CPUs are
> > saturated by interrupts.
> 
> Happens on CI regularly, or just occasionally?

It would if CI ran the longer running tests that expose the problems.
Even at nop rates it still takes 30s or so to exhaust memory on a small
device (obviously highly dependent on individual configurations and
device timings).
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 04/10] drm/i915: Shrink the request kmem_cache on allocation error
  2018-01-15 21:24 ` [PATCH 04/10] drm/i915: Shrink the request kmem_cache on allocation error Chris Wilson
@ 2018-01-16 10:10   ` Tvrtko Ursulin
  2018-01-16 10:26     ` Chris Wilson
  2018-01-16 13:15     ` [PATCH v2] " Chris Wilson
  0 siblings, 2 replies; 47+ messages in thread
From: Tvrtko Ursulin @ 2018-01-16 10:10 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 15/01/2018 21:24, Chris Wilson wrote:
> If we fail to allocate a new request, make sure we recover the pages
> that are in the process of being freed by inserting an RCU barrier.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_gem_request.c | 3 +++
>   1 file changed, 3 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index 72bdc203716f..e6d4857b1f78 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -696,6 +696,9 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
>   		if (ret)
>   			goto err_unreserve;
>   
> +		kmem_cache_shrink(dev_priv->requests);

Hm, is the one in the idle work handler not enough? Or, from another 
angle, won't the kmem_cache_alloc below work hard enough to allocate 
something regardless?

> +		rcu_barrier();

This one is because the request cache is RCU? But doesn't that mean 
freed requests are immediately available, as per:

static void i915_fence_release(struct dma_fence *fence)
{
	struct drm_i915_gem_request *req = to_request(fence);

	/* The request is put onto a RCU freelist (i.e. the address
	 * is immediately reused), mark the fences as being freed now.
	 * Otherwise the debugobjects for the fences are only marked as
	 * freed when the slab cache itself is freed, and so we would get
	 * caught trying to reuse dead objects.
	 */

Regards,

Tvrtko

> +
>   		req = kmem_cache_alloc(dev_priv->requests, GFP_KERNEL);
>   		if (!req) {
>   			ret = -ENOMEM;
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 05/10] drm/i915: Trim the retired request queue after submitting
  2018-01-15 21:24 ` [PATCH 05/10] drm/i915: Trim the retired request queue after submitting Chris Wilson
@ 2018-01-16 10:18   ` Tvrtko Ursulin
  2018-01-16 10:32     ` Chris Wilson
  2018-01-16 13:30     ` [PATCH v2] " Chris Wilson
  0 siblings, 2 replies; 47+ messages in thread
From: Tvrtko Ursulin @ 2018-01-16 10:18 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 15/01/2018 21:24, Chris Wilson wrote:
> If we submit a request and see that the previous request on this
> timeline was already signaled, we first do not need to add the
> dependency tracker for that completed request, and secondly we know that
> there is then a large backlog in retiring requests affecting this
> timeline. Given that we just submitted more work to the HW, now would be
> a good time to catch up on those retirements.

How can we be sure there is a large backlog? It may just be that the 
submission frequency combined with request duration is just right to 
always see even a solitary previous completed request, no?

> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> ---
>   drivers/gpu/drm/i915/i915_gem_request.c | 5 ++++-
>   1 file changed, 4 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index e6d4857b1f78..6a143099cea1 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -1019,7 +1019,7 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
>   
>   	prev = i915_gem_active_raw(&timeline->last_request,
>   				   &request->i915->drm.struct_mutex);
> -	if (prev) {
> +	if (prev && !i915_gem_request_completed(prev)) {
>   		i915_sw_fence_await_sw_fence(&request->submit, &prev->submit,
>   					     &request->submitq);

This makes sense.

>   		if (engine->schedule)
> @@ -1055,6 +1055,9 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
>   	local_bh_disable();
>   	i915_sw_fence_commit(&request->submit);
>   	local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
> +
> +	if (prev && i915_gem_request_completed(prev))
> +		i915_gem_request_retire_upto(prev);

And here I'm a bit surprised that you want to penalize the submission 
path with house-keeping - assuming cases when there really is a big 
backlog of completed requests. But since it is after the tasklet 
kicking, I suppose the effect on submission latency is somewhat 
mitigated. Unless the caller wants to submit many requests rapidly. Hm.. 
retire at execbuf time seems to be coming in and out, albeit in a more 
controlled fashion with this.

Regards,

Tvrtko

>   }
>   
>   static unsigned long local_clock_us(unsigned int *cpu)
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 03/10] drm/i915: Shrink the GEM kmem_caches upon idling
  2018-01-16 10:00   ` Tvrtko Ursulin
@ 2018-01-16 10:19     ` Chris Wilson
  2018-01-16 13:05     ` [PATCH v2] " Chris Wilson
  1 sibling, 0 replies; 47+ messages in thread
From: Chris Wilson @ 2018-01-16 10:19 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-01-16 10:00:16)
> 
> On 15/01/2018 21:24, Chris Wilson wrote:
> > When we finally decide the gpu is idle, that is a good time to shrink
> > our kmem_caches.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >   drivers/gpu/drm/i915/i915_gem.c | 22 ++++++++++++++++++++++
> >   1 file changed, 22 insertions(+)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index a8840a514377..8547f5214599 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -4709,6 +4709,21 @@ i915_gem_retire_work_handler(struct work_struct *work)
> >       }
> >   }
> >   
> > +static void shrink_caches(struct drm_i915_private *i915)
> > +{
> > +     /*
> > +      * kmem_cache_shrink() discards empty slabs and reorders partially
> > +      * filled slabs to prioritise allocating from the mostly full slabs,
> > +      * with the aim of reducing fragmentation.
> > +      */
> 
> This makes it sound like it would be a very good thing in general.
> 
> > +     kmem_cache_shrink(i915->priorities);
> > +     kmem_cache_shrink(i915->dependencies);
> > +     kmem_cache_shrink(i915->requests);
> > +     kmem_cache_shrink(i915->luts);
> > +     kmem_cache_shrink(i915->vmas);
> > +     kmem_cache_shrink(i915->objects);
> > +}
> > +
> >   static inline bool
> >   new_requests_since_last_retire(const struct drm_i915_private *i915)
> >   {
> > @@ -4796,6 +4811,13 @@ i915_gem_idle_work_handler(struct work_struct *work)
> >               GEM_BUG_ON(!dev_priv->gt.awake);
> >               i915_queue_hangcheck(dev_priv);
> >       }
> > +
> > +     rcu_barrier();
> 
> Ugh, the more sprinkled-around complexity we add, the more difficult it 
> becomes to maintain the code base for mere mortals. At the very minimum 
> a comment is needed here.

This one is because some of our kmem caches (i.e. requests) are special
and use TYPESAFE_BY_RCU, which means we don't release the pages until
after an RCU grace period. This is just to encourage a grace period
between each idle event. Though it looks sensible to tie a grace period
in around kmem_cache_shrink, the shrink really only takes effect
afterwards, so this is just to ensure the pages we used last time are
given back.
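
As a rough illustration of the pattern (a sketch, not the driver code
itself; struct foo is just a stand-in):

	struct kmem_cache *cache =
		kmem_cache_create("example", sizeof(struct foo), 0,
				  SLAB_TYPESAFE_BY_RCU, NULL);
	struct foo *obj = kmem_cache_alloc(cache, GFP_KERNEL);

	kmem_cache_free(cache, obj);	/* the slot is reusable immediately */

	rcu_barrier();			/* wait out the grace period so that
					 * the slab pages themselves may be
					 * released */
	kmem_cache_shrink(cache);	/* now the empty slabs can go back to
					 * the system */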
 
> What about activity other than requests? Like active mmap_gtt, which 
> might at least create busyness on the vma and object caches and is not 
> correlated with the idle work handler firing.

We only have a single periodic ticker atm... Patches to add a similar ticker
for GTT mmaps to coordinate with rpm etc lack some love.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 04/10] drm/i915: Shrink the request kmem_cache on allocation error
  2018-01-16 10:10   ` Tvrtko Ursulin
@ 2018-01-16 10:26     ` Chris Wilson
  2018-01-16 13:15     ` [PATCH v2] " Chris Wilson
  1 sibling, 0 replies; 47+ messages in thread
From: Chris Wilson @ 2018-01-16 10:26 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-01-16 10:10:28)
> 
> On 15/01/2018 21:24, Chris Wilson wrote:
> > If we fail to allocate a new request, make sure we recover the pages
> > that are in the process of being freed by inserting an RCU barrier.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >   drivers/gpu/drm/i915/i915_gem_request.c | 3 +++
> >   1 file changed, 3 insertions(+)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> > index 72bdc203716f..e6d4857b1f78 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_request.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> > @@ -696,6 +696,9 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
> >               if (ret)
> >                       goto err_unreserve;
> >   
> > +             kmem_cache_shrink(dev_priv->requests);
> 
> Hm, is the one in the idle work handler not enough? Or, from another 
> angle, won't the kmem_cache_alloc below work hard enough to allocate 
> something regardless?

No, this path here is solely to penalize overallocation via requests.
(It may be overzealous, but if the system is under such severe
mempressure that we can't allocate a page for ourselves, it makes little
difference where the stall and oom come from.) A large part of this is
because the core mm can't reclaim from RCU deferred frees, which is a
nasty problem requiring all heavy RCU consumers to implement rate
limiting themselves.
 
> > +             rcu_barrier();
> 
> This one is because the request cache is RCU? But doesn't that mean 
> freed requests are immediately available, as per:

Requests are immediately available, but we are simply attempting to recover
memory for the system from the request cache. (Fences make good neighbours)
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 05/10] drm/i915: Trim the retired request queue after submitting
  2018-01-16 10:18   ` Tvrtko Ursulin
@ 2018-01-16 10:32     ` Chris Wilson
  2018-01-17 10:23       ` Tvrtko Ursulin
  2018-01-16 13:30     ` [PATCH v2] " Chris Wilson
  1 sibling, 1 reply; 47+ messages in thread
From: Chris Wilson @ 2018-01-16 10:32 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-01-16 10:18:55)
> 
> On 15/01/2018 21:24, Chris Wilson wrote:
> > If we submit a request and see that the previous request on this
> > timeline was already signaled, we first do not need to add the
> > dependency tracker for that completed request, and secondly we know that
> > there is then a large backlog in retiring requests affecting this
> > timeline. Given that we just submitted more work to the HW, now would be
> > a good time to catch up on those retirements.
> 
> How can we be sure there is a large backlog? It may just be that the 
> submission frequency combined with request duration is just right to 
> always see even a solitary previous completed request, no?

We always try to retire one old request per new request. Getting to the
point where we see an unretired completed fence here implies that we are
allocating faster than we are retiring, and so have a backlog.
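
For reference, the allocation path already does roughly this
(paraphrased, not verbatim):

	/* Move our oldest request to the slab-cache (if not in use!) */
	req = list_first_entry_or_null(&engine->timeline->requests,
				       typeof(*req), link);
	if (req && i915_gem_request_completed(req))
		i915_gem_request_retire(req);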
> 
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> > ---
> >   drivers/gpu/drm/i915/i915_gem_request.c | 5 ++++-
> >   1 file changed, 4 insertions(+), 1 deletion(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> > index e6d4857b1f78..6a143099cea1 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_request.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> > @@ -1019,7 +1019,7 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
> >   
> >       prev = i915_gem_active_raw(&timeline->last_request,
> >                                  &request->i915->drm.struct_mutex);
> > -     if (prev) {
> > +     if (prev && !i915_gem_request_completed(prev)) {
> >               i915_sw_fence_await_sw_fence(&request->submit, &prev->submit,
> >                                            &request->submitq);
> 
> This makes sense.
> 
> >               if (engine->schedule)
> > @@ -1055,6 +1055,9 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
> >       local_bh_disable();
> >       i915_sw_fence_commit(&request->submit);
> >       local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
> > +
> > +     if (prev && i915_gem_request_completed(prev))
> > +             i915_gem_request_retire_upto(prev);
> 
> And here I'm a bit surprised that you want to penalize the submission 
> path with house-keeping - assuming cases when there really is a big 
> backlog of completed requests. But since it is after the tasklet 
> kicking, I suppose the effect on submission latency is somewhat 
> mitigated. Unless the caller wants to submit many requests rapidly. Hm.. 
> retire at execbuf time seems to be coming in and out, albeit in a more 
> controlled fashion with this.

I was surprised myself ;) What I considered as the next step here is to
limit the retirements to the client's own timeline, to avoid having to
do work for others. That this comes after the submission of the next
request, so we have at least a few microseconds to play with, makes it
seem less obnoxious to me. Plus, that it's so unlikely to happen
suggests to me that we have fallen so far behind in our alloc/retire
equilibrium that a catch-up is justified. And most of the heavy work has
been moved from request retirement onto kthreads (object release,
context release etc).
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 47+ messages in thread

* [PATCH v2] drm/i915: Shrink the GEM kmem_caches upon idling
  2018-01-16 10:00   ` Tvrtko Ursulin
  2018-01-16 10:19     ` Chris Wilson
@ 2018-01-16 13:05     ` Chris Wilson
  2018-01-16 15:12       ` Tvrtko Ursulin
  1 sibling, 1 reply; 47+ messages in thread
From: Chris Wilson @ 2018-01-16 13:05 UTC (permalink / raw)
  To: intel-gfx

When we finally decide the gpu is idle, that is a good time to shrink
our kmem_caches.

v2: Comment upon the random sprinkling of rcu_barrier() inside the idle
worker.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 335731c93b4a..61b13fdfaa71 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4716,6 +4716,21 @@ i915_gem_retire_work_handler(struct work_struct *work)
 	}
 }
 
+static void shrink_caches(struct drm_i915_private *i915)
+{
+	/*
+	 * kmem_cache_shrink() discards empty slabs and reorders partially
+	 * filled slabs to prioritise allocating from the mostly full slabs,
+	 * with the aim of reducing fragmentation.
+	 */
+	kmem_cache_shrink(i915->priorities);
+	kmem_cache_shrink(i915->dependencies);
+	kmem_cache_shrink(i915->requests);
+	kmem_cache_shrink(i915->luts);
+	kmem_cache_shrink(i915->vmas);
+	kmem_cache_shrink(i915->objects);
+}
+
 static inline bool
 new_requests_since_last_retire(const struct drm_i915_private *i915)
 {
@@ -4803,6 +4818,21 @@ i915_gem_idle_work_handler(struct work_struct *work)
 		GEM_BUG_ON(!dev_priv->gt.awake);
 		i915_queue_hangcheck(dev_priv);
 	}
+
+	/*
+	 * We use magical TYPESAFE_BY_RCU kmem_caches whose pages are not
+	 * returned to the system immediately but only after an RCU grace
+	 * period. We want to encourage such pages to be returned and so
+	 * incorporate an RCU barrier here to provide some rate limiting
+	 * of the driver and flush the old pages before we free a new batch
+	 * from the next round of shrinking.
+	 */
+	rcu_barrier();
+
+	if (!new_requests_since_last_retire(dev_priv)) {
+		__i915_gem_free_work(&dev_priv->mm.free_work);
+		shrink_caches(dev_priv);
+	}
 }
 
 int i915_gem_suspend(struct drm_i915_private *dev_priv)
-- 
2.15.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 47+ messages in thread

* Re: Prevent trivial oom from gem_exec_nop/sequential
  2018-01-16  9:52 ` Prevent trivial oom from gem_exec_nop/sequential Tvrtko Ursulin
  2018-01-16 10:02   ` Chris Wilson
@ 2018-01-16 13:10   ` Chris Wilson
  1 sibling, 0 replies; 47+ messages in thread
From: Chris Wilson @ 2018-01-16 13:10 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-01-16 09:52:10)
> 
> On 15/01/2018 21:24, Chris Wilson wrote:
> > About the third resend of this series that tries to curtail the prolonged
> > reference lifetimes of fences via the signaler thread when the CPUs are
> > saturated by interrupts.
> 
> Happens on CI regularly, or just occasionally? In the real world it
> shouldn't, right? Trying to prioritize, I spotted the last patch in the
> series rewriting breadcrumbs, so grumble. :)

There's a very minor performance improvement overall, plus closing an
easy to trigger DoS. That style of DoS pops up everywhere we have
references to requests (with so many fence contexts floating around,
it's very easy to manipulate), so getting a grip on it is very useful.

The gem_exec_nop/sequential one was a little more surprising because the
lifetime there wasn't ostensibly extended, it was just the result of a
huge backlog for the RT thread!
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 47+ messages in thread

* [PATCH v2] drm/i915: Shrink the request kmem_cache on allocation error
  2018-01-16 10:10   ` Tvrtko Ursulin
  2018-01-16 10:26     ` Chris Wilson
@ 2018-01-16 13:15     ` Chris Wilson
  2018-01-16 15:19       ` Tvrtko Ursulin
  1 sibling, 1 reply; 47+ messages in thread
From: Chris Wilson @ 2018-01-16 13:15 UTC (permalink / raw)
  To: intel-gfx

If we fail to allocate a new request, make sure we recover the pages
that are in the process of being freed by inserting an RCU barrier.

v2: Comment before the shrink and barrier in the error path.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_request.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 72bdc203716f..a0f451b4a4e8 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -696,6 +696,17 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 		if (ret)
 			goto err_unreserve;
 
+		/*
+		 * We've forced the client to stall and catch up with whatever
+		 * backlog there might have been. As we are assuming that we
+		 * caused the mempressure, now is an opportune time to
+		 * recover as much memory from the request pool as is possible.
+		 * Having already penalized the client to stall, we spend
+		 * a little extra time to re-optimise page allocation.
+		 */
+		kmem_cache_shrink(dev_priv->requests);
+		rcu_barrier(); /* Recover the TYPESAFE_BY_RCU pages */
+
 		req = kmem_cache_alloc(dev_priv->requests, GFP_KERNEL);
 		if (!req) {
 			ret = -ENOMEM;
-- 
2.15.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 47+ messages in thread

* [PATCH v2] drm/i915: Trim the retired request queue after submitting
  2018-01-16 10:18   ` Tvrtko Ursulin
  2018-01-16 10:32     ` Chris Wilson
@ 2018-01-16 13:30     ` Chris Wilson
  1 sibling, 0 replies; 47+ messages in thread
From: Chris Wilson @ 2018-01-16 13:30 UTC (permalink / raw)
  To: intel-gfx

If we submit a request and see that the previous request on this
timeline was already signaled, we first do not need to add the
dependency tracker for that completed request, and secondly we know that
there is then a large backlog in retiring requests affecting this
timeline. Given that we just submitted more work to the HW, now would be
a good time to catch up on those retirements.

v2: Try to sum up the compromises involved in flushing the retirement
queue after submission.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_request.c | 34 ++++++++++++++++++++++++++++-----
 1 file changed, 29 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index a0f451b4a4e8..77c2eba490e9 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -983,7 +983,8 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 	lockdep_assert_held(&request->i915->drm.struct_mutex);
 	trace_i915_gem_request_add(request);
 
-	/* Make sure that no request gazumped us - if it was allocated after
+	/*
+	 * Make sure that no request gazumped us - if it was allocated after
 	 * our i915_gem_request_alloc() and called __i915_add_request() before
 	 * us, the timeline will hold its seqno which is later than ours.
 	 */
@@ -1010,7 +1011,8 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 		WARN(err, "engine->emit_flush() failed: %d!\n", err);
 	}
 
-	/* Record the position of the start of the breadcrumb so that
+	/*
+	 * Record the position of the start of the breadcrumb so that
 	 * should we detect the updated seqno part-way through the
 	 * GPU processing the request, we never over-estimate the
 	 * position of the ring's HEAD.
@@ -1019,7 +1021,8 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 	GEM_BUG_ON(IS_ERR(cs));
 	request->postfix = intel_ring_offset(request, cs);
 
-	/* Seal the request and mark it as pending execution. Note that
+	/*
+	 * Seal the request and mark it as pending execution. Note that
 	 * we may inspect this state, without holding any locks, during
 	 * hangcheck. Hence we apply the barrier to ensure that we do not
 	 * see a more recent value in the hws than we are tracking.
@@ -1027,7 +1030,7 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 
 	prev = i915_gem_active_raw(&timeline->last_request,
 				   &request->i915->drm.struct_mutex);
-	if (prev) {
+	if (prev && !i915_gem_request_completed(prev)) {
 		i915_sw_fence_await_sw_fence(&request->submit, &prev->submit,
 					     &request->submitq);
 		if (engine->schedule)
@@ -1047,7 +1050,8 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 	list_add_tail(&request->ring_link, &ring->request_list);
 	request->emitted_jiffies = jiffies;
 
-	/* Let the backend know a new request has arrived that may need
+	/*
+	 * Let the backend know a new request has arrived that may need
 	 * to adjust the existing execution schedule due to a high priority
 	 * request - i.e. we may want to preempt the current request in order
 	 * to run a high priority dependency chain *before* we can execute this
@@ -1063,6 +1067,26 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 	local_bh_disable();
 	i915_sw_fence_commit(&request->submit);
 	local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
+
+	/*
+	 * In typical scenarios, we do not expect the previous request on
+	 * the timeline to be still tracked by timeline->last_request if it
+	 * has been completed. If the completed request is still here, that
+	 * implies that request retirement is a long way behind submission,
+	 * suggesting that we haven't been retiring frequently enough from
+	 * the combination of retire-before-alloc, waiters and the background
+	 * retirement worker. So if the last request on this timeline was
+	 * already completed, do a catch up pass, flushing the retirement queue
+	 * up to this client. Since we have now moved the heaviest operations
+	 * during retirement onto secondary workers, such as freeing objects
+	 * or contexts, retiring a bunch of requests is mostly list management
+	 * (and cache misses), and so we should not be overly penalizing this
+	 * client by performing excess work, though we may still be performing
+	 * work on behalf of others -- but instead we should benefit from
+	 * improved resource management. (Well, that's the theory at least.)
+	 */
+	if (prev && i915_gem_request_completed(prev))
+		i915_gem_request_retire_upto(prev);
 }
 
 static unsigned long local_clock_us(unsigned int *cpu)
-- 
2.15.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 47+ messages in thread

* ✓ Fi.CI.BAT: success for series starting with [01/10] drm/i915: Only attempt to scan the requested number of shrinker slabs (rev3)
  2018-01-15 21:24 Prevent trivial oom from gem_exec_nop/sequential Chris Wilson
                   ` (12 preceding siblings ...)
  2018-01-16  9:52 ` Prevent trivial oom from gem_exec_nop/sequential Tvrtko Ursulin
@ 2018-01-16 13:42 ` Patchwork
  2018-01-16 14:02 ` ✓ Fi.CI.BAT: success for series starting with [01/10] drm/i915: Only attempt to scan the requested number of shrinker slabs (rev4) Patchwork
  2018-01-16 15:29 ` ✓ Fi.CI.IGT: " Patchwork
  15 siblings, 0 replies; 47+ messages in thread
From: Patchwork @ 2018-01-16 13:42 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/10] drm/i915: Only attempt to scan the requested number of shrinker slabs (rev3)
URL   : https://patchwork.freedesktop.org/series/36501/
State : success

== Summary ==

Series 36501v3 series starting with [01/10] drm/i915: Only attempt to scan the requested number of shrinker slabs
https://patchwork.freedesktop.org/api/1.0/series/36501/revisions/3/mbox/

Test kms_frontbuffer_tracking:
        Subgroup basic:
                incomplete -> SKIP       (fi-elk-e7500) fdo#103989
Test kms_pipe_crc_basic:
        Subgroup hang-read-crc-pipe-b:
                notrun     -> INCOMPLETE (fi-elk-e7500)
        Subgroup suspend-read-crc-pipe-a:
                pass       -> DMESG-WARN (fi-kbl-r) fdo#104172 +1

fdo#103989 https://bugs.freedesktop.org/show_bug.cgi?id=103989
fdo#104172 https://bugs.freedesktop.org/show_bug.cgi?id=104172

fi-bdw-5557u     total:288  pass:267  dwarn:0   dfail:0   fail:0   skip:21  time:423s
fi-bdw-gvtdvm    total:288  pass:264  dwarn:0   dfail:0   fail:0   skip:24  time:428s
fi-blb-e6850     total:288  pass:223  dwarn:1   dfail:0   fail:0   skip:64  time:377s
fi-bsw-n3050     total:288  pass:242  dwarn:0   dfail:0   fail:0   skip:46  time:482s
fi-bwr-2160      total:288  pass:183  dwarn:0   dfail:0   fail:0   skip:105 time:281s
fi-bxt-dsi       total:288  pass:258  dwarn:0   dfail:0   fail:0   skip:30  time:481s
fi-bxt-j4205     total:288  pass:259  dwarn:0   dfail:0   fail:0   skip:29  time:485s
fi-byt-j1900     total:288  pass:253  dwarn:0   dfail:0   fail:0   skip:35  time:465s
fi-byt-n2820     total:288  pass:249  dwarn:0   dfail:0   fail:0   skip:39  time:453s
fi-elk-e7500     total:230  pass:172  dwarn:10  dfail:1   fail:0   skip:46 
fi-gdg-551       total:288  pass:179  dwarn:0   dfail:0   fail:1   skip:108 time:275s
fi-glk-1         total:288  pass:260  dwarn:0   dfail:0   fail:0   skip:28  time:510s
fi-hsw-4770      total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:393s
fi-hsw-4770r     total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:401s
fi-ilk-650       total:288  pass:228  dwarn:0   dfail:0   fail:0   skip:60  time:412s
fi-ivb-3520m     total:288  pass:259  dwarn:0   dfail:0   fail:0   skip:29  time:447s
fi-ivb-3770      total:288  pass:255  dwarn:0   dfail:0   fail:0   skip:33  time:418s
fi-kbl-7500u     total:288  pass:263  dwarn:1   dfail:0   fail:0   skip:24  time:459s
fi-kbl-7560u     total:288  pass:269  dwarn:0   dfail:0   fail:0   skip:19  time:501s
fi-kbl-7567u     total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:454s
fi-kbl-r         total:288  pass:260  dwarn:1   dfail:0   fail:0   skip:27  time:504s
fi-pnv-d510      total:288  pass:222  dwarn:1   dfail:0   fail:0   skip:65  time:582s
fi-skl-6260u     total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:433s
fi-skl-6600u     total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:512s
fi-skl-6700hq    total:288  pass:262  dwarn:0   dfail:0   fail:0   skip:26  time:529s
fi-skl-6700k2    total:288  pass:264  dwarn:0   dfail:0   fail:0   skip:24  time:484s
fi-skl-6770hq    total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:479s
fi-skl-gvtdvm    total:288  pass:265  dwarn:0   dfail:0   fail:0   skip:23  time:432s
fi-snb-2520m     total:288  pass:248  dwarn:0   dfail:0   fail:0   skip:40  time:526s
fi-snb-2600      total:288  pass:248  dwarn:0   dfail:0   fail:0   skip:40  time:396s
Blacklisted hosts:
fi-cfl-s2        total:288  pass:262  dwarn:0   dfail:0   fail:0   skip:26  time:569s
fi-glk-dsi       total:288  pass:258  dwarn:0   dfail:0   fail:0   skip:30  time:472s

a0ca279440c8d7c40d798fed9939a2a25b31434b drm-tip: 2018y-01m-16d-10h-49m-51s UTC integration manifest
f540ad11c300 drm/i915/breadcrumbs: Reduce signaler rbtree to a sorted list
e1afd79ecea0 drm/i915: Only signal from interrupt when requested
1daa9b31beed drm/i915: Move the irq_counter inside the spinlock
dee29de7d40a drm/i915: Reduce spinlock hold time during notify_ring() interrupt
b5178c4c3205 drm/i915/breadcrumbs: Drop request reference for the signaler thread
d48bcec3dfe0 drm/i915: Trim the retired request queue after submitting
ae7bc3399783 drm/i915: Shrink the request kmem_cache on allocation error
8186256d0c07 drm/i915: Shrink the GEM kmem_caches upon idling
990d859f028d drm/i915: Move i915_gem_retire_work_handler
04d14c66a3cf drm/i915: Only attempt to scan the requested number of shrinker slabs

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_7681/issues.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 47+ messages in thread

* ✓ Fi.CI.BAT: success for series starting with [01/10] drm/i915: Only attempt to scan the requested number of shrinker slabs (rev4)
  2018-01-15 21:24 Prevent trivial oom from gem_exec_nop/sequential Chris Wilson
                   ` (13 preceding siblings ...)
  2018-01-16 13:42 ` ✓ Fi.CI.BAT: success for series starting with [01/10] drm/i915: Only attempt to scan the requested number of shrinker slabs (rev3) Patchwork
@ 2018-01-16 14:02 ` Patchwork
  2018-01-16 15:29 ` ✓ Fi.CI.IGT: " Patchwork
  15 siblings, 0 replies; 47+ messages in thread
From: Patchwork @ 2018-01-16 14:02 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/10] drm/i915: Only attempt to scan the requested number of shrinker slabs (rev4)
URL   : https://patchwork.freedesktop.org/series/36501/
State : success

== Summary ==

Series 36501v4 series starting with [01/10] drm/i915: Only attempt to scan the requested number of shrinker slabs
https://patchwork.freedesktop.org/api/1.0/series/36501/revisions/4/mbox/

Test debugfs_test:
        Subgroup read_all_entries:
                dmesg-warn -> FAIL       (fi-elk-e7500) fdo#103989
Test kms_pipe_crc_basic:
        Subgroup suspend-read-crc-pipe-a:
                pass       -> DMESG-WARN (fi-kbl-r) fdo#104172 +1

fdo#103989 https://bugs.freedesktop.org/show_bug.cgi?id=103989
fdo#104172 https://bugs.freedesktop.org/show_bug.cgi?id=104172

fi-bdw-5557u     total:288  pass:267  dwarn:0   dfail:0   fail:0   skip:21  time:420s
fi-bdw-gvtdvm    total:288  pass:264  dwarn:0   dfail:0   fail:0   skip:24  time:423s
fi-blb-e6850     total:288  pass:223  dwarn:1   dfail:0   fail:0   skip:64  time:371s
fi-bsw-n3050     total:288  pass:242  dwarn:0   dfail:0   fail:0   skip:46  time:485s
fi-bwr-2160      total:288  pass:183  dwarn:0   dfail:0   fail:0   skip:105 time:281s
fi-bxt-dsi       total:288  pass:258  dwarn:0   dfail:0   fail:0   skip:30  time:486s
fi-bxt-j4205     total:288  pass:259  dwarn:0   dfail:0   fail:0   skip:29  time:488s
fi-byt-j1900     total:288  pass:253  dwarn:0   dfail:0   fail:0   skip:35  time:467s
fi-byt-n2820     total:288  pass:249  dwarn:0   dfail:0   fail:0   skip:39  time:455s
fi-elk-e7500     total:224  pass:168  dwarn:9   dfail:0   fail:1   skip:45 
fi-gdg-551       total:288  pass:179  dwarn:0   dfail:0   fail:1   skip:108 time:275s
fi-glk-1         total:288  pass:260  dwarn:0   dfail:0   fail:0   skip:28  time:513s
fi-hsw-4770      total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:390s
fi-hsw-4770r     total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:401s
fi-ilk-650       total:288  pass:228  dwarn:0   dfail:0   fail:0   skip:60  time:409s
fi-ivb-3520m     total:288  pass:259  dwarn:0   dfail:0   fail:0   skip:29  time:448s
fi-ivb-3770      total:288  pass:255  dwarn:0   dfail:0   fail:0   skip:33  time:412s
fi-kbl-7500u     total:288  pass:263  dwarn:1   dfail:0   fail:0   skip:24  time:460s
fi-kbl-7560u     total:288  pass:269  dwarn:0   dfail:0   fail:0   skip:19  time:501s
fi-kbl-7567u     total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:457s
fi-kbl-r         total:288  pass:260  dwarn:1   dfail:0   fail:0   skip:27  time:509s
fi-pnv-d510      total:288  pass:222  dwarn:1   dfail:0   fail:0   skip:65  time:579s
fi-skl-6260u     total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:431s
fi-skl-6600u     total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:511s
fi-skl-6700hq    total:288  pass:262  dwarn:0   dfail:0   fail:0   skip:26  time:537s
fi-skl-6700k2    total:288  pass:264  dwarn:0   dfail:0   fail:0   skip:24  time:488s
fi-skl-6770hq    total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:490s
fi-skl-gvtdvm    total:288  pass:265  dwarn:0   dfail:0   fail:0   skip:23  time:434s
fi-snb-2520m     total:288  pass:248  dwarn:0   dfail:0   fail:0   skip:40  time:526s
fi-snb-2600      total:288  pass:248  dwarn:0   dfail:0   fail:0   skip:40  time:401s
Blacklisted hosts:
fi-cfl-s2        total:288  pass:262  dwarn:0   dfail:0   fail:0   skip:26  time:576s
fi-glk-dsi       total:288  pass:258  dwarn:0   dfail:0   fail:0   skip:30  time:477s

a0ca279440c8d7c40d798fed9939a2a25b31434b drm-tip: 2018y-01m-16d-10h-49m-51s UTC integration manifest
fbbb7268251d drm/i915/breadcrumbs: Reduce signaler rbtree to a sorted list
6cfe09be420a drm/i915: Only signal from interrupt when requested
fbb4ac927391 drm/i915: Move the irq_counter inside the spinlock
433a1fc6d9f8 drm/i915: Reduce spinlock hold time during notify_ring() interrupt
52e71d9d2543 drm/i915/breadcrumbs: Drop request reference for the signaler thread
13914d7e2c16 drm/i915: Trim the retired request queue after submitting
d8622789b9c1 drm/i915: Shrink the request kmem_cache on allocation error
9b84813ac938 drm/i915: Shrink the GEM kmem_caches upon idling
31aa3e433b10 drm/i915: Move i915_gem_retire_work_handler
4ca65ba284ca drm/i915: Only attempt to scan the requested number of shrinker slabs

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_7682/issues.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v2] drm/i915: Shrink the GEM kmem_caches upon idling
  2018-01-16 13:05     ` [PATCH v2] " Chris Wilson
@ 2018-01-16 15:12       ` Tvrtko Ursulin
  2018-01-16 15:16         ` Tvrtko Ursulin
  2018-01-16 15:21         ` Chris Wilson
  0 siblings, 2 replies; 47+ messages in thread
From: Tvrtko Ursulin @ 2018-01-16 15:12 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 16/01/2018 13:05, Chris Wilson wrote:
> When we finally decide the gpu is idle, that is a good time to shrink
> our kmem_caches.
> 
> v2: Comment upon the random sprinkling of rcu_barrier() inside the idle
> worker.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> ---
>   drivers/gpu/drm/i915/i915_gem.c | 30 ++++++++++++++++++++++++++++++
>   1 file changed, 30 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 335731c93b4a..61b13fdfaa71 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -4716,6 +4716,21 @@ i915_gem_retire_work_handler(struct work_struct *work)
>   	}
>   }
>   
> +static void shrink_caches(struct drm_i915_private *i915)
> +{
> +	/*
> +	 * kmem_cache_shrink() discards empty slabs and reorders partially
> +	 * filled slabs to prioritise allocating from the mostly full slabs,
> +	 * with the aim of reducing fragmentation.
> +	 */
> +	kmem_cache_shrink(i915->priorities);
> +	kmem_cache_shrink(i915->dependencies);
> +	kmem_cache_shrink(i915->requests);
> +	kmem_cache_shrink(i915->luts);
> +	kmem_cache_shrink(i915->vmas);
> +	kmem_cache_shrink(i915->objects);
> +}
> +
>   static inline bool
>   new_requests_since_last_retire(const struct drm_i915_private *i915)
>   {
> @@ -4803,6 +4818,21 @@ i915_gem_idle_work_handler(struct work_struct *work)
>   		GEM_BUG_ON(!dev_priv->gt.awake);
>   		i915_queue_hangcheck(dev_priv);
>   	}
> +
> +	/*
> +	 * We use magical TYPESAFE_BY_RCU kmem_caches whose pages are not
> +	 * returned to the system immediately but only after an RCU grace
> +	 * period. We want to encourage such pages to be returned and so
> +	 * incorporate an RCU barrier here to provide some rate limiting
> +	 * of the driver and flush the old pages before we free a new batch
> +	 * from the next round of shrinking.
> +	 */
> +	rcu_barrier();

Should this go into the conditional below? I don't think it makes a 
difference in effect, but it may be more logical.

> +
> +	if (!new_requests_since_last_retire(dev_priv)) {
> +		__i915_gem_free_work(&dev_priv->mm.free_work);

I thought for a bit if re-using the worker from here is completely fine 
but I think it is. We expect only one pass when called from here so 
need_resched will be correctly neutralized/not-relevant from this path. 
Hm, unless if we consider mmap_gtt users.. so we could still have new 
objects appearing on the free_list after the 1st pass. And then 
need_resched might kick us out. What do you think?
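
For context, the body of that worker is shaped roughly like this
(paraphrased from memory, not verbatim):

	while ((freed = llist_del_all(&i915->mm.free_list))) {
		__i915_gem_free_objects(i915, freed);
		if (need_resched())
			break;
	}

So a second batch of objects freed from mmap_gtt paths during the first
pass would be picked up by the loop, unless need_resched bails us out
first.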

Regards,

Tvrtko

> +		shrink_caches(dev_priv);
> +	}
>   }
>   
>   int i915_gem_suspend(struct drm_i915_private *dev_priv)
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v2] drm/i915: Shrink the GEM kmem_caches upon idling
  2018-01-16 15:12       ` Tvrtko Ursulin
@ 2018-01-16 15:16         ` Tvrtko Ursulin
  2018-01-16 15:21         ` Chris Wilson
  1 sibling, 0 replies; 47+ messages in thread
From: Tvrtko Ursulin @ 2018-01-16 15:16 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 16/01/2018 15:12, Tvrtko Ursulin wrote:
> 
> On 16/01/2018 13:05, Chris Wilson wrote:
>> When we finally decide the gpu is idle, that is a good time to shrink
>> our kmem_caches.
>>
>> v2: Comment upon the random sprinkling of rcu_barrier() inside the idle
>> worker.
>>
>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_gem.c | 30 ++++++++++++++++++++++++++++++
>>   1 file changed, 30 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_gem.c 
>> b/drivers/gpu/drm/i915/i915_gem.c
>> index 335731c93b4a..61b13fdfaa71 100644
>> --- a/drivers/gpu/drm/i915/i915_gem.c
>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>> @@ -4716,6 +4716,21 @@ i915_gem_retire_work_handler(struct work_struct 
>> *work)
>>       }
>>   }
>> +static void shrink_caches(struct drm_i915_private *i915)
>> +{
>> +    /*
>> +     * kmem_cache_shrink() discards empty slabs and reorders partially
>> +     * filled slabs to prioritise allocating from the mostly full slabs,
>> +     * with the aim of reducing fragmentation.
>> +     */
>> +    kmem_cache_shrink(i915->priorities);
>> +    kmem_cache_shrink(i915->dependencies);
>> +    kmem_cache_shrink(i915->requests);
>> +    kmem_cache_shrink(i915->luts);
>> +    kmem_cache_shrink(i915->vmas);
>> +    kmem_cache_shrink(i915->objects);
>> +}
>> +
>>   static inline bool
>>   new_requests_since_last_retire(const struct drm_i915_private *i915)
>>   {
>> @@ -4803,6 +4818,21 @@ i915_gem_idle_work_handler(struct work_struct 
>> *work)
>>           GEM_BUG_ON(!dev_priv->gt.awake);
>>           i915_queue_hangcheck(dev_priv);
>>       }
>> +
>> +    /*
>> +     * We use magical TYPESAFE_BY_RCU kmem_caches whose pages are not
>> +     * returned to the system imediately but only after an RCU grace
>> +     * returned to the system immediately but only after an RCU grace
>> +     * period. We want to encourage such pages to be returned and so
>> +     * incorporate an RCU barrier here to provide some rate limiting
>> +     * from the next round of shrinking.
>> +     */
>> +    rcu_barrier();
> 
> Should this go into the conditional below? I don't think it makes a 
> difference in effect, but it may be more logical.
> 
>> +
>> +    if (!new_requests_since_last_retire(dev_priv)) {
>> +        __i915_gem_free_work(&dev_priv->mm.free_work);
> 
> I thought for a bit if re-using the worker from here is completely fine 
> but I think it is. We expect only one pass when called from here so 
> need_resched will be correctly neutralized/not-relevant from this path. 
> Hm, unless we consider mmap_gtt users.. so we could still have new 
> objects appearing on the free_list after the 1st pass. And then 
> need_resched might kick us out. What do you think?

This also ties back to what I wrote in the earlier reply - do we want to 
shrink the obj and vma caches from here? It may be colliding with 
mmap_gtt operations. But it sounds appealing to tidy them, and I can't 
think of any other convenient point. Given how we are de-prioritising 
mmap_gtt it's probably fine.

> 
> Regards,
> 
> Tvrtko
> 
>> +        shrink_caches(dev_priv);
>> +    }
>>   }
>>   int i915_gem_suspend(struct drm_i915_private *dev_priv)
>>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v2] drm/i915: Shrink the request kmem_cache on allocation error
  2018-01-16 13:15     ` [PATCH v2] " Chris Wilson
@ 2018-01-16 15:19       ` Tvrtko Ursulin
  0 siblings, 0 replies; 47+ messages in thread
From: Tvrtko Ursulin @ 2018-01-16 15:19 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 16/01/2018 13:15, Chris Wilson wrote:
> If we fail to allocate a new request, make sure we recover the pages
> that are in the process of being freed by inserting an RCU barrier.
> 
> v2: Comment before the shrink and barrier in the error path.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_gem_request.c | 11 +++++++++++
>   1 file changed, 11 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index 72bdc203716f..a0f451b4a4e8 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -696,6 +696,17 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
>   		if (ret)
>   			goto err_unreserve;
>   
> +		/*
> +		 * We've forced the client to stall and catch up with whatever
> +		 * backlog there might have been. As we are assuming that we
> +		 * caused the mempressure, now is an opportune time to
> +		 * recover as much memory from the request pool as is possible.
> +		 * Having already penalized the client to stall, we spend
> +		 * a little extra time to re-optimise page allocation.
> +		 */
> +		kmem_cache_shrink(dev_priv->requests);
> +		rcu_barrier(); /* Recover the TYPESAFE_BY_RCU pages */
> +
>   		req = kmem_cache_alloc(dev_priv->requests, GFP_KERNEL);
>   		if (!req) {
>   			ret = -ENOMEM;
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v2] drm/i915: Shrink the GEM kmem_caches upon idling
  2018-01-16 15:12       ` Tvrtko Ursulin
  2018-01-16 15:16         ` Tvrtko Ursulin
@ 2018-01-16 15:21         ` Chris Wilson
  2018-01-16 17:25           ` Tvrtko Ursulin
  1 sibling, 1 reply; 47+ messages in thread
From: Chris Wilson @ 2018-01-16 15:21 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-01-16 15:12:43)
> 
> On 16/01/2018 13:05, Chris Wilson wrote:
> > When we finally decide the gpu is idle, that is a good time to shrink
> > our kmem_caches.
> > 
> > v2: Comment upon the random sprinkling of rcu_barrier() inside the idle
> > worker.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> > ---
> >   drivers/gpu/drm/i915/i915_gem.c | 30 ++++++++++++++++++++++++++++++
> >   1 file changed, 30 insertions(+)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index 335731c93b4a..61b13fdfaa71 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -4716,6 +4716,21 @@ i915_gem_retire_work_handler(struct work_struct *work)
> >       }
> >   }
> >   
> > +static void shrink_caches(struct drm_i915_private *i915)
> > +{
> > +     /*
> > +      * kmem_cache_shrink() discards empty slabs and reorders partially
> > +      * filled slabs to prioritise allocating from the mostly full slabs,
> > +      * with the aim of reducing fragmentation.
> > +      */
> > +     kmem_cache_shrink(i915->priorities);
> > +     kmem_cache_shrink(i915->dependencies);
> > +     kmem_cache_shrink(i915->requests);
> > +     kmem_cache_shrink(i915->luts);
> > +     kmem_cache_shrink(i915->vmas);
> > +     kmem_cache_shrink(i915->objects);
> > +}
> > +
> >   static inline bool
> >   new_requests_since_last_retire(const struct drm_i915_private *i915)
> >   {
> > @@ -4803,6 +4818,21 @@ i915_gem_idle_work_handler(struct work_struct *work)
> >               GEM_BUG_ON(!dev_priv->gt.awake);
> >               i915_queue_hangcheck(dev_priv);
> >       }
> > +
> > +     /*
> > +      * We use magical TYPESAFE_BY_RCU kmem_caches whose pages are not
> > +      * returned to the system immediately but only after an RCU grace
> > +      * period. We want to encourage such pages to be returned and so
> > +      * incorporate a RCU barrier here to provide some rate limiting
> > +      * of the driver and flush the old pages before we free a new batch
> > +      * from the next round of shrinking.
> > +      */
> > +     rcu_barrier();
> 
> Should this go into the conditional below? I don't think it makes a 
> difference effectively, but may be more logical.

My thinking was to have the check after the sleep as the state is
subject to change. I'm not concerned about the random unnecessary pauses
on this wq, since it is subject to struct_mutex delays, so was quite
happy to think of this as being "we shall only do one idle pass per RCU
grace period".

> > +
> > +     if (!new_requests_since_last_retire(dev_priv)) {
> > +             __i915_gem_free_work(&dev_priv->mm.free_work);
> 
> I thought for a bit if re-using the worker from here is completely fine 
> but I think it is. We expect only one pass when called from here so 
> need_resched will be correctly neutralized/not-relevant from this path. 

At present, I was only thinking about the single path. This was meant to
resemble i915_gem_drain_objects(), without the recursion :)

> Hm, unless we consider mmap_gtt users.. so we could still have new 
> objects appearing on the free_list after the 1st pass. And then 
> need_resched might kick us out. What do you think?

Not just mmap_gtt, any user freeing objects (coupled with RCU grace
periods). I don't think it matters if we happen to loop until the
timeslice is consumed as we are doing work that we would be doing
anyway on this i915->wq.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 47+ messages in thread

* ✓ Fi.CI.IGT: success for series starting with [01/10] drm/i915: Only attempt to scan the requested number of shrinker slabs (rev4)
  2018-01-15 21:24 Prevent trivial oom from gem_exec_nop/sequential Chris Wilson
                   ` (14 preceding siblings ...)
  2018-01-16 14:02 ` ✓ Fi.CI.BAT: success for series starting with [01/10] drm/i915: Only attempt to scan the requested number of shrinker slabs (rev4) Patchwork
@ 2018-01-16 15:29 ` Patchwork
  15 siblings, 0 replies; 47+ messages in thread
From: Patchwork @ 2018-01-16 15:29 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/10] drm/i915: Only attempt to scan the requested number of shrinker slabs (rev4)
URL   : https://patchwork.freedesktop.org/series/36501/
State : success

== Summary ==

Test kms_flip:
        Subgroup flip-vs-fences-interruptible:
                dmesg-warn -> PASS       (shard-hsw) fdo#102614
        Subgroup plain-flip-ts-check:
                pass       -> SKIP       (shard-hsw) fdo#100368
Test gem_tiled_swapping:
        Subgroup non-threaded:
                incomplete -> PASS       (shard-snb) fdo#104218
Test kms_frontbuffer_tracking:
        Subgroup fbc-1p-offscren-pri-shrfb-draw-render:
                fail       -> PASS       (shard-snb) fdo#101623
Test perf:
        Subgroup polling:
                fail       -> PASS       (shard-hsw) fdo#102252 +1

fdo#102614 https://bugs.freedesktop.org/show_bug.cgi?id=102614
fdo#100368 https://bugs.freedesktop.org/show_bug.cgi?id=100368
fdo#104218 https://bugs.freedesktop.org/show_bug.cgi?id=104218
fdo#101623 https://bugs.freedesktop.org/show_bug.cgi?id=101623
fdo#102252 https://bugs.freedesktop.org/show_bug.cgi?id=102252

shard-hsw        total:2729 pass:1547 dwarn:1   dfail:0   fail:11  skip:1170 time:9011s
shard-snb        total:2729 pass:1319 dwarn:1   dfail:0   fail:10  skip:1399 time:7938s
Blacklisted hosts:
shard-apl        total:2729 pass:1698 dwarn:1   dfail:0   fail:23  skip:1006 time:13573s

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_7682/shards.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v2] drm/i915: Shrink the GEM kmem_caches upon idling
  2018-01-16 15:21         ` Chris Wilson
@ 2018-01-16 17:25           ` Tvrtko Ursulin
  2018-01-16 17:36             ` Chris Wilson
  0 siblings, 1 reply; 47+ messages in thread
From: Tvrtko Ursulin @ 2018-01-16 17:25 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 16/01/2018 15:21, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-01-16 15:12:43)
>>
>> On 16/01/2018 13:05, Chris Wilson wrote:
>>> When we finally decide the gpu is idle, that is a good time to shrink
>>> our kmem_caches.
>>>
>>> v2: Comment upon the random sprinkling of rcu_barrier() inside the idle
>>> worker.
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
>>> ---
>>>    drivers/gpu/drm/i915/i915_gem.c | 30 ++++++++++++++++++++++++++++++
>>>    1 file changed, 30 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
>>> index 335731c93b4a..61b13fdfaa71 100644
>>> --- a/drivers/gpu/drm/i915/i915_gem.c
>>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>>> @@ -4716,6 +4716,21 @@ i915_gem_retire_work_handler(struct work_struct *work)
>>>        }
>>>    }
>>>    
>>> +static void shrink_caches(struct drm_i915_private *i915)
>>> +{
>>> +     /*
>>> +      * kmem_cache_shrink() discards empty slabs and reorders partially
>>> +      * filled slabs to prioritise allocating from the mostly full slabs,
>>> +      * with the aim of reducing fragmentation.
>>> +      */
>>> +     kmem_cache_shrink(i915->priorities);
>>> +     kmem_cache_shrink(i915->dependencies);
>>> +     kmem_cache_shrink(i915->requests);
>>> +     kmem_cache_shrink(i915->luts);
>>> +     kmem_cache_shrink(i915->vmas);
>>> +     kmem_cache_shrink(i915->objects);
>>> +}
>>> +
>>>    static inline bool
>>>    new_requests_since_last_retire(const struct drm_i915_private *i915)
>>>    {
>>> @@ -4803,6 +4818,21 @@ i915_gem_idle_work_handler(struct work_struct *work)
>>>                GEM_BUG_ON(!dev_priv->gt.awake);
>>>                i915_queue_hangcheck(dev_priv);
>>>        }
>>> +
>>> +     /*
>>> +      * We use magical TYPESAFE_BY_RCU kmem_caches whose pages are not
>>> +      * returned to the system immediately but only after an RCU grace
>>> +      * period. We want to encourage such pages to be returned and so
>>> +      * incorporate a RCU barrier here to provide some rate limiting
>>> +      * of the driver and flush the old pages before we free a new batch
>>> +      * from the next round of shrinking.
>>> +      */
>>> +     rcu_barrier();
>>
>> Should this go into the conditional below? I don't think it makes a
>> difference effectively, but may be more logical.
> 
> My thinking was to have the check after the sleep as the state is
> subject to change. I'm not concerned about the random unnecessary pauses
> on this wq, since it is subject to struct_mutex delays, so was quite

The delay doesn't worry me, just that it is random - neither the 
appearance of new requests nor the completion of existing ones has 
anything to do with one RCU grace period.

> happy to think of this as being "we shall only do one idle pass per RCU
> grace period".

Idle worker is probably several orders of magnitude less frequent than 
RCU grace periods so I don't think that can be a concern.

Hm..

>>> +
>>> +     if (!new_requests_since_last_retire(dev_priv)) {
>>> +             __i915_gem_free_work(&dev_priv->mm.free_work);

... you wouldn't want to pull this up under the struct mutex section? It 
would need a different flavour of a function to be called, and some 
refactoring of the existing ones.

shrink_caches could be left here under the same check and preceded by 
rcu_barrier.

>> I thought for a bit if re-using the worker from here is completely fine
>> but I think it is. We expect only one pass when called from here so
>> need_resched will be correctly neutralized/not-relevant from this path.
> 
> At present, I was only thinking about the single path. This was meant to
> resemble i915_gem_drain_objects(), without the recursion :)
> 
>> Hm, unless we consider mmap_gtt users.. so we could still have new
>> objects appearing on the free_list after the 1st pass. And then
>> need_resched might kick us out. What do you think?
> 
> Not just mmap_gtt, any user freeing objects (coupled with RCU grace
> periods). I don't think it matters if we happen to loop until the
> timeslice is consumed as we are doing work that we would be doing
> anyway on this i915->wq.

Yeah, doesn't matter - I was wondering whether we should explicitly not 
consider need_resched when called from the idle worker and only grab the 
first batch - what's currently on the freed list.

Regards,

Tvrtko


_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v2] drm/i915: Shrink the GEM kmem_caches upon idling
  2018-01-16 17:25           ` Tvrtko Ursulin
@ 2018-01-16 17:36             ` Chris Wilson
  2018-01-17 10:18               ` Tvrtko Ursulin
  0 siblings, 1 reply; 47+ messages in thread
From: Chris Wilson @ 2018-01-16 17:36 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-01-16 17:25:25)
> 
> On 16/01/2018 15:21, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2018-01-16 15:12:43)
> >>
> >> On 16/01/2018 13:05, Chris Wilson wrote:
> >>> When we finally decide the gpu is idle, that is a good time to shrink
> >>> our kmem_caches.
> >>>
> >>> v2: Comment upon the random sprinkling of rcu_barrier() inside the idle
> >>> worker.
> >>>
> >>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >>> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> >>> ---
> >>>    drivers/gpu/drm/i915/i915_gem.c | 30 ++++++++++++++++++++++++++++++
> >>>    1 file changed, 30 insertions(+)
> >>>
> >>> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> >>> index 335731c93b4a..61b13fdfaa71 100644
> >>> --- a/drivers/gpu/drm/i915/i915_gem.c
> >>> +++ b/drivers/gpu/drm/i915/i915_gem.c
> >>> @@ -4716,6 +4716,21 @@ i915_gem_retire_work_handler(struct work_struct *work)
> >>>        }
> >>>    }
> >>>    
> >>> +static void shrink_caches(struct drm_i915_private *i915)
> >>> +{
> >>> +     /*
> >>> +      * kmem_cache_shrink() discards empty slabs and reorders partially
> >>> +      * filled slabs to prioritise allocating from the mostly full slabs,
> >>> +      * with the aim of reducing fragmentation.
> >>> +      */
> >>> +     kmem_cache_shrink(i915->priorities);
> >>> +     kmem_cache_shrink(i915->dependencies);
> >>> +     kmem_cache_shrink(i915->requests);
> >>> +     kmem_cache_shrink(i915->luts);
> >>> +     kmem_cache_shrink(i915->vmas);
> >>> +     kmem_cache_shrink(i915->objects);
> >>> +}
> >>> +
> >>>    static inline bool
> >>>    new_requests_since_last_retire(const struct drm_i915_private *i915)
> >>>    {
> >>> @@ -4803,6 +4818,21 @@ i915_gem_idle_work_handler(struct work_struct *work)
> >>>                GEM_BUG_ON(!dev_priv->gt.awake);
> >>>                i915_queue_hangcheck(dev_priv);
> >>>        }
> >>> +
> >>> +     /*
> >>> +      * We use magical TYPESAFE_BY_RCU kmem_caches whose pages are not
> >>> +      * returned to the system immediately but only after an RCU grace
> >>> +      * period. We want to encourage such pages to be returned and so
> >>> +      * incorporate a RCU barrier here to provide some rate limiting
> >>> +      * of the driver and flush the old pages before we free a new batch
> >>> +      * from the next round of shrinking.
> >>> +      */
> >>> +     rcu_barrier();
> >>
> >> Should this go into the conditional below? I don't think it makes a
> >> difference effectively, but may be more logical.
> > 
> > My thinking was to have the check after the sleep as the state is
> > subject to change. I'm not concerned about the random unnecessary pauses
> > on this wq, since it is subject to struct_mutex delays, so was quite
> 
> The delay doesn't worry me, just that it is random - neither the 
> appearance of new requests nor the completion of existing ones has 
> anything to do with one RCU grace period.
> 
> > happy to think of this as being "we shall only do one idle pass per RCU
> > grace period".
> 
> Idle worker is probably several orders of magnitude less frequent than 
> RCU grace periods so I don't think that can be a concern.
> 
> Hm..
> 
> >>> +
> >>> +     if (!new_requests_since_last_retire(dev_priv)) {
> >>> +             __i915_gem_free_work(&dev_priv->mm.free_work);
> 
> ... you wouldn't want to pull this up under the struct mutex section? It 
> would need a different flavour of a function to be called, and some 
> refactoring of the existing ones.

"Some". I don't think that improves anything?

The statement of intent to me is that we only throw away the caches and
excess memory if and only if we are idle. The presumption is that under
active conditions those caches are important, but if we are about to
sleep for long periods of time, we should be proactive in releasing
resources.

I can hear you about to ask if we could add a timer and wake up in 10s to
prove we were idle!
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v2] drm/i915: Shrink the GEM kmem_caches upon idling
  2018-01-16 17:36             ` Chris Wilson
@ 2018-01-17 10:18               ` Tvrtko Ursulin
  2018-01-18 18:06                 ` Chris Wilson
  0 siblings, 1 reply; 47+ messages in thread
From: Tvrtko Ursulin @ 2018-01-17 10:18 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 16/01/2018 17:36, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-01-16 17:25:25)
>>
>> On 16/01/2018 15:21, Chris Wilson wrote:
>>> Quoting Tvrtko Ursulin (2018-01-16 15:12:43)
>>>>
>>>> On 16/01/2018 13:05, Chris Wilson wrote:
>>>>> When we finally decide the gpu is idle, that is a good time to shrink
>>>>> our kmem_caches.
>>>>>
>>>>> v2: Comment upon the random sprinkling of rcu_barrier() inside the idle
>>>>> worker.
>>>>>
>>>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
>>>>> ---
>>>>>     drivers/gpu/drm/i915/i915_gem.c | 30 ++++++++++++++++++++++++++++++
>>>>>     1 file changed, 30 insertions(+)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
>>>>> index 335731c93b4a..61b13fdfaa71 100644
>>>>> --- a/drivers/gpu/drm/i915/i915_gem.c
>>>>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>>>>> @@ -4716,6 +4716,21 @@ i915_gem_retire_work_handler(struct work_struct *work)
>>>>>         }
>>>>>     }
>>>>>     
>>>>> +static void shrink_caches(struct drm_i915_private *i915)
>>>>> +{
>>>>> +     /*
>>>>> +      * kmem_cache_shrink() discards empty slabs and reorders partially
>>>>> +      * filled slabs to prioritise allocating from the mostly full slabs,
>>>>> +      * with the aim of reducing fragmentation.
>>>>> +      */
>>>>> +     kmem_cache_shrink(i915->priorities);
>>>>> +     kmem_cache_shrink(i915->dependencies);
>>>>> +     kmem_cache_shrink(i915->requests);
>>>>> +     kmem_cache_shrink(i915->luts);
>>>>> +     kmem_cache_shrink(i915->vmas);
>>>>> +     kmem_cache_shrink(i915->objects);
>>>>> +}
>>>>> +
>>>>>     static inline bool
>>>>>     new_requests_since_last_retire(const struct drm_i915_private *i915)
>>>>>     {
>>>>> @@ -4803,6 +4818,21 @@ i915_gem_idle_work_handler(struct work_struct *work)
>>>>>                 GEM_BUG_ON(!dev_priv->gt.awake);
>>>>>                 i915_queue_hangcheck(dev_priv);
>>>>>         }
>>>>> +
>>>>> +     /*
>>>>> +      * We use magical TYPESAFE_BY_RCU kmem_caches whose pages are not
>>>>> +      * returned to the system immediately but only after an RCU grace
>>>>> +      * period. We want to encourage such pages to be returned and so
>>>>> +      * incorporate a RCU barrier here to provide some rate limiting
>>>>> +      * of the driver and flush the old pages before we free a new batch
>>>>> +      * from the next round of shrinking.
>>>>> +      */
>>>>> +     rcu_barrier();
>>>>
>>>> Should this go into the conditional below? I don't think it makes a
>>>> difference effectively, but may be more logical.
>>>
>>> My thinking was to have the check after the sleep as the state is
>>> subject to change. I'm not concerned about the random unnecessary pauses
>>> on this wq, since it is subject to struct_mutex delays, so was quite
>>
>> The delay doesn't worry me, just that it is random - neither the
>> appearance of new requests nor the completion of existing ones has
>> anything to do with one RCU grace period.
>>
>>> happy to think of this as being "we shall only do one idle pass per RCU
>>> grace period".
>>
>> Idle worker is probably several orders of magnitude less frequent than
>> RCU grace periods so I don't think that can be a concern.
>>
>> Hm..
>>
>>>>> +
>>>>> +     if (!new_requests_since_last_retire(dev_priv)) {
>>>>> +             __i915_gem_free_work(&dev_priv->mm.free_work);
>>
>> ... you wouldn't want to pull this up under the struct mutex section? It
>> would need a different flavour of a function to be called, and some
>> refactoring of the existing ones.
> 
> "Some". I don't think that improves anything?
> 
> The statement of intent to me is that we only throw away the caches and
> excess memory if and only if we are idle. The presumption is that under
> active conditions those caches are important, but if we are about to
> sleep for long periods of time, we should be proactive in releasing
> resources.
> 
> I can hear you about to ask if we could add a timer and wake up in 10s to
> prove we were idle!

No, pointless since this proposal already runs outside this guarantee, 
and anyway, one way or the other there is potential to disrupt the next 
client.

How about sticking in a break on new_request_since_last_retire() into 
__i915_gem_free_work()? Would that defeat the backlog cleaning? Maybe 
conditional only when called from the idle handler?
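
Purely as a sketch of what I mean - with a hypothetical from_idle
parameter and helper name, neither of which exist in the series:

	static void __i915_gem_free_backlog(struct drm_i915_private *i915,
					    bool from_idle)
	{
		struct llist_node *freed;

		/*
		 * Clear the backlog of freed objects, but bail out early
		 * if the scheduler wants the CPU back, or, when called
		 * from the idle handler, if new requests have arrived in
		 * the meantime.
		 */
		while ((freed = llist_del_all(&i915->mm.free_list))) {
			__i915_gem_free_objects(i915, freed);

			if (need_resched())
				break;

			if (from_idle &&
			    new_requests_since_last_retire(i915))
				break;
		}
	}

The existing worker and the idle handler would then both wrap this,
passing from_idle as appropriate.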

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 05/10] drm/i915: Trim the retired request queue after submitting
  2018-01-16 10:32     ` Chris Wilson
@ 2018-01-17 10:23       ` Tvrtko Ursulin
  0 siblings, 0 replies; 47+ messages in thread
From: Tvrtko Ursulin @ 2018-01-17 10:23 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 16/01/2018 10:32, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-01-16 10:18:55)
>>
>> On 15/01/2018 21:24, Chris Wilson wrote:
>>> If we submit a request and see that the previous request on this
>>> timeline was already signaled, we first do not need to add the
>>> dependency tracker for that completed request and secondly we know that
>>> there is then a large backlog in retiring requests affecting this
>>> timeline. Given that we just submitted more work to the HW, now would be
>>> a good time to catch up on those retirements.
>>
>> How can we be sure there is a large backlog? It may just be that the
>> submission frequency combined with request duration is just right to
>> always see even a solitary previous completed request, no?
> 
> We always try and retire one old request per new request. To get to the
> point where we see an unretired completed fence here implies that we are
> allocating faster than retiring, and so have a backlog.
>>
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
>>> ---
>>>    drivers/gpu/drm/i915/i915_gem_request.c | 5 ++++-
>>>    1 file changed, 4 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
>>> index e6d4857b1f78..6a143099cea1 100644
>>> --- a/drivers/gpu/drm/i915/i915_gem_request.c
>>> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
>>> @@ -1019,7 +1019,7 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
>>>    
>>>        prev = i915_gem_active_raw(&timeline->last_request,
>>>                                   &request->i915->drm.struct_mutex);
>>> -     if (prev) {
>>> +     if (prev && !i915_gem_request_completed(prev)) {
>>>                i915_sw_fence_await_sw_fence(&request->submit, &prev->submit,
>>>                                             &request->submitq);
>>
>> This makes sense.
>>
>>>                if (engine->schedule)
>>> @@ -1055,6 +1055,9 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
>>>        local_bh_disable();
>>>        i915_sw_fence_commit(&request->submit);
>>>        local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
>>> +
>>> +     if (prev && i915_gem_request_completed(prev))
>>> +             i915_gem_request_retire_upto(prev);
>>
>> And here I'm a bit surprised that you want to penalize the submission
>> path with house-keeping - in cases where there really is a big
>> backlog of completed requests. But since it is after the tasklet
>> kicking, I suppose the effect on submission latency is somewhat
>> mitigated. Unless the caller wants to submit many requests rapidly. Hm..
>> retire at execbuf time seems to keep coming in and out, albeit in a more
>> controlled fashion with this.
> 
> I was surprised myself ;) What I considered the next step here is to
> limit the retirements to the client's timeline to avoid having to do
> work for others. It's the fact that this comes after the submission of
> the next request, so we have at least a few microseconds to play with,
> that makes it seem less obnoxious to me. Plus that it's so unlikely to
> happen suggests to me that we have fallen so far behind in our
> alloc/retire equilibrium that a catch-up is justified. And most of the
> heavy work has been moved from request retirement onto kthreads (object
> release, context release etc).

Okay, you convinced me. Well, actually I am still uncertain whether some 
diabolical submission pattern could keep triggering this path, but at 
least it shouldn't be on every submission.
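
For the record, the equilibrium here is the opportunistic retire on
allocation - from memory roughly the following, so treat it as a sketch
rather than the exact code:

	/* Move the oldest request to the slab-cache (if not in use!) */
	rq = list_first_entry_or_null(&engine->timeline->requests,
				      typeof(*rq), link);
	if (rq && i915_gem_request_completed(rq))
		i915_gem_request_retire(rq);

One new request in, at most one old request out, so a completed but
unretired prev really does mean retirement has been losing the race for
a while.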

Would you be happy to split up the two parts of this patch?

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 01/10] drm/i915: Only attempt to scan the requested number of shrinker slabs
  2018-01-15 21:24 ` [PATCH 01/10] drm/i915: Only attempt to scan the requested number of shrinker slabs Chris Wilson
@ 2018-01-17 10:29   ` Tvrtko Ursulin
  2018-01-18  9:16     ` Chris Wilson
  2018-01-18  9:19     ` Chris Wilson
  0 siblings, 2 replies; 47+ messages in thread
From: Tvrtko Ursulin @ 2018-01-17 10:29 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 15/01/2018 21:24, Chris Wilson wrote:
> Since commit 4e773c3a8a69 ("drm/i915: Wire up shrinkctl->nr_scanned"),

Sounds like it deserves this in the Fixes: tag as well?

> we track the number of objects we scan and do not wish to exceed that as
> it will overly penalise our own slabs under mempressure. Given that we
> now know the target number of objects to scan, use that as our guide for
> deciding to shrink as opposed to the number of objects we manage to
> shrink (which doesn't correspond to the numbers we report to shrinkctl).
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> ---
>   drivers/gpu/drm/i915/i915_gem_shrinker.c | 4 ++--
>   1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> index 9029ed04879c..0e158f9287c4 100644
> --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
> +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> @@ -363,13 +363,13 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
>   				I915_SHRINK_BOUND |
>   				I915_SHRINK_UNBOUND |
>   				I915_SHRINK_PURGEABLE);
> -	if (freed < sc->nr_to_scan)
> +	if (sc->nr_scanned < sc->nr_to_scan)
>   		freed += i915_gem_shrink(i915,
>   					 sc->nr_to_scan - sc->nr_scanned,
>   					 &sc->nr_scanned,
>   					 I915_SHRINK_BOUND |
>   					 I915_SHRINK_UNBOUND);
> -	if (freed < sc->nr_to_scan && current_is_kswapd()) {
> +	if (sc->nr_scanned < sc->nr_to_scan && current_is_kswapd()) {
>   		intel_runtime_pm_get(i915);
>   		freed += i915_gem_shrink(i915,
>   					 sc->nr_to_scan - sc->nr_scanned,
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 02/10] drm/i915: Move i915_gem_retire_work_handler
  2018-01-15 21:24 ` [PATCH 02/10] drm/i915: Move i915_gem_retire_work_handler Chris Wilson
@ 2018-01-17 10:33   ` Tvrtko Ursulin
  0 siblings, 0 replies; 47+ messages in thread
From: Tvrtko Ursulin @ 2018-01-17 10:33 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 15/01/2018 21:24, Chris Wilson wrote:
> In preparation for the next patch, move i915_gem_retire_work_handler()
> later to avoid a forward declaration.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_gem.c | 228 ++++++++++++++++++++--------------------
>   1 file changed, 114 insertions(+), 114 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 87937c4f9dff..a8840a514377 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3310,120 +3310,6 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
>   	return true;
>   }
>   
> -static void
> -i915_gem_retire_work_handler(struct work_struct *work)
> -{
> -	struct drm_i915_private *dev_priv =
> -		container_of(work, typeof(*dev_priv), gt.retire_work.work);
> -	struct drm_device *dev = &dev_priv->drm;
> -
> -	/* Come back later if the device is busy... */
> -	if (mutex_trylock(&dev->struct_mutex)) {
> -		i915_gem_retire_requests(dev_priv);
> -		mutex_unlock(&dev->struct_mutex);
> -	}
> -
> -	/* Keep the retire handler running until we are finally idle.
> -	 * We do not need to do this test under locking as in the worst-case
> -	 * we queue the retire worker once too often.
> -	 */
> -	if (READ_ONCE(dev_priv->gt.awake)) {
> -		i915_queue_hangcheck(dev_priv);
> -		queue_delayed_work(dev_priv->wq,
> -				   &dev_priv->gt.retire_work,
> -				   round_jiffies_up_relative(HZ));
> -	}
> -}
> -
> -static inline bool
> -new_requests_since_last_retire(const struct drm_i915_private *i915)
> -{
> -	return (READ_ONCE(i915->gt.active_requests) ||
> -		work_pending(&i915->gt.idle_work.work));
> -}
> -
> -static void
> -i915_gem_idle_work_handler(struct work_struct *work)
> -{
> -	struct drm_i915_private *dev_priv =
> -		container_of(work, typeof(*dev_priv), gt.idle_work.work);
> -	bool rearm_hangcheck;
> -	ktime_t end;
> -
> -	if (!READ_ONCE(dev_priv->gt.awake))
> -		return;
> -
> -	/*
> -	 * Wait for last execlists context complete, but bail out in case a
> -	 * new request is submitted.
> -	 */
> -	end = ktime_add_ms(ktime_get(), I915_IDLE_ENGINES_TIMEOUT);
> -	do {
> -		if (new_requests_since_last_retire(dev_priv))
> -			return;
> -
> -		if (intel_engines_are_idle(dev_priv))
> -			break;
> -
> -		usleep_range(100, 500);
> -	} while (ktime_before(ktime_get(), end));
> -
> -	rearm_hangcheck =
> -		cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
> -
> -	if (!mutex_trylock(&dev_priv->drm.struct_mutex)) {
> -		/* Currently busy, come back later */
> -		mod_delayed_work(dev_priv->wq,
> -				 &dev_priv->gt.idle_work,
> -				 msecs_to_jiffies(50));
> -		goto out_rearm;
> -	}
> -
> -	/*
> -	 * New request retired after this work handler started, extend active
> -	 * period until next instance of the work.
> -	 */
> -	if (new_requests_since_last_retire(dev_priv))
> -		goto out_unlock;
> -
> -	/*
> -	 * Be paranoid and flush a concurrent interrupt to make sure
> -	 * we don't reactivate any irq tasklets after parking.
> -	 *
> -	 * FIXME: Note that even though we have waited for execlists to be idle,
> -	 * there may still be an in-flight interrupt even though the CSB
> -	 * is now empty. synchronize_irq() makes sure that a residual interrupt
> -	 * is completed before we continue, but it doesn't prevent the HW from
> -	 * raising a spurious interrupt later. To complete the shield we should
> -	 * coordinate disabling the CS irq with flushing the interrupts.
> -	 */
> -	synchronize_irq(dev_priv->drm.irq);
> -
> -	intel_engines_park(dev_priv);
> -	i915_gem_timelines_park(dev_priv);
> -
> -	i915_pmu_gt_parked(dev_priv);
> -
> -	GEM_BUG_ON(!dev_priv->gt.awake);
> -	dev_priv->gt.awake = false;
> -	rearm_hangcheck = false;
> -
> -	if (INTEL_GEN(dev_priv) >= 6)
> -		gen6_rps_idle(dev_priv);
> -
> -	intel_display_power_put(dev_priv, POWER_DOMAIN_GT_IRQ);
> -
> -	intel_runtime_pm_put(dev_priv);
> -out_unlock:
> -	mutex_unlock(&dev_priv->drm.struct_mutex);
> -
> -out_rearm:
> -	if (rearm_hangcheck) {
> -		GEM_BUG_ON(!dev_priv->gt.awake);
> -		i915_queue_hangcheck(dev_priv);
> -	}
> -}
> -
>   void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
>   {
>   	struct drm_i915_private *i915 = to_i915(gem->dev);
> @@ -4798,6 +4684,120 @@ void i915_gem_sanitize(struct drm_i915_private *i915)
>   	}
>   }
>   
> +static void
> +i915_gem_retire_work_handler(struct work_struct *work)
> +{
> +	struct drm_i915_private *dev_priv =
> +		container_of(work, typeof(*dev_priv), gt.retire_work.work);
> +	struct drm_device *dev = &dev_priv->drm;
> +
> +	/* Come back later if the device is busy... */
> +	if (mutex_trylock(&dev->struct_mutex)) {
> +		i915_gem_retire_requests(dev_priv);
> +		mutex_unlock(&dev->struct_mutex);
> +	}
> +
> +	/* Keep the retire handler running until we are finally idle.
> +	 * We do not need to do this test under locking as in the worst-case
> +	 * we queue the retire worker once too often.
> +	 */
> +	if (READ_ONCE(dev_priv->gt.awake)) {
> +		i915_queue_hangcheck(dev_priv);
> +		queue_delayed_work(dev_priv->wq,
> +				   &dev_priv->gt.retire_work,
> +				   round_jiffies_up_relative(HZ));
> +	}
> +}
> +
> +static inline bool
> +new_requests_since_last_retire(const struct drm_i915_private *i915)
> +{
> +	return (READ_ONCE(i915->gt.active_requests) ||
> +		work_pending(&i915->gt.idle_work.work));
> +}
> +
> +static void
> +i915_gem_idle_work_handler(struct work_struct *work)
> +{
> +	struct drm_i915_private *dev_priv =
> +		container_of(work, typeof(*dev_priv), gt.idle_work.work);
> +	bool rearm_hangcheck;
> +	ktime_t end;
> +
> +	if (!READ_ONCE(dev_priv->gt.awake))
> +		return;
> +
> +	/*
> +	 * Wait for last execlists context complete, but bail out in case a
> +	 * new request is submitted.
> +	 */
> +	end = ktime_add_ms(ktime_get(), I915_IDLE_ENGINES_TIMEOUT);
> +	do {
> +		if (new_requests_since_last_retire(dev_priv))
> +			return;
> +
> +		if (intel_engines_are_idle(dev_priv))
> +			break;
> +
> +		usleep_range(100, 500);
> +	} while (ktime_before(ktime_get(), end));
> +
> +	rearm_hangcheck =
> +		cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
> +
> +	if (!mutex_trylock(&dev_priv->drm.struct_mutex)) {
> +		/* Currently busy, come back later */
> +		mod_delayed_work(dev_priv->wq,
> +				 &dev_priv->gt.idle_work,
> +				 msecs_to_jiffies(50));
> +		goto out_rearm;
> +	}
> +
> +	/*
> +	 * New request retired after this work handler started, extend active
> +	 * period until next instance of the work.
> +	 */
> +	if (new_requests_since_last_retire(dev_priv))
> +		goto out_unlock;
> +
> +	/*
> +	 * Be paranoid and flush a concurrent interrupt to make sure
> +	 * we don't reactivate any irq tasklets after parking.
> +	 *
> +	 * FIXME: Note that even though we have waited for execlists to be idle,
> +	 * there may still be an in-flight interrupt even though the CSB
> +	 * is now empty. synchronize_irq() makes sure that a residual interrupt
> +	 * is completed before we continue, but it doesn't prevent the HW from
> +	 * raising a spurious interrupt later. To complete the shield we should
> +	 * coordinate disabling the CS irq with flushing the interrupts.
> +	 */
> +	synchronize_irq(dev_priv->drm.irq);
> +
> +	intel_engines_park(dev_priv);
> +	i915_gem_timelines_park(dev_priv);
> +
> +	i915_pmu_gt_parked(dev_priv);
> +
> +	GEM_BUG_ON(!dev_priv->gt.awake);
> +	dev_priv->gt.awake = false;
> +	rearm_hangcheck = false;
> +
> +	if (INTEL_GEN(dev_priv) >= 6)
> +		gen6_rps_idle(dev_priv);
> +
> +	intel_display_power_put(dev_priv, POWER_DOMAIN_GT_IRQ);
> +
> +	intel_runtime_pm_put(dev_priv);
> +out_unlock:
> +	mutex_unlock(&dev_priv->drm.struct_mutex);
> +
> +out_rearm:
> +	if (rearm_hangcheck) {
> +		GEM_BUG_ON(!dev_priv->gt.awake);
> +		i915_queue_hangcheck(dev_priv);
> +	}
> +}
> +
>   int i915_gem_suspend(struct drm_i915_private *dev_priv)
>   {
>   	struct drm_device *dev = &dev_priv->drm;
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 07/10] drm/i915: Reduce spinlock hold time during notify_ring() interrupt
  2018-01-15 21:24 ` [PATCH 07/10] drm/i915: Reduce spinlock hold time during notify_ring() interrupt Chris Wilson
@ 2018-01-17 10:45   ` Tvrtko Ursulin
  2018-01-18 18:08     ` Chris Wilson
  2018-01-18 18:10     ` Chris Wilson
  0 siblings, 2 replies; 47+ messages in thread
From: Tvrtko Ursulin @ 2018-01-17 10:45 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 15/01/2018 21:24, Chris Wilson wrote:
> By taking advantage of the RCU protection of the task struct, we can find
> the appropriate signaler under the spinlock and then release the spinlock
> before waking the task and signaling the fence.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_irq.c | 29 +++++++++++++++++++----------
>   1 file changed, 19 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 3517c6548e2c..0b272501b738 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1065,21 +1065,24 @@ static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv)
>   
>   static void notify_ring(struct intel_engine_cs *engine)
>   {
> +	const u32 seqno = intel_engine_get_seqno(engine);
>   	struct drm_i915_gem_request *rq = NULL;
> +	struct task_struct *tsk = NULL;
>   	struct intel_wait *wait;
>   
> -	if (!engine->breadcrumbs.irq_armed)
> +	if (unlikely(!engine->breadcrumbs.irq_armed))
>   		return;

It isn't unlikely in GuC mode, just sayin'...

>   
>   	atomic_inc(&engine->irq_count);
>   	set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
>   
> +	rcu_read_lock();
> +
>   	spin_lock(&engine->breadcrumbs.irq_lock);
>   	wait = engine->breadcrumbs.irq_wait;
>   	if (wait) {
> -		bool wakeup = engine->irq_seqno_barrier;
> -
> -		/* We use a callback from the dma-fence to submit
> +		/*
> +		 * We use a callback from the dma-fence to submit
>   		 * requests after waiting on our own requests. To
>   		 * ensure minimum delay in queuing the next request to
>   		 * hardware, signal the fence now rather than wait for
> @@ -1090,19 +1093,20 @@ static void notify_ring(struct intel_engine_cs *engine)
>   		 * and to handle coalescing of multiple seqno updates
>   		 * and many waiters.
>   		 */
> -		if (i915_seqno_passed(intel_engine_get_seqno(engine),
> -				      wait->seqno)) {
> +		if (i915_seqno_passed(seqno, wait->seqno)) {
>   			struct drm_i915_gem_request *waiter = wait->request;
>   
> -			wakeup = true;
>   			if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
>   				      &waiter->fence.flags) &&
>   			    intel_wait_check_request(wait, waiter))
>   				rq = i915_gem_request_get(waiter);
> -		}
>   
> -		if (wakeup)
> -			wake_up_process(wait->tsk);
> +			tsk = wait->tsk;
> +		} else {
> +			if (engine->irq_seqno_barrier &&
> +			    i915_seqno_passed(seqno, wait->seqno - 1))

Hm, what is this about? Why -1 on platforms with coherency issues and not 
some other number? It needs a comment as a minimum, but it is also a 
behaviour change which I could not immediately square with the commit 
message. If it is some additional optimization it needs to be split out 
into a separate patch.

> +				tsk = wait->tsk;
> +		}
>   	} else {
>   		if (engine->breadcrumbs.irq_armed)
>   			__intel_engine_disarm_breadcrumbs(engine);
> @@ -1114,6 +1118,11 @@ static void notify_ring(struct intel_engine_cs *engine)
>   		i915_gem_request_put(rq);
>   	}
>   
> +	if (tsk && tsk->state != TASK_RUNNING)
> +		wake_up_process(tsk);
> +
> +	rcu_read_unlock();
> +
>   	trace_intel_engine_notify(engine, wait);
>   }
>   
> 

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 08/10] drm/i915: Move the irq_counter inside the spinlock
  2018-01-15 21:24 ` [PATCH 08/10] drm/i915: Move the irq_counter inside the spinlock Chris Wilson
@ 2018-01-17 12:12   ` Tvrtko Ursulin
  0 siblings, 0 replies; 47+ messages in thread
From: Tvrtko Ursulin @ 2018-01-17 12:12 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 15/01/2018 21:24, Chris Wilson wrote:
> Rather than have multiple locked instructions inside the notify_ring()
> irq handler, move them inside the spinlock and reduce their intrinsic
> locking.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_gem_request.c  |  4 ++--
>   drivers/gpu/drm/i915/i915_irq.c          |  6 +++---
>   drivers/gpu/drm/i915/intel_breadcrumbs.c | 13 ++++++++-----
>   drivers/gpu/drm/i915/intel_ringbuffer.h  |  2 +-
>   4 files changed, 14 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index 836db90ef81b..08bbd56277e5 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -1128,7 +1128,7 @@ static bool __i915_spin_request(const struct drm_i915_gem_request *req,
>   	 * takes to sleep on a request, on the order of a microsecond.
>   	 */
>   
> -	irq = atomic_read(&engine->irq_count);
> +	irq = READ_ONCE(engine->breadcrumbs.irq_count);
>   	timeout_us += local_clock_us(&cpu);
>   	do {
>   		if (i915_seqno_passed(intel_engine_get_seqno(engine), seqno))
> @@ -1139,7 +1139,7 @@ static bool __i915_spin_request(const struct drm_i915_gem_request *req,
>   		 * assume we won't see one in the near future but require
>   		 * the engine->seqno_barrier() to fixup coherency.
>   		 */
> -		if (atomic_read(&engine->irq_count) != irq)
> +		if (READ_ONCE(engine->breadcrumbs.irq_count) != irq)
>   			break;
>   
>   		if (signal_pending_state(state, current))
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 0b272501b738..e5f76d580010 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1073,9 +1073,6 @@ static void notify_ring(struct intel_engine_cs *engine)
>   	if (unlikely(!engine->breadcrumbs.irq_armed))
>   		return;
>   
> -	atomic_inc(&engine->irq_count);
> -	set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
> -
>   	rcu_read_lock();
>   
>   	spin_lock(&engine->breadcrumbs.irq_lock);
> @@ -1107,6 +1104,9 @@ static void notify_ring(struct intel_engine_cs *engine)
>   			    i915_seqno_passed(seqno, wait->seqno - 1))
>   				tsk = wait->tsk;
>   		}
> +
> +		engine->breadcrumbs.irq_count++;
> +		__set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);

I'm nervous about making the setting of ENGINE_IRQ_BREADCRUMB 
conditional. __i915_request_irq_complete documents that the ordering of 
these things is crucial, so I worry we might miss a wakeup. Once bitten 
twice shy? Don't know..
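
The shape of the race I worry about, purely as an illustration rather
than the exact code paths:

	CPU 0 (waiter)                      CPU 1 (notify_ring)
	--------------                      -------------------
	set_current_state(TASK_INTERRUPTIBLE);
	                                    reads a stale seqno, decides
	                                    there is no waiter to wake
	                                    and, if conditional, also
	                                    skips the
	                                    set_bit(ENGINE_IRQ_BREADCRUMB)
	test_bit(ENGINE_IRQ_BREADCRUMB) is clear
	schedule(); /* sleeps through the interrupt */

With the unconditional set_bit(), the waiter at least sees that an
interrupt came through and re-samples the seqno before committing to
sleep.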

irq_count change looks safe, so can I, once again, suggest to split into 
two patches? :/

Regards,

Tvrtko

>   	} else {
>   		if (engine->breadcrumbs.irq_armed)
>   			__intel_engine_disarm_breadcrumbs(engine);
> diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> index e3667dc1e96d..7c82cfe23922 100644
> --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> @@ -98,12 +98,14 @@ static void intel_breadcrumbs_hangcheck(struct timer_list *t)
>   	struct intel_engine_cs *engine =
>   		from_timer(engine, t, breadcrumbs.hangcheck);
>   	struct intel_breadcrumbs *b = &engine->breadcrumbs;
> +	unsigned int irq_count;
>   
>   	if (!b->irq_armed)
>   		return;
>   
> -	if (b->hangcheck_interrupts != atomic_read(&engine->irq_count)) {
> -		b->hangcheck_interrupts = atomic_read(&engine->irq_count);
> +	irq_count = READ_ONCE(b->irq_count);
> +	if (b->hangcheck_interrupts != irq_count) {
> +		b->hangcheck_interrupts = irq_count;
>   		mod_timer(&b->hangcheck, wait_timeout());
>   		return;
>   	}
> @@ -176,7 +178,7 @@ static void irq_enable(struct intel_engine_cs *engine)
>   	 * we still need to force the barrier before reading the seqno,
>   	 * just in case.
>   	 */
> -	set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
> +	__set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
>   
>   	/* Caller disables interrupts */
>   	spin_lock(&engine->i915->irq_lock);
> @@ -270,13 +272,14 @@ static bool use_fake_irq(const struct intel_breadcrumbs *b)
>   	if (!test_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings))
>   		return false;
>   
> -	/* Only start with the heavy weight fake irq timer if we have not
> +	/*
> +	 * Only start with the heavy weight fake irq timer if we have not
>   	 * seen any interrupts since enabling it the first time. If the
>   	 * interrupts are still arriving, it means we made a mistake in our
>   	 * engine->seqno_barrier(), a timing error that should be transient
>   	 * and unlikely to reoccur.
>   	 */
> -	return atomic_read(&engine->irq_count) == b->hangcheck_interrupts;
> +	return READ_ONCE(b->irq_count) == b->hangcheck_interrupts;
>   }
>   
>   static void enable_fake_irq(struct intel_breadcrumbs *b)
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index c5ff203e42d6..f406d0ff4612 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -305,7 +305,6 @@ struct intel_engine_cs {
>   
>   	struct drm_i915_gem_object *default_state;
>   
> -	atomic_t irq_count;
>   	unsigned long irq_posted;
>   #define ENGINE_IRQ_BREADCRUMB 0
>   #define ENGINE_IRQ_EXECLIST 1
> @@ -340,6 +339,7 @@ struct intel_engine_cs {
>   
>   		unsigned int hangcheck_interrupts;
>   		unsigned int irq_enabled;
> +		unsigned int irq_count;
>   
>   		bool irq_armed : 1;
>   		I915_SELFTEST_DECLARE(bool mock : 1);
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 09/10] drm/i915: Only signal from interrupt when requested
  2018-01-15 21:24 ` [PATCH 09/10] drm/i915: Only signal from interrupt when requested Chris Wilson
@ 2018-01-17 12:22   ` Tvrtko Ursulin
  2018-01-18 18:12     ` Chris Wilson
  0 siblings, 1 reply; 47+ messages in thread
From: Tvrtko Ursulin @ 2018-01-17 12:22 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 15/01/2018 21:24, Chris Wilson wrote:
> Avoid calling dma_fence_signal() from inside the interrupt if we haven't
> enabled signaling on the request.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_gem_request.c | 2 +-
>   drivers/gpu/drm/i915/i915_irq.c         | 3 ++-
>   drivers/gpu/drm/i915/intel_ringbuffer.h | 5 ++---
>   3 files changed, 5 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index 08bbd56277e5..46848aef1648 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -1214,7 +1214,7 @@ long i915_wait_request(struct drm_i915_gem_request *req,
>   	if (flags & I915_WAIT_LOCKED)
>   		add_wait_queue(errq, &reset);
>   
> -	intel_wait_init(&wait, req);
> +	intel_wait_init(&wait);
>   
>   restart:
>   	do {
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index e5f76d580010..ea290f102784 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1093,7 +1093,8 @@ static void notify_ring(struct intel_engine_cs *engine)
>   		if (i915_seqno_passed(seqno, wait->seqno)) {
>   			struct drm_i915_gem_request *waiter = wait->request;
>   
> -			if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
> +			if (waiter &&
> +			    !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
>   				      &waiter->fence.flags) &&
>   			    intel_wait_check_request(wait, waiter))
>   				rq = i915_gem_request_get(waiter);
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index f406d0ff4612..cea2092d25d9 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -874,11 +874,10 @@ static inline u32 intel_hws_preempt_done_address(struct intel_engine_cs *engine)
>   /* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */
>   int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
>   
> -static inline void intel_wait_init(struct intel_wait *wait,
> -				   struct drm_i915_gem_request *rq)
> +static inline void intel_wait_init(struct intel_wait *wait)
>   {
>   	wait->tsk = current;
> -	wait->request = rq;
> +	wait->request = NULL;

Unless it comes later in this patch series, I can't see that this now 
leaves any code path which sets wait->request?

It's strange anyway - is this guarding against a null ptr deref or what?

Regards,

Tvrtko

>   }
>   
>   static inline void intel_wait_init_for_seqno(struct intel_wait *wait, u32 seqno)
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 01/10] drm/i915: Only attempt to scan the requested number of shrinker slabs
  2018-01-17 10:29   ` Tvrtko Ursulin
@ 2018-01-18  9:16     ` Chris Wilson
  2018-01-18  9:19     ` Chris Wilson
  1 sibling, 0 replies; 47+ messages in thread
From: Chris Wilson @ 2018-01-18  9:16 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-01-17 10:29:51)
> 
> On 15/01/2018 21:24, Chris Wilson wrote:
> > Since commit 4e773c3a8a69 ("drm/i915: Wire up shrinkctl->nr_scanned"),
> 
> Sounds like it deserves this in the Fixes: tag as well?

Not really, the commit maintained the old behaviour, just missing an
opportunity to take advantage of the new interface. Not strictly a fix,
so I'd rather let it percolate upstream slowly.

> > we track the number of objects we scan and do not wish to exceed that as
> > it will overly penalise our own slabs under mempressure. Given that we
> > now know the target number of objects to scan, use that as our guide for
> > deciding to shrink as opposed to the number of objects we manage to
> > shrink (which doesn't correspond to the numbers we report to shrinkctl).
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> > ---
> >   drivers/gpu/drm/i915/i915_gem_shrinker.c | 4 ++--
> >   1 file changed, 2 insertions(+), 2 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> > index 9029ed04879c..0e158f9287c4 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> > @@ -363,13 +363,13 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
> >                               I915_SHRINK_BOUND |
> >                               I915_SHRINK_UNBOUND |
> >                               I915_SHRINK_PURGEABLE);
> > -     if (freed < sc->nr_to_scan)
> > +     if (sc->nr_scanned < sc->nr_to_scan)
> >               freed += i915_gem_shrink(i915,
> >                                        sc->nr_to_scan - sc->nr_scanned,
> >                                        &sc->nr_scanned,
> >                                        I915_SHRINK_BOUND |
> >                                        I915_SHRINK_UNBOUND);
> > -     if (freed < sc->nr_to_scan && current_is_kswapd()) {
> > +     if (sc->nr_scanned < sc->nr_to_scan && current_is_kswapd()) {
> >               intel_runtime_pm_get(i915);
> >               freed += i915_gem_shrink(i915,
> >                                        sc->nr_to_scan - sc->nr_scanned,
> > 
> 
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Thanks, pushed this separately.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 01/10] drm/i915: Only attempt to scan the requested number of shrinker slabs
  2018-01-17 10:29   ` Tvrtko Ursulin
  2018-01-18  9:16     ` Chris Wilson
@ 2018-01-18  9:19     ` Chris Wilson
  1 sibling, 0 replies; 47+ messages in thread
From: Chris Wilson @ 2018-01-18  9:19 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-01-17 10:29:51)
> 
> On 15/01/2018 21:24, Chris Wilson wrote:
> > Since commit 4e773c3a8a69 ("drm/i915: Wire up shrinkctl->nr_scanned"),
> 
> Sounds like it deserves this in the Fixes: tag as well?

On second thoughts, we ask to free sc->nr_to_scan - sc->nr_scanned,
so we shouldn't let that underflow! A Fixes: tag is then deserved.
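
To spell it out with made-up numbers: say nr_to_scan is 128 and the
PURGEABLE pass scans all 128 objects but only manages to free 32. The
old guard (freed < sc->nr_to_scan, i.e. 32 < 128) lets us continue, and
we then compute

	sc->nr_to_scan - sc->nr_scanned; /* 128 - 128 == 0, harmless,
					  * but should nr_scanned
					  * overshoot to, say, 130, the
					  * unsigned subtraction wraps
					  * around to a huge count */

whereas gating on sc->nr_scanned < sc->nr_to_scan keeps the subtraction
well-defined.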
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH v2] drm/i915: Shrink the GEM kmem_caches upon idling
  2018-01-17 10:18               ` Tvrtko Ursulin
@ 2018-01-18 18:06                 ` Chris Wilson
  0 siblings, 0 replies; 47+ messages in thread
From: Chris Wilson @ 2018-01-18 18:06 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-01-17 10:18:38)
> 
> On 16/01/2018 17:36, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2018-01-16 17:25:25)
> >>
> >> On 16/01/2018 15:21, Chris Wilson wrote:
> >>> Quoting Tvrtko Ursulin (2018-01-16 15:12:43)
> >>>>
> >>>> On 16/01/2018 13:05, Chris Wilson wrote:
> >>>>> +
> >>>>> +     if (!new_requests_since_last_retire(dev_priv)) {
> >>>>> +             __i915_gem_free_work(&dev_priv->mm.free_work);
> >>
> >> ... you wouldn't want to pull this up under the struct mutex section? It
> >> would need a different flavour of a function to be called, and some
> >> refactoring of the existing ones.
> > 
> > "Some". I don't think that improves anything?
> > 
> > The statement of intent to me is that we only throw away the caches and
> > excess memory if and only if we are idle. The presumption is that under
> > active conditions those caches are important, but if we are about to
> > sleep for long periods of time, we should be proactive in releasing
> > resources.
> > 
> > I can hear you about to ask if we could add a timer and wake up in 10s to
> > prove we were idle!
> 
> No, pointless since this proposal already runs outside this guarantee, 
> and anyway, one way or the other there is potential to disrupt the next 
> client.
> 
> How about sticking a break on new_request_since_last_retire() into 
> __i915_gem_free_work()? Would that defeat the backlog cleaning? Maybe 
> make it conditional only when called from the idle handler?

__i915_gem_free_work() is a distraction, since it is just clearing the
backlog of freed objects and shouldn't be affecting the cache
optimisations for the next/concurrent client. Let me try rearranging the
flow here.
-Chris
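
As a rough sketch, one possible rearrangement of the idle handler
(function names taken from the discussion above; i915_gem_shrink_caches()
is a hypothetical helper, not an existing one):

    static void idle_work_handler(struct drm_i915_private *i915)
    {
        /* Someone queued work while we slept; keep the caches hot. */
        if (new_requests_since_last_retire(i915))
            return;

        /* Clearing the freed-object backlog is independent of the
         * per-client cache optimisations, so always do it first. */
        __i915_gem_free_work(&i915->mm.free_work);

        /* Only discard the kmem_caches if we are still idle. */
        if (!new_requests_since_last_retire(i915))
            i915_gem_shrink_caches(i915);
    }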

* Re: [PATCH 07/10] drm/i915: Reduce spinlock hold time during notify_ring() interrupt
  2018-01-17 10:45   ` Tvrtko Ursulin
@ 2018-01-18 18:08     ` Chris Wilson
  2018-01-18 18:10     ` Chris Wilson
  1 sibling, 0 replies; 47+ messages in thread
From: Chris Wilson @ 2018-01-18 18:08 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-01-17 10:45:16)
> 
> On 15/01/2018 21:24, Chris Wilson wrote:
> > By taking advantage of the RCU protection of the task struct, we can find
> > the appropriate signaler under the spinlock and then release the spinlock
> > before waking the task and signaling the fence.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >   drivers/gpu/drm/i915/i915_irq.c | 29 +++++++++++++++++++----------
> >   1 file changed, 19 insertions(+), 10 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> > index 3517c6548e2c..0b272501b738 100644
> > --- a/drivers/gpu/drm/i915/i915_irq.c
> > +++ b/drivers/gpu/drm/i915/i915_irq.c
> > @@ -1065,21 +1065,24 @@ static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv)
> >   
> >   static void notify_ring(struct intel_engine_cs *engine)
> >   {
> > +     const u32 seqno = intel_engine_get_seqno(engine);
> >       struct drm_i915_gem_request *rq = NULL;
> > +     struct task_struct *tsk = NULL;
> >       struct intel_wait *wait;
> >   
> > -     if (!engine->breadcrumbs.irq_armed)
> > +     if (unlikely(!engine->breadcrumbs.irq_armed))
> >               return;
> 
> It isn't unlikely in GuC mode, just sayin'...

Already taken care of; the GuC now "pins" the irq as opposed to arming it,
so we should be able to equate irq_armed with "has waiters".
-Chris
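
A minimal sketch of the locking pattern being described (field names
abbreviated; the key assumption is that task_struct is freed via RCU,
which is what makes the dereference after dropping the spinlock safe):

    struct intel_wait *wait;
    struct task_struct *tsk = NULL;

    rcu_read_lock();
    spin_lock(&engine->breadcrumbs.irq_lock);
    wait = engine->breadcrumbs.irq_wait;
    if (wait && i915_seqno_passed(seqno, wait->seqno))
        tsk = wait->tsk;            /* remember whom to wake */
    spin_unlock(&engine->breadcrumbs.irq_lock);

    if (tsk)
        wake_up_process(tsk);       /* outside the spinlock; tsk stays
                                     * valid under the RCU read lock */
    rcu_read_unlock();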

* Re: [PATCH 07/10] drm/i915: Reduce spinlock hold time during notify_ring() interrupt
  2018-01-17 10:45   ` Tvrtko Ursulin
  2018-01-18 18:08     ` Chris Wilson
@ 2018-01-18 18:10     ` Chris Wilson
  1 sibling, 0 replies; 47+ messages in thread
From: Chris Wilson @ 2018-01-18 18:10 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-01-17 10:45:16)
> 
> On 15/01/2018 21:24, Chris Wilson wrote:
> > -             if (wakeup)
> > -                     wake_up_process(wait->tsk);
> > +                     tsk = wait->tsk;
> > +             } else {
> > +                     if (engine->irq_seqno_barrier &&
> > +                         i915_seqno_passed(seqno, wait->seqno - 1))
> 
> Hm what is this about? Why -1 on platforms with coherency issues and not 
> some other number? Needs a comment as minimum but still is a behaviour 
> change which I did not immediately figure out how it goes with the 
> commit message. If it is some additional optimization it needs to be 
> split out into a separate patch.

It's a finger-in-the-air statement that I don't expect us to be more than
one seqno behind in the interrupt-vs-breadcrumb race. So far I haven't
been disappointed.
-Chris
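
For context, the wrap-safe seqno comparison is, give or take, the
following (a sketch of the helper as it appears in the i915 headers of
this era):

    static inline bool i915_seqno_passed(u32 seq1, u32 seq2)
    {
        /* Wrap-safe: true when seq1 is at or ahead of seq2, even
         * across the u32 wraparound. */
        return (s32)(seq1 - seq2) >= 0;
    }

    /* So i915_seqno_passed(seqno, wait->seqno - 1) also fires when the
     * visible seqno is exactly one behind the waiter's target, which
     * is the one-seqno allowance described above. */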

* Re: [PATCH 09/10] drm/i915: Only signal from interrupt when requested
  2018-01-17 12:22   ` Tvrtko Ursulin
@ 2018-01-18 18:12     ` Chris Wilson
  0 siblings, 0 replies; 47+ messages in thread
From: Chris Wilson @ 2018-01-18 18:12 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-01-17 12:22:33)
> 
> On 15/01/2018 21:24, Chris Wilson wrote:
> > Avoid calling dma_fence_signal() from inside the interrupt if we haven't
> > enabled signaling on the request.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >   drivers/gpu/drm/i915/i915_gem_request.c | 2 +-
> >   drivers/gpu/drm/i915/i915_irq.c         | 3 ++-
> >   drivers/gpu/drm/i915/intel_ringbuffer.h | 5 ++---
> >   3 files changed, 5 insertions(+), 5 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> > index 08bbd56277e5..46848aef1648 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_request.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> > @@ -1214,7 +1214,7 @@ long i915_wait_request(struct drm_i915_gem_request *req,
> >       if (flags & I915_WAIT_LOCKED)
> >               add_wait_queue(errq, &reset);
> >   
> > -     intel_wait_init(&wait, req);
> > +     intel_wait_init(&wait);
> >   
> >   restart:
> >       do {
> > diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> > index e5f76d580010..ea290f102784 100644
> > --- a/drivers/gpu/drm/i915/i915_irq.c
> > +++ b/drivers/gpu/drm/i915/i915_irq.c
> > @@ -1093,7 +1093,8 @@ static void notify_ring(struct intel_engine_cs *engine)
> >               if (i915_seqno_passed(seqno, wait->seqno)) {
> >                       struct drm_i915_gem_request *waiter = wait->request;
> >   
> > -                     if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
> > +                     if (waiter &&
> > +                         !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
> >                                     &waiter->fence.flags) &&
> >                           intel_wait_check_request(wait, waiter))
> >                               rq = i915_gem_request_get(waiter);
> > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> > index f406d0ff4612..cea2092d25d9 100644
> > --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> > @@ -874,11 +874,10 @@ static inline u32 intel_hws_preempt_done_address(struct intel_engine_cs *engine)
> >   /* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */
> >   int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
> >   
> > -static inline void intel_wait_init(struct intel_wait *wait,
> > -                                struct drm_i915_gem_request *rq)
> > +static inline void intel_wait_init(struct intel_wait *wait)
> >   {
> >       wait->tsk = current;
> > -     wait->request = rq;
> > +     wait->request = NULL;
> 
> Unless it is in this patch series, I can't see that this now leaves any 
> code path which sets wait->request?

The signaler is now the only entity that sets up wait->request.
 
> It's strange anyway - is this guarding against null ptr deref or what?

Uninitialized pointer deref in the irq handler, yup.
-Chris
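
Putting the two pieces together, a sketch of the guard (condensed from
the diff in this patch):

    intel_wait_init(&wait);         /* wait->request starts out NULL */

    /* ... later, in notify_ring(): the irq handler only dereferences
     * the request once the signaler has actually filled it in. */
    waiter = wait->request;
    if (waiter &&
        !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &waiter->fence.flags) &&
        intel_wait_check_request(wait, waiter))
        rq = i915_gem_request_get(waiter);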


Thread overview: 47+ messages
2018-01-15 21:24 Prevent trivial oom from gem_exec_nop/sequential Chris Wilson
2018-01-15 21:24 ` [PATCH 01/10] drm/i915: Only attempt to scan the requested number of shrinker slabs Chris Wilson
2018-01-17 10:29   ` Tvrtko Ursulin
2018-01-18  9:16     ` Chris Wilson
2018-01-18  9:19     ` Chris Wilson
2018-01-15 21:24 ` [PATCH 02/10] drm/i915: Move i915_gem_retire_work_handler Chris Wilson
2018-01-17 10:33   ` Tvrtko Ursulin
2018-01-15 21:24 ` [PATCH 03/10] drm/i915: Shrink the GEM kmem_caches upon idling Chris Wilson
2018-01-16 10:00   ` Tvrtko Ursulin
2018-01-16 10:19     ` Chris Wilson
2018-01-16 13:05     ` [PATCH v2] " Chris Wilson
2018-01-16 15:12       ` Tvrtko Ursulin
2018-01-16 15:16         ` Tvrtko Ursulin
2018-01-16 15:21         ` Chris Wilson
2018-01-16 17:25           ` Tvrtko Ursulin
2018-01-16 17:36             ` Chris Wilson
2018-01-17 10:18               ` Tvrtko Ursulin
2018-01-18 18:06                 ` Chris Wilson
2018-01-15 21:24 ` [PATCH 04/10] drm/i915: Shrink the request kmem_cache on allocation error Chris Wilson
2018-01-16 10:10   ` Tvrtko Ursulin
2018-01-16 10:26     ` Chris Wilson
2018-01-16 13:15     ` [PATCH v2] " Chris Wilson
2018-01-16 15:19       ` Tvrtko Ursulin
2018-01-15 21:24 ` [PATCH 05/10] drm/i915: Trim the retired request queue after submitting Chris Wilson
2018-01-16 10:18   ` Tvrtko Ursulin
2018-01-16 10:32     ` Chris Wilson
2018-01-17 10:23       ` Tvrtko Ursulin
2018-01-16 13:30     ` [PATCH v2] " Chris Wilson
2018-01-15 21:24 ` [PATCH 06/10] drm/i915/breadcrumbs: Drop request reference for the signaler thread Chris Wilson
2018-01-15 21:24 ` [PATCH 07/10] drm/i915: Reduce spinlock hold time during notify_ring() interrupt Chris Wilson
2018-01-17 10:45   ` Tvrtko Ursulin
2018-01-18 18:08     ` Chris Wilson
2018-01-18 18:10     ` Chris Wilson
2018-01-15 21:24 ` [PATCH 08/10] drm/i915: Move the irq_counter inside the spinlock Chris Wilson
2018-01-17 12:12   ` Tvrtko Ursulin
2018-01-15 21:24 ` [PATCH 09/10] drm/i915: Only signal from interrupt when requested Chris Wilson
2018-01-17 12:22   ` Tvrtko Ursulin
2018-01-18 18:12     ` Chris Wilson
2018-01-15 21:24 ` [PATCH 10/10] drm/i915/breadcrumbs: Reduce signaler rbtree to a sorted list Chris Wilson
2018-01-15 22:04 ` ✓ Fi.CI.BAT: success for series starting with [01/10] drm/i915: Only attempt to scan the requested number of shrinker slabs Patchwork
2018-01-16  9:21 ` ✓ Fi.CI.IGT: " Patchwork
2018-01-16  9:52 ` Prevent trivial oom from gem_exec_nop/sequential Tvrtko Ursulin
2018-01-16 10:02   ` Chris Wilson
2018-01-16 13:10   ` Chris Wilson
2018-01-16 13:42 ` ✓ Fi.CI.BAT: success for series starting with [01/10] drm/i915: Only attempt to scan the requested number of shrinker slabs (rev3) Patchwork
2018-01-16 14:02 ` ✓ Fi.CI.BAT: success for series starting with [01/10] drm/i915: Only attempt to scan the requested number of shrinker slabs (rev4) Patchwork
2018-01-16 15:29 ` ✓ Fi.CI.IGT: " Patchwork
