From: Chris Wilson <chris@chris-wilson.co.uk>
To: intel-gfx@lists.freedesktop.org
Subject: [PATCH 30/34] drm/i915: Keep timeline HWSP allocated until the system is idle
Date: Mon, 21 Jan 2019 22:21:13 +0000
Message-ID: <20190121222117.23305-31-chris@chris-wilson.co.uk>
In-Reply-To: <20190121222117.23305-1-chris@chris-wilson.co.uk>

In preparation for enabling HW semaphores, we need to keep the in-flight
timeline HWSP alive until the entire system is idle, as any other
timeline active on the GPU may still refer back to the already retired
timeline. We therefore have to delay both recycling available cachelines
and unpinning the old HWSP until the next idle point (i.e. on parking).
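
To make the bookkeeping concrete, here is a minimal, self-contained
sketch of the retire-side half of that scheme (the struct and function
names below are illustrative stand-ins, not the driver's; the real
fields, list heads and locking are in the i915_timeline.c changes
further down):

	#include <stdint.h>

	#define CACHELINE_BYTES 64

	/* One page of HWSP cachelines; each bit tracks one 64-byte seqno slot. */
	struct hwsp_page {
		uint64_t free_bitmap;	/* slots available to new timelines */
		uint64_t dead_bitmap;	/* slots retired but possibly still read by HW */
	};

	/*
	 * Retire side: the timeline has finished with its cacheline, but
	 * another timeline still active on the GPU may refer back to it,
	 * so only mark it dead instead of returning it to the allocator.
	 */
	static void mark_cacheline_dead(struct hwsp_page *page, unsigned int offset)
	{
		page->dead_bitmap |= 1ull << (offset / CACHELINE_BYTES);
	}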

That we have to keep the HWSP alive for external references on the HW
raises an interesting conundrum. On a busy system we may never see a
global idle point, which essentially means the resource keeps leaking
until we are forced to sleep. What we need is a set of RCU primitives
for the GPU! This should also help mitigate the resource starvation
issues stemming from keeping all logical state pinned until idle
(instead of, as currently handled, until the next context switch).
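
As a rough illustration of that grace-period idea, and continuing the
hypothetical hwsp_page sketch above (the driver does the equivalent
under gt->hwsp_lock when the GPU parks):

	/*
	 * Idle side: once the GPU has parked, no context can still be
	 * reading a dead cacheline, so fold the dead slots back into the
	 * free mask. A page whose slots are all free again can be
	 * released to the system; returns true when the caller may free it.
	 */
	static bool reclaim_dead_cachelines(struct hwsp_page *page)
	{
		page->free_bitmap |= page->dead_bitmap;
		page->dead_bitmap = 0;

		return page->free_bitmap == ~0ull;
	}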

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h      |   2 +
 drivers/gpu/drm/i915/i915_request.c  |  34 ++++---
 drivers/gpu/drm/i915/i915_timeline.c | 127 ++++++++++++++++++++++++---
 drivers/gpu/drm/i915/i915_timeline.h |   1 +
 4 files changed, 133 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 5577e0e1034f..7ca701cf9086 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1981,7 +1981,9 @@ struct drm_i915_private {
 
 			/* Pack multiple timelines' seqnos into the same page */
 			spinlock_t hwsp_lock;
+			struct list_head hwsp_pin_list;
 			struct list_head hwsp_free_list;
+			struct list_head hwsp_dead_list;
 		} timelines;
 
 		struct list_head active_rings;
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index cca437ac8a7e..099c6f994b99 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -331,12 +331,6 @@ void i915_request_retire_upto(struct i915_request *rq)
 	} while (tmp != rq);
 }
 
-static u32 timeline_get_seqno(struct i915_timeline *tl)
-{
-	tl->seqno += tl->has_initial_breadcrumb;
-	return ++tl->seqno;
-}
-
 static void move_to_timeline(struct i915_request *request,
 			     struct i915_timeline *timeline)
 {
@@ -538,8 +532,10 @@ struct i915_request *
 i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 {
 	struct drm_i915_private *i915 = engine->i915;
-	struct i915_request *rq;
 	struct intel_context *ce;
+	struct i915_timeline *tl;
+	struct i915_request *rq;
+	u32 seqno;
 	int ret;
 
 	lockdep_assert_held(&i915->drm.struct_mutex);
@@ -614,7 +610,15 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 		}
 	}
 
-	rq->rcustate = get_state_synchronize_rcu();
+	tl = ce->ring->timeline;
+	GEM_BUG_ON(tl == &engine->timeline);
+	ret = i915_timeline_get_seqno(tl, &seqno);
+	if (ret)
+		goto err_free;
+
+	spin_lock_init(&rq->lock);
+	dma_fence_init(&rq->fence, &i915_fence_ops, &rq->lock,
+		       tl->fence_context, seqno);
 
 	INIT_LIST_HEAD(&rq->active_list);
 	rq->i915 = i915;
@@ -622,16 +626,9 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	rq->gem_context = ctx;
 	rq->hw_context = ce;
 	rq->ring = ce->ring;
-	rq->timeline = ce->ring->timeline;
-	GEM_BUG_ON(rq->timeline == &engine->timeline);
-	rq->hwsp_seqno = rq->timeline->hwsp_seqno;
-
-	spin_lock_init(&rq->lock);
-	dma_fence_init(&rq->fence,
-		       &i915_fence_ops,
-		       &rq->lock,
-		       rq->timeline->fence_context,
-		       timeline_get_seqno(rq->timeline));
+	rq->timeline = tl;
+	rq->hwsp_seqno = tl->hwsp_seqno;
+	rq->rcustate = get_state_synchronize_rcu();
 
 	/* We bump the ref for the fence chain */
 	i915_sw_fence_init(&i915_request_get(rq)->submit, submit_notify);
@@ -688,6 +685,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	GEM_BUG_ON(!list_empty(&rq->sched.signalers_list));
 	GEM_BUG_ON(!list_empty(&rq->sched.waiters_list));
 
+err_free:
 	kmem_cache_free(i915->requests, rq);
 err_unreserve:
 	unreserve_gt(i915);
diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c
index 7bc9164733bc..a0bbc993048b 100644
--- a/drivers/gpu/drm/i915/i915_timeline.c
+++ b/drivers/gpu/drm/i915/i915_timeline.c
@@ -11,8 +11,11 @@
 
 struct i915_timeline_hwsp {
 	struct i915_vma *vma;
+	struct list_head pin_link;
 	struct list_head free_link;
+	struct list_head dead_link;
 	u64 free_bitmap;
+	u64 dead_bitmap;
 };
 
 static struct i915_vma *__hwsp_alloc(struct drm_i915_private *i915)
@@ -33,8 +36,7 @@ static struct i915_vma *__hwsp_alloc(struct drm_i915_private *i915)
 	return vma;
 }
 
-static struct i915_vma *
-hwsp_alloc(struct i915_timeline *timeline, int *offset)
+static struct i915_vma *hwsp_alloc(struct i915_timeline *timeline, int *offset)
 {
 	struct drm_i915_private *i915 = timeline->i915;
 	struct i915_gt_timelines *gt = &i915->gt.timelines;
@@ -66,6 +68,7 @@ hwsp_alloc(struct i915_timeline *timeline, int *offset)
 		vma->private = hwsp;
 		hwsp->vma = vma;
 		hwsp->free_bitmap = ~0ull;
+		hwsp->dead_bitmap = 0;
 
 		spin_lock(&gt->hwsp_lock);
 		list_add(&hwsp->free_link, &gt->hwsp_free_list);
@@ -96,18 +99,11 @@ static void hwsp_free(struct i915_timeline *timeline)
 
 	spin_lock(&gt->hwsp_lock);
 
-	/* As a cacheline becomes available, publish the HWSP on the freelist */
-	if (!hwsp->free_bitmap)
-		list_add_tail(&hwsp->free_link, &gt->hwsp_free_list);
-
-	hwsp->free_bitmap |= BIT_ULL(timeline->hwsp_offset / CACHELINE_BYTES);
+	/* Defer recycling the HWSP cacheline until after the GPU is idle. */
+	if (!hwsp->dead_bitmap)
+		list_add_tail(&hwsp->dead_link, &gt->hwsp_dead_list);
 
-	/* And if no one is left using it, give the page back to the system */
-	if (hwsp->free_bitmap == ~0ull) {
-		i915_vma_put(hwsp->vma);
-		list_del(&hwsp->free_link);
-		kfree(hwsp);
-	}
+	hwsp->dead_bitmap |= BIT_ULL(timeline->hwsp_offset / CACHELINE_BYTES);
 
 	spin_unlock(&gt->hwsp_lock);
 }
@@ -172,7 +168,9 @@ void i915_timelines_init(struct drm_i915_private *i915)
 	INIT_LIST_HEAD(&gt->active_list);
 
 	spin_lock_init(&gt->hwsp_lock);
+	INIT_LIST_HEAD(&gt->hwsp_pin_list);
 	INIT_LIST_HEAD(&gt->hwsp_free_list);
+	INIT_LIST_HEAD(&gt->hwsp_dead_list);
 
 	/* via i915_gem_wait_for_idle() */
 	i915_gem_shrinker_taints_mutex(i915, &gt->mutex);
@@ -209,6 +207,7 @@ static void timeline_inactive(struct i915_timeline *tl)
 void i915_timelines_park(struct drm_i915_private *i915)
 {
 	struct i915_gt_timelines *gt = &i915->gt.timelines;
+	struct i915_timeline_hwsp *hwsp, *hn;
 	struct i915_timeline *timeline;
 
 	mutex_lock(&gt->mutex);
@@ -222,6 +221,38 @@ void i915_timelines_park(struct drm_i915_private *i915)
 		i915_syncmap_free(&timeline->sync);
 	}
 	mutex_unlock(&gt->mutex);
+
+	/*
+	 * Now the system is idle, we can be sure that there are no more
+	 * references to our old HWSP pages remaining on the HW, so we
+	 * can return the pages back to the system.
+	 */
+	spin_lock(&gt->hwsp_lock);
+
+	list_for_each_entry_safe(hwsp, hn, &gt->hwsp_pin_list, pin_link) {
+		INIT_LIST_HEAD(&hwsp->pin_link);
+		i915_vma_unpin(hwsp->vma);
+	}
+	INIT_LIST_HEAD(&gt->hwsp_pin_list);
+
+	list_for_each_entry_safe(hwsp, hn, &gt->hwsp_dead_list, dead_link) {
+		GEM_BUG_ON(!hwsp->dead_bitmap);
+
+		if (!hwsp->free_bitmap)
+			list_add_tail(&hwsp->free_link, &gt->hwsp_free_list);
+
+		hwsp->free_bitmap |= hwsp->dead_bitmap;
+		hwsp->dead_bitmap = 0;
+
+		if (hwsp->free_bitmap == ~0ull) {
+			list_del(&hwsp->free_link);
+			i915_vma_put(hwsp->vma);
+			kfree(hwsp);
+		}
+	}
+	INIT_LIST_HEAD(&gt->hwsp_dead_list);
+
+	spin_unlock(&gt->hwsp_lock);
 }
 
 void i915_timeline_fini(struct i915_timeline *timeline)
@@ -259,6 +290,24 @@ i915_timeline_create(struct drm_i915_private *i915,
 	return timeline;
 }
 
+static void
+__i915_timeline_pin_hwsp(struct i915_timeline *tl,
+			 struct i915_timeline_hwsp *hwsp)
+{
+	GEM_BUG_ON(!tl->pin_count);
+
+	if (hwsp && list_empty(&hwsp->pin_link)) {
+		struct i915_gt_timelines *gt = &tl->i915->gt.timelines;
+
+		spin_lock(&gt->hwsp_lock);
+		if (list_empty(&hwsp->pin_link)) {
+			list_add(&hwsp->pin_link, &gt->hwsp_pin_list);
+			__i915_vma_pin(hwsp->vma);
+		}
+		spin_unlock(&gt->hwsp_lock);
+	}
+}
+
 int i915_timeline_pin(struct i915_timeline *tl)
 {
 	int err;
@@ -271,6 +320,7 @@ int i915_timeline_pin(struct i915_timeline *tl)
 	if (err)
 		goto unpin;
 
+	__i915_timeline_pin_hwsp(tl, tl->hwsp_ggtt->private);
 	timeline_active(tl);
 
 	return 0;
@@ -280,6 +330,53 @@ int i915_timeline_pin(struct i915_timeline *tl)
 	return err;
 }
 
+static u32 timeline_advance(struct i915_timeline *tl)
+{
+	GEM_BUG_ON(!tl->pin_count);
+	GEM_BUG_ON(tl->seqno & tl->has_initial_breadcrumb);
+
+	tl->seqno += tl->has_initial_breadcrumb;
+	return ++tl->seqno;
+}
+
+static void timeline_rollback(struct i915_timeline *tl)
+{
+	tl->seqno--;
+	tl->seqno -= tl->has_initial_breadcrumb;
+}
+
+static noinline int
+__i915_timeline_get_seqno(struct i915_timeline *tl, u32 *seqno)
+{
+	struct i915_vma *vma;
+	int offset;
+
+	vma = hwsp_alloc(tl, &offset);
+	if (IS_ERR(vma)) {
+		timeline_rollback(tl);
+		return PTR_ERR(vma);
+	}
+	hwsp_free(tl);
+
+	tl->hwsp_ggtt = i915_vma_get(vma);
+	tl->hwsp_offset = offset;
+	__i915_timeline_pin_hwsp(tl, vma->private);
+
+	*seqno = timeline_advance(tl);
+	return 0;
+}
+
+int i915_timeline_get_seqno(struct i915_timeline *tl, u32 *seqno)
+{
+	*seqno = timeline_advance(tl);
+
+	/* Replace the HWSP on wraparound for HW semaphores */
+	if (unlikely(!*seqno && !i915_timeline_is_global(tl)))
+		return __i915_timeline_get_seqno(tl, seqno);
+
+	return 0;
+}
+
 void i915_timeline_unpin(struct i915_timeline *tl)
 {
 	GEM_BUG_ON(!tl->pin_count);
@@ -311,8 +408,12 @@ void i915_timelines_fini(struct drm_i915_private *i915)
 {
 	struct i915_gt_timelines *gt = &i915->gt.timelines;
 
+	i915_timelines_park(i915);
+
 	GEM_BUG_ON(!list_empty(&gt->active_list));
+	GEM_BUG_ON(!list_empty(&gt->hwsp_pin_list));
 	GEM_BUG_ON(!list_empty(&gt->hwsp_free_list));
+	GEM_BUG_ON(!list_empty(&gt->hwsp_dead_list));
 
 	mutex_destroy(&gt->mutex);
 }
diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h
index 8caeb66d1cd5..c01b81a85a15 100644
--- a/drivers/gpu/drm/i915/i915_timeline.h
+++ b/drivers/gpu/drm/i915/i915_timeline.h
@@ -149,6 +149,7 @@ static inline bool i915_timeline_sync_is_later(struct i915_timeline *tl,
 }
 
 int i915_timeline_pin(struct i915_timeline *tl);
+int i915_timeline_get_seqno(struct i915_timeline *tl, u32 *seqno);
 void i915_timeline_unpin(struct i915_timeline *tl);
 
 void i915_timelines_init(struct drm_i915_private *i915);
-- 
2.20.1
