All of lore.kernel.org
 help / color / mirror / Atom feed
From: Chris Wilson <chris@chris-wilson.co.uk>
To: intel-gfx@lists.freedesktop.org
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Subject: [Intel-gfx] [PATCH 19/22] drm/i915/gem: Add all GPU reloc awaits/signals en masse
Date: Thu,  4 Jun 2020 11:37:48 +0100	[thread overview]
Message-ID: <20200604103751.18816-19-chris@chris-wilson.co.uk> (raw)
In-Reply-To: <20200604103751.18816-1-chris@chris-wilson.co.uk>

Asynchronous waits and signaling form a traditional semaphore with all
the usual ordering problems with taking multiple locks. If we want to
add more than one wait on a shared resource by the GPU, we must ensure
that all the associated timelines are advanced atomically, ergo we must
lock all the timelines en masse.

Testcase: igt/gem_exec_reloc/basic-concurrent16
Fixes: 0e97fbb08055 ("drm/i915/gem: Use a single chained reloc batches for a single execbuf")
References: https://gitlab.freedesktop.org/drm/intel/-/issues/1889
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 114 ++++++++++++------
 .../i915/gem/selftests/i915_gem_execbuffer.c  |  24 ++--
 2 files changed, 93 insertions(+), 45 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 4c3461ab8a63..373d5eea7a5a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -259,7 +259,6 @@ struct i915_execbuffer {
 		bool has_fence : 1;
 		bool needs_unfenced : 1;
 
-		struct i915_vma *target;
 		struct i915_request *rq;
 		struct i915_vma *rq_vma;
 		u32 *rq_cmd;
@@ -924,7 +923,6 @@ static void reloc_cache_init(struct reloc_cache *cache,
 	cache->has_fence = cache->gen < 4;
 	cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment;
 	cache->node.flags = 0;
-	cache->target = NULL;
 }
 
 static inline void *unmask_page(unsigned long p)
@@ -1057,26 +1055,6 @@ static void reloc_gpu_flush(struct reloc_cache *cache)
 	i915_request_add(rq);
 }
 
-static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma)
-{
-	struct drm_i915_gem_object *obj = vma->obj;
-	int err;
-
-	i915_vma_lock(vma);
-
-	if (obj->cache_dirty & ~obj->cache_coherent)
-		i915_gem_clflush_object(obj, 0);
-	obj->write_domain = 0;
-
-	err = i915_request_await_object(rq, vma->obj, true);
-	if (err == 0)
-		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
-
-	i915_vma_unlock(vma);
-
-	return err;
-}
-
 static int
 __reloc_gpu_alloc(struct i915_execbuffer *eb, struct intel_engine_cs *engine)
 {
@@ -1166,24 +1144,12 @@ __reloc_gpu_alloc(struct i915_execbuffer *eb, struct intel_engine_cs *engine)
 	return err;
 }
 
-static u32 *reloc_batch_grow(struct i915_execbuffer *eb,
-			     struct i915_vma *vma,
-			     unsigned int len)
+static u32 *reloc_batch_grow(struct i915_execbuffer *eb, unsigned int len)
 {
 	struct reloc_cache *cache = &eb->reloc_cache;
 	u32 *cmd;
 	int err;
 
-	if (vma != cache->target) {
-		err = reloc_move_to_gpu(cache->rq, vma);
-		if (unlikely(err)) {
-			i915_request_set_error_once(cache->rq, err);
-			return ERR_PTR(err);
-		}
-
-		cache->target = vma;
-	}
-
 	if (unlikely(cache->rq_size + len >
 		     PAGE_SIZE / sizeof(u32) - RELOC_TAIL)) {
 		err = reloc_gpu_chain(cache);
@@ -1229,7 +1195,7 @@ static int __reloc_entry_gpu(struct i915_execbuffer *eb,
 	else
 		len = 3;
 
-	batch = reloc_batch_grow(eb, vma, len);
+	batch = reloc_batch_grow(eb, len);
 	if (IS_ERR(batch))
 		return PTR_ERR(batch);
 
@@ -1549,6 +1515,78 @@ static long eb_reloc_vma_validate(struct i915_execbuffer *eb, struct eb_vma *ev)
 	return required;
 }
 
+static int reloc_move_to_gpu(struct reloc_cache *cache, struct eb_vma *ev)
+{
+	struct i915_request *rq = cache->rq;
+	struct i915_vma *vma = ev->vma;
+	struct drm_i915_gem_object *obj = vma->obj;
+	int err;
+
+	if (obj->cache_dirty & ~obj->cache_coherent)
+		i915_gem_clflush_object(obj, 0);
+
+	obj->write_domain = I915_GEM_DOMAIN_RENDER;
+	obj->read_domains = I915_GEM_DOMAIN_RENDER;
+
+	err = i915_request_await_object(rq, obj, true);
+	if (err)
+		return err;
+
+	err = __i915_vma_move_to_active(vma, rq);
+	if (err)
+		return err;
+
+	dma_resv_add_excl_fence(vma->resv, &rq->fence);
+
+	return 0;
+}
+
+static int
+lock_relocs(struct i915_execbuffer *eb)
+{
+	struct ww_acquire_ctx acquire;
+	struct eb_vma *ev;
+	int err = 0;
+
+	ww_acquire_init(&acquire, &reservation_ww_class);
+
+	list_for_each_entry(ev, &eb->relocs, reloc_link) {
+		struct i915_vma *vma = ev->vma;
+
+		err = ww_mutex_lock_interruptible(&vma->resv->lock, &acquire);
+		if (err == -EDEADLK) {
+			struct eb_vma *unlock = ev, *en;
+
+			list_for_each_entry_safe_continue_reverse(unlock, en,
+								  &eb->relocs,
+								  reloc_link) {
+				ww_mutex_unlock(&unlock->vma->resv->lock);
+				list_move_tail(&unlock->reloc_link,
+					       &eb->relocs);
+			}
+
+			GEM_BUG_ON(!list_is_first(&ev->reloc_link,
+						  &eb->relocs));
+			err = ww_mutex_lock_slow_interruptible(&vma->resv->lock,
+							       &acquire);
+		}
+		if (err)
+			break;
+	}
+
+	ww_acquire_done(&acquire);
+
+	list_for_each_entry_continue_reverse(ev, &eb->relocs, reloc_link) {
+		if (err == 0)
+			err = reloc_move_to_gpu(&eb->reloc_cache, ev);
+		ww_mutex_unlock(&ev->vma->resv->lock);
+	}
+
+	ww_acquire_fini(&acquire);
+
+	return err;
+}
+
 static bool reloc_can_use_engine(const struct intel_engine_cs *engine)
 {
 	return engine->class != VIDEO_DECODE_CLASS || !IS_GEN(engine->i915, 6);
@@ -1577,6 +1615,10 @@ static int reloc_gpu(struct i915_execbuffer *eb)
 		return err;
 	GEM_BUG_ON(!eb->reloc_cache.rq);
 
+	err = lock_relocs(eb);
+	if (err)
+		goto out;
+
 	err = reloc_gpu_emit(&eb->reloc_cache);
 	if (err)
 		goto out;
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
index faed6480a792..4f10b51f9a7e 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
@@ -24,15 +24,15 @@ static int __igt_gpu_reloc(struct i915_execbuffer *eb,
 		GENMASK_ULL(eb->reloc_cache.use_64bit_reloc ? 63 : 31, 0);
 	const u32 *map = page_mask_bits(obj->mm.mapping);
 	struct i915_request *rq;
-	struct i915_vma *vma;
+	struct eb_vma ev;
 	int err;
 	int i;
 
-	vma = i915_vma_instance(obj, eb->context->vm, NULL);
-	if (IS_ERR(vma))
-		return PTR_ERR(vma);
+	ev.vma = i915_vma_instance(obj, eb->context->vm, NULL);
+	if (IS_ERR(ev.vma))
+		return PTR_ERR(ev.vma);
 
-	err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_HIGH);
+	err = i915_vma_pin(ev.vma, 0, 0, PIN_USER | PIN_HIGH);
 	if (err)
 		return err;
 
@@ -40,17 +40,22 @@ static int __igt_gpu_reloc(struct i915_execbuffer *eb,
 	if (err)
 		goto unpin_vma;
 
+	list_add(&ev.reloc_link, &eb->relocs);
+	err = lock_relocs(eb);
+	if (err)
+		goto unpin_vma;
+
 	err = reloc_gpu_emit(&eb->reloc_cache);
 	if (err)
 		goto unpin_vma;
 
 	/* 8-Byte aligned */
-	err = __reloc_entry_gpu(eb, vma, offsets[0] * sizeof(u32), 0);
+	err = __reloc_entry_gpu(eb, ev.vma, offsets[0] * sizeof(u32), 0);
 	if (err)
 		goto unpin_vma;
 
 	/* !8-Byte aligned */
-	err = __reloc_entry_gpu(eb, vma, offsets[1] * sizeof(u32), 1);
+	err = __reloc_entry_gpu(eb, ev.vma, offsets[1] * sizeof(u32), 1);
 	if (err)
 		goto unpin_vma;
 
@@ -62,7 +67,7 @@ static int __igt_gpu_reloc(struct i915_execbuffer *eb,
 	eb->reloc_cache.rq_size += i;
 
 	/* Force batch chaining */
-	err = __reloc_entry_gpu(eb, vma, offsets[2] * sizeof(u32), 2);
+	err = __reloc_entry_gpu(eb, ev.vma, offsets[2] * sizeof(u32), 2);
 	if (err)
 		goto unpin_vma;
 
@@ -97,7 +102,7 @@ static int __igt_gpu_reloc(struct i915_execbuffer *eb,
 put_rq:
 	i915_request_put(rq);
 unpin_vma:
-	i915_vma_unpin(vma);
+	i915_vma_unpin(ev.vma);
 	return err;
 }
 
@@ -121,6 +126,7 @@ static int igt_gpu_reloc(void *arg)
 	}
 
 	for_each_uabi_engine(eb.engine, eb.i915) {
+		INIT_LIST_HEAD(&eb.relocs);
 		reloc_cache_init(&eb.reloc_cache, eb.i915);
 		memset(map, POISON_INUSE, 4096);
 
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  parent reply	other threads:[~2020-06-04 10:38 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-06-04 10:37 [Intel-gfx] [PATCH 01/22] drm/i915/gem: Mark the buffer pool as active for the cmdparser Chris Wilson
2020-06-04 10:37 ` [Intel-gfx] [PATCH 02/22] drm/i915: Trim set_timer_ms() intervals Chris Wilson
2020-06-04 12:54   ` Matthew Auld
2020-06-04 10:37 ` [Intel-gfx] [PATCH 03/22] drm/i915/gt: Set timeslicing priority from queue Chris Wilson
2020-06-04 10:37 ` [Intel-gfx] [PATCH 04/22] drm/i915/gt: Always check to enable timeslicing if not submitting Chris Wilson
2020-06-04 10:37 ` [Intel-gfx] [PATCH 05/22] Restore "drm/i915: drop engine_pin/unpin_breadcrumbs_irq" Chris Wilson
2020-06-04 10:37 ` [Intel-gfx] [PATCH 06/22] drm/i915/gt: Couple tasklet scheduling for all CS interrupts Chris Wilson
2020-06-04 10:37 ` [Intel-gfx] [PATCH 07/22] drm/i915/gt: Support creation of 'internal' rings Chris Wilson
2020-06-04 10:37 ` [Intel-gfx] [PATCH 08/22] drm/i915/gt: Use client timeline address for seqno writes Chris Wilson
2020-06-04 10:37 ` [Intel-gfx] [PATCH 09/22] drm/i915/gt: Infrastructure for ring scheduling Chris Wilson
2020-06-04 10:37 ` [Intel-gfx] [PATCH 10/22] drm/i915/gt: Enable busy-stats for ring-scheduler Chris Wilson
2020-06-04 10:37 ` [Intel-gfx] [PATCH 11/22] drm/i915/gt: Track if an engine requires forcewake w/a Chris Wilson
2020-06-04 10:37 ` [Intel-gfx] [PATCH 12/22] drm/i915/gt: Implement ring scheduler for gen6/7 Chris Wilson
2020-06-04 10:37 ` [Intel-gfx] [PATCH 13/22] drm/i915/gt: Enable ring scheduling " Chris Wilson
2020-06-04 10:37 ` [Intel-gfx] [PATCH 14/22] drm/i915/gem: Async GPU relocations only Chris Wilson
2020-06-04 13:37   ` Matthew Auld
2020-06-04 13:44     ` Chris Wilson
2020-06-04 10:37 ` [Intel-gfx] [PATCH 15/22] drm/i915: Add list_for_each_entry_safe_continue_reverse Chris Wilson
2020-06-04 10:37 ` [Intel-gfx] [PATCH 16/22] drm/i915/gem: Separate reloc validation into an earlier step Chris Wilson
2020-06-04 10:37 ` [Intel-gfx] [PATCH 17/22] drm/i915/gem: Lift GPU relocation allocation Chris Wilson
2020-06-04 10:37 ` [Intel-gfx] [PATCH 18/22] drm/i915/gem: Build the reloc request first Chris Wilson
2020-06-04 10:37 ` Chris Wilson [this message]
2020-06-04 10:37 ` [Intel-gfx] [PATCH 20/22] dma-buf: Proxy fence, an unsignaled fence placeholder Chris Wilson
2020-06-04 10:37 ` [Intel-gfx] [PATCH 21/22] drm/i915: Unpeel awaits on a proxy fence Chris Wilson
2020-06-04 10:37 ` [Intel-gfx] [PATCH 22/22] drm/i915/gem: Make relocations atomic within execbuf Chris Wilson
2020-06-04 11:36 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/22] drm/i915/gem: Mark the buffer pool as active for the cmdparser Patchwork
2020-06-04 11:37 ` [Intel-gfx] ✗ Fi.CI.SPARSE: " Patchwork
2020-06-04 12:06 ` [Intel-gfx] ✗ Fi.CI.BAT: failure " Patchwork
2020-06-04 12:49 ` [Intel-gfx] [PATCH 01/22] " Matthew Auld

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200604103751.18816-19-chris@chris-wilson.co.uk \
    --to=chris@chris-wilson.co.uk \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.