* [PATCH] [RFC] drm/i915: read-read semaphore optimization
@ 2011-12-13  3:52 Ben Widawsky
  2011-12-13  9:49 ` Chris Wilson
  2011-12-13 17:22 ` Eric Anholt
  0 siblings, 2 replies; 16+ messages in thread
From: Ben Widawsky @ 2011-12-13  3:52 UTC (permalink / raw)
  To: intel-gfx; +Cc: Daniel Vetter, Ben Widawsky

Since we don't differentiate on the different GPU read domains, it
should be safe to allow back to back reads to occur without issuing a
wait (or flush in the non-semaphore case).

This has the unfortunate side effect that we need to keep track of all
the outstanding buffer reads so that we can synchronize on a write, to
another ring (since we don't know which read finishes first). In other
words, the code is quite simple for two rings, but gets more tricky for
more than two rings.

Here is a picture of the solution to the above problem

Ring 0            Ring 1             Ring 2
batch 0           batch 1            batch 2
  read buffer A     read buffer A      wait batch 0
                                       wait batch 1
                                       write buffer A

This code is really untested. I'm hoping for some feedback on whether
this is worth cleaning up and testing more thoroughly.

Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_drv.h            |    1 +
 drivers/gpu/drm/i915/i915_gem.c            |    1 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |   32 ++++++++++++++++++++++-----
 3 files changed, 28 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4a9c1b9..d59bf20 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -852,6 +852,7 @@ struct drm_i915_gem_object {
 
 	/** Breadcrumb of last rendering to the buffer. */
 	uint32_t last_rendering_seqno;
+	uint32_t lrs_ro[I915_NUM_RINGS];
 	struct intel_ring_buffer *ring;
 
 	/** Breadcrumb of last fenced GPU access to the buffer. */
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ed0b68f..64cc804 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1561,6 +1561,7 @@ i915_gem_object_move_off_active(struct drm_i915_gem_object *obj)
 {
 	list_del_init(&obj->ring_list);
 	obj->last_rendering_seqno = 0;
+	memset(obj->lrs_ro, 0, sizeof(obj->lrs_ro));
 }
 
 static void
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 42a6529..78090a3 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -752,21 +752,20 @@ i915_gem_execbuffer_sync_rings(struct drm_i915_gem_object *obj,
 {
 	struct intel_ring_buffer *from = obj->ring;
 	u32 seqno;
-	int ret, idx;
+	int ret, idx, i;
 
 	if (from == NULL || to == from)
 		return 0;
 
-	/* If the object isn't being written to, and the object isn't moving
-	 * into a GPU write domain, it doesn't need to sync.
-	 */
 	if (obj->pending_gpu_write == 0 &&
-	    obj->base.pending_write_domain == 0)
+	    obj->base.pending_write_domain == 0) {
+		obj->lrs_ro[ffs(to->id) - 1] = obj->last_rendering_seqno;
 		return 0;
+	}
 
 	/* XXX gpu semaphores are implicated in various hard hangs on SNB */
 	if (INTEL_INFO(obj->base.dev)->gen < 6 || !i915_semaphores)
-		return i915_gem_object_wait_rendering(obj);
+		ret = i915_gem_object_wait_rendering(obj);
 
 	idx = intel_ring_sync_index(from, to);
 
@@ -792,6 +791,27 @@ i915_gem_execbuffer_sync_rings(struct drm_i915_gem_object *obj,
 
 	from->sync_seqno[idx] = seqno;
 
+	/* We must be doing a write to an object which may be queued to be
+	 * read by some other command. If we skipped synchronizing because of
+	 * a read-only request, we must check for and add a sync point now.
+	 * The reason we must sync on the write is that we have more than two
+	 * rings: if two rings have outstanding reads (without sync), we can't
+	 * let the write occur until both of the others have completed.
+	 */
+	for (i = 0; i < I915_NUM_RINGS; i++) {
+		uint32_t ro_seqno = obj->lrs_ro[i];
+		if (ro_seqno != 0) {
+			obj->lrs_ro[i] = 0;
+
+			if (i915_seqno_passed(from->get_seqno(from), ro_seqno))
+				continue;
+
+			ret = to->sync_to(to, from, ro_seqno - 1);
+			if (ret)
+				return ret;
+		}
+	}
+
 	return to->sync_to(to, from, seqno - 1);
 }
 
-- 
1.7.8
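
For readers skimming the thread, the following is a minimal stand-alone
sketch of the rule the patch implements: a read-only request merely records
its seqno per ring, while a write must synchronize against the recorded
reads of every other ring. All names below are hypothetical; this models
the idea rather than the i915 code itself.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define NUM_RINGS 3

struct toy_object {
	/* analogue of lrs_ro[]: last read-only seqno seen per ring */
	uint32_t last_read_seqno[NUM_RINGS];
};

/* Read-only access: record the seqno, emit no cross-ring wait. */
static void toy_read(struct toy_object *obj, int ring, uint32_t seqno)
{
	obj->last_read_seqno[ring] = seqno;
	printf("ring %d: read at seqno %u (no sync)\n", ring, (unsigned)seqno);
}

/* Write access: wait on the last recorded read of every other ring. */
static void toy_write(struct toy_object *obj, int ring, uint32_t seqno)
{
	int i;

	for (i = 0; i < NUM_RINGS; i++) {
		uint32_t ro = obj->last_read_seqno[i];

		if (i == ring || ro == 0)
			continue;
		obj->last_read_seqno[i] = 0;
		printf("ring %d: wait for ring %d to pass seqno %u\n",
		       ring, i, (unsigned)ro);
	}
	printf("ring %d: write at seqno %u\n", ring, (unsigned)seqno);
}

int main(void)
{
	struct toy_object buf;

	memset(&buf, 0, sizeof(buf));
	toy_read(&buf, 0, 1);	/* batch 0: read buffer A on ring 0 */
	toy_read(&buf, 1, 1);	/* batch 1: read buffer A on ring 1 */
	toy_write(&buf, 2, 1);	/* batch 2: write buffer A on ring 2 */
	return 0;
}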

* Re: [PATCH] [RFC] drm/i915: read-read semaphore optimization
  2011-12-13  3:52 [PATCH] [RFC] drm/i915: read-read semaphore optimization Ben Widawsky
@ 2011-12-13  9:49 ` Chris Wilson
  2011-12-13 16:01   ` Daniel Vetter
  2011-12-13 17:22 ` Eric Anholt
  1 sibling, 1 reply; 16+ messages in thread
From: Chris Wilson @ 2011-12-13  9:49 UTC (permalink / raw)
  To: intel-gfx; +Cc: Daniel Vetter, Ben Widawsky

On Mon, 12 Dec 2011 19:52:08 -0800, Ben Widawsky <ben@bwidawsk.net> wrote:
> Since we don't differentiate on the different GPU read domains, it
> should be safe to allow back to back reads to occur without issuing a
> wait (or flush in the non-semaphore case).
> 
> This has the unfortunate side effect that we need to keep track of all
> the outstanding buffer reads so that we can synchronize on a write, to
> another ring (since we don't know which read finishes first). In other
> words, the code is quite simple for two rings, but gets more tricky for
> > 2 rings.
> 
> Here is a picture of the solution to the above problem
> 
> Ring 0            Ring 1             Ring 2
> batch 0           batch 1            batch 2
>   read buffer A     read buffer A      wait batch 0
>                                        wait batch 1
>                                        write buffer A
> 
> This code is really untested. I'm hoping for some feedback if this is
> worth cleaning up, and testing more thoroughly.

Yes, that race is quite valid and the reason why I thought I hadn't made
that optimisation. Darn. :(

To go a step further, we can split the obj->ring_list into
(obj->ring_read_list[NUM_RINGS], obj->num_readers, obj->last_read_seqno) and
(obj->ring_write_list, obj->last_write_seqno). At which point Daniel
complains about bloating every i915_gem_object, and we probably should
kmem_cache_alloc an i915_gem_object_seqno on demand. This allows us to track
objects in multiple rings and implement read-write locking, albeit at
significantly more complexity in managing the active lists.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
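
A rough sketch of the split described above, with stand-in types so it
compiles on its own; every name below is hypothetical rather than an
existing i915 structure.

#include <stdint.h>

#define NUM_RINGS 3

struct list_head { struct list_head *next, *prev; };	/* stand-in for the kernel type */
struct intel_ring_buffer;				/* opaque here */

/* Allocated from a kmem_cache only while the object is active on a
 * ring, so idle objects carry no extra weight. */
struct i915_gem_object_seqno {
	struct list_head link;		/* node on that ring's read (or write) list */
	struct intel_ring_buffer *ring;
	uint32_t seqno;
};

/* One possible per-object layout: readers tracked per ring, at most one
 * writer tracked separately -- i.e. read-write locking of GPU access. */
struct gem_object_tracking {
	struct list_head ring_read_list[NUM_RINGS];
	unsigned int num_readers;
	uint32_t last_read_seqno[NUM_RINGS];

	struct list_head ring_write_list;
	uint32_t last_write_seqno;
};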

* Re: [PATCH] [RFC] drm/i915: read-read semaphore optimization
  2011-12-13  9:49 ` Chris Wilson
@ 2011-12-13 16:01   ` Daniel Vetter
  2011-12-13 16:36     ` Keith Packard
  2011-12-13 16:59     ` Chris Wilson
  0 siblings, 2 replies; 16+ messages in thread
From: Daniel Vetter @ 2011-12-13 16:01 UTC (permalink / raw)
  To: Chris Wilson; +Cc: Daniel Vetter, Ben Widawsky, intel-gfx

On Tue, Dec 13, 2011 at 09:49:34AM +0000, Chris Wilson wrote:
> On Mon, 12 Dec 2011 19:52:08 -0800, Ben Widawsky <ben@bwidawsk.net> wrote:
> > Since we don't differentiate on the different GPU read domains, it
> > should be safe to allow back to back reads to occur without issuing a
> > wait (or flush in the non-semaphore case).
> > 
> > This has the unfortunate side effect that we need to keep track of all
> > the outstanding buffer reads so that we can synchronize on a write, to
> > another ring (since we don't know which read finishes first). In other
> > words, the code is quite simple for two rings, but gets more tricky for
> > > 2 rings.
> > 
> > Here is a picture of the solution to the above problem
> > 
> > Ring 0            Ring 1             Ring 2
> > batch 0           batch 1            batch 2
> >   read buffer A     read buffer A      wait batch 0
> >                                        wait batch 1
> >                                        write buffer A
> > 
> > This code is really untested. I'm hoping for some feedback if this is
> > worth cleaning up, and testing more thoroughly.
> 
> Yes, that race is quite valid and the reason why I thought I hadn't made
> that optimisation. Darn. :(
> 
> To go a step further, we can split the obj->ring_list into
> (obj->ring_read_list[NUM_RINGS], obj->num_readers, obj->last_read_seqno) and
> (obj->ring_write_list, obj->last_write_seqno). At which point Daniel
> complains about bloating every i915_gem_object, and we probably should
> kmem_cache_alloc a i915_gem_object_seqno on demand. This allows us to track
> objects in multiple rings and implement read-write locking, albeit at
> significantly more complexity in managing the active lists.

I think the i915_gem_object bloat can be fought by stealing a few bits
from the various seqnos and storing the ring id in there. The thing that
makes me more uneasy is that I don't trust our gpu domain tracking
(especially since it's not per-ring). So either
- extend it to be per-ring
- or remove it all and invalidate/flush unconditionally.
In light of all the complexity, and given our various workarounds, I
prefer the latter.

Afaik the only use-case for parallel reads is video decode with
post-processing on the render ring. The decode ring needs read-only access
to reference frames to decode the next frame and the render ring read-only
access to past frames for post-processing (e.g. deinterlacing). But given
the general state of perf optimizations in libva I think we have lower
hanging fruit to chase if we actually miss a performance target for this
use-case.

Cheers, Daniel
-- 
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48
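
The "steal a few bits" idea could look roughly like the following; the
field width, shift and helper names are invented purely for illustration
and do not reflect the real seqno layout.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define RING_SHIFT	30u			/* 2 bits cover 3 rings */
#define SEQNO_MASK	((1u << RING_SHIFT) - 1)

/* Pack a ring id into the top bits of a 32-bit seqno slot. */
static uint32_t pack_seqno(uint32_t seqno, unsigned int ring_id)
{
	assert(seqno <= SEQNO_MASK && ring_id < 4);
	return ((uint32_t)ring_id << RING_SHIFT) | seqno;
}

static unsigned int packed_ring(uint32_t packed)
{
	return packed >> RING_SHIFT;
}

static uint32_t packed_seqno(uint32_t packed)
{
	return packed & SEQNO_MASK;
}

int main(void)
{
	uint32_t p = pack_seqno(0x1234, 2);

	printf("ring %u, seqno 0x%x\n", packed_ring(p), (unsigned)packed_seqno(p));
	return 0;
}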

* Re: [PATCH] [RFC] drm/i915: read-read semaphore optimization
  2011-12-13 16:01   ` Daniel Vetter
@ 2011-12-13 16:36     ` Keith Packard
  2011-12-13 22:49       ` Ben Widawsky
  2011-12-13 16:59     ` Chris Wilson
  1 sibling, 1 reply; 16+ messages in thread
From: Keith Packard @ 2011-12-13 16:36 UTC (permalink / raw)
  To: Daniel Vetter, Chris Wilson; +Cc: Daniel Vetter, Ben Widawsky, intel-gfx


On Tue, 13 Dec 2011 17:01:33 +0100, Daniel Vetter <daniel@ffwll.ch> wrote:

> - or remove it all and invalidate/flush unconditionally.

Eric and I were chatting yesterday about trying this -- it seems like
we'd be able to dramatically simplify the kernel module by doing this,
and given how much flushing already occurs, I doubt we'd see any
significant performance difference, and we'd save a pile of CPU time,
which might actually improve performance.

-- 
keith.packard@intel.com

* Re: [PATCH] [RFC] drm/i915: read-read semaphore optimization
  2011-12-13 16:01   ` Daniel Vetter
  2011-12-13 16:36     ` Keith Packard
@ 2011-12-13 16:59     ` Chris Wilson
  1 sibling, 0 replies; 16+ messages in thread
From: Chris Wilson @ 2011-12-13 16:59 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: Daniel Vetter, Ben Widawsky, intel-gfx

On Tue, 13 Dec 2011 17:01:33 +0100, Daniel Vetter <daniel@ffwll.ch> wrote:
> Afaik the only use-case for parallel reads is video decode with
> post-processing on the render ring. The decode ring needs read-only access
> to reference frames to decode the next frame and the render ring read-only
> access to past frames for post-processing (e.g. deinterlacing). But given
> the general state of perf optimizations in libva I think we have lower
> hanging fruit to chase if we actually miss a performance target for this
> use-case.

One in the near future will be: render to backbuffer (RCS),
pageflip to scanout (BCS), read from front (RCS).

And in its current form UXA will do the back-to-front blit on the BCS.
But that is async and so not a large race window, whereas the pageflip
may take ~16ms to process. I don't think it is entirely unfeasible that
we see some form of this whilst running compositors or games. Or at
least would if we enabled semaphores for pageflips. Except in the
pageflip scenario we know we are protected by the fb ref, so consider
the hypothetical scenario where we have a working vsync'ed blit...

The real question, in any event, is whether we have enough instrumentation
to diagnose GPU stalls upon buffer migration. Then we can replace the read
optimisation with a tracepoint and wait for a test case.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

* Re: [PATCH] [RFC] drm/i915: read-read semaphore optimization
  2011-12-13  3:52 [PATCH] [RFC] drm/i915: read-read semaphore optimization Ben Widawsky
  2011-12-13  9:49 ` Chris Wilson
@ 2011-12-13 17:22 ` Eric Anholt
  2011-12-13 18:36   ` Ben Widawsky
  1 sibling, 1 reply; 16+ messages in thread
From: Eric Anholt @ 2011-12-13 17:22 UTC (permalink / raw)
  To: intel-gfx; +Cc: Daniel Vetter, Ben Widawsky


On Mon, 12 Dec 2011 19:52:08 -0800, Ben Widawsky <ben@bwidawsk.net> wrote:
> Since we don't differentiate on the different GPU read domains, it
> should be safe to allow back to back reads to occur without issuing a
> wait (or flush in the non-semaphore case).
> 
> This has the unfortunate side effect that we need to keep track of all
> the outstanding buffer reads so that we can synchronize on a write, to
> another ring (since we don't know which read finishes first). In other
> words, the code is quite simple for two rings, but gets more tricky for
> > 2 rings.
> 
> Here is a picture of the solution to the above problem
> 
> Ring 0            Ring 1             Ring 2
> batch 0           batch 1            batch 2
>   read buffer A     read buffer A      wait batch 0
>                                        wait batch 1
>                                        write buffer A
> 
> This code is really untested. I'm hoping for some feedback if this is
> worth cleaning up, and testing more thoroughly.

You say it's an optimization -- do you have performance numbers?

* Re: [PATCH] [RFC] drm/i915: read-read semaphore optimization
  2011-12-13 17:22 ` Eric Anholt
@ 2011-12-13 18:36   ` Ben Widawsky
  2012-01-16 21:50     ` Daniel Vetter
  0 siblings, 1 reply; 16+ messages in thread
From: Ben Widawsky @ 2011-12-13 18:36 UTC (permalink / raw)
  To: Eric Anholt; +Cc: Daniel Vetter, intel-gfx

On 12/13/2011 09:22 AM, Eric Anholt wrote:
> On Mon, 12 Dec 2011 19:52:08 -0800, Ben Widawsky<ben@bwidawsk.net>  wrote:
>> Since we don't differentiate on the different GPU read domains, it
>> should be safe to allow back to back reads to occur without issuing a
>> wait (or flush in the non-semaphore case).
>>
>> This has the unfortunate side effect that we need to keep track of all
>> the outstanding buffer reads so that we can synchronize on a write, to
>> another ring (since we don't know which read finishes first). In other
>> words, the code is quite simple for two rings, but gets more tricky for
>>> 2 rings.
>>
>> Here is a picture of the solution to the above problem
>>
>> Ring 0            Ring 1             Ring 2
>> batch 0           batch 1            batch 2
>>    read buffer A     read buffer A      wait batch 0
>>                                         wait batch 1
>>                                         write buffer A
>>
>> This code is really untested. I'm hoping for some feedback if this is
>> worth cleaning up, and testing more thoroughly.
>
> You say it's an optimization -- do you have performance numbers?

A 33% improvement on a hacked version of gem_ring_sync_loop.

It's not really a valid test as it's not coherent, but this is 
approximately the best case improvement.

Oddly, semaphores don't make much difference in this test, which was
surprising.

* Re: [PATCH] [RFC] drm/i915: read-read semaphore optimization
  2011-12-13 16:36     ` Keith Packard
@ 2011-12-13 22:49       ` Ben Widawsky
  2011-12-13 23:22         ` Keith Packard
  2011-12-14  1:09         ` Eric Anholt
  0 siblings, 2 replies; 16+ messages in thread
From: Ben Widawsky @ 2011-12-13 22:49 UTC (permalink / raw)
  To: Keith Packard; +Cc: Daniel Vetter, intel-gfx

On 12/13/2011 08:36 AM, Keith Packard wrote:
> On Tue, 13 Dec 2011 17:01:33 +0100, Daniel Vetter <daniel@ffwll.ch> wrote:
> 
>> - or remove it all and invalidate/flush unconditionally.
> 
> Eric and I were chatting yesterday about trying this -- it seems like
> we'd be able to dramatically simplify the kernel module by doing this,
> and given how much flushing already occurs, I doubt we'd see any
> significant performance difference, and we'd save a pile of CPU time,
> which might actually improve performance.


Would we want to keep domain tracking if the HW worked correctly and we
didn't always have to flush? It seems like a shame to just gut the code
if it could actually offer a benefit on future generations.

I know Daniel has the same idea about gutting it...

Ben

* Re: [PATCH] [RFC] drm/i915: read-read semaphore optimization
  2011-12-13 22:49       ` Ben Widawsky
@ 2011-12-13 23:22         ` Keith Packard
  2011-12-14  1:09         ` Eric Anholt
  1 sibling, 0 replies; 16+ messages in thread
From: Keith Packard @ 2011-12-13 23:22 UTC (permalink / raw)
  To: Ben Widawsky; +Cc: Daniel Vetter, intel-gfx


On Tue, 13 Dec 2011 14:49:49 -0800, Ben Widawsky <ben@bwidawsk.net> wrote:

> Would we want to keep domain tracking if the HW worked correctly and we
> didn't have to always flush. It seems like a shame to just gut the code
> if it actually could offer a benefit on future generations.

That sounds like premature optimization to me. If we want something
similar on future hardware, we can resurrect the old code and see what
pieces are useful. For now, we're fighting correctness and stability
issues, and given the limited (zero? negative?) performance benefits, we
just need to get to code which works reliably and provides good
performance.

The current code has gotten to the 'piles of kludges on kludges' stage,
which makes it very fragile -- see the regression caused by changing
flushing orders in the VT-d work-around.

-- 
keith.packard@intel.com

* Re: [PATCH] [RFC] drm/i915: read-read semaphore optimization
  2011-12-13 22:49       ` Ben Widawsky
  2011-12-13 23:22         ` Keith Packard
@ 2011-12-14  1:09         ` Eric Anholt
  2011-12-14  3:25           ` Keith Packard
  1 sibling, 1 reply; 16+ messages in thread
From: Eric Anholt @ 2011-12-14  1:09 UTC (permalink / raw)
  To: Ben Widawsky, Keith Packard; +Cc: Daniel Vetter, intel-gfx


On Tue, 13 Dec 2011 14:49:49 -0800, Ben Widawsky <ben@bwidawsk.net> wrote:
> On 12/13/2011 08:36 AM, Keith Packard wrote:
> > On Tue, 13 Dec 2011 17:01:33 +0100, Daniel Vetter <daniel@ffwll.ch> wrote:
> > 
> >> - or remove it all and invalidate/flush unconditionally.
> > 
> > Eric and I were chatting yesterday about trying this -- it seems like
> > we'd be able to dramatically simplify the kernel module by doing this,
> > and given how much flushing already occurs, I doubt we'd see any
> > significant performance difference, and we'd save a pile of CPU time,
> > which might actually improve performance.
> 
> 
> Would we want to keep domain tracking if the HW worked correctly and we
> didn't have to always flush. It seems like a shame to just gut the code
> if it actually could offer a benefit on future generations.

It's not a matter of hardware behavior.  It's a matter of always needing
to flush the caches anyway, because you're emitting new commands to the
GPU and you want the results completed on the screen at the end.  So
we go to all this fragile, expensive CPU work for no benefit.

We introduced this complexity with no evidence that it would help, just
because we thought, like you, that "avoiding cache flushes should be
good, right?".  Experiments so far say we were wrong.

* Re: [PATCH] [RFC] drm/i915: read-read semaphore optimization
  2011-12-14  1:09         ` Eric Anholt
@ 2011-12-14  3:25           ` Keith Packard
  0 siblings, 0 replies; 16+ messages in thread
From: Keith Packard @ 2011-12-14  3:25 UTC (permalink / raw)
  To: Eric Anholt, Ben Widawsky; +Cc: Daniel Vetter, intel-gfx


On Tue, 13 Dec 2011 17:09:45 -0800, Eric Anholt <eric@anholt.net> wrote:

> We introduced this complexity with no evidence that it would help, just
> because we thought, like you, that "avoiding cache flushes should be
> good, right?".  Experiments so far say we were wrong.

Right, you'd think we'd have learned to not optimize in advance of
data. Someday maybe we'll know better...

And, of course, future hardware may require different code for optimal
performance. Who would have guessed that?

-- 
keith.packard@intel.com

* Re: [PATCH] [RFC] drm/i915: read-read semaphore optimization
  2011-12-13 18:36   ` Ben Widawsky
@ 2012-01-16 21:50     ` Daniel Vetter
  2012-01-16 22:20       ` Ben Widawsky
  0 siblings, 1 reply; 16+ messages in thread
From: Daniel Vetter @ 2012-01-16 21:50 UTC (permalink / raw)
  To: Ben Widawsky; +Cc: intel-gfx, Daniel Vetter

On Tue, Dec 13, 2011 at 10:36:15AM -0800, Ben Widawsky wrote:
> On 12/13/2011 09:22 AM, Eric Anholt wrote:
> >On Mon, 12 Dec 2011 19:52:08 -0800, Ben Widawsky<ben@bwidawsk.net>  wrote:
> >>Since we don't differentiate on the different GPU read domains, it
> >>should be safe to allow back to back reads to occur without issuing a
> >>wait (or flush in the non-semaphore case).
> >>
> >>This has the unfortunate side effect that we need to keep track of all
> >>the outstanding buffer reads so that we can synchronize on a write, to
> >>another ring (since we don't know which read finishes first). In other
> >>words, the code is quite simple for two rings, but gets more tricky for
> >>>2 rings.
> >>
> >>Here is a picture of the solution to the above problem
> >>
> >>Ring 0            Ring 1             Ring 2
> >>batch 0           batch 1            batch 2
> >>   read buffer A     read buffer A      wait batch 0
> >>                                        wait batch 1
> >>                                        write buffer A
> >>
> >>This code is really untested. I'm hoping for some feedback if this is
> >>worth cleaning up, and testing more thoroughly.
> >
> >You say it's an optimization -- do you have performance numbers?
> 
> 33% improvement on a hacked version of gem_ring_sync_loop with.
> 
> It's not really a valid test as it's not coherent, but this is
> approximately the best case improvement.
> 
> Oddly semaphores doesn't make much difference in this test, which
> was surprising.

Our domain tracking is already complicated in unfunny ways. And (at least
without a use-case showing gains with hard numbers in either perf or power
usage) I think this patch is the kind of "this looks cool" stuff that
added a lot to the current problem.

So before adding more complexity on top I'd like to remove some of the
superfluous stuff we already have, i.e. all the flushing_list stuff and
maybe other things ...

Cheers, Daniel
-- 
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48

* Re: [PATCH] [RFC] drm/i915: read-read semaphore optimization
  2012-01-16 21:50     ` Daniel Vetter
@ 2012-01-16 22:20       ` Ben Widawsky
  2012-01-17  3:41         ` Eric Anholt
  2012-01-17 10:15         ` Daniel Vetter
  0 siblings, 2 replies; 16+ messages in thread
From: Ben Widawsky @ 2012-01-16 22:20 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: Daniel Vetter, intel-gfx

On 01/16/2012 01:50 PM, Daniel Vetter wrote:
> On Tue, Dec 13, 2011 at 10:36:15AM -0800, Ben Widawsky wrote:
>> On 12/13/2011 09:22 AM, Eric Anholt wrote:
>>> On Mon, 12 Dec 2011 19:52:08 -0800, Ben Widawsky<ben@bwidawsk.net>  wrote:
>>>> Since we don't differentiate on the different GPU read domains, it
>>>> should be safe to allow back to back reads to occur without issuing a
>>>> wait (or flush in the non-semaphore case).
>>>>
>>>> This has the unfortunate side effect that we need to keep track of all
>>>> the outstanding buffer reads so that we can synchronize on a write, to
>>>> another ring (since we don't know which read finishes first). In other
>>>> words, the code is quite simple for two rings, but gets more tricky for
>>>>> 2 rings.
>>>>
>>>> Here is a picture of the solution to the above problem
>>>>
>>>> Ring 0            Ring 1             Ring 2
>>>> batch 0           batch 1            batch 2
>>>>   read buffer A     read buffer A      wait batch 0
>>>>                                        wait batch 1
>>>>                                        write buffer A
>>>>
>>>> This code is really untested. I'm hoping for some feedback if this is
>>>> worth cleaning up, and testing more thoroughly.
>>>
>>> You say it's an optimization -- do you have performance numbers?
>>
>> 33% improvement on a hacked version of gem_ring_sync_loop with.
>>
>> It's not really a valid test as it's not coherent, but this is
>> approximately the best case improvement.
>>
>> Oddly semaphores doesn't make much difference in this test, which
>> was surprising.
> 
> Our domain tracking is already complicated in unfunny ways. And (at least
> without a use-case showing gains with hard numbers in either perf or power
> usage) I think this patch is the kind of "this looks cool" stuff that
> added a lot to the current problem.
> 
> So before adding more complexity on top I'd like to remove some of the
> superflous stuff we already have. I.e. all the flushing_list stuff and
> maybe other things ...

Can you be more clear on what exactly you want done before taking a
patch like this? Maybe I can work on it during some down time.

> 
> Cheers, Daniel

~Ben

* Re: [PATCH] [RFC] drm/i915: read-read semaphore optimization
  2012-01-16 22:20       ` Ben Widawsky
@ 2012-01-17  3:41         ` Eric Anholt
  2012-01-17 10:15         ` Daniel Vetter
  1 sibling, 0 replies; 16+ messages in thread
From: Eric Anholt @ 2012-01-17  3:41 UTC (permalink / raw)
  To: Ben Widawsky, Daniel Vetter; +Cc: Daniel Vetter, intel-gfx


On Mon, 16 Jan 2012 14:20:55 -0800, Ben Widawsky <ben@bwidawsk.net> wrote:
> On 01/16/2012 01:50 PM, Daniel Vetter wrote:
> > On Tue, Dec 13, 2011 at 10:36:15AM -0800, Ben Widawsky wrote:
> >> On 12/13/2011 09:22 AM, Eric Anholt wrote:
> >>> On Mon, 12 Dec 2011 19:52:08 -0800, Ben Widawsky<ben@bwidawsk.net>  wrote:
> >>>> Since we don't differentiate on the different GPU read domains, it
> >>>> should be safe to allow back to back reads to occur without issuing a
> >>>> wait (or flush in the non-semaphore case).
> >>>>
> >>>> This has the unfortunate side effect that we need to keep track of all
> >>>> the outstanding buffer reads so that we can synchronize on a write, to
> >>>> another ring (since we don't know which read finishes first). In other
> >>>> words, the code is quite simple for two rings, but gets more tricky for
> >>>>> 2 rings.
> >>>>
> >>>> Here is a picture of the solution to the above problem
> >>>>
> >>>> Ring 0            Ring 1             Ring 2
> >>>> batch 0           batch 1            batch 2
> >>>>   read buffer A     read buffer A      wait batch 0
> >>>>                                        wait batch 1
> >>>>                                        write buffer A
> >>>>
> >>>> This code is really untested. I'm hoping for some feedback if this is
> >>>> worth cleaning up, and testing more thoroughly.
> >>>
> >>> You say it's an optimization -- do you have performance numbers?
> >>
> >> 33% improvement on a hacked version of gem_ring_sync_loop with.
> >>
> >> It's not really a valid test as it's not coherent, but this is
> >> approximately the best case improvement.
> >>
> >> Oddly semaphores doesn't make much difference in this test, which
> >> was surprising.
> > 
> > Our domain tracking is already complicated in unfunny ways. And (at least
> > without a use-case showing gains with hard numbers in either perf or power
> > usage) I think this patch is the kind of "this looks cool" stuff that
> > added a lot to the current problem.
> > 
> > So before adding more complexity on top I'd like to remove some of the
> > superflous stuff we already have. I.e. all the flushing_list stuff and
> > maybe other things ...
> 
> Can you be more clear on what exactly you want done before taking a
> patch like this? Maybe I can work on it during some down time.

If it claims to be an optimization, at a minimum the patch should
include performance numbers.

* Re: [PATCH] [RFC] drm/i915: read-read semaphore optimization
  2012-01-16 22:20       ` Ben Widawsky
  2012-01-17  3:41         ` Eric Anholt
@ 2012-01-17 10:15         ` Daniel Vetter
  2012-01-17 17:55           ` Eric Anholt
  1 sibling, 1 reply; 16+ messages in thread
From: Daniel Vetter @ 2012-01-17 10:15 UTC (permalink / raw)
  To: Ben Widawsky; +Cc: Daniel Vetter, intel-gfx

On Mon, Jan 16, 2012 at 02:20:55PM -0800, Ben Widawsky wrote:
> On 01/16/2012 01:50 PM, Daniel Vetter wrote:
> > On Tue, Dec 13, 2011 at 10:36:15AM -0800, Ben Widawsky wrote:
> >> On 12/13/2011 09:22 AM, Eric Anholt wrote:
> >>> On Mon, 12 Dec 2011 19:52:08 -0800, Ben Widawsky<ben@bwidawsk.net>  wrote:
> >>>> Since we don't differentiate on the different GPU read domains, it
> >>>> should be safe to allow back to back reads to occur without issuing a
> >>>> wait (or flush in the non-semaphore case).
> >>>>
> >>>> This has the unfortunate side effect that we need to keep track of all
> >>>> the outstanding buffer reads so that we can synchronize on a write, to
> >>>> another ring (since we don't know which read finishes first). In other
> >>>> words, the code is quite simple for two rings, but gets more tricky for
> >>>>> 2 rings.
> >>>>
> >>>> Here is a picture of the solution to the above problem
> >>>>
> >>>> Ring 0            Ring 1             Ring 2
> >>>> batch 0           batch 1            batch 2
> >>>>   read buffer A     read buffer A      wait batch 0
> >>>>                                        wait batch 1
> >>>>                                        write buffer A
> >>>>
> >>>> This code is really untested. I'm hoping for some feedback if this is
> >>>> worth cleaning up, and testing more thoroughly.
> >>>
> >>> You say it's an optimization -- do you have performance numbers?
> >>
> >> 33% improvement on a hacked version of gem_ring_sync_loop with.
> >>
> >> It's not really a valid test as it's not coherent, but this is
> >> approximately the best case improvement.
> >>
> >> Oddly semaphores doesn't make much difference in this test, which
> >> was surprising.
> > 
> > Our domain tracking is already complicated in unfunny ways. And (at least
> > without a use-case showing gains with hard numbers in either perf or power
> > usage) I think this patch is the kind of "this looks cool" stuff that
> > added a lot to the current problem.
> > 
> > So before adding more complexity on top I'd like to remove some of the
> > superflous stuff we already have. I.e. all the flushing_list stuff and
> > maybe other things ...
> 
> Can you be more clear on what exactly you want done before taking a
> patch like this? Maybe I can work on it during some down time.

I was thinking about Eric's no-more-domains stuff specifically, which has
tons of natural split-points - and we want to exploit these for merging.
Imo step 1 would be to just rework the batch dispatch in
intel_ringbuffer.c so that we unconditionally invalidate before and flush
afterwards. The ring->flush would become a no-op. No changes to the core
flushing_list tracking for step 1.

If we can get this in for 3.4 we could (in the -next merge cycle) walk all
the callchains from their ends and rip out everything which is a no-op,
starting from ring->flush. I think that's the safest way to attack this.
Eric?

Cheers, Daniel
-- 
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48
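
A toy model of "unconditionally invalidate before and flush afterwards"
for the dispatch path; every name below is hypothetical and this is not
the actual intel_ringbuffer.c code.

#include <stdio.h>

static void emit_cache_invalidate(void)
{
	puts("emit: invalidate GPU caches");
}

static void emit_cache_flush(void)
{
	puts("emit: flush GPU caches to memory");
}

static void emit_batch(unsigned int offset, unsigned int len)
{
	printf("emit: batch buffer start at 0x%x, %u bytes\n", offset, len);
}

/* Unconditional bracketing: correctness no longer depends on tracking
 * which domains an object was last used in, so ring->flush (and the
 * bookkeeping feeding it) can eventually go away. */
static void dispatch_batch(unsigned int offset, unsigned int len)
{
	emit_cache_invalidate();
	emit_batch(offset, len);
	emit_cache_flush();
}

int main(void)
{
	dispatch_batch(0x10000, 4096);
	return 0;
}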

* Re: [PATCH] [RFC] drm/i915: read-read semaphore optimization
  2012-01-17 10:15         ` Daniel Vetter
@ 2012-01-17 17:55           ` Eric Anholt
  0 siblings, 0 replies; 16+ messages in thread
From: Eric Anholt @ 2012-01-17 17:55 UTC (permalink / raw)
  To: Daniel Vetter, Ben Widawsky; +Cc: Daniel Vetter, intel-gfx


On Tue, 17 Jan 2012 11:15:31 +0100, Daniel Vetter <daniel@ffwll.ch> wrote:
> On Mon, Jan 16, 2012 at 02:20:55PM -0800, Ben Widawsky wrote:
> > On 01/16/2012 01:50 PM, Daniel Vetter wrote:
> > > On Tue, Dec 13, 2011 at 10:36:15AM -0800, Ben Widawsky wrote:
> > >> On 12/13/2011 09:22 AM, Eric Anholt wrote:
> > >>> On Mon, 12 Dec 2011 19:52:08 -0800, Ben Widawsky<ben@bwidawsk.net>  wrote:
> > >>>> Since we don't differentiate on the different GPU read domains, it
> > >>>> should be safe to allow back to back reads to occur without issuing a
> > >>>> wait (or flush in the non-semaphore case).
> > >>>>
> > >>>> This has the unfortunate side effect that we need to keep track of all
> > >>>> the outstanding buffer reads so that we can synchronize on a write, to
> > >>>> another ring (since we don't know which read finishes first). In other
> > >>>> words, the code is quite simple for two rings, but gets more tricky for
> > >>>>> 2 rings.
> > >>>>
> > >>>> Here is a picture of the solution to the above problem
> > >>>>
> > >>>> Ring 0            Ring 1             Ring 2
> > >>>> batch 0           batch 1            batch 2
> > >>>>   read buffer A     read buffer A      wait batch 0
> > >>>>                                        wait batch 1
> > >>>>                                        write buffer A
> > >>>>
> > >>>> This code is really untested. I'm hoping for some feedback if this is
> > >>>> worth cleaning up, and testing more thoroughly.
> > >>>
> > >>> You say it's an optimization -- do you have performance numbers?
> > >>
> > >> 33% improvement on a hacked version of gem_ring_sync_loop with.
> > >>
> > >> It's not really a valid test as it's not coherent, but this is
> > >> approximately the best case improvement.
> > >>
> > >> Oddly semaphores doesn't make much difference in this test, which
> > >> was surprising.
> > > 
> > > Our domain tracking is already complicated in unfunny ways. And (at least
> > > without a use-case showing gains with hard numbers in either perf or power
> > > usage) I think this patch is the kind of "this looks cool" stuff that
> > > added a lot to the current problem.
> > > 
> > > So before adding more complexity on top I'd like to remove some of the
> > > superflous stuff we already have. I.e. all the flushing_list stuff and
> > > maybe other things ...
> > 
> > Can you be more clear on what exactly you want done before taking a
> > patch like this? Maybe I can work on it during some down time.
> 
> I was thinking about Eric's no-more-domains stuff specifically, which has
> tons of natural split-points - and we want to exploit these for merging.
> Imo step 1 would be to just rework the batch dispatch in
> intel_ringbuffer.c so that we unconditionally invalidate before and flush
> afterwards. The ring->flush would become a no-op. No changes to the core
> flushing_list tracking for step 1.
> 
> If we can get this in for 3.4 we could (in the -next merge cycle) walk all
> the callchains from their ends and rip out everything which is a no-op,
> starting from ring->flush. I think that's the safest way to attack this.
> Eric?

I was stuck with a mysterious regression in my previous attempt at the
patch series.  I've recently realized that I can do the major code
deletion with fewer + lines in the diff, which I hope will avoid that.
I'm working on that again now.
