All of lore.kernel.org
 help / color / mirror / Atom feed
From: Chris Wilson <chris@chris-wilson.co.uk>
To: intel-gfx@lists.freedesktop.org
Subject: [PATCH 01/46] drm/i915: Hack and slash, throttle execbuffer hogs
Date: Wed,  6 Feb 2019 13:03:11 +0000	[thread overview]
Message-ID: <20190206130356.18771-2-chris@chris-wilson.co.uk> (raw)
In-Reply-To: <20190206130356.18771-1-chris@chris-wilson.co.uk>

Apply backpressure to hogs that emit requests faster than the GPU can
process them by waiting for their ring to be less than half-full before
proceeding with taking the struct_mutex.

This is a gross hack to apply throttling backpressure; the long term
goal is to remove the struct_mutex contention so that each client
naturally waits, preferably in an asynchronous, nonblocking fashion
(pipelined operations for the win), for their own resources and never
blocks another client within the driver at least. (Realtime priority
goals would extend to ensuring that resource contention favours high
priority clients as well.)

This patch only limits excessive request production and does not attempt
to throttle clients that block waiting for eviction (either global GTT or
system memory) or any other global resources; see above for the long term
goal.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: John Harrison <John.C.Harrison@Intel.com>
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 63 ++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_ringbuffer.c    | 13 -----
 drivers/gpu/drm/i915/intel_ringbuffer.h    | 12 +++++
 3 files changed, 75 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 8eedf7cac493..84ef3abc567e 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -753,6 +753,64 @@ static int eb_select_context(struct i915_execbuffer *eb)
 	return 0;
 }
 
+static struct i915_request *__eb_wait_for_ring(struct intel_ring *ring)
+{
+	struct i915_request *rq;
+
+	if (intel_ring_update_space(ring) >= PAGE_SIZE)
+		return NULL;
+
+	/*
+	 * Find a request that, once waited upon, leaves more than half of
+	 * the ring available. The hysteresis allows us to compete for the
+	 * shared ring and should mean that we sleep less often prior to
+	 * claiming our resources, but not so long that the ring completely
+	 * drains before we can submit our next request.
+	 */
+	list_for_each_entry(rq, &ring->request_list, ring_link) {
+		if (__intel_ring_space(rq->postfix,
+				       ring->emit, ring->size) > ring->size / 2)
+			break;
+	}
+	if (&rq->ring_link == &ring->request_list)
+		return NULL; /* weird, we will check again later for real */
+
+	return i915_request_get(rq);
+}
+
+static int eb_wait_for_ring(const struct i915_execbuffer *eb)
+{
+	const struct intel_context *ce;
+	struct i915_request *rq;
+	int ret = 0;
+
+	/*
+	 * Apply a light amount of backpressure so that hogs wait for ring
+	 * space here, with struct_mutex dropped, rather than blocking later
+	 * whilst holding struct_mutex with all of their resources pinned.
+	 */
+
+	ce = to_intel_context(eb->ctx, eb->engine);
+	if (!ce->ring) /* first use, assume empty! */
+		return 0;
+
+	rq = __eb_wait_for_ring(ce->ring);
+	if (rq) {
+		mutex_unlock(&eb->i915->drm.struct_mutex);
+
+		if (i915_request_wait(rq,
+				      I915_WAIT_INTERRUPTIBLE,
+				      MAX_SCHEDULE_TIMEOUT) < 0)
+			ret = -EINTR;
+
+		i915_request_put(rq);
+
+		mutex_lock(&eb->i915->drm.struct_mutex);
+	}
+
+	return ret;
+}
+
 static int eb_lookup_vmas(struct i915_execbuffer *eb)
 {
 	struct radix_tree_root *handles_vma = &eb->ctx->handles_vma;
@@ -2291,6 +2349,10 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	if (err)
 		goto err_rpm;
 
+	err = eb_wait_for_ring(&eb); /* may temporarily drop struct_mutex */
+	if (unlikely(err))
+		goto err_unlock;
+
 	err = eb_relocate(&eb);
 	if (err) {
 		/*
@@ -2435,6 +2497,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 err_vma:
 	if (eb.exec)
 		eb_release_vmas(&eb);
+err_unlock:
 	mutex_unlock(&dev->struct_mutex);
 err_rpm:
 	intel_runtime_pm_put(eb.i915, wakeref);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index b889b27f8aeb..7f841dba87b3 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -49,19 +49,6 @@ static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
 		I915_GEM_HWS_INDEX_ADDR);
 }
 
-static unsigned int __intel_ring_space(unsigned int head,
-				       unsigned int tail,
-				       unsigned int size)
-{
-	/*
-	 * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
-	 * same cacheline, the Head Pointer must not be greater than the Tail
-	 * Pointer."
-	 */
-	GEM_BUG_ON(!is_power_of_2(size));
-	return (head - tail - CACHELINE_BYTES) & (size - 1);
-}
-
 unsigned int intel_ring_update_space(struct intel_ring *ring)
 {
 	unsigned int space;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 4d4ea6963a72..710ffb221775 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -832,6 +832,18 @@ intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
 	return tail;
 }
 
+static inline unsigned int
+__intel_ring_space(unsigned int head, unsigned int tail, unsigned int size)
+{
+	/*
+	 * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
+	 * same cacheline, the Head Pointer must not be greater than the Tail
+	 * Pointer." Hence CACHELINE_BYTES is held back from the free space.
+	 */
+	GEM_BUG_ON(!is_power_of_2(size));
+	return (head - tail - CACHELINE_BYTES) & (size - 1); /* mod size */
+}
+
 void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno);
 
 int intel_engine_setup_common(struct intel_engine_cs *engine);
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  reply	other threads:[~2019-02-06 13:04 UTC|newest]

Thread overview: 97+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-02-06 13:03 The road to load balancing Chris Wilson
2019-02-06 13:03 ` Chris Wilson [this message]
2019-02-06 13:03 ` [PATCH 02/46] drm/i915: Revoke mmaps and prevent access to fence registers across reset Chris Wilson
2019-02-06 15:56   ` Mika Kuoppala
2019-02-06 16:08     ` Chris Wilson
2019-02-06 16:18       ` Chris Wilson
2019-02-26 19:53   ` Rodrigo Vivi
2019-02-26 20:27     ` Chris Wilson
2019-02-06 13:03 ` [PATCH 03/46] drm/i915: Force the GPU reset upon wedging Chris Wilson
2019-02-06 13:03 ` [PATCH 04/46] drm/i915: Uninterruptibly drain the timelines on unwedging Chris Wilson
2019-02-06 13:03 ` [PATCH 05/46] drm/i915: Wait for old resets before applying debugfs/i915_wedged Chris Wilson
2019-02-06 13:03 ` [PATCH 06/46] drm/i915: Serialise resets with wedging Chris Wilson
2019-02-06 13:03 ` [PATCH 07/46] drm/i915: Don't claim an unstarted request was guilty Chris Wilson
2019-02-06 13:03 ` [PATCH 08/46] drm/i915/execlists: Suppress mere WAIT preemption Chris Wilson
2019-02-11 11:19   ` Tvrtko Ursulin
2019-02-19 10:22   ` Matthew Auld
2019-02-19 10:34     ` Chris Wilson
2019-02-06 13:03 ` [PATCH 09/46] drm/i915/execlists: Suppress redundant preemption Chris Wilson
2019-02-06 13:03 ` [PATCH 10/46] drm/i915: Make request allocation caches global Chris Wilson
2019-02-11 11:43   ` Tvrtko Ursulin
2019-02-11 12:40     ` Chris Wilson
2019-02-11 17:02       ` Tvrtko Ursulin
2019-02-12 11:51         ` Chris Wilson
2019-02-06 13:03 ` [PATCH 11/46] drm/i915: Keep timeline HWSP allocated until idle across the system Chris Wilson
2019-02-06 13:03 ` [PATCH 12/46] drm/i915/execlists: Refactor out can_merge_rq() Chris Wilson
2019-02-06 13:03 ` [PATCH 13/46] drm/i915: Compute the global scheduler caps Chris Wilson
2019-02-11 12:24   ` Tvrtko Ursulin
2019-02-11 12:33     ` Chris Wilson
2019-02-06 13:03 ` [PATCH 14/46] drm/i915: Use HW semaphores for inter-engine synchronisation on gen8+ Chris Wilson
2019-02-06 13:03 ` [PATCH 15/46] drm/i915: Prioritise non-busywait semaphore workloads Chris Wilson
2019-02-06 13:03 ` [PATCH 16/46] drm/i915: Show support for accurate sw PMU busyness tracking Chris Wilson
2019-02-06 13:03 ` [PATCH 17/46] drm/i915: Apply rps waitboosting for dma_fence_wait_timeout() Chris Wilson
2019-02-11 18:06   ` Tvrtko Ursulin
2019-02-06 13:03 ` [PATCH 18/46] drm/i915: Replace global_seqno with a hangcheck heartbeat seqno Chris Wilson
2019-02-11 12:40   ` Tvrtko Ursulin
2019-02-11 12:44     ` Chris Wilson
2019-02-11 16:56       ` Tvrtko Ursulin
2019-02-12 13:36         ` Chris Wilson
2019-02-06 13:03 ` [PATCH 19/46] drm/i915/pmu: Always sample an active ringbuffer Chris Wilson
2019-02-11 18:18   ` Tvrtko Ursulin
2019-02-12 13:40     ` Chris Wilson
2019-02-06 13:03 ` [PATCH 20/46] drm/i915: Remove access to global seqno in the HWSP Chris Wilson
2019-02-11 18:22   ` Tvrtko Ursulin
2019-02-06 13:03 ` [PATCH 21/46] drm/i915: Remove i915_request.global_seqno Chris Wilson
2019-02-11 18:44   ` Tvrtko Ursulin
2019-02-12 13:45     ` Chris Wilson
2019-02-06 13:03 ` [PATCH 22/46] drm/i915: Force GPU idle on suspend Chris Wilson
2019-02-06 13:03 ` [PATCH 23/46] drm/i915/selftests: Improve switch-to-kernel-context checking Chris Wilson
2019-02-06 13:03 ` [PATCH 24/46] drm/i915: Do a synchronous switch-to-kernel-context on idling Chris Wilson
2019-02-21 19:48   ` Daniele Ceraolo Spurio
2019-02-21 21:17     ` Chris Wilson
2019-02-21 21:31       ` Daniele Ceraolo Spurio
2019-02-21 21:42         ` Chris Wilson
2019-02-21 22:53           ` Daniele Ceraolo Spurio
2019-02-21 23:25             ` Chris Wilson
2019-02-22  0:29               ` Daniele Ceraolo Spurio
2019-02-06 13:03 ` [PATCH 25/46] drm/i915: Store the BIT(engine->id) as the engine's mask Chris Wilson
2019-02-11 18:51   ` Tvrtko Ursulin
2019-02-12 13:51     ` Chris Wilson
2019-02-06 13:03 ` [PATCH 26/46] drm/i915: Refactor common code to load initial power context Chris Wilson
2019-02-06 13:03 ` [PATCH 27/46] drm/i915: Reduce presumption of request ordering for barriers Chris Wilson
2019-02-06 13:03 ` [PATCH 28/46] drm/i915: Remove has-kernel-context Chris Wilson
2019-02-06 13:03 ` [PATCH 29/46] drm/i915: Introduce the i915_user_extension_method Chris Wilson
2019-02-11 19:00   ` Tvrtko Ursulin
2019-02-12 13:56     ` Chris Wilson
2019-02-06 13:03 ` [PATCH 30/46] drm/i915: Track active engines within a context Chris Wilson
2019-02-11 19:11   ` Tvrtko Ursulin
2019-02-12 13:59     ` Chris Wilson
2019-02-06 13:03 ` [PATCH 31/46] drm/i915: Introduce a context barrier callback Chris Wilson
2019-02-06 13:03 ` [PATCH 32/46] drm/i915: Create/destroy VM (ppGTT) for use with contexts Chris Wilson
2019-02-12 11:18   ` Tvrtko Ursulin
2019-02-12 14:11     ` Chris Wilson
2019-02-06 13:03 ` [PATCH 33/46] drm/i915: Extend CONTEXT_CREATE to set parameters upon construction Chris Wilson
2019-02-12 13:43   ` Tvrtko Ursulin
2019-02-06 13:03 ` [PATCH 34/46] drm/i915: Allow contexts to share a single timeline across all engines Chris Wilson
2019-02-06 13:03 ` [PATCH 35/46] drm/i915: Fix I915_EXEC_RING_MASK Chris Wilson
2019-02-06 13:03 ` [PATCH 36/46] drm/i915: Remove last traces of exec-id (GEM_BUSY) Chris Wilson
2019-02-06 13:03 ` [PATCH 37/46] drm/i915: Re-arrange execbuf so context is known before engine Chris Wilson
2019-02-06 13:03 ` [PATCH 38/46] drm/i915: Allow a context to define its set of engines Chris Wilson
2019-02-25 10:41   ` Tvrtko Ursulin
2019-02-25 10:47     ` Chris Wilson
2019-02-06 13:03 ` [PATCH 39/46] drm/i915: Extend I915_CONTEXT_PARAM_SSEU to support local ctx->engine[] Chris Wilson
2019-02-06 13:03 ` [PATCH 40/46] drm/i915: Pass around the intel_context Chris Wilson
2019-02-06 13:03 ` [PATCH 41/46] drm/i915: Split struct intel_context definition to its own header Chris Wilson
2019-02-06 13:03 ` [PATCH 42/46] drm/i915: Move over to intel_context_lookup() Chris Wilson
2019-02-06 14:27   ` [PATCH] " Chris Wilson
2019-02-06 13:03 ` [PATCH 43/46] drm/i915: Load balancing across a virtual engine Chris Wilson
2019-02-06 13:03 ` [PATCH 44/46] drm/i915: Extend execution fence to support a callback Chris Wilson
2019-02-06 13:03 ` [PATCH 45/46] drm/i915/execlists: Virtual engine bonding Chris Wilson
2019-02-06 13:03 ` [PATCH 46/46] drm/i915: Allow specification of parallel execbuf Chris Wilson
2019-02-06 13:52 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/46] drm/i915: Hack and slash, throttle execbuffer hogs Patchwork
2019-02-06 14:09 ` ✗ Fi.CI.BAT: failure " Patchwork
2019-02-06 14:11 ` ✗ Fi.CI.SPARSE: warning " Patchwork
2019-02-06 14:37 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/46] drm/i915: Hack and slash, throttle execbuffer hogs (rev2) Patchwork
2019-02-06 14:55 ` ✗ Fi.CI.SPARSE: " Patchwork
2019-02-06 14:56 ` ✓ Fi.CI.BAT: success " Patchwork
2019-02-06 16:18 ` ✗ Fi.CI.IGT: failure " Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190206130356.18771-2-chris@chris-wilson.co.uk \
    --to=chris@chris-wilson.co.uk \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.