* [PATCH 04/23] drm/i915/execlists: Always force a context reload when rewinding RING_TAIL
From: Prasad Nallani @ 2020-05-29  8:17 UTC
  To: gfx-internal-devel
  Cc: andi.shyti, Chris Wilson, Mika Kuoppala, stable, Jani Nikula

From: Chris Wilson <chris@chris-wilson.co.uk>

If we rewind the RING_TAIL on a context, due to a preemption event, we
must force the context restore for the RING_TAIL update to be properly
handled. Rather than note which preemption events may cause us to rewind
the tail, compare the new request's tail with the previously submitted
RING_TAIL, as it turns out that timeslicing was causing unexpected
rewinds.

   <idle>-0       0d.s2 1280851190us : __execlists_submission_tasklet: 0000:00:02.0 rcs0: expired last=130:4698, prio=3, hint=3
   <idle>-0       0d.s2 1280851192us : __i915_request_unsubmit: 0000:00:02.0 rcs0: fence 66:119966, current 119964
   <idle>-0       0d.s2 1280851195us : __i915_request_unsubmit: 0000:00:02.0 rcs0: fence 130:4698, current 4695
   <idle>-0       0d.s2 1280851198us : __i915_request_unsubmit: 0000:00:02.0 rcs0: fence 130:4696, current 4695
^----  Note we unwind 2 requests from the same context

   <idle>-0       0d.s2 1280851208us : __i915_request_submit: 0000:00:02.0 rcs0: fence 130:4696, current 4695
   <idle>-0       0d.s2 1280851213us : __i915_request_submit: 0000:00:02.0 rcs0: fence 134:1508, current 1506
^---- But to apply the new timeslice, we have to replay the first request
      before the new client can start -- the unexpected RING_TAIL rewind

   <idle>-0       0d.s2 1280851219us : trace_ports: 0000:00:02.0 rcs0: submit { 130:4696*, 134:1508 }
 synmark2-5425    2..s. 1280851239us : process_csb: 0000:00:02.0 rcs0: cs-irq head=5, tail=0
 synmark2-5425    2..s. 1280851240us : process_csb: 0000:00:02.0 rcs0: csb[0]: status=0x00008002:0x00000000
^---- Preemption event for the ELSP update; note the lite-restore

 synmark2-5425    2..s. 1280851243us : trace_ports: 0000:00:02.0 rcs0: preempted { 130:4698, 66:119966 }
 synmark2-5425    2..s. 1280851246us : trace_ports: 0000:00:02.0 rcs0: promote { 130:4696*, 134:1508 }
 synmark2-5425    2.... 1280851462us : __i915_request_commit: 0000:00:02.0 rcs0: fence 130:4700, current 4695
 synmark2-5425    2.... 1280852111us : __i915_request_commit: 0000:00:02.0 rcs0: fence 130:4702, current 4695
 synmark2-5425    2.Ns1 1280852296us : process_csb: 0000:00:02.0 rcs0: cs-irq head=0, tail=2
 synmark2-5425    2.Ns1 1280852297us : process_csb: 0000:00:02.0 rcs0: csb[1]: status=0x00000814:0x00000000
 synmark2-5425    2.Ns1 1280852299us : trace_ports: 0000:00:02.0 rcs0: completed { 130:4696!, 134:1508 }
 synmark2-5425    2.Ns1 1280852301us : process_csb: 0000:00:02.0 rcs0: csb[2]: status=0x00000818:0x00000040
 synmark2-5425    2.Ns1 1280852302us : trace_ports: 0000:00:02.0 rcs0: completed { 134:1508, 0:0 }
 synmark2-5425    2.Ns1 1280852313us : process_csb: process_csb:2336 GEM_BUG_ON(!i915_request_completed(*execlists->active) && !reset_in_progress(execlists))

Fixes: 8ee36e048c98 ("drm/i915/execlists: Minimalistic timeslicing")
References: 82c69bf58650 ("drm/i915/gt: Detect if we miss WaIdleLiteRestore")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: <stable@vger.kernel.org> # v5.4+
Link: https://patchwork.freedesktop.org/patch/msgid/20200207211452.2860634-1-chris@chris-wilson.co.uk
(cherry picked from commit 5ba32c7be81e53ea8a27190b0f6be98e6c6779af)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
(cherry picked from commit b1339ecac661e1cf3e1dc78ac56bff3aeeaeb92c)
---
 drivers/gpu/drm/i915/gt/intel_lrc.c        | 18 ++++++++----------
 drivers/gpu/drm/i915/gt/intel_ring.c       |  1 +
 drivers/gpu/drm/i915/gt/intel_ring.h       |  8 ++++++++
 drivers/gpu/drm/i915/gt/intel_ring_types.h |  1 +
 4 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 2a04c767121b..c50e5da842ca 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1446,7 +1446,7 @@ static u64 execlists_update_context(struct i915_request *rq)
 {
 	struct intel_context *ce = rq->context;
 	u64 desc = ce->lrc_desc;
-	u32 tail;
+	u32 tail, prev;
 
 	/*
 	 * WaIdleLiteRestore:bdw,skl
@@ -1459,9 +1459,15 @@ static u64 execlists_update_context(struct i915_request *rq)
 	 * subsequent resubmissions (for lite restore). Should that fail us,
 	 * and we try and submit the same tail again, force the context
 	 * reload.
+	 *
+	 * If we need to return to a preempted context, we need to skip the
+	 * lite-restore and force it to reload the RING_TAIL. Otherwise, the
+	 * HW has a tendency to ignore us rewinding the TAIL to the end of
+	 * an earlier request.
 	 */
 	tail = intel_ring_set_tail(rq->ring, rq->tail);
-	if (unlikely(ce->lrc_reg_state[CTX_RING_TAIL] == tail))
+	prev = ce->lrc_reg_state[CTX_RING_TAIL];
+	if (unlikely(intel_ring_direction(rq->ring, tail, prev) <= 0))
 		desc |= CTX_DESC_FORCE_RESTORE;
 	ce->lrc_reg_state[CTX_RING_TAIL] = tail;
 	rq->tail = rq->wa_tail;
@@ -1968,14 +1974,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 			 */
 			__unwind_incomplete_requests(engine);
 
-			/*
-			 * If we need to return to the preempted context, we
-			 * need to skip the lite-restore and force it to
-			 * reload the RING_TAIL. Otherwise, the HW has a
-			 * tendency to ignore us rewinding the TAIL to the
-			 * end of an earlier request.
-			 */
-			last->context->lrc_desc |= CTX_DESC_FORCE_RESTORE;
 			last = NULL;
 		} else if (need_timeslice(engine, last) &&
 			   timer_expired(&engine->execlists.timer)) {
diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c
index 374b28f13ca0..6ff803f397c4 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring.c
@@ -145,6 +145,7 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size)
 
 	kref_init(&ring->ref);
 	ring->size = size;
+	ring->wrap = BITS_PER_TYPE(ring->size) - ilog2(size);
 
 	/*
 	 * Workaround an erratum on the i830 which causes a hang if
diff --git a/drivers/gpu/drm/i915/gt/intel_ring.h b/drivers/gpu/drm/i915/gt/intel_ring.h
index ea2839d9e044..5bdce24994aa 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring.h
+++ b/drivers/gpu/drm/i915/gt/intel_ring.h
@@ -56,6 +56,14 @@ static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos)
 	return pos & (ring->size - 1);
 }
 
+static inline int intel_ring_direction(const struct intel_ring *ring,
+				       u32 next, u32 prev)
+{
+	typecheck(typeof(ring->size), next);
+	typecheck(typeof(ring->size), prev);
+	return (next - prev) << ring->wrap;
+}
+
 static inline bool
 intel_ring_offset_valid(const struct intel_ring *ring,
 			unsigned int pos)
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_types.h b/drivers/gpu/drm/i915/gt/intel_ring_types.h
index d9f17f38e0cc..3cd7fec7fd8d 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_ring_types.h
@@ -45,6 +45,7 @@ struct intel_ring {
 
 	u32 space;
 	u32 size;
+	u32 wrap;
 	u32 effective_size;
 };
 

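The new intel_ring_direction() helper is the heart of the fix: ring->size is a
power of two, so shifting the u32 difference of two offsets left by ring->wrap
(BITS_PER_TYPE(u32) - ilog2(size)) pushes the ring's significant bits to the
top of the word, and the result read as a signed int is the signed distance
between the two TAIL positions modulo the ring size. A minimal userspace
sketch of the same arithmetic (toy names, not the driver code):

#include <stdio.h>
#include <stdint.h>

/* Toy model of intel_ring_direction(): offsets live in [0, size) with
 * size a power of two.  Shifting the difference so the ring's
 * significant bits occupy the top of the u32 yields a signed distance
 * modulo the ring size. */
static int ring_direction(uint32_t size, uint32_t next, uint32_t prev)
{
	unsigned int wrap = 32 - __builtin_ctz(size); /* BITS_PER_TYPE - ilog2 */

	return (int)((next - prev) << wrap); /* > 0: ahead, <= 0: rewind */
}

int main(void)
{
	uint32_t size = 4096; /* 2^12, so wrap == 20 */

	/* Moving forward across the wrap point still counts as ahead... */
	printf("forward: %d\n", ring_direction(size, 16, 4090) > 0); /* 1 */
	/* ...while rewinding to an earlier request does not, and must
	 * force CTX_DESC_FORCE_RESTORE. */
	printf("rewind:  %d\n", ring_direction(size, 4090, 16) > 0); /* 0 */
	return 0;
}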

* [PATCH 06/23] drm/i915: Actually emit the await_start
From: Prasad Nallani @ 2020-05-29  8:17 UTC
  To: gfx-internal-devel
  Cc: andi.shyti, Chris Wilson, Tvrtko Ursulin, stable, Tvrtko Ursulin

From: Chris Wilson <chris@chris-wilson.co.uk>

Fix the inverted test to emit the wait on the end of the previous
request if we /haven't/ already.

Fixes: 6a79d848403d ("drm/i915: Lock signaler timeline while navigating")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: <stable@vger.kernel.org> # v5.5+
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200305104210.2619967-1-chris@chris-wilson.co.uk
(cherry picked from commit 07e9c59d63df6a1c44c1975c01827ba18b69270a)
---
 drivers/gpu/drm/i915/i915_request.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index d5ab8054d5e5..4143d0c6b343 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -797,7 +797,7 @@ i915_request_await_start(struct i915_request *rq, struct i915_request *signal)
 		return 0;
 
 	err = 0;
-	if (intel_timeline_sync_is_later(i915_request_timeline(rq), fence))
+	if (!intel_timeline_sync_is_later(i915_request_timeline(rq), fence))
 		err = i915_sw_fence_await_dma_fence(&rq->submit,
 						    fence, 0,
 						    I915_FENCE_GFP);


* [PATCH 07/23] drm/i915: Return early for await_start on same timeline
From: Prasad Nallani @ 2020-05-29  8:17 UTC
  To: gfx-internal-devel
  Cc: andi.shyti, Chris Wilson, Tvrtko Ursulin, stable, Tvrtko Ursulin

From: Chris Wilson <chris@chris-wilson.co.uk>

Requests within a timeline are ordered by that timeline, so awaiting the
start of a request within the same timeline is a no-op. This used to work
by falling out of the mutex_trylock() (as the signaler and waiter shared
the same timeline) without returning an error.

Fixes: 6a79d848403d ("drm/i915: Lock signaler timeline while navigating")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: <stable@vger.kernel.org> # v5.5+
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200305134822.2750496-1-chris@chris-wilson.co.uk
(cherry picked from commit ab7a69020fb5d5c7ba19fba60f62fd6f9ca9f779)
---
 drivers/gpu/drm/i915/i915_request.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 4143d0c6b343..1efc82f78f44 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -767,8 +767,8 @@ i915_request_await_start(struct i915_request *rq, struct i915_request *signal)
 	struct dma_fence *fence;
 	int err;
 
-	GEM_BUG_ON(i915_request_timeline(rq) ==
-		   rcu_access_pointer(signal->timeline));
+	if (i915_request_timeline(rq) == rcu_access_pointer(signal->timeline))
+		return 0;
 
 	fence = NULL;
 	rcu_read_lock();


* [PATCH 08/23] drm/i915: Defer semaphore priority bumping to a workqueue
From: Prasad Nallani @ 2020-05-29  8:17 UTC
  To: gfx-internal-devel; +Cc: andi.shyti, Chris Wilson, Tvrtko Ursulin, stable

From: Chris Wilson <chris@chris-wilson.co.uk>

Since the semaphore fence may be signaled from inside an interrupt
handler, underneath a request's request->lock, we cannot then take the
engine->active.lock to process the semaphore priority bump, as
traversing our call tree may land us on another request whose lock is
already held.

CPU 0:
[ 2243.218864]  _raw_spin_lock_irqsave+0x9a/0xb0
[ 2243.218867]  i915_schedule_bump_priority+0x49/0x80 [i915]
[ 2243.218869]  semaphore_notify+0x6d/0x98 [i915]
[ 2243.218871]  __i915_sw_fence_complete+0x61/0x420 [i915]
[ 2243.218874]  ? kmem_cache_free+0x211/0x290
[ 2243.218876]  i915_sw_fence_complete+0x58/0x80 [i915]
[ 2243.218879]  dma_i915_sw_fence_wake+0x3e/0x80 [i915]
[ 2243.218881]  signal_irq_work+0x571/0x690 [i915]
[ 2243.218883]  irq_work_run_list+0xd7/0x120
[ 2243.218885]  irq_work_run+0x1d/0x50
[ 2243.218887]  smp_irq_work_interrupt+0x21/0x30
[ 2243.218889]  irq_work_interrupt+0xf/0x20

CPU 1:
[ 2242.173107]  _raw_spin_lock+0x8f/0xa0
[ 2242.173110]  __i915_request_submit+0x64/0x4a0 [i915]
[ 2242.173112]  __execlists_submission_tasklet+0x8ee/0x2120 [i915]
[ 2242.173114]  ? i915_sched_lookup_priolist+0x1e3/0x2b0 [i915]
[ 2242.173117]  execlists_submit_request+0x2e8/0x2f0 [i915]
[ 2242.173119]  submit_notify+0x8f/0xc0 [i915]
[ 2242.173121]  __i915_sw_fence_complete+0x61/0x420 [i915]
[ 2242.173124]  ? _raw_spin_unlock_irqrestore+0x39/0x40
[ 2242.173137]  i915_sw_fence_complete+0x58/0x80 [i915]
[ 2242.173140]  i915_sw_fence_commit+0x16/0x20 [i915]

Closes: https://gitlab.freedesktop.org/drm/intel/issues/1318
Fixes: b7404c7ecb38 ("drm/i915: Bump ready tasks ahead of busywaits")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: <stable@vger.kernel.org> # v5.2+
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200310101720.9944-1-chris@chris-wilson.co.uk
(cherry picked from commit 209df10bb4536c81c2540df96c02cd079435357f)
---
 drivers/gpu/drm/i915/i915_request.c | 22 +++++++++++++++++-----
 drivers/gpu/drm/i915/i915_request.h |  2 ++
 2 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 1efc82f78f44..1fd94723a5cd 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -527,19 +527,31 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
 	return NOTIFY_DONE;
 }
 
+static void irq_semaphore_cb(struct irq_work *wrk)
+{
+	struct i915_request *rq =
+		container_of(wrk, typeof(*rq), semaphore_work);
+
+	i915_schedule_bump_priority(rq, I915_PRIORITY_NOSEMAPHORE);
+	i915_request_put(rq);
+}
+
 static int __i915_sw_fence_call
 semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
 {
-	struct i915_request *request =
-		container_of(fence, typeof(*request), semaphore);
+	struct i915_request *rq = container_of(fence, typeof(*rq), semaphore);
 
 	switch (state) {
 	case FENCE_COMPLETE:
-		i915_schedule_bump_priority(request, I915_PRIORITY_NOSEMAPHORE);
+		if (!(READ_ONCE(rq->sched.attr.priority) & I915_PRIORITY_NOSEMAPHORE)) {
+			i915_request_get(rq);
+			init_irq_work(&rq->semaphore_work, irq_semaphore_cb);
+			irq_work_queue(&rq->semaphore_work);
+		}
 		break;
 
 	case FENCE_FREE:
-		i915_request_put(request);
+		i915_request_put(rq);
 		break;
 	}
 
@@ -1296,9 +1308,9 @@ void __i915_request_queue(struct i915_request *rq,
 	 * decide whether to preempt the entire chain so that it is ready to
 	 * run at the earliest possible convenience.
 	 */
-	i915_sw_fence_commit(&rq->semaphore);
 	if (attr && rq->engine->schedule)
 		rq->engine->schedule(rq, attr);
+	i915_sw_fence_commit(&rq->semaphore);
 	i915_sw_fence_commit(&rq->submit);
 }
 
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index f57eadcf3583..fccc339949ec 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -26,6 +26,7 @@
 #define I915_REQUEST_H
 
 #include <linux/dma-fence.h>
+#include <linux/irq_work.h>
 #include <linux/lockdep.h>
 
 #include "gem/i915_gem_context_types.h"
@@ -208,6 +209,7 @@ struct i915_request {
 	};
 	struct list_head execute_cb;
 	struct i915_sw_fence semaphore;
+	struct irq_work semaphore_work;
 
 	/*
 	 * A list of everyone we wait upon, and everyone who waits upon us.

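For reference, the deferral pattern above in isolation: grab a reference in
the (interrupt-context) fence callback, queue an irq_work, and perform the
lock-taking priority bump from the irq_work handler once the signaler's locks
have been dropped. A hedged sketch with hypothetical names (do_priority_bump()
and bump_release() stand in for the real work):

#include <linux/irq_work.h>
#include <linux/kref.h>

struct bump {
	struct kref ref;
	struct irq_work work;
	int prio;
};

static void do_priority_bump(struct bump *b, int prio); /* hypothetical */
static void bump_release(struct kref *ref);             /* hypothetical */

static void bump_worker(struct irq_work *wrk)
{
	struct bump *b = container_of(wrk, typeof(*b), work);

	/* Runs from a self-IPI: no request/engine locks are held here,
	 * so it is safe to walk the scheduler tree and take locks. */
	do_priority_bump(b, b->prio);
	kref_put(&b->ref, bump_release);
}

/* Called from the fence-signal (hard irq) path, under the signaler's
 * locks -- so defer the bump instead of doing it inline. */
static void bump_notify(struct bump *b)
{
	kref_get(&b->ref); /* keep the object alive until the work runs */
	init_irq_work(&b->work, bump_worker);
	irq_work_queue(&b->work);
}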

* [PATCH 09/23] drm/i915/execlists: Enable timeslice on partial virtual engine dequeue
From: Prasad Nallani @ 2020-05-29  8:17 UTC
  To: gfx-internal-devel
  Cc: andi.shyti, Chris Wilson, Mika Kuoppala, Tvrtko Ursulin, stable,
	Jani Nikula

From: Chris Wilson <chris@chris-wilson.co.uk>

If we stop filling the ELSP due to an incompatible virtual engine
request, check if we should enable the timeslice on behalf of the queue.

This fixes the case where we were inspecting the last->next element when
we knew that the last element was the final request in the execution
queue, and so wrongly decided we did not need to enable timeslicing
despite the intent to do so!

Fixes: 8ee36e048c98 ("drm/i915/execlists: Minimalistic timeslicing")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: <stable@vger.kernel.org> # v5.4+
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200306113012.3184606-1-chris@chris-wilson.co.uk
(cherry picked from commit 3df2deed411e0f1b7312baf0139aab8bba4c0410)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
(cherry picked from commit eafc2aa20fba319b6e791a1b0c45a91511eccb6b)
---
 drivers/gpu/drm/i915/gt/intel_lrc.c | 29 ++++++++++++++++++-----------
 1 file changed, 18 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index c50e5da842ca..f12ff18e9bed 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1808,11 +1808,9 @@ need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq)
 	if (!intel_engine_has_timeslices(engine))
 		return false;
 
-	if (list_is_last(&rq->sched.link, &engine->active.requests))
-		return false;
-
-	hint = max(rq_prio(list_next_entry(rq, sched.link)),
-		   engine->execlists.queue_priority_hint);
+	hint = engine->execlists.queue_priority_hint;
+	if (!list_is_last(&rq->sched.link, &engine->active.requests))
+		hint = max(hint, rq_prio(list_next_entry(rq, sched.link)));
 
 	return hint >= effective_prio(rq);
 }
@@ -1854,6 +1852,18 @@ static void set_timeslice(struct intel_engine_cs *engine)
 	set_timer_ms(&engine->execlists.timer, active_timeslice(engine));
 }
 
+static void start_timeslice(struct intel_engine_cs *engine)
+{
+	struct intel_engine_execlists *execlists = &engine->execlists;
+
+	execlists->switch_priority_hint = execlists->queue_priority_hint;
+
+	if (timer_pending(&execlists->timer))
+		return;
+
+	set_timer_ms(&execlists->timer, timeslice(engine));
+}
+
 static void record_preemption(struct intel_engine_execlists *execlists)
 {
 	(void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++);
@@ -2017,11 +2027,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 				 * Even if ELSP[1] is occupied and not worthy
 				 * of timeslices, our queue might be.
 				 */
-				if (!execlists->timer.expires &&
-				    need_timeslice(engine, last))
-					set_timer_ms(&execlists->timer,
-						     timeslice(engine));
-
+				start_timeslice(engine);
 				return;
 			}
 		}
@@ -2056,7 +2062,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 
 			if (last && !can_merge_rq(last, rq)) {
 				spin_unlock(&ve->base.active.lock);
-				return; /* leave this for another */
+				start_timeslice(engine);
+				return; /* leave this for another sibling */
 			}
 
 			ENGINE_TRACE(engine,

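The behavioural change in need_timeslice() is easiest to see with concrete
numbers. A toy model of the before/after decision (plain C, toy names, not
the driver code):

#include <stdbool.h>
#include <stdio.h>

static int max_int(int a, int b) { return a > b ? a : b; }

/* Old: bail out whenever the running request is the last on the active
 * list, never consulting the queue's priority hint. */
static bool need_timeslice_old(bool last_on_list, int next_prio,
			       int queue_hint, int cur_prio)
{
	if (last_on_list)
		return false;
	return max_int(next_prio, queue_hint) >= cur_prio;
}

/* New: always start from the queue hint, and only fold in the next
 * request's priority when there is a next request. */
static bool need_timeslice_new(bool last_on_list, int next_prio,
			       int queue_hint, int cur_prio)
{
	int hint = queue_hint;

	if (!last_on_list)
		hint = max_int(hint, next_prio);
	return hint >= cur_prio;
}

int main(void)
{
	/* The running request (prio 0) is alone on the active list, but a
	 * prio-2 request waits in the queue: the old code never enabled
	 * the timeslice timer. */
	printf("old: %d, new: %d\n",
	       need_timeslice_old(true, 0, 2, 0),   /* 0 */
	       need_timeslice_new(true, 0, 2, 0));  /* 1 */
	return 0;
}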

* [PATCH 10/23] drm/i915/gt: Close race between cacheline_retire and free
From: Prasad Nallani @ 2020-05-29  8:17 UTC
  To: gfx-internal-devel
  Cc: andi.shyti, Chris Wilson, Tvrtko Ursulin, Mika Kuoppala,
	Matthew Auld, stable, Jani Nikula

From: Chris Wilson <chris@chris-wilson.co.uk>

If the cacheline may still be busy, atomically mark it for future
release, and only if we can determine that it will never be used again,
immediately free it.

Closes: https://gitlab.freedesktop.org/drm/intel/issues/1392
Fixes: ebece7539242 ("drm/i915: Keep timeline HWSP allocated until idle across the system")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: <stable@vger.kernel.org> # v5.2+
Link: https://patchwork.freedesktop.org/patch/msgid/20200306154647.3528345-1-chris@chris-wilson.co.uk
(cherry picked from commit 2d4bd971f5baa51418625f379a69f5d58b5a0450)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
(cherry picked from commit 8ea6bb8e4d47e07518e5dba4f5cb77e210f0df82)
---
 drivers/gpu/drm/i915/gt/intel_timeline.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index 23405e1558c6..3621273ad99a 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -192,11 +192,15 @@ static void cacheline_release(struct intel_timeline_cacheline *cl)
 
 static void cacheline_free(struct intel_timeline_cacheline *cl)
 {
+	if (!i915_active_acquire_if_busy(&cl->active)) {
+		__idle_cacheline_free(cl);
+		return;
+	}
+
 	GEM_BUG_ON(ptr_test_bit(cl->vaddr, CACHELINE_FREE));
 	cl->vaddr = ptr_set_bit(cl->vaddr, CACHELINE_FREE);
 
-	if (i915_active_is_idle(&cl->active))
-		__idle_cacheline_free(cl);
+	i915_active_release(&cl->active);
 }
 
 int intel_timeline_init(struct intel_timeline *timeline,

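The shape of the fix is the classic "free immediately if idle, otherwise mark
and let retirement free" pattern. A hedged sketch with a plain refcount
standing in for i915_active (all names hypothetical):

#include <linux/refcount.h>
#include <linux/bitops.h>

#define CL_FREE_ON_RETIRE 0

struct cl {
	refcount_t busy;	/* stands in for i915_active */
	unsigned long flags;
};

static void free_cl(struct cl *cl); /* hypothetical */

/* Runs once the last busy reference has been dropped. */
static void cl_retire(struct cl *cl)
{
	if (test_bit(CL_FREE_ON_RETIRE, &cl->flags))
		free_cl(cl);
}

static void cl_free(struct cl *cl)
{
	/* If we cannot pin it busy, it is already idle: no retirement
	 * can run concurrently, so it is safe to free right away. */
	if (!refcount_inc_not_zero(&cl->busy)) {
		free_cl(cl);
		return;
	}

	/* Still busy: atomically mark it for release on retirement,
	 * then drop our pin; whoever drops the last reference frees it. */
	set_bit(CL_FREE_ON_RETIRE, &cl->flags);
	if (refcount_dec_and_test(&cl->busy))
		cl_retire(cl);
}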

* [PATCH 11/23] drm/i915/gen12: Disable preemption timeout
From: Prasad Nallani @ 2020-05-29  8:17 UTC
  To: gfx-internal-devel
  Cc: andi.shyti, Tvrtko Ursulin, Chris Wilson, Joonas Lahtinen,
	Michal Mrozek, stable, Michal Mrozek

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Allow super long OpenCL workloads, which cannot be preempted within
the default timeout, to run out of the box.

v2:
 * Make it stick out more and apply only to RCS. (Chris)

v3:
 * Mention platform override in kconfig. (Joonas)

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Michal Mrozek <michal.mrozek@intel.com>
Cc: <stable@vger.kernel.org> # v5.6+
Acked-by: Chris Wilson <chris@chris-wilson.co.uk>
Acked-by: Michal Mrozek <Michal.mrozek@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200312115748.29970-1-tvrtko.ursulin@linux.intel.com
(cherry picked from commit 07bcfd1291de77ffa9b627b4442783aba1335229)
---
 drivers/gpu/drm/i915/Kconfig.profile      |  4 ++++
 drivers/gpu/drm/i915/gt/intel_engine_cs.c | 13 +++++++++----
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/Kconfig.profile b/drivers/gpu/drm/i915/Kconfig.profile
index ba8767fc0d6e..0bfd276c19fe 100644
--- a/drivers/gpu/drm/i915/Kconfig.profile
+++ b/drivers/gpu/drm/i915/Kconfig.profile
@@ -41,6 +41,10 @@ config DRM_I915_PREEMPT_TIMEOUT
 
 	  May be 0 to disable the timeout.
 
+	  The compiled in default may get overridden at driver probe time on
+	  certain platforms and certain engines which will be reflected in the
+	  sysfs control.
+
 config DRM_I915_MAX_REQUEST_BUSYWAIT
 	int "Busywait for request completion limit (ns)"
 	default 8000 # nanoseconds
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 8bab5767f30c..b42001f937dd 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -274,6 +274,7 @@ static void intel_engine_sanitize_mmio(struct intel_engine_cs *engine)
 static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
 {
 	const struct engine_info *info = &intel_engines[id];
+	struct drm_i915_private *i915 = gt->i915;
 	struct intel_engine_cs *engine;
 
 	BUILD_BUG_ON(MAX_ENGINE_CLASS >= BIT(GEN11_ENGINE_CLASS_WIDTH));
@@ -300,11 +301,11 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
 	engine->id = id;
 	engine->legacy_idx = INVALID_ENGINE;
 	engine->mask = BIT(id);
-	engine->i915 = gt->i915;
+	engine->i915 = i915;
 	engine->gt = gt;
 	engine->uncore = gt->uncore;
 	engine->hw_id = engine->guc_id = info->hw_id;
-	engine->mmio_base = __engine_mmio_base(gt->i915, info->mmio_bases);
+	engine->mmio_base = __engine_mmio_base(i915, info->mmio_bases);
 
 	engine->class = info->class;
 	engine->instance = info->instance;
@@ -321,11 +322,15 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
 	engine->props.timeslice_duration_ms =
 		CONFIG_DRM_I915_TIMESLICE_DURATION;
 
+	/* Override to uninterruptible for OpenCL workloads. */
+	if (INTEL_GEN(i915) == 12 && engine->class == RENDER_CLASS)
+		engine->props.preempt_timeout_ms = 0;
+
 	engine->context_size = intel_engine_context_size(gt, engine->class);
 	if (WARN_ON(engine->context_size > BIT(20)))
 		engine->context_size = 0;
 	if (engine->context_size)
-		DRIVER_CAPS(gt->i915)->has_logical_contexts = true;
+		DRIVER_CAPS(i915)->has_logical_contexts = true;
 
 	/* Nothing to do here, execute in order of dependencies */
 	engine->schedule = NULL;
@@ -341,7 +346,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
 	gt->engine_class[info->class][info->instance] = engine;
 	gt->engine[id] = engine;
 
-	gt->i915->engine[id] = engine;
+	i915->engine[id] = engine;
 
 	return 0;
 }


* [PATCH 13/23] drm/i915/gem: Flush all the reloc_gpu batch
From: Prasad Nallani @ 2020-05-29  8:18 UTC
  To: gfx-internal-devel; +Cc: andi.shyti, Chris Wilson, Matthew Auld, stable

From: Chris Wilson <chris@chris-wilson.co.uk>

__i915_gem_object_flush_map() takes a byte range, so feed it the written
bytes and do not mistake the u32 index for bytes!

Fixes: a679f58d0510 ("drm/i915: Flush pages on acquisition")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Matthew Auld <matthew.william.auld@gmail.com>
Cc: <stable@vger.kernel.org> # v5.2+
Reviewed-by: Matthew Auld <matthew.william.auld@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200406114821.10949-1-chris@chris-wilson.co.uk
(cherry picked from commit 30c88a47f1abd5744908d3681f54dcf823fe2a12)
---
 drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 5c1fa7513a53..4de2a6b0b29d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -915,11 +915,13 @@ static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache)
 
 static void reloc_gpu_flush(struct reloc_cache *cache)
 {
-	GEM_BUG_ON(cache->rq_size >= cache->rq->batch->obj->base.size / sizeof(u32));
+	struct drm_i915_gem_object *obj = cache->rq->batch->obj;
+
+	GEM_BUG_ON(cache->rq_size >= obj->base.size / sizeof(u32));
 	cache->rq_cmd[cache->rq_size] = MI_BATCH_BUFFER_END;
 
-	__i915_gem_object_flush_map(cache->rq->batch->obj, 0, cache->rq_size);
-	i915_gem_object_unpin_map(cache->rq->batch->obj);
+	__i915_gem_object_flush_map(obj, 0, sizeof(u32) * (cache->rq_size + 1));
+	i915_gem_object_unpin_map(obj);
 
 	intel_gt_chipset_flush(cache->rq->engine->gt);
 

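The bug class here is a unit mismatch: rq_size counts written u32 commands
(dwords), while the flush API takes a length in bytes, so the old call flushed
only a quarter of the written range and omitted the terminator. A tiny
standalone illustration (toy values):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t rq_size = 256; /* dwords emitted into the batch */

	/* Buggy: passes the dword count as a byte length, flushing only
	 * the first quarter of what was actually written. */
	size_t buggy = rq_size;

	/* Fixed: every written dword plus the MI_BATCH_BUFFER_END
	 * terminator, converted to bytes. */
	size_t fixed = sizeof(uint32_t) * (rq_size + 1);

	printf("buggy=%zu bytes, fixed=%zu bytes\n", buggy, fixed);
	return 0;
}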
