* [Intel-gfx] [PATCH 1/3] drm/i915: Use common priotree lists for virtual engine @ 2020-01-15 8:33 Chris Wilson 2020-01-15 8:33 ` [Intel-gfx] [PATCH 2/3] drm/i915/gt: Allow temporary suspension of inflight requests Chris Wilson ` (8 more replies) 0 siblings, 9 replies; 21+ messages in thread From: Chris Wilson @ 2020-01-15 8:33 UTC (permalink / raw) To: intel-gfx Since commit 422d7df4f090 ("drm/i915: Replace engine->timeline with a plain list"), we used the default embedded priotree slot for the virtual engine request queue, which means we can also use the same solitary slot with the scheduler. However, the priolist is expected to be guarded by the engine->active.lock, but this is not true for the virtual engine v2: Update i915_sched_node.link explanation for current usage where it is a link on both the queue and on the runlists. References: 422d7df4f090 ("drm/i915: Replace engine->timeline with a plain list") Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> --- drivers/gpu/drm/i915/gt/intel_lrc.c | 13 ++++++++----- drivers/gpu/drm/i915/i915_request.c | 4 +++- drivers/gpu/drm/i915/i915_request.h | 17 +++++++++++++++++ drivers/gpu/drm/i915/i915_scheduler.c | 22 ++++++++++------------ 4 files changed, 38 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 9e430590fb3a..f0cbd240a8c2 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -985,6 +985,8 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); list_move(&rq->sched.link, pl); + set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); + active = rq; } else { struct intel_engine_cs *owner = rq->context->engine; @@ -2430,11 +2432,12 @@ static void execlists_preempt(struct timer_list *timer) } static void queue_request(struct 
intel_engine_cs *engine, - struct i915_sched_node *node, - int prio) + struct i915_request *rq) { - GEM_BUG_ON(!list_empty(&node->link)); - list_add_tail(&node->link, i915_sched_lookup_priolist(engine, prio)); + GEM_BUG_ON(!list_empty(&rq->sched.link)); + list_add_tail(&rq->sched.link, + i915_sched_lookup_priolist(engine, rq_prio(rq))); + set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); } static void __submit_queue_imm(struct intel_engine_cs *engine) @@ -2470,7 +2473,7 @@ static void execlists_submit_request(struct i915_request *request) /* Will be called from irq-context when using foreign fences. */ spin_lock_irqsave(&engine->active.lock, flags); - queue_request(engine, &request->sched, rq_prio(request)); + queue_request(engine, request); GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); GEM_BUG_ON(list_empty(&request->sched.link)); diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index be185886e4fc..9ed0d3bc7249 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -408,8 +408,10 @@ bool __i915_request_submit(struct i915_request *request) xfer: /* We may be recursing from the signal callback of another i915 fence */ spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); - if (!test_and_set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)) + if (!test_and_set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)) { list_move_tail(&request->sched.link, &engine->active.requests); + clear_bit(I915_FENCE_FLAG_PQUEUE, &request->fence.flags); + } if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags) && diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 031433691a06..a9f0d3c8d8b7 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -70,6 +70,18 @@ enum { */ I915_FENCE_FLAG_ACTIVE = DMA_FENCE_FLAG_USER_BITS, + /* + * 
I915_FENCE_FLAG_PQUEUE - this request is ready for execution + * + * Using the scheduler, when a request is ready for execution it is put + * into the priority queue, and removed from the queue when transferred + * to the HW runlists. We want to track its membership within that + * queue so that we can easily check before rescheduling. + * + * See i915_request_in_priority_queue() + */ + I915_FENCE_FLAG_PQUEUE, + + /* * I915_FENCE_FLAG_SIGNAL - this request is currently on signal_list * @@ -361,6 +373,11 @@ static inline bool i915_request_is_active(const struct i915_request *rq) return test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); } +static inline bool i915_request_in_priority_queue(const struct i915_request *rq) +{ + return test_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); +} + /** * Returns true if seq1 is later than seq2. */ diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index bf87c70bfdd9..db3da81b7f05 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -326,20 +326,18 @@ static void __i915_schedule(struct i915_sched_node *node, node->attr.priority = prio; - if (list_empty(&node->link)) { - /* - * If the request is not in the priolist queue because - * it is not yet runnable, then it doesn't contribute - * to our preemption decisions. On the other hand, - * if the request is on the HW, it too is not in the - * queue; but in that case we may still need to reorder - * the inflight requests. - */ + /* + * Once the request is ready, it will be placed into the + * priority lists and then onto the HW runlist. Before the + * request is ready, it does not contribute to our preemption + * decisions and we can safely ignore it, as it will, and + * any preemption required, be dealt with upon submission. 
+ * See engine->submit_request() + */ + if (list_empty(&node->link)) continue; - } - if (!intel_engine_is_virtual(engine) && - !i915_request_is_active(node_to_request(node))) { + if (i915_request_in_priority_queue(node_to_request(node))) { if (!cache.priolist) cache.priolist = i915_sched_lookup_priolist(engine, -- 2.25.0 _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply related [flat|nested] 21+ messages in thread
* [Intel-gfx] [PATCH 2/3] drm/i915/gt: Allow temporary suspension of inflight requests 2020-01-15 8:33 [Intel-gfx] [PATCH 1/3] drm/i915: Use common priotree lists for virtual engine Chris Wilson @ 2020-01-15 8:33 ` Chris Wilson 2020-01-15 10:58 ` Tvrtko Ursulin 2020-01-15 11:10 ` [Intel-gfx] [PATCH v3] " Chris Wilson 2020-01-15 8:33 ` [Intel-gfx] [PATCH 3/3] drm/i915/execlists: Offline error capture Chris Wilson ` (7 subsequent siblings) 8 siblings, 2 replies; 21+ messages in thread From: Chris Wilson @ 2020-01-15 8:33 UTC (permalink / raw) To: intel-gfx In order to support out-of-line error capture, we need to remove the active request from HW and put it to one side while a worker compresses and stores all the details associated with that request. (As that compression may take an arbitrary user-controlled amount of time, we want to let the engine continue running on other workloads while the hanging request is dumped.) Not only do we need to remove the active request, but we also have to remove its context and all requests that were dependent on it (both in flight, queued and future submission). Finally once the capture is complete, we need to be able to resubmit the request and its dependents and allow them to execute. v2: Replace stack recursion with a simple list. 
References: https://gitlab.freedesktop.org/drm/intel/issues/738 Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 1 + drivers/gpu/drm/i915/gt/intel_engine_types.h | 1 + drivers/gpu/drm/i915/gt/intel_lrc.c | 159 ++++++++++++++++++- drivers/gpu/drm/i915/gt/selftest_lrc.c | 103 ++++++++++++ drivers/gpu/drm/i915/i915_request.h | 22 +++ 5 files changed, 282 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index f451ef376548..c296aaf381e7 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -671,6 +671,7 @@ void intel_engine_init_active(struct intel_engine_cs *engine, unsigned int subclass) { INIT_LIST_HEAD(&engine->active.requests); + INIT_LIST_HEAD(&engine->active.hold); spin_lock_init(&engine->active.lock); lockdep_set_subclass(&engine->active.lock, subclass); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 00287515e7af..77e68c7643de 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -295,6 +295,7 @@ struct intel_engine_cs { struct { spinlock_t lock; struct list_head requests; + struct list_head hold; /* ready requests, but on hold */ } active; struct llist_head barrier_tasks; diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index f0cbd240a8c2..43c19dc9c0c7 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -2353,6 +2353,145 @@ static void __execlists_submission_tasklet(struct intel_engine_cs *const engine) } } +static void __execlists_hold(struct i915_request *rq) +{ + LIST_HEAD(list); + + do { + struct i915_dependency *p; + + if (i915_request_is_active(rq)) + __i915_request_unsubmit(rq); + + RQ_TRACE(rq, "on hold\n"); + 
clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); + list_move_tail(&rq->sched.link, &rq->engine->active.hold); + i915_request_set_hold(rq); + + list_for_each_entry(p, &rq->sched.waiters_list, wait_link) { + struct i915_request *w = + container_of(p->waiter, typeof(*w), sched); + + /* Leave semaphores spinning on the other engines */ + if (w->engine != rq->engine) + continue; + + if (list_empty(&w->sched.link)) + continue; /* Not yet submitted */ + + if (i915_request_completed(w)) + continue; + + if (i915_request_has_hold(rq)) + continue; + + list_move_tail(&w->sched.link, &list); + } + + rq = list_first_entry_or_null(&list, typeof(*rq), sched.link); + } while(rq); +} + +__maybe_unused +static void execlists_hold(struct intel_engine_cs *engine, + struct i915_request *rq) +{ + spin_lock_irq(&engine->active.lock); + + /* + * Transfer this request onto the hold queue to prevent it + * being resubmitted to HW (and potentially completed) before we have + * released it. Since we may have already submitted following + * requests, we need to remove those as well. + */ + GEM_BUG_ON(i915_request_completed(rq)); + GEM_BUG_ON(i915_request_has_hold(rq)); + GEM_BUG_ON(rq->engine != engine); + __execlists_hold(rq); + + spin_unlock_irq(&engine->active.lock); +} + +static bool hold_request(const struct i915_request *rq) +{ + struct i915_dependency *p; + + /* + * If one of our ancestors is still on hold, we must also still be on + * hold, otherwise we will bypass it and execute before it. 
+ */ + list_for_each_entry(p, &rq->sched.signalers_list, signal_link) { + const struct i915_request *s = + container_of(p->signaler, typeof(*s), sched); + + if (s->engine != rq->engine) + continue; + + return i915_request_has_hold(s); + } + + return false; +} + +static void __execlists_unhold(struct i915_request *rq) +{ + LIST_HEAD(list); + + do { + struct i915_dependency *p; + + GEM_BUG_ON(!i915_request_has_hold(rq)); + GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit)); + + i915_request_clear_hold(rq); + list_move_tail(&rq->sched.link, + i915_sched_lookup_priolist(rq->engine, + rq_prio(rq))); + set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); + RQ_TRACE(rq, "hold release\n"); + + /* Also release any children on this engine that are ready */ + list_for_each_entry(p, &rq->sched.waiters_list, wait_link) { + struct i915_request *w = + container_of(p->waiter, typeof(*w), sched); + + if (w->engine != rq->engine) + continue; + + if (!i915_request_has_hold(rq)) + continue; + + /* Check that no other parents are on hold */ + if (hold_request(rq)) + continue; + + list_move_tail(&w->sched.link, &list); + } + + rq = list_first_entry_or_null(&list, typeof(*rq), sched.link); + } while(rq); +} + +__maybe_unused +static void execlists_unhold(struct intel_engine_cs *engine, + struct i915_request *rq) +{ + spin_lock_irq(&engine->active.lock); + + /* + * Move this request back to the priority queue, and all of its + * children and grandchildren that were suspended along with it. 
+ */ + __execlists_unhold(rq); + + if (rq_prio(rq) > engine->execlists.queue_priority_hint) { + engine->execlists.queue_priority_hint = rq_prio(rq); + tasklet_hi_schedule(&engine->execlists.tasklet); + } + + spin_unlock_irq(&engine->active.lock); +} + static noinline void preempt_reset(struct intel_engine_cs *engine) { const unsigned int bit = I915_RESET_ENGINE + engine->id; @@ -2465,6 +2604,13 @@ static void submit_queue(struct intel_engine_cs *engine, __submit_queue_imm(engine); } +static bool on_hold(const struct intel_engine_cs *engine, + const struct i915_request *rq) +{ + GEM_BUG_ON(i915_request_has_hold(rq)); + return !list_empty(&engine->active.hold) && hold_request(rq); +} + static void execlists_submit_request(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; @@ -2473,12 +2619,17 @@ static void execlists_submit_request(struct i915_request *request) /* Will be called from irq-context when using foreign fences. */ spin_lock_irqsave(&engine->active.lock, flags); - queue_request(engine, request); + if (unlikely(on_hold(engine, request))) { /* ancestor is suspended */ + list_add_tail(&request->sched.link, &engine->active.hold); + i915_request_set_hold(request); + } else { + queue_request(engine, request); - GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); - GEM_BUG_ON(list_empty(&request->sched.link)); + GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); + GEM_BUG_ON(list_empty(&request->sched.link)); - submit_queue(engine, request); + submit_queue(engine, request); + } spin_unlock_irqrestore(&engine->active.lock, flags); } diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 15cda024e3e4..78501d79c0ea 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -285,6 +285,108 @@ static int live_unlite_preempt(void *arg) return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX)); } +static int 
live_hold_reset(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct igt_spinner spin; + int err = 0; + + /* + * In order to support offline error capture for fast preempt reset, + * we need to decouple the guilty request and ensure that it and its + * descendents are not executed while the capture is in progress. + */ + + if (!intel_has_reset_engine(gt)) + return 0; + + if (igt_spinner_init(&spin, gt)) + return -ENOMEM; + + for_each_engine(engine, gt, id) { + struct intel_context *ce; + unsigned long heartbeat; + struct i915_request *rq; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + break; + } + + engine_heartbeat_disable(engine, &heartbeat); + + rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out; + } + i915_request_add(rq); + + if (!igt_wait_for_spinner(&spin, rq)) { + intel_gt_set_wedged(gt); + err = -ETIME; + goto out; + } + + /* We have our request executing, now remove it and reset */ + + if (test_and_set_bit(I915_RESET_ENGINE + id, + >->reset.flags)) { + spin_unlock_irq(&engine->active.lock); + intel_gt_set_wedged(gt); + err = -EBUSY; + goto out; + } + tasklet_disable(&engine->execlists.tasklet); + + engine->execlists.tasklet.func(engine->execlists.tasklet.data); + GEM_BUG_ON(execlists_active(&engine->execlists) != rq); + + execlists_hold(engine, rq); + GEM_BUG_ON(!i915_request_has_hold(rq)); + + intel_engine_reset(engine, NULL); + GEM_BUG_ON(rq->fence.error != -EIO); + + tasklet_enable(&engine->execlists.tasklet); + clear_and_wake_up_bit(I915_RESET_ENGINE + id, + >->reset.flags); + + /* Check that we do not resubmit the held request */ + i915_request_get(rq); + if (!i915_request_wait(rq, 0, HZ / 5)) { + pr_err("%s: on hold request completed!\n", + engine->name); + i915_request_put(rq); + err = -EIO; + goto out; + } + GEM_BUG_ON(!i915_request_has_hold(rq)); + + /* But is resubmitted on release */ + 
execlists_unhold(engine, rq); + if (i915_request_wait(rq, 0, HZ / 5) < 0) { + pr_err("%s: held request did not complete!\n", + engine->name); + intel_gt_set_wedged(gt); + err = -ETIME; + } + i915_request_put(rq); + +out: + engine_heartbeat_enable(engine, heartbeat); + intel_context_put(ce); + if (err) + break; + } + + igt_spinner_fini(&spin); + return err; +} + static int emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx) { @@ -3315,6 +3417,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915) SUBTEST(live_sanitycheck), SUBTEST(live_unlite_switch), SUBTEST(live_unlite_preempt), + SUBTEST(live_hold_reset), SUBTEST(live_timeslice_preempt), SUBTEST(live_timeslice_queue), SUBTEST(live_busywait_preempt), diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index a9f0d3c8d8b7..47fa5419c74f 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -90,6 +90,13 @@ enum { */ I915_FENCE_FLAG_SIGNAL, + /* + * I915_FENCE_FLAG_HOLD - this request is currently on hold + * + * This request has been suspended, pending an ongoing investigation. 
+ */ + I915_FENCE_FLAG_HOLD, + /* * I915_FENCE_FLAG_NOPREEMPT - this request should not be preempted * @@ -500,6 +507,21 @@ static inline bool i915_request_has_sentinel(const struct i915_request *rq) return unlikely(test_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags)); } +static inline bool i915_request_has_hold(const struct i915_request *rq) +{ + return unlikely(test_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags)); +} + +static inline void i915_request_set_hold(struct i915_request *rq) +{ + set_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags); +} + +static inline void i915_request_clear_hold(struct i915_request *rq) +{ + clear_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags); +} + static inline struct intel_timeline * i915_request_timeline(struct i915_request *rq) { -- 2.25.0 _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply related [flat|nested] 21+ messages in thread
* Re: [Intel-gfx] [PATCH 2/3] drm/i915/gt: Allow temporary suspension of inflight requests 2020-01-15 8:33 ` [Intel-gfx] [PATCH 2/3] drm/i915/gt: Allow temporary suspension of inflight requests Chris Wilson @ 2020-01-15 10:58 ` Tvrtko Ursulin 2020-01-15 11:01 ` Chris Wilson 2020-01-15 11:10 ` [Intel-gfx] [PATCH v3] " Chris Wilson 1 sibling, 1 reply; 21+ messages in thread From: Tvrtko Ursulin @ 2020-01-15 10:58 UTC (permalink / raw) To: Chris Wilson, intel-gfx On 15/01/2020 08:33, Chris Wilson wrote: > In order to support out-of-line error capture, we need to remove the > active request from HW and put it to one side while a worker compresses > and stores all the details associated with that request. (As that > compression may take an arbitrary user-controlled amount of time, we > want to let the engine continue running on other workloads while the > hanging request is dumped.) Not only do we need to remove the active > request, but we also have to remove its context and all requests that > were dependent on it (both in flight, queued and future submission). > > Finally once the capture is complete, we need to be able to resubmit the > request and its dependents and allow them to execute. > > v2: Replace stack recursion with a simple list. 
> > References: https://gitlab.freedesktop.org/drm/intel/issues/738 > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > --- > drivers/gpu/drm/i915/gt/intel_engine_cs.c | 1 + > drivers/gpu/drm/i915/gt/intel_engine_types.h | 1 + > drivers/gpu/drm/i915/gt/intel_lrc.c | 159 ++++++++++++++++++- > drivers/gpu/drm/i915/gt/selftest_lrc.c | 103 ++++++++++++ > drivers/gpu/drm/i915/i915_request.h | 22 +++ > 5 files changed, 282 insertions(+), 4 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c > index f451ef376548..c296aaf381e7 100644 > --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c > +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c > @@ -671,6 +671,7 @@ void > intel_engine_init_active(struct intel_engine_cs *engine, unsigned int subclass) > { > INIT_LIST_HEAD(&engine->active.requests); > + INIT_LIST_HEAD(&engine->active.hold); > > spin_lock_init(&engine->active.lock); > lockdep_set_subclass(&engine->active.lock, subclass); > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h > index 00287515e7af..77e68c7643de 100644 > --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h > +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h > @@ -295,6 +295,7 @@ struct intel_engine_cs { > struct { > spinlock_t lock; > struct list_head requests; > + struct list_head hold; /* ready requests, but on hold */ > } active; > > struct llist_head barrier_tasks; > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c > index f0cbd240a8c2..43c19dc9c0c7 100644 > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c > @@ -2353,6 +2353,145 @@ static void __execlists_submission_tasklet(struct intel_engine_cs *const engine) > } > } > > +static void __execlists_hold(struct i915_request *rq) > +{ > + LIST_HEAD(list); > + > + do { > + struct i915_dependency *p; > + > + if 
(i915_request_is_active(rq)) > + __i915_request_unsubmit(rq); > + > + RQ_TRACE(rq, "on hold\n"); > + clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); > + list_move_tail(&rq->sched.link, &rq->engine->active.hold); > + i915_request_set_hold(rq); > + > + list_for_each_entry(p, &rq->sched.waiters_list, wait_link) { > + struct i915_request *w = > + container_of(p->waiter, typeof(*w), sched); > + > + /* Leave semaphores spinning on the other engines */ > + if (w->engine != rq->engine) > + continue; > + > + if (list_empty(&w->sched.link)) > + continue; /* Not yet submitted */ > + > + if (i915_request_completed(w)) > + continue; > + > + if (i915_request_has_hold(rq)) > + continue; > + > + list_move_tail(&w->sched.link, &list); > + } > + > + rq = list_first_entry_or_null(&list, typeof(*rq), sched.link); > + } while(rq); > +} > + > +__maybe_unused > +static void execlists_hold(struct intel_engine_cs *engine, > + struct i915_request *rq) > +{ > + spin_lock_irq(&engine->active.lock); > + > + /* > + * Transfer this request onto the hold queue to prevent it > + * being resumbitted to HW (and potentially completed) before we have > + * released it. Since we may have already submitted following > + * requests, we need to remove those as well. > + */ > + GEM_BUG_ON(i915_request_completed(rq)); > + GEM_BUG_ON(i915_request_has_hold(rq)); > + GEM_BUG_ON(rq->engine != engine); > + __execlists_hold(rq); > + > + spin_unlock_irq(&engine->active.lock); > +} > + > +static bool hold_request(const struct i915_request *rq) > +{ > + struct i915_dependency *p; > + > + /* > + * If one of our ancestors is still on hold, we must also still be on > + * hold, otherwise we will bypass it and execute before it. 
> + */ > + list_for_each_entry(p, &rq->sched.signalers_list, signal_link) { > + const struct i915_request *s = > + container_of(p->signaler, typeof(*s), sched); > + > + if (s->engine != rq->engine) > + continue; > + > + return i915_request_has_hold(s); It shouldn't be: if (i915_request_has_hold(s)) return true; ? > + } > + > + return false; > +} > + > +static void __execlists_unhold(struct i915_request *rq) > +{ > + LIST_HEAD(list); > + > + do { > + struct i915_dependency *p; > + > + GEM_BUG_ON(!i915_request_has_hold(rq)); > + GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit)); > + > + i915_request_clear_hold(rq); > + list_move_tail(&rq->sched.link, > + i915_sched_lookup_priolist(rq->engine, > + rq_prio(rq))); > + set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); > + RQ_TRACE(rq, "hold release\n"); > + > + /* Also release any children on this engine that are ready */ > + list_for_each_entry(p, &rq->sched.waiters_list, wait_link) { > + struct i915_request *w = > + container_of(p->waiter, typeof(*w), sched); > + > + if (w->engine != rq->engine) > + continue; > + > + if (!i915_request_has_hold(rq)) is_held? > + continue; > + > + /* Check that no other parents are on hold */ > + if (hold_request(rq)) > + continue; This would be two simultaneous hangs in the interlinked hierarchy? But since the engine must be the same, can't be possible. Regards, Tvrtko > + > + list_move_tail(&w->sched.link, &list); > + } > + > + rq = list_first_entry_or_null(&list, typeof(*rq), sched.link); > + } while(rq); > +} > + > +__maybe_unused > +static void execlists_unhold(struct intel_engine_cs *engine, > + struct i915_request *rq) > +{ > + spin_lock_irq(&engine->active.lock); > + > + /* > + * Move this request back to the priority queue, and all of its > + * children and grandchildren that were suspended along with it. 
> + */ > + __execlists_unhold(rq); > + > + if (rq_prio(rq) > engine->execlists.queue_priority_hint) { > + engine->execlists.queue_priority_hint = rq_prio(rq); > + tasklet_hi_schedule(&engine->execlists.tasklet); > + } > + > + spin_unlock_irq(&engine->active.lock); > +} > + > static noinline void preempt_reset(struct intel_engine_cs *engine) > { > const unsigned int bit = I915_RESET_ENGINE + engine->id; > @@ -2465,6 +2604,13 @@ static void submit_queue(struct intel_engine_cs *engine, > __submit_queue_imm(engine); > } > > +static bool on_hold(const struct intel_engine_cs *engine, > + const struct i915_request *rq) > +{ > + GEM_BUG_ON(i915_request_has_hold(rq)); > + return !list_empty(&engine->active.hold) && hold_request(rq); > +} > + > static void execlists_submit_request(struct i915_request *request) > { > struct intel_engine_cs *engine = request->engine; > @@ -2473,12 +2619,17 @@ static void execlists_submit_request(struct i915_request *request) > /* Will be called from irq-context when using foreign fences. 
*/ > spin_lock_irqsave(&engine->active.lock, flags); > > - queue_request(engine, request); > + if (unlikely(on_hold(engine, request))) { /* ancestor is suspended */ > + list_add_tail(&request->sched.link, &engine->active.hold); > + i915_request_set_hold(request); > + } else { > + queue_request(engine, request); > > - GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); > - GEM_BUG_ON(list_empty(&request->sched.link)); > + GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); > + GEM_BUG_ON(list_empty(&request->sched.link)); > > - submit_queue(engine, request); > + submit_queue(engine, request); > + } > > spin_unlock_irqrestore(&engine->active.lock, flags); > } > diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c > index 15cda024e3e4..78501d79c0ea 100644 > --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c > +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c > @@ -285,6 +285,108 @@ static int live_unlite_preempt(void *arg) > return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX)); > } > > +static int live_hold_reset(void *arg) > +{ > + struct intel_gt *gt = arg; > + struct intel_engine_cs *engine; > + enum intel_engine_id id; > + struct igt_spinner spin; > + int err = 0; > + > + /* > + * In order to support offline error capture for fast preempt reset, > + * we need to decouple the guilty request and ensure that it and its > + * descendents are not executed while the capture is in progress. 
> + */ > + > + if (!intel_has_reset_engine(gt)) > + return 0; > + > + if (igt_spinner_init(&spin, gt)) > + return -ENOMEM; > + > + for_each_engine(engine, gt, id) { > + struct intel_context *ce; > + unsigned long heartbeat; > + struct i915_request *rq; > + > + ce = intel_context_create(engine); > + if (IS_ERR(ce)) { > + err = PTR_ERR(ce); > + break; > + } > + > + engine_heartbeat_disable(engine, &heartbeat); > + > + rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); > + if (IS_ERR(rq)) { > + err = PTR_ERR(rq); > + goto out; > + } > + i915_request_add(rq); > + > + if (!igt_wait_for_spinner(&spin, rq)) { > + intel_gt_set_wedged(gt); > + err = -ETIME; > + goto out; > + } > + > + /* We have our request executing, now remove it and reset */ > + > + if (test_and_set_bit(I915_RESET_ENGINE + id, > + >->reset.flags)) { > + spin_unlock_irq(&engine->active.lock); > + intel_gt_set_wedged(gt); > + err = -EBUSY; > + goto out; > + } > + tasklet_disable(&engine->execlists.tasklet); > + > + engine->execlists.tasklet.func(engine->execlists.tasklet.data); > + GEM_BUG_ON(execlists_active(&engine->execlists) != rq); > + > + execlists_hold(engine, rq); > + GEM_BUG_ON(!i915_request_has_hold(rq)); > + > + intel_engine_reset(engine, NULL); > + GEM_BUG_ON(rq->fence.error != -EIO); > + > + tasklet_enable(&engine->execlists.tasklet); > + clear_and_wake_up_bit(I915_RESET_ENGINE + id, > + >->reset.flags); > + > + /* Check that we do not resubmit the held request */ > + i915_request_get(rq); > + if (!i915_request_wait(rq, 0, HZ / 5)) { > + pr_err("%s: on hold request completed!\n", > + engine->name); > + i915_request_put(rq); > + err = -EIO; > + goto out; > + } > + GEM_BUG_ON(!i915_request_has_hold(rq)); > + > + /* But is resubmitted on release */ > + execlists_unhold(engine, rq); > + if (i915_request_wait(rq, 0, HZ / 5) < 0) { > + pr_err("%s: held request did not complete!\n", > + engine->name); > + intel_gt_set_wedged(gt); > + err = -ETIME; > + } > + i915_request_put(rq); > + > +out: > 
+ engine_heartbeat_enable(engine, heartbeat); > + intel_context_put(ce); > + if (err) > + break; > + } > + > + igt_spinner_fini(&spin); > + return err; > +} > + > static int > emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx) > { > @@ -3315,6 +3417,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915) > SUBTEST(live_sanitycheck), > SUBTEST(live_unlite_switch), > SUBTEST(live_unlite_preempt), > + SUBTEST(live_hold_reset), > SUBTEST(live_timeslice_preempt), > SUBTEST(live_timeslice_queue), > SUBTEST(live_busywait_preempt), > diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h > index a9f0d3c8d8b7..47fa5419c74f 100644 > --- a/drivers/gpu/drm/i915/i915_request.h > +++ b/drivers/gpu/drm/i915/i915_request.h > @@ -90,6 +90,13 @@ enum { > */ > I915_FENCE_FLAG_SIGNAL, > > + /* > + * I915_FENCE_FLAG_HOLD - this request is currently on hold > + * > + * This request has been suspended, pending an ongoing investigation. > + */ > + I915_FENCE_FLAG_HOLD, > + > /* > * I915_FENCE_FLAG_NOPREEMPT - this request should not be preempted > * > @@ -500,6 +507,21 @@ static inline bool i915_request_has_sentinel(const struct i915_request *rq) > return unlikely(test_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags)); > } > > +static inline bool i915_request_has_hold(const struct i915_request *rq) > +{ > + return unlikely(test_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags)); > +} > + > +static inline void i915_request_set_hold(struct i915_request *rq) > +{ > + set_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags); > +} > + > +static inline void i915_request_clear_hold(struct i915_request *rq) > +{ > + clear_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags); > +} > + > static inline struct intel_timeline * > i915_request_timeline(struct i915_request *rq) > { > _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw 
reply [flat|nested] 21+ messages in thread
* Re: [Intel-gfx] [PATCH 2/3] drm/i915/gt: Allow temporary suspension of inflight requests 2020-01-15 10:58 ` Tvrtko Ursulin @ 2020-01-15 11:01 ` Chris Wilson 0 siblings, 0 replies; 21+ messages in thread From: Chris Wilson @ 2020-01-15 11:01 UTC (permalink / raw) To: Tvrtko Ursulin, intel-gfx Quoting Tvrtko Ursulin (2020-01-15 10:58:39) > > On 15/01/2020 08:33, Chris Wilson wrote: > > +static bool hold_request(const struct i915_request *rq) > > +{ > > + struct i915_dependency *p; > > + > > + /* > > + * If one of our ancestors is still on hold, we must also still be on > > + * hold, otherwise we will bypass it and execute before it. > > + */ > > + list_for_each_entry(p, &rq->sched.signalers_list, signal_link) { > > + const struct i915_request *s = > > + container_of(p->signaler, typeof(*s), sched); > > + > > + if (s->engine != rq->engine) > > + continue; > > + > > + return i915_request_has_hold(s); > > It shouldn't be: > > if (i915_request_has_hold(s)) > return true; > > ? Yes, it should be. -Chris _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply [flat|nested] 21+ messages in thread
* [Intel-gfx] [PATCH v3] drm/i915/gt: Allow temporary suspension of inflight requests 2020-01-15 8:33 ` [Intel-gfx] [PATCH 2/3] drm/i915/gt: Allow temporary suspension of inflight requests Chris Wilson 2020-01-15 10:58 ` Tvrtko Ursulin @ 2020-01-15 11:10 ` Chris Wilson 2020-01-15 11:37 ` Tvrtko Ursulin 2020-01-16 17:12 ` Tvrtko Ursulin 1 sibling, 2 replies; 21+ messages in thread From: Chris Wilson @ 2020-01-15 11:10 UTC (permalink / raw) To: intel-gfx In order to support out-of-line error capture, we need to remove the active request from HW and put it to one side while a worker compresses and stores all the details associated with that request. (As that compression may take an arbitrary user-controlled amount of time, we want to let the engine continue running on other workloads while the hanging request is dumped.) Not only do we need to remove the active request, but we also have to remove its context and all requests that were dependent on it (both in flight, queued and future submission). Finally once the capture is complete, we need to be able to resubmit the request and its dependents and allow them to execute. v2: Replace stack recursion with a simple list. v3: Check all the parents, not just the first, when searching for a stuck ancestor! 
References: https://gitlab.freedesktop.org/drm/intel/issues/738 Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 1 + drivers/gpu/drm/i915/gt/intel_engine_types.h | 1 + drivers/gpu/drm/i915/gt/intel_lrc.c | 160 ++++++++++++++++++- drivers/gpu/drm/i915/gt/selftest_lrc.c | 103 ++++++++++++ drivers/gpu/drm/i915/i915_request.h | 22 +++ 5 files changed, 283 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index f451ef376548..c296aaf381e7 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -671,6 +671,7 @@ void intel_engine_init_active(struct intel_engine_cs *engine, unsigned int subclass) { INIT_LIST_HEAD(&engine->active.requests); + INIT_LIST_HEAD(&engine->active.hold); spin_lock_init(&engine->active.lock); lockdep_set_subclass(&engine->active.lock, subclass); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 00287515e7af..77e68c7643de 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -295,6 +295,7 @@ struct intel_engine_cs { struct { spinlock_t lock; struct list_head requests; + struct list_head hold; /* ready requests, but on hold */ } active; struct llist_head barrier_tasks; diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index f0cbd240a8c2..05a05ceeac6a 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -2353,6 +2353,146 @@ static void __execlists_submission_tasklet(struct intel_engine_cs *const engine) } } +static void __execlists_hold(struct i915_request *rq) +{ + LIST_HEAD(list); + + do { + struct i915_dependency *p; + + if (i915_request_is_active(rq)) + __i915_request_unsubmit(rq); + + RQ_TRACE(rq, "on hold\n"); + 
clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); + list_move_tail(&rq->sched.link, &rq->engine->active.hold); + i915_request_set_hold(rq); + + list_for_each_entry(p, &rq->sched.waiters_list, wait_link) { + struct i915_request *w = + container_of(p->waiter, typeof(*w), sched); + + /* Leave semaphores spinning on the other engines */ + if (w->engine != rq->engine) + continue; + + if (list_empty(&w->sched.link)) + continue; /* Not yet submitted */ + + if (i915_request_completed(w)) + continue; + + if (i915_request_has_hold(rq)) + continue; + + list_move_tail(&w->sched.link, &list); + } + + rq = list_first_entry_or_null(&list, typeof(*rq), sched.link); + } while (rq); +} + +__maybe_unused +static void execlists_hold(struct intel_engine_cs *engine, + struct i915_request *rq) +{ + spin_lock_irq(&engine->active.lock); + + /* + * Transfer this request onto the hold queue to prevent it + * being resumbitted to HW (and potentially completed) before we have + * released it. Since we may have already submitted following + * requests, we need to remove those as well. + */ + GEM_BUG_ON(i915_request_completed(rq)); + GEM_BUG_ON(i915_request_has_hold(rq)); + GEM_BUG_ON(rq->engine != engine); + __execlists_hold(rq); + + spin_unlock_irq(&engine->active.lock); +} + +static bool hold_request(const struct i915_request *rq) +{ + struct i915_dependency *p; + + /* + * If one of our ancestors is on hold, we must also be on hold, + * otherwise we will bypass it and execute before it. 
+ */ + list_for_each_entry(p, &rq->sched.signalers_list, signal_link) { + const struct i915_request *s = + container_of(p->signaler, typeof(*s), sched); + + if (s->engine != rq->engine) + continue; + + if (i915_request_has_hold(s)) + return true; + } + + return false; +} + +static void __execlists_unhold(struct i915_request *rq) +{ + LIST_HEAD(list); + + do { + struct i915_dependency *p; + + GEM_BUG_ON(!i915_request_has_hold(rq)); + GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit)); + + i915_request_clear_hold(rq); + list_move_tail(&rq->sched.link, + i915_sched_lookup_priolist(rq->engine, + rq_prio(rq))); + set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); + RQ_TRACE(rq, "hold release\n"); + + /* Also release any children on this engine that are ready */ + list_for_each_entry(p, &rq->sched.waiters_list, wait_link) { + struct i915_request *w = + container_of(p->waiter, typeof(*w), sched); + + if (w->engine != rq->engine) + continue; + + if (!i915_request_has_hold(rq)) + continue; + + /* Check that no other parents are on hold */ + if (hold_request(rq)) + continue; + + list_move_tail(&w->sched.link, &list); + } + + rq = list_first_entry_or_null(&list, typeof(*rq), sched.link); + } while (rq); +} + +__maybe_unused +static void execlists_unhold(struct intel_engine_cs *engine, + struct i915_request *rq) +{ + spin_lock_irq(&engine->active.lock); + + /* + * Move this request back to the priority queue, and all of its + * children and grandchildren that were suspended along with it. 
+ */ + __execlists_unhold(rq); + + if (rq_prio(rq) > engine->execlists.queue_priority_hint) { + engine->execlists.queue_priority_hint = rq_prio(rq); + tasklet_hi_schedule(&engine->execlists.tasklet); + } + + spin_unlock_irq(&engine->active.lock); +} + static noinline void preempt_reset(struct intel_engine_cs *engine) { const unsigned int bit = I915_RESET_ENGINE + engine->id; @@ -2465,6 +2605,13 @@ static void submit_queue(struct intel_engine_cs *engine, __submit_queue_imm(engine); } +static bool on_hold(const struct intel_engine_cs *engine, + const struct i915_request *rq) +{ + GEM_BUG_ON(i915_request_has_hold(rq)); + return !list_empty(&engine->active.hold) && hold_request(rq); +} + static void execlists_submit_request(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; @@ -2473,12 +2620,17 @@ static void execlists_submit_request(struct i915_request *request) /* Will be called from irq-context when using foreign fences. */ spin_lock_irqsave(&engine->active.lock, flags); - queue_request(engine, request); + if (unlikely(on_hold(engine, request))) { /* ancestor is suspended */ + list_add_tail(&request->sched.link, &engine->active.hold); + i915_request_set_hold(request); + } else { + queue_request(engine, request); - GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); - GEM_BUG_ON(list_empty(&request->sched.link)); + GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); + GEM_BUG_ON(list_empty(&request->sched.link)); - submit_queue(engine, request); + submit_queue(engine, request); + } spin_unlock_irqrestore(&engine->active.lock, flags); } diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 15cda024e3e4..78501d79c0ea 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -285,6 +285,108 @@ static int live_unlite_preempt(void *arg) return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX)); } +static int 
live_hold_reset(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct igt_spinner spin; + int err = 0; + + /* + * In order to support offline error capture for fast preempt reset, + * we need to decouple the guilty request and ensure that it and its + * descendents are not executed while the capture is in progress. + */ + + if (!intel_has_reset_engine(gt)) + return 0; + + if (igt_spinner_init(&spin, gt)) + return -ENOMEM; + + for_each_engine(engine, gt, id) { + struct intel_context *ce; + unsigned long heartbeat; + struct i915_request *rq; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + break; + } + + engine_heartbeat_disable(engine, &heartbeat); + + rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out; + } + i915_request_add(rq); + + if (!igt_wait_for_spinner(&spin, rq)) { + intel_gt_set_wedged(gt); + err = -ETIME; + goto out; + } + + /* We have our request executing, now remove it and reset */ + + if (test_and_set_bit(I915_RESET_ENGINE + id, + >->reset.flags)) { + spin_unlock_irq(&engine->active.lock); + intel_gt_set_wedged(gt); + err = -EBUSY; + goto out; + } + tasklet_disable(&engine->execlists.tasklet); + + engine->execlists.tasklet.func(engine->execlists.tasklet.data); + GEM_BUG_ON(execlists_active(&engine->execlists) != rq); + + execlists_hold(engine, rq); + GEM_BUG_ON(!i915_request_has_hold(rq)); + + intel_engine_reset(engine, NULL); + GEM_BUG_ON(rq->fence.error != -EIO); + + tasklet_enable(&engine->execlists.tasklet); + clear_and_wake_up_bit(I915_RESET_ENGINE + id, + >->reset.flags); + + /* Check that we do not resubmit the held request */ + i915_request_get(rq); + if (!i915_request_wait(rq, 0, HZ / 5)) { + pr_err("%s: on hold request completed!\n", + engine->name); + i915_request_put(rq); + err = -EIO; + goto out; + } + GEM_BUG_ON(!i915_request_has_hold(rq)); + + /* But is resubmitted on release */ + 
execlists_unhold(engine, rq); + if (i915_request_wait(rq, 0, HZ / 5) < 0) { + pr_err("%s: held request did not complete!\n", + engine->name); + intel_gt_set_wedged(gt); + err = -ETIME; + } + i915_request_put(rq); + +out: + engine_heartbeat_enable(engine, heartbeat); + intel_context_put(ce); + if (err) + break; + } + + igt_spinner_fini(&spin); + return err; +} + static int emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx) { @@ -3315,6 +3417,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915) SUBTEST(live_sanitycheck), SUBTEST(live_unlite_switch), SUBTEST(live_unlite_preempt), + SUBTEST(live_hold_reset), SUBTEST(live_timeslice_preempt), SUBTEST(live_timeslice_queue), SUBTEST(live_busywait_preempt), diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index a9f0d3c8d8b7..47fa5419c74f 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -90,6 +90,13 @@ enum { */ I915_FENCE_FLAG_SIGNAL, + /* + * I915_FENCE_FLAG_HOLD - this request is currently on hold + * + * This request has been suspended, pending an ongoing investigation. 
+ */ + I915_FENCE_FLAG_HOLD, + /* * I915_FENCE_FLAG_NOPREEMPT - this request should not be preempted * @@ -500,6 +507,21 @@ static inline bool i915_request_has_sentinel(const struct i915_request *rq) return unlikely(test_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags)); } +static inline bool i915_request_has_hold(const struct i915_request *rq) +{ + return unlikely(test_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags)); +} + +static inline void i915_request_set_hold(struct i915_request *rq) +{ + set_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags); +} + +static inline void i915_request_clear_hold(struct i915_request *rq) +{ + clear_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags); +} + static inline struct intel_timeline * i915_request_timeline(struct i915_request *rq) { -- 2.25.0 _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply related [flat|nested] 21+ messages in thread
* Re: [Intel-gfx] [PATCH v3] drm/i915/gt: Allow temporary suspension of inflight requests 2020-01-15 11:10 ` [Intel-gfx] [PATCH v3] " Chris Wilson @ 2020-01-15 11:37 ` Tvrtko Ursulin 2020-01-15 11:46 ` Chris Wilson 2020-01-16 17:12 ` Tvrtko Ursulin 1 sibling, 1 reply; 21+ messages in thread From: Tvrtko Ursulin @ 2020-01-15 11:37 UTC (permalink / raw) To: Chris Wilson, intel-gfx On 15/01/2020 11:10, Chris Wilson wrote: > In order to support out-of-line error capture, we need to remove the > active request from HW and put it to one side while a worker compresses > and stores all the details associated with that request. (As that > compression may take an arbitrary user-controlled amount of time, we > want to let the engine continue running on other workloads while the > hanging request is dumped.) Not only do we need to remove the active > request, but we also have to remove its context and all requests that > were dependent on it (both in flight, queued and future submission). > > Finally once the capture is complete, we need to be able to resubmit the > request and its dependents and allow them to execute. > > v2: Replace stack recursion with a simple list. > v3: Check all the parents, not just the first, when searching for a > stuck ancestor! 
> > References: https://gitlab.freedesktop.org/drm/intel/issues/738 > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > --- > drivers/gpu/drm/i915/gt/intel_engine_cs.c | 1 + > drivers/gpu/drm/i915/gt/intel_engine_types.h | 1 + > drivers/gpu/drm/i915/gt/intel_lrc.c | 160 ++++++++++++++++++- > drivers/gpu/drm/i915/gt/selftest_lrc.c | 103 ++++++++++++ > drivers/gpu/drm/i915/i915_request.h | 22 +++ > 5 files changed, 283 insertions(+), 4 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c > index f451ef376548..c296aaf381e7 100644 > --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c > +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c > @@ -671,6 +671,7 @@ void > intel_engine_init_active(struct intel_engine_cs *engine, unsigned int subclass) > { > INIT_LIST_HEAD(&engine->active.requests); > + INIT_LIST_HEAD(&engine->active.hold); > > spin_lock_init(&engine->active.lock); > lockdep_set_subclass(&engine->active.lock, subclass); > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h > index 00287515e7af..77e68c7643de 100644 > --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h > +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h > @@ -295,6 +295,7 @@ struct intel_engine_cs { > struct { > spinlock_t lock; > struct list_head requests; > + struct list_head hold; /* ready requests, but on hold */ > } active; > > struct llist_head barrier_tasks; > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c > index f0cbd240a8c2..05a05ceeac6a 100644 > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c > @@ -2353,6 +2353,146 @@ static void __execlists_submission_tasklet(struct intel_engine_cs *const engine) > } > } > > +static void __execlists_hold(struct i915_request *rq) > +{ > + LIST_HEAD(list); > + > + do { > + struct i915_dependency *p; > + > + if 
(i915_request_is_active(rq)) > + __i915_request_unsubmit(rq); > + > + RQ_TRACE(rq, "on hold\n"); > + clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); > + list_move_tail(&rq->sched.link, &rq->engine->active.hold); > + i915_request_set_hold(rq); > + > + list_for_each_entry(p, &rq->sched.waiters_list, wait_link) { > + struct i915_request *w = > + container_of(p->waiter, typeof(*w), sched); > + > + /* Leave semaphores spinning on the other engines */ > + if (w->engine != rq->engine) > + continue; > + > + if (list_empty(&w->sched.link)) > + continue; /* Not yet submitted */ > + > + if (i915_request_completed(w)) > + continue; > + > + if (i915_request_has_hold(rq)) > + continue; > + > + list_move_tail(&w->sched.link, &list); > + } > + > + rq = list_first_entry_or_null(&list, typeof(*rq), sched.link); > + } while (rq); > +} > + > +__maybe_unused > +static void execlists_hold(struct intel_engine_cs *engine, > + struct i915_request *rq) > +{ > + spin_lock_irq(&engine->active.lock); > + > + /* > + * Transfer this request onto the hold queue to prevent it > + * being resumbitted to HW (and potentially completed) before we have > + * released it. Since we may have already submitted following > + * requests, we need to remove those as well. > + */ > + GEM_BUG_ON(i915_request_completed(rq)); > + GEM_BUG_ON(i915_request_has_hold(rq)); > + GEM_BUG_ON(rq->engine != engine); > + __execlists_hold(rq); > + > + spin_unlock_irq(&engine->active.lock); > +} > + > +static bool hold_request(const struct i915_request *rq) > +{ > + struct i915_dependency *p; > + > + /* > + * If one of our ancestors is on hold, we must also be on hold, > + * otherwise we will bypass it and execute before it. 
> + */ > + list_for_each_entry(p, &rq->sched.signalers_list, signal_link) { > + const struct i915_request *s = > + container_of(p->signaler, typeof(*s), sched); > + > + if (s->engine != rq->engine) > + continue; > + > + if (i915_request_has_hold(s)) > + return true; > + } > + > + return false; > +} > + > +static void __execlists_unhold(struct i915_request *rq) > +{ > + LIST_HEAD(list); > + > + do { > + struct i915_dependency *p; > + > + GEM_BUG_ON(!i915_request_has_hold(rq)); > + GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit)); > + > + i915_request_clear_hold(rq); > + list_move_tail(&rq->sched.link, > + i915_sched_lookup_priolist(rq->engine, > + rq_prio(rq))); > + set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); > + RQ_TRACE(rq, "hold release\n"); > + > + /* Also release any children on this engine that are ready */ > + list_for_each_entry(p, &rq->sched.waiters_list, wait_link) { > + struct i915_request *w = > + container_of(p->waiter, typeof(*w), sched); > + > + if (w->engine != rq->engine) > + continue; > + > + if (!i915_request_has_hold(rq)) > + continue; > + > + /* Check that no other parents are on hold */ > + if (hold_request(rq)) > + continue; I had a question on this check. How can it be other parents on the same engine on hold if there can be one engine reset at a time? Oh and also I was thinking would i915_request_has_hold be better called i915_request_is_held? Or is_on_hold? Regards, Tvrtko > + > + list_move_tail(&w->sched.link, &list); > + } > + > + rq = list_first_entry_or_null(&list, typeof(*rq), sched.link); > + } while (rq); > +} > + > +__maybe_unused > +static void execlists_unhold(struct intel_engine_cs *engine, > + struct i915_request *rq) > +{ > + spin_lock_irq(&engine->active.lock); > + > + /* > + * Move this request back to the priority queue, and all of its > + * children and grandchildren that were suspended along with it. 
> + */ > + __execlists_unhold(rq); > + > + if (rq_prio(rq) > engine->execlists.queue_priority_hint) { > + engine->execlists.queue_priority_hint = rq_prio(rq); > + tasklet_hi_schedule(&engine->execlists.tasklet); > + } > + > + spin_unlock_irq(&engine->active.lock); > +} > + > static noinline void preempt_reset(struct intel_engine_cs *engine) > { > const unsigned int bit = I915_RESET_ENGINE + engine->id; > @@ -2465,6 +2605,13 @@ static void submit_queue(struct intel_engine_cs *engine, > __submit_queue_imm(engine); > } > > +static bool on_hold(const struct intel_engine_cs *engine, > + const struct i915_request *rq) > +{ > + GEM_BUG_ON(i915_request_has_hold(rq)); > + return !list_empty(&engine->active.hold) && hold_request(rq); > +} > + > static void execlists_submit_request(struct i915_request *request) > { > struct intel_engine_cs *engine = request->engine; > @@ -2473,12 +2620,17 @@ static void execlists_submit_request(struct i915_request *request) > /* Will be called from irq-context when using foreign fences. 
*/ > spin_lock_irqsave(&engine->active.lock, flags); > > - queue_request(engine, request); > + if (unlikely(on_hold(engine, request))) { /* ancestor is suspended */ > + list_add_tail(&request->sched.link, &engine->active.hold); > + i915_request_set_hold(request); > + } else { > + queue_request(engine, request); > > - GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); > - GEM_BUG_ON(list_empty(&request->sched.link)); > + GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); > + GEM_BUG_ON(list_empty(&request->sched.link)); > > - submit_queue(engine, request); > + submit_queue(engine, request); > + } > > spin_unlock_irqrestore(&engine->active.lock, flags); > } > diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c > index 15cda024e3e4..78501d79c0ea 100644 > --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c > +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c > @@ -285,6 +285,108 @@ static int live_unlite_preempt(void *arg) > return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX)); > } > > +static int live_hold_reset(void *arg) > +{ > + struct intel_gt *gt = arg; > + struct intel_engine_cs *engine; > + enum intel_engine_id id; > + struct igt_spinner spin; > + int err = 0; > + > + /* > + * In order to support offline error capture for fast preempt reset, > + * we need to decouple the guilty request and ensure that it and its > + * descendents are not executed while the capture is in progress. 
> + */ > + > + if (!intel_has_reset_engine(gt)) > + return 0; > + > + if (igt_spinner_init(&spin, gt)) > + return -ENOMEM; > + > + for_each_engine(engine, gt, id) { > + struct intel_context *ce; > + unsigned long heartbeat; > + struct i915_request *rq; > + > + ce = intel_context_create(engine); > + if (IS_ERR(ce)) { > + err = PTR_ERR(ce); > + break; > + } > + > + engine_heartbeat_disable(engine, &heartbeat); > + > + rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); > + if (IS_ERR(rq)) { > + err = PTR_ERR(rq); > + goto out; > + } > + i915_request_add(rq); > + > + if (!igt_wait_for_spinner(&spin, rq)) { > + intel_gt_set_wedged(gt); > + err = -ETIME; > + goto out; > + } > + > + /* We have our request executing, now remove it and reset */ > + > + if (test_and_set_bit(I915_RESET_ENGINE + id, > + >->reset.flags)) { > + spin_unlock_irq(&engine->active.lock); > + intel_gt_set_wedged(gt); > + err = -EBUSY; > + goto out; > + } > + tasklet_disable(&engine->execlists.tasklet); > + > + engine->execlists.tasklet.func(engine->execlists.tasklet.data); > + GEM_BUG_ON(execlists_active(&engine->execlists) != rq); > + > + execlists_hold(engine, rq); > + GEM_BUG_ON(!i915_request_has_hold(rq)); > + > + intel_engine_reset(engine, NULL); > + GEM_BUG_ON(rq->fence.error != -EIO); > + > + tasklet_enable(&engine->execlists.tasklet); > + clear_and_wake_up_bit(I915_RESET_ENGINE + id, > + >->reset.flags); > + > + /* Check that we do not resubmit the held request */ > + i915_request_get(rq); > + if (!i915_request_wait(rq, 0, HZ / 5)) { > + pr_err("%s: on hold request completed!\n", > + engine->name); > + i915_request_put(rq); > + err = -EIO; > + goto out; > + } > + GEM_BUG_ON(!i915_request_has_hold(rq)); > + > + /* But is resubmitted on release */ > + execlists_unhold(engine, rq); > + if (i915_request_wait(rq, 0, HZ / 5) < 0) { > + pr_err("%s: held request did not complete!\n", > + engine->name); > + intel_gt_set_wedged(gt); > + err = -ETIME; > + } > + i915_request_put(rq); > + > +out: > 
+ engine_heartbeat_enable(engine, heartbeat); > + intel_context_put(ce); > + if (err) > + break; > + } > + > + igt_spinner_fini(&spin); > + return err; > +} > + > static int > emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx) > { > @@ -3315,6 +3417,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915) > SUBTEST(live_sanitycheck), > SUBTEST(live_unlite_switch), > SUBTEST(live_unlite_preempt), > + SUBTEST(live_hold_reset), > SUBTEST(live_timeslice_preempt), > SUBTEST(live_timeslice_queue), > SUBTEST(live_busywait_preempt), > diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h > index a9f0d3c8d8b7..47fa5419c74f 100644 > --- a/drivers/gpu/drm/i915/i915_request.h > +++ b/drivers/gpu/drm/i915/i915_request.h > @@ -90,6 +90,13 @@ enum { > */ > I915_FENCE_FLAG_SIGNAL, > > + /* > + * I915_FENCE_FLAG_HOLD - this request is currently on hold > + * > + * This request has been suspended, pending an ongoing investigation. > + */ > + I915_FENCE_FLAG_HOLD, > + > /* > * I915_FENCE_FLAG_NOPREEMPT - this request should not be preempted > * > @@ -500,6 +507,21 @@ static inline bool i915_request_has_sentinel(const struct i915_request *rq) > return unlikely(test_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags)); > } > > +static inline bool i915_request_has_hold(const struct i915_request *rq) > +{ > + return unlikely(test_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags)); > +} > + > +static inline void i915_request_set_hold(struct i915_request *rq) > +{ > + set_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags); > +} > + > +static inline void i915_request_clear_hold(struct i915_request *rq) > +{ > + clear_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags); > +} > + > static inline struct intel_timeline * > i915_request_timeline(struct i915_request *rq) > { > _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw 
reply [flat|nested] 21+ messages in thread
* Re: [Intel-gfx] [PATCH v3] drm/i915/gt: Allow temporary suspension of inflight requests 2020-01-15 11:37 ` Tvrtko Ursulin @ 2020-01-15 11:46 ` Chris Wilson 0 siblings, 0 replies; 21+ messages in thread From: Chris Wilson @ 2020-01-15 11:46 UTC (permalink / raw) To: Tvrtko Ursulin, intel-gfx Quoting Tvrtko Ursulin (2020-01-15 11:37:23) > > On 15/01/2020 11:10, Chris Wilson wrote: > > In order to support out-of-line error capture, we need to remove the > > active request from HW and put it to one side while a worker compresses > > and stores all the details associated with that request. (As that > > compression may take an arbitrary user-controlled amount of time, we > > want to let the engine continue running on other workloads while the > > hanging request is dumped.) Not only do we need to remove the active > > request, but we also have to remove its context and all requests that > > were dependent on it (both in flight, queued and future submission). > > > > Finally once the capture is complete, we need to be able to resubmit the > > request and its dependents and allow them to execute. > > > > v2: Replace stack recursion with a simple list. > > v3: Check all the parents, not just the first, when searching for a > > stuck ancestor! 
> > > > References: https://gitlab.freedesktop.org/drm/intel/issues/738 > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > > --- > > drivers/gpu/drm/i915/gt/intel_engine_cs.c | 1 + > > drivers/gpu/drm/i915/gt/intel_engine_types.h | 1 + > > drivers/gpu/drm/i915/gt/intel_lrc.c | 160 ++++++++++++++++++- > > drivers/gpu/drm/i915/gt/selftest_lrc.c | 103 ++++++++++++ > > drivers/gpu/drm/i915/i915_request.h | 22 +++ > > 5 files changed, 283 insertions(+), 4 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c > > index f451ef376548..c296aaf381e7 100644 > > --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c > > +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c > > @@ -671,6 +671,7 @@ void > > intel_engine_init_active(struct intel_engine_cs *engine, unsigned int subclass) > > { > > INIT_LIST_HEAD(&engine->active.requests); > > + INIT_LIST_HEAD(&engine->active.hold); > > > > spin_lock_init(&engine->active.lock); > > lockdep_set_subclass(&engine->active.lock, subclass); > > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h > > index 00287515e7af..77e68c7643de 100644 > > --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h > > +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h > > @@ -295,6 +295,7 @@ struct intel_engine_cs { > > struct { > > spinlock_t lock; > > struct list_head requests; > > + struct list_head hold; /* ready requests, but on hold */ > > } active; > > > > struct llist_head barrier_tasks; > > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c > > index f0cbd240a8c2..05a05ceeac6a 100644 > > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c > > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c > > @@ -2353,6 +2353,146 @@ static void __execlists_submission_tasklet(struct intel_engine_cs *const engine) > > } > > } > > > > +static void __execlists_hold(struct i915_request 
*rq) > > +{ > > + LIST_HEAD(list); > > + > > + do { > > + struct i915_dependency *p; > > + > > + if (i915_request_is_active(rq)) > > + __i915_request_unsubmit(rq); > > + > > + RQ_TRACE(rq, "on hold\n"); > > + clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); > > + list_move_tail(&rq->sched.link, &rq->engine->active.hold); > > + i915_request_set_hold(rq); > > + > > + list_for_each_entry(p, &rq->sched.waiters_list, wait_link) { > > + struct i915_request *w = > > + container_of(p->waiter, typeof(*w), sched); > > + > > + /* Leave semaphores spinning on the other engines */ > > + if (w->engine != rq->engine) > > + continue; > > + > > + if (list_empty(&w->sched.link)) > > + continue; /* Not yet submitted */ > > + > > + if (i915_request_completed(w)) > > + continue; > > + > > + if (i915_request_has_hold(rq)) > > + continue; > > + > > + list_move_tail(&w->sched.link, &list); > > + } > > + > > + rq = list_first_entry_or_null(&list, typeof(*rq), sched.link); > > + } while (rq); > > +} > > + > > +__maybe_unused > > +static void execlists_hold(struct intel_engine_cs *engine, > > + struct i915_request *rq) > > +{ > > + spin_lock_irq(&engine->active.lock); > > + > > + /* > > + * Transfer this request onto the hold queue to prevent it > > + * being resumbitted to HW (and potentially completed) before we have > > + * released it. Since we may have already submitted following > > + * requests, we need to remove those as well. > > + */ > > + GEM_BUG_ON(i915_request_completed(rq)); > > + GEM_BUG_ON(i915_request_has_hold(rq)); > > + GEM_BUG_ON(rq->engine != engine); > > + __execlists_hold(rq); > > + > > + spin_unlock_irq(&engine->active.lock); > > +} > > + > > +static bool hold_request(const struct i915_request *rq) > > +{ > > + struct i915_dependency *p; > > + > > + /* > > + * If one of our ancestors is on hold, we must also be on hold, > > + * otherwise we will bypass it and execute before it. 
> > + */ > > + list_for_each_entry(p, &rq->sched.signalers_list, signal_link) { > > + const struct i915_request *s = > > + container_of(p->signaler, typeof(*s), sched); > > + > > + if (s->engine != rq->engine) > > + continue; > > + > > + if (i915_request_has_hold(s)) > > + return true; > > + } > > + > > + return false; > > +} > > + > > +static void __execlists_unhold(struct i915_request *rq) > > +{ > > + LIST_HEAD(list); > > + > > + do { > > + struct i915_dependency *p; > > + > > + GEM_BUG_ON(!i915_request_has_hold(rq)); > > + GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit)); > > + > > + i915_request_clear_hold(rq); > > + list_move_tail(&rq->sched.link, > > + i915_sched_lookup_priolist(rq->engine, > > + rq_prio(rq))); > > + set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); > > + RQ_TRACE(rq, "hold release\n"); > > + > > + /* Also release any children on this engine that are ready */ > > + list_for_each_entry(p, &rq->sched.waiters_list, wait_link) { > > + struct i915_request *w = > > + container_of(p->waiter, typeof(*w), sched); > > + > > + if (w->engine != rq->engine) > > + continue; > > + > > + if (!i915_request_has_hold(rq)) > > + continue; > > + > > + /* Check that no other parents are on hold */ > > + if (hold_request(rq)) > > + continue; > > I had a question on this check. How can there be other parents on the same > engine on hold if there can be one engine reset at a time? We hold onto the request for capture past the reset. So there could be multiple capture workers in flight, if we have a flurry of clients each triggering a GPU hang. > Oh and also I was thinking would i915_request_has_hold be better called > i915_request_is_held? Or is_on_hold? i915_request_on_hold() has been popping into my head as I read it. Fits with the on_priority_queue() and I might do a i915_request_is_ready() { return !list_empty(); }. 
(I am formulating a plan to s/active.requests/active.run/ and s/i915_request_is_active/i915_request_on_runlist/) Then is_active() could be return !list_empty(). -Chris _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [Intel-gfx] [PATCH v3] drm/i915/gt: Allow temporary suspension of inflight requests 2020-01-15 11:10 ` [Intel-gfx] [PATCH v3] " Chris Wilson 2020-01-15 11:37 ` Tvrtko Ursulin @ 2020-01-16 17:12 ` Tvrtko Ursulin 1 sibling, 0 replies; 21+ messages in thread From: Tvrtko Ursulin @ 2020-01-16 17:12 UTC (permalink / raw) To: Chris Wilson, intel-gfx On 15/01/2020 11:10, Chris Wilson wrote: > In order to support out-of-line error capture, we need to remove the > active request from HW and put it to one side while a worker compresses > and stores all the details associated with that request. (As that > compression may take an arbitrary user-controlled amount of time, we > want to let the engine continue running on other workloads while the > hanging request is dumped.) Not only do we need to remove the active > request, but we also have to remove its context and all requests that > were dependent on it (both in flight, queued and future submission). > > Finally once the capture is complete, we need to be able to resubmit the > request and its dependents and allow them to execute. > > v2: Replace stack recursion with a simple list. > v3: Check all the parents, not just the first, when searching for a > stuck ancestor! 
> > References: https://gitlab.freedesktop.org/drm/intel/issues/738 > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Regards, Tvrtko > --- > drivers/gpu/drm/i915/gt/intel_engine_cs.c | 1 + > drivers/gpu/drm/i915/gt/intel_engine_types.h | 1 + > drivers/gpu/drm/i915/gt/intel_lrc.c | 160 ++++++++++++++++++- > drivers/gpu/drm/i915/gt/selftest_lrc.c | 103 ++++++++++++ > drivers/gpu/drm/i915/i915_request.h | 22 +++ > 5 files changed, 283 insertions(+), 4 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c > index f451ef376548..c296aaf381e7 100644 > --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c > +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c > @@ -671,6 +671,7 @@ void > intel_engine_init_active(struct intel_engine_cs *engine, unsigned int subclass) > { > INIT_LIST_HEAD(&engine->active.requests); > + INIT_LIST_HEAD(&engine->active.hold); > > spin_lock_init(&engine->active.lock); > lockdep_set_subclass(&engine->active.lock, subclass); > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h > index 00287515e7af..77e68c7643de 100644 > --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h > +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h > @@ -295,6 +295,7 @@ struct intel_engine_cs { > struct { > spinlock_t lock; > struct list_head requests; > + struct list_head hold; /* ready requests, but on hold */ > } active; > > struct llist_head barrier_tasks; > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c > index f0cbd240a8c2..05a05ceeac6a 100644 > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c > @@ -2353,6 +2353,146 @@ static void __execlists_submission_tasklet(struct intel_engine_cs *const engine) > } > } > > +static void __execlists_hold(struct i915_request *rq) > +{ > + 
LIST_HEAD(list); > + > + do { > + struct i915_dependency *p; > + > + if (i915_request_is_active(rq)) > + __i915_request_unsubmit(rq); > + > + RQ_TRACE(rq, "on hold\n"); > + clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); > + list_move_tail(&rq->sched.link, &rq->engine->active.hold); > + i915_request_set_hold(rq); > + > + list_for_each_entry(p, &rq->sched.waiters_list, wait_link) { > + struct i915_request *w = > + container_of(p->waiter, typeof(*w), sched); > + > + /* Leave semaphores spinning on the other engines */ > + if (w->engine != rq->engine) > + continue; > + > + if (list_empty(&w->sched.link)) > + continue; /* Not yet submitted */ > + > + if (i915_request_completed(w)) > + continue; > + > + if (i915_request_has_hold(rq)) > + continue; > + > + list_move_tail(&w->sched.link, &list); > + } > + > + rq = list_first_entry_or_null(&list, typeof(*rq), sched.link); > + } while (rq); > +} > + > +__maybe_unused > +static void execlists_hold(struct intel_engine_cs *engine, > + struct i915_request *rq) > +{ > + spin_lock_irq(&engine->active.lock); > + > + /* > + * Transfer this request onto the hold queue to prevent it > + * being resumbitted to HW (and potentially completed) before we have > + * released it. Since we may have already submitted following > + * requests, we need to remove those as well. > + */ > + GEM_BUG_ON(i915_request_completed(rq)); > + GEM_BUG_ON(i915_request_has_hold(rq)); > + GEM_BUG_ON(rq->engine != engine); > + __execlists_hold(rq); > + > + spin_unlock_irq(&engine->active.lock); > +} > + > +static bool hold_request(const struct i915_request *rq) > +{ > + struct i915_dependency *p; > + > + /* > + * If one of our ancestors is on hold, we must also be on hold, > + * otherwise we will bypass it and execute before it. 
> + */ > + list_for_each_entry(p, &rq->sched.signalers_list, signal_link) { > + const struct i915_request *s = > + container_of(p->signaler, typeof(*s), sched); > + > + if (s->engine != rq->engine) > + continue; > + > + if (i915_request_has_hold(s)) > + return true; > + } > + > + return false; > +} > + > +static void __execlists_unhold(struct i915_request *rq) > +{ > + LIST_HEAD(list); > + > + do { > + struct i915_dependency *p; > + > + GEM_BUG_ON(!i915_request_has_hold(rq)); > + GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit)); > + > + i915_request_clear_hold(rq); > + list_move_tail(&rq->sched.link, > + i915_sched_lookup_priolist(rq->engine, > + rq_prio(rq))); > + set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); > + RQ_TRACE(rq, "hold release\n"); > + > + /* Also release any children on this engine that are ready */ > + list_for_each_entry(p, &rq->sched.waiters_list, wait_link) { > + struct i915_request *w = > + container_of(p->waiter, typeof(*w), sched); > + > + if (w->engine != rq->engine) > + continue; > + > + if (!i915_request_has_hold(rq)) > + continue; > + > + /* Check that no other parents are on hold */ > + if (hold_request(rq)) > + continue; > + > + list_move_tail(&w->sched.link, &list); > + } > + > + rq = list_first_entry_or_null(&list, typeof(*rq), sched.link); > + } while (rq); > +} > + > +__maybe_unused > +static void execlists_unhold(struct intel_engine_cs *engine, > + struct i915_request *rq) > +{ > + spin_lock_irq(&engine->active.lock); > + > + /* > + * Move this request back to the priority queue, and all of its > + * children and grandchildren that were suspended along with it. 
> + */ > + __execlists_unhold(rq); > + > + if (rq_prio(rq) > engine->execlists.queue_priority_hint) { > + engine->execlists.queue_priority_hint = rq_prio(rq); > + tasklet_hi_schedule(&engine->execlists.tasklet); > + } > + > + spin_unlock_irq(&engine->active.lock); > +} > + > static noinline void preempt_reset(struct intel_engine_cs *engine) > { > const unsigned int bit = I915_RESET_ENGINE + engine->id; > @@ -2465,6 +2605,13 @@ static void submit_queue(struct intel_engine_cs *engine, > __submit_queue_imm(engine); > } > > +static bool on_hold(const struct intel_engine_cs *engine, > + const struct i915_request *rq) > +{ > + GEM_BUG_ON(i915_request_has_hold(rq)); > + return !list_empty(&engine->active.hold) && hold_request(rq); > +} > + > static void execlists_submit_request(struct i915_request *request) > { > struct intel_engine_cs *engine = request->engine; > @@ -2473,12 +2620,17 @@ static void execlists_submit_request(struct i915_request *request) > /* Will be called from irq-context when using foreign fences. 
*/ > spin_lock_irqsave(&engine->active.lock, flags); > > - queue_request(engine, request); > + if (unlikely(on_hold(engine, request))) { /* ancestor is suspended */ > + list_add_tail(&request->sched.link, &engine->active.hold); > + i915_request_set_hold(request); > + } else { > + queue_request(engine, request); > > - GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); > - GEM_BUG_ON(list_empty(&request->sched.link)); > + GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); > + GEM_BUG_ON(list_empty(&request->sched.link)); > > - submit_queue(engine, request); > + submit_queue(engine, request); > + } > > spin_unlock_irqrestore(&engine->active.lock, flags); > } > diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c > index 15cda024e3e4..78501d79c0ea 100644 > --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c > +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c > @@ -285,6 +285,108 @@ static int live_unlite_preempt(void *arg) > return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX)); > } > > +static int live_hold_reset(void *arg) > +{ > + struct intel_gt *gt = arg; > + struct intel_engine_cs *engine; > + enum intel_engine_id id; > + struct igt_spinner spin; > + int err = 0; > + > + /* > + * In order to support offline error capture for fast preempt reset, > + * we need to decouple the guilty request and ensure that it and its > + * descendents are not executed while the capture is in progress. 
> + */ > + > + if (!intel_has_reset_engine(gt)) > + return 0; > + > + if (igt_spinner_init(&spin, gt)) > + return -ENOMEM; > + > + for_each_engine(engine, gt, id) { > + struct intel_context *ce; > + unsigned long heartbeat; > + struct i915_request *rq; > + > + ce = intel_context_create(engine); > + if (IS_ERR(ce)) { > + err = PTR_ERR(ce); > + break; > + } > + > + engine_heartbeat_disable(engine, &heartbeat); > + > + rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); > + if (IS_ERR(rq)) { > + err = PTR_ERR(rq); > + goto out; > + } > + i915_request_add(rq); > + > + if (!igt_wait_for_spinner(&spin, rq)) { > + intel_gt_set_wedged(gt); > + err = -ETIME; > + goto out; > + } > + > + /* We have our request executing, now remove it and reset */ > + > + if (test_and_set_bit(I915_RESET_ENGINE + id, > + >->reset.flags)) { > + spin_unlock_irq(&engine->active.lock); > + intel_gt_set_wedged(gt); > + err = -EBUSY; > + goto out; > + } > + tasklet_disable(&engine->execlists.tasklet); > + > + engine->execlists.tasklet.func(engine->execlists.tasklet.data); > + GEM_BUG_ON(execlists_active(&engine->execlists) != rq); > + > + execlists_hold(engine, rq); > + GEM_BUG_ON(!i915_request_has_hold(rq)); > + > + intel_engine_reset(engine, NULL); > + GEM_BUG_ON(rq->fence.error != -EIO); > + > + tasklet_enable(&engine->execlists.tasklet); > + clear_and_wake_up_bit(I915_RESET_ENGINE + id, > + >->reset.flags); > + > + /* Check that we do not resubmit the held request */ > + i915_request_get(rq); > + if (!i915_request_wait(rq, 0, HZ / 5)) { > + pr_err("%s: on hold request completed!\n", > + engine->name); > + i915_request_put(rq); > + err = -EIO; > + goto out; > + } > + GEM_BUG_ON(!i915_request_has_hold(rq)); > + > + /* But is resubmitted on release */ > + execlists_unhold(engine, rq); > + if (i915_request_wait(rq, 0, HZ / 5) < 0) { > + pr_err("%s: held request did not complete!\n", > + engine->name); > + intel_gt_set_wedged(gt); > + err = -ETIME; > + } > + i915_request_put(rq); > + > +out: > 
+ engine_heartbeat_enable(engine, heartbeat); > + intel_context_put(ce); > + if (err) > + break; > + } > + > + igt_spinner_fini(&spin); > + return err; > +} > + > static int > emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx) > { > @@ -3315,6 +3417,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915) > SUBTEST(live_sanitycheck), > SUBTEST(live_unlite_switch), > SUBTEST(live_unlite_preempt), > + SUBTEST(live_hold_reset), > SUBTEST(live_timeslice_preempt), > SUBTEST(live_timeslice_queue), > SUBTEST(live_busywait_preempt), > diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h > index a9f0d3c8d8b7..47fa5419c74f 100644 > --- a/drivers/gpu/drm/i915/i915_request.h > +++ b/drivers/gpu/drm/i915/i915_request.h > @@ -90,6 +90,13 @@ enum { > */ > I915_FENCE_FLAG_SIGNAL, > > + /* > + * I915_FENCE_FLAG_HOLD - this request is currently on hold > + * > + * This request has been suspended, pending an ongoing investigation. > + */ > + I915_FENCE_FLAG_HOLD, > + > /* > * I915_FENCE_FLAG_NOPREEMPT - this request should not be preempted > * > @@ -500,6 +507,21 @@ static inline bool i915_request_has_sentinel(const struct i915_request *rq) > return unlikely(test_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags)); > } > > +static inline bool i915_request_has_hold(const struct i915_request *rq) > +{ > + return unlikely(test_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags)); > +} > + > +static inline void i915_request_set_hold(struct i915_request *rq) > +{ > + set_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags); > +} > + > +static inline void i915_request_clear_hold(struct i915_request *rq) > +{ > + clear_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags); > +} > + > static inline struct intel_timeline * > i915_request_timeline(struct i915_request *rq) > { > _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw 
reply [flat|nested] 21+ messages in thread
* [Intel-gfx] [PATCH 3/3] drm/i915/execlists: Offline error capture 2020-01-15 8:33 [Intel-gfx] [PATCH 1/3] drm/i915: Use common priotree lists for virtual engine Chris Wilson 2020-01-15 8:33 ` [Intel-gfx] [PATCH 2/3] drm/i915/gt: Allow temporary suspension of inflight requests Chris Wilson @ 2020-01-15 8:33 ` Chris Wilson 2020-01-16 17:22 ` Tvrtko Ursulin 2020-01-15 9:02 ` [Intel-gfx] [PATCH v2] drm/i915: Keep track of request among the scheduling lists Chris Wilson ` (6 subsequent siblings) 8 siblings, 1 reply; 21+ messages in thread From: Chris Wilson @ 2020-01-15 8:33 UTC (permalink / raw) To: intel-gfx Currently, we skip error capture upon forced preemption. We apply forced preemption when there is a higher priority request that should be running but is being blocked, and we skip inline error capture so that the preemption request is not further delayed by a user controlled capture -- extending the denial of service. However, preemption reset is also used for heartbeats and regular GPU hangs. By skipping the error capture, we remove the ability to debug GPU hangs. In order to capture the error without delaying the preemption request further, we can do an out-of-line capture by removing the guilty request from the execution queue and scheduling a work to dump that request. When removing a request, we need to remove the entire context and all descendants from the execution queue, so that they do not jump past. 
Closes: https://gitlab.freedesktop.org/drm/intel/issues/738 Fixes: 3a7a92aba8fb ("drm/i915/execlists: Force preemption") Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> --- drivers/gpu/drm/i915/gt/intel_lrc.c | 120 +++++++++++++++++++++++++++- 1 file changed, 118 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 43c19dc9c0c7..a84477df32bd 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -2392,7 +2392,6 @@ static void __execlists_hold(struct i915_request *rq) } while(rq); } -__maybe_unused static void execlists_hold(struct intel_engine_cs *engine, struct i915_request *rq) { @@ -2472,7 +2471,6 @@ static void __execlists_unhold(struct i915_request *rq) } while(rq); } -__maybe_unused static void execlists_unhold(struct intel_engine_cs *engine, struct i915_request *rq) { @@ -2492,6 +2490,121 @@ static void execlists_unhold(struct intel_engine_cs *engine, spin_unlock_irq(&engine->active.lock); } +struct execlists_capture { + struct work_struct work; + struct i915_request *rq; + struct i915_gpu_coredump *error; +}; + +static void execlists_capture_work(struct work_struct *work) +{ + struct execlists_capture *cap = container_of(work, typeof(*cap), work); + const gfp_t gfp = GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN; + struct intel_engine_cs *engine = cap->rq->engine; + struct intel_gt_coredump *gt = cap->error->gt; + struct intel_engine_capture_vma *vma; + + /* Compress all the objects attached to the request, slow! 
*/ + vma = intel_engine_coredump_add_request(gt->engine, cap->rq, gfp); + if (vma) { + struct i915_vma_compress *compress = + i915_vma_capture_prepare(gt); + + intel_engine_coredump_add_vma(gt->engine, vma, compress); + i915_vma_capture_finish(gt, compress); + } + + gt->simulated = gt->engine->simulated; + cap->error->simulated = gt->simulated; + + /* Publish the error state, and announce it to the world */ + i915_error_state_store(cap->error); + i915_gpu_coredump_put(cap->error); + + /* Return this request and all that depend upon it for signaling */ + execlists_unhold(engine, cap->rq); + + kfree(cap); +} + +static struct i915_gpu_coredump *capture_regs(struct intel_engine_cs *engine) +{ + const gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN; + struct i915_gpu_coredump *e; + + e = i915_gpu_coredump_alloc(engine->i915, gfp); + if (!e) + return NULL; + + e->gt = intel_gt_coredump_alloc(engine->gt, gfp); + if (!e->gt) + goto err; + + e->gt->engine = intel_engine_coredump_alloc(engine, gfp); + if (!e->gt->engine) + goto err_gt; + + return e; + +err_gt: + kfree(e->gt); +err: + kfree(e); + return NULL; +} + +static void execlists_capture(struct intel_engine_cs *engine) +{ + struct execlists_capture *cap; + + if (!IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)) + return; + + cap = kmalloc(sizeof(*cap), GFP_ATOMIC); + if (!cap) + return; + + cap->rq = execlists_active(&engine->execlists); + GEM_BUG_ON(!cap->rq); + + cap->rq = active_request(cap->rq->context->timeline, cap->rq); + + /* + * We need to _quickly_ capture the engine state before we reset. + * We are inside an atomic section (softirq) here and we are delaying + * the forced preemption event. + */ + cap->error = capture_regs(engine); + if (!cap->error) + goto err_free; + + if (i915_request_completed(cap->rq)) /* oops, not so guilty! */ + goto err_store; + + /* + * Remove the request from the execlists queue, and take ownership + * of the request. 
We pass it to our worker who will _slowly_ compress + * all the pages the _user_ requested for debugging their batch, after + * which we return it to the queue for signaling. + * + * By removing them from the execlists queue, we also remove the + * requests from being processed by __unwind_incomplete_requests() + * during the intel_engine_reset(), and so they will *not* be replayed + * afterwards. + */ + execlists_hold(engine, cap->rq); + + INIT_WORK(&cap->work, execlists_capture_work); + schedule_work(&cap->work); + return; + +err_store: + i915_error_state_store(cap->error); + i915_gpu_coredump_put(cap->error); +err_free: + kfree(cap); +} + static noinline void preempt_reset(struct intel_engine_cs *engine) { const unsigned int bit = I915_RESET_ENGINE + engine->id; @@ -2509,6 +2622,9 @@ static noinline void preempt_reset(struct intel_engine_cs *engine) ENGINE_TRACE(engine, "preempt timeout %lu+%ums\n", READ_ONCE(engine->props.preempt_timeout_ms), jiffies_to_msecs(jiffies - engine->execlists.preempt.expires)); + + ring_set_paused(engine, 1); /* Freeze the request in place */ + execlists_capture(engine); intel_engine_reset(engine, "preemption time out"); tasklet_enable(&engine->execlists.tasklet); -- 2.25.0 _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply related [flat|nested] 21+ messages in thread
* Re: [Intel-gfx] [PATCH 3/3] drm/i915/execlists: Offline error capture 2020-01-15 8:33 ` [Intel-gfx] [PATCH 3/3] drm/i915/execlists: Offline error capture Chris Wilson @ 2020-01-16 17:22 ` Tvrtko Ursulin 2020-01-16 17:48 ` Chris Wilson 0 siblings, 1 reply; 21+ messages in thread From: Tvrtko Ursulin @ 2020-01-16 17:22 UTC (permalink / raw) To: Chris Wilson, intel-gfx On 15/01/2020 08:33, Chris Wilson wrote: > Currently, we skip error capture upon forced preemption. We apply forced > preemption when there is a higher priority request that should be > running but is being blocked, and we skip inline error capture so that > the preemption request is not further delayed by a user controlled > capture -- extending the denial of service. > > However, preemption reset is also used for heartbeats and regular GPU > hangs. By skipping the error capture, we remove the ability to debug GPU > hangs. > > In order to capture the error without delaying the preemption request > further, we can do an out-of-line capture by removing the guilty request > from the execution queue and scheduling a work to dump that request. > When removing a request, we need to remove the entire context and all > descendants from the execution queue, so that they do not jump past. 
> > Closes: https://gitlab.freedesktop.org/drm/intel/issues/738 > Fixes: 3a7a92aba8fb ("drm/i915/execlists: Force preemption") > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > --- > drivers/gpu/drm/i915/gt/intel_lrc.c | 120 +++++++++++++++++++++++++++- > 1 file changed, 118 insertions(+), 2 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c > index 43c19dc9c0c7..a84477df32bd 100644 > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c > @@ -2392,7 +2392,6 @@ static void __execlists_hold(struct i915_request *rq) > } while(rq); > } > > -__maybe_unused > static void execlists_hold(struct intel_engine_cs *engine, > struct i915_request *rq) > { > @@ -2472,7 +2471,6 @@ static void __execlists_unhold(struct i915_request *rq) > } while(rq); > } > > -__maybe_unused > static void execlists_unhold(struct intel_engine_cs *engine, > struct i915_request *rq) > { > @@ -2492,6 +2490,121 @@ static void execlists_unhold(struct intel_engine_cs *engine, > spin_unlock_irq(&engine->active.lock); > } > > +struct execlists_capture { > + struct work_struct work; > + struct i915_request *rq; > + struct i915_gpu_coredump *error; > +}; > + > +static void execlists_capture_work(struct work_struct *work) > +{ > + struct execlists_capture *cap = container_of(work, typeof(*cap), work); > + const gfp_t gfp = GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN; > + struct intel_engine_cs *engine = cap->rq->engine; > + struct intel_gt_coredump *gt = cap->error->gt; > + struct intel_engine_capture_vma *vma; > + > + /* Compress all the objects attached to the request, slow! 
*/ > + vma = intel_engine_coredump_add_request(gt->engine, cap->rq, gfp); > + if (vma) { > + struct i915_vma_compress *compress = > + i915_vma_capture_prepare(gt); > + > + intel_engine_coredump_add_vma(gt->engine, vma, compress); > + i915_vma_capture_finish(gt, compress); > + } > + > + gt->simulated = gt->engine->simulated; > + cap->error->simulated = gt->simulated; > + > + /* Publish the error state, and announce it to the world */ > + i915_error_state_store(cap->error); > + i915_gpu_coredump_put(cap->error); > + > + /* Return this request and all that depend upon it for signaling */ > + execlists_unhold(engine, cap->rq); > + > + kfree(cap); > +} > + > +static struct i915_gpu_coredump *capture_regs(struct intel_engine_cs *engine) > +{ > + const gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN; > + struct i915_gpu_coredump *e; > + > + e = i915_gpu_coredump_alloc(engine->i915, gfp); > + if (!e) > + return NULL; > + > + e->gt = intel_gt_coredump_alloc(engine->gt, gfp); > + if (!e->gt) > + goto err; > + > + e->gt->engine = intel_engine_coredump_alloc(engine, gfp); > + if (!e->gt->engine) > + goto err_gt; > + > + return e; > + > +err_gt: > + kfree(e->gt); > +err: > + kfree(e); > + return NULL; > +} > + > +static void execlists_capture(struct intel_engine_cs *engine) > +{ > + struct execlists_capture *cap; > + > + if (!IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)) > + return; > + > + cap = kmalloc(sizeof(*cap), GFP_ATOMIC); > + if (!cap) > + return; > + > + cap->rq = execlists_active(&engine->execlists); > + GEM_BUG_ON(!cap->rq); > + > + cap->rq = active_request(cap->rq->context->timeline, cap->rq); Old code, but why is active_request taking the timeline as a separate param when it always seems to be rq->context->timeline? > + > + /* > + * We need to _quickly_ capture the engine state before we reset. > + * We are inside an atomic section (softirq) here and we are delaying > + * the forced preemption event. 
> + */ > + cap->error = capture_regs(engine); > + if (!cap->error) > + goto err_free; > + > + if (i915_request_completed(cap->rq)) /* oops, not so guilty! */ > + goto err_store; Should this be a bug on? Doesn't look like active_request() can return a non-completed request. Hm I guess we can make a wrong decision to reset the engine. But in any case, if request has completed in the meantime, why go to i915_error_state_store which will log a hang in dmesg? > + > + /* > + * Remove the request from the execlists queue, and take ownership > + * of the request. We pass it to our worker who will _slowly_ compress > + * all the pages the _user_ requested for debugging their batch, after > + * which we return it to the queue for signaling. > + * > + * By removing them from the execlists queue, we also remove the > + * requests from being processed by __unwind_incomplete_requests() > + * during the intel_engine_reset(), and so they will *not* be replayed > + * afterwards. > + */ > + execlists_hold(engine, cap->rq); > + > + INIT_WORK(&cap->work, execlists_capture_work); > + schedule_work(&cap->work); > + return; > + > +err_store: > + i915_error_state_store(cap->error); > + i915_gpu_coredump_put(cap->error); > +err_free: > + kfree(cap); > +} > + > static noinline void preempt_reset(struct intel_engine_cs *engine) > { > const unsigned int bit = I915_RESET_ENGINE + engine->id; > @@ -2509,6 +2622,9 @@ static noinline void preempt_reset(struct intel_engine_cs *engine) > ENGINE_TRACE(engine, "preempt timeout %lu+%ums\n", > READ_ONCE(engine->props.preempt_timeout_ms), > jiffies_to_msecs(jiffies - engine->execlists.preempt.expires)); > + > + ring_set_paused(engine, 1); /* Freeze the request in place */ Who unsets this flag? 
> + execlists_capture(engine); > intel_engine_reset(engine, "preemption time out"); > > tasklet_enable(&engine->execlists.tasklet); > Regards, Tvrtko _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [Intel-gfx] [PATCH 3/3] drm/i915/execlists: Offline error capture 2020-01-16 17:22 ` Tvrtko Ursulin @ 2020-01-16 17:48 ` Chris Wilson 2020-01-16 18:14 ` Tvrtko Ursulin 0 siblings, 1 reply; 21+ messages in thread From: Chris Wilson @ 2020-01-16 17:48 UTC (permalink / raw) To: Tvrtko Ursulin, intel-gfx Quoting Tvrtko Ursulin (2020-01-16 17:22:10) > > On 15/01/2020 08:33, Chris Wilson wrote: > > Currently, we skip error capture upon forced preemption. We apply forced > > preemption when there is a higher priority request that should be > > running but is being blocked, and we skip inline error capture so that > > the preemption request is not further delayed by a user controlled > > capture -- extending the denial of service. > > > > However, preemption reset is also used for heartbeats and regular GPU > > hangs. By skipping the error capture, we remove the ability to debug GPU > > hangs. > > > > In order to capture the error without delaying the preemption request > > further, we can do an out-of-line capture by removing the guilty request > > from the execution queue and scheduling a work to dump that request. > > When removing a request, we need to remove the entire context and all > > descendants from the execution queue, so that they do not jump past. 
> > > > Closes: https://gitlab.freedesktop.org/drm/intel/issues/738 > > Fixes: 3a7a92aba8fb ("drm/i915/execlists: Force preemption") > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > > Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com> > > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > > --- > > drivers/gpu/drm/i915/gt/intel_lrc.c | 120 +++++++++++++++++++++++++++- > > 1 file changed, 118 insertions(+), 2 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c > > index 43c19dc9c0c7..a84477df32bd 100644 > > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c > > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c > > @@ -2392,7 +2392,6 @@ static void __execlists_hold(struct i915_request *rq) > > } while(rq); > > } > > > > -__maybe_unused > > static void execlists_hold(struct intel_engine_cs *engine, > > struct i915_request *rq) > > { > > @@ -2472,7 +2471,6 @@ static void __execlists_unhold(struct i915_request *rq) > > } while(rq); > > } > > > > -__maybe_unused > > static void execlists_unhold(struct intel_engine_cs *engine, > > struct i915_request *rq) > > { > > @@ -2492,6 +2490,121 @@ static void execlists_unhold(struct intel_engine_cs *engine, > > spin_unlock_irq(&engine->active.lock); > > } > > > > +struct execlists_capture { > > + struct work_struct work; > > + struct i915_request *rq; > > + struct i915_gpu_coredump *error; > > +}; > > + > > +static void execlists_capture_work(struct work_struct *work) > > +{ > > + struct execlists_capture *cap = container_of(work, typeof(*cap), work); > > + const gfp_t gfp = GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN; > > + struct intel_engine_cs *engine = cap->rq->engine; > > + struct intel_gt_coredump *gt = cap->error->gt; > > + struct intel_engine_capture_vma *vma; > > + > > + /* Compress all the objects attached to the request, slow! 
*/ > > + vma = intel_engine_coredump_add_request(gt->engine, cap->rq, gfp); > > + if (vma) { > > + struct i915_vma_compress *compress = > > + i915_vma_capture_prepare(gt); > > + > > + intel_engine_coredump_add_vma(gt->engine, vma, compress); > > + i915_vma_capture_finish(gt, compress); > > + } > > + > > + gt->simulated = gt->engine->simulated; > > + cap->error->simulated = gt->simulated; > > + > > + /* Publish the error state, and announce it to the world */ > > + i915_error_state_store(cap->error); > > + i915_gpu_coredump_put(cap->error); > > + > > + /* Return this request and all that depend upon it for signaling */ > > + execlists_unhold(engine, cap->rq); > > + > > + kfree(cap); > > +} > > + > > +static struct i915_gpu_coredump *capture_regs(struct intel_engine_cs *engine) > > +{ > > + const gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN; > > + struct i915_gpu_coredump *e; > > + > > + e = i915_gpu_coredump_alloc(engine->i915, gfp); > > + if (!e) > > + return NULL; > > + > > + e->gt = intel_gt_coredump_alloc(engine->gt, gfp); > > + if (!e->gt) > > + goto err; > > + > > + e->gt->engine = intel_engine_coredump_alloc(engine, gfp); > > + if (!e->gt->engine) > > + goto err_gt; > > + > > + return e; > > + > > +err_gt: > > + kfree(e->gt); > > +err: > > + kfree(e); > > + return NULL; > > +} > > + > > +static void execlists_capture(struct intel_engine_cs *engine) > > +{ > > + struct execlists_capture *cap; > > + > > + if (!IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)) > > + return; > > + > > + cap = kmalloc(sizeof(*cap), GFP_ATOMIC); > > + if (!cap) > > + return; > > + > > + cap->rq = execlists_active(&engine->execlists); > > + GEM_BUG_ON(!cap->rq); > > + > > + cap->rq = active_request(cap->rq->context->timeline, cap->rq); > > Old code, but why is active_request taking the timeline as a separate > param when it always seems to be rq->context->timeline? It grew out of walking along the engine without a request. Old habits. 
> > + /* > > + * We need to _quickly_ capture the engine state before we reset. > > + * We are inside an atomic section (softirq) here and we are delaying > > + * the forced preemption event. > > + */ > > + cap->error = capture_regs(engine); > > + if (!cap->error) > > + goto err_free; > > + > > + if (i915_request_completed(cap->rq)) /* oops, not so guilty! */ > > + goto err_store; > > Should this be a bug on? Doesn't look active_request() can return a > non-completed request. Hm I guess we can make a wrong decision to reset > the engine. Aye. Until we actually invoke the reset, the engine is still active and so may have advanced. We call ring_set_paused() so it doesn't get too far ahead, but that still lets the breadcrumb tick over, so it is still possible for the active_request() to complete (but no more). > But in any case, if request has completed in the meantime, why go to > i915_error_state_store which will log a hang in dmesg? Because we are about to call intel_reset_engine(), so want some debug clue as to why we got into a situation where we invoked the forced preemption. I thought it might be useful to see the engine state, and to drop the "oops, please file a bug request" because of the reset. > > + /* > > + * Remove the request from the execlists queue, and take ownership > > + * of the request. We pass it to our worker who will _slowly_ compress > > + * all the pages the _user_ requested for debugging their batch, after > > + * which we return it to the queue for signaling. > > + * > > + * By removing them from the execlists queue, we also remove the > > + * requests from being processed by __unwind_incomplete_requests() > > + * during the intel_engine_reset(), and so they will *not* be replayed > > + * afterwards. 
> > + */ > > + execlists_hold(engine, cap->rq); > > + > > + INIT_WORK(&cap->work, execlists_capture_work); > > + schedule_work(&cap->work); > > + return; > > + > > +err_store: > > + i915_error_state_store(cap->error); > > + i915_gpu_coredump_put(cap->error); > > +err_free: > > + kfree(cap); > > +} > > + > > static noinline void preempt_reset(struct intel_engine_cs *engine) > > { > > const unsigned int bit = I915_RESET_ENGINE + engine->id; > > @@ -2509,6 +2622,9 @@ static noinline void preempt_reset(struct intel_engine_cs *engine) > > ENGINE_TRACE(engine, "preempt timeout %lu+%ums\n", > > READ_ONCE(engine->props.preempt_timeout_ms), > > jiffies_to_msecs(jiffies - engine->execlists.preempt.expires)); > > + > > + ring_set_paused(engine, 1); /* Freeze the request in place */ > > Who unsets this flags? Reset -> reset_csb_pointers -> ring_set_paused(0). -Chris _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [Intel-gfx] [PATCH 3/3] drm/i915/execlists: Offline error capture 2020-01-16 17:48 ` Chris Wilson @ 2020-01-16 18:14 ` Tvrtko Ursulin 2020-01-16 18:32 ` Chris Wilson 0 siblings, 1 reply; 21+ messages in thread From: Tvrtko Ursulin @ 2020-01-16 18:14 UTC (permalink / raw) To: Chris Wilson, intel-gfx On 16/01/2020 17:48, Chris Wilson wrote: > Quoting Tvrtko Ursulin (2020-01-16 17:22:10) >> >> On 15/01/2020 08:33, Chris Wilson wrote: >>> Currently, we skip error capture upon forced preemption. We apply forced >>> preemption when there is a higher priority request that should be >>> running but is being blocked, and we skip inline error capture so that >>> the preemption request is not further delayed by a user controlled >>> capture -- extending the denial of service. >>> >>> However, preemption reset is also used for heartbeats and regular GPU >>> hangs. By skipping the error capture, we remove the ability to debug GPU >>> hangs. >>> >>> In order to capture the error without delaying the preemption request >>> further, we can do an out-of-line capture by removing the guilty request >>> from the execution queue and scheduling a work to dump that request. >>> When removing a request, we need to remove the entire context and all >>> descendants from the execution queue, so that they do not jump past. 
>>> >>> Closes: https://gitlab.freedesktop.org/drm/intel/issues/738 >>> Fixes: 3a7a92aba8fb ("drm/i915/execlists: Force preemption") >>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> >>> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com> >>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> >>> --- >>> drivers/gpu/drm/i915/gt/intel_lrc.c | 120 +++++++++++++++++++++++++++- >>> 1 file changed, 118 insertions(+), 2 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c >>> index 43c19dc9c0c7..a84477df32bd 100644 >>> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c >>> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c >>> @@ -2392,7 +2392,6 @@ static void __execlists_hold(struct i915_request *rq) >>> } while(rq); >>> } >>> >>> -__maybe_unused >>> static void execlists_hold(struct intel_engine_cs *engine, >>> struct i915_request *rq) >>> { >>> @@ -2472,7 +2471,6 @@ static void __execlists_unhold(struct i915_request *rq) >>> } while(rq); >>> } >>> >>> -__maybe_unused >>> static void execlists_unhold(struct intel_engine_cs *engine, >>> struct i915_request *rq) >>> { >>> @@ -2492,6 +2490,121 @@ static void execlists_unhold(struct intel_engine_cs *engine, >>> spin_unlock_irq(&engine->active.lock); >>> } >>> >>> +struct execlists_capture { >>> + struct work_struct work; >>> + struct i915_request *rq; >>> + struct i915_gpu_coredump *error; >>> +}; >>> + >>> +static void execlists_capture_work(struct work_struct *work) >>> +{ >>> + struct execlists_capture *cap = container_of(work, typeof(*cap), work); >>> + const gfp_t gfp = GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN; >>> + struct intel_engine_cs *engine = cap->rq->engine; >>> + struct intel_gt_coredump *gt = cap->error->gt; >>> + struct intel_engine_capture_vma *vma; >>> + >>> + /* Compress all the objects attached to the request, slow! 
*/ >>> + vma = intel_engine_coredump_add_request(gt->engine, cap->rq, gfp); >>> + if (vma) { >>> + struct i915_vma_compress *compress = >>> + i915_vma_capture_prepare(gt); >>> + >>> + intel_engine_coredump_add_vma(gt->engine, vma, compress); >>> + i915_vma_capture_finish(gt, compress); >>> + } >>> + >>> + gt->simulated = gt->engine->simulated; >>> + cap->error->simulated = gt->simulated; >>> + >>> + /* Publish the error state, and announce it to the world */ >>> + i915_error_state_store(cap->error); >>> + i915_gpu_coredump_put(cap->error); >>> + >>> + /* Return this request and all that depend upon it for signaling */ >>> + execlists_unhold(engine, cap->rq); >>> + >>> + kfree(cap); >>> +} >>> + >>> +static struct i915_gpu_coredump *capture_regs(struct intel_engine_cs *engine) >>> +{ >>> + const gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN; >>> + struct i915_gpu_coredump *e; >>> + >>> + e = i915_gpu_coredump_alloc(engine->i915, gfp); >>> + if (!e) >>> + return NULL; >>> + >>> + e->gt = intel_gt_coredump_alloc(engine->gt, gfp); >>> + if (!e->gt) >>> + goto err; >>> + >>> + e->gt->engine = intel_engine_coredump_alloc(engine, gfp); >>> + if (!e->gt->engine) >>> + goto err_gt; >>> + >>> + return e; >>> + >>> +err_gt: >>> + kfree(e->gt); >>> +err: >>> + kfree(e); >>> + return NULL; >>> +} >>> + >>> +static void execlists_capture(struct intel_engine_cs *engine) >>> +{ >>> + struct execlists_capture *cap; >>> + >>> + if (!IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)) >>> + return; >>> + >>> + cap = kmalloc(sizeof(*cap), GFP_ATOMIC); >>> + if (!cap) >>> + return; >>> + >>> + cap->rq = execlists_active(&engine->execlists); >>> + GEM_BUG_ON(!cap->rq); >>> + >>> + cap->rq = active_request(cap->rq->context->timeline, cap->rq); >> >> Old code, but why is active_request taking the timeline as a separate >> param when it always seems to be rq->context->timeline? > > It grew out of walking along the engine without a request. Old habits. 
> >>> + /* >>> + * We need to _quickly_ capture the engine state before we reset. >>> + * We are inside an atomic section (softirq) here and we are delaying >>> + * the forced preemption event. >>> + */ >>> + cap->error = capture_regs(engine); >>> + if (!cap->error) >>> + goto err_free; >>> + >>> + if (i915_request_completed(cap->rq)) /* oops, not so guilty! */ >>> + goto err_store; >> >> Should this be a bug on? Doesn't look active_request() can return a >> non-completed request. Hm I guess we can make a wrong decision to reset >> the engine. > > Aye. Until we actually invoke the reset, the engine is still active and > so may have advanced. We call ring_set_paused() so it doesn't get too > far ahead, but that still lets the breadcrumb tick over, so it is still > possible for the active_request() to complete (but no more). ... >> But in any case, if request has completed in the meantime, why go to >> i915_error_state_store which will log a hang in dmesg? > > Because we are about to call intel_reset_engine(), so want some debug > clue as to why we got into a situation where we invoked the forced > preemption. I thought it might be useful to see the engine state, and to > drop the "oops, please file a bug request" because of the reset. ... so we could still decide to bail out if request completed in the meantime and give up on the whole reset business. Why not if not? I guess it is of little practical difference, micro-second here or there before a potential false positive. >>> + /* >>> + * Remove the request from the execlists queue, and take ownership >>> + * of the request. We pass it to our worker who will _slowly_ compress >>> + * all the pages the _user_ requested for debugging their batch, after >>> + * which we return it to the queue for signaling. 
>>> + * >>> + * By removing them from the execlists queue, we also remove the >>> + * requests from being processed by __unwind_incomplete_requests() >>> + * during the intel_engine_reset(), and so they will *not* be replayed >>> + * afterwards. >>> + */ >>> + execlists_hold(engine, cap->rq); >>> + >>> + INIT_WORK(&cap->work, execlists_capture_work); >>> + schedule_work(&cap->work); >>> + return; >>> + >>> +err_store: >>> + i915_error_state_store(cap->error); >>> + i915_gpu_coredump_put(cap->error); >>> +err_free: >>> + kfree(cap); >>> +} >>> + >>> static noinline void preempt_reset(struct intel_engine_cs *engine) >>> { >>> const unsigned int bit = I915_RESET_ENGINE + engine->id; >>> @@ -2509,6 +2622,9 @@ static noinline void preempt_reset(struct intel_engine_cs *engine) >>> ENGINE_TRACE(engine, "preempt timeout %lu+%ums\n", >>> READ_ONCE(engine->props.preempt_timeout_ms), >>> jiffies_to_msecs(jiffies - engine->execlists.preempt.expires)); >>> + >>> + ring_set_paused(engine, 1); /* Freeze the request in place */ >> >> Who unsets this flags? > > Reset -> reset_csb_pointers -> ring_set_paused(0). Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Regards, Tvrtko _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [Intel-gfx] [PATCH 3/3] drm/i915/execlists: Offline error capture 2020-01-16 18:14 ` Tvrtko Ursulin @ 2020-01-16 18:32 ` Chris Wilson 0 siblings, 0 replies; 21+ messages in thread From: Chris Wilson @ 2020-01-16 18:32 UTC (permalink / raw) To: Tvrtko Ursulin, intel-gfx Quoting Tvrtko Ursulin (2020-01-16 18:14:24) > > On 16/01/2020 17:48, Chris Wilson wrote: > > Quoting Tvrtko Ursulin (2020-01-16 17:22:10) > >> > >> On 15/01/2020 08:33, Chris Wilson wrote: > >>> + /* > >>> + * We need to _quickly_ capture the engine state before we reset. > >>> + * We are inside an atomic section (softirq) here and we are delaying > >>> + * the forced preemption event. > >>> + */ > >>> + cap->error = capture_regs(engine); > >>> + if (!cap->error) > >>> + goto err_free; > >>> + > >>> + if (i915_request_completed(cap->rq)) /* oops, not so guilty! */ > >>> + goto err_store; > >> > >> Should this be a bug on? Doesn't look active_request() can return a > >> non-completed request. Hm I guess we can make a wrong decision to reset > >> the engine. > > > > Aye. Until we actually invoke the reset, the engine is still active and > > so may have advanced. We call ring_set_paused() so it doesn't get too > > far ahead, but that still lets the breadcrumb tick over, so it is still > > possible for the active_request() to complete (but no more). > > ... > > >> But in any case, if request has completed in the meantime, why go to > >> i915_error_state_store which will log a hang in dmesg? > > > > Because we are about to call intel_reset_engine(), so want some debug > > clue as to why we got into a situation where we invoked the forced > > preemption. I thought it might be useful to see the engine state, and to > > drop the "oops, please file a bug request" because of the reset. > > ... so we could still decide to bail out if request completed in the > meantime and give up on the whole reset business. Why not if not? 
I > guess it is of little practical difference, micro-second here or there > before a potential false positive. (When I first added the check here, it was following a hacky __intel_gt_reset() to ensure the engine had stopped, so I needed to always do a real reset to cleanup the mess.) Hmm. I was about to say "but the preemption window expired and we need to reset". However, if we have completed this request and having done so since our earlier inspection, it must also hit an arbitration point where the preemption will take place. So yes, we can bail out here quietly if we find ourselves with a completed request at the last moment. For simplicity, I'm just going to ignore the troublemaker and put it on the hold list. * Note that because we have not yet reset the engine at this point, * it is possible for the request that we have identified as being * guilty, did in fact complete and we will then hit an arbitration * point allowing the preemption to succeed. The likelihood of that * is very low (as the capturing of the engine registers should be * fast enough to run inside an irq-off atomic section!), so we will * simply hold that request accountable for being non-preemptible * long enough to force the reset. We will then skip the completed request when it comes time to dequeue. Business as usual in the land of preempt-to-busy. -Chris _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply [flat|nested] 21+ messages in thread
* [Intel-gfx] [PATCH v2] drm/i915: Keep track of request among the scheduling lists 2020-01-15 8:33 [Intel-gfx] [PATCH 1/3] drm/i915: Use common priotree lists for virtual engine Chris Wilson 2020-01-15 8:33 ` [Intel-gfx] [PATCH 2/3] drm/i915/gt: Allow temporary suspension of inflight requests Chris Wilson 2020-01-15 8:33 ` [Intel-gfx] [PATCH 3/3] drm/i915/execlists: Offline error capture Chris Wilson @ 2020-01-15 9:02 ` Chris Wilson 2020-01-16 17:23 ` Tvrtko Ursulin 2020-01-15 9:44 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [v2] drm/i915: Keep track of request among the scheduling lists (rev2) Patchwork ` (5 subsequent siblings) 8 siblings, 1 reply; 21+ messages in thread From: Chris Wilson @ 2020-01-15 9:02 UTC (permalink / raw) To: intel-gfx If we keep track of when the i915_request.sched.link is on the HW runlist, or in the priority queue we can simplify our interactions with the request (such as during rescheduling). This also simplifies the next patch where we introduce a new in-between list, for requests that are ready but neither on the run list or in the queue. v2: Update i915_sched_node.link explanation for current usage where it is a link on both the queue and on the runlists. 
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> --- drivers/gpu/drm/i915/gt/intel_lrc.c | 13 ++++++++----- drivers/gpu/drm/i915/i915_request.c | 4 +++- drivers/gpu/drm/i915/i915_request.h | 17 +++++++++++++++++ drivers/gpu/drm/i915/i915_scheduler.c | 22 ++++++++++------------ 4 files changed, 38 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 9e430590fb3a..f0cbd240a8c2 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -985,6 +985,8 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); list_move(&rq->sched.link, pl); + set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); + active = rq; } else { struct intel_engine_cs *owner = rq->context->engine; @@ -2430,11 +2432,12 @@ static void execlists_preempt(struct timer_list *timer) } static void queue_request(struct intel_engine_cs *engine, - struct i915_sched_node *node, - int prio) + struct i915_request *rq) { - GEM_BUG_ON(!list_empty(&node->link)); - list_add_tail(&node->link, i915_sched_lookup_priolist(engine, prio)); + GEM_BUG_ON(!list_empty(&rq->sched.link)); + list_add_tail(&rq->sched.link, + i915_sched_lookup_priolist(engine, rq_prio(rq))); + set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); } static void __submit_queue_imm(struct intel_engine_cs *engine) @@ -2470,7 +2473,7 @@ static void execlists_submit_request(struct i915_request *request) /* Will be called from irq-context when using foreign fences. 
*/ spin_lock_irqsave(&engine->active.lock, flags); - queue_request(engine, &request->sched, rq_prio(request)); + queue_request(engine, request); GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); GEM_BUG_ON(list_empty(&request->sched.link)); diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index be185886e4fc..9ed0d3bc7249 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -408,8 +408,10 @@ bool __i915_request_submit(struct i915_request *request) xfer: /* We may be recursing from the signal callback of another i915 fence */ spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); - if (!test_and_set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)) + if (!test_and_set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)) { list_move_tail(&request->sched.link, &engine->active.requests); + clear_bit(I915_FENCE_FLAG_PQUEUE, &request->fence.flags); + } if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags) && diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 031433691a06..a9f0d3c8d8b7 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -70,6 +70,18 @@ enum { */ I915_FENCE_FLAG_ACTIVE = DMA_FENCE_FLAG_USER_BITS, + /* + * I915_FENCE_FLAG_PQUEUE - this request is ready for execution + * + * Using the scheduler, when a request is ready for execution it is put + * into the priority queue, and removed from the queue when transferred + * to the HW runlists. We want to track its membership within that + * queue so that we can easily check before rescheduling. 
+ * + * See i915_request_in_priority_queue() + */ + I915_FENCE_FLAG_PQUEUE, + /* * I915_FENCE_FLAG_SIGNAL - this request is currently on signal_list * @@ -361,6 +373,11 @@ static inline bool i915_request_is_active(const struct i915_request *rq) return test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); } +static inline bool i915_request_in_priority_queue(const struct i915_request *rq) +{ + return test_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); +} + /** * Returns true if seq1 is later than seq2. */ diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index bf87c70bfdd9..db3da81b7f05 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -326,20 +326,18 @@ static void __i915_schedule(struct i915_sched_node *node, node->attr.priority = prio; - if (list_empty(&node->link)) { - /* - * If the request is not in the priolist queue because - * it is not yet runnable, then it doesn't contribute - * to our preemption decisions. On the other hand, - * if the request is on the HW, it too is not in the - * queue; but in that case we may still need to reorder - * the inflight requests. - */ + /* + * Once the request is ready, it will be place into the + * priority lists and then onto the HW runlist. Before the + * request is ready, it does not contribute to our preemption + * decisions and we can safely ignore it, as it will, and + * any preemption required, be dealt with upon submission. 
+ * See engine->submit_request() + */ + if (list_empty(&node->link)) continue; - } - if (!intel_engine_is_virtual(engine) && - !i915_request_is_active(node_to_request(node))) { + if (i915_request_in_priority_queue(node_to_request(node))) { if (!cache.priolist) cache.priolist = i915_sched_lookup_priolist(engine, -- 2.25.0 _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply related [flat|nested] 21+ messages in thread
* Re: [Intel-gfx] [PATCH v2] drm/i915: Keep track of request among the scheduling lists 2020-01-15 9:02 ` [Intel-gfx] [PATCH v2] drm/i915: Keep track of request among the scheduling lists Chris Wilson @ 2020-01-16 17:23 ` Tvrtko Ursulin 0 siblings, 0 replies; 21+ messages in thread From: Tvrtko Ursulin @ 2020-01-16 17:23 UTC (permalink / raw) To: Chris Wilson, intel-gfx On 15/01/2020 09:02, Chris Wilson wrote: > If we keep track of when the i915_request.sched.link is on the HW > runlist, or in the priority queue we can simplify our interactions with > the request (such as during rescheduling). This also simplifies the next > patch where we introduce a new in-between list, for requests that are > ready but neither on the run list or in the queue. > > v2: Update i915_sched_node.link explanation for current usage where it > is a link on both the queue and on the runlists. > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > --- > drivers/gpu/drm/i915/gt/intel_lrc.c | 13 ++++++++----- > drivers/gpu/drm/i915/i915_request.c | 4 +++- > drivers/gpu/drm/i915/i915_request.h | 17 +++++++++++++++++ > drivers/gpu/drm/i915/i915_scheduler.c | 22 ++++++++++------------ > 4 files changed, 38 insertions(+), 18 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c > index 9e430590fb3a..f0cbd240a8c2 100644 > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c > @@ -985,6 +985,8 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) > GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); > > list_move(&rq->sched.link, pl); > + set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); > + > active = rq; > } else { > struct intel_engine_cs *owner = rq->context->engine; > @@ -2430,11 +2432,12 @@ static void execlists_preempt(struct timer_list *timer) > } > > static void queue_request(struct 
intel_engine_cs *engine, > - struct i915_sched_node *node, > - int prio) > + struct i915_request *rq) > { > - GEM_BUG_ON(!list_empty(&node->link)); > - list_add_tail(&node->link, i915_sched_lookup_priolist(engine, prio)); > + GEM_BUG_ON(!list_empty(&rq->sched.link)); > + list_add_tail(&rq->sched.link, > + i915_sched_lookup_priolist(engine, rq_prio(rq))); > + set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); > } > > static void __submit_queue_imm(struct intel_engine_cs *engine) > @@ -2470,7 +2473,7 @@ static void execlists_submit_request(struct i915_request *request) > /* Will be called from irq-context when using foreign fences. */ > spin_lock_irqsave(&engine->active.lock, flags); > > - queue_request(engine, &request->sched, rq_prio(request)); > + queue_request(engine, request); > > GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); > GEM_BUG_ON(list_empty(&request->sched.link)); > diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c > index be185886e4fc..9ed0d3bc7249 100644 > --- a/drivers/gpu/drm/i915/i915_request.c > +++ b/drivers/gpu/drm/i915/i915_request.c > @@ -408,8 +408,10 @@ bool __i915_request_submit(struct i915_request *request) > xfer: /* We may be recursing from the signal callback of another i915 fence */ > spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); > > - if (!test_and_set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)) > + if (!test_and_set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)) { > list_move_tail(&request->sched.link, &engine->active.requests); > + clear_bit(I915_FENCE_FLAG_PQUEUE, &request->fence.flags); > + } > > if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) && > !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags) && > diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h > index 031433691a06..a9f0d3c8d8b7 100644 > --- a/drivers/gpu/drm/i915/i915_request.h > +++ b/drivers/gpu/drm/i915/i915_request.h > @@ 
-70,6 +70,18 @@ enum { > */ > I915_FENCE_FLAG_ACTIVE = DMA_FENCE_FLAG_USER_BITS, > > + /* > + * I915_FENCE_FLAG_PQUEUE - this request is ready for execution > + * > + * Using the scheduler, when a request is ready for execution it is put > + * into the priority queue, and removed from the queue when transferred > + * to the HW runlists. We want to track its membership within that > + * queue so that we can easily check before rescheduling. > + * > + * See i915_request_in_priority_queue() > + */ > + I915_FENCE_FLAG_PQUEUE, > + > /* > * I915_FENCE_FLAG_SIGNAL - this request is currently on signal_list > * > @@ -361,6 +373,11 @@ static inline bool i915_request_is_active(const struct i915_request *rq) > return test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); > } > > +static inline bool i915_request_in_priority_queue(const struct i915_request *rq) > +{ > + return test_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); > +} > + > /** > * Returns true if seq1 is later than seq2. > */ > diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c > index bf87c70bfdd9..db3da81b7f05 100644 > --- a/drivers/gpu/drm/i915/i915_scheduler.c > +++ b/drivers/gpu/drm/i915/i915_scheduler.c > @@ -326,20 +326,18 @@ static void __i915_schedule(struct i915_sched_node *node, > > node->attr.priority = prio; > > - if (list_empty(&node->link)) { > - /* > - * If the request is not in the priolist queue because > - * it is not yet runnable, then it doesn't contribute > - * to our preemption decisions. On the other hand, > - * if the request is on the HW, it too is not in the > - * queue; but in that case we may still need to reorder > - * the inflight requests. > - */ > + /* > + * Once the request is ready, it will be place into the > + * priority lists and then onto the HW runlist. 
Before the > + * request is ready, it does not contribute to our preemption > + * decisions and we can safely ignore it, as it will, and > + * any preemption required, be dealt with upon submission. > + * See engine->submit_request() > + */ > + if (list_empty(&node->link)) > continue; > - } > > - if (!intel_engine_is_virtual(engine) && > - !i915_request_is_active(node_to_request(node))) { > + if (i915_request_in_priority_queue(node_to_request(node))) { > if (!cache.priolist) > cache.priolist = > i915_sched_lookup_priolist(engine, > Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Regards, Tvrtko _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply [flat|nested] 21+ messages in thread
* [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [v2] drm/i915: Keep track of request among the scheduling lists (rev2) 2020-01-15 8:33 [Intel-gfx] [PATCH 1/3] drm/i915: Use common priotree lists for virtual engine Chris Wilson ` (2 preceding siblings ...) 2020-01-15 9:02 ` [Intel-gfx] [PATCH v2] drm/i915: Keep track of request among the scheduling lists Chris Wilson @ 2020-01-15 9:44 ` Patchwork 2020-01-15 10:06 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork ` (4 subsequent siblings) 8 siblings, 0 replies; 21+ messages in thread From: Patchwork @ 2020-01-15 9:44 UTC (permalink / raw) To: Chris Wilson; +Cc: intel-gfx == Series Details == Series: series starting with [v2] drm/i915: Keep track of request among the scheduling lists (rev2) URL : https://patchwork.freedesktop.org/series/72048/ State : warning == Summary == $ dim checkpatch origin/drm-tip 201e1a96d586 drm/i915: Keep track of request among the scheduling lists e4a508266496 drm/i915/gt: Allow temporary suspension of inflight requests -:92: ERROR:SPACING: space required before the open parenthesis '(' #92: FILE: drivers/gpu/drm/i915/gt/intel_lrc.c:2392: + } while(rq); -:172: ERROR:SPACING: space required before the open parenthesis '(' #172: FILE: drivers/gpu/drm/i915/gt/intel_lrc.c:2472: + } while(rq); total: 2 errors, 0 warnings, 0 checks, 342 lines checked 4aab7f2eb367 drm/i915/execlists: Offline error capture _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply [flat|nested] 21+ messages in thread
* [Intel-gfx] ✓ Fi.CI.BAT: success for series starting with [v2] drm/i915: Keep track of request among the scheduling lists (rev2) 2020-01-15 8:33 [Intel-gfx] [PATCH 1/3] drm/i915: Use common priotree lists for virtual engine Chris Wilson ` (3 preceding siblings ...) 2020-01-15 9:44 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [v2] drm/i915: Keep track of request among the scheduling lists (rev2) Patchwork @ 2020-01-15 10:06 ` Patchwork 2020-01-15 10:06 ` [Intel-gfx] ✗ Fi.CI.BUILD: warning " Patchwork ` (3 subsequent siblings) 8 siblings, 0 replies; 21+ messages in thread From: Patchwork @ 2020-01-15 10:06 UTC (permalink / raw) To: Chris Wilson; +Cc: intel-gfx == Series Details == Series: series starting with [v2] drm/i915: Keep track of request among the scheduling lists (rev2) URL : https://patchwork.freedesktop.org/series/72048/ State : success == Summary == CI Bug Log - changes from CI_DRM_7748 -> Patchwork_16106 ==================================================== Summary ------- **SUCCESS** No regressions found. 
External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16106/index.html Known issues ------------ Here are the changes found in Patchwork_16106 that come from known issues: ### IGT changes ### #### Issues hit #### * igt@gem_ctx_switch@rcs0: - fi-icl-guc: [PASS][1] -> [INCOMPLETE][2] ([i915#140]) [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/fi-icl-guc/igt@gem_ctx_switch@rcs0.html [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16106/fi-icl-guc/igt@gem_ctx_switch@rcs0.html * igt@i915_module_load@reload-with-fault-injection: - fi-kbl-x1275: [PASS][3] -> [INCOMPLETE][4] ([i915#879]) [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/fi-kbl-x1275/igt@i915_module_load@reload-with-fault-injection.html [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16106/fi-kbl-x1275/igt@i915_module_load@reload-with-fault-injection.html * igt@i915_pm_rpm@module-reload: - fi-skl-6770hq: [PASS][5] -> [FAIL][6] ([i915#178]) [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/fi-skl-6770hq/igt@i915_pm_rpm@module-reload.html [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16106/fi-skl-6770hq/igt@i915_pm_rpm@module-reload.html * igt@i915_selftest@live_execlists: - fi-icl-y: [PASS][7] -> [DMESG-FAIL][8] ([fdo#108569]) [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/fi-icl-y/igt@i915_selftest@live_execlists.html [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16106/fi-icl-y/igt@i915_selftest@live_execlists.html * igt@i915_selftest@live_hangcheck: - fi-kbl-7500u: [PASS][9] -> [DMESG-FAIL][10] ([i915#889]) +7 similar issues [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/fi-kbl-7500u/igt@i915_selftest@live_hangcheck.html [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16106/fi-kbl-7500u/igt@i915_selftest@live_hangcheck.html * igt@i915_selftest@live_vma: - fi-kbl-7500u: [PASS][11] -> [DMESG-WARN][12] ([i915#889]) +23 similar issues [11]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/fi-kbl-7500u/igt@i915_selftest@live_vma.html [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16106/fi-kbl-7500u/igt@i915_selftest@live_vma.html #### Possible fixes #### * igt@gem_exec_gttfill@basic: - fi-bsw-n3050: [TIMEOUT][13] ([fdo#112271]) -> [PASS][14] [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/fi-bsw-n3050/igt@gem_exec_gttfill@basic.html [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16106/fi-bsw-n3050/igt@gem_exec_gttfill@basic.html * igt@i915_module_load@reload-with-fault-injection: - fi-skl-6700k2: [INCOMPLETE][15] ([i915#671]) -> [PASS][16] [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/fi-skl-6700k2/igt@i915_module_load@reload-with-fault-injection.html [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16106/fi-skl-6700k2/igt@i915_module_load@reload-with-fault-injection.html * igt@i915_selftest@live_blt: - fi-hsw-4770: [DMESG-FAIL][17] ([i915#563]) -> [PASS][18] [17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/fi-hsw-4770/igt@i915_selftest@live_blt.html [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16106/fi-hsw-4770/igt@i915_selftest@live_blt.html * igt@i915_selftest@live_gem_contexts: - fi-hsw-4770r: [DMESG-FAIL][19] ([i915#722]) -> [PASS][20] [19]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/fi-hsw-4770r/igt@i915_selftest@live_gem_contexts.html [20]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16106/fi-hsw-4770r/igt@i915_selftest@live_gem_contexts.html * igt@kms_chamelium@hdmi-hpd-fast: - fi-kbl-7500u: [FAIL][21] ([fdo#111096] / [i915#323]) -> [PASS][22] [21]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/fi-kbl-7500u/igt@kms_chamelium@hdmi-hpd-fast.html [22]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16106/fi-kbl-7500u/igt@kms_chamelium@hdmi-hpd-fast.html #### Warnings #### * igt@i915_selftest@live_blt: - fi-hsw-4770r: [DMESG-FAIL][23] ([i915#553] / [i915#725]) -> [DMESG-FAIL][24] ([i915#725]) 
[23]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/fi-hsw-4770r/igt@i915_selftest@live_blt.html [24]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16106/fi-hsw-4770r/igt@i915_selftest@live_blt.html * igt@kms_chamelium@common-hpd-after-suspend: - fi-icl-u2: [FAIL][25] ([i915#217]) -> [DMESG-WARN][26] ([IGT#4] / [i915#263]) [25]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/fi-icl-u2/igt@kms_chamelium@common-hpd-after-suspend.html [26]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16106/fi-icl-u2/igt@kms_chamelium@common-hpd-after-suspend.html {name}: This element is suppressed. This means it is ignored when computing the status of the difference (SUCCESS, WARNING, or FAILURE). [IGT#4]: https://gitlab.freedesktop.org/drm/igt-gpu-tools/issues/4 [fdo#108569]: https://bugs.freedesktop.org/show_bug.cgi?id=108569 [fdo#111096]: https://bugs.freedesktop.org/show_bug.cgi?id=111096 [fdo#112271]: https://bugs.freedesktop.org/show_bug.cgi?id=112271 [i915#140]: https://gitlab.freedesktop.org/drm/intel/issues/140 [i915#178]: https://gitlab.freedesktop.org/drm/intel/issues/178 [i915#217]: https://gitlab.freedesktop.org/drm/intel/issues/217 [i915#263]: https://gitlab.freedesktop.org/drm/intel/issues/263 [i915#323]: https://gitlab.freedesktop.org/drm/intel/issues/323 [i915#553]: https://gitlab.freedesktop.org/drm/intel/issues/553 [i915#563]: https://gitlab.freedesktop.org/drm/intel/issues/563 [i915#671]: https://gitlab.freedesktop.org/drm/intel/issues/671 [i915#722]: https://gitlab.freedesktop.org/drm/intel/issues/722 [i915#725]: https://gitlab.freedesktop.org/drm/intel/issues/725 [i915#879]: https://gitlab.freedesktop.org/drm/intel/issues/879 [i915#889]: https://gitlab.freedesktop.org/drm/intel/issues/889 [i915#937]: https://gitlab.freedesktop.org/drm/intel/issues/937 Participating hosts (42 -> 46) ------------------------------ Additional (8): fi-byt-j1900 fi-ivb-3770 fi-skl-lmem fi-blb-e6850 fi-byt-n2820 fi-bsw-nick fi-skl-6600u fi-snb-2600 Missing 
(4): fi-ctg-p8600 fi-ilk-m540 fi-byt-squawks fi-bsw-cyan Build changes ------------- * CI: CI-20190529 -> None * Linux: CI_DRM_7748 -> Patchwork_16106 CI-20190529: 20190529 CI_DRM_7748: 1793de9a4215356790b87608fcfc9e99eeb6954d @ git://anongit.freedesktop.org/gfx-ci/linux IGT_5365: e9ec0ed63b25c86861ffac3c8601cc4d1b910b65 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools Patchwork_16106: 4aab7f2eb367114a402d222d92c77f495553ff42 @ git://anongit.freedesktop.org/gfx-ci/linux == Kernel 32bit build == Warning: Kernel 32bit buildtest failed: https://intel-gfx-ci.01.org/Patchwork_16106/build_32bit.log CALL scripts/checksyscalls.sh CALL scripts/atomic/check-atomics.sh CHK include/generated/compile.h Kernel: arch/x86/boot/bzImage is ready (#1) Building modules, stage 2. MODPOST 122 modules ERROR: "__udivdi3" [drivers/gpu/drm/amd/amdgpu/amdgpu.ko] undefined! scripts/Makefile.modpost:93: recipe for target '__modpost' failed make[1]: *** [__modpost] Error 1 Makefile:1282: recipe for target 'modules' failed make: *** [modules] Error 2 == Linux commits == 4aab7f2eb367 drm/i915/execlists: Offline error capture e4a508266496 drm/i915/gt: Allow temporary suspension of inflight requests 201e1a96d586 drm/i915: Keep track of request among the scheduling lists == Logs == For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16106/index.html _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply [flat|nested] 21+ messages in thread
* [Intel-gfx] ✗ Fi.CI.BUILD: warning for series starting with [v2] drm/i915: Keep track of request among the scheduling lists (rev2) 2020-01-15 8:33 [Intel-gfx] [PATCH 1/3] drm/i915: Use common priotree lists for virtual engine Chris Wilson ` (4 preceding siblings ...) 2020-01-15 10:06 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork @ 2020-01-15 10:06 ` Patchwork 2020-01-15 14:37 ` [Intel-gfx] ✓ Fi.CI.BAT: success for series starting with [v2] drm/i915: Keep track of request among the scheduling lists (rev3) Patchwork ` (2 subsequent siblings) 8 siblings, 0 replies; 21+ messages in thread From: Patchwork @ 2020-01-15 10:06 UTC (permalink / raw) To: Chris Wilson; +Cc: intel-gfx == Series Details == Series: series starting with [v2] drm/i915: Keep track of request among the scheduling lists (rev2) URL : https://patchwork.freedesktop.org/series/72048/ State : warning == Summary == CALL scripts/checksyscalls.sh CALL scripts/atomic/check-atomics.sh CHK include/generated/compile.h Kernel: arch/x86/boot/bzImage is ready (#1) Building modules, stage 2. MODPOST 122 modules ERROR: "__udivdi3" [drivers/gpu/drm/amd/amdgpu/amdgpu.ko] undefined! scripts/Makefile.modpost:93: recipe for target '__modpost' failed make[1]: *** [__modpost] Error 1 Makefile:1282: recipe for target 'modules' failed make: *** [modules] Error 2 == Logs == For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16106/build_32bit.log _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply [flat|nested] 21+ messages in thread
* [Intel-gfx] ✓ Fi.CI.BAT: success for series starting with [v2] drm/i915: Keep track of request among the scheduling lists (rev3) 2020-01-15 8:33 [Intel-gfx] [PATCH 1/3] drm/i915: Use common priotree lists for virtual engine Chris Wilson ` (5 preceding siblings ...) 2020-01-15 10:06 ` [Intel-gfx] ✗ Fi.CI.BUILD: warning " Patchwork @ 2020-01-15 14:37 ` Patchwork 2020-01-15 14:37 ` [Intel-gfx] ✗ Fi.CI.BUILD: warning " Patchwork 2020-01-17 20:47 ` [Intel-gfx] ✗ Fi.CI.IGT: failure " Patchwork 8 siblings, 0 replies; 21+ messages in thread From: Patchwork @ 2020-01-15 14:37 UTC (permalink / raw) To: Chris Wilson; +Cc: intel-gfx == Series Details == Series: series starting with [v2] drm/i915: Keep track of request among the scheduling lists (rev3) URL : https://patchwork.freedesktop.org/series/72048/ State : success == Summary == CI Bug Log - changes from CI_DRM_7748 -> Patchwork_16108 ==================================================== Summary ------- **SUCCESS** No regressions found. External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/index.html Known issues ------------ Here are the changes found in Patchwork_16108 that come from known issues: ### IGT changes ### #### Issues hit #### * igt@i915_module_load@reload-with-fault-injection: - fi-bxt-dsi: [PASS][1] -> [DMESG-WARN][2] ([i915#889]) [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/fi-bxt-dsi/igt@i915_module_load@reload-with-fault-injection.html [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/fi-bxt-dsi/igt@i915_module_load@reload-with-fault-injection.html * igt@i915_pm_rpm@module-reload: - fi-kbl-guc: [PASS][3] -> [SKIP][4] ([fdo#109271]) [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/fi-kbl-guc/igt@i915_pm_rpm@module-reload.html [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/fi-kbl-guc/igt@i915_pm_rpm@module-reload.html - fi-skl-6770hq: [PASS][5] -> [DMESG-FAIL][6] ([i915#178] / [i915#889]) [5]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/fi-skl-6770hq/igt@i915_pm_rpm@module-reload.html [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/fi-skl-6770hq/igt@i915_pm_rpm@module-reload.html * igt@i915_selftest@live_active: - fi-skl-6770hq: [PASS][7] -> [DMESG-WARN][8] ([i915#889]) +22 similar issues [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/fi-skl-6770hq/igt@i915_selftest@live_active.html [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/fi-skl-6770hq/igt@i915_selftest@live_active.html * igt@i915_selftest@live_execlists: - fi-icl-y: [PASS][9] -> [DMESG-FAIL][10] ([fdo#108569]) [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/fi-icl-y/igt@i915_selftest@live_execlists.html [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/fi-icl-y/igt@i915_selftest@live_execlists.html * igt@i915_selftest@live_uncore: - fi-skl-6770hq: [PASS][11] -> [DMESG-FAIL][12] ([i915#889]) +7 similar issues [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/fi-skl-6770hq/igt@i915_selftest@live_uncore.html [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/fi-skl-6770hq/igt@i915_selftest@live_uncore.html #### Possible fixes #### * igt@gem_exec_gttfill@basic: - fi-bsw-n3050: [TIMEOUT][13] ([fdo#112271]) -> [PASS][14] [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/fi-bsw-n3050/igt@gem_exec_gttfill@basic.html [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/fi-bsw-n3050/igt@gem_exec_gttfill@basic.html * igt@i915_module_load@reload-with-fault-injection: - fi-skl-6700k2: [INCOMPLETE][15] ([i915#671]) -> [PASS][16] [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/fi-skl-6700k2/igt@i915_module_load@reload-with-fault-injection.html [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/fi-skl-6700k2/igt@i915_module_load@reload-with-fault-injection.html #### Warnings #### * igt@i915_selftest@live_blt: - fi-hsw-4770: [DMESG-FAIL][17] ([i915#563]) -> [DMESG-FAIL][18] 
([i915#770]) [17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/fi-hsw-4770/igt@i915_selftest@live_blt.html [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/fi-hsw-4770/igt@i915_selftest@live_blt.html * igt@kms_chamelium@common-hpd-after-suspend: - fi-icl-u2: [FAIL][19] ([i915#217]) -> [DMESG-WARN][20] ([IGT#4] / [i915#263]) [19]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/fi-icl-u2/igt@kms_chamelium@common-hpd-after-suspend.html [20]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/fi-icl-u2/igt@kms_chamelium@common-hpd-after-suspend.html {name}: This element is suppressed. This means it is ignored when computing the status of the difference (SUCCESS, WARNING, or FAILURE). [IGT#4]: https://gitlab.freedesktop.org/drm/igt-gpu-tools/issues/4 [fdo#108569]: https://bugs.freedesktop.org/show_bug.cgi?id=108569 [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271 [fdo#112271]: https://bugs.freedesktop.org/show_bug.cgi?id=112271 [i915#178]: https://gitlab.freedesktop.org/drm/intel/issues/178 [i915#217]: https://gitlab.freedesktop.org/drm/intel/issues/217 [i915#263]: https://gitlab.freedesktop.org/drm/intel/issues/263 [i915#563]: https://gitlab.freedesktop.org/drm/intel/issues/563 [i915#671]: https://gitlab.freedesktop.org/drm/intel/issues/671 [i915#770]: https://gitlab.freedesktop.org/drm/intel/issues/770 [i915#889]: https://gitlab.freedesktop.org/drm/intel/issues/889 [i915#937]: https://gitlab.freedesktop.org/drm/intel/issues/937 Participating hosts (42 -> 44) ------------------------------ Additional (8): fi-byt-j1900 fi-ivb-3770 fi-skl-lmem fi-blb-e6850 fi-byt-n2820 fi-bsw-nick fi-skl-6600u fi-snb-2600 Missing (6): fi-hsw-4770r fi-ilk-m540 fi-byt-squawks fi-bsw-cyan fi-ctg-p8600 fi-gdg-551 Build changes ------------- * CI: CI-20190529 -> None * Linux: CI_DRM_7748 -> Patchwork_16108 CI-20190529: 20190529 CI_DRM_7748: 1793de9a4215356790b87608fcfc9e99eeb6954d @ git://anongit.freedesktop.org/gfx-ci/linux IGT_5365: 
e9ec0ed63b25c86861ffac3c8601cc4d1b910b65 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools Patchwork_16108: d72dea73722073cce4d092c81365a8609bf3f2a0 @ git://anongit.freedesktop.org/gfx-ci/linux == Kernel 32bit build == Warning: Kernel 32bit buildtest failed: https://intel-gfx-ci.01.org/Patchwork_16108/build_32bit.log CALL scripts/checksyscalls.sh CALL scripts/atomic/check-atomics.sh CHK include/generated/compile.h Kernel: arch/x86/boot/bzImage is ready (#1) Building modules, stage 2. MODPOST 122 modules ERROR: "__udivdi3" [drivers/gpu/drm/amd/amdgpu/amdgpu.ko] undefined! scripts/Makefile.modpost:93: recipe for target '__modpost' failed make[1]: *** [__modpost] Error 1 Makefile:1282: recipe for target 'modules' failed make: *** [modules] Error 2 == Linux commits == d72dea737220 drm/i915/execlists: Offline error capture fdb604f0360e drm/i915/gt: Allow temporary suspension of inflight requests ed160ebd901e drm/i915: Keep track of request among the scheduling lists == Logs == For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/index.html _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply [flat|nested] 21+ messages in thread
* [Intel-gfx] ✗ Fi.CI.BUILD: warning for series starting with [v2] drm/i915: Keep track of request among the scheduling lists (rev3) 2020-01-15 8:33 [Intel-gfx] [PATCH 1/3] drm/i915: Use common priotree lists for virtual engine Chris Wilson ` (6 preceding siblings ...) 2020-01-15 14:37 ` [Intel-gfx] ✓ Fi.CI.BAT: success for series starting with [v2] drm/i915: Keep track of request among the scheduling lists (rev3) Patchwork @ 2020-01-15 14:37 ` Patchwork 2020-01-17 20:47 ` [Intel-gfx] ✗ Fi.CI.IGT: failure " Patchwork 8 siblings, 0 replies; 21+ messages in thread From: Patchwork @ 2020-01-15 14:37 UTC (permalink / raw) To: Chris Wilson; +Cc: intel-gfx == Series Details == Series: series starting with [v2] drm/i915: Keep track of request among the scheduling lists (rev3) URL : https://patchwork.freedesktop.org/series/72048/ State : warning == Summary == CALL scripts/checksyscalls.sh CALL scripts/atomic/check-atomics.sh CHK include/generated/compile.h Kernel: arch/x86/boot/bzImage is ready (#1) Building modules, stage 2. MODPOST 122 modules ERROR: "__udivdi3" [drivers/gpu/drm/amd/amdgpu/amdgpu.ko] undefined! scripts/Makefile.modpost:93: recipe for target '__modpost' failed make[1]: *** [__modpost] Error 1 Makefile:1282: recipe for target 'modules' failed make: *** [modules] Error 2 == Logs == For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/build_32bit.log _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply [flat|nested] 21+ messages in thread
* [Intel-gfx] ✗ Fi.CI.IGT: failure for series starting with [v2] drm/i915: Keep track of request among the scheduling lists (rev3) 2020-01-15 8:33 [Intel-gfx] [PATCH 1/3] drm/i915: Use common priotree lists for virtual engine Chris Wilson ` (7 preceding siblings ...) 2020-01-15 14:37 ` [Intel-gfx] ✗ Fi.CI.BUILD: warning " Patchwork @ 2020-01-17 20:47 ` Patchwork 8 siblings, 0 replies; 21+ messages in thread From: Patchwork @ 2020-01-17 20:47 UTC (permalink / raw) To: Chris Wilson; +Cc: intel-gfx == Series Details == Series: series starting with [v2] drm/i915: Keep track of request among the scheduling lists (rev3) URL : https://patchwork.freedesktop.org/series/72048/ State : failure == Summary == CI Bug Log - changes from CI_DRM_7748_full -> Patchwork_16108_full ==================================================== Summary ------- **FAILURE** Serious unknown changes coming with Patchwork_16108_full absolutely need to be verified manually. If you think the reported changes have nothing to do with the changes introduced in Patchwork_16108_full, please notify your bug team to allow them to document this new failure mode, which will reduce false positives in CI. 
Possible new issues ------------------- Here are the unknown changes that may have been introduced in Patchwork_16108_full: ### IGT changes ### #### Possible regressions #### * igt@gem_exec_async@concurrent-writes-bsd1: - shard-tglb: [PASS][1] -> [FAIL][2] [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-tglb1/igt@gem_exec_async@concurrent-writes-bsd1.html [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-tglb4/igt@gem_exec_async@concurrent-writes-bsd1.html * igt@gem_exec_async@concurrent-writes-bsd2: - shard-tglb: NOTRUN -> [FAIL][3] [3]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-tglb8/igt@gem_exec_async@concurrent-writes-bsd2.html * igt@runner@aborted: - shard-kbl: NOTRUN -> ([FAIL][4], [FAIL][5], [FAIL][6]) ([i915#841]) [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-kbl3/igt@runner@aborted.html [5]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-kbl7/igt@runner@aborted.html [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-kbl7/igt@runner@aborted.html #### Warnings #### * igt@runner@aborted: - shard-apl: [FAIL][7] ([i915#667]) -> ([FAIL][8], [FAIL][9]) [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-apl6/igt@runner@aborted.html [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-apl8/igt@runner@aborted.html [9]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-apl7/igt@runner@aborted.html Known issues ------------ Here are the changes found in Patchwork_16108_full that come from known issues: ### IGT changes ### #### Issues hit #### * igt@gem_ctx_persistence@processes: - shard-glk: [PASS][10] -> [FAIL][11] ([i915#570]) [10]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-glk1/igt@gem_ctx_persistence@processes.html [11]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-glk5/igt@gem_ctx_persistence@processes.html * igt@gem_ctx_persistence@vcs0-mixed-process: - shard-glk: [PASS][12] -> 
[FAIL][13] ([i915#679]) [12]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-glk1/igt@gem_ctx_persistence@vcs0-mixed-process.html [13]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-glk8/igt@gem_ctx_persistence@vcs0-mixed-process.html * igt@gem_ctx_persistence@vcs1-persistence: - shard-iclb: [PASS][14] -> [SKIP][15] ([fdo#109276] / [fdo#112080]) +1 similar issue [14]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-iclb4/igt@gem_ctx_persistence@vcs1-persistence.html [15]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-iclb6/igt@gem_ctx_persistence@vcs1-persistence.html * igt@gem_eio@in-flight-suspend: - shard-skl: [PASS][16] -> [INCOMPLETE][17] ([i915#69]) [16]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-skl9/igt@gem_eio@in-flight-suspend.html [17]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-skl5/igt@gem_eio@in-flight-suspend.html * igt@gem_eio@kms: - shard-snb: [PASS][18] -> [INCOMPLETE][19] ([i915#82]) [18]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-snb2/igt@gem_eio@kms.html [19]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-snb5/igt@gem_eio@kms.html * igt@gem_exec_balancer@hang: - shard-kbl: [PASS][20] -> [INCOMPLETE][21] ([fdo#103665]) [20]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-kbl7/igt@gem_exec_balancer@hang.html [21]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-kbl3/igt@gem_exec_balancer@hang.html * igt@gem_exec_create@basic: - shard-tglb: [PASS][22] -> [INCOMPLETE][23] ([fdo#111736] / [i915#472]) [22]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-tglb7/igt@gem_exec_create@basic.html [23]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-tglb3/igt@gem_exec_create@basic.html * igt@gem_exec_parallel@vcs1-fds: - shard-iclb: [PASS][24] -> [SKIP][25] ([fdo#112080]) +9 similar issues [24]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-iclb1/igt@gem_exec_parallel@vcs1-fds.html [25]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-iclb6/igt@gem_exec_parallel@vcs1-fds.html * igt@gem_exec_schedule@independent-bsd2: - shard-iclb: [PASS][26] -> [SKIP][27] ([fdo#109276]) +24 similar issues [26]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-iclb2/igt@gem_exec_schedule@independent-bsd2.html [27]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-iclb8/igt@gem_exec_schedule@independent-bsd2.html * igt@gem_exec_schedule@pi-distinct-iova-bsd: - shard-iclb: [PASS][28] -> [SKIP][29] ([i915#677]) [28]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-iclb5/igt@gem_exec_schedule@pi-distinct-iova-bsd.html [29]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-iclb2/igt@gem_exec_schedule@pi-distinct-iova-bsd.html * igt@gem_exec_schedule@preempt-other-chain-bsd: - shard-iclb: [PASS][30] -> [SKIP][31] ([fdo#112146]) +7 similar issues [30]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-iclb6/igt@gem_exec_schedule@preempt-other-chain-bsd.html [31]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-iclb2/igt@gem_exec_schedule@preempt-other-chain-bsd.html * igt@gem_exec_schedule@preempt-queue-render: - shard-tglb: [PASS][32] -> [INCOMPLETE][33] ([fdo#111606] / [fdo#111677] / [i915#472]) +2 similar issues [32]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-tglb8/igt@gem_exec_schedule@preempt-queue-render.html [33]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-tglb6/igt@gem_exec_schedule@preempt-queue-render.html * igt@gem_exec_schedule@preempt-queue-vebox: - shard-tglb: [PASS][34] -> [INCOMPLETE][35] ([fdo#111677] / [i915#472]) [34]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-tglb5/igt@gem_exec_schedule@preempt-queue-vebox.html [35]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-tglb3/igt@gem_exec_schedule@preempt-queue-vebox.html * igt@gem_exec_schedule@smoketest-bsd1: - shard-tglb: [PASS][36] -> [INCOMPLETE][37] ([i915#463] / [i915#472]) [36]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-tglb8/igt@gem_exec_schedule@smoketest-bsd1.html [37]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-tglb6/igt@gem_exec_schedule@smoketest-bsd1.html * igt@gem_persistent_relocs@forked-faulting-reloc-thrashing: - shard-kbl: [PASS][38] -> [INCOMPLETE][39] ([fdo#103665] / [i915#530]) [38]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-kbl4/igt@gem_persistent_relocs@forked-faulting-reloc-thrashing.html [39]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-kbl1/igt@gem_persistent_relocs@forked-faulting-reloc-thrashing.html * igt@gem_persistent_relocs@forked-interruptible-faulting-reloc-thrash-inactive: - shard-tglb: [PASS][40] -> [TIMEOUT][41] ([fdo#112126] / [fdo#112271]) [40]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-tglb1/igt@gem_persistent_relocs@forked-interruptible-faulting-reloc-thrash-inactive.html [41]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-tglb5/igt@gem_persistent_relocs@forked-interruptible-faulting-reloc-thrash-inactive.html * igt@gem_persistent_relocs@forked-interruptible-faulting-reloc-thrashing: - shard-apl: [PASS][42] -> [TIMEOUT][43] ([fdo#112271] / [i915#530]) [42]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-apl8/igt@gem_persistent_relocs@forked-interruptible-faulting-reloc-thrashing.html [43]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-apl7/igt@gem_persistent_relocs@forked-interruptible-faulting-reloc-thrashing.html - shard-glk: [PASS][44] -> [TIMEOUT][45] ([fdo#112271] / [i915#530]) [44]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-glk4/igt@gem_persistent_relocs@forked-interruptible-faulting-reloc-thrashing.html [45]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-glk4/igt@gem_persistent_relocs@forked-interruptible-faulting-reloc-thrashing.html * igt@gem_persistent_relocs@forked-interruptible-thrashing: - shard-skl: [PASS][46] -> [TIMEOUT][47] ([fdo#112271] / [i915#530]) +1 similar issue [46]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-skl2/igt@gem_persistent_relocs@forked-interruptible-thrashing.html [47]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-skl10/igt@gem_persistent_relocs@forked-interruptible-thrashing.html - shard-glk: [PASS][48] -> [TIMEOUT][49] ([fdo#112271]) [48]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-glk4/igt@gem_persistent_relocs@forked-interruptible-thrashing.html [49]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-glk4/igt@gem_persistent_relocs@forked-interruptible-thrashing.html - shard-iclb: [PASS][50] -> [FAIL][51] ([i915#520]) [50]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-iclb4/igt@gem_persistent_relocs@forked-interruptible-thrashing.html [51]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-iclb6/igt@gem_persistent_relocs@forked-interruptible-thrashing.html - shard-apl: [PASS][52] -> [INCOMPLETE][53] ([CI#80] / [fdo#103927] / [i915#530]) [52]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-apl6/igt@gem_persistent_relocs@forked-interruptible-thrashing.html [53]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-apl1/igt@gem_persistent_relocs@forked-interruptible-thrashing.html - shard-kbl: [PASS][54] -> [TIMEOUT][55] ([fdo#112271] / [i915#530]) [54]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-kbl7/igt@gem_persistent_relocs@forked-interruptible-thrashing.html [55]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-kbl4/igt@gem_persistent_relocs@forked-interruptible-thrashing.html * igt@gem_persistent_relocs@forked-thrashing: - shard-hsw: [PASS][56] -> [INCOMPLETE][57] ([i915#530] / 
[i915#61]) +1 similar issue [56]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-hsw5/igt@gem_persistent_relocs@forked-thrashing.html [57]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-hsw7/igt@gem_persistent_relocs@forked-thrashing.html * igt@i915_selftest@live_execlists: - shard-kbl: [PASS][58] -> [DMESG-FAIL][59] ([i915#841]) [58]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-kbl2/igt@i915_selftest@live_execlists.html [59]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-kbl7/igt@i915_selftest@live_execlists.html * igt@kms_color@pipe-a-ctm-0-5: - shard-skl: [PASS][60] -> [DMESG-WARN][61] ([i915#109]) [60]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-skl5/igt@kms_color@pipe-a-ctm-0-5.html [61]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-skl5/igt@kms_color@pipe-a-ctm-0-5.html * igt@kms_cursor_crc@pipe-b-cursor-128x128-sliding: - shard-skl: [PASS][62] -> [FAIL][63] ([i915#54]) [62]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-skl10/igt@kms_cursor_crc@pipe-b-cursor-128x128-sliding.html [63]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-skl10/igt@kms_cursor_crc@pipe-b-cursor-128x128-sliding.html * igt@kms_flip@flip-vs-expired-vblank: - shard-skl: [PASS][64] -> [FAIL][65] ([i915#79]) [64]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-skl7/igt@kms_flip@flip-vs-expired-vblank.html [65]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-skl9/igt@kms_flip@flip-vs-expired-vblank.html * igt@kms_frontbuffer_tracking@fbcpsr-1p-offscren-pri-shrfb-draw-blt: - shard-tglb: [PASS][66] -> [FAIL][67] ([i915#49]) [66]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-tglb3/igt@kms_frontbuffer_tracking@fbcpsr-1p-offscren-pri-shrfb-draw-blt.html [67]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-tglb3/igt@kms_frontbuffer_tracking@fbcpsr-1p-offscren-pri-shrfb-draw-blt.html * 
igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a: - shard-kbl: [PASS][68] -> [DMESG-WARN][69] ([i915#180]) +3 similar issues [68]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-kbl3/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a.html [69]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-kbl1/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a.html * igt@kms_plane@plane-position-covered-pipe-c-planes: - shard-skl: [PASS][70] -> [FAIL][71] ([i915#247]) [70]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-skl2/igt@kms_plane@plane-position-covered-pipe-c-planes.html [71]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-skl4/igt@kms_plane@plane-position-covered-pipe-c-planes.html * igt@kms_plane_alpha_blend@pipe-c-coverage-7efc: - shard-skl: [PASS][72] -> [FAIL][73] ([fdo#108145] / [i915#265]) +1 similar issue [72]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-skl2/igt@kms_plane_alpha_blend@pipe-c-coverage-7efc.html [73]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-skl4/igt@kms_plane_alpha_blend@pipe-c-coverage-7efc.html * igt@kms_psr@psr2_cursor_mmap_cpu: - shard-iclb: [PASS][74] -> [SKIP][75] ([fdo#109441]) +3 similar issues [74]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-iclb2/igt@kms_psr@psr2_cursor_mmap_cpu.html [75]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-iclb8/igt@kms_psr@psr2_cursor_mmap_cpu.html * igt@kms_setmode@basic: - shard-apl: [PASS][76] -> [FAIL][77] ([i915#31]) [76]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-apl7/igt@kms_setmode@basic.html [77]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-apl3/igt@kms_setmode@basic.html * igt@kms_vblank@pipe-c-ts-continuation-suspend: - shard-apl: [PASS][78] -> [DMESG-WARN][79] ([i915#180]) +1 similar issue [78]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-apl6/igt@kms_vblank@pipe-c-ts-continuation-suspend.html [79]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-apl1/igt@kms_vblank@pipe-c-ts-continuation-suspend.html * igt@prime_mmap_coherency@ioctl-errors: - shard-hsw: [PASS][80] -> [INCOMPLETE][81] ([i915#61]) [80]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-hsw1/igt@prime_mmap_coherency@ioctl-errors.html [81]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-hsw5/igt@prime_mmap_coherency@ioctl-errors.html #### Possible fixes #### * igt@drm_import_export@prime: - shard-hsw: [INCOMPLETE][82] ([CI#80] / [i915#61]) -> [PASS][83] [82]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-hsw2/igt@drm_import_export@prime.html [83]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-hsw5/igt@drm_import_export@prime.html * igt@gem_busy@close-race: - shard-tglb: [INCOMPLETE][84] ([i915#472] / [i915#977]) -> [PASS][85] [84]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-tglb6/igt@gem_busy@close-race.html [85]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-tglb5/igt@gem_busy@close-race.html * igt@gem_ctx_persistence@bcs0-mixed-process: - shard-apl: [FAIL][86] ([i915#679]) -> [PASS][87] +1 similar issue [86]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-apl4/igt@gem_ctx_persistence@bcs0-mixed-process.html [87]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-apl2/igt@gem_ctx_persistence@bcs0-mixed-process.html * igt@gem_ctx_persistence@vcs1-queued: - shard-iclb: [SKIP][88] ([fdo#109276] / [fdo#112080]) -> [PASS][89] +4 similar issues [88]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-iclb5/igt@gem_ctx_persistence@vcs1-queued.html [89]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-iclb2/igt@gem_ctx_persistence@vcs1-queued.html * igt@gem_eio@in-flight-contexts-1us: - shard-snb: [FAIL][90] ([i915#490]) -> [PASS][91] [90]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-snb6/igt@gem_eio@in-flight-contexts-1us.html [91]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-snb7/igt@gem_eio@in-flight-contexts-1us.html * igt@gem_eio@in-flight-external: - shard-tglb: [INCOMPLETE][92] ([i915#472] / [i915#534]) -> [PASS][93] [92]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-tglb6/igt@gem_eio@in-flight-external.html [93]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-tglb3/igt@gem_eio@in-flight-external.html * igt@gem_exec_parallel@basic: - shard-tglb: [INCOMPLETE][94] ([i915#472] / [i915#476]) -> [PASS][95] [94]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-tglb3/igt@gem_exec_parallel@basic.html [95]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-tglb4/igt@gem_exec_parallel@basic.html * igt@gem_exec_schedule@pi-common-bsd: - shard-iclb: [SKIP][96] ([i915#677]) -> [PASS][97] +2 similar issues [96]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-iclb1/igt@gem_exec_schedule@pi-common-bsd.html [97]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-iclb6/igt@gem_exec_schedule@pi-common-bsd.html * igt@gem_exec_schedule@preempt-queue-contexts-render: - shard-tglb: [INCOMPLETE][98] ([fdo#111606] / [fdo#111677] / [i915#472]) -> [PASS][99] [98]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-tglb6/igt@gem_exec_schedule@preempt-queue-contexts-render.html [99]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-tglb5/igt@gem_exec_schedule@preempt-queue-contexts-render.html * igt@gem_exec_schedule@preemptive-hang-bsd: - shard-iclb: [SKIP][100] ([fdo#112146]) -> [PASS][101] +7 similar issues [100]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-iclb2/igt@gem_exec_schedule@preemptive-hang-bsd.html [101]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-iclb8/igt@gem_exec_schedule@preemptive-hang-bsd.html * igt@gem_exec_suspend@basic-s0: - shard-tglb: [INCOMPLETE][102] ([i915#456] / [i915#472]) -> [PASS][103] [102]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-tglb3/igt@gem_exec_suspend@basic-s0.html [103]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-tglb4/igt@gem_exec_suspend@basic-s0.html * igt@gem_persistent_relocs@forked-interruptible-faulting-reloc-thrash-inactive: - shard-apl: [TIMEOUT][104] ([fdo#112271]) -> [PASS][105] [104]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-apl6/igt@gem_persistent_relocs@forked-interruptible-faulting-reloc-thrash-inactive.html [105]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-apl6/igt@gem_persistent_relocs@forked-interruptible-faulting-reloc-thrash-inactive.html * igt@gem_persistent_relocs@forked-interruptible-thrash-inactive: - shard-kbl: [TIMEOUT][106] ([fdo#112271] / [i915#530]) -> [PASS][107] +1 similar issue [106]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-kbl1/igt@gem_persistent_relocs@forked-interruptible-thrash-inactive.html [107]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-kbl4/igt@gem_persistent_relocs@forked-interruptible-thrash-inactive.html * igt@gem_persistent_relocs@forked-thrashing: - shard-kbl: [INCOMPLETE][108] ([fdo#103665] / [i915#530]) -> [PASS][109] [108]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-kbl3/igt@gem_persistent_relocs@forked-thrashing.html [109]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-kbl7/igt@gem_persistent_relocs@forked-thrashing.html * igt@gem_pipe_control_store_loop@reused-buffer: - shard-tglb: [INCOMPLETE][110] ([i915#472] / [i915#707] / [i915#796]) -> [PASS][111] [110]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-tglb6/igt@gem_pipe_control_store_loop@reused-buffer.html [111]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-tglb7/igt@gem_pipe_control_store_loop@reused-buffer.html * igt@gem_ppgtt@flink-and-close-vma-leak: - shard-kbl: [FAIL][112] ([i915#644]) -> [PASS][113] [112]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-kbl3/igt@gem_ppgtt@flink-and-close-vma-leak.html [113]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-kbl7/igt@gem_ppgtt@flink-and-close-vma-leak.html * igt@gem_softpin@noreloc-s3: - shard-apl: [DMESG-WARN][114] ([i915#180]) -> [PASS][115] +1 similar issue [114]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-apl4/igt@gem_softpin@noreloc-s3.html [115]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-apl2/igt@gem_softpin@noreloc-s3.html * igt@gem_sync@basic-store-each: - shard-tglb: [INCOMPLETE][116] ([i915#472]) -> [PASS][117] +1 similar issue [116]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-tglb6/igt@gem_sync@basic-store-each.html [117]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-tglb1/igt@gem_sync@basic-store-each.html * igt@i915_pm_rps@reset: - shard-iclb: [FAIL][118] ([i915#413]) -> [PASS][119] [118]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-iclb4/igt@i915_pm_rps@reset.html [119]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-iclb1/igt@i915_pm_rps@reset.html * igt@kms_color@pipe-b-ctm-0-75: - shard-skl: [DMESG-WARN][120] ([i915#109]) -> [PASS][121] [120]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-skl3/igt@kms_color@pipe-b-ctm-0-75.html [121]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-skl10/igt@kms_color@pipe-b-ctm-0-75.html * igt@kms_draw_crc@draw-method-rgb565-render-untiled: - shard-apl: [DMESG-WARN][122] -> [PASS][123] [122]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-apl1/igt@kms_draw_crc@draw-method-rgb565-render-untiled.html [123]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-apl7/igt@kms_draw_crc@draw-method-rgb565-render-untiled.html * igt@kms_flip@flip-vs-expired-vblank-interruptible: - shard-skl: [FAIL][124] ([i915#79]) -> [PASS][125] [124]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-skl4/igt@kms_flip@flip-vs-expired-vblank-interruptible.html [125]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-skl1/igt@kms_flip@flip-vs-expired-vblank-interruptible.html * igt@kms_flip@flip-vs-suspend-interruptible: - shard-glk: [INCOMPLETE][126] ([i915#58] / [k.org#198133]) -> [PASS][127] [126]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-glk8/igt@kms_flip@flip-vs-suspend-interruptible.html [127]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-glk1/igt@kms_flip@flip-vs-suspend-interruptible.html * igt@kms_plane_alpha_blend@pipe-c-constant-alpha-min: - shard-skl: [FAIL][128] ([fdo#108145]) -> [PASS][129] [128]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-skl5/igt@kms_plane_alpha_blend@pipe-c-constant-alpha-min.html [129]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-skl5/igt@kms_plane_alpha_blend@pipe-c-constant-alpha-min.html * igt@kms_psr2_su@frontbuffer: - shard-iclb: [SKIP][130] ([fdo#109642] / [fdo#111068]) -> [PASS][131] [130]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-iclb5/igt@kms_psr2_su@frontbuffer.html [131]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-iclb2/igt@kms_psr2_su@frontbuffer.html * igt@kms_psr@psr2_sprite_plane_move: - shard-iclb: [SKIP][132] ([fdo#109441]) -> [PASS][133] +3 similar issues [132]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-iclb5/igt@kms_psr@psr2_sprite_plane_move.html [133]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-iclb2/igt@kms_psr@psr2_sprite_plane_move.html * igt@kms_vblank@pipe-a-ts-continuation-suspend: - shard-kbl: [DMESG-WARN][134] ([i915#180]) -> [PASS][135] +6 similar issues [134]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-kbl4/igt@kms_vblank@pipe-a-ts-continuation-suspend.html [135]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-kbl3/igt@kms_vblank@pipe-a-ts-continuation-suspend.html * igt@perf_pmu@busy-no-semaphores-vcs1: - shard-iclb: [SKIP][136] ([fdo#112080]) -> [PASS][137] +13 similar issues [136]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-iclb6/igt@perf_pmu@busy-no-semaphores-vcs1.html [137]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-iclb1/igt@perf_pmu@busy-no-semaphores-vcs1.html * igt@prime_vgem@fence-wait-bsd2: - shard-iclb: [SKIP][138] ([fdo#109276]) -> [PASS][139] +21 similar issues [138]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-iclb6/igt@prime_vgem@fence-wait-bsd2.html [139]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-iclb1/igt@prime_vgem@fence-wait-bsd2.html #### Warnings #### * igt@kms_dp_dsc@basic-dsc-enable-edp: - shard-iclb: [SKIP][140] ([fdo#109349]) -> [DMESG-WARN][141] ([fdo#107724]) [140]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7748/shard-iclb5/igt@kms_dp_dsc@basic-dsc-enable-edp.html [141]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/shard-iclb2/igt@kms_dp_dsc@basic-dsc-enable-edp.html [CI#80]: https://gitlab.freedesktop.org/gfx-ci/i915-infra/issues/80 [fdo#103665]: https://bugs.freedesktop.org/show_bug.cgi?id=103665 [fdo#103927]: https://bugs.freedesktop.org/show_bug.cgi?id=103927 [fdo#107724]: https://bugs.freedesktop.org/show_bug.cgi?id=107724 [fdo#108145]: https://bugs.freedesktop.org/show_bug.cgi?id=108145 [fdo#109276]: https://bugs.freedesktop.org/show_bug.cgi?id=109276 [fdo#109349]: https://bugs.freedesktop.org/show_bug.cgi?id=109349 [fdo#109441]: https://bugs.freedesktop.org/show_bug.cgi?id=109441 [fdo#109642]: https://bugs.freedesktop.org/show_bug.cgi?id=109642 [fdo#111068]: https://bugs.freedesktop.org/show_bug.cgi?id=111068 [fdo#111606]: https://bugs.freedesktop.org/show_bug.cgi?id=111606 [fdo#111677]: https://bugs.freedesktop.org/show_bug.cgi?id=111677 [fdo#111736]: 
https://bugs.freedesktop.org/show_bug.cgi?id=111736 [fdo#112080]: https://bugs.freedesktop.org/show_bug.cgi?id=112080 [fdo#112126]: https://bugs.freedesktop.org/show_bug.cgi?id=112126 [fdo#112146]: https://bugs.freedesktop.org/show_bug.cgi?id=112146 [fdo#112271]: https://bugs.freedesktop.org/show_bug.cgi?id=112271 [i915#109]: https://gitlab.freedesktop.org/drm/intel/issues/109 [i915#180]: https://gitlab.freedesktop.org/drm/intel/issues/180 [i915#247]: https://gitlab.freedesktop.org/drm/intel/issues/247 [i915#265]: https://gitlab.freedesktop.org/drm/intel/issues/265 [i915#31]: https://gitlab.freedesktop.org/drm/intel/issues/31 [i915#413]: https://gitlab.freedesktop.org/drm/intel/issues/413 [i915#456]: https://gitlab.freedesktop.org/drm/intel/issues/456 [i915#463]: https://gitlab.freedesktop.org/drm/intel/issues/463 [i915#472]: https://gitlab.freedesktop.org/drm/intel/issues/472 [i915#476]: https://gitlab.freedesktop.org/drm/intel/issues/476 [i915#49]: https://gitlab.freedesktop.org/drm/intel/issues/49 [i915#490]: https://gitlab.freedesktop.org/drm/intel/issues/490 [i915#520]: https://gitlab.freedesktop.org/drm/intel/issues/520 [i915#530]: https://gitlab.freedesktop.org/drm/intel/issues/530 [i915#534]: https://gitlab.freedesktop.org/drm/intel/issues/534 [i915#54]: https://gitlab.freedesktop.org/drm/intel/issues/54 [i915#570]: https://gitlab.freedesktop.org/drm/intel/issues/570 [i915#58]: https://gitlab.freedesktop.org/drm/intel/issues/58 [i915#61]: https://gitlab.freedesktop.org/drm/intel/issues/61 [i915#644]: https://gitlab.freedesktop.org/drm/intel/issues/644 [i915#667]: https://gitlab.freedesktop.org/drm/intel/issues/667 [i915#677]: https://gitlab.freedesktop.org/drm/intel/issues/677 [i915#679]: https://gitlab.freedesktop.org/drm/intel/issues/679 [i915#69]: https://gitlab.freedesktop.org/drm/intel/issues/69 [i915#707]: https://gitlab.freedesktop.org/drm/intel/issues/707 [i915#79]: https://gitlab.freedesktop.org/drm/intel/issues/79 [i915#796]: 
https://gitlab.freedesktop.org/drm/intel/issues/796 [i915#82]: https://gitlab.freedesktop.org/drm/intel/issues/82 [i915#841]: https://gitlab.freedesktop.org/drm/intel/issues/841 [i915#977]: https://gitlab.freedesktop.org/drm/intel/issues/977 [k.org#198133]: https://bugzilla.kernel.org/show_bug.cgi?id=198133 Participating hosts (10 -> 10) ------------------------------ No changes in participating hosts Build changes ------------- * CI: CI-20190529 -> None * Linux == Logs == For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16108/index.html _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply [flat|nested] 21+ messages in thread
end of thread, other threads:[~2020-01-17 20:47 UTC | newest] Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2020-01-15 8:33 [Intel-gfx] [PATCH 1/3] drm/i915: Use common priotree lists for virtual engine Chris Wilson 2020-01-15 8:33 ` [Intel-gfx] [PATCH 2/3] drm/i915/gt: Allow temporary suspension of inflight requests Chris Wilson 2020-01-15 10:58 ` Tvrtko Ursulin 2020-01-15 11:01 ` Chris Wilson 2020-01-15 11:10 ` [Intel-gfx] [PATCH v3] " Chris Wilson 2020-01-15 11:37 ` Tvrtko Ursulin 2020-01-15 11:46 ` Chris Wilson 2020-01-16 17:12 ` Tvrtko Ursulin 2020-01-15 8:33 ` [Intel-gfx] [PATCH 3/3] drm/i915/execlists: Offline error capture Chris Wilson 2020-01-16 17:22 ` Tvrtko Ursulin 2020-01-16 17:48 ` Chris Wilson 2020-01-16 18:14 ` Tvrtko Ursulin 2020-01-16 18:32 ` Chris Wilson 2020-01-15 9:02 ` [Intel-gfx] [PATCH v2] drm/i915: Keep track of request among the scheduling lists Chris Wilson 2020-01-16 17:23 ` Tvrtko Ursulin 2020-01-15 9:44 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [v2] drm/i915: Keep track of request among the scheduling lists (rev2) Patchwork 2020-01-15 10:06 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork 2020-01-15 10:06 ` [Intel-gfx] ✗ Fi.CI.BUILD: warning " Patchwork 2020-01-15 14:37 ` [Intel-gfx] ✓ Fi.CI.BAT: success for series starting with [v2] drm/i915: Keep track of request among the scheduling lists (rev3) Patchwork 2020-01-15 14:37 ` [Intel-gfx] ✗ Fi.CI.BUILD: warning " Patchwork 2020-01-17 20:47 ` [Intel-gfx] ✗ Fi.CI.IGT: failure " Patchwork
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.