* [PATCH] drm/i915: Be more gentle when exiting non-persistent contexts @ 2021-08-05 12:05 ` Tvrtko Ursulin 0 siblings, 0 replies; 7+ messages in thread From: Tvrtko Ursulin @ 2021-08-05 12:05 UTC (permalink / raw) To: Intel-gfx; +Cc: dri-devel, Tvrtko Ursulin, Chris Wilson, Zhen Han From: Tvrtko Ursulin <tvrtko.ursulin@intel.com> When a non-persistent context exits we currently mark it as banned in order to trigger fast termination of any outstanding GPU jobs it may have left running. In doing so we apply a very strict 1ms limit in which the leftover job has to preempt before we issue an engine reset. Some workloads are not able to cleanly preempt in that time window and it can be argued that it would instead be better to give them a bit more grace since avoiding engine resets is generally preferable. To achieve this the patch splits handling of banned contexts from simply closed non-persistent ones and then applies different timeouts for both and also extends the criteria which determines if a request should be scheduled back in after preemption or not. A 20ms preempt timeout grace is given to exited non-persistent contexts, which has been empirically tested to satisfy customer requirements and still provides reasonably quick cleanup post exit. v2: * Streamline fast path checks. v3: * Simplify by using only schedulable status. * Increase timeout to 20ms. v4: * Fix live_execlists selftest. v5: * Fix logic in kill_engines. v6: * Rebase. 
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Zhen Han <zhen.han@intel.com> --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 22 +++++++++++++------ drivers/gpu/drm/i915/gt/intel_context.c | 2 ++ drivers/gpu/drm/i915/gt/intel_context.h | 17 +++++++++++++- drivers/gpu/drm/i915/gt/intel_context_types.h | 1 + .../drm/i915/gt/intel_execlists_submission.c | 11 ++++++++-- drivers/gpu/drm/i915/gt/selftest_execlists.c | 20 +++++++++++------ drivers/gpu/drm/i915/i915_request.c | 2 +- 7 files changed, 57 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index cff72679ad7c..21fe5d4057ab 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -1065,7 +1065,8 @@ static struct intel_engine_cs *active_engine(struct intel_context *ce) return engine; } -static void kill_engines(struct i915_gem_engines *engines, bool ban) +static void +kill_engines(struct i915_gem_engines *engines, bool ban, bool persistent) { struct i915_gem_engines_iter it; struct intel_context *ce; @@ -1079,8 +1080,15 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban) */ for_each_gem_engine(ce, engines, it) { struct intel_engine_cs *engine; + bool skip = false; + + if (ban) + skip = intel_context_ban(ce, NULL); + else if (!persistent) + skip = !intel_context_clear_schedulable(ce); - if (ban && intel_context_ban(ce, NULL)) + /* Already previously banned or made non-schedulable? 
*/ + if (skip) continue; /* @@ -1093,7 +1101,7 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban) engine = active_engine(ce); /* First attempt to gracefully cancel the context */ - if (engine && !__cancel_engine(engine) && ban) + if (engine && !__cancel_engine(engine) && (ban || !persistent)) /* * If we are unable to send a preemptive pulse to bump * the context from the GPU, we have to resort to a full @@ -1105,8 +1113,6 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban) static void kill_context(struct i915_gem_context *ctx) { - bool ban = (!i915_gem_context_is_persistent(ctx) || - !ctx->i915->params.enable_hangcheck); struct i915_gem_engines *pos, *next; spin_lock_irq(&ctx->stale.lock); @@ -1119,7 +1125,8 @@ static void kill_context(struct i915_gem_context *ctx) spin_unlock_irq(&ctx->stale.lock); - kill_engines(pos, ban); + kill_engines(pos, !ctx->i915->params.enable_hangcheck, + i915_gem_context_is_persistent(ctx)); spin_lock_irq(&ctx->stale.lock); GEM_BUG_ON(i915_sw_fence_signaled(&pos->fence)); @@ -1165,7 +1172,8 @@ static void engines_idle_release(struct i915_gem_context *ctx, kill: if (list_empty(&engines->link)) /* raced, already closed */ - kill_engines(engines, true); + kill_engines(engines, true, + i915_gem_context_is_persistent(ctx)); i915_sw_fence_commit(&engines->fence); } diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 745e84c72c90..bc1701ef1578 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -382,6 +382,8 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine) ce->ring = NULL; ce->ring_size = SZ_4K; + __set_bit(CONTEXT_SCHEDULABLE, &ce->flags); + ewma_runtime_init(&ce->runtime.avg); ce->vm = i915_vm_get(engine->gt->vm); diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index c41098950746..5b50716654dd 100644 --- 
a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -251,7 +251,22 @@ static inline bool intel_context_is_banned(const struct intel_context *ce) static inline bool intel_context_set_banned(struct intel_context *ce) { - return test_and_set_bit(CONTEXT_BANNED, &ce->flags); + bool banned = test_and_set_bit(CONTEXT_BANNED, &ce->flags); + + if (!banned) + clear_bit(CONTEXT_SCHEDULABLE, &ce->flags); + + return banned; +} + +static inline bool intel_context_clear_schedulable(struct intel_context *ce) +{ + return test_and_clear_bit(CONTEXT_SCHEDULABLE, &ce->flags); +} + +static inline bool intel_context_is_schedulable(const struct intel_context *ce) +{ + return test_bit(CONTEXT_SCHEDULABLE, &ce->flags); } static inline bool intel_context_ban(struct intel_context *ce, diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index e54351a170e2..3306c70c9c54 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -112,6 +112,7 @@ struct intel_context { #define CONTEXT_FORCE_SINGLE_SUBMISSION 7 #define CONTEXT_NOPREEMPT 8 #define CONTEXT_LRCA_DIRTY 9 +#define CONTEXT_SCHEDULABLE 10 /* Unless banned or non-persistent closed. 
*/ struct { u64 timeout_us; diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index de5f9c86b9a4..778f3cda3c71 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -478,7 +478,7 @@ __execlists_schedule_in(struct i915_request *rq) !intel_engine_has_heartbeat(engine))) intel_context_set_banned(ce); - if (unlikely(intel_context_is_banned(ce) || bad_request(rq))) + if (unlikely(!intel_context_is_schedulable(ce) || bad_request(rq))) reset_active(rq, engine); if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) @@ -1222,12 +1222,19 @@ static void record_preemption(struct intel_engine_execlists *execlists) static unsigned long active_preempt_timeout(struct intel_engine_cs *engine, const struct i915_request *rq) { + struct intel_context *ce; + if (!rq) return 0; + ce = rq->context; + /* Force a fast reset for terminated contexts (ignoring sysfs!) */ - if (unlikely(intel_context_is_banned(rq->context) || bad_request(rq))) + if (unlikely(intel_context_is_banned(ce) || bad_request(rq))) return 1; + /* Longer grace for closed non-persistent contexts to avoid resets. 
*/ + else if (unlikely(!intel_context_is_schedulable(ce))) + return 20; return READ_ONCE(engine->props.preempt_timeout_ms); } diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c index f12ffe797639..da36c015caf4 100644 --- a/drivers/gpu/drm/i915/gt/selftest_execlists.c +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c @@ -2050,6 +2050,12 @@ struct live_preempt_cancel { struct preempt_client a, b; }; +static void context_clear_banned(struct intel_context *ce) +{ + clear_bit(CONTEXT_BANNED, &ce->flags); + set_bit(CONTEXT_SCHEDULABLE, &ce->flags); +} + static int __cancel_active0(struct live_preempt_cancel *arg) { struct i915_request *rq; @@ -2068,7 +2074,7 @@ static int __cancel_active0(struct live_preempt_cancel *arg) if (IS_ERR(rq)) return PTR_ERR(rq); - clear_bit(CONTEXT_BANNED, &rq->context->flags); + context_clear_banned(rq->context); i915_request_get(rq); i915_request_add(rq); if (!igt_wait_for_spinner(&arg->a.spin, rq)) { @@ -2112,7 +2118,7 @@ static int __cancel_active1(struct live_preempt_cancel *arg) if (IS_ERR(rq[0])) return PTR_ERR(rq[0]); - clear_bit(CONTEXT_BANNED, &rq[0]->context->flags); + context_clear_banned(rq[0]->context); i915_request_get(rq[0]); i915_request_add(rq[0]); if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) { @@ -2128,7 +2134,7 @@ static int __cancel_active1(struct live_preempt_cancel *arg) goto out; } - clear_bit(CONTEXT_BANNED, &rq[1]->context->flags); + context_clear_banned(rq[1]->context); i915_request_get(rq[1]); err = i915_request_await_dma_fence(rq[1], &rq[0]->fence); i915_request_add(rq[1]); @@ -2183,7 +2189,7 @@ static int __cancel_queued(struct live_preempt_cancel *arg) if (IS_ERR(rq[0])) return PTR_ERR(rq[0]); - clear_bit(CONTEXT_BANNED, &rq[0]->context->flags); + context_clear_banned(rq[0]->context); i915_request_get(rq[0]); i915_request_add(rq[0]); if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) { @@ -2197,7 +2203,7 @@ static int __cancel_queued(struct 
live_preempt_cancel *arg) goto out; } - clear_bit(CONTEXT_BANNED, &rq[1]->context->flags); + context_clear_banned(rq[1]->context); i915_request_get(rq[1]); err = i915_request_await_dma_fence(rq[1], &rq[0]->fence); i915_request_add(rq[1]); @@ -2273,7 +2279,7 @@ static int __cancel_hostile(struct live_preempt_cancel *arg) if (IS_ERR(rq)) return PTR_ERR(rq); - clear_bit(CONTEXT_BANNED, &rq->context->flags); + context_clear_banned(rq->context); i915_request_get(rq); i915_request_add(rq); if (!igt_wait_for_spinner(&arg->a.spin, rq)) { @@ -2329,7 +2335,7 @@ static int __cancel_fail(struct live_preempt_cancel *arg) if (IS_ERR(rq)) return PTR_ERR(rq); - clear_bit(CONTEXT_BANNED, &rq->context->flags); + context_clear_banned(rq->context); i915_request_get(rq); i915_request_add(rq); if (!igt_wait_for_spinner(&arg->a.spin, rq)) { diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index ce446716d092..b1a9bec83339 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -583,7 +583,7 @@ bool __i915_request_submit(struct i915_request *request) goto active; } - if (unlikely(intel_context_is_banned(request->context))) + if (unlikely(!intel_context_is_schedulable(request->context))) i915_request_set_error_once(request, -EIO); if (unlikely(fatal_error(request->fence.error))) -- 2.30.2 ^ permalink raw reply related [flat|nested] 7+ messages in thread
* [Intel-gfx] [PATCH] drm/i915: Be more gentle when exiting non-persistent contexts @ 2021-08-05 12:05 ` Tvrtko Ursulin 0 siblings, 0 replies; 7+ messages in thread From: Tvrtko Ursulin @ 2021-08-05 12:05 UTC (permalink / raw) To: Intel-gfx; +Cc: dri-devel, Tvrtko Ursulin, Chris Wilson, Zhen Han From: Tvrtko Ursulin <tvrtko.ursulin@intel.com> When a non-persistent context exits we currently mark it as banned in order to trigger fast termination of any outstanding GPU jobs it may have left running. In doing so we apply a very strict 1ms limit in which the leftover job has to preempt before we issue an engine reset. Some workloads are not able to cleanly preempt in that time window and it can be argued that it would instead be better to give them a bit more grace since avoiding engine resets is generally preferable. To achieve this the patch splits handling of banned contexts from simply closed non-persistent ones and then applies different timeouts for both and also extends the criteria which determines if a request should be scheduled back in after preemption or not. A 20ms preempt timeout grace is given to exited non-persistent contexts, which has been empirically tested to satisfy customer requirements and still provides reasonably quick cleanup post exit. v2: * Streamline fast path checks. v3: * Simplify by using only schedulable status. * Increase timeout to 20ms. v4: * Fix live_execlists selftest. v5: * Fix logic in kill_engines. v6: * Rebase. 
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Zhen Han <zhen.han@intel.com> --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 22 +++++++++++++------ drivers/gpu/drm/i915/gt/intel_context.c | 2 ++ drivers/gpu/drm/i915/gt/intel_context.h | 17 +++++++++++++- drivers/gpu/drm/i915/gt/intel_context_types.h | 1 + .../drm/i915/gt/intel_execlists_submission.c | 11 ++++++++-- drivers/gpu/drm/i915/gt/selftest_execlists.c | 20 +++++++++++------ drivers/gpu/drm/i915/i915_request.c | 2 +- 7 files changed, 57 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index cff72679ad7c..21fe5d4057ab 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -1065,7 +1065,8 @@ static struct intel_engine_cs *active_engine(struct intel_context *ce) return engine; } -static void kill_engines(struct i915_gem_engines *engines, bool ban) +static void +kill_engines(struct i915_gem_engines *engines, bool ban, bool persistent) { struct i915_gem_engines_iter it; struct intel_context *ce; @@ -1079,8 +1080,15 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban) */ for_each_gem_engine(ce, engines, it) { struct intel_engine_cs *engine; + bool skip = false; + + if (ban) + skip = intel_context_ban(ce, NULL); + else if (!persistent) + skip = !intel_context_clear_schedulable(ce); - if (ban && intel_context_ban(ce, NULL)) + /* Already previously banned or made non-schedulable? 
*/ + if (skip) continue; /* @@ -1093,7 +1101,7 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban) engine = active_engine(ce); /* First attempt to gracefully cancel the context */ - if (engine && !__cancel_engine(engine) && ban) + if (engine && !__cancel_engine(engine) && (ban || !persistent)) /* * If we are unable to send a preemptive pulse to bump * the context from the GPU, we have to resort to a full @@ -1105,8 +1113,6 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban) static void kill_context(struct i915_gem_context *ctx) { - bool ban = (!i915_gem_context_is_persistent(ctx) || - !ctx->i915->params.enable_hangcheck); struct i915_gem_engines *pos, *next; spin_lock_irq(&ctx->stale.lock); @@ -1119,7 +1125,8 @@ static void kill_context(struct i915_gem_context *ctx) spin_unlock_irq(&ctx->stale.lock); - kill_engines(pos, ban); + kill_engines(pos, !ctx->i915->params.enable_hangcheck, + i915_gem_context_is_persistent(ctx)); spin_lock_irq(&ctx->stale.lock); GEM_BUG_ON(i915_sw_fence_signaled(&pos->fence)); @@ -1165,7 +1172,8 @@ static void engines_idle_release(struct i915_gem_context *ctx, kill: if (list_empty(&engines->link)) /* raced, already closed */ - kill_engines(engines, true); + kill_engines(engines, true, + i915_gem_context_is_persistent(ctx)); i915_sw_fence_commit(&engines->fence); } diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 745e84c72c90..bc1701ef1578 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -382,6 +382,8 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine) ce->ring = NULL; ce->ring_size = SZ_4K; + __set_bit(CONTEXT_SCHEDULABLE, &ce->flags); + ewma_runtime_init(&ce->runtime.avg); ce->vm = i915_vm_get(engine->gt->vm); diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index c41098950746..5b50716654dd 100644 --- 
a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -251,7 +251,22 @@ static inline bool intel_context_is_banned(const struct intel_context *ce) static inline bool intel_context_set_banned(struct intel_context *ce) { - return test_and_set_bit(CONTEXT_BANNED, &ce->flags); + bool banned = test_and_set_bit(CONTEXT_BANNED, &ce->flags); + + if (!banned) + clear_bit(CONTEXT_SCHEDULABLE, &ce->flags); + + return banned; +} + +static inline bool intel_context_clear_schedulable(struct intel_context *ce) +{ + return test_and_clear_bit(CONTEXT_SCHEDULABLE, &ce->flags); +} + +static inline bool intel_context_is_schedulable(const struct intel_context *ce) +{ + return test_bit(CONTEXT_SCHEDULABLE, &ce->flags); } static inline bool intel_context_ban(struct intel_context *ce, diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index e54351a170e2..3306c70c9c54 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -112,6 +112,7 @@ struct intel_context { #define CONTEXT_FORCE_SINGLE_SUBMISSION 7 #define CONTEXT_NOPREEMPT 8 #define CONTEXT_LRCA_DIRTY 9 +#define CONTEXT_SCHEDULABLE 10 /* Unless banned or non-persistent closed. 
*/ struct { u64 timeout_us; diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index de5f9c86b9a4..778f3cda3c71 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -478,7 +478,7 @@ __execlists_schedule_in(struct i915_request *rq) !intel_engine_has_heartbeat(engine))) intel_context_set_banned(ce); - if (unlikely(intel_context_is_banned(ce) || bad_request(rq))) + if (unlikely(!intel_context_is_schedulable(ce) || bad_request(rq))) reset_active(rq, engine); if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) @@ -1222,12 +1222,19 @@ static void record_preemption(struct intel_engine_execlists *execlists) static unsigned long active_preempt_timeout(struct intel_engine_cs *engine, const struct i915_request *rq) { + struct intel_context *ce; + if (!rq) return 0; + ce = rq->context; + /* Force a fast reset for terminated contexts (ignoring sysfs!) */ - if (unlikely(intel_context_is_banned(rq->context) || bad_request(rq))) + if (unlikely(intel_context_is_banned(ce) || bad_request(rq))) return 1; + /* Longer grace for closed non-persistent contexts to avoid resets. 
*/ + else if (unlikely(!intel_context_is_schedulable(ce))) + return 20; return READ_ONCE(engine->props.preempt_timeout_ms); } diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c index f12ffe797639..da36c015caf4 100644 --- a/drivers/gpu/drm/i915/gt/selftest_execlists.c +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c @@ -2050,6 +2050,12 @@ struct live_preempt_cancel { struct preempt_client a, b; }; +static void context_clear_banned(struct intel_context *ce) +{ + clear_bit(CONTEXT_BANNED, &ce->flags); + set_bit(CONTEXT_SCHEDULABLE, &ce->flags); +} + static int __cancel_active0(struct live_preempt_cancel *arg) { struct i915_request *rq; @@ -2068,7 +2074,7 @@ static int __cancel_active0(struct live_preempt_cancel *arg) if (IS_ERR(rq)) return PTR_ERR(rq); - clear_bit(CONTEXT_BANNED, &rq->context->flags); + context_clear_banned(rq->context); i915_request_get(rq); i915_request_add(rq); if (!igt_wait_for_spinner(&arg->a.spin, rq)) { @@ -2112,7 +2118,7 @@ static int __cancel_active1(struct live_preempt_cancel *arg) if (IS_ERR(rq[0])) return PTR_ERR(rq[0]); - clear_bit(CONTEXT_BANNED, &rq[0]->context->flags); + context_clear_banned(rq[0]->context); i915_request_get(rq[0]); i915_request_add(rq[0]); if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) { @@ -2128,7 +2134,7 @@ static int __cancel_active1(struct live_preempt_cancel *arg) goto out; } - clear_bit(CONTEXT_BANNED, &rq[1]->context->flags); + context_clear_banned(rq[1]->context); i915_request_get(rq[1]); err = i915_request_await_dma_fence(rq[1], &rq[0]->fence); i915_request_add(rq[1]); @@ -2183,7 +2189,7 @@ static int __cancel_queued(struct live_preempt_cancel *arg) if (IS_ERR(rq[0])) return PTR_ERR(rq[0]); - clear_bit(CONTEXT_BANNED, &rq[0]->context->flags); + context_clear_banned(rq[0]->context); i915_request_get(rq[0]); i915_request_add(rq[0]); if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) { @@ -2197,7 +2203,7 @@ static int __cancel_queued(struct 
live_preempt_cancel *arg) goto out; } - clear_bit(CONTEXT_BANNED, &rq[1]->context->flags); + context_clear_banned(rq[1]->context); i915_request_get(rq[1]); err = i915_request_await_dma_fence(rq[1], &rq[0]->fence); i915_request_add(rq[1]); @@ -2273,7 +2279,7 @@ static int __cancel_hostile(struct live_preempt_cancel *arg) if (IS_ERR(rq)) return PTR_ERR(rq); - clear_bit(CONTEXT_BANNED, &rq->context->flags); + context_clear_banned(rq->context); i915_request_get(rq); i915_request_add(rq); if (!igt_wait_for_spinner(&arg->a.spin, rq)) { @@ -2329,7 +2335,7 @@ static int __cancel_fail(struct live_preempt_cancel *arg) if (IS_ERR(rq)) return PTR_ERR(rq); - clear_bit(CONTEXT_BANNED, &rq->context->flags); + context_clear_banned(rq->context); i915_request_get(rq); i915_request_add(rq); if (!igt_wait_for_spinner(&arg->a.spin, rq)) { diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index ce446716d092..b1a9bec83339 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -583,7 +583,7 @@ bool __i915_request_submit(struct i915_request *request) goto active; } - if (unlikely(intel_context_is_banned(request->context))) + if (unlikely(!intel_context_is_schedulable(request->context))) i915_request_set_error_once(request, -EIO); if (unlikely(fatal_error(request->fence.error))) -- 2.30.2 ^ permalink raw reply related [flat|nested] 7+ messages in thread
* [Intel-gfx] ✗ Fi.CI.BAT: failure for drm/i915: Be more gentle when exiting non-persistent contexts 2021-08-05 12:05 ` [Intel-gfx] " Tvrtko Ursulin (?) @ 2021-08-05 15:04 ` Patchwork 2021-08-05 16:10 ` Tvrtko Ursulin -1 siblings, 1 reply; 7+ messages in thread From: Patchwork @ 2021-08-05 15:04 UTC (permalink / raw) To: Tvrtko Ursulin; +Cc: intel-gfx [-- Attachment #1: Type: text/plain, Size: 5650 bytes --] == Series Details == Series: drm/i915: Be more gentle when exiting non-persistent contexts URL : https://patchwork.freedesktop.org/series/93420/ State : failure == Summary == CI Bug Log - changes from CI_DRM_10450 -> Patchwork_20775 ==================================================== Summary ------- **FAILURE** Serious unknown changes coming with Patchwork_20775 absolutely need to be verified manually. If you think the reported changes have nothing to do with the changes introduced in Patchwork_20775, please notify your bug team to allow them to document this new failure mode, which will reduce false positives in CI. 
External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20775/index.html Possible new issues ------------------- Here are the unknown changes that may have been introduced in Patchwork_20775: ### IGT changes ### #### Possible regressions #### * igt@i915_selftest@live@gt_lrc: - fi-rkl-guc: [PASS][1] -> [DMESG-WARN][2] [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10450/fi-rkl-guc/igt@i915_selftest@live@gt_lrc.html [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20775/fi-rkl-guc/igt@i915_selftest@live@gt_lrc.html Known issues ------------ Here are the changes found in Patchwork_20775 that come from known issues: ### IGT changes ### #### Issues hit #### * igt@amdgpu/amd_basic@query-info: - fi-bsw-kefka: NOTRUN -> [SKIP][3] ([fdo#109271]) +17 similar issues [3]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20775/fi-bsw-kefka/igt@amdgpu/amd_basic@query-info.html * igt@gem_exec_fence@basic-busy@bcs0: - fi-kbl-soraka: NOTRUN -> [SKIP][4] ([fdo#109271]) +26 similar issues [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20775/fi-kbl-soraka/igt@gem_exec_fence@basic-busy@bcs0.html * igt@gem_huc_copy@huc-copy: - fi-kbl-soraka: NOTRUN -> [SKIP][5] ([fdo#109271] / [i915#2190]) [5]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20775/fi-kbl-soraka/igt@gem_huc_copy@huc-copy.html * igt@i915_pm_rpm@basic-rte: - fi-kbl-soraka: NOTRUN -> [FAIL][6] ([i915#579]) [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20775/fi-kbl-soraka/igt@i915_pm_rpm@basic-rte.html * igt@i915_selftest@live@gt_pm: - fi-kbl-soraka: NOTRUN -> [DMESG-FAIL][7] ([i915#1886] / [i915#2291]) [7]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20775/fi-kbl-soraka/igt@i915_selftest@live@gt_pm.html * igt@i915_selftest@live@late_gt_pm: - fi-bsw-nick: [PASS][8] -> [DMESG-FAIL][9] ([i915#2927]) [8]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10450/fi-bsw-nick/igt@i915_selftest@live@late_gt_pm.html [9]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20775/fi-bsw-nick/igt@i915_selftest@live@late_gt_pm.html * igt@kms_chamelium@common-hpd-after-suspend: - fi-kbl-soraka: NOTRUN -> [SKIP][10] ([fdo#109271] / [fdo#111827]) +8 similar issues [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20775/fi-kbl-soraka/igt@kms_chamelium@common-hpd-after-suspend.html * igt@kms_pipe_crc_basic@compare-crc-sanitycheck-pipe-d: - fi-kbl-soraka: NOTRUN -> [SKIP][11] ([fdo#109271] / [i915#533]) [11]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20775/fi-kbl-soraka/igt@kms_pipe_crc_basic@compare-crc-sanitycheck-pipe-d.html * igt@runner@aborted: - fi-bsw-nick: NOTRUN -> [FAIL][12] ([fdo#109271] / [i915#1436]) [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20775/fi-bsw-nick/igt@runner@aborted.html #### Possible fixes #### * igt@i915_selftest@live@execlists: - fi-bsw-kefka: [INCOMPLETE][13] ([i915#2940]) -> [PASS][14] [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10450/fi-bsw-kefka/igt@i915_selftest@live@execlists.html [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20775/fi-bsw-kefka/igt@i915_selftest@live@execlists.html {name}: This element is suppressed. This means it is ignored when computing the status of the difference (SUCCESS, WARNING, or FAILURE). 
[fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271 [fdo#111827]: https://bugs.freedesktop.org/show_bug.cgi?id=111827 [i915#1436]: https://gitlab.freedesktop.org/drm/intel/issues/1436 [i915#1886]: https://gitlab.freedesktop.org/drm/intel/issues/1886 [i915#2190]: https://gitlab.freedesktop.org/drm/intel/issues/2190 [i915#2291]: https://gitlab.freedesktop.org/drm/intel/issues/2291 [i915#2927]: https://gitlab.freedesktop.org/drm/intel/issues/2927 [i915#2940]: https://gitlab.freedesktop.org/drm/intel/issues/2940 [i915#3303]: https://gitlab.freedesktop.org/drm/intel/issues/3303 [i915#533]: https://gitlab.freedesktop.org/drm/intel/issues/533 [i915#579]: https://gitlab.freedesktop.org/drm/intel/issues/579 Participating hosts (40 -> 35) ------------------------------ Additional (1): fi-kbl-soraka Missing (6): fi-ilk-m540 fi-hsw-4200u fi-bsw-cyan fi-ctg-p8600 fi-bdw-samus bat-jsl-1 Build changes ------------- * Linux: CI_DRM_10450 -> Patchwork_20775 CI-20190529: 20190529 CI_DRM_10450: 51d9c8293e8446e921b74d996982ade862fcfa5c @ git://anongit.freedesktop.org/gfx-ci/linux IGT_6160: 4287344dd6a39d9036c5fb9a047a7d8f10bee981 @ https://gitlab.freedesktop.org/drm/igt-gpu-tools.git Patchwork_20775: 62efd2820c620b384c9688fa71ad6e8a5f6d27fc @ git://anongit.freedesktop.org/gfx-ci/linux == Linux commits == 62efd2820c62 drm/i915: Be more gentle when exiting non-persistent contexts == Logs == For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20775/index.html [-- Attachment #2: Type: text/html, Size: 6800 bytes --] ^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [Intel-gfx] ✗ Fi.CI.BAT: failure for drm/i915: Be more gentle when exiting non-persistent contexts 2021-08-05 15:04 ` [Intel-gfx] ✗ Fi.CI.BAT: failure for " Patchwork @ 2021-08-05 16:10 ` Tvrtko Ursulin 2021-08-05 18:24 ` Matthew Brost 0 siblings, 1 reply; 7+ messages in thread From: Tvrtko Ursulin @ 2021-08-05 16:10 UTC (permalink / raw) To: intel-gfx On 05/08/2021 16:04, Patchwork wrote: > *Patch Details* > *Series:* drm/i915: Be more gentle when exiting non-persistent contexts > *URL:* https://patchwork.freedesktop.org/series/93420/ > <https://patchwork.freedesktop.org/series/93420/> > *State:* failure > *Details:* > https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20775/index.html > <https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20775/index.html> > > > CI Bug Log - changes from CI_DRM_10450 -> Patchwork_20775 > > > Summary > > *FAILURE* > > Serious unknown changes coming with Patchwork_20775 absolutely need to be > verified manually. > > If you think the reported changes have nothing to do with the changes > introduced in Patchwork_20775, please notify your bug team to allow them > to document this new failure mode, which will reduce false positives in CI. > > External URL: > https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20775/index.html > > > Possible new issues > > Here are the unknown changes that may have been introduced in > Patchwork_20775: > > > IGT changes > > > Possible regressions > > * igt@i915_selftest@live@gt_lrc: > o fi-rkl-guc: PASS > <https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10450/fi-rkl-guc/igt@i915_selftest@live@gt_lrc.html> > -> DMESG-WARN > <https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20775/fi-rkl-guc/igt@i915_selftest@live@gt_lrc.html> <6> [233.928677] i915: Running intel_lrc_live_selftests/live_lrc_isolation <3> [233.988780] i915 0000:00:02.0: [drm] *ERROR* rcs0 context redzone overwritten! Something GuC specific by the look of it, or at least I haven't found the same signature elsewhere. 
But in any case it is not related to this patch. Regards, Tvrtko > > > Known issues > > Here are the changes found in Patchwork_20775 that come from known issues: > > > IGT changes > > > Issues hit > > * > > igt@amdgpu/amd_basic@query-info: > > o fi-bsw-kefka: NOTRUN -> SKIP > <https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20775/fi-bsw-kefka/igt@amdgpu/amd_basic@query-info.html> > (fdo#109271 > <https://bugs.freedesktop.org/show_bug.cgi?id=109271>) +17 > similar issues > * > > igt@gem_exec_fence@basic-busy@bcs0: > > o fi-kbl-soraka: NOTRUN -> SKIP > <https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20775/fi-kbl-soraka/igt@gem_exec_fence@basic-busy@bcs0.html> > (fdo#109271 > <https://bugs.freedesktop.org/show_bug.cgi?id=109271>) +26 > similar issues > * > > igt@gem_huc_copy@huc-copy: > > o fi-kbl-soraka: NOTRUN -> SKIP > <https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20775/fi-kbl-soraka/igt@gem_huc_copy@huc-copy.html> > (fdo#109271 > <https://bugs.freedesktop.org/show_bug.cgi?id=109271> / > i915#2190 <https://gitlab.freedesktop.org/drm/intel/issues/2190>) > * > > igt@i915_pm_rpm@basic-rte: > > o fi-kbl-soraka: NOTRUN -> FAIL > <https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20775/fi-kbl-soraka/igt@i915_pm_rpm@basic-rte.html> > (i915#579 <https://gitlab.freedesktop.org/drm/intel/issues/579>) > * > > igt@i915_selftest@live@gt_pm: > > o fi-kbl-soraka: NOTRUN -> DMESG-FAIL > <https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20775/fi-kbl-soraka/igt@i915_selftest@live@gt_pm.html> > (i915#1886 > <https://gitlab.freedesktop.org/drm/intel/issues/1886> / > i915#2291 <https://gitlab.freedesktop.org/drm/intel/issues/2291>) > * > > igt@i915_selftest@live@late_gt_pm: > > o fi-bsw-nick: PASS > <https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10450/fi-bsw-nick/igt@i915_selftest@live@late_gt_pm.html> > -> DMESG-FAIL > <https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20775/fi-bsw-nick/igt@i915_selftest@live@late_gt_pm.html> > (i915#2927 
<https://gitlab.freedesktop.org/drm/intel/issues/2927>) > * > > igt@kms_chamelium@common-hpd-after-suspend: > > o fi-kbl-soraka: NOTRUN -> SKIP > <https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20775/fi-kbl-soraka/igt@kms_chamelium@common-hpd-after-suspend.html> > (fdo#109271 > <https://bugs.freedesktop.org/show_bug.cgi?id=109271> / > fdo#111827 > <https://bugs.freedesktop.org/show_bug.cgi?id=111827>) +8 > similar issues > * > > igt@kms_pipe_crc_basic@compare-crc-sanitycheck-pipe-d: > > o fi-kbl-soraka: NOTRUN -> SKIP > <https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20775/fi-kbl-soraka/igt@kms_pipe_crc_basic@compare-crc-sanitycheck-pipe-d.html> > (fdo#109271 > <https://bugs.freedesktop.org/show_bug.cgi?id=109271> / i915#533 > <https://gitlab.freedesktop.org/drm/intel/issues/533>) > * > > igt@runner@aborted: > > o fi-bsw-nick: NOTRUN -> FAIL > <https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20775/fi-bsw-nick/igt@runner@aborted.html> > (fdo#109271 > <https://bugs.freedesktop.org/show_bug.cgi?id=109271> / > i915#1436 <https://gitlab.freedesktop.org/drm/intel/issues/1436>) > > > Possible fixes > > * igt@i915_selftest@live@execlists: > o fi-bsw-kefka: INCOMPLETE > <https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10450/fi-bsw-kefka/igt@i915_selftest@live@execlists.html> > (i915#2940 > <https://gitlab.freedesktop.org/drm/intel/issues/2940>) -> PASS > <https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20775/fi-bsw-kefka/igt@i915_selftest@live@execlists.html> > > {name}: This element is suppressed. This means it is ignored when computing > the status of the difference (SUCCESS, WARNING, or FAILURE). 
> > > Participating hosts (40 -> 35) > > Additional (1): fi-kbl-soraka > Missing (6): fi-ilk-m540 fi-hsw-4200u fi-bsw-cyan fi-ctg-p8600 > fi-bdw-samus bat-jsl-1 > > > Build changes > > * Linux: CI_DRM_10450 -> Patchwork_20775 > > CI-20190529: 20190529 > CI_DRM_10450: 51d9c8293e8446e921b74d996982ade862fcfa5c @ > git://anongit.freedesktop.org/gfx-ci/linux > IGT_6160: 4287344dd6a39d9036c5fb9a047a7d8f10bee981 @ > https://gitlab.freedesktop.org/drm/igt-gpu-tools.git > Patchwork_20775: 62efd2820c620b384c9688fa71ad6e8a5f6d27fc @ > git://anongit.freedesktop.org/gfx-ci/linux > > == Linux commits == > > 62efd2820c62 drm/i915: Be more gentle when exiting non-persistent contexts > ^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [Intel-gfx] ✗ Fi.CI.BAT: failure for drm/i915: Be more gentle when exiting non-persistent contexts 2021-08-05 16:10 ` Tvrtko Ursulin @ 2021-08-05 18:24 ` Matthew Brost 0 siblings, 0 replies; 7+ messages in thread From: Matthew Brost @ 2021-08-05 18:24 UTC (permalink / raw) To: Tvrtko Ursulin; +Cc: intel-gfx On Thu, Aug 05, 2021 at 05:10:29PM +0100, Tvrtko Ursulin wrote: > > On 05/08/2021 16:04, Patchwork wrote: > > *Patch Details* > > *Series:* drm/i915: Be more gentle when exiting non-persistent contexts > > *URL:* https://patchwork.freedesktop.org/series/93420/ > > <https://patchwork.freedesktop.org/series/93420/> > > *State:* failure > > *Details:* > > https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20775/index.html > > <https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20775/index.html> > > > > > > CI Bug Log - changes from CI_DRM_10450 -> Patchwork_20775 > > > > > > Summary > > > > *FAILURE* > > > > Serious unknown changes coming with Patchwork_20775 absolutely need to be > > verified manually. > > > > If you think the reported changes have nothing to do with the changes > > introduced in Patchwork_20775, please notify your bug team to allow them > > to document this new failure mode, which will reduce false positives in CI. > > > > External URL: > > https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20775/index.html > > > > > > Possible new issues > > > > Here are the unknown changes that may have been introduced in > > Patchwork_20775: > > > > > > IGT changes > > > > > > Possible regressions > > > > * igt@i915_selftest@live@gt_lrc: > > o fi-rkl-guc: PASS > > <https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10450/fi-rkl-guc/igt@i915_selftest@live@gt_lrc.html> > > -> DMESG-WARN > > <https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20775/fi-rkl-guc/igt@i915_selftest@live@gt_lrc.html> > > <6> [233.928677] i915: Running intel_lrc_live_selftests/live_lrc_isolation > <3> [233.988780] i915 0000:00:02.0: [drm] *ERROR* rcs0 context redzone overwritten! 
> > Something GuC specific by the look of it, or at least I haven't found the same signature elsewhere. But in any case it is not related to this patch. > No sure what this is about. Ran this locally on a RKL machine and it passed just fine for me. Something to keep an eye on as CI gets fully enabled with GuC submission. Also BTW, speaking of CI & GuC submission it isn't all that great yet. Maybe ping me when you have the next rev of this patch and I can run series of tests with GuC submission related to banning / persistence. Matt > Regards, > > Tvrtko > > > > > > > Known issues > > > > Here are the changes found in Patchwork_20775 that come from known issues: > > > > > > IGT changes > > > > > > Issues hit > > > > * > > > > igt@amdgpu/amd_basic@query-info: > > > > o fi-bsw-kefka: NOTRUN -> SKIP > > <https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20775/fi-bsw-kefka/igt@amdgpu/amd_basic@query-info.html> > > (fdo#109271 > > <https://bugs.freedesktop.org/show_bug.cgi?id=109271>) +17 > > similar issues > > * > > > > igt@gem_exec_fence@basic-busy@bcs0: > > > > o fi-kbl-soraka: NOTRUN -> SKIP > > <https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20775/fi-kbl-soraka/igt@gem_exec_fence@basic-busy@bcs0.html> > > (fdo#109271 > > <https://bugs.freedesktop.org/show_bug.cgi?id=109271>) +26 > > similar issues > > * > > > > igt@gem_huc_copy@huc-copy: > > > > o fi-kbl-soraka: NOTRUN -> SKIP > > <https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20775/fi-kbl-soraka/igt@gem_huc_copy@huc-copy.html> > > (fdo#109271 > > <https://bugs.freedesktop.org/show_bug.cgi?id=109271> / > > i915#2190 <https://gitlab.freedesktop.org/drm/intel/issues/2190>) > > * > > > > igt@i915_pm_rpm@basic-rte: > > > > o fi-kbl-soraka: NOTRUN -> FAIL > > <https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20775/fi-kbl-soraka/igt@i915_pm_rpm@basic-rte.html> > > (i915#579 <https://gitlab.freedesktop.org/drm/intel/issues/579>) > > * > > > > igt@i915_selftest@live@gt_pm: > > > > o fi-kbl-soraka: NOTRUN -> 
DMESG-FAIL > > <https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20775/fi-kbl-soraka/igt@i915_selftest@live@gt_pm.html> > > (i915#1886 > > <https://gitlab.freedesktop.org/drm/intel/issues/1886> / > > i915#2291 <https://gitlab.freedesktop.org/drm/intel/issues/2291>) > > * > > > > igt@i915_selftest@live@late_gt_pm: > > > > o fi-bsw-nick: PASS > > <https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10450/fi-bsw-nick/igt@i915_selftest@live@late_gt_pm.html> > > -> DMESG-FAIL > > <https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20775/fi-bsw-nick/igt@i915_selftest@live@late_gt_pm.html> > > (i915#2927 <https://gitlab.freedesktop.org/drm/intel/issues/2927>) > > * > > > > igt@kms_chamelium@common-hpd-after-suspend: > > > > o fi-kbl-soraka: NOTRUN -> SKIP > > <https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20775/fi-kbl-soraka/igt@kms_chamelium@common-hpd-after-suspend.html> > > (fdo#109271 > > <https://bugs.freedesktop.org/show_bug.cgi?id=109271> / > > fdo#111827 > > <https://bugs.freedesktop.org/show_bug.cgi?id=111827>) +8 > > similar issues > > * > > > > igt@kms_pipe_crc_basic@compare-crc-sanitycheck-pipe-d: > > > > o fi-kbl-soraka: NOTRUN -> SKIP > > <https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20775/fi-kbl-soraka/igt@kms_pipe_crc_basic@compare-crc-sanitycheck-pipe-d.html> > > (fdo#109271 > > <https://bugs.freedesktop.org/show_bug.cgi?id=109271> / i915#533 > > <https://gitlab.freedesktop.org/drm/intel/issues/533>) > > * > > > > igt@runner@aborted: > > > > o fi-bsw-nick: NOTRUN -> FAIL > > <https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20775/fi-bsw-nick/igt@runner@aborted.html> > > (fdo#109271 > > <https://bugs.freedesktop.org/show_bug.cgi?id=109271> / > > i915#1436 <https://gitlab.freedesktop.org/drm/intel/issues/1436>) > > > > > > Possible fixes > > > > * igt@i915_selftest@live@execlists: > > o fi-bsw-kefka: INCOMPLETE > > <https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10450/fi-bsw-kefka/igt@i915_selftest@live@execlists.html> > > (i915#2940 > > 
<https://gitlab.freedesktop.org/drm/intel/issues/2940>) -> PASS > > <https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20775/fi-bsw-kefka/igt@i915_selftest@live@execlists.html> > > > > {name}: This element is suppressed. This means it is ignored when computing > > the status of the difference (SUCCESS, WARNING, or FAILURE). > > > > > > Participating hosts (40 -> 35) > > > > Additional (1): fi-kbl-soraka > > Missing (6): fi-ilk-m540 fi-hsw-4200u fi-bsw-cyan fi-ctg-p8600 > > fi-bdw-samus bat-jsl-1 > > > > > > Build changes > > > > * Linux: CI_DRM_10450 -> Patchwork_20775 > > > > CI-20190529: 20190529 > > CI_DRM_10450: 51d9c8293e8446e921b74d996982ade862fcfa5c @ > > git://anongit.freedesktop.org/gfx-ci/linux > > IGT_6160: 4287344dd6a39d9036c5fb9a047a7d8f10bee981 @ > > https://gitlab.freedesktop.org/drm/igt-gpu-tools.git > > Patchwork_20775: 62efd2820c620b384c9688fa71ad6e8a5f6d27fc @ > > git://anongit.freedesktop.org/gfx-ci/linux > > > > == Linux commits == > > > > 62efd2820c62 drm/i915: Be more gentle when exiting non-persistent contexts > > ^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [Intel-gfx] [PATCH] drm/i915: Be more gentle when exiting non-persistent contexts 2021-08-05 12:05 ` [Intel-gfx] " Tvrtko Ursulin (?) (?) @ 2021-08-05 16:32 ` Matthew Brost 2021-08-06 10:44 ` Tvrtko Ursulin -1 siblings, 1 reply; 7+ messages in thread From: Matthew Brost @ 2021-08-05 16:32 UTC (permalink / raw) To: Tvrtko Ursulin Cc: Intel-gfx, dri-devel, Tvrtko Ursulin, Chris Wilson, Zhen Han On Thu, Aug 05, 2021 at 01:05:09PM +0100, Tvrtko Ursulin wrote: > From: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > > When a non-persistent context exits we currently mark it as banned in > order to trigger fast termination of any outstanding GPU jobs it may have > left running. > > In doing so we apply a very strict 1ms limit in which the left over job > has to preempt before we issues an engine resets. > > Some workloads are not able to cleanly preempt in that time window and it > can be argued that it would instead be better to give them a bit more > grace since avoiding engine resets is generally preferrable. > > To achieve this the patch splits handling of banned contexts from simply > closed non-persistent ones and then applies different timeouts for both > and also extends the criteria which determines if a request should be > scheduled back in after preemption or not. > > 15ms preempt timeout grace is given to exited non-persistent contexts > which have been empirically tested to satisfy customers requirements > and still provides reasonably quick cleanup post exit. > I think you need to rework your thinking here a bit as this a very execlists specific solution and the GuC needs to be considered. > v2: > * Streamline fast path checks. > > v3: > * Simplify by using only schedulable status. > * Increase timeout to 20ms. > > v4: > * Fix live_execlists selftest. > > v5: > * Fix logic in kill_engines. > > v6: > * Rebase. 
> > Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > Cc: Chris Wilson <chris@chris-wilson.co.uk> > Cc: Zhen Han <zhen.han@intel.com> > --- > drivers/gpu/drm/i915/gem/i915_gem_context.c | 22 +++++++++++++------ > drivers/gpu/drm/i915/gt/intel_context.c | 2 ++ > drivers/gpu/drm/i915/gt/intel_context.h | 17 +++++++++++++- > drivers/gpu/drm/i915/gt/intel_context_types.h | 1 + > .../drm/i915/gt/intel_execlists_submission.c | 11 ++++++++-- > drivers/gpu/drm/i915/gt/selftest_execlists.c | 20 +++++++++++------ > drivers/gpu/drm/i915/i915_request.c | 2 +- > 7 files changed, 57 insertions(+), 18 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c > index cff72679ad7c..21fe5d4057ab 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c > +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c > @@ -1065,7 +1065,8 @@ static struct intel_engine_cs *active_engine(struct intel_context *ce) > return engine; > } > > -static void kill_engines(struct i915_gem_engines *engines, bool ban) > +static void > +kill_engines(struct i915_gem_engines *engines, bool ban, bool persistent) > { > struct i915_gem_engines_iter it; > struct intel_context *ce; > @@ -1079,8 +1080,15 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban) > */ > for_each_gem_engine(ce, engines, it) { > struct intel_engine_cs *engine; > + bool skip = false; > + > + if (ban) > + skip = intel_context_ban(ce, NULL); > + else if (!persistent) > + skip = !intel_context_clear_schedulable(ce); schedulable doesn't hook into the backend at all, while intel_context_ban does. In the case of GuC submission intel_context_ban changes to preemption timeout to 1 us and disables scheduling resulting in the context getting kicked off the hardware immediately. You likely need to update intel_context_clear_schedulable to use the same vfunc as intel_context_ban() but accept an argument for the value of the preemption timeout. 
For a ban, use a lower value; for clearing schedulable, use a higher value. > > - if (ban && intel_context_ban(ce, NULL)) > + /* Already previously banned or made non-schedulable? */ > + if (skip) > continue; > > /* > @@ -1093,7 +1101,7 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban) > engine = active_engine(ce); > > /* First attempt to gracefully cancel the context */ > - if (engine && !__cancel_engine(engine) && ban) > + if (engine && !__cancel_engine(engine) && (ban || !persistent)) > /* > * If we are unable to send a preemptive pulse to bump > * the context from the GPU, we have to resort to a full > @@ -1105,8 +1113,6 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban) > > static void kill_context(struct i915_gem_context *ctx) > { > - bool ban = (!i915_gem_context_is_persistent(ctx) || > - !ctx->i915->params.enable_hangcheck); > struct i915_gem_engines *pos, *next; > > spin_lock_irq(&ctx->stale.lock); > @@ -1119,7 +1125,8 @@ static void kill_context(struct i915_gem_context *ctx) > > spin_unlock_irq(&ctx->stale.lock); > > - kill_engines(pos, ban); > + kill_engines(pos, !ctx->i915->params.enable_hangcheck, > + i915_gem_context_is_persistent(ctx)); > > spin_lock_irq(&ctx->stale.lock); > GEM_BUG_ON(i915_sw_fence_signaled(&pos->fence)); > @@ -1165,7 +1172,8 @@ static void engines_idle_release(struct i915_gem_context *ctx, > > kill: > if (list_empty(&engines->link)) /* raced, already closed */ > - kill_engines(engines, true); > + kill_engines(engines, true, > + i915_gem_context_is_persistent(ctx)); > > i915_sw_fence_commit(&engines->fence); > } > diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c > index 745e84c72c90..bc1701ef1578 100644 > --- a/drivers/gpu/drm/i915/gt/intel_context.c > +++ b/drivers/gpu/drm/i915/gt/intel_context.c > @@ -382,6 +382,8 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine) > ce->ring = NULL; > ce->ring_size = SZ_4K; > > + 
__set_bit(CONTEXT_SCHEDULABLE, &ce->flags); > + > ewma_runtime_init(&ce->runtime.avg); > > ce->vm = i915_vm_get(engine->gt->vm); > diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h > index c41098950746..5b50716654dd 100644 > --- a/drivers/gpu/drm/i915/gt/intel_context.h > +++ b/drivers/gpu/drm/i915/gt/intel_context.h > @@ -251,7 +251,22 @@ static inline bool intel_context_is_banned(const struct intel_context *ce) > > static inline bool intel_context_set_banned(struct intel_context *ce) > { > - return test_and_set_bit(CONTEXT_BANNED, &ce->flags); > + bool banned = test_and_set_bit(CONTEXT_BANNED, &ce->flags); > + > + if (!banned) > + clear_bit(CONTEXT_SCHEDULABLE, &ce->flags); > + > + return banned; > +} > + > +static inline bool intel_context_clear_schedulable(struct intel_context *ce) > +{ > + return test_and_clear_bit(CONTEXT_SCHEDULABLE, &ce->flags); > +} > + > +static inline bool intel_context_is_schedulable(const struct intel_context *ce) > +{ > + return test_bit(CONTEXT_SCHEDULABLE, &ce->flags); > } > > static inline bool intel_context_ban(struct intel_context *ce, > diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h > index e54351a170e2..3306c70c9c54 100644 > --- a/drivers/gpu/drm/i915/gt/intel_context_types.h > +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h > @@ -112,6 +112,7 @@ struct intel_context { > #define CONTEXT_FORCE_SINGLE_SUBMISSION 7 > #define CONTEXT_NOPREEMPT 8 > #define CONTEXT_LRCA_DIRTY 9 > +#define CONTEXT_SCHEDULABLE 10 /* Unless banned or non-persistent closed. 
*/ > > struct { > u64 timeout_us; > diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > index de5f9c86b9a4..778f3cda3c71 100644 > --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > @@ -478,7 +478,7 @@ __execlists_schedule_in(struct i915_request *rq) > !intel_engine_has_heartbeat(engine))) > intel_context_set_banned(ce); > > - if (unlikely(intel_context_is_banned(ce) || bad_request(rq))) > + if (unlikely(!intel_context_is_schedulable(ce) || bad_request(rq))) > reset_active(rq, engine); > > if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) > @@ -1222,12 +1222,19 @@ static void record_preemption(struct intel_engine_execlists *execlists) > static unsigned long active_preempt_timeout(struct intel_engine_cs *engine, > const struct i915_request *rq) > { > + struct intel_context *ce; > + > if (!rq) > return 0; > > + ce = rq->context; > + > /* Force a fast reset for terminated contexts (ignoring sysfs!) */ > - if (unlikely(intel_context_is_banned(rq->context) || bad_request(rq))) > + if (unlikely(intel_context_is_banned(ce) || bad_request(rq))) > return 1; > + /* Longer grace for closed non-persistent contexts to avoid resets. */ > + else if (unlikely(!intel_context_is_schedulable(ce))) > + return 20; Likely want a define for '1' and '20' too. 
Matt > > return READ_ONCE(engine->props.preempt_timeout_ms); > } > diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c > index f12ffe797639..da36c015caf4 100644 > --- a/drivers/gpu/drm/i915/gt/selftest_execlists.c > +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c > @@ -2050,6 +2050,12 @@ struct live_preempt_cancel { > struct preempt_client a, b; > }; > > +static void context_clear_banned(struct intel_context *ce) > +{ > + clear_bit(CONTEXT_BANNED, &ce->flags); > + set_bit(CONTEXT_SCHEDULABLE, &ce->flags); > +} > + > static int __cancel_active0(struct live_preempt_cancel *arg) > { > struct i915_request *rq; > @@ -2068,7 +2074,7 @@ static int __cancel_active0(struct live_preempt_cancel *arg) > if (IS_ERR(rq)) > return PTR_ERR(rq); > > - clear_bit(CONTEXT_BANNED, &rq->context->flags); > + context_clear_banned(rq->context); > i915_request_get(rq); > i915_request_add(rq); > if (!igt_wait_for_spinner(&arg->a.spin, rq)) { > @@ -2112,7 +2118,7 @@ static int __cancel_active1(struct live_preempt_cancel *arg) > if (IS_ERR(rq[0])) > return PTR_ERR(rq[0]); > > - clear_bit(CONTEXT_BANNED, &rq[0]->context->flags); > + context_clear_banned(rq[0]->context); > i915_request_get(rq[0]); > i915_request_add(rq[0]); > if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) { > @@ -2128,7 +2134,7 @@ static int __cancel_active1(struct live_preempt_cancel *arg) > goto out; > } > > - clear_bit(CONTEXT_BANNED, &rq[1]->context->flags); > + context_clear_banned(rq[1]->context); > i915_request_get(rq[1]); > err = i915_request_await_dma_fence(rq[1], &rq[0]->fence); > i915_request_add(rq[1]); > @@ -2183,7 +2189,7 @@ static int __cancel_queued(struct live_preempt_cancel *arg) > if (IS_ERR(rq[0])) > return PTR_ERR(rq[0]); > > - clear_bit(CONTEXT_BANNED, &rq[0]->context->flags); > + context_clear_banned(rq[0]->context); > i915_request_get(rq[0]); > i915_request_add(rq[0]); > if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) { > @@ -2197,7 +2203,7 @@ 
static int __cancel_queued(struct live_preempt_cancel *arg) > goto out; > } > > - clear_bit(CONTEXT_BANNED, &rq[1]->context->flags); > + context_clear_banned(rq[1]->context); > i915_request_get(rq[1]); > err = i915_request_await_dma_fence(rq[1], &rq[0]->fence); > i915_request_add(rq[1]); > @@ -2273,7 +2279,7 @@ static int __cancel_hostile(struct live_preempt_cancel *arg) > if (IS_ERR(rq)) > return PTR_ERR(rq); > > - clear_bit(CONTEXT_BANNED, &rq->context->flags); > + context_clear_banned(rq->context); > i915_request_get(rq); > i915_request_add(rq); > if (!igt_wait_for_spinner(&arg->a.spin, rq)) { > @@ -2329,7 +2335,7 @@ static int __cancel_fail(struct live_preempt_cancel *arg) > if (IS_ERR(rq)) > return PTR_ERR(rq); > > - clear_bit(CONTEXT_BANNED, &rq->context->flags); > + context_clear_banned(rq->context); > i915_request_get(rq); > i915_request_add(rq); > if (!igt_wait_for_spinner(&arg->a.spin, rq)) { > diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c > index ce446716d092..b1a9bec83339 100644 > --- a/drivers/gpu/drm/i915/i915_request.c > +++ b/drivers/gpu/drm/i915/i915_request.c > @@ -583,7 +583,7 @@ bool __i915_request_submit(struct i915_request *request) > goto active; > } > > - if (unlikely(intel_context_is_banned(request->context))) > + if (unlikely(!intel_context_is_schedulable(request->context))) > i915_request_set_error_once(request, -EIO); > > if (unlikely(fatal_error(request->fence.error))) > -- > 2.30.2 > ^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [Intel-gfx] [PATCH] drm/i915: Be more gentle when exiting non-persistent contexts 2021-08-05 16:32 ` [Intel-gfx] [PATCH] " Matthew Brost @ 2021-08-06 10:44 ` Tvrtko Ursulin 0 siblings, 0 replies; 7+ messages in thread From: Tvrtko Ursulin @ 2021-08-06 10:44 UTC (permalink / raw) To: Matthew Brost Cc: Intel-gfx, dri-devel, Tvrtko Ursulin, Chris Wilson, Zhen Han On 05/08/2021 17:32, Matthew Brost wrote: > On Thu, Aug 05, 2021 at 01:05:09PM +0100, Tvrtko Ursulin wrote: >> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com> >> >> When a non-persistent context exits we currently mark it as banned in >> order to trigger fast termination of any outstanding GPU jobs it may have >> left running. >> >> In doing so we apply a very strict 1ms limit in which the left over job >> has to preempt before we issues an engine resets. >> >> Some workloads are not able to cleanly preempt in that time window and it >> can be argued that it would instead be better to give them a bit more >> grace since avoiding engine resets is generally preferrable. >> >> To achieve this the patch splits handling of banned contexts from simply >> closed non-persistent ones and then applies different timeouts for both >> and also extends the criteria which determines if a request should be >> scheduled back in after preemption or not. >> >> 15ms preempt timeout grace is given to exited non-persistent contexts >> which have been empirically tested to satisfy customers requirements >> and still provides reasonably quick cleanup post exit. >> > > I think you need to rework your thinking here a bit as this a very > execlists specific solution and the GuC needs to be considered. Slipped my mind GuC patches were merged in the meantime. (This patch predates that.) But I think wording in the commit message is fine. It is just the implementation that now has to handle the GuC as well. >> v2: >> * Streamline fast path checks. >> >> v3: >> * Simplify by using only schedulable status. 
>> * Increase timeout to 20ms. >> >> v4: >> * Fix live_execlists selftest. >> >> v5: >> * Fix logic in kill_engines. >> >> v6: >> * Rebase. >> >> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> >> Cc: Chris Wilson <chris@chris-wilson.co.uk> >> Cc: Zhen Han <zhen.han@intel.com> >> --- >> drivers/gpu/drm/i915/gem/i915_gem_context.c | 22 +++++++++++++------ >> drivers/gpu/drm/i915/gt/intel_context.c | 2 ++ >> drivers/gpu/drm/i915/gt/intel_context.h | 17 +++++++++++++- >> drivers/gpu/drm/i915/gt/intel_context_types.h | 1 + >> .../drm/i915/gt/intel_execlists_submission.c | 11 ++++++++-- >> drivers/gpu/drm/i915/gt/selftest_execlists.c | 20 +++++++++++------ >> drivers/gpu/drm/i915/i915_request.c | 2 +- >> 7 files changed, 57 insertions(+), 18 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c >> index cff72679ad7c..21fe5d4057ab 100644 >> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c >> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c >> @@ -1065,7 +1065,8 @@ static struct intel_engine_cs *active_engine(struct intel_context *ce) >> return engine; >> } >> >> -static void kill_engines(struct i915_gem_engines *engines, bool ban) >> +static void >> +kill_engines(struct i915_gem_engines *engines, bool ban, bool persistent) >> { >> struct i915_gem_engines_iter it; >> struct intel_context *ce; >> @@ -1079,8 +1080,15 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban) >> */ >> for_each_gem_engine(ce, engines, it) { >> struct intel_engine_cs *engine; >> + bool skip = false; >> + >> + if (ban) >> + skip = intel_context_ban(ce, NULL); >> + else if (!persistent) >> + skip = !intel_context_clear_schedulable(ce); > > schedulable doesn't hook into the backend at all, while > intel_context_ban does. In the case of GuC submission intel_context_ban > changes to preemption timeout to 1 us and disables scheduling resulting > in the context getting kicked off the hardware immediately. 
You likely > need to update intel_context_clear_schedulable to use the same vfunc as > intel_context_ban() but accept an argument for the value of the > preemption timeout. For a ban user a lower value, for clearing > schedulable use a higher value. Okay I'll have a look. Might go back to closed flag as opposed to schedulable as well since I don't quite like schedulable being the odd one out. > >> >> - if (ban && intel_context_ban(ce, NULL)) >> + /* Already previously banned or made non-schedulable? */ >> + if (skip) >> continue; >> >> /* >> @@ -1093,7 +1101,7 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban) >> engine = active_engine(ce); >> >> /* First attempt to gracefully cancel the context */ >> - if (engine && !__cancel_engine(engine) && ban) >> + if (engine && !__cancel_engine(engine) && (ban || !persistent)) >> /* >> * If we are unable to send a preemptive pulse to bump >> * the context from the GPU, we have to resort to a full >> @@ -1105,8 +1113,6 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban) >> >> static void kill_context(struct i915_gem_context *ctx) >> { >> - bool ban = (!i915_gem_context_is_persistent(ctx) || >> - !ctx->i915->params.enable_hangcheck); >> struct i915_gem_engines *pos, *next; >> >> spin_lock_irq(&ctx->stale.lock); >> @@ -1119,7 +1125,8 @@ static void kill_context(struct i915_gem_context *ctx) >> >> spin_unlock_irq(&ctx->stale.lock); >> >> - kill_engines(pos, ban); >> + kill_engines(pos, !ctx->i915->params.enable_hangcheck, >> + i915_gem_context_is_persistent(ctx)); >> >> spin_lock_irq(&ctx->stale.lock); >> GEM_BUG_ON(i915_sw_fence_signaled(&pos->fence)); >> @@ -1165,7 +1172,8 @@ static void engines_idle_release(struct i915_gem_context *ctx, >> >> kill: >> if (list_empty(&engines->link)) /* raced, already closed */ >> - kill_engines(engines, true); >> + kill_engines(engines, true, >> + i915_gem_context_is_persistent(ctx)); >> >> i915_sw_fence_commit(&engines->fence); >> } >> diff --git 
a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c >> index 745e84c72c90..bc1701ef1578 100644 >> --- a/drivers/gpu/drm/i915/gt/intel_context.c >> +++ b/drivers/gpu/drm/i915/gt/intel_context.c >> @@ -382,6 +382,8 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine) >> ce->ring = NULL; >> ce->ring_size = SZ_4K; >> >> + __set_bit(CONTEXT_SCHEDULABLE, &ce->flags); >> + >> ewma_runtime_init(&ce->runtime.avg); >> >> ce->vm = i915_vm_get(engine->gt->vm); >> diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h >> index c41098950746..5b50716654dd 100644 >> --- a/drivers/gpu/drm/i915/gt/intel_context.h >> +++ b/drivers/gpu/drm/i915/gt/intel_context.h >> @@ -251,7 +251,22 @@ static inline bool intel_context_is_banned(const struct intel_context *ce) >> >> static inline bool intel_context_set_banned(struct intel_context *ce) >> { >> - return test_and_set_bit(CONTEXT_BANNED, &ce->flags); >> + bool banned = test_and_set_bit(CONTEXT_BANNED, &ce->flags); >> + >> + if (!banned) >> + clear_bit(CONTEXT_SCHEDULABLE, &ce->flags); >> + >> + return banned; >> +} >> + >> +static inline bool intel_context_clear_schedulable(struct intel_context *ce) >> +{ >> + return test_and_clear_bit(CONTEXT_SCHEDULABLE, &ce->flags); >> +} >> + >> +static inline bool intel_context_is_schedulable(const struct intel_context *ce) >> +{ >> + return test_bit(CONTEXT_SCHEDULABLE, &ce->flags); >> } >> >> static inline bool intel_context_ban(struct intel_context *ce, >> diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h >> index e54351a170e2..3306c70c9c54 100644 >> --- a/drivers/gpu/drm/i915/gt/intel_context_types.h >> +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h >> @@ -112,6 +112,7 @@ struct intel_context { >> #define CONTEXT_FORCE_SINGLE_SUBMISSION 7 >> #define CONTEXT_NOPREEMPT 8 >> #define CONTEXT_LRCA_DIRTY 9 >> +#define CONTEXT_SCHEDULABLE 10 /* 
Unless banned or non-persistent closed. */ >> >> struct { >> u64 timeout_us; >> diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c >> index de5f9c86b9a4..778f3cda3c71 100644 >> --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c >> +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c >> @@ -478,7 +478,7 @@ __execlists_schedule_in(struct i915_request *rq) >> !intel_engine_has_heartbeat(engine))) >> intel_context_set_banned(ce); >> >> - if (unlikely(intel_context_is_banned(ce) || bad_request(rq))) >> + if (unlikely(!intel_context_is_schedulable(ce) || bad_request(rq))) >> reset_active(rq, engine); >> >> if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) >> @@ -1222,12 +1222,19 @@ static void record_preemption(struct intel_engine_execlists *execlists) >> static unsigned long active_preempt_timeout(struct intel_engine_cs *engine, >> const struct i915_request *rq) >> { >> + struct intel_context *ce; >> + >> if (!rq) >> return 0; >> >> + ce = rq->context; >> + >> /* Force a fast reset for terminated contexts (ignoring sysfs!) */ >> - if (unlikely(intel_context_is_banned(rq->context) || bad_request(rq))) >> + if (unlikely(intel_context_is_banned(ce) || bad_request(rq))) >> return 1; >> + /* Longer grace for closed non-persistent contexts to avoid resets. */ >> + else if (unlikely(!intel_context_is_schedulable(ce))) >> + return 20; > > Likely want a define for '1' and '20' too. Since the addition of GuC yeah, true. 
Regards, Tvrtko > > Matt > >> >> return READ_ONCE(engine->props.preempt_timeout_ms); >> } >> diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c >> index f12ffe797639..da36c015caf4 100644 >> --- a/drivers/gpu/drm/i915/gt/selftest_execlists.c >> +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c >> @@ -2050,6 +2050,12 @@ struct live_preempt_cancel { >> struct preempt_client a, b; >> }; >> >> +static void context_clear_banned(struct intel_context *ce) >> +{ >> + clear_bit(CONTEXT_BANNED, &ce->flags); >> + set_bit(CONTEXT_SCHEDULABLE, &ce->flags); >> +} >> + >> static int __cancel_active0(struct live_preempt_cancel *arg) >> { >> struct i915_request *rq; >> @@ -2068,7 +2074,7 @@ static int __cancel_active0(struct live_preempt_cancel *arg) >> if (IS_ERR(rq)) >> return PTR_ERR(rq); >> >> - clear_bit(CONTEXT_BANNED, &rq->context->flags); >> + context_clear_banned(rq->context); >> i915_request_get(rq); >> i915_request_add(rq); >> if (!igt_wait_for_spinner(&arg->a.spin, rq)) { >> @@ -2112,7 +2118,7 @@ static int __cancel_active1(struct live_preempt_cancel *arg) >> if (IS_ERR(rq[0])) >> return PTR_ERR(rq[0]); >> >> - clear_bit(CONTEXT_BANNED, &rq[0]->context->flags); >> + context_clear_banned(rq[0]->context); >> i915_request_get(rq[0]); >> i915_request_add(rq[0]); >> if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) { >> @@ -2128,7 +2134,7 @@ static int __cancel_active1(struct live_preempt_cancel *arg) >> goto out; >> } >> >> - clear_bit(CONTEXT_BANNED, &rq[1]->context->flags); >> + context_clear_banned(rq[1]->context); >> i915_request_get(rq[1]); >> err = i915_request_await_dma_fence(rq[1], &rq[0]->fence); >> i915_request_add(rq[1]); >> @@ -2183,7 +2189,7 @@ static int __cancel_queued(struct live_preempt_cancel *arg) >> if (IS_ERR(rq[0])) >> return PTR_ERR(rq[0]); >> >> - clear_bit(CONTEXT_BANNED, &rq[0]->context->flags); >> + context_clear_banned(rq[0]->context); >> i915_request_get(rq[0]); >> i915_request_add(rq[0]); >> 
if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) { >> @@ -2197,7 +2203,7 @@ static int __cancel_queued(struct live_preempt_cancel *arg) >> goto out; >> } >> >> - clear_bit(CONTEXT_BANNED, &rq[1]->context->flags); >> + context_clear_banned(rq[1]->context); >> i915_request_get(rq[1]); >> err = i915_request_await_dma_fence(rq[1], &rq[0]->fence); >> i915_request_add(rq[1]); >> @@ -2273,7 +2279,7 @@ static int __cancel_hostile(struct live_preempt_cancel *arg) >> if (IS_ERR(rq)) >> return PTR_ERR(rq); >> >> - clear_bit(CONTEXT_BANNED, &rq->context->flags); >> + context_clear_banned(rq->context); >> i915_request_get(rq); >> i915_request_add(rq); >> if (!igt_wait_for_spinner(&arg->a.spin, rq)) { >> @@ -2329,7 +2335,7 @@ static int __cancel_fail(struct live_preempt_cancel *arg) >> if (IS_ERR(rq)) >> return PTR_ERR(rq); >> >> - clear_bit(CONTEXT_BANNED, &rq->context->flags); >> + context_clear_banned(rq->context); >> i915_request_get(rq); >> i915_request_add(rq); >> if (!igt_wait_for_spinner(&arg->a.spin, rq)) { >> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c >> index ce446716d092..b1a9bec83339 100644 >> --- a/drivers/gpu/drm/i915/i915_request.c >> +++ b/drivers/gpu/drm/i915/i915_request.c >> @@ -583,7 +583,7 @@ bool __i915_request_submit(struct i915_request *request) >> goto active; >> } >> >> - if (unlikely(intel_context_is_banned(request->context))) >> + if (unlikely(!intel_context_is_schedulable(request->context))) >> i915_request_set_error_once(request, -EIO); >> >> if (unlikely(fatal_error(request->fence.error))) >> -- >> 2.30.2 >> ^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2021-08-06 10:44 UTC | newest] Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2021-08-05 12:05 [PATCH] drm/i915: Be more gentle when exiting non-persistent contexts Tvrtko Ursulin 2021-08-05 12:05 ` [Intel-gfx] " Tvrtko Ursulin 2021-08-05 15:04 ` [Intel-gfx] ✗ Fi.CI.BAT: failure for " Patchwork 2021-08-05 16:10 ` Tvrtko Ursulin 2021-08-05 18:24 ` Matthew Brost 2021-08-05 16:32 ` [Intel-gfx] [PATCH] " Matthew Brost 2021-08-06 10:44 ` Tvrtko Ursulin
This is an external index of several public inboxes; see the mirroring instructions for how to clone and mirror all data and code used by this external index.