All of lore.kernel.org
 help / color / mirror / Atom feed
From: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
To: Matthew Auld <matthew.william.auld@gmail.com>
Cc: Intel Graphics Development <Intel-gfx@lists.freedesktop.org>,
	ML dri-devel <dri-devel@lists.freedesktop.org>,
	Chris Wilson <chris@chris-wilson.co.uk>
Subject: Re: [Intel-gfx] [PATCH 1/6] drm/i915: Individual request cancellation
Date: Tue, 23 Mar 2021 09:48:18 +0000	[thread overview]
Message-ID: <fdfde4a0-b748-bcd9-e427-080eeaef3bec@linux.intel.com> (raw)
In-Reply-To: <CAM0jSHNLVqvtMPs+vdiDVpiZwotruqxyCLzBjbZSoVHaCDd3rg@mail.gmail.com>


On 22/03/2021 15:38, Matthew Auld wrote:
> On Thu, 18 Mar 2021 at 17:04, Tvrtko Ursulin
> <tvrtko.ursulin@linux.intel.com> wrote:
>>
>> From: Chris Wilson <chris@chris-wilson.co.uk>
>>
>> Currently, we cancel outstanding requests within a context when the
>> context is closed. We may also want to cancel individual requests using
>> the same graceful preemption mechanism.
>>
>> v2 (Tvrtko):
>>   * Cancel waiters carefully considering no timeline lock and RCU.
>>   * Fixed selftests.
>>
>> v3 (Tvrtko):
>>   * Remove error propagation to waiters for now.
>>
>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> ---
>>   .../gpu/drm/i915/gt/intel_engine_heartbeat.c  |   1 +
>>   .../drm/i915/gt/intel_execlists_submission.c  |   9 +-
>>   drivers/gpu/drm/i915/i915_request.c           |  52 ++++-
>>   drivers/gpu/drm/i915/i915_request.h           |   4 +-
>>   drivers/gpu/drm/i915/selftests/i915_request.c | 201 ++++++++++++++++++
>>   5 files changed, 261 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
>> index 0b062fad1837..e2fb3ae2aaf3 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
>> @@ -314,6 +314,7 @@ int intel_engine_pulse(struct intel_engine_cs *engine)
>>                  mutex_unlock(&ce->timeline->mutex);
>>          }
>>
>> +       intel_engine_flush_scheduler(engine);
>>          intel_engine_pm_put(engine);
>>          return err;
>>   }
>> diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
>> index 85ff5fe861b4..4c2acb5a6c0a 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
>> @@ -421,6 +421,11 @@ static void reset_active(struct i915_request *rq,
>>          ce->lrc.lrca = lrc_update_regs(ce, engine, head);
>>   }
>>
>> +static bool bad_request(const struct i915_request *rq)
>> +{
>> +       return rq->fence.error && i915_request_started(rq);
>> +}
>> +
>>   static struct intel_engine_cs *
>>   __execlists_schedule_in(struct i915_request *rq)
>>   {
>> @@ -433,7 +438,7 @@ __execlists_schedule_in(struct i915_request *rq)
>>                       !intel_engine_has_heartbeat(engine)))
>>                  intel_context_set_banned(ce);
>>
>> -       if (unlikely(intel_context_is_banned(ce)))
>> +       if (unlikely(intel_context_is_banned(ce) || bad_request(rq)))
>>                  reset_active(rq, engine);
>>
>>          if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
>> @@ -1112,7 +1117,7 @@ static unsigned long active_preempt_timeout(struct intel_engine_cs *engine,
>>                  return 0;
>>
>>          /* Force a fast reset for terminated contexts (ignoring sysfs!) */
>> -       if (unlikely(intel_context_is_banned(rq->context)))
>> +       if (unlikely(intel_context_is_banned(rq->context) || bad_request(rq)))
>>                  return 1;
>>
>>          return READ_ONCE(engine->props.preempt_timeout_ms);
>> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
>> index e7b4c4bc41a6..b4511ac05e9a 100644
>> --- a/drivers/gpu/drm/i915/i915_request.c
>> +++ b/drivers/gpu/drm/i915/i915_request.c
>> @@ -33,7 +33,10 @@
>>   #include "gem/i915_gem_context.h"
>>   #include "gt/intel_breadcrumbs.h"
>>   #include "gt/intel_context.h"
>> +#include "gt/intel_engine.h"
>> +#include "gt/intel_engine_heartbeat.h"
>>   #include "gt/intel_gpu_commands.h"
>> +#include "gt/intel_reset.h"
>>   #include "gt/intel_ring.h"
>>   #include "gt/intel_rps.h"
>>
>> @@ -429,20 +432,22 @@ void __i915_request_skip(struct i915_request *rq)
>>          rq->infix = rq->postfix;
>>   }
>>
>> -void i915_request_set_error_once(struct i915_request *rq, int error)
>> +bool i915_request_set_error_once(struct i915_request *rq, int error)
>>   {
>>          int old;
>>
>>          GEM_BUG_ON(!IS_ERR_VALUE((long)error));
>>
>>          if (i915_request_signaled(rq))
>> -               return;
>> +               return false;
>>
>>          old = READ_ONCE(rq->fence.error);
>>          do {
>>                  if (fatal_error(old))
>> -                       return;
>> +                       return false;
>>          } while (!try_cmpxchg(&rq->fence.error, &old, error));
>> +
>> +       return true;
>>   }
>>
>>   struct i915_request *i915_request_mark_eio(struct i915_request *rq)
>> @@ -609,6 +614,47 @@ void i915_request_unsubmit(struct i915_request *request)
>>          spin_unlock_irqrestore(&se->lock, flags);
>>   }
>>
>> +static struct intel_engine_cs *active_engine(struct i915_request *rq)
>> +{
>> +       struct intel_engine_cs *engine, *locked;
>> +
>> +       locked = READ_ONCE(rq->engine);
>> +       spin_lock_irq(&locked->sched.lock);
>> +       while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) {
>> +               spin_unlock(&locked->sched.lock);
>> +               locked = engine;
>> +               spin_lock(&locked->sched.lock);
>> +       }
>> +
>> +       engine = NULL;
>> +       if (i915_request_is_active(rq) && !__i915_request_is_complete(rq))
>> +               engine = locked;
>> +
>> +       spin_unlock_irq(&locked->sched.lock);
>> +
>> +       return engine;
> 
> Would it be a bad idea to reuse __active_engine() here somehow?

I can try that and see how it ends up looking.

> Reviewed-by: Matthew Auld <matthew.auld@intel.com>
> 

Thanks,

Tvrtko
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

WARNING: multiple messages have this Message-ID (diff)
From: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
To: Matthew Auld <matthew.william.auld@gmail.com>
Cc: Intel Graphics Development <Intel-gfx@lists.freedesktop.org>,
	ML dri-devel <dri-devel@lists.freedesktop.org>,
	Chris Wilson <chris@chris-wilson.co.uk>
Subject: Re: [Intel-gfx] [PATCH 1/6] drm/i915: Individual request cancellation
Date: Tue, 23 Mar 2021 09:48:18 +0000	[thread overview]
Message-ID: <fdfde4a0-b748-bcd9-e427-080eeaef3bec@linux.intel.com> (raw)
In-Reply-To: <CAM0jSHNLVqvtMPs+vdiDVpiZwotruqxyCLzBjbZSoVHaCDd3rg@mail.gmail.com>


On 22/03/2021 15:38, Matthew Auld wrote:
> On Thu, 18 Mar 2021 at 17:04, Tvrtko Ursulin
> <tvrtko.ursulin@linux.intel.com> wrote:
>>
>> From: Chris Wilson <chris@chris-wilson.co.uk>
>>
>> Currently, we cancel outstanding requests within a context when the
>> context is closed. We may also want to cancel individual requests using
>> the same graceful preemption mechanism.
>>
>> v2 (Tvrtko):
>>   * Cancel waiters carefully considering no timeline lock and RCU.
>>   * Fixed selftests.
>>
>> v3 (Tvrtko):
>>   * Remove error propagation to waiters for now.
>>
>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> ---
>>   .../gpu/drm/i915/gt/intel_engine_heartbeat.c  |   1 +
>>   .../drm/i915/gt/intel_execlists_submission.c  |   9 +-
>>   drivers/gpu/drm/i915/i915_request.c           |  52 ++++-
>>   drivers/gpu/drm/i915/i915_request.h           |   4 +-
>>   drivers/gpu/drm/i915/selftests/i915_request.c | 201 ++++++++++++++++++
>>   5 files changed, 261 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
>> index 0b062fad1837..e2fb3ae2aaf3 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
>> @@ -314,6 +314,7 @@ int intel_engine_pulse(struct intel_engine_cs *engine)
>>                  mutex_unlock(&ce->timeline->mutex);
>>          }
>>
>> +       intel_engine_flush_scheduler(engine);
>>          intel_engine_pm_put(engine);
>>          return err;
>>   }
>> diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
>> index 85ff5fe861b4..4c2acb5a6c0a 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
>> @@ -421,6 +421,11 @@ static void reset_active(struct i915_request *rq,
>>          ce->lrc.lrca = lrc_update_regs(ce, engine, head);
>>   }
>>
>> +static bool bad_request(const struct i915_request *rq)
>> +{
>> +       return rq->fence.error && i915_request_started(rq);
>> +}
>> +
>>   static struct intel_engine_cs *
>>   __execlists_schedule_in(struct i915_request *rq)
>>   {
>> @@ -433,7 +438,7 @@ __execlists_schedule_in(struct i915_request *rq)
>>                       !intel_engine_has_heartbeat(engine)))
>>                  intel_context_set_banned(ce);
>>
>> -       if (unlikely(intel_context_is_banned(ce)))
>> +       if (unlikely(intel_context_is_banned(ce) || bad_request(rq)))
>>                  reset_active(rq, engine);
>>
>>          if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
>> @@ -1112,7 +1117,7 @@ static unsigned long active_preempt_timeout(struct intel_engine_cs *engine,
>>                  return 0;
>>
>>          /* Force a fast reset for terminated contexts (ignoring sysfs!) */
>> -       if (unlikely(intel_context_is_banned(rq->context)))
>> +       if (unlikely(intel_context_is_banned(rq->context) || bad_request(rq)))
>>                  return 1;
>>
>>          return READ_ONCE(engine->props.preempt_timeout_ms);
>> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
>> index e7b4c4bc41a6..b4511ac05e9a 100644
>> --- a/drivers/gpu/drm/i915/i915_request.c
>> +++ b/drivers/gpu/drm/i915/i915_request.c
>> @@ -33,7 +33,10 @@
>>   #include "gem/i915_gem_context.h"
>>   #include "gt/intel_breadcrumbs.h"
>>   #include "gt/intel_context.h"
>> +#include "gt/intel_engine.h"
>> +#include "gt/intel_engine_heartbeat.h"
>>   #include "gt/intel_gpu_commands.h"
>> +#include "gt/intel_reset.h"
>>   #include "gt/intel_ring.h"
>>   #include "gt/intel_rps.h"
>>
>> @@ -429,20 +432,22 @@ void __i915_request_skip(struct i915_request *rq)
>>          rq->infix = rq->postfix;
>>   }
>>
>> -void i915_request_set_error_once(struct i915_request *rq, int error)
>> +bool i915_request_set_error_once(struct i915_request *rq, int error)
>>   {
>>          int old;
>>
>>          GEM_BUG_ON(!IS_ERR_VALUE((long)error));
>>
>>          if (i915_request_signaled(rq))
>> -               return;
>> +               return false;
>>
>>          old = READ_ONCE(rq->fence.error);
>>          do {
>>                  if (fatal_error(old))
>> -                       return;
>> +                       return false;
>>          } while (!try_cmpxchg(&rq->fence.error, &old, error));
>> +
>> +       return true;
>>   }
>>
>>   struct i915_request *i915_request_mark_eio(struct i915_request *rq)
>> @@ -609,6 +614,47 @@ void i915_request_unsubmit(struct i915_request *request)
>>          spin_unlock_irqrestore(&se->lock, flags);
>>   }
>>
>> +static struct intel_engine_cs *active_engine(struct i915_request *rq)
>> +{
>> +       struct intel_engine_cs *engine, *locked;
>> +
>> +       locked = READ_ONCE(rq->engine);
>> +       spin_lock_irq(&locked->sched.lock);
>> +       while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) {
>> +               spin_unlock(&locked->sched.lock);
>> +               locked = engine;
>> +               spin_lock(&locked->sched.lock);
>> +       }
>> +
>> +       engine = NULL;
>> +       if (i915_request_is_active(rq) && !__i915_request_is_complete(rq))
>> +               engine = locked;
>> +
>> +       spin_unlock_irq(&locked->sched.lock);
>> +
>> +       return engine;
> 
> Would it be a bad idea to reuse __active_engine() here somehow?

I can try that and see how it ends up looking.

> Reviewed-by: Matthew Auld <matthew.auld@intel.com>
> 

Thanks,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  reply	other threads:[~2021-03-23  9:48 UTC|newest]

Thread overview: 44+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-03-18 17:04 [PATCH v3 0/6] Default request/fence expiry + watchdog Tvrtko Ursulin
2021-03-18 17:04 ` [Intel-gfx] " Tvrtko Ursulin
2021-03-18 17:04 ` [PATCH 1/6] drm/i915: Individual request cancellation Tvrtko Ursulin
2021-03-18 17:04   ` [Intel-gfx] " Tvrtko Ursulin
2021-03-22 15:38   ` Matthew Auld
2021-03-22 15:38     ` Matthew Auld
2021-03-23  9:48     ` Tvrtko Ursulin [this message]
2021-03-23  9:48       ` Tvrtko Ursulin
2021-03-18 17:04 ` [PATCH 2/6] drm/i915: Restrict sentinel requests further Tvrtko Ursulin
2021-03-18 17:04   ` [Intel-gfx] " Tvrtko Ursulin
2021-03-22 17:12   ` Matthew Auld
2021-03-22 17:12     ` Matthew Auld
2021-03-23  9:09     ` Tvrtko Ursulin
2021-03-23  9:09       ` Tvrtko Ursulin
2021-03-18 17:04 ` [PATCH 3/6] drm/i915: Handle async cancellation in sentinel assert Tvrtko Ursulin
2021-03-18 17:04   ` [Intel-gfx] " Tvrtko Ursulin
2021-03-23 10:09   ` Matthew Auld
2021-03-23 10:09     ` Matthew Auld
2021-03-18 17:04 ` [PATCH 4/6] drm/i915: Request watchdog infrastructure Tvrtko Ursulin
2021-03-18 17:04   ` [Intel-gfx] " Tvrtko Ursulin
2021-03-22 13:29   ` [PATCH v3 " Tvrtko Ursulin
2021-03-22 13:29     ` [Intel-gfx] " Tvrtko Ursulin
2021-03-23 10:54     ` Matthew Auld
2021-03-23 10:54       ` Matthew Auld
2021-03-23 11:09       ` Tvrtko Ursulin
2021-03-23 11:09         ` Tvrtko Ursulin
2021-03-23 11:40         ` Matthew Auld
2021-03-23 11:40           ` Matthew Auld
2021-03-18 17:04 ` [PATCH 5/6] drm/i915: Fail too long user submissions by default Tvrtko Ursulin
2021-03-18 17:04   ` [Intel-gfx] " Tvrtko Ursulin
2021-03-23 15:56   ` Matthew Auld
2021-03-23 15:56     ` Matthew Auld
2021-03-18 17:04 ` [PATCH 6/6] drm/i915: Allow configuring default request expiry via modparam Tvrtko Ursulin
2021-03-18 17:04   ` [Intel-gfx] " Tvrtko Ursulin
2021-03-18 19:07 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for Default request/fence expiry + watchdog (rev3) Patchwork
2021-03-18 19:36 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
2021-03-19  1:17 ` [Intel-gfx] ✗ Fi.CI.IGT: failure " Patchwork
2021-03-22 13:37   ` Tvrtko Ursulin
2021-03-22 13:37     ` [Intel-gfx] " Tvrtko Ursulin
2021-03-22 13:41     ` Daniel Vetter
2021-03-22 13:41       ` [Intel-gfx] " Daniel Vetter
2021-03-22 14:05 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for Default request/fence expiry + watchdog (rev4) Patchwork
2021-03-22 14:33 ` [Intel-gfx] ✗ Fi.CI.BAT: failure " Patchwork
  -- strict thread matches above, loose matches on Subject: below --
2021-03-16 16:23 [PATCH 0/6] Default request/fence expiry + watchdog Tvrtko Ursulin
2021-03-16 16:23 ` [Intel-gfx] [PATCH 1/6] drm/i915: Individual request cancellation Tvrtko Ursulin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=fdfde4a0-b748-bcd9-e427-080eeaef3bec@linux.intel.com \
    --to=tvrtko.ursulin@linux.intel.com \
    --cc=Intel-gfx@lists.freedesktop.org \
    --cc=chris@chris-wilson.co.uk \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=matthew.william.auld@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.