dri-devel.lists.freedesktop.org archive mirror
 help / color / mirror / Atom feed
From: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
To: Matthew Auld <matthew.william.auld@gmail.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>,
	Intel Graphics Development <Intel-gfx@lists.freedesktop.org>,
	ML dri-devel <dri-devel@lists.freedesktop.org>
Subject: Re: [Intel-gfx] [PATCH v3 4/6] drm/i915: Request watchdog infrastructure
Date: Tue, 23 Mar 2021 11:09:42 +0000	[thread overview]
Message-ID: <a5feaa5f-3086-952d-8edf-c66c18ce95b7@linux.intel.com> (raw)
In-Reply-To: <CAM0jSHM=o3pdsuSTd2hFh6mRqCiJVPhXgjRMLLGT=N8Q+SAWwQ@mail.gmail.com>


On 23/03/2021 10:54, Matthew Auld wrote:
> On Mon, 22 Mar 2021 at 13:29, Tvrtko Ursulin
> <tvrtko.ursulin@linux.intel.com> wrote:
>>
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> Prepares the plumbing for setting request/fence expiration time. All code
>> is put in place but is never activeted due yet missing ability to actually
> 
>                   activated
> 
>> configure the timer.
>>
>> Outline of the basic operation:
>>
>> A timer is started when the request is ready for execution. If the request
>> completes (retires) before the timer fires, the timer is cancelled and
>> nothing further happens.
>>
>> If the timer fires, the request is added to a lockless list and a worker is
>> queued. The purpose of this is twofold: a) it allows request cancellation
>> from a more friendly context and b) it coalesces multiple expirations into a
>> single event of consuming the list.
>>
>> The worker locklessly consumes the list of expired requests and cancels them
>> all using the previously added i915_request_cancel().
>>
>> Associated timeout value is stored in rq->context.watchdog.timeout_us.
>>
>> v2:
>>   * Log expiration.
>>
>> v3:
>>   * Include more information about user timeline in the log message.
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
>> ---
>>   drivers/gpu/drm/i915/gt/intel_context_types.h |  4 ++
>>   .../drm/i915/gt/intel_execlists_submission.h  |  2 +
>>   drivers/gpu/drm/i915/gt/intel_gt.c            |  3 +
>>   drivers/gpu/drm/i915/gt/intel_gt.h            |  2 +
>>   drivers/gpu/drm/i915/gt/intel_gt_requests.c   | 28 ++++++++++
>>   drivers/gpu/drm/i915/gt/intel_gt_types.h      |  7 +++
>>   drivers/gpu/drm/i915/i915_request.c           | 56 +++++++++++++++++++
>>   drivers/gpu/drm/i915/i915_request.h           |  8 +++
>>   8 files changed, 110 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
>> index 0ea18c9e2aca..65a5730a4f5b 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_context_types.h
>> +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
>> @@ -99,6 +99,10 @@ struct intel_context {
>>   #define CONTEXT_FORCE_SINGLE_SUBMISSION        7
>>   #define CONTEXT_NOPREEMPT              8
>>
>> +       struct {
>> +               u64 timeout_us;
>> +       } watchdog;
>> +
>>          u32 *lrc_reg_state;
>>          union {
>>                  struct {
>> diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.h b/drivers/gpu/drm/i915/gt/intel_execlists_submission.h
>> index f7bd3fccfee8..4ca9b475e252 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.h
>> +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.h
>> @@ -6,6 +6,7 @@
>>   #ifndef __INTEL_EXECLISTS_SUBMISSION_H__
>>   #define __INTEL_EXECLISTS_SUBMISSION_H__
>>
>> +#include <linux/llist.h>
>>   #include <linux/types.h>
>>
>>   struct drm_printer;
>> @@ -13,6 +14,7 @@ struct drm_printer;
>>   struct i915_request;
>>   struct intel_context;
>>   struct intel_engine_cs;
>> +struct intel_gt;
>>
>>   enum {
>>          INTEL_CONTEXT_SCHEDULE_IN = 0,
>> diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
>> index ca76f93bc03d..8d77dcbad059 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_gt.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_gt.c
>> @@ -31,6 +31,9 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
>>          INIT_LIST_HEAD(&gt->closed_vma);
>>          spin_lock_init(&gt->closed_lock);
>>
>> +       init_llist_head(&gt->watchdog.list);
>> +       INIT_WORK(&gt->watchdog.work, intel_gt_watchdog_work);
>> +
>>          intel_gt_init_buffer_pool(gt);
>>          intel_gt_init_reset(gt);
>>          intel_gt_init_requests(gt);
>> diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h
>> index a17bd8b3195f..7ec395cace69 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_gt.h
>> +++ b/drivers/gpu/drm/i915/gt/intel_gt.h
>> @@ -78,4 +78,6 @@ static inline bool intel_gt_is_wedged(const struct intel_gt *gt)
>>   void intel_gt_info_print(const struct intel_gt_info *info,
>>                           struct drm_printer *p);
>>
>> +void intel_gt_watchdog_work(struct work_struct *work);
>> +
>>   #endif /* __INTEL_GT_H__ */
>> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
>> index 36ec97f79174..fbfd19b2e5f2 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
>> @@ -8,6 +8,7 @@
>>   #include "i915_drv.h" /* for_each_engine() */
>>   #include "i915_request.h"
>>   #include "intel_engine_heartbeat.h"
>> +#include "intel_execlists_submission.h"
>>   #include "intel_gt.h"
>>   #include "intel_gt_pm.h"
>>   #include "intel_gt_requests.h"
>> @@ -242,4 +243,31 @@ void intel_gt_fini_requests(struct intel_gt *gt)
>>   {
>>          /* Wait until the work is marked as finished before unloading! */
>>          cancel_delayed_work_sync(&gt->requests.retire_work);
>> +
>> +       flush_work(&gt->watchdog.work);
>> +}
>> +
>> +void intel_gt_watchdog_work(struct work_struct *work)
>> +{
>> +       struct intel_gt *gt =
>> +               container_of(work, typeof(*gt), watchdog.work);
>> +       struct i915_request *rq, *rn;
>> +       struct llist_node *first;
>> +
>> +       first = llist_del_all(&gt->watchdog.list);
>> +       if (!first)
>> +               return;
>> +
>> +       llist_for_each_entry_safe(rq, rn, first, watchdog.link) {
>> +               if (!i915_request_completed(rq)) {
>> +                       struct dma_fence *f = &rq->fence;
>> +
>> +                       pr_notice("Fence expiration time out i915-%s:%s:%llx!\n",
>> +                                 f->ops->get_driver_name(f),
>> +                                 f->ops->get_timeline_name(f),
>> +                                 f->seqno);
>> +                       i915_request_cancel(rq, -EINTR);
>> +               }
>> +               i915_request_put(rq);
>> +       }
>>   }
>> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
>> index 626af37c7790..d70ebcc6f19f 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
>> +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
>> @@ -8,10 +8,12 @@
>>
>>   #include <linux/ktime.h>
>>   #include <linux/list.h>
>> +#include <linux/llist.h>
>>   #include <linux/mutex.h>
>>   #include <linux/notifier.h>
>>   #include <linux/spinlock.h>
>>   #include <linux/types.h>
>> +#include <linux/workqueue.h>
>>
>>   #include "uc/intel_uc.h"
>>
>> @@ -62,6 +64,11 @@ struct intel_gt {
>>                  struct delayed_work retire_work;
>>          } requests;
>>
>> +       struct {
>> +               struct llist_head list;
>> +               struct work_struct work;
>> +       } watchdog;
>> +
>>          struct intel_wakeref wakeref;
>>          atomic_t user_wakeref;
>>
>> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
>> index b4511ac05e9a..9dd5e588b0a4 100644
>> --- a/drivers/gpu/drm/i915/i915_request.c
>> +++ b/drivers/gpu/drm/i915/i915_request.c
>> @@ -277,6 +277,57 @@ static void remove_from_engine(struct i915_request *rq)
>>          __notify_execute_cb_imm(rq);
>>   }
>>
>> +static void __rq_init_watchdog(struct i915_request *rq)
>> +{
>> +       rq->watchdog.timer.function = NULL;
>> +}
>> +
>> +static enum hrtimer_restart __rq_watchdog_expired(struct hrtimer *hrtimer)
>> +{
>> +       struct i915_request *rq =
>> +               container_of(hrtimer, struct i915_request, watchdog.timer);
>> +       struct intel_gt *gt = rq->engine->gt;
>> +
>> +       if (!i915_request_completed(rq)) {
>> +               if (llist_add(&rq->watchdog.link, &gt->watchdog.list))
>> +                       schedule_work(&gt->watchdog.work);
>> +       } else {
>> +               i915_request_put(rq);
>> +       }
>> +
>> +       return HRTIMER_NORESTART;
>> +}
>> +
>> +static void __rq_arm_watchdog(struct i915_request *rq)
>> +{
>> +       struct i915_request_watchdog *wdg = &rq->watchdog;
>> +       struct intel_context *ce = rq->context;
>> +
>> +       if (!ce->watchdog.timeout_us)
>> +               return;
>> +
>> +       hrtimer_init(&wdg->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
>> +       wdg->timer.function = __rq_watchdog_expired;
>> +       hrtimer_start_range_ns(&wdg->timer,
>> +                              ns_to_ktime(ce->watchdog.timeout_us *
>> +                                          NSEC_PER_USEC),
>> +                               /*
>> +                                * FIXME check if it gives the "not sooner"
>> +                                * guarantee or slack is both ways
>> +                                */
> 
> It looks like the slack/fuzziness just delays the timer, in case it
> can coalesce multiple timer events. So shouldn't be sooner I think?

I couldn't quickly figure it out when I looked at the implementation, so 
I left this comment. But it was only relevant at a time when I thought we 
would be exposing a context param to allow userspace control. With the 
only user being the default expiry, which is not sensitive to precision or 
accuracy, I simply need to remove this comment.

> 
>> +                               NSEC_PER_MSEC,
> 
> Formatting.

Which part? I think indentation/alignment is correct.

> 
> Reviewed-by: Matthew Auld <matthew.auld@intel.com>
> 

Thanks,

Tvrtko
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

  reply	other threads:[~2021-03-23 11:09 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-03-18 17:04 [PATCH v3 0/6] Default request/fence expiry + watchdog Tvrtko Ursulin
2021-03-18 17:04 ` [PATCH 1/6] drm/i915: Individual request cancellation Tvrtko Ursulin
2021-03-22 15:38   ` [Intel-gfx] " Matthew Auld
2021-03-23  9:48     ` Tvrtko Ursulin
2021-03-18 17:04 ` [PATCH 2/6] drm/i915: Restrict sentinel requests further Tvrtko Ursulin
2021-03-22 17:12   ` [Intel-gfx] " Matthew Auld
2021-03-23  9:09     ` Tvrtko Ursulin
2021-03-18 17:04 ` [PATCH 3/6] drm/i915: Handle async cancellation in sentinel assert Tvrtko Ursulin
2021-03-23 10:09   ` [Intel-gfx] " Matthew Auld
2021-03-18 17:04 ` [PATCH 4/6] drm/i915: Request watchdog infrastructure Tvrtko Ursulin
2021-03-22 13:29   ` [PATCH v3 " Tvrtko Ursulin
2021-03-23 10:54     ` [Intel-gfx] " Matthew Auld
2021-03-23 11:09       ` Tvrtko Ursulin [this message]
2021-03-23 11:40         ` Matthew Auld
2021-03-18 17:04 ` [PATCH 5/6] drm/i915: Fail too long user submissions by default Tvrtko Ursulin
2021-03-23 15:56   ` [Intel-gfx] " Matthew Auld
2021-03-18 17:04 ` [PATCH 6/6] drm/i915: Allow configuring default request expiry via modparam Tvrtko Ursulin
     [not found] ` <161611666102.8628.1124825882873170304@emeril.freedesktop.org>
2021-03-22 13:37   ` ✗ Fi.CI.IGT: failure for Default request/fence expiry + watchdog (rev3) Tvrtko Ursulin
2021-03-22 13:41     ` Daniel Vetter

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=a5feaa5f-3086-952d-8edf-c66c18ce95b7@linux.intel.com \
    --to=tvrtko.ursulin@linux.intel.com \
    --cc=Intel-gfx@lists.freedesktop.org \
    --cc=daniel.vetter@ffwll.ch \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=matthew.william.auld@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).