All of lore.kernel.org
 help / color / mirror / Atom feed
* [Intel-gfx] [PATCH] drm/i915/gt: Autotune idle timeouts
@ 2020-02-25 16:23 Chris Wilson
  2020-02-26 17:39 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for " Patchwork
  2020-02-26 18:34 ` [Intel-gfx] ✗ Fi.CI.BAT: failure " Patchwork
  0 siblings, 2 replies; 3+ messages in thread
From: Chris Wilson @ 2020-02-25 16:23 UTC (permalink / raw)
  To: intel-gfx

As we measure how long it takes for each heartbeat when idling the
system, we have a reasonable expectation for the baseline latency when
idling. We can use this baseline to estimate how long we expect it
should take to idle, and so provide a more precise upper bound for
declaring a problem.

References: b81e4d9b5941 ("drm/i915/gt: Track engine round-trip times")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Stuart Summers <stuart.summers@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/gt/intel_gt.c            |  4 +--
 drivers/gpu/drm/i915/gt/intel_gt_pm.c         |  2 +-
 drivers/gpu/drm/i915/gt/intel_gt_requests.c   | 27 +++++++++++++++++++
 drivers/gpu/drm/i915/gt/selftest_rc6.c        |  2 +-
 drivers/gpu/drm/i915/gt/selftest_timeline.c   |  2 +-
 drivers/gpu/drm/i915/i915_gem.h               |  2 --
 .../gpu/drm/i915/selftests/igt_flush_test.c   |  2 +-
 7 files changed, 33 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 3dea8881e915..4057ac4d350a 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -440,7 +440,7 @@ static int __engines_record_defaults(struct intel_gt *gt)
 	}
 
 	/* Flush the default context image to memory, and enable powersaving. */
-	if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME) {
+	if (intel_gt_wait_for_idle(gt, 10) == -ETIME) {
 		err = -EIO;
 		goto out;
 	}
@@ -543,7 +543,7 @@ static int __engines_verify_workarounds(struct intel_gt *gt)
 	}
 
 	/* Flush and restore the kernel context for safety */
-	if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME)
+	if (intel_gt_wait_for_idle(gt, 10) == -ETIME)
 		err = -EIO;
 
 	return err;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index 8b653c0f5e5f..202550c06139 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -248,7 +248,7 @@ static void wait_for_suspend(struct intel_gt *gt)
 	if (!intel_gt_pm_is_awake(gt))
 		return;
 
-	if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME) {
+	if (intel_gt_wait_for_idle(gt, 10) == -ETIME) {
 		/*
 		 * Forcibly cancel outstanding work and leave
 		 * the gpu quiet.
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
index 8a5054f21bf8..5c9797cb3d0b 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
@@ -180,12 +180,39 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout)
 	return active_count ? timeout : 0;
 }
 
+static long
+intel_gt_timeout(struct intel_gt *gt, intel_engine_mask_t mask, int factor)
+{
+	struct intel_engine_cs *engine;
+	intel_engine_mask_t tmp;
+	unsigned long max;
+
+	max = 0;
+	for_each_engine_masked(engine, gt, mask, tmp) {
+		unsigned long latency;
+
+		latency = ewma__engine_latency_read(&engine->latency);
+		if (latency > max)
+			max = latency;
+
+		factor++; /* allow each engine to flush pm sequentially */
+	}
+	if (max == 0) /* no latency measured yet */
+		return MAX_SCHEDULE_TIMEOUT;
+
+	return usecs_to_jiffies(max * factor) + 1;
+}
+
 int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout)
 {
 	/* If the device is asleep, we have no requests outstanding */
 	if (!intel_gt_pm_is_awake(gt))
 		return 0;
 
+	/* Adjust our expected jiffie timeout based on historical latency */
+	if (timeout < MAX_SCHEDULE_TIMEOUT)
+		timeout = intel_gt_timeout(gt, ALL_ENGINES, timeout);
+
 	while ((timeout = intel_gt_retire_requests_timeout(gt, timeout)) > 0) {
 		cond_resched();
 		if (signal_pending(current))
diff --git a/drivers/gpu/drm/i915/gt/selftest_rc6.c b/drivers/gpu/drm/i915/gt/selftest_rc6.c
index 5f7e2dcf5686..70d040b39685 100644
--- a/drivers/gpu/drm/i915/gt/selftest_rc6.c
+++ b/drivers/gpu/drm/i915/gt/selftest_rc6.c
@@ -176,7 +176,7 @@ int live_rc6_ctx_wa(void *arg)
 				goto out;
 			}
 
-			if (intel_gt_wait_for_idle(gt, HZ / 5) == -ETIME) {
+			if (intel_gt_wait_for_idle(gt, 2) == -ETIME) {
 				intel_gt_set_wedged(gt);
 				err = -ETIME;
 				goto out;
diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c
index c2578a0f2f14..31f6ca3e6d76 100644
--- a/drivers/gpu/drm/i915/gt/selftest_timeline.c
+++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c
@@ -789,7 +789,7 @@ static int live_hwsp_rollover_kernel(void *arg)
 		int i;
 
 		engine_heartbeat_disable(engine, &heartbeat);
-		if (intel_gt_wait_for_idle(gt, HZ / 2)) {
+		if (intel_gt_wait_for_idle(gt, 5)) {
 			err = -EIO;
 			goto out;
 		}
diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h
index 1753c84d6c0d..51c64ae7833f 100644
--- a/drivers/gpu/drm/i915/i915_gem.h
+++ b/drivers/gpu/drm/i915/i915_gem.h
@@ -82,8 +82,6 @@ struct drm_i915_private;
 #define GEM_TRACE_DUMP_ON(expr) BUILD_BUG_ON_INVALID(expr)
 #endif
 
-#define I915_GEM_IDLE_TIMEOUT (HZ / 5)
-
 static inline void tasklet_lock(struct tasklet_struct *t)
 {
 	while (!tasklet_trylock(t))
diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.c b/drivers/gpu/drm/i915/selftests/igt_flush_test.c
index 7b0939e3f007..44700f29a8e7 100644
--- a/drivers/gpu/drm/i915/selftests/igt_flush_test.c
+++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.c
@@ -19,7 +19,7 @@ int igt_flush_test(struct drm_i915_private *i915)
 
 	cond_resched();
 
-	if (intel_gt_wait_for_idle(gt, HZ / 5) == -ETIME) {
+	if (intel_gt_wait_for_idle(gt, 5) == -ETIME) {
 		pr_err("%pS timed out, cancelling all further testing.\n",
 		       __builtin_return_address(0));
 
-- 
2.25.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for drm/i915/gt: Autotune idle timeouts
  2020-02-25 16:23 [Intel-gfx] [PATCH] drm/i915/gt: Autotune idle timeouts Chris Wilson
@ 2020-02-26 17:39 ` Patchwork
  2020-02-26 18:34 ` [Intel-gfx] ✗ Fi.CI.BAT: failure " Patchwork
  1 sibling, 0 replies; 3+ messages in thread
From: Patchwork @ 2020-02-26 17:39 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/gt: Autotune idle timeouts
URL   : https://patchwork.freedesktop.org/series/73919/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
251374a10d4d drm/i915/gt: Autotune idle timeouts
-:12: ERROR:GIT_COMMIT_ID: Please use git commit description style 'commit <12+ chars of sha1> ("<title line>")' - ie: 'commit b81e4d9b5941 ("drm/i915/gt: Track engine round-trip times")'
#12: 
References: b81e4d9b5941 ("drm/i915/gt: Track engine round-trip times")

total: 1 errors, 0 warnings, 0 checks, 95 lines checked

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 3+ messages in thread

* [Intel-gfx] ✗ Fi.CI.BAT: failure for drm/i915/gt: Autotune idle timeouts
  2020-02-25 16:23 [Intel-gfx] [PATCH] drm/i915/gt: Autotune idle timeouts Chris Wilson
  2020-02-26 17:39 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for " Patchwork
@ 2020-02-26 18:34 ` Patchwork
  1 sibling, 0 replies; 3+ messages in thread
From: Patchwork @ 2020-02-26 18:34 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/gt: Autotune idle timeouts
URL   : https://patchwork.freedesktop.org/series/73919/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_8008 -> Patchwork_16709
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with Patchwork_16709 absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_16709, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16709/index.html

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_16709:

### IGT changes ###

#### Possible regressions ####

  * igt@i915_selftest@live_gt_lrc:
    - fi-bwr-2160:        [PASS][1] -> [FAIL][2]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8008/fi-bwr-2160/igt@i915_selftest@live_gt_lrc.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16709/fi-bwr-2160/igt@i915_selftest@live_gt_lrc.html

  
Known issues
------------

  Here are the changes found in Patchwork_16709 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_mmap@basic:
    - fi-tgl-y:           [PASS][3] -> [DMESG-WARN][4] ([CI#94] / [i915#402])
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8008/fi-tgl-y/igt@gem_mmap@basic.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16709/fi-tgl-y/igt@gem_mmap@basic.html

  * igt@i915_selftest@live_gem_contexts:
    - fi-tgl-y:           [PASS][5] -> [INCOMPLETE][6] ([CI#94] / [i915#455])
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8008/fi-tgl-y/igt@i915_selftest@live_gem_contexts.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16709/fi-tgl-y/igt@i915_selftest@live_gem_contexts.html

  
#### Possible fixes ####

  * igt@i915_selftest@live_execlists:
    - fi-icl-y:           [DMESG-FAIL][7] ([fdo#108569]) -> [PASS][8]
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8008/fi-icl-y/igt@i915_selftest@live_execlists.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16709/fi-icl-y/igt@i915_selftest@live_execlists.html

  * igt@kms_addfb_basic@bad-pitch-0:
    - fi-tgl-y:           [DMESG-WARN][9] ([CI#94] / [i915#402]) -> [PASS][10]
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8008/fi-tgl-y/igt@kms_addfb_basic@bad-pitch-0.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16709/fi-tgl-y/igt@kms_addfb_basic@bad-pitch-0.html

  
#### Warnings ####

  * igt@kms_chamelium@hdmi-hpd-fast:
    - fi-kbl-7500u:       [FAIL][11] ([fdo#111407]) -> [FAIL][12] ([fdo#111096] / [i915#323])
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8008/fi-kbl-7500u/igt@kms_chamelium@hdmi-hpd-fast.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16709/fi-kbl-7500u/igt@kms_chamelium@hdmi-hpd-fast.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [CI#94]: https://gitlab.freedesktop.org/gfx-ci/i915-infra/issues/94
  [fdo#108569]: https://bugs.freedesktop.org/show_bug.cgi?id=108569
  [fdo#111096]: https://bugs.freedesktop.org/show_bug.cgi?id=111096
  [fdo#111407]: https://bugs.freedesktop.org/show_bug.cgi?id=111407
  [i915#1233]: https://gitlab.freedesktop.org/drm/intel/issues/1233
  [i915#323]: https://gitlab.freedesktop.org/drm/intel/issues/323
  [i915#402]: https://gitlab.freedesktop.org/drm/intel/issues/402
  [i915#455]: https://gitlab.freedesktop.org/drm/intel/issues/455


Participating hosts (52 -> 41)
------------------------------

  Additional (1): fi-byt-n2820 
  Missing    (12): fi-ilk-m540 fi-hsw-4200u fi-skl-6770hq fi-byt-squawks fi-bsw-cyan fi-snb-2520m fi-ctg-p8600 fi-cfl-8109u fi-skl-lmem fi-byt-clapper fi-bdw-samus fi-snb-2600 


Build changes
-------------

  * CI: CI-20190529 -> None
  * Linux: CI_DRM_8008 -> Patchwork_16709

  CI-20190529: 20190529
  CI_DRM_8008: 13b6e2575f2c05722679bc1c9d0b97c13bde49a1 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5469: 4f875016eb1ebc211b8aadb280ae16c7e6cdc8ba @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_16709: 251374a10d4da8da6dc216851ce57cf13c0fe6c0 @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

251374a10d4d drm/i915/gt: Autotune idle timeouts

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16709/index.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2020-02-26 18:34 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-02-25 16:23 [Intel-gfx] [PATCH] drm/i915/gt: Autotune idle timeouts Chris Wilson
2020-02-26 17:39 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for " Patchwork
2020-02-26 18:34 ` [Intel-gfx] ✗ Fi.CI.BAT: failure " Patchwork

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.