All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/i915/pmu: Do not assume fixed hrtimer period
@ 2018-06-04 12:52 Tvrtko Ursulin
  2018-06-04 12:59 ` Chris Wilson
                   ` (4 more replies)
  0 siblings, 5 replies; 13+ messages in thread
From: Tvrtko Ursulin @ 2018-06-04 12:52 UTC (permalink / raw)
  To: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

As Chris has discovered on his Ivybridge, and later automated test runs
have confirmed, on most of our platforms hrtimer faced with heavy GPU load
can occasionally become sufficiently imprecise to affect PMU sampling
calculations.

This means we cannot assume sampling frequency is what we asked for, but
we need to measure the interval ourselves.

This patch is similar to Chris' original proposal for per-engine counters,
but instead of introducing a new set to work around the problem with
frequency sampling, it swaps around the way internal frequency accounting
is done. Instead of accumulating current frequency and dividing by
sampling frequency on readout, it accumulates frequency scaled by each
period.

Testcase: igt/perf_pmu/*busy* # snb, ivb, hsw
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Suggested-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_pmu.c | 61 ++++++++++++++++++++++-----------
 drivers/gpu/drm/i915/i915_pmu.h |  8 +++++
 2 files changed, 49 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index dc87797db500..ff3e3f865567 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -127,6 +127,7 @@ static void __i915_pmu_maybe_start_timer(struct drm_i915_private *i915)
 {
 	if (!i915->pmu.timer_enabled && pmu_needs_timer(i915, true)) {
 		i915->pmu.timer_enabled = true;
+		i915->pmu.timer_last = ktime_get();
 		hrtimer_start_range_ns(&i915->pmu.timer,
 				       ns_to_ktime(PERIOD), 0,
 				       HRTIMER_MODE_REL_PINNED);
@@ -155,12 +156,13 @@ static bool grab_forcewake(struct drm_i915_private *i915, bool fw)
 }
 
 static void
-update_sample(struct i915_pmu_sample *sample, u32 unit, u32 val)
+add_sample(struct i915_pmu_sample *sample, u32 val)
 {
-	sample->cur += mul_u32_u32(val, unit);
+	sample->cur += val;
 }
 
-static void engines_sample(struct drm_i915_private *dev_priv)
+static void
+engines_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
 {
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
@@ -182,8 +184,9 @@ static void engines_sample(struct drm_i915_private *dev_priv)
 
 		val = !i915_seqno_passed(current_seqno, last_seqno);
 
-		update_sample(&engine->pmu.sample[I915_SAMPLE_BUSY],
-			      PERIOD, val);
+		if (val)
+			add_sample(&engine->pmu.sample[I915_SAMPLE_BUSY],
+				   period_ns);
 
 		if (val && (engine->pmu.enable &
 		    (BIT(I915_SAMPLE_WAIT) | BIT(I915_SAMPLE_SEMA)))) {
@@ -194,11 +197,13 @@ static void engines_sample(struct drm_i915_private *dev_priv)
 			val = 0;
 		}
 
-		update_sample(&engine->pmu.sample[I915_SAMPLE_WAIT],
-			      PERIOD, !!(val & RING_WAIT));
+		if (val & RING_WAIT)
+			add_sample(&engine->pmu.sample[I915_SAMPLE_WAIT],
+				   period_ns);
 
-		update_sample(&engine->pmu.sample[I915_SAMPLE_SEMA],
-			      PERIOD, !!(val & RING_WAIT_SEMAPHORE));
+		if (val & RING_WAIT_SEMAPHORE)
+			add_sample(&engine->pmu.sample[I915_SAMPLE_SEMA],
+				   period_ns);
 	}
 
 	if (fw)
@@ -207,7 +212,14 @@ static void engines_sample(struct drm_i915_private *dev_priv)
 	intel_runtime_pm_put(dev_priv);
 }
 
-static void frequency_sample(struct drm_i915_private *dev_priv)
+static void
+add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
+{
+	sample->cur += mul_u32_u32(val, mul);
+}
+
+static void
+frequency_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
 {
 	if (dev_priv->pmu.enable &
 	    config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) {
@@ -221,15 +233,17 @@ static void frequency_sample(struct drm_i915_private *dev_priv)
 			intel_runtime_pm_put(dev_priv);
 		}
 
-		update_sample(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_ACT],
-			      1, intel_gpu_freq(dev_priv, val));
+		add_sample_mult(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_ACT],
+			        intel_gpu_freq(dev_priv, val),
+				period_ns / 1000);
 	}
 
 	if (dev_priv->pmu.enable &
 	    config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)) {
-		update_sample(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_REQ], 1,
-			      intel_gpu_freq(dev_priv,
-					     dev_priv->gt_pm.rps.cur_freq));
+		add_sample_mult(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_REQ],
+				intel_gpu_freq(dev_priv,
+					       dev_priv->gt_pm.rps.cur_freq),
+				period_ns / 1000);
 	}
 }
 
@@ -237,14 +251,21 @@ static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
 {
 	struct drm_i915_private *i915 =
 		container_of(hrtimer, struct drm_i915_private, pmu.timer);
+	unsigned int period_ns;
+	ktime_t now;
 
 	if (!READ_ONCE(i915->pmu.timer_enabled))
 		return HRTIMER_NORESTART;
 
-	engines_sample(i915);
-	frequency_sample(i915);
+	now = ktime_get();
+	period_ns = ktime_to_ns(ktime_sub(now, i915->pmu.timer_last));
+	i915->pmu.timer_last = now;
+
+	engines_sample(i915, period_ns);
+	frequency_sample(i915, period_ns);
+
+	hrtimer_forward(hrtimer, now, ns_to_ktime(PERIOD));
 
-	hrtimer_forward_now(hrtimer, ns_to_ktime(PERIOD));
 	return HRTIMER_RESTART;
 }
 
@@ -519,12 +540,12 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
 		case I915_PMU_ACTUAL_FREQUENCY:
 			val =
 			   div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_ACT].cur,
-				   FREQUENCY);
+				   1000000 /* to MHz */);
 			break;
 		case I915_PMU_REQUESTED_FREQUENCY:
 			val =
 			   div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_REQ].cur,
-				   FREQUENCY);
+				   1000000 /* to MHz */);
 			break;
 		case I915_PMU_INTERRUPTS:
 			val = count_interrupts(i915);
diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
index 2ba735299f7c..7f164ca3db12 100644
--- a/drivers/gpu/drm/i915/i915_pmu.h
+++ b/drivers/gpu/drm/i915/i915_pmu.h
@@ -65,6 +65,14 @@ struct i915_pmu {
 	 * event types.
 	 */
 	u64 enable;
+
+	/**
+	 * @timer_last:
+	 *
 +	 * Timestamp of the previous timer invocation.
+	 */
+	ktime_t timer_last;
+
 	/**
 	 * @enable_count: Reference counts for the enabled events.
 	 *
-- 
2.17.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH] drm/i915/pmu: Do not assume fixed hrtimer period
  2018-06-04 12:52 [PATCH] drm/i915/pmu: Do not assume fixed hrtimer period Tvrtko Ursulin
@ 2018-06-04 12:59 ` Chris Wilson
  2018-06-04 13:03   ` Chris Wilson
  2018-06-04 13:05 ` ✗ Fi.CI.CHECKPATCH: warning for " Patchwork
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 13+ messages in thread
From: Chris Wilson @ 2018-06-04 12:59 UTC (permalink / raw)
  To: Tvrtko Ursulin, Intel-gfx

Quoting Tvrtko Ursulin (2018-06-04 13:52:39)
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> As Chris has discovered on his Ivybridge, and later automated test runs
> have confirmed, on most of our platforms hrtimer faced with heavy GPU load
> ca occasionally become sufficiently imprecise to affect PMU sampling

s/ca/can/

> calculations.
> 
> This means we cannot assume sampling frequency is what we asked for, but
> we need to measure the interval ourselves.
> 
> This patch is similar to Chris' original proposal for per-engine counters,
> but instead of introducing a new set to work around the problem with
> frequency sampling, it swaps around the way internal frequency accounting
> is done. Instead of accumulating current frequency and dividing by
> sampling frequency on readout, it accumulates frequency scaled by each
> period.

My ulterior motive failed to win favour ;)
 
> Testcase: igt/perf_pmu/*busy* # snb, ivb, hsw
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Suggested-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>

> @@ -237,14 +251,21 @@ static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
>  {
>         struct drm_i915_private *i915 =
>                 container_of(hrtimer, struct drm_i915_private, pmu.timer);
> +       unsigned int period_ns;
> +       ktime_t now;
>  
>         if (!READ_ONCE(i915->pmu.timer_enabled))
>                 return HRTIMER_NORESTART;
>  
> -       engines_sample(i915);
> -       frequency_sample(i915);
> +       now = ktime_get();
> +       period_ns = ktime_to_ns(ktime_sub(now, i915->pmu.timer_last));
> +       i915->pmu.timer_last = now;

Maybe worth a comment about the lag coming from the timer outweighs our
concern with using a single timestamp over multiple samplers, with
indeterminate delays due to forcewake.

> +       engines_sample(i915, period_ns);
> +       frequency_sample(i915, period_ns);
> +
> +       hrtimer_forward(hrtimer, now, ns_to_ktime(PERIOD));
>  
> -       hrtimer_forward_now(hrtimer, ns_to_ktime(PERIOD));
>         return HRTIMER_RESTART;
>  }
>  
> @@ -519,12 +540,12 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
>                 case I915_PMU_ACTUAL_FREQUENCY:
>                         val =
>                            div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_ACT].cur,
> -                                  FREQUENCY);
> +                                  1000000 /* to MHz */);

USEC_PER_SECS /* to MHz */

Storage in sample[] is MHz.usec, so here we cancel out the usec to leave
MHz.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] drm/i915/pmu: Do not assume fixed hrtimer period
  2018-06-04 12:59 ` Chris Wilson
@ 2018-06-04 13:03   ` Chris Wilson
  2018-06-04 15:01     ` Tvrtko Ursulin
  0 siblings, 1 reply; 13+ messages in thread
From: Chris Wilson @ 2018-06-04 13:03 UTC (permalink / raw)
  To: Tvrtko Ursulin, Intel-gfx

Quoting Chris Wilson (2018-06-04 13:59:12)
> Quoting Tvrtko Ursulin (2018-06-04 13:52:39)
> > From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > 
> > As Chris has discovered on his Ivybridge, and later automated test runs
> > have confirmed, on most of our platforms hrtimer faced with heavy GPU load
> > ca occasionally become sufficiently imprecise to affect PMU sampling
> 
> s/ca/can/
> 
> > calculations.
> > 
> > This means we cannot assume sampling frequency is what we asked for, but
> > we need to measure the interval ourselves.
> > 
> > This patch is similar to Chris' original proposal for per-engine counters,
> > but instead of introducing a new set to work around the problem with
> > frequency sampling, it swaps around the way internal frequency accounting
> > is done. Instead of accumulating current frequency and dividing by
> > sampling frequency on readout, it accumulates frequency scaled by each
> > period.
> 
> My ulterior motive failed to win favour ;)
>  
> > Testcase: igt/perf_pmu/*busy* # snb, ivb, hsw
> > Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > Suggested-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>

I should point out that even with this fix (or rather my patch), we can
still see errors of 25-30us, enough to fail the test. However, without
the fix our errors can be orders of magnitude worse (e.g. reporting 80us
busy instead of 500us).
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* ✗ Fi.CI.CHECKPATCH: warning for drm/i915/pmu: Do not assume fixed hrtimer period
  2018-06-04 12:52 [PATCH] drm/i915/pmu: Do not assume fixed hrtimer period Tvrtko Ursulin
  2018-06-04 12:59 ` Chris Wilson
@ 2018-06-04 13:05 ` Patchwork
  2018-06-04 13:26 ` ✓ Fi.CI.BAT: success " Patchwork
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 13+ messages in thread
From: Patchwork @ 2018-06-04 13:05 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/pmu: Do not assume fixed hrtimer period
URL   : https://patchwork.freedesktop.org/series/44191/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
9461f26f5ca1 drm/i915/pmu: Do not assume fixed hrtimer period
-:109: ERROR:CODE_INDENT: code indent should use tabs where possible
#109: FILE: drivers/gpu/drm/i915/i915_pmu.c:237:
+^I^I^I        intel_gpu_freq(dev_priv, val),$

-:109: CHECK:PARENTHESIS_ALIGNMENT: Alignment should match open parenthesis
#109: FILE: drivers/gpu/drm/i915/i915_pmu.c:237:
+		add_sample_mult(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_ACT],
+			        intel_gpu_freq(dev_priv, val),

total: 1 errors, 0 warnings, 1 checks, 140 lines checked

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* ✓ Fi.CI.BAT: success for drm/i915/pmu: Do not assume fixed hrtimer period
  2018-06-04 12:52 [PATCH] drm/i915/pmu: Do not assume fixed hrtimer period Tvrtko Ursulin
  2018-06-04 12:59 ` Chris Wilson
  2018-06-04 13:05 ` ✗ Fi.CI.CHECKPATCH: warning for " Patchwork
@ 2018-06-04 13:26 ` Patchwork
  2018-06-04 14:17 ` ✓ Fi.CI.IGT: " Patchwork
  2018-06-05 10:57 ` ✗ Fi.CI.BAT: failure for drm/i915/pmu: Do not assume fixed hrtimer period (rev2) Patchwork
  4 siblings, 0 replies; 13+ messages in thread
From: Patchwork @ 2018-06-04 13:26 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/pmu: Do not assume fixed hrtimer period
URL   : https://patchwork.freedesktop.org/series/44191/
State : success

== Summary ==

= CI Bug Log - changes from CI_DRM_4275 -> Patchwork_9184 =

== Summary - SUCCESS ==

  No regressions found.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/44191/revisions/1/mbox/

== Known issues ==

  Here are the changes found in Patchwork_9184 that come from known issues:

  === IGT changes ===

    ==== Issues hit ====

    igt@gem_exec_suspend@basic-s3:
      fi-skl-gvtdvm:      NOTRUN -> INCOMPLETE (fdo#105600, fdo#104108)

    
    ==== Possible fixes ====

    igt@debugfs_test@read_all_entries:
      fi-bdw-gvtdvm:      DMESG-WARN (fdo#105600) -> PASS +2

    igt@kms_pipe_crc_basic@suspend-read-crc-pipe-b:
      fi-cnl-psr:         DMESG-WARN (fdo#104951) -> PASS

    
  fdo#104108 https://bugs.freedesktop.org/show_bug.cgi?id=104108
  fdo#104951 https://bugs.freedesktop.org/show_bug.cgi?id=104951
  fdo#105600 https://bugs.freedesktop.org/show_bug.cgi?id=105600


== Participating hosts (40 -> 39) ==

  Additional (3): fi-hsw-peppy fi-skl-guc fi-skl-gvtdvm 
  Missing    (4): fi-ctg-p8600 fi-ilk-m540 fi-byt-squawks fi-skl-6700hq 


== Build changes ==

    * Linux: CI_DRM_4275 -> Patchwork_9184

  CI_DRM_4275: 8fdb62e0511e81fa935059c274a2457361fdb679 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4505: 8a8f0271a71e2e0d2a2caa4d41f4ad1d9c89670e @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_9184: 9461f26f5ca17e53d311f5c2a1e823e99586c8fd @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

9461f26f5ca1 drm/i915/pmu: Do not assume fixed hrtimer period

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_9184/issues.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* ✓ Fi.CI.IGT: success for drm/i915/pmu: Do not assume fixed hrtimer period
  2018-06-04 12:52 [PATCH] drm/i915/pmu: Do not assume fixed hrtimer period Tvrtko Ursulin
                   ` (2 preceding siblings ...)
  2018-06-04 13:26 ` ✓ Fi.CI.BAT: success " Patchwork
@ 2018-06-04 14:17 ` Patchwork
  2018-06-05 10:57 ` ✗ Fi.CI.BAT: failure for drm/i915/pmu: Do not assume fixed hrtimer period (rev2) Patchwork
  4 siblings, 0 replies; 13+ messages in thread
From: Patchwork @ 2018-06-04 14:17 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/pmu: Do not assume fixed hrtimer period
URL   : https://patchwork.freedesktop.org/series/44191/
State : success

== Summary ==

= CI Bug Log - changes from CI_DRM_4275_full -> Patchwork_9184_full =

== Summary - WARNING ==

  Minor unknown changes coming with Patchwork_9184_full need to be verified
  manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_9184_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/44191/revisions/1/mbox/

== Possible new issues ==

  Here are the unknown changes that may have been introduced in Patchwork_9184_full:

  === IGT changes ===

    ==== Warnings ====

    igt@gem_mocs_settings@mocs-rc6-vebox:
      shard-kbl:          PASS -> SKIP +1

    
== Known issues ==

  Here are the changes found in Patchwork_9184_full that come from known issues:

  === IGT changes ===

    ==== Issues hit ====

    igt@drv_selftest@live_hangcheck:
      shard-apl:          PASS -> DMESG-FAIL (fdo#106560)

    igt@kms_cursor_legacy@2x-long-cursor-vs-flip-atomic:
      shard-hsw:          PASS -> FAIL (fdo#105767)

    igt@kms_cursor_legacy@2x-nonblocking-modeset-vs-cursor-atomic:
      shard-glk:          PASS -> FAIL (fdo#105454, fdo#106509)

    igt@kms_flip@2x-dpms-vs-vblank-race:
      shard-hsw:          PASS -> FAIL (fdo#103060)

    igt@kms_flip@flip-vs-panning-vs-hang:
      shard-snb:          PASS -> DMESG-WARN (fdo#103821)

    igt@kms_flip_tiling@flip-x-tiled:
      shard-glk:          PASS -> FAIL (fdo#103822, fdo#104724)

    igt@kms_rotation_crc@sprite-rotation-180:
      shard-hsw:          PASS -> FAIL (fdo#103925, fdo#104724)

    igt@kms_setmode@basic:
      shard-apl:          PASS -> FAIL (fdo#99912)

    
    ==== Possible fixes ====

    igt@drv_selftest@live_gtt:
      shard-glk:          INCOMPLETE (k.org#198133, fdo#103359) -> PASS

    igt@kms_flip@basic-flip-vs-wf_vblank:
      shard-hsw:          FAIL (fdo#103928) -> PASS

    igt@kms_flip@plain-flip-ts-check:
      shard-hsw:          FAIL (fdo#100368) -> PASS
      shard-glk:          FAIL (fdo#100368) -> PASS

    igt@kms_flip_tiling@flip-y-tiled:
      shard-glk:          FAIL (fdo#103822, fdo#104724) -> PASS

    
    ==== Warnings ====

    igt@gem_eio@suspend:
      shard-snb:          DMESG-FAIL (fdo#106808) -> INCOMPLETE (fdo#105411)

    
  fdo#100368 https://bugs.freedesktop.org/show_bug.cgi?id=100368
  fdo#103060 https://bugs.freedesktop.org/show_bug.cgi?id=103060
  fdo#103359 https://bugs.freedesktop.org/show_bug.cgi?id=103359
  fdo#103821 https://bugs.freedesktop.org/show_bug.cgi?id=103821
  fdo#103822 https://bugs.freedesktop.org/show_bug.cgi?id=103822
  fdo#103925 https://bugs.freedesktop.org/show_bug.cgi?id=103925
  fdo#103928 https://bugs.freedesktop.org/show_bug.cgi?id=103928
  fdo#104724 https://bugs.freedesktop.org/show_bug.cgi?id=104724
  fdo#105411 https://bugs.freedesktop.org/show_bug.cgi?id=105411
  fdo#105454 https://bugs.freedesktop.org/show_bug.cgi?id=105454
  fdo#105767 https://bugs.freedesktop.org/show_bug.cgi?id=105767
  fdo#106509 https://bugs.freedesktop.org/show_bug.cgi?id=106509
  fdo#106560 https://bugs.freedesktop.org/show_bug.cgi?id=106560
  fdo#106808 https://bugs.freedesktop.org/show_bug.cgi?id=106808
  fdo#99912 https://bugs.freedesktop.org/show_bug.cgi?id=99912
  k.org#198133 https://bugzilla.kernel.org/show_bug.cgi?id=198133


== Participating hosts (5 -> 5) ==

  No changes in participating hosts


== Build changes ==

    * Linux: CI_DRM_4275 -> Patchwork_9184

  CI_DRM_4275: 8fdb62e0511e81fa935059c274a2457361fdb679 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4505: 8a8f0271a71e2e0d2a2caa4d41f4ad1d9c89670e @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_9184: 9461f26f5ca17e53d311f5c2a1e823e99586c8fd @ git://anongit.freedesktop.org/gfx-ci/linux

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_9184/shards.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] drm/i915/pmu: Do not assume fixed hrtimer period
  2018-06-04 13:03   ` Chris Wilson
@ 2018-06-04 15:01     ` Tvrtko Ursulin
  2018-06-04 15:11       ` Chris Wilson
  0 siblings, 1 reply; 13+ messages in thread
From: Tvrtko Ursulin @ 2018-06-04 15:01 UTC (permalink / raw)
  To: Chris Wilson, Tvrtko Ursulin, Intel-gfx


On 04/06/2018 14:03, Chris Wilson wrote:
> Quoting Chris Wilson (2018-06-04 13:59:12)
>> Quoting Tvrtko Ursulin (2018-06-04 13:52:39)
>>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>
>>> As Chris has discovered on his Ivybridge, and later automated test runs
>>> have confirmed, on most of our platforms hrtimer faced with heavy GPU load
>>> ca occasionally become sufficiently imprecise to affect PMU sampling
>>
>> s/ca/can/
>>
>>> calculations.
>>>
>>> This means we cannot assume sampling frequency is what we asked for, but
>>> we need to measure the interval ourselves.
>>>
>>> This patch is similar to Chris' original proposal for per-engine counters,
>>> but instead of introducing a new set to work around the problem with
>>> frequency sampling, it swaps around the way internal frequency accounting
>>> is done. Instead of accumulating current frequency and dividing by
>>> sampling frequency on readout, it accumulates frequency scaled by each
>>> period.
>>
>> My ulterior motive failed to win favour ;)
>>   
>>> Testcase: igt/perf_pmu/*busy* # snb, ivb, hsw
>>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>> Suggested-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
> 
> I should point out that even with this fix (or rather my patch), we can

I did not mean to steal it, just that you seemed very uncompromising on 
the new counters approach. If you prefer you can respin your patch with 
this approach for frequency counters, it is fine by me.

> still see errors of 25-30us, enough to fail the test. However, without
> the fix our errors can be orders of magnitude worse (e.g. reporting 80us
> busy instead of 500us).

Yeah I guess if the timer is delayed it is delayed and all samples just 
won't be there. I don't see a way to fix that elegantly. Even practically.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] drm/i915/pmu: Do not assume fixed hrtimer period
  2018-06-04 15:01     ` Tvrtko Ursulin
@ 2018-06-04 15:11       ` Chris Wilson
  2018-06-05  8:51         ` Tvrtko Ursulin
  0 siblings, 1 reply; 13+ messages in thread
From: Chris Wilson @ 2018-06-04 15:11 UTC (permalink / raw)
  To: Tvrtko Ursulin, Tvrtko Ursulin, Intel-gfx

Quoting Tvrtko Ursulin (2018-06-04 16:01:26)
> 
> On 04/06/2018 14:03, Chris Wilson wrote:
> > Quoting Chris Wilson (2018-06-04 13:59:12)
> >> Quoting Tvrtko Ursulin (2018-06-04 13:52:39)
> >>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>>
> >>> As Chris has discovered on his Ivybridge, and later automated test runs
> >>> have confirmed, on most of our platforms hrtimer faced with heavy GPU load
> >>> ca occasionally become sufficiently imprecise to affect PMU sampling
> >>
> >> s/ca/can/
> >>
> >>> calculations.
> >>>
> >>> This means we cannot assume sampling frequency is what we asked for, but
> >>> we need to measure the interval ourselves.
> >>>
> >>> This patch is similar to Chris' original proposal for per-engine counters,
> >>> but instead of introducing a new set to work around the problem with
> >>> frequency sampling, it swaps around the way internal frequency accounting
> >>> is done. Instead of accumulating current frequency and dividing by
> >>> sampling frequency on readout, it accumulates frequency scaled by each
> >>> period.
> >>
> >> My ulterior motive failed to win favour ;)
> >>   
> >>> Testcase: igt/perf_pmu/*busy* # snb, ivb, hsw
> >>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>> Suggested-by: Chris Wilson <chris@chris-wilson.co.uk>
> >>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> >> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
> > 
> > I should point out that even with this fix (or rather my patch), we can
> 
> I did not mean to steal it,

Don't mind, I view such patches as "this here is a problem, and what I
think can be done". I'm quite happy to be told "nope, the problem
is..."; the end result is we fix and move on.

> just that you seemed very uncompromising on 
> the new counters approach. If you prefer you can respin your patch with 
> this approach for frequency counters, it is fine by me.

I'm just not convinced yet by the frequency sampler, and I still feel
that we would be better off with cycles. I just haven't found a
convincing argument ;)
 
> > still see errors of 25-30us, enough to fail the test. However, without
> > the fix our errors can be orders of magnitude worse (e.g. reporting 80us
> > busy instead of 500us).
> 
> Yeah I guess if the timer is delayed it is delayed and all samples just 
> won't be there. I don't see a way to fix that elegantly. Even practically.

Right, but it's not the case that we aren't sampling. If the timer was
delayed entirely, then we would see 0 (start_val == end_val). I'm not
sure exactly what goes wrong, but it probably is related to the timer
being unreliable. (It's just that in this case we are sampling a square
wave, so really hard to mess up!)
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] drm/i915/pmu: Do not assume fixed hrtimer period
  2018-06-04 15:11       ` Chris Wilson
@ 2018-06-05  8:51         ` Tvrtko Ursulin
  2018-06-05  9:06           ` Chris Wilson
  0 siblings, 1 reply; 13+ messages in thread
From: Tvrtko Ursulin @ 2018-06-05  8:51 UTC (permalink / raw)
  To: Chris Wilson, Tvrtko Ursulin, Intel-gfx


On 04/06/2018 16:11, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-06-04 16:01:26)
>>
>> On 04/06/2018 14:03, Chris Wilson wrote:
>>> Quoting Chris Wilson (2018-06-04 13:59:12)
>>>> Quoting Tvrtko Ursulin (2018-06-04 13:52:39)
>>>>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>>>
>>>>> As Chris has discovered on his Ivybridge, and later automated test runs
>>>>> have confirmed, on most of our platforms hrtimer faced with heavy GPU load
>>>>> ca occasionally become sufficiently imprecise to affect PMU sampling
>>>>
>>>> s/ca/can/
>>>>
>>>>> calculations.
>>>>>
>>>>> This means we cannot assume sampling frequency is what we asked for, but
>>>>> we need to measure the interval ourselves.
>>>>>
>>>>> This patch is similar to Chris' original proposal for per-engine counters,
>>>>> but instead of introducing a new set to work around the problem with
>>>>> frequency sampling, it swaps around the way internal frequency accounting
>>>>> is done. Instead of accumulating current frequency and dividing by
>>>>> sampling frequency on readout, it accumulates frequency scaled by each
>>>>> period.
>>>>
>>>> My ulterior motive failed to win favour ;)
>>>>    
>>>>> Testcase: igt/perf_pmu/*busy* # snb, ivb, hsw
>>>>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>>> Suggested-by: Chris Wilson <chris@chris-wilson.co.uk>
>>>>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>>>> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
>>>
>>> I should point out that even with this fix (or rather my patch), we can
>>
>> I did not mean to steal it,
> 
> Don't mind, I view such patches as "this here is a problem, and what I
> think can be done". I'm quite happy to be told "nope, the problem
> is..."; the end result is we fix and move on.
> 
>> just that you seemed very uncompromising on
>> the new counters approach. If you prefer you can respin your patch with
>> this approach for frequency counters, it is fine by me.
> 
> I'm just not convinced yet by the frequency sampler, and I still feel
> that we would be better off with cycles. I just haven't found a
> convincing argument ;)

Just imagine .unit file has Mcycles instead of MHz in it? :)

Since we went with this when adding it initially I think we should 
exercise restrain with deprecating and adding so quickly.

>>> still see errors of 25-30us, enough to fail the test. However, without
>>> the fix our errors can be orders of magnitude worse (e.g. reporting 80us
>>> busy instead of 500us).
>>
>> Yeah I guess if the timer is delayed it is delayed and all samples just
>> won't be there. I don't see a way to fix that elegantly. Even practically.
> 
> Right, but it's not the case that we aren't sampling. If the timer was
> delayed entirely, then we would see 0 (start_val == end_val). I'm not
> sure exactly what goes wrong, but it probably is related to the timer
> being unreliable. (It's just that in this case we are sampling a square
> wave, so really hard to mess up!)

Hm maybe I am not completely understanding what you are seeing. I 
thought that multiple timer overruns (aka delay by multiple periods) 
explains everything.

Then even with this patch, if they happen to happen (!) at the end of a 
sampling period (as observed from userspace), the large-ish chunk of 
samples (depending on the total sampling duration) may be missing 
(pending), and so still observed as fail.

Difference after this patch is that when the timer eventually fires the 
counter will account for the delay, so subsequent queries will be fine.

If readout waited for at least one real sampling tick, that would be 
solved, but by several other undesirable consequences. At least it would 
slow down readout to 200Hz, but I don't know from the top of my head if 
it wouldn't cause unresolvable deadlock scenarios, or if it is even 
technically possible within the perf framework. (Since I don't think we 
should do it, I don't even want to start thinking about it.)

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] drm/i915/pmu: Do not assume fixed hrtimer period
  2018-06-05  8:51         ` Tvrtko Ursulin
@ 2018-06-05  9:06           ` Chris Wilson
  2018-06-05 11:35             ` Tvrtko Ursulin
  0 siblings, 1 reply; 13+ messages in thread
From: Chris Wilson @ 2018-06-05  9:06 UTC (permalink / raw)
  To: Tvrtko Ursulin, Tvrtko Ursulin, Intel-gfx

Quoting Tvrtko Ursulin (2018-06-05 09:51:01)
> 
> On 04/06/2018 16:11, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2018-06-04 16:01:26)
> >>
> >> On 04/06/2018 14:03, Chris Wilson wrote:
> >>> Quoting Chris Wilson (2018-06-04 13:59:12)
> >>>> Quoting Tvrtko Ursulin (2018-06-04 13:52:39)
> >>>>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>>>>
> >>>>> As Chris has discovered on his Ivybridge, and later automated test runs
> >>>>> have confirmed, on most of our platforms hrtimer faced with heavy GPU load
> >>>>> ca occasionally become sufficiently imprecise to affect PMU sampling
> >>>>
> >>>> s/ca/can/
> >>>>
> >>>>> calculations.
> >>>>>
> >>>>> This means we cannot assume sampling frequency is what we asked for, but
> >>>>> we need to measure the interval ourselves.
> >>>>>
> >>>>> This patch is similar to Chris' original proposal for per-engine counters,
> >>>>> but instead of introducing a new set to work around the problem with
> >>>>> frequency sampling, it swaps around the way internal frequency accounting
> >>>>> is done. Instead of accumulating current frequency and dividing by
> >>>>> sampling frequency on readout, it accumulates frequency scaled by each
> >>>>> period.
> >>>>
> >>>> My ulterior motive failed to win favour ;)
> >>>>    
> >>>>> Testcase: igt/perf_pmu/*busy* # snb, ivb, hsw
> >>>>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>>>> Suggested-by: Chris Wilson <chris@chris-wilson.co.uk>
> >>>>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> >>>> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
> >>>
> >>> I should point out that even with this fix (or rather my patch), we can
> >>
> >> I did not mean to steal it,
> > 
> > Don't mind, I view such patches as "this here is a problem, and what I
> > think can be done". I'm quite happy to be told "nope, the problem
> > is..."; the end result is we fix and move on.
> > 
> >> just that you seemed very uncompromising on
> >> the new counters approach. If you prefer you can respin your patch with
> >> this approach for frequency counters, it is fine by me.
> > 
> > I'm just not convinced yet by the frequency sampler, and I still feel
> > that we would be better off with cycles. I just haven't found a
> > convincing argument ;)
> 
> Just imagine .unit file has Mcycles instead of MHz in it? :)
> 
> Since we went with this when adding it initially I think we should 
> exercise restrain with deprecating and adding so quickly.
> 
> >>> still see errors of 25-30us, enough to fail the test. However, without
> >>> the fix our errors can be orders of magnitude worse (e.g. reporting 80us
> >>> busy instead of 500us).
> >>
> >> Yeah I guess if the timer is delayed it is delayed and all samples just
> >> won't be there. I don't see a way to fix that elegantly. Even practically.
> > 
> > Right, but it's not the case that we aren't sampling. If the timer was
> > delayed entirely, then we would see 0 (start_val == end_val). I'm not
> > sure exactly what goes wrong, but it probably is related to the timer
> > being unreliable. (It's just that in this case we are sampling a square
> > wave, so really hard to mess up!)
> 
> Hm maybe I am not completely understanding what you are seeing. I 
> thought that multiple timer overruns (aka delay by multiple periods) 
> explains everything.
> 
> Then even with this patch, if they happen to happen (!) at the end of a 
> sampling period (as observed from userspace), the large-ish chunk of 
> samples (depending on the total sampling duration) may be missing 
> (pending), and so still observed as fail.

Ah, but with the variable period timer, we can force the timer to run
before we report the same (perf_event_read). That should still be inside
the busy batch, and it still has the same inaccuracy (~28us, for a small
sample population, measuring the tail is on my wishlist):

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 34cc6f6..8578a43 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -534,6 +534,12 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
                        val = engine->pmu.sample[sample].cur;
                }
        } else {
+               if (hrtimer_cancel(&i915->pmu.timer)) {
+                       i915_sample(&i915->pmu.timer);
+                       hrtimer_start_expires(&i915->pmu.timer,
+                                             HRTIMER_MODE_ABS);
+               }
+
                switch (event->attr.config) {
                case I915_PMU_ACTUAL_FREQUENCY:
                        val =

I haven't thought enough about why it still has the tail, nor measured 
the distribution of errors. I just wanted to warn that we aren't quite
out of the mess yet, but with the variable period we do at least stop
drowning!
 
> Difference after this patch is that when the timer eventually fires the 
> counter will account for the delay, so subsequent queries will be fine.
> 
> If readout waited for at least one real sampling tick, that would be 
> solved, but by several other undesirable consequences. At least it would 
> slow down readout to 200Hz, but I don't know from the top of my head if 
> it wouldn't cause unresolvable deadlock scenarios, or if it is even 
> technically possible within the perf framework. (Since I don't think we 
> should do it, I don't even want to start thinking about it.)

The above should do what you have in mind, I think.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* ✗ Fi.CI.BAT: failure for drm/i915/pmu: Do not assume fixed hrtimer period (rev2)
  2018-06-04 12:52 [PATCH] drm/i915/pmu: Do not assume fixed hrtimer period Tvrtko Ursulin
                   ` (3 preceding siblings ...)
  2018-06-04 14:17 ` ✓ Fi.CI.IGT: " Patchwork
@ 2018-06-05 10:57 ` Patchwork
  4 siblings, 0 replies; 13+ messages in thread
From: Patchwork @ 2018-06-05 10:57 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/pmu: Do not assume fixed hrtimer period (rev2)
URL   : https://patchwork.freedesktop.org/series/44191/
State : failure

== Summary ==

Applying: drm/i915/pmu: Do not assume fixed hrtimer period
error: sha1 information is lacking or useless (drivers/gpu/drm/i915/i915_pmu.c).
error: could not build fake ancestor
Patch failed at 0001 drm/i915/pmu: Do not assume fixed hrtimer period
The copy of the patch that failed is found in: .git/rebase-apply/patch
When you have resolved this problem, run "git am --continue".
If you prefer to skip this patch, run "git am --skip" instead.
To restore the original branch and stop patching, run "git am --abort".

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] drm/i915/pmu: Do not assume fixed hrtimer period
  2018-06-05  9:06           ` Chris Wilson
@ 2018-06-05 11:35             ` Tvrtko Ursulin
  2018-06-05 11:42               ` Chris Wilson
  0 siblings, 1 reply; 13+ messages in thread
From: Tvrtko Ursulin @ 2018-06-05 11:35 UTC (permalink / raw)
  To: Chris Wilson, Tvrtko Ursulin, Intel-gfx


On 05/06/2018 10:06, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-06-05 09:51:01)
>>
>> On 04/06/2018 16:11, Chris Wilson wrote:
>>> Quoting Tvrtko Ursulin (2018-06-04 16:01:26)
>>>>
>>>> On 04/06/2018 14:03, Chris Wilson wrote:
>>>>> Quoting Chris Wilson (2018-06-04 13:59:12)
>>>>>> Quoting Tvrtko Ursulin (2018-06-04 13:52:39)
>>>>>>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>>>>>
>>>>>>> As Chris has discovered on his Ivybridge, and later automated test runs
>>>>>>> have confirmed, on most of our platforms hrtimer faced with heavy GPU load
>>>>>>> ca occasionally become sufficiently imprecise to affect PMU sampling
>>>>>>
>>>>>> s/ca/can/
>>>>>>
>>>>>>> calculations.
>>>>>>>
>>>>>>> This means we cannot assume sampling frequency is what we asked for, but
>>>>>>> we need to measure the interval ourselves.
>>>>>>>
>>>>>>> This patch is similar to Chris' original proposal for per-engine counters,
>>>>>>> but instead of introducing a new set to work around the problem with
>>>>>>> frequency sampling, it swaps around the way internal frequency accounting
>>>>>>> is done. Instead of accumulating current frequency and dividing by
>>>>>>> sampling frequency on readout, it accumulates frequency scaled by each
>>>>>>> period.
>>>>>>
>>>>>> My ulterior motive failed to win favour ;)
>>>>>>     
>>>>>>> Testcase: igt/perf_pmu/*busy* # snb, ivb, hsw
>>>>>>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>>>>> Suggested-by: Chris Wilson <chris@chris-wilson.co.uk>
>>>>>>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>>>>>> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
>>>>>
>>>>> I should point out that even with this fix (or rather my patch), we can
>>>>
>>>> I did not mean to steal it,
>>>
>>> Don't mind, I view such patches as "this here is a problem, and what I
>>> think can be done". I'm quite happy to be told "nope, the problem
>>> is..."; the end result is we fix and move on.
>>>
>>>> just that you seemed very uncompromising on
>>>> the new counters approach. If you prefer you can respin your patch with
>>>> this approach for frequency counters, it is fine by me.
>>>
>>> I'm just not convinced yet by the frequency sampler, and I still feel
>>> that we would be better off with cycles. I just haven't found a
>>> convincing argument ;)
>>
>> Just imagine .unit file has Mcycles instead of MHz in it? :)
>>
>> Since we went with this when adding it initially I think we should
>> exercise restrain with deprecating and adding so quickly.
>>
>>>>> still see errors of 25-30us, enough to fail the test. However, without
>>>>> the fix our errors can be orders of magnitude worse (e.g. reporting 80us
>>>>> busy instead of 500us).
>>>>
>>>> Yeah I guess if the timer is delayed it is delayed and all samples just
>>>> won't be there. I don't see a way to fix that elegantly. Even practically.
>>>
>>> Right, but it's not the case that we aren't sampling. If the timer was
>>> delayed entirely, then we would see 0 (start_val == end_val). I'm not
>>> sure exactly what goes wrong, but it probably is related to the timer
>>> being unreliable. (It's just that in this case we are sampling a square
>>> wave, so really hard to mess up!)
>>
>> Hm maybe I am not completely understanding what you are seeing. I
>> thought that multiple timer overruns (aka delay by multiple periods)
>> explains everything.
>>
>> Then even with this patch, if they happen to happen (!) at the end of a
>> sampling period (as observed from userspace), the large-ish chunk of
>> samples (depending on the total sampling duration) may be missing
>> (pending), and so still observed as fail.
> 
> Ah, but with the variable period timer, we can force the timer to run
> before we report the same (perf_event_read). That should still be inside
> the busy batch, and it still has the same inaccuracy (~28us, for a small
> sample population, measuring the tail is on my wishlist):
> 
> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> index 34cc6f6..8578a43 100644
> --- a/drivers/gpu/drm/i915/i915_pmu.c
> +++ b/drivers/gpu/drm/i915/i915_pmu.c
> @@ -534,6 +534,12 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
>                          val = engine->pmu.sample[sample].cur;
>                  }
>          } else {
> +               if (hrtimer_cancel(&i915->pmu.timer)) {
> +                       i915_sample(&i915->pmu.timer);
> +                       hrtimer_start_expires(&i915->pmu.timer,
> +                                             HRTIMER_MODE_ABS);
> +               }
> +
>                  switch (event->attr.config) {
>                  case I915_PMU_ACTUAL_FREQUENCY:
>                          val =
> 
> I haven't thought enough about why it still has the tail, nor measured
> the distribution of errors. I just wanted to warn that we aren't quite
> out of the mess yet, but with the variable period we do at least stop
> drowning!
>   
>> Difference after this patch is that when the timer eventually fires the
>> counter will account for the delay, so subsequent queries will be fine.
>>
>> If readout waited for at least one real sampling tick, that would be
>> solved, but by several other undesirable consequences. At least it would
>> slow down readout to 200Hz, but I don't know from the top of my head if
>> it wouldn't cause unresolvable deadlock scenarios, or if it is even
>> technically possible within the perf framework. (Since I don't think we
>> should do it, I don't even want to start thinking about it.)
> 
> The above should do what you have in mind, I think.

I think you got the direct sample criteria reversed - it should be if 
the timer wasn't running. If it was, it means it just exited so the 
sampling is already pretty correct.

Also I was thinking to limit this to only when the timer was delayed so 
as to avoid many readers messing with a single timer a lot. In other words 
we keep accepting the sampling error (0.5% I think for steady input 
signal) and only improve the timer delay scenario.

I'll send a patch to discuss on in more detail.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] drm/i915/pmu: Do not assume fixed hrtimer period
  2018-06-05 11:35             ` Tvrtko Ursulin
@ 2018-06-05 11:42               ` Chris Wilson
  0 siblings, 0 replies; 13+ messages in thread
From: Chris Wilson @ 2018-06-05 11:42 UTC (permalink / raw)
  To: Tvrtko Ursulin, Tvrtko Ursulin, Intel-gfx

Quoting Tvrtko Ursulin (2018-06-05 12:35:36)
> 
> On 05/06/2018 10:06, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2018-06-05 09:51:01)
> >>
> >> On 04/06/2018 16:11, Chris Wilson wrote:
> >>> Quoting Tvrtko Ursulin (2018-06-04 16:01:26)
> >>>>
> >>>> On 04/06/2018 14:03, Chris Wilson wrote:
> >>>>> Quoting Chris Wilson (2018-06-04 13:59:12)
> >>>>>> Quoting Tvrtko Ursulin (2018-06-04 13:52:39)
> >>>>>>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>>>>>>
> >>>>>>> As Chris has discovered on his Ivybridge, and later automated test runs
> >>>>>>> have confirmed, on most of our platforms hrtimer faced with heavy GPU load
> >>>>>>> ca occasionally become sufficiently imprecise to affect PMU sampling
> >>>>>>
> >>>>>> s/ca/can/
> >>>>>>
> >>>>>>> calculations.
> >>>>>>>
> >>>>>>> This means we cannot assume sampling frequency is what we asked for, but
> >>>>>>> we need to measure the interval ourselves.
> >>>>>>>
> >>>>>>> This patch is similar to Chris' original proposal for per-engine counters,
> >>>>>>> but instead of introducing a new set to work around the problem with
> >>>>>>> frequency sampling, it swaps around the way internal frequency accounting
> >>>>>>> is done. Instead of accumulating current frequency and dividing by
> >>>>>>> sampling frequency on readout, it accumulates frequency scaled by each
> >>>>>>> period.
> >>>>>>
> >>>>>> My ulterior motive failed to win favour ;)
> >>>>>>     
> >>>>>>> Testcase: igt/perf_pmu/*busy* # snb, ivb, hsw
> >>>>>>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>>>>>> Suggested-by: Chris Wilson <chris@chris-wilson.co.uk>
> >>>>>>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> >>>>>> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
> >>>>>
> >>>>> I should point out that even with this fix (or rather my patch), we can
> >>>>
> >>>> I did not mean to steal it,
> >>>
> >>> Don't mind, I view such patches as "this here is a problem, and what I
> >>> think can be done". I'm quite happy to be told "nope, the problem
> >>> is..."; the end result is we fix and move on.
> >>>
> >>>> just that you seemed very uncompromising on
> >>>> the new counters approach. If you prefer you can respin your patch with
> >>>> this approach for frequency counters, it is fine by me.
> >>>
> >>> I'm just not convinced yet by the frequency sampler, and I still feel
> >>> that we would be better off with cycles. I just haven't found a
> >>> convincing argument ;)
> >>
> >> Just imagine .unit file has Mcycles instead of MHz in it? :)
> >>
> >> Since we went with this when adding it initially I think we should
> >> exercise restrain with deprecating and adding so quickly.
> >>
> >>>>> still see errors of 25-30us, enough to fail the test. However, without
> >>>>> the fix our errors can be orders of magnitude worse (e.g. reporting 80us
> >>>>> busy instead of 500us).
> >>>>
> >>>> Yeah I guess if the timer is delayed it is delayed and all samples just
> >>>> won't be there. I don't see a way to fix that elegantly. Even practically.
> >>>
> >>> Right, but it's not the case that we aren't sampling. If the timer was
> >>> delayed entirely, then we would see 0 (start_val == end_val). I'm not
> >>> sure exactly what goes wrong, but it probably is related to the timer
> >>> being unreliable. (It's just that in this case we are sampling a square
> >>> wave, so really hard to mess up!)
> >>
> >> Hm maybe I am not completely understanding what you are seeing. I
> >> thought that multiple timer overruns (aka delay by multiple periods)
> >> explains everything.
> >>
> >> Then even with this patch, if they happen to happen (!) at the end of a
> >> sampling period (as observed from userspace), the large-ish chunk of
> >> samples (depending on the total sampling duration) may be missing
> >> (pending), and so still observed as fail.
> > 
> > Ah, but with the variable period timer, we can force the timer to run
> > before we report the same (perf_event_read). That should still be inside
> > the busy batch, and it still has the same inaccuracy (~28us, for a small
> > sample population, measuring the tail is on my wishlist):
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> > index 34cc6f6..8578a43 100644
> > --- a/drivers/gpu/drm/i915/i915_pmu.c
> > +++ b/drivers/gpu/drm/i915/i915_pmu.c
> > @@ -534,6 +534,12 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
> >                          val = engine->pmu.sample[sample].cur;
> >                  }
> >          } else {
> > +               if (hrtimer_cancel(&i915->pmu.timer)) {
> > +                       i915_sample(&i915->pmu.timer);
> > +                       hrtimer_start_expires(&i915->pmu.timer,
> > +                                             HRTIMER_MODE_ABS);
> > +               }
> > +
> >                  switch (event->attr.config) {
> >                  case I915_PMU_ACTUAL_FREQUENCY:
> >                          val =
> > 
> > I haven't thought enough about why it still has the tail, nor measured
> > the distribution of errors. I just wanted to warn that we aren't quite
> > out of the mess yet, but with the variable period we do at least stop
> > drowning!
> >   
> >> Difference after this patch is that when the timer eventually fires the
> >> counter will account for the delay, so subsequent queries will be fine.
> >>
> >> If readout waited for at least one real sampling tick, that would be
> >> solved, but by several other undesirable consequences. At least it would
> >> slow down readout to 200Hz, but I don't know from the top of my head if
> >> it wouldn't cause unresolvable deadlock scenarios, or if it is even
> >> technically possible within the perf framework. (Since I don't think we
> >> should do it, I don't even want to start thinking about it.)
> > 
> > The above should do what you have in mind, I think.
> 
> I think you got the direct sample criteria reversed - it should be if 
> the timer wasn't running. If it was, it means it just exited so the 
> sampling is already pretty correct.

Note it returns 1 if the timer was queued or active, 0 if the timer
wasn't, iirc; it's try_to_cancel that has the more complex interface.
(It should always be true along this path if we are using the timer.)

> Also I was thinking to limit this to only when the timer was delayed so 
> to avoid many readers messing with a single timer alot. In other words 
> we keep accepting to sampling error (0.5% I think for steady input 
> signal) and only improve the timer delay scenario.

You mean just if time_between_samples < 10us, return. It didn't seem
worth it. There are still a number of issues with the timer granularity
that makes chasing edge cases ... disappointing.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2018-06-05 11:42 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-06-04 12:52 [PATCH] drm/i915/pmu: Do not assume fixed hrtimer period Tvrtko Ursulin
2018-06-04 12:59 ` Chris Wilson
2018-06-04 13:03   ` Chris Wilson
2018-06-04 15:01     ` Tvrtko Ursulin
2018-06-04 15:11       ` Chris Wilson
2018-06-05  8:51         ` Tvrtko Ursulin
2018-06-05  9:06           ` Chris Wilson
2018-06-05 11:35             ` Tvrtko Ursulin
2018-06-05 11:42               ` Chris Wilson
2018-06-04 13:05 ` ✗ Fi.CI.CHECKPATCH: warning for " Patchwork
2018-06-04 13:26 ` ✓ Fi.CI.BAT: success " Patchwork
2018-06-04 14:17 ` ✓ Fi.CI.IGT: " Patchwork
2018-06-05 10:57 ` ✗ Fi.CI.BAT: failure for drm/i915/pmu: Do not assume fixed hrtimer period (rev2) Patchwork

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.