All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/i915/pmu: Measure sampler intervals
@ 2018-05-25 17:11 Chris Wilson
  2018-05-25 17:31 ` Tvrtko Ursulin
                   ` (5 more replies)
  0 siblings, 6 replies; 15+ messages in thread
From: Chris Wilson @ 2018-05-25 17:11 UTC (permalink / raw)
  To: intel-gfx

hrtimer is not reliable enough to assume fixed intervals, and so for even
coarse accuracy (in the face of kasan and similar heavy debugging) we
need to measure the actual interval between samples.

While using a single timestamp to compute the interval does not allow
very fine accuracy (consider the impact of a slow forcewake between
different samples after the timestamp is read), it is much better than
assuming the interval.

Testcase: igt/perf_pmu #ivb
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_pmu.c | 20 +++++++++++++-------
 drivers/gpu/drm/i915/i915_pmu.h |  4 ++++
 2 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index dc87797db500..f5087515eb43 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -127,6 +127,7 @@ static void __i915_pmu_maybe_start_timer(struct drm_i915_private *i915)
 {
 	if (!i915->pmu.timer_enabled && pmu_needs_timer(i915, true)) {
 		i915->pmu.timer_enabled = true;
+		i915->pmu.timestamp = ktime_get();
 		hrtimer_start_range_ns(&i915->pmu.timer,
 				       ns_to_ktime(PERIOD), 0,
 				       HRTIMER_MODE_REL_PINNED);
@@ -160,7 +161,7 @@ update_sample(struct i915_pmu_sample *sample, u32 unit, u32 val)
 	sample->cur += mul_u32_u32(val, unit);
 }
 
-static void engines_sample(struct drm_i915_private *dev_priv)
+static void engines_sample(struct drm_i915_private *dev_priv, u64 period)
 {
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
@@ -183,7 +184,7 @@ static void engines_sample(struct drm_i915_private *dev_priv)
 		val = !i915_seqno_passed(current_seqno, last_seqno);
 
 		update_sample(&engine->pmu.sample[I915_SAMPLE_BUSY],
-			      PERIOD, val);
+			      period, val);
 
 		if (val && (engine->pmu.enable &
 		    (BIT(I915_SAMPLE_WAIT) | BIT(I915_SAMPLE_SEMA)))) {
@@ -195,10 +196,10 @@ static void engines_sample(struct drm_i915_private *dev_priv)
 		}
 
 		update_sample(&engine->pmu.sample[I915_SAMPLE_WAIT],
-			      PERIOD, !!(val & RING_WAIT));
+			      period, !!(val & RING_WAIT));
 
 		update_sample(&engine->pmu.sample[I915_SAMPLE_SEMA],
-			      PERIOD, !!(val & RING_WAIT_SEMAPHORE));
+			      period, !!(val & RING_WAIT_SEMAPHORE));
 	}
 
 	if (fw)
@@ -207,7 +208,7 @@ static void engines_sample(struct drm_i915_private *dev_priv)
 	intel_runtime_pm_put(dev_priv);
 }
 
-static void frequency_sample(struct drm_i915_private *dev_priv)
+static void frequency_sample(struct drm_i915_private *dev_priv, u64 period)
 {
 	if (dev_priv->pmu.enable &
 	    config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) {
@@ -237,12 +238,17 @@ static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
 {
 	struct drm_i915_private *i915 =
 		container_of(hrtimer, struct drm_i915_private, pmu.timer);
+	ktime_t now, period;
 
 	if (!READ_ONCE(i915->pmu.timer_enabled))
 		return HRTIMER_NORESTART;
 
-	engines_sample(i915);
-	frequency_sample(i915);
+	now = ktime_get();
+	period = ktime_sub(now, i915->pmu.timestamp);
+	i915->pmu.timestamp = now;
+
+	engines_sample(i915, period);
+	frequency_sample(i915, period);
 
 	hrtimer_forward_now(hrtimer, ns_to_ktime(PERIOD));
 	return HRTIMER_RESTART;
diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
index 2ba735299f7c..0f1e4642077e 100644
--- a/drivers/gpu/drm/i915/i915_pmu.h
+++ b/drivers/gpu/drm/i915/i915_pmu.h
@@ -52,6 +52,10 @@ struct i915_pmu {
 	 * @timer: Timer for internal i915 PMU sampling.
 	 */
 	struct hrtimer timer;
+	/**
+	 * @timestamp: Timestamp of last internal i915 PMU sampling.
+	 */
+	ktime_t timestamp;
 	/**
 	 * @enable: Bitmask of all currently enabled events.
 	 *
-- 
2.17.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH] drm/i915/pmu: Measure sampler intervals
  2018-05-25 17:11 [PATCH] drm/i915/pmu: Measure sampler intervals Chris Wilson
@ 2018-05-25 17:31 ` Tvrtko Ursulin
  2018-05-25 17:45   ` Chris Wilson
  2018-05-25 18:06 ` ✓ Fi.CI.BAT: success for " Patchwork
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 15+ messages in thread
From: Tvrtko Ursulin @ 2018-05-25 17:31 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 25/05/2018 18:11, Chris Wilson wrote:
> hrtimer is not reliable enough to assume fixed intervals, and so even
> coarse accuracy (in the face of kasan and similar heavy debugging) we
> need to measure the actual interval between sample.

It doesn't even average out to something acceptable under such Kconfigs? 
Horror.. precise but inaccurate. /O\

> While using a single timestamp to compute the interval does not allow
> very fine accuracy (consider the impact of a slow forcewake between
> different samples after the timestamp is read) is much better than
> assuming the interval.
> 
> Testcase: igt/perf_pmu #ivb
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_pmu.c | 20 +++++++++++++-------
>   drivers/gpu/drm/i915/i915_pmu.h |  4 ++++
>   2 files changed, 17 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> index dc87797db500..f5087515eb43 100644
> --- a/drivers/gpu/drm/i915/i915_pmu.c
> +++ b/drivers/gpu/drm/i915/i915_pmu.c
> @@ -127,6 +127,7 @@ static void __i915_pmu_maybe_start_timer(struct drm_i915_private *i915)
>   {
>   	if (!i915->pmu.timer_enabled && pmu_needs_timer(i915, true)) {
>   		i915->pmu.timer_enabled = true;
> +		i915->pmu.timestamp = ktime_get();
>   		hrtimer_start_range_ns(&i915->pmu.timer,
>   				       ns_to_ktime(PERIOD), 0,
>   				       HRTIMER_MODE_REL_PINNED);
> @@ -160,7 +161,7 @@ update_sample(struct i915_pmu_sample *sample, u32 unit, u32 val)
>   	sample->cur += mul_u32_u32(val, unit);
>   }
>   
> -static void engines_sample(struct drm_i915_private *dev_priv)
> +static void engines_sample(struct drm_i915_private *dev_priv, u64 period)
>   {
>   	struct intel_engine_cs *engine;
>   	enum intel_engine_id id;
> @@ -183,7 +184,7 @@ static void engines_sample(struct drm_i915_private *dev_priv)
>   		val = !i915_seqno_passed(current_seqno, last_seqno);
>   
>   		update_sample(&engine->pmu.sample[I915_SAMPLE_BUSY],
> -			      PERIOD, val);
> +			      period, val);
>   
>   		if (val && (engine->pmu.enable &
>   		    (BIT(I915_SAMPLE_WAIT) | BIT(I915_SAMPLE_SEMA)))) {
> @@ -195,10 +196,10 @@ static void engines_sample(struct drm_i915_private *dev_priv)
>   		}
>   
>   		update_sample(&engine->pmu.sample[I915_SAMPLE_WAIT],
> -			      PERIOD, !!(val & RING_WAIT));
> +			      period, !!(val & RING_WAIT));
>   
>   		update_sample(&engine->pmu.sample[I915_SAMPLE_SEMA],
> -			      PERIOD, !!(val & RING_WAIT_SEMAPHORE));
> +			      period, !!(val & RING_WAIT_SEMAPHORE));
>   	}
>   
>   	if (fw)
> @@ -207,7 +208,7 @@ static void engines_sample(struct drm_i915_private *dev_priv)
>   	intel_runtime_pm_put(dev_priv);
>   }
>   
> -static void frequency_sample(struct drm_i915_private *dev_priv)
> +static void frequency_sample(struct drm_i915_private *dev_priv, u64 period)

Period is unused in this function.

But more importantly that leads to a problem. When reading the counter 
the frequencies accumulator is divided by FREQUENCY define, which is 
inverse of PERIOD. If the error is big enough to mess up the engines 
sampling, is it big enough to affect the frequencies as well?

Improving that would need the average frequency between two counter reads,
which looks tricky to shoehorn into the pmu api. Maybe a primitive running
average would do.

>   {
>   	if (dev_priv->pmu.enable &
>   	    config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) {
> @@ -237,12 +238,17 @@ static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
>   {
>   	struct drm_i915_private *i915 =
>   		container_of(hrtimer, struct drm_i915_private, pmu.timer);
> +	ktime_t now, period;
>   
>   	if (!READ_ONCE(i915->pmu.timer_enabled))
>   		return HRTIMER_NORESTART;
>   
> -	engines_sample(i915);
> -	frequency_sample(i915);
> +	now = ktime_get();
> +	period = ktime_sub(now, i915->pmu.timestamp);
> +	i915->pmu.timestamp = now;
> +
> +	engines_sample(i915, period);
> +	frequency_sample(i915, period);
>   
>   	hrtimer_forward_now(hrtimer, ns_to_ktime(PERIOD));
>   	return HRTIMER_RESTART;
> diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
> index 2ba735299f7c..0f1e4642077e 100644
> --- a/drivers/gpu/drm/i915/i915_pmu.h
> +++ b/drivers/gpu/drm/i915/i915_pmu.h
> @@ -52,6 +52,10 @@ struct i915_pmu {
>   	 * @timer: Timer for internal i915 PMU sampling.
>   	 */
>   	struct hrtimer timer;
> +	/**
> +	 * @timestamp: Timestamp of last internal i915 PMU sampling.
> +	 */
> +	ktime_t timestamp;
>   	/**
>   	 * @enable: Bitmask of all currently enabled events.
>   	 *
> 

Patch looks okay, it just loses some of the optimisation potential so I
am guessing we won't be thinking about replacing multiplies and divides
with shifts any more. :)

But the question of frequency counters is now bothering me.

And if this problem is limited to Kasan then how much do we want to
complicate things to make that work?

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] drm/i915/pmu: Measure sampler intervals
  2018-05-25 17:31 ` Tvrtko Ursulin
@ 2018-05-25 17:45   ` Chris Wilson
  2018-05-30 10:57     ` Tvrtko Ursulin
  0 siblings, 1 reply; 15+ messages in thread
From: Chris Wilson @ 2018-05-25 17:45 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-05-25 18:31:35)
> 
> On 25/05/2018 18:11, Chris Wilson wrote:
> > hrtimer is not reliable enough to assume fixed intervals, and so even
> > coarse accuracy (in the face of kasan and similar heavy debugging) we
> > need to measure the actual interval between sample.
> 
> It doesn't even average out to something acceptable under such Kconfigs? 
> Horror.. precise but inaccurate. /O\
> 
> > While using a single timestamp to compute the interval does not allow
> > very fine accuracy (consider the impact of a slow forcewake between
> > different samples after the timestamp is read) is much better than
> > assuming the interval.
> > 
> > Testcase: igt/perf_pmu #ivb
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > ---
> >   drivers/gpu/drm/i915/i915_pmu.c | 20 +++++++++++++-------
> >   drivers/gpu/drm/i915/i915_pmu.h |  4 ++++
> >   2 files changed, 17 insertions(+), 7 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> > index dc87797db500..f5087515eb43 100644
> > --- a/drivers/gpu/drm/i915/i915_pmu.c
> > +++ b/drivers/gpu/drm/i915/i915_pmu.c
> > @@ -127,6 +127,7 @@ static void __i915_pmu_maybe_start_timer(struct drm_i915_private *i915)
> >   {
> >       if (!i915->pmu.timer_enabled && pmu_needs_timer(i915, true)) {
> >               i915->pmu.timer_enabled = true;
> > +             i915->pmu.timestamp = ktime_get();
> >               hrtimer_start_range_ns(&i915->pmu.timer,
> >                                      ns_to_ktime(PERIOD), 0,
> >                                      HRTIMER_MODE_REL_PINNED);
> > @@ -160,7 +161,7 @@ update_sample(struct i915_pmu_sample *sample, u32 unit, u32 val)
> >       sample->cur += mul_u32_u32(val, unit);
> >   }
> >   
> > -static void engines_sample(struct drm_i915_private *dev_priv)
> > +static void engines_sample(struct drm_i915_private *dev_priv, u64 period)
> >   {
> >       struct intel_engine_cs *engine;
> >       enum intel_engine_id id;
> > @@ -183,7 +184,7 @@ static void engines_sample(struct drm_i915_private *dev_priv)
> >               val = !i915_seqno_passed(current_seqno, last_seqno);
> >   
> >               update_sample(&engine->pmu.sample[I915_SAMPLE_BUSY],
> > -                           PERIOD, val);
> > +                           period, val);
> >   
> >               if (val && (engine->pmu.enable &
> >                   (BIT(I915_SAMPLE_WAIT) | BIT(I915_SAMPLE_SEMA)))) {
> > @@ -195,10 +196,10 @@ static void engines_sample(struct drm_i915_private *dev_priv)
> >               }
> >   
> >               update_sample(&engine->pmu.sample[I915_SAMPLE_WAIT],
> > -                           PERIOD, !!(val & RING_WAIT));
> > +                           period, !!(val & RING_WAIT));
> >   
> >               update_sample(&engine->pmu.sample[I915_SAMPLE_SEMA],
> > -                           PERIOD, !!(val & RING_WAIT_SEMAPHORE));
> > +                           period, !!(val & RING_WAIT_SEMAPHORE));
> >       }
> >   
> >       if (fw)
> > @@ -207,7 +208,7 @@ static void engines_sample(struct drm_i915_private *dev_priv)
> >       intel_runtime_pm_put(dev_priv);
> >   }
> >   
> > -static void frequency_sample(struct drm_i915_private *dev_priv)
> > +static void frequency_sample(struct drm_i915_private *dev_priv, u64 period)
> 
> Period is unused in this function.
> 
> But more importantly that leads to a problem. When reading the counter 
> the frequencies accumulator is divided by FREQUENCY define, which is 
> inverse of PERIOD. If the error is big enough to mess up the engines 
> sampling, is it big enough to affect the frequencies as well?

Yes, but fixing up frequencies I left for another patch, because it's
going to be more involved (having to choose the divider more carefully)
and would you believe it, but CI only complains about busy sampling ;)

I passed in the period as a reminder.
 
> Improving that would need average frequency between two counter reads. 
> Which looks tricky to shoehorn into the pmu api. Maybe primitive running 
> average would do.

My plan was to expose cycles (Frequency x time) to the user, and then
they calculate frequency by comparing their own samples. (Since we
give them (time, samples) for each pmu read).

> >   {
> >       if (dev_priv->pmu.enable &
> >           config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) {
> > @@ -237,12 +238,17 @@ static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
> >   {
> >       struct drm_i915_private *i915 =
> >               container_of(hrtimer, struct drm_i915_private, pmu.timer);
> > +     ktime_t now, period;
> >   
> >       if (!READ_ONCE(i915->pmu.timer_enabled))
> >               return HRTIMER_NORESTART;
> >   
> > -     engines_sample(i915);
> > -     frequency_sample(i915);
> > +     now = ktime_get();
> > +     period = ktime_sub(now, i915->pmu.timestamp);
> > +     i915->pmu.timestamp = now;
> > +
> > +     engines_sample(i915, period);
> > +     frequency_sample(i915, period);
> >   
> >       hrtimer_forward_now(hrtimer, ns_to_ktime(PERIOD));
> >       return HRTIMER_RESTART;
> > diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
> > index 2ba735299f7c..0f1e4642077e 100644
> > --- a/drivers/gpu/drm/i915/i915_pmu.h
> > +++ b/drivers/gpu/drm/i915/i915_pmu.h
> > @@ -52,6 +52,10 @@ struct i915_pmu {
> >        * @timer: Timer for internal i915 PMU sampling.
> >        */
> >       struct hrtimer timer;
> > +     /**
> > +      * @timestamp: Timestamp of last internal i915 PMU sampling.
> > +      */
> > +     ktime_t timestamp;
> >       /**
> >        * @enable: Bitmask of all currently enabled events.
> >        *
> > 
> 
> Patch looks okay, just loses the some of the optimisation potential so I 
> am guessing we won't be thinking about replacing multiplies and divides 
> with shift any more. :)
> 
> But the question of frequency counters is now bothering me.
> 
> And if this problem is limited to Kasan then how much we want to 
> complicate things to make that work?

Not just kasan, but ivb really. That's the only one to have never worked
whatever the config. kasan affects more CI machines.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* ✓ Fi.CI.BAT: success for drm/i915/pmu: Measure sampler intervals
  2018-05-25 17:11 [PATCH] drm/i915/pmu: Measure sampler intervals Chris Wilson
  2018-05-25 17:31 ` Tvrtko Ursulin
@ 2018-05-25 18:06 ` Patchwork
  2018-05-26  3:50 ` ✓ Fi.CI.IGT: " Patchwork
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 15+ messages in thread
From: Patchwork @ 2018-05-25 18:06 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/pmu: Measure sampler intervals
URL   : https://patchwork.freedesktop.org/series/43795/
State : success

== Summary ==

= CI Bug Log - changes from CI_DRM_4243 -> Patchwork_9124 =

== Summary - SUCCESS ==

  No regressions found.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/43795/revisions/1/mbox/

== Known issues ==

  Here are the changes found in Patchwork_9124 that come from known issues:

  === IGT changes ===

    ==== Issues hit ====

    igt@kms_pipe_crc_basic@suspend-read-crc-pipe-b:
      fi-snb-2520m:       PASS -> INCOMPLETE (fdo#103713)

    
    ==== Possible fixes ====

    igt@kms_flip@basic-flip-vs-wf_vblank:
      fi-skl-6770hq:      FAIL (fdo#103928, fdo#100368) -> PASS

    igt@kms_pipe_crc_basic@nonblocking-crc-pipe-b-frame-sequence:
      fi-skl-6770hq:      FAIL (fdo#103481) -> PASS

    
  fdo#100368 https://bugs.freedesktop.org/show_bug.cgi?id=100368
  fdo#103481 https://bugs.freedesktop.org/show_bug.cgi?id=103481
  fdo#103713 https://bugs.freedesktop.org/show_bug.cgi?id=103713
  fdo#103928 https://bugs.freedesktop.org/show_bug.cgi?id=103928


== Participating hosts (44 -> 39) ==

  Missing    (5): fi-ctg-p8600 fi-ilk-m540 fi-byt-squawks fi-bsw-cyan fi-skl-6700hq 


== Build changes ==

    * Linux: CI_DRM_4243 -> Patchwork_9124

  CI_DRM_4243: 7af0b742eec473a202f327e5148757f988b65305 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4499: f560ae5a464331f03f0a669ed46b8c9e56526187 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_9124: 1620ac1abdcf8e4c1102eb46b9719b4484db140a @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

1620ac1abdcf drm/i915/pmu: Measure sampler intervals

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_9124/issues.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* ✓ Fi.CI.IGT: success for drm/i915/pmu: Measure sampler intervals
  2018-05-25 17:11 [PATCH] drm/i915/pmu: Measure sampler intervals Chris Wilson
  2018-05-25 17:31 ` Tvrtko Ursulin
  2018-05-25 18:06 ` ✓ Fi.CI.BAT: success for " Patchwork
@ 2018-05-26  3:50 ` Patchwork
  2018-05-30 11:55 ` [PATCH v2] " Chris Wilson
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 15+ messages in thread
From: Patchwork @ 2018-05-26  3:50 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/pmu: Measure sampler intervals
URL   : https://patchwork.freedesktop.org/series/43795/
State : success

== Summary ==

= CI Bug Log - changes from CI_DRM_4243_full -> Patchwork_9124_full =

== Summary - WARNING ==

  Minor unknown changes coming with Patchwork_9124_full need to be verified
  manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_9124_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/43795/revisions/1/mbox/

== Possible new issues ==

  Here are the unknown changes that may have been introduced in Patchwork_9124_full:

  === IGT changes ===

    ==== Warnings ====

    igt@pm_rc6_residency@rc6-accuracy:
      shard-snb:          SKIP -> PASS

    
== Known issues ==

  Here are the changes found in Patchwork_9124_full that come from known issues:

  === IGT changes ===

    ==== Issues hit ====

    igt@drv_selftest@live_gtt:
      shard-glk:          PASS -> INCOMPLETE (fdo#103359, k.org#198133)

    igt@kms_atomic_transition@1x-modeset-transitions-nonblocking-fencing:
      shard-glk:          PASS -> FAIL (fdo#105703) +1

    igt@kms_flip@2x-plain-flip-ts-check-interruptible:
      shard-glk:          PASS -> FAIL (fdo#100368) +1

    
    ==== Possible fixes ====

    igt@kms_flip@flip-vs-expired-vblank-interruptible:
      shard-glk:          FAIL (fdo#102887, fdo#105363) -> PASS

    igt@kms_flip@flip-vs-wf_vblank-interruptible:
      shard-glk:          FAIL (fdo#100368) -> PASS

    igt@kms_flip@wf_vblank-ts-check-interruptible:
      shard-hsw:          FAIL (fdo#100368) -> PASS

    igt@kms_flip_tiling@flip-to-x-tiled:
      shard-glk:          FAIL (fdo#104724) -> PASS

    igt@kms_flip_tiling@flip-y-tiled:
      shard-glk:          FAIL (fdo#103822, fdo#104724) -> PASS

    igt@kms_setmode@basic:
      shard-apl:          FAIL (fdo#99912) -> PASS

    
  fdo#100368 https://bugs.freedesktop.org/show_bug.cgi?id=100368
  fdo#102887 https://bugs.freedesktop.org/show_bug.cgi?id=102887
  fdo#103359 https://bugs.freedesktop.org/show_bug.cgi?id=103359
  fdo#103822 https://bugs.freedesktop.org/show_bug.cgi?id=103822
  fdo#104724 https://bugs.freedesktop.org/show_bug.cgi?id=104724
  fdo#105363 https://bugs.freedesktop.org/show_bug.cgi?id=105363
  fdo#105703 https://bugs.freedesktop.org/show_bug.cgi?id=105703
  fdo#99912 https://bugs.freedesktop.org/show_bug.cgi?id=99912
  k.org#198133 https://bugzilla.kernel.org/show_bug.cgi?id=198133


== Participating hosts (5 -> 4) ==

  Missing    (1): shard-kbl 


== Build changes ==

    * Linux: CI_DRM_4243 -> Patchwork_9124

  CI_DRM_4243: 7af0b742eec473a202f327e5148757f988b65305 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4499: f560ae5a464331f03f0a669ed46b8c9e56526187 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_9124: 1620ac1abdcf8e4c1102eb46b9719b4484db140a @ git://anongit.freedesktop.org/gfx-ci/linux

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_9124/shards.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] drm/i915/pmu: Measure sampler intervals
  2018-05-25 17:45   ` Chris Wilson
@ 2018-05-30 10:57     ` Tvrtko Ursulin
  2018-05-30 11:07       ` Chris Wilson
  0 siblings, 1 reply; 15+ messages in thread
From: Tvrtko Ursulin @ 2018-05-30 10:57 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 25/05/2018 18:45, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-05-25 18:31:35)
>>
>> On 25/05/2018 18:11, Chris Wilson wrote:
>>> hrtimer is not reliable enough to assume fixed intervals, and so even
>>> coarse accuracy (in the face of kasan and similar heavy debugging) we
>>> need to measure the actual interval between sample.
>>
>> It doesn't even average out to something acceptable under such Kconfigs?
>> Horror.. precise but inaccurate. /O\
>>
>>> While using a single timestamp to compute the interval does not allow
>>> very fine accuracy (consider the impact of a slow forcewake between
>>> different samples after the timestamp is read) is much better than
>>> assuming the interval.
>>>
>>> Testcase: igt/perf_pmu #ivb
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>> ---
>>>    drivers/gpu/drm/i915/i915_pmu.c | 20 +++++++++++++-------
>>>    drivers/gpu/drm/i915/i915_pmu.h |  4 ++++
>>>    2 files changed, 17 insertions(+), 7 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
>>> index dc87797db500..f5087515eb43 100644
>>> --- a/drivers/gpu/drm/i915/i915_pmu.c
>>> +++ b/drivers/gpu/drm/i915/i915_pmu.c
>>> @@ -127,6 +127,7 @@ static void __i915_pmu_maybe_start_timer(struct drm_i915_private *i915)
>>>    {
>>>        if (!i915->pmu.timer_enabled && pmu_needs_timer(i915, true)) {
>>>                i915->pmu.timer_enabled = true;
>>> +             i915->pmu.timestamp = ktime_get();
>>>                hrtimer_start_range_ns(&i915->pmu.timer,
>>>                                       ns_to_ktime(PERIOD), 0,
>>>                                       HRTIMER_MODE_REL_PINNED);
>>> @@ -160,7 +161,7 @@ update_sample(struct i915_pmu_sample *sample, u32 unit, u32 val)
>>>        sample->cur += mul_u32_u32(val, unit);
>>>    }
>>>    
>>> -static void engines_sample(struct drm_i915_private *dev_priv)
>>> +static void engines_sample(struct drm_i915_private *dev_priv, u64 period)
>>>    {
>>>        struct intel_engine_cs *engine;
>>>        enum intel_engine_id id;
>>> @@ -183,7 +184,7 @@ static void engines_sample(struct drm_i915_private *dev_priv)
>>>                val = !i915_seqno_passed(current_seqno, last_seqno);
>>>    
>>>                update_sample(&engine->pmu.sample[I915_SAMPLE_BUSY],
>>> -                           PERIOD, val);
>>> +                           period, val);
>>>    
>>>                if (val && (engine->pmu.enable &
>>>                    (BIT(I915_SAMPLE_WAIT) | BIT(I915_SAMPLE_SEMA)))) {
>>> @@ -195,10 +196,10 @@ static void engines_sample(struct drm_i915_private *dev_priv)
>>>                }
>>>    
>>>                update_sample(&engine->pmu.sample[I915_SAMPLE_WAIT],
>>> -                           PERIOD, !!(val & RING_WAIT));
>>> +                           period, !!(val & RING_WAIT));
>>>    
>>>                update_sample(&engine->pmu.sample[I915_SAMPLE_SEMA],
>>> -                           PERIOD, !!(val & RING_WAIT_SEMAPHORE));
>>> +                           period, !!(val & RING_WAIT_SEMAPHORE));
>>>        }
>>>    
>>>        if (fw)
>>> @@ -207,7 +208,7 @@ static void engines_sample(struct drm_i915_private *dev_priv)
>>>        intel_runtime_pm_put(dev_priv);
>>>    }
>>>    
>>> -static void frequency_sample(struct drm_i915_private *dev_priv)
>>> +static void frequency_sample(struct drm_i915_private *dev_priv, u64 period)
>>
>> Period is unused in this function.
>>
>> But more importantly that leads to a problem. When reading the counter
>> the frequencies accumulator is divided by FREQUENCY define, which is
>> inverse of PERIOD. If the error is big enough to mess up the engines
>> sampling, is it big enough to affect the frequencies as well?
> 
> Yes, but fixing up frequencies I left for another patch, because it's
> going to be more involved (having to choose the divider more carefully)
> and would you believe it, but CI only complains about busy sampling ;)
> 
> I passed in the period as a reminder.

It might only remind someone to remove the unused variable so I am not 
sure it is worth it.

>> Improving that would need average frequency between two counter reads.
>> Which looks tricky to shoehorn into the pmu api. Maybe primitive running
>> average would do.
> 
> My plan was to expose cycles (Frequency x time) to the user, and then
> they calculate frequency by the comparing their own samples. (Since we
> give them (time, samples) for each pmu read).

Frequency times which time? On read? Don't exactly follow. :(

> 
>>>    {
>>>        if (dev_priv->pmu.enable &
>>>            config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) {
>>> @@ -237,12 +238,17 @@ static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
>>>    {
>>>        struct drm_i915_private *i915 =
>>>                container_of(hrtimer, struct drm_i915_private, pmu.timer);
>>> +     ktime_t now, period;
>>>    
>>>        if (!READ_ONCE(i915->pmu.timer_enabled))
>>>                return HRTIMER_NORESTART;
>>>    
>>> -     engines_sample(i915);
>>> -     frequency_sample(i915);
>>> +     now = ktime_get();
>>> +     period = ktime_sub(now, i915->pmu.timestamp);
>>> +     i915->pmu.timestamp = now;
>>> +
>>> +     engines_sample(i915, period);
>>> +     frequency_sample(i915, period);
>>>    
>>>        hrtimer_forward_now(hrtimer, ns_to_ktime(PERIOD));
>>>        return HRTIMER_RESTART;
>>> diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
>>> index 2ba735299f7c..0f1e4642077e 100644
>>> --- a/drivers/gpu/drm/i915/i915_pmu.h
>>> +++ b/drivers/gpu/drm/i915/i915_pmu.h
>>> @@ -52,6 +52,10 @@ struct i915_pmu {
>>>         * @timer: Timer for internal i915 PMU sampling.
>>>         */
>>>        struct hrtimer timer;
>>> +     /**
>>> +      * @timestamp: Timestamp of last internal i915 PMU sampling.
>>> +      */
>>> +     ktime_t timestamp;
>>>        /**
>>>         * @enable: Bitmask of all currently enabled events.
>>>         *
>>>
>>
>> Patch looks okay, just loses the some of the optimisation potential so I
>> am guessing we won't be thinking about replacing multiplies and divides
>> with shift any more. :)
>>
>> But the question of frequency counters is now bothering me.
>>
>> And if this problem is limited to Kasan then how much we want to
>> complicate things to make that work?
> 
> Not just kasan, but ivb really. That's the only to have never worked
> whatever the config. kasan affects more CI machines.

I am also thinking that with this approach we could start allowing timer 
slack, since we are measuring it anyway. It would release back some of 
the added cost of time queries. Well, not that they are significant. 
Digression anyway.

I want to understand why this is a problem on IVB and what is the 
solution for frequency counters.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] drm/i915/pmu: Measure sampler intervals
  2018-05-30 10:57     ` Tvrtko Ursulin
@ 2018-05-30 11:07       ` Chris Wilson
  0 siblings, 0 replies; 15+ messages in thread
From: Chris Wilson @ 2018-05-30 11:07 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-05-30 11:57:39)
> 
> On 25/05/2018 18:45, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2018-05-25 18:31:35)
> >>
> >> On 25/05/2018 18:11, Chris Wilson wrote:
> >>> hrtimer is not reliable enough to assume fixed intervals, and so even
> >>> coarse accuracy (in the face of kasan and similar heavy debugging) we
> >>> need to measure the actual interval between sample.
> >>
> >> It doesn't even average out to something acceptable under such Kconfigs?
> >> Horror.. precise but inaccurate. /O\
> >>
> >>> While using a single timestamp to compute the interval does not allow
> >>> very fine accuracy (consider the impact of a slow forcewake between
> >>> different samples after the timestamp is read) is much better than
> >>> assuming the interval.
> >>>
> >>> Testcase: igt/perf_pmu #ivb
> >>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>> ---
> >>>    drivers/gpu/drm/i915/i915_pmu.c | 20 +++++++++++++-------
> >>>    drivers/gpu/drm/i915/i915_pmu.h |  4 ++++
> >>>    2 files changed, 17 insertions(+), 7 deletions(-)
> >>>
> >>> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> >>> index dc87797db500..f5087515eb43 100644
> >>> --- a/drivers/gpu/drm/i915/i915_pmu.c
> >>> +++ b/drivers/gpu/drm/i915/i915_pmu.c
> >>> @@ -127,6 +127,7 @@ static void __i915_pmu_maybe_start_timer(struct drm_i915_private *i915)
> >>>    {
> >>>        if (!i915->pmu.timer_enabled && pmu_needs_timer(i915, true)) {
> >>>                i915->pmu.timer_enabled = true;
> >>> +             i915->pmu.timestamp = ktime_get();
> >>>                hrtimer_start_range_ns(&i915->pmu.timer,
> >>>                                       ns_to_ktime(PERIOD), 0,
> >>>                                       HRTIMER_MODE_REL_PINNED);
> >>> @@ -160,7 +161,7 @@ update_sample(struct i915_pmu_sample *sample, u32 unit, u32 val)
> >>>        sample->cur += mul_u32_u32(val, unit);
> >>>    }
> >>>    
> >>> -static void engines_sample(struct drm_i915_private *dev_priv)
> >>> +static void engines_sample(struct drm_i915_private *dev_priv, u64 period)
> >>>    {
> >>>        struct intel_engine_cs *engine;
> >>>        enum intel_engine_id id;
> >>> @@ -183,7 +184,7 @@ static void engines_sample(struct drm_i915_private *dev_priv)
> >>>                val = !i915_seqno_passed(current_seqno, last_seqno);
> >>>    
> >>>                update_sample(&engine->pmu.sample[I915_SAMPLE_BUSY],
> >>> -                           PERIOD, val);
> >>> +                           period, val);
> >>>    
> >>>                if (val && (engine->pmu.enable &
> >>>                    (BIT(I915_SAMPLE_WAIT) | BIT(I915_SAMPLE_SEMA)))) {
> >>> @@ -195,10 +196,10 @@ static void engines_sample(struct drm_i915_private *dev_priv)
> >>>                }
> >>>    
> >>>                update_sample(&engine->pmu.sample[I915_SAMPLE_WAIT],
> >>> -                           PERIOD, !!(val & RING_WAIT));
> >>> +                           period, !!(val & RING_WAIT));
> >>>    
> >>>                update_sample(&engine->pmu.sample[I915_SAMPLE_SEMA],
> >>> -                           PERIOD, !!(val & RING_WAIT_SEMAPHORE));
> >>> +                           period, !!(val & RING_WAIT_SEMAPHORE));
> >>>        }
> >>>    
> >>>        if (fw)
> >>> @@ -207,7 +208,7 @@ static void engines_sample(struct drm_i915_private *dev_priv)
> >>>        intel_runtime_pm_put(dev_priv);
> >>>    }
> >>>    
> >>> -static void frequency_sample(struct drm_i915_private *dev_priv)
> >>> +static void frequency_sample(struct drm_i915_private *dev_priv, u64 period)
> >>
> >> Period is unused in this function.
> >>
> >> But more importantly that leads to a problem. When reading the counter
> >> the frequencies accumulator is divided by FREQUENCY define, which is
> >> inverse of PERIOD. If the error is big enough to mess up the engines
> >> sampling, is it big enough to affect the frequencies as well?
> > 
> > Yes, but fixing up frequencies I left for another patch, because it's
> > going to be more involved (having to choose the divider more carefully)
> > and would you believe it, but CI only complains about busy sampling ;)
> > 
> > I passed in the period as a reminder.
> 
> It might only remind someone to remove the unused variable so I am not 
> sure it is worth it.
> 
> >> Improving that would need average frequency between two counter reads.
> >> Which looks tricky to shoehorn into the pmu api. Maybe primitive running
> >> average would do.
> > 
> > My plan was to expose cycles (Frequency x time) to the user, and then
> > they calculate frequency by comparing their own samples. (Since we
> > give them (time, samples) for each pmu read).
> 
> Frequency times which time? On read? Don't exactly follow. :(

On sample: sample[FREQ] += period * instantaneous_measurement

The (perf_event_read) caller has to compute freq by d_cycles / d_time.
i.e. we don't have a frequency sampler, but a cycles sampler. I think
this is a big enough change that we'd have to declare new ABI (deprecate
the old samplers, add new).

[snip]

> I am also thinking that with this approach we could start allowing timer 
> slack, since we are measuring it anyway. It would release back some of 
> the added cost of time queries. Well, not that they are significant. 
> Digression anyway.

Interesting, yeah.
 
> I want to understand why this is a problem on IVB and what is the 
> solution for frequency counters.

I can only say that from the looks of it, ivb has very poor hrtimer
resolution (I wonder if tsc/hpet are being used!). I think ivb was
before art? I too do not know quite why ivb is so special, but it is the
only one to have consistently failed in all drm-tip shard runs.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH v2] drm/i915/pmu: Measure sampler intervals
  2018-05-25 17:11 [PATCH] drm/i915/pmu: Measure sampler intervals Chris Wilson
                   ` (2 preceding siblings ...)
  2018-05-26  3:50 ` ✓ Fi.CI.IGT: " Patchwork
@ 2018-05-30 11:55 ` Chris Wilson
  2018-05-30 14:37   ` Tvrtko Ursulin
  2018-05-30 13:57 ` ✓ Fi.CI.BAT: success for drm/i915/pmu: Measure sampler intervals (rev2) Patchwork
  2018-05-30 15:11 ` ✗ Fi.CI.IGT: failure " Patchwork
  5 siblings, 1 reply; 15+ messages in thread
From: Chris Wilson @ 2018-05-30 11:55 UTC (permalink / raw)
  To: intel-gfx

hrtimer is not reliable enough to assume fixed intervals, and so for even
coarse accuracy (in the face of kasan and similar heavy debugging) we
need to measure the actual interval between samples.

While using a single timestamp to compute the interval does not allow
very fine accuracy (consider the impact of a slow forcewake between
different samples after the timestamp is read), it is much better than
assuming the interval.

v2: Make use of the sample period for estimating the GPU clock cycles,
leaving the frequency calculation (the averaging) to the caller.
Introduce new samplers for reporting cycles instead of Hz.

Testcase: igt/perf_pmu #ivb
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_pmu.c | 44 ++++++++++++++++++++++++++-------
 drivers/gpu/drm/i915/i915_pmu.h |  6 +++++
 include/uapi/drm/i915_drm.h     |  2 ++
 3 files changed, 43 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index dc87797db500..12033e47e3b4 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -86,6 +86,8 @@ static bool pmu_needs_timer(struct drm_i915_private *i915, bool gpu_active)
 	 */
 	enable &= config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY) |
 		  config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY) |
+		  config_enabled_mask(I915_PMU_ACTUAL_CLOCK) |
+		  config_enabled_mask(I915_PMU_REQUESTED_CLOCK) |
 		  ENGINE_SAMPLE_MASK;
 
 	/*
@@ -127,6 +129,7 @@ static void __i915_pmu_maybe_start_timer(struct drm_i915_private *i915)
 {
 	if (!i915->pmu.timer_enabled && pmu_needs_timer(i915, true)) {
 		i915->pmu.timer_enabled = true;
+		i915->pmu.timestamp = ktime_get();
 		hrtimer_start_range_ns(&i915->pmu.timer,
 				       ns_to_ktime(PERIOD), 0,
 				       HRTIMER_MODE_REL_PINNED);
@@ -160,7 +163,7 @@ update_sample(struct i915_pmu_sample *sample, u32 unit, u32 val)
 	sample->cur += mul_u32_u32(val, unit);
 }
 
-static void engines_sample(struct drm_i915_private *dev_priv)
+static void engines_sample(struct drm_i915_private *dev_priv, u64 period)
 {
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
@@ -183,7 +186,7 @@ static void engines_sample(struct drm_i915_private *dev_priv)
 		val = !i915_seqno_passed(current_seqno, last_seqno);
 
 		update_sample(&engine->pmu.sample[I915_SAMPLE_BUSY],
-			      PERIOD, val);
+			      period, val);
 
 		if (val && (engine->pmu.enable &
 		    (BIT(I915_SAMPLE_WAIT) | BIT(I915_SAMPLE_SEMA)))) {
@@ -195,10 +198,10 @@ static void engines_sample(struct drm_i915_private *dev_priv)
 		}
 
 		update_sample(&engine->pmu.sample[I915_SAMPLE_WAIT],
-			      PERIOD, !!(val & RING_WAIT));
+			      period, !!(val & RING_WAIT));
 
 		update_sample(&engine->pmu.sample[I915_SAMPLE_SEMA],
-			      PERIOD, !!(val & RING_WAIT_SEMAPHORE));
+			      period, !!(val & RING_WAIT_SEMAPHORE));
 	}
 
 	if (fw)
@@ -207,10 +210,11 @@ static void engines_sample(struct drm_i915_private *dev_priv)
 	intel_runtime_pm_put(dev_priv);
 }
 
-static void frequency_sample(struct drm_i915_private *dev_priv)
+static void frequency_sample(struct drm_i915_private *dev_priv, u64 period)
 {
 	if (dev_priv->pmu.enable &
-	    config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) {
+	    (config_enabled_mask(I915_PMU_ACTUAL_CLOCK) |
+	     config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY))) {
 		u32 val;
 
 		val = dev_priv->gt_pm.rps.cur_freq;
@@ -223,13 +227,20 @@ static void frequency_sample(struct drm_i915_private *dev_priv)
 
 		update_sample(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_ACT],
 			      1, intel_gpu_freq(dev_priv, val));
+		update_sample(&dev_priv->pmu.sample[__I915_SAMPLE_CLOCK_ACT],
+			      period, intel_gpu_freq(dev_priv, val));
 	}
 
 	if (dev_priv->pmu.enable &
-	    config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)) {
+	    (config_enabled_mask(I915_PMU_REQUESTED_CLOCK) |
+	     config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY))) {
 		update_sample(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_REQ], 1,
 			      intel_gpu_freq(dev_priv,
 					     dev_priv->gt_pm.rps.cur_freq));
+		update_sample(&dev_priv->pmu.sample[__I915_SAMPLE_CLOCK_REQ],
+			      period,
+			      intel_gpu_freq(dev_priv,
+					     dev_priv->gt_pm.rps.cur_freq));
 	}
 }
 
@@ -237,12 +248,17 @@ static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
 {
 	struct drm_i915_private *i915 =
 		container_of(hrtimer, struct drm_i915_private, pmu.timer);
+	ktime_t now, period;
 
 	if (!READ_ONCE(i915->pmu.timer_enabled))
 		return HRTIMER_NORESTART;
 
-	engines_sample(i915);
-	frequency_sample(i915);
+	now = ktime_get();
+	period = ktime_sub(now, i915->pmu.timestamp);
+	i915->pmu.timestamp = now;
+
+	engines_sample(i915, period);
+	frequency_sample(i915, period);
 
 	hrtimer_forward_now(hrtimer, ns_to_ktime(PERIOD));
 	return HRTIMER_RESTART;
@@ -313,11 +329,13 @@ config_status(struct drm_i915_private *i915, u64 config)
 {
 	switch (config) {
 	case I915_PMU_ACTUAL_FREQUENCY:
+	case I915_PMU_ACTUAL_CLOCK:
 		if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
 			/* Requires a mutex for sampling! */
 			return -ENODEV;
 		/* Fall-through. */
 	case I915_PMU_REQUESTED_FREQUENCY:
+	case I915_PMU_REQUESTED_CLOCK:
 		if (INTEL_GEN(i915) < 6)
 			return -ENODEV;
 		break;
@@ -526,6 +544,12 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
 			   div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_REQ].cur,
 				   FREQUENCY);
 			break;
+		case I915_PMU_ACTUAL_CLOCK:
+			val = i915->pmu.sample[__I915_SAMPLE_CLOCK_ACT].cur;
+			break;
+		case I915_PMU_REQUESTED_CLOCK:
+			val = i915->pmu.sample[__I915_SAMPLE_CLOCK_REQ].cur;
+			break;
 		case I915_PMU_INTERRUPTS:
 			val = count_interrupts(i915);
 			break;
@@ -803,6 +827,8 @@ create_event_attributes(struct drm_i915_private *i915)
 		__event(I915_PMU_REQUESTED_FREQUENCY, "requested-frequency", "MHz"),
 		__event(I915_PMU_INTERRUPTS, "interrupts", NULL),
 		__event(I915_PMU_RC6_RESIDENCY, "rc6-residency", "ns"),
+		__event(I915_PMU_ACTUAL_CLOCK, "actual-clock", "Mcycles"),
+		__event(I915_PMU_REQUESTED_CLOCK, "requested-clock", "Mcycles"),
 	};
 	static const struct {
 		enum drm_i915_pmu_engine_sample sample;
diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
index 2ba735299f7c..9c4252d85b5e 100644
--- a/drivers/gpu/drm/i915/i915_pmu.h
+++ b/drivers/gpu/drm/i915/i915_pmu.h
@@ -17,6 +17,8 @@ struct drm_i915_private;
 enum {
 	__I915_SAMPLE_FREQ_ACT = 0,
 	__I915_SAMPLE_FREQ_REQ,
+	__I915_SAMPLE_CLOCK_ACT,
+	__I915_SAMPLE_CLOCK_REQ,
 	__I915_SAMPLE_RC6,
 	__I915_SAMPLE_RC6_ESTIMATED,
 	__I915_NUM_PMU_SAMPLERS
@@ -52,6 +54,10 @@ struct i915_pmu {
 	 * @timer: Timer for internal i915 PMU sampling.
 	 */
 	struct hrtimer timer;
+	/**
+	 * @timestamp: Timestamp of last internal i915 PMU sampling.
+	 */
+	ktime_t timestamp;
 	/**
 	 * @enable: Bitmask of all currently enabled events.
 	 *
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 7f5634ce8e88..61ab71986274 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -139,6 +139,8 @@ enum drm_i915_pmu_engine_sample {
 #define I915_PMU_REQUESTED_FREQUENCY	__I915_PMU_OTHER(1)
 #define I915_PMU_INTERRUPTS		__I915_PMU_OTHER(2)
 #define I915_PMU_RC6_RESIDENCY		__I915_PMU_OTHER(3)
+#define I915_PMU_ACTUAL_CLOCK		__I915_PMU_OTHER(4)
+#define I915_PMU_REQUESTED_CLOCK	__I915_PMU_OTHER(5)
 
 #define I915_PMU_LAST I915_PMU_RC6_RESIDENCY
 
-- 
2.17.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* ✓ Fi.CI.BAT: success for drm/i915/pmu: Measure sampler intervals (rev2)
  2018-05-25 17:11 [PATCH] drm/i915/pmu: Measure sampler intervals Chris Wilson
                   ` (3 preceding siblings ...)
  2018-05-30 11:55 ` [PATCH v2] " Chris Wilson
@ 2018-05-30 13:57 ` Patchwork
  2018-05-30 15:11 ` ✗ Fi.CI.IGT: failure " Patchwork
  5 siblings, 0 replies; 15+ messages in thread
From: Patchwork @ 2018-05-30 13:57 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/pmu: Measure sampler intervals (rev2)
URL   : https://patchwork.freedesktop.org/series/43795/
State : success

== Summary ==

= CI Bug Log - changes from CI_DRM_4257 -> Patchwork_9148 =

== Summary - SUCCESS ==

  No regressions found.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/43795/revisions/2/mbox/

== Known issues ==

  Here are the changes found in Patchwork_9148 that come from known issues:

  === IGT changes ===

    ==== Issues hit ====

    igt@drv_module_reload@basic-no-display:
      fi-glk-j4005:       PASS -> DMESG-WARN (fdo#106725)

    igt@kms_flip@basic-flip-vs-dpms:
      fi-glk-j4005:       PASS -> DMESG-WARN (fdo#106000)

    igt@kms_flip@basic-flip-vs-modeset:
      fi-glk-j4005:       PASS -> DMESG-WARN (fdo#106097, fdo#106000)

    igt@kms_flip@basic-flip-vs-wf_vblank:
      fi-glk-j4005:       PASS -> FAIL (fdo#100368)
      fi-cfl-s3:          PASS -> FAIL (fdo#100368, fdo#103928)

    
    ==== Possible fixes ====

    igt@kms_flip@basic-flip-vs-wf_vblank:
      fi-cnl-psr:         FAIL (fdo#100368) -> PASS

    igt@kms_frontbuffer_tracking@basic:
      fi-bsw-n3050:       INCOMPLETE (fdo#106729) -> PASS

    igt@kms_pipe_crc_basic@nonblocking-crc-pipe-c:
      fi-skl-guc:         FAIL (fdo#103191, fdo#104724) -> PASS

    igt@kms_pipe_crc_basic@read-crc-pipe-c:
      fi-glk-j4005:       DMESG-WARN (fdo#106097, fdo#106000) -> PASS

    igt@prime_vgem@basic-fence-flip:
      fi-ilk-650:         FAIL (fdo#104008) -> PASS

    
  fdo#100368 https://bugs.freedesktop.org/show_bug.cgi?id=100368
  fdo#103191 https://bugs.freedesktop.org/show_bug.cgi?id=103191
  fdo#103928 https://bugs.freedesktop.org/show_bug.cgi?id=103928
  fdo#104008 https://bugs.freedesktop.org/show_bug.cgi?id=104008
  fdo#104724 https://bugs.freedesktop.org/show_bug.cgi?id=104724
  fdo#106000 https://bugs.freedesktop.org/show_bug.cgi?id=106000
  fdo#106097 https://bugs.freedesktop.org/show_bug.cgi?id=106097
  fdo#106725 https://bugs.freedesktop.org/show_bug.cgi?id=106725
  fdo#106729 https://bugs.freedesktop.org/show_bug.cgi?id=106729


== Participating hosts (45 -> 39) ==

  Missing    (6): fi-ilk-m540 fi-byt-squawks fi-bsw-cyan fi-ctg-p8600 fi-cfl-u2 fi-skl-6700hq 


== Build changes ==

    * Linux: CI_DRM_4257 -> Patchwork_9148

  CI_DRM_4257: 8aac35d26057479982a346c0e9cd57c2e930b7e1 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4501: 6796a604bab6df9c84af149e799902360afdd157 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_9148: 5d03705baa619c391ea3b8ce9e531f0c19e39cfc @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

5d03705baa61 drm/i915/pmu: Measure sampler intervals

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_9148/issues.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2] drm/i915/pmu: Measure sampler intervals
  2018-05-30 11:55 ` [PATCH v2] " Chris Wilson
@ 2018-05-30 14:37   ` Tvrtko Ursulin
  2018-05-30 14:55     ` Chris Wilson
  0 siblings, 1 reply; 15+ messages in thread
From: Tvrtko Ursulin @ 2018-05-30 14:37 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 30/05/2018 12:55, Chris Wilson wrote:
> hrtimer is not reliable enough to assume fixed intervals, and so even
> coarse accuracy (in the face of kasan and similar heavy debugging) we
> need to measure the actual interval between sample.
> 
> While using a single timestamp to compute the interval does not allow
> very fine accuracy (consider the impact of a slow forcewake between
> different samples after the timestamp is read) is much better than
> assuming the interval.
> 
> v2: Make use of the sample period for estimating the GPU clock cycles,
> leaving the frequency calculation (the averaging) to the caller.
> Introduce new samplers for reporting cycles instead of Hz.
> 
> Testcase: igt/perf_pmu #ivb
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_pmu.c | 44 ++++++++++++++++++++++++++-------
>   drivers/gpu/drm/i915/i915_pmu.h |  6 +++++
>   include/uapi/drm/i915_drm.h     |  2 ++
>   3 files changed, 43 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> index dc87797db500..12033e47e3b4 100644
> --- a/drivers/gpu/drm/i915/i915_pmu.c
> +++ b/drivers/gpu/drm/i915/i915_pmu.c
> @@ -86,6 +86,8 @@ static bool pmu_needs_timer(struct drm_i915_private *i915, bool gpu_active)
>   	 */
>   	enable &= config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY) |
>   		  config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY) |
> +		  config_enabled_mask(I915_PMU_ACTUAL_CLOCK) |
> +		  config_enabled_mask(I915_PMU_REQUESTED_CLOCK) |
>   		  ENGINE_SAMPLE_MASK;
>   
>   	/*
> @@ -127,6 +129,7 @@ static void __i915_pmu_maybe_start_timer(struct drm_i915_private *i915)
>   {
>   	if (!i915->pmu.timer_enabled && pmu_needs_timer(i915, true)) {
>   		i915->pmu.timer_enabled = true;
> +		i915->pmu.timestamp = ktime_get();
>   		hrtimer_start_range_ns(&i915->pmu.timer,
>   				       ns_to_ktime(PERIOD), 0,
>   				       HRTIMER_MODE_REL_PINNED);
> @@ -160,7 +163,7 @@ update_sample(struct i915_pmu_sample *sample, u32 unit, u32 val)
>   	sample->cur += mul_u32_u32(val, unit);
>   }
>   
> -static void engines_sample(struct drm_i915_private *dev_priv)
> +static void engines_sample(struct drm_i915_private *dev_priv, u64 period)
>   {
>   	struct intel_engine_cs *engine;
>   	enum intel_engine_id id;
> @@ -183,7 +186,7 @@ static void engines_sample(struct drm_i915_private *dev_priv)
>   		val = !i915_seqno_passed(current_seqno, last_seqno);
>   
>   		update_sample(&engine->pmu.sample[I915_SAMPLE_BUSY],
> -			      PERIOD, val);
> +			      period, val);
>   
>   		if (val && (engine->pmu.enable &
>   		    (BIT(I915_SAMPLE_WAIT) | BIT(I915_SAMPLE_SEMA)))) {
> @@ -195,10 +198,10 @@ static void engines_sample(struct drm_i915_private *dev_priv)
>   		}
>   
>   		update_sample(&engine->pmu.sample[I915_SAMPLE_WAIT],
> -			      PERIOD, !!(val & RING_WAIT));
> +			      period, !!(val & RING_WAIT));
>   
>   		update_sample(&engine->pmu.sample[I915_SAMPLE_SEMA],
> -			      PERIOD, !!(val & RING_WAIT_SEMAPHORE));
> +			      period, !!(val & RING_WAIT_SEMAPHORE));
>   	}
>   
>   	if (fw)
> @@ -207,10 +210,11 @@ static void engines_sample(struct drm_i915_private *dev_priv)
>   	intel_runtime_pm_put(dev_priv);
>   }
>   
> -static void frequency_sample(struct drm_i915_private *dev_priv)
> +static void frequency_sample(struct drm_i915_private *dev_priv, u64 period)
>   {
>   	if (dev_priv->pmu.enable &
> -	    config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) {
> +	    (config_enabled_mask(I915_PMU_ACTUAL_CLOCK) |
> +	     config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY))) {
>   		u32 val;
>   
>   		val = dev_priv->gt_pm.rps.cur_freq;
> @@ -223,13 +227,20 @@ static void frequency_sample(struct drm_i915_private *dev_priv)
>   
>   		update_sample(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_ACT],
>   			      1, intel_gpu_freq(dev_priv, val));
> +		update_sample(&dev_priv->pmu.sample[__I915_SAMPLE_CLOCK_ACT],
> +			      period, intel_gpu_freq(dev_priv, val));

Cache intel_gpu_freq in a local.

>   	}
>   
>   	if (dev_priv->pmu.enable &
> -	    config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)) {
> +	    (config_enabled_mask(I915_PMU_REQUESTED_CLOCK) |
> +	     config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY))) {
>   		update_sample(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_REQ], 1,
>   			      intel_gpu_freq(dev_priv,
>   					     dev_priv->gt_pm.rps.cur_freq));
> +		update_sample(&dev_priv->pmu.sample[__I915_SAMPLE_CLOCK_REQ],
> +			      period,
> +			      intel_gpu_freq(dev_priv,
> +					     dev_priv->gt_pm.rps.cur_freq));

Same here.

>   	}
>   }
>   
> @@ -237,12 +248,17 @@ static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
>   {
>   	struct drm_i915_private *i915 =
>   		container_of(hrtimer, struct drm_i915_private, pmu.timer);
> +	ktime_t now, period;
>   
>   	if (!READ_ONCE(i915->pmu.timer_enabled))
>   		return HRTIMER_NORESTART;
>   
> -	engines_sample(i915);
> -	frequency_sample(i915);
> +	now = ktime_get();
> +	period = ktime_sub(now, i915->pmu.timestamp);
> +	i915->pmu.timestamp = now;
> +
> +	engines_sample(i915, period);
> +	frequency_sample(i915, period);
>   
>   	hrtimer_forward_now(hrtimer, ns_to_ktime(PERIOD));
>   	return HRTIMER_RESTART;
> @@ -313,11 +329,13 @@ config_status(struct drm_i915_private *i915, u64 config)
>   {
>   	switch (config) {
>   	case I915_PMU_ACTUAL_FREQUENCY:
> +	case I915_PMU_ACTUAL_CLOCK:
>   		if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
>   			/* Requires a mutex for sampling! */
>   			return -ENODEV;
>   		/* Fall-through. */
>   	case I915_PMU_REQUESTED_FREQUENCY:
> +	case I915_PMU_REQUESTED_CLOCK:
>   		if (INTEL_GEN(i915) < 6)
>   			return -ENODEV;
>   		break;
> @@ -526,6 +544,12 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
>   			   div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_REQ].cur,
>   				   FREQUENCY);
>   			break;
> +		case I915_PMU_ACTUAL_CLOCK:
> +			val = i915->pmu.sample[__I915_SAMPLE_CLOCK_ACT].cur;
> +			break;
> +		case I915_PMU_REQUESTED_CLOCK:
> +			val = i915->pmu.sample[__I915_SAMPLE_CLOCK_REQ].cur;
> +			break;
>   		case I915_PMU_INTERRUPTS:
>   			val = count_interrupts(i915);
>   			break;
> @@ -803,6 +827,8 @@ create_event_attributes(struct drm_i915_private *i915)
>   		__event(I915_PMU_REQUESTED_FREQUENCY, "requested-frequency", "MHz"),
>   		__event(I915_PMU_INTERRUPTS, "interrupts", NULL),
>   		__event(I915_PMU_RC6_RESIDENCY, "rc6-residency", "ns"),
> +		__event(I915_PMU_ACTUAL_CLOCK, "actual-clock", "Mcycles"),
> +		__event(I915_PMU_REQUESTED_CLOCK, "requested-clock", "Mcycles"),
>   	};
>   	static const struct {
>   		enum drm_i915_pmu_engine_sample sample;
> diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
> index 2ba735299f7c..9c4252d85b5e 100644
> --- a/drivers/gpu/drm/i915/i915_pmu.h
> +++ b/drivers/gpu/drm/i915/i915_pmu.h
> @@ -17,6 +17,8 @@ struct drm_i915_private;
>   enum {
>   	__I915_SAMPLE_FREQ_ACT = 0,
>   	__I915_SAMPLE_FREQ_REQ,
> +	__I915_SAMPLE_CLOCK_ACT,
> +	__I915_SAMPLE_CLOCK_REQ,
>   	__I915_SAMPLE_RC6,
>   	__I915_SAMPLE_RC6_ESTIMATED,
>   	__I915_NUM_PMU_SAMPLERS
> @@ -52,6 +54,10 @@ struct i915_pmu {
>   	 * @timer: Timer for internal i915 PMU sampling.
>   	 */
>   	struct hrtimer timer;
> +	/**
> +	 * @timestamp: Timestamp of last internal i915 PMU sampling.
> +	 */
> +	ktime_t timestamp;
>   	/**
>   	 * @enable: Bitmask of all currently enabled events.
>   	 *
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 7f5634ce8e88..61ab71986274 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -139,6 +139,8 @@ enum drm_i915_pmu_engine_sample {
>   #define I915_PMU_REQUESTED_FREQUENCY	__I915_PMU_OTHER(1)
>   #define I915_PMU_INTERRUPTS		__I915_PMU_OTHER(2)
>   #define I915_PMU_RC6_RESIDENCY		__I915_PMU_OTHER(3)
> +#define I915_PMU_ACTUAL_CLOCK		__I915_PMU_OTHER(4)
> +#define I915_PMU_REQUESTED_CLOCK	__I915_PMU_OTHER(5)
>   
>   #define I915_PMU_LAST I915_PMU_RC6_RESIDENCY

Bump this one.

I want to know if we could get away without introducing a new pair of 
counters. For instance, would a running average over a period do for 
frequency readout? It depends on what kind of error we are facing. Or a 
moving average for some period? I would explore that but don't have an 
Ivybridge, so could you have a look?

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2] drm/i915/pmu: Measure sampler intervals
  2018-05-30 14:37   ` Tvrtko Ursulin
@ 2018-05-30 14:55     ` Chris Wilson
  2018-05-30 15:27       ` Tvrtko Ursulin
  0 siblings, 1 reply; 15+ messages in thread
From: Chris Wilson @ 2018-05-30 14:55 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-05-30 15:37:18)
> 
> On 30/05/2018 12:55, Chris Wilson wrote:
> > hrtimer is not reliable enough to assume fixed intervals, and so for even
> > coarse accuracy (in the face of kasan and similar heavy debugging) we
> > need to measure the actual interval between samples.
> > 
> > While using a single timestamp to compute the interval does not allow
> > very fine accuracy (consider the impact of a slow forcewake between
> > different samples after the timestamp is read), it is much better than
> > assuming the interval.
> > 
> > v2: Make use of the sample period for estimating the GPU clock cycles,
> > leaving the frequency calculation (the averaging) to the caller.
> > Introduce new samplers for reporting cycles instead of Hz.
> > 
> > Testcase: igt/perf_pmu #ivb
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > ---
> >   drivers/gpu/drm/i915/i915_pmu.c | 44 ++++++++++++++++++++++++++-------
> >   drivers/gpu/drm/i915/i915_pmu.h |  6 +++++
> >   include/uapi/drm/i915_drm.h     |  2 ++
> >   3 files changed, 43 insertions(+), 9 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> > index dc87797db500..12033e47e3b4 100644
> > --- a/drivers/gpu/drm/i915/i915_pmu.c
> > +++ b/drivers/gpu/drm/i915/i915_pmu.c
> > @@ -86,6 +86,8 @@ static bool pmu_needs_timer(struct drm_i915_private *i915, bool gpu_active)
> >        */
> >       enable &= config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY) |
> >                 config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY) |
> > +               config_enabled_mask(I915_PMU_ACTUAL_CLOCK) |
> > +               config_enabled_mask(I915_PMU_REQUESTED_CLOCK) |
> >                 ENGINE_SAMPLE_MASK;
> >   
> >       /*
> > @@ -127,6 +129,7 @@ static void __i915_pmu_maybe_start_timer(struct drm_i915_private *i915)
> >   {
> >       if (!i915->pmu.timer_enabled && pmu_needs_timer(i915, true)) {
> >               i915->pmu.timer_enabled = true;
> > +             i915->pmu.timestamp = ktime_get();
> >               hrtimer_start_range_ns(&i915->pmu.timer,
> >                                      ns_to_ktime(PERIOD), 0,
> >                                      HRTIMER_MODE_REL_PINNED);
> > @@ -160,7 +163,7 @@ update_sample(struct i915_pmu_sample *sample, u32 unit, u32 val)
> >       sample->cur += mul_u32_u32(val, unit);
> >   }
> >   
> > -static void engines_sample(struct drm_i915_private *dev_priv)
> > +static void engines_sample(struct drm_i915_private *dev_priv, u64 period)
> >   {
> >       struct intel_engine_cs *engine;
> >       enum intel_engine_id id;
> > @@ -183,7 +186,7 @@ static void engines_sample(struct drm_i915_private *dev_priv)
> >               val = !i915_seqno_passed(current_seqno, last_seqno);
> >   
> >               update_sample(&engine->pmu.sample[I915_SAMPLE_BUSY],
> > -                           PERIOD, val);
> > +                           period, val);
> >   
> >               if (val && (engine->pmu.enable &
> >                   (BIT(I915_SAMPLE_WAIT) | BIT(I915_SAMPLE_SEMA)))) {
> > @@ -195,10 +198,10 @@ static void engines_sample(struct drm_i915_private *dev_priv)
> >               }
> >   
> >               update_sample(&engine->pmu.sample[I915_SAMPLE_WAIT],
> > -                           PERIOD, !!(val & RING_WAIT));
> > +                           period, !!(val & RING_WAIT));
> >   
> >               update_sample(&engine->pmu.sample[I915_SAMPLE_SEMA],
> > -                           PERIOD, !!(val & RING_WAIT_SEMAPHORE));
> > +                           period, !!(val & RING_WAIT_SEMAPHORE));
> >       }
> >   
> >       if (fw)
> > @@ -207,10 +210,11 @@ static void engines_sample(struct drm_i915_private *dev_priv)
> >       intel_runtime_pm_put(dev_priv);
> >   }
> >   
> > -static void frequency_sample(struct drm_i915_private *dev_priv)
> > +static void frequency_sample(struct drm_i915_private *dev_priv, u64 period)
> >   {
> >       if (dev_priv->pmu.enable &
> > -         config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) {
> > +         (config_enabled_mask(I915_PMU_ACTUAL_CLOCK) |
> > +          config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY))) {
> >               u32 val;
> >   
> >               val = dev_priv->gt_pm.rps.cur_freq;
> > @@ -223,13 +227,20 @@ static void frequency_sample(struct drm_i915_private *dev_priv)
> >   
> >               update_sample(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_ACT],
> >                             1, intel_gpu_freq(dev_priv, val));
> > +             update_sample(&dev_priv->pmu.sample[__I915_SAMPLE_CLOCK_ACT],
> > +                           period, intel_gpu_freq(dev_priv, val));
> 
> Cache intel_gpu_freq in a local.
> 
> >       }
> >   
> >       if (dev_priv->pmu.enable &
> > -         config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)) {
> > +         (config_enabled_mask(I915_PMU_REQUESTED_CLOCK) |
> > +          config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY))) {
> >               update_sample(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_REQ], 1,
> >                             intel_gpu_freq(dev_priv,
> >                                            dev_priv->gt_pm.rps.cur_freq));
> > +             update_sample(&dev_priv->pmu.sample[__I915_SAMPLE_CLOCK_REQ],
> > +                           period,
> > +                           intel_gpu_freq(dev_priv,
> > +                                          dev_priv->gt_pm.rps.cur_freq));
> 
> Same here.
> 
> >       }
> >   }
> >   
> > @@ -237,12 +248,17 @@ static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
> >   {
> >       struct drm_i915_private *i915 =
> >               container_of(hrtimer, struct drm_i915_private, pmu.timer);
> > +     ktime_t now, period;
> >   
> >       if (!READ_ONCE(i915->pmu.timer_enabled))
> >               return HRTIMER_NORESTART;
> >   
> > -     engines_sample(i915);
> > -     frequency_sample(i915);
> > +     now = ktime_get();
> > +     period = ktime_sub(now, i915->pmu.timestamp);
> > +     i915->pmu.timestamp = now;
> > +
> > +     engines_sample(i915, period);
> > +     frequency_sample(i915, period);
> >   
> >       hrtimer_forward_now(hrtimer, ns_to_ktime(PERIOD));
> >       return HRTIMER_RESTART;
> > @@ -313,11 +329,13 @@ config_status(struct drm_i915_private *i915, u64 config)
> >   {
> >       switch (config) {
> >       case I915_PMU_ACTUAL_FREQUENCY:
> > +     case I915_PMU_ACTUAL_CLOCK:
> >               if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
> >                       /* Requires a mutex for sampling! */
> >                       return -ENODEV;
> >               /* Fall-through. */
> >       case I915_PMU_REQUESTED_FREQUENCY:
> > +     case I915_PMU_REQUESTED_CLOCK:
> >               if (INTEL_GEN(i915) < 6)
> >                       return -ENODEV;
> >               break;
> > @@ -526,6 +544,12 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
> >                          div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_REQ].cur,
> >                                  FREQUENCY);
> >                       break;
> > +             case I915_PMU_ACTUAL_CLOCK:
> > +                     val = i915->pmu.sample[__I915_SAMPLE_CLOCK_ACT].cur;
> > +                     break;
> > +             case I915_PMU_REQUESTED_CLOCK:
> > +                     val = i915->pmu.sample[__I915_SAMPLE_CLOCK_REQ].cur;
> > +                     break;
> >               case I915_PMU_INTERRUPTS:
> >                       val = count_interrupts(i915);
> >                       break;
> > @@ -803,6 +827,8 @@ create_event_attributes(struct drm_i915_private *i915)
> >               __event(I915_PMU_REQUESTED_FREQUENCY, "requested-frequency", "MHz"),
> >               __event(I915_PMU_INTERRUPTS, "interrupts", NULL),
> >               __event(I915_PMU_RC6_RESIDENCY, "rc6-residency", "ns"),
> > +             __event(I915_PMU_ACTUAL_CLOCK, "actual-clock", "Mcycles"),
> > +             __event(I915_PMU_REQUESTED_CLOCK, "requested-clock", "Mcycles"),
> >       };
> >       static const struct {
> >               enum drm_i915_pmu_engine_sample sample;
> > diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
> > index 2ba735299f7c..9c4252d85b5e 100644
> > --- a/drivers/gpu/drm/i915/i915_pmu.h
> > +++ b/drivers/gpu/drm/i915/i915_pmu.h
> > @@ -17,6 +17,8 @@ struct drm_i915_private;
> >   enum {
> >       __I915_SAMPLE_FREQ_ACT = 0,
> >       __I915_SAMPLE_FREQ_REQ,
> > +     __I915_SAMPLE_CLOCK_ACT,
> > +     __I915_SAMPLE_CLOCK_REQ,
> >       __I915_SAMPLE_RC6,
> >       __I915_SAMPLE_RC6_ESTIMATED,
> >       __I915_NUM_PMU_SAMPLERS
> > @@ -52,6 +54,10 @@ struct i915_pmu {
> >        * @timer: Timer for internal i915 PMU sampling.
> >        */
> >       struct hrtimer timer;
> > +     /**
> > +      * @timestamp: Timestamp of last internal i915 PMU sampling.
> > +      */
> > +     ktime_t timestamp;
> >       /**
> >        * @enable: Bitmask of all currently enabled events.
> >        *
> > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> > index 7f5634ce8e88..61ab71986274 100644
> > --- a/include/uapi/drm/i915_drm.h
> > +++ b/include/uapi/drm/i915_drm.h
> > @@ -139,6 +139,8 @@ enum drm_i915_pmu_engine_sample {
> >   #define I915_PMU_REQUESTED_FREQUENCY        __I915_PMU_OTHER(1)
> >   #define I915_PMU_INTERRUPTS         __I915_PMU_OTHER(2)
> >   #define I915_PMU_RC6_RESIDENCY              __I915_PMU_OTHER(3)
> > +#define I915_PMU_ACTUAL_CLOCK                __I915_PMU_OTHER(4)
> > +#define I915_PMU_REQUESTED_CLOCK     __I915_PMU_OTHER(5)
> >   
> >   #define I915_PMU_LAST I915_PMU_RC6_RESIDENCY
> 
> Bump this one.
> 
> I want to know if we could get away without introducing a new pair of 
> counters.

I wouldn't mind having my cycle counters back in the api ;)
They just seem to be easier for me to work with in userspace.

Or do you mean if we can just redefine the existing sampler?

> For instance, would a running average over a period do for the 
> frequency readout? It depends on what kind of error we are facing. Or a 
> moving average over some period? I would explore that but don't have an 
> Ivybridge, so could you have a look?

The crucial part is that we don't define the period, the user does.

Heh, once again we have two different ideas about what we want to
measure :) I'd take cycles; with your suggestion we may as well do
instantaneous frequency and sample from within perf_event_read (no
averaging, or just a short ewma)?
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* ✗ Fi.CI.IGT: failure for drm/i915/pmu: Measure sampler intervals (rev2)
  2018-05-25 17:11 [PATCH] drm/i915/pmu: Measure sampler intervals Chris Wilson
                   ` (4 preceding siblings ...)
  2018-05-30 13:57 ` ✓ Fi.CI.BAT: success for drm/i915/pmu: Measure sampler intervals (rev2) Patchwork
@ 2018-05-30 15:11 ` Patchwork
  5 siblings, 0 replies; 15+ messages in thread
From: Patchwork @ 2018-05-30 15:11 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/pmu: Measure sampler intervals (rev2)
URL   : https://patchwork.freedesktop.org/series/43795/
State : failure

== Summary ==

= CI Bug Log - changes from CI_DRM_4257_full -> Patchwork_9148_full =

== Summary - FAILURE ==

  Serious unknown changes coming with Patchwork_9148_full absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_9148_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/43795/revisions/2/mbox/

== Possible new issues ==

  Here are the unknown changes that may have been introduced in Patchwork_9148_full:

  === IGT changes ===

    ==== Possible regressions ====

    igt@perf_pmu@other-init-4:
      shard-kbl:          PASS -> DMESG-FAIL

    igt@perf_pmu@other-read-4:
      shard-glk:          PASS -> DMESG-FAIL

    
    ==== Warnings ====

    igt@gem_mocs_settings@mocs-rc6-render:
      shard-kbl:          PASS -> SKIP +1

    
== Known issues ==

  Here are the changes found in Patchwork_9148_full that come from known issues:

  === IGT changes ===

    ==== Issues hit ====

    igt@gem_ctx_isolation@bcs0-dirty-switch:
      shard-kbl:          PASS -> INCOMPLETE (fdo#103665) +1

    igt@kms_atomic_transition@1x-modeset-transitions-nonblocking:
      shard-glk:          PASS -> FAIL (fdo#105703)

    igt@kms_cursor_legacy@2x-nonblocking-modeset-vs-cursor-atomic:
      shard-glk:          PASS -> FAIL (fdo#106509, fdo#105454)

    igt@kms_flip@flip-vs-expired-vblank:
      shard-glk:          PASS -> FAIL (fdo#102887, fdo#105363)

    igt@kms_flip_tiling@flip-x-tiled:
      shard-glk:          PASS -> FAIL (fdo#104724)

    igt@perf_pmu@other-init-4:
      shard-snb:          PASS -> INCOMPLETE (fdo#105411) +1
      shard-apl:          PASS -> INCOMPLETE (fdo#103927) +1

    igt@perf_pmu@other-read-4:
      shard-hsw:          PASS -> INCOMPLETE (fdo#103540) +2

    igt@prime_vgem@wait-render:
      shard-glk:          PASS -> INCOMPLETE (k.org#198133, fdo#103359) +2

    
    ==== Possible fixes ====

    igt@drv_selftest@live_hangcheck:
      shard-kbl:          DMESG-FAIL (fdo#106560) -> PASS

    igt@gem_exec_parallel@render-fds:
      shard-snb:          INCOMPLETE (fdo#105411) -> PASS

    igt@kms_flip@flip-vs-expired-vblank-interruptible:
      shard-glk:          FAIL (fdo#105363) -> PASS

    igt@kms_flip@plain-flip-ts-check:
      shard-glk:          FAIL (fdo#100368) -> PASS

    igt@kms_flip_tiling@flip-to-x-tiled:
      shard-glk:          FAIL (fdo#104724) -> PASS

    igt@kms_flip_tiling@flip-to-y-tiled:
      shard-glk:          FAIL (fdo#104724, fdo#103822) -> PASS

    igt@perf_pmu@rc6-runtime-pm:
      shard-kbl:          FAIL (fdo#105010) -> PASS

    
  fdo#100368 https://bugs.freedesktop.org/show_bug.cgi?id=100368
  fdo#102887 https://bugs.freedesktop.org/show_bug.cgi?id=102887
  fdo#103359 https://bugs.freedesktop.org/show_bug.cgi?id=103359
  fdo#103540 https://bugs.freedesktop.org/show_bug.cgi?id=103540
  fdo#103665 https://bugs.freedesktop.org/show_bug.cgi?id=103665
  fdo#103822 https://bugs.freedesktop.org/show_bug.cgi?id=103822
  fdo#103927 https://bugs.freedesktop.org/show_bug.cgi?id=103927
  fdo#104724 https://bugs.freedesktop.org/show_bug.cgi?id=104724
  fdo#105010 https://bugs.freedesktop.org/show_bug.cgi?id=105010
  fdo#105363 https://bugs.freedesktop.org/show_bug.cgi?id=105363
  fdo#105411 https://bugs.freedesktop.org/show_bug.cgi?id=105411
  fdo#105454 https://bugs.freedesktop.org/show_bug.cgi?id=105454
  fdo#105703 https://bugs.freedesktop.org/show_bug.cgi?id=105703
  fdo#106509 https://bugs.freedesktop.org/show_bug.cgi?id=106509
  fdo#106560 https://bugs.freedesktop.org/show_bug.cgi?id=106560
  k.org#198133 https://bugzilla.kernel.org/show_bug.cgi?id=198133


== Participating hosts (5 -> 5) ==

  No changes in participating hosts


== Build changes ==

    * Linux: CI_DRM_4257 -> Patchwork_9148

  CI_DRM_4257: 8aac35d26057479982a346c0e9cd57c2e930b7e1 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4501: 6796a604bab6df9c84af149e799902360afdd157 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_9148: 5d03705baa619c391ea3b8ce9e531f0c19e39cfc @ git://anongit.freedesktop.org/gfx-ci/linux

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_9148/shards.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2] drm/i915/pmu: Measure sampler intervals
  2018-05-30 14:55     ` Chris Wilson
@ 2018-05-30 15:27       ` Tvrtko Ursulin
  2018-05-30 15:37         ` Chris Wilson
  0 siblings, 1 reply; 15+ messages in thread
From: Tvrtko Ursulin @ 2018-05-30 15:27 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 30/05/2018 15:55, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-05-30 15:37:18)
>>
>> On 30/05/2018 12:55, Chris Wilson wrote:
>>> hrtimer is not reliable enough to assume fixed intervals, and so for even
>>> coarse accuracy (in the face of kasan and similar heavy debugging) we
>>> need to measure the actual interval between samples.
>>>
>>> While using a single timestamp to compute the interval does not allow
>>> very fine accuracy (consider the impact of a slow forcewake between
>>> different samples after the timestamp is read), it is much better than
>>> assuming the interval.
>>>
>>> v2: Make use of the sample period for estimating the GPU clock cycles,
>>> leaving the frequency calculation (the averaging) to the caller.
>>> Introduce new samplers for reporting cycles instead of Hz.
>>>
>>> Testcase: igt/perf_pmu #ivb
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>> ---
>>>    drivers/gpu/drm/i915/i915_pmu.c | 44 ++++++++++++++++++++++++++-------
>>>    drivers/gpu/drm/i915/i915_pmu.h |  6 +++++
>>>    include/uapi/drm/i915_drm.h     |  2 ++
>>>    3 files changed, 43 insertions(+), 9 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
>>> index dc87797db500..12033e47e3b4 100644
>>> --- a/drivers/gpu/drm/i915/i915_pmu.c
>>> +++ b/drivers/gpu/drm/i915/i915_pmu.c
>>> @@ -86,6 +86,8 @@ static bool pmu_needs_timer(struct drm_i915_private *i915, bool gpu_active)
>>>         */
>>>        enable &= config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY) |
>>>                  config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY) |
>>> +               config_enabled_mask(I915_PMU_ACTUAL_CLOCK) |
>>> +               config_enabled_mask(I915_PMU_REQUESTED_CLOCK) |
>>>                  ENGINE_SAMPLE_MASK;
>>>    
>>>        /*
>>> @@ -127,6 +129,7 @@ static void __i915_pmu_maybe_start_timer(struct drm_i915_private *i915)
>>>    {
>>>        if (!i915->pmu.timer_enabled && pmu_needs_timer(i915, true)) {
>>>                i915->pmu.timer_enabled = true;
>>> +             i915->pmu.timestamp = ktime_get();
>>>                hrtimer_start_range_ns(&i915->pmu.timer,
>>>                                       ns_to_ktime(PERIOD), 0,
>>>                                       HRTIMER_MODE_REL_PINNED);
>>> @@ -160,7 +163,7 @@ update_sample(struct i915_pmu_sample *sample, u32 unit, u32 val)
>>>        sample->cur += mul_u32_u32(val, unit);
>>>    }
>>>    
>>> -static void engines_sample(struct drm_i915_private *dev_priv)
>>> +static void engines_sample(struct drm_i915_private *dev_priv, u64 period)
>>>    {
>>>        struct intel_engine_cs *engine;
>>>        enum intel_engine_id id;
>>> @@ -183,7 +186,7 @@ static void engines_sample(struct drm_i915_private *dev_priv)
>>>                val = !i915_seqno_passed(current_seqno, last_seqno);
>>>    
>>>                update_sample(&engine->pmu.sample[I915_SAMPLE_BUSY],
>>> -                           PERIOD, val);
>>> +                           period, val);
>>>    
>>>                if (val && (engine->pmu.enable &
>>>                    (BIT(I915_SAMPLE_WAIT) | BIT(I915_SAMPLE_SEMA)))) {
>>> @@ -195,10 +198,10 @@ static void engines_sample(struct drm_i915_private *dev_priv)
>>>                }
>>>    
>>>                update_sample(&engine->pmu.sample[I915_SAMPLE_WAIT],
>>> -                           PERIOD, !!(val & RING_WAIT));
>>> +                           period, !!(val & RING_WAIT));
>>>    
>>>                update_sample(&engine->pmu.sample[I915_SAMPLE_SEMA],
>>> -                           PERIOD, !!(val & RING_WAIT_SEMAPHORE));
>>> +                           period, !!(val & RING_WAIT_SEMAPHORE));
>>>        }
>>>    
>>>        if (fw)
>>> @@ -207,10 +210,11 @@ static void engines_sample(struct drm_i915_private *dev_priv)
>>>        intel_runtime_pm_put(dev_priv);
>>>    }
>>>    
>>> -static void frequency_sample(struct drm_i915_private *dev_priv)
>>> +static void frequency_sample(struct drm_i915_private *dev_priv, u64 period)
>>>    {
>>>        if (dev_priv->pmu.enable &
>>> -         config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) {
>>> +         (config_enabled_mask(I915_PMU_ACTUAL_CLOCK) |
>>> +          config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY))) {
>>>                u32 val;
>>>    
>>>                val = dev_priv->gt_pm.rps.cur_freq;
>>> @@ -223,13 +227,20 @@ static void frequency_sample(struct drm_i915_private *dev_priv)
>>>    
>>>                update_sample(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_ACT],
>>>                              1, intel_gpu_freq(dev_priv, val));
>>> +             update_sample(&dev_priv->pmu.sample[__I915_SAMPLE_CLOCK_ACT],
>>> +                           period, intel_gpu_freq(dev_priv, val));
>>
>> Cache intel_gpu_freq in a local.
>>
>>>        }
>>>    
>>>        if (dev_priv->pmu.enable &
>>> -         config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)) {
>>> +         (config_enabled_mask(I915_PMU_REQUESTED_CLOCK) |
>>> +          config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY))) {
>>>                update_sample(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_REQ], 1,
>>>                              intel_gpu_freq(dev_priv,
>>>                                             dev_priv->gt_pm.rps.cur_freq));
>>> +             update_sample(&dev_priv->pmu.sample[__I915_SAMPLE_CLOCK_REQ],
>>> +                           period,
>>> +                           intel_gpu_freq(dev_priv,
>>> +                                          dev_priv->gt_pm.rps.cur_freq));
>>
>> Same here.
>>
>>>        }
>>>    }
>>>    
>>> @@ -237,12 +248,17 @@ static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
>>>    {
>>>        struct drm_i915_private *i915 =
>>>                container_of(hrtimer, struct drm_i915_private, pmu.timer);
>>> +     ktime_t now, period;
>>>    
>>>        if (!READ_ONCE(i915->pmu.timer_enabled))
>>>                return HRTIMER_NORESTART;
>>>    
>>> -     engines_sample(i915);
>>> -     frequency_sample(i915);
>>> +     now = ktime_get();
>>> +     period = ktime_sub(now, i915->pmu.timestamp);
>>> +     i915->pmu.timestamp = now;
>>> +
>>> +     engines_sample(i915, period);
>>> +     frequency_sample(i915, period);
>>>    
>>>        hrtimer_forward_now(hrtimer, ns_to_ktime(PERIOD));
>>>        return HRTIMER_RESTART;
>>> @@ -313,11 +329,13 @@ config_status(struct drm_i915_private *i915, u64 config)
>>>    {
>>>        switch (config) {
>>>        case I915_PMU_ACTUAL_FREQUENCY:
>>> +     case I915_PMU_ACTUAL_CLOCK:
>>>                if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
>>>                        /* Requires a mutex for sampling! */
>>>                        return -ENODEV;
>>>                /* Fall-through. */
>>>        case I915_PMU_REQUESTED_FREQUENCY:
>>> +     case I915_PMU_REQUESTED_CLOCK:
>>>                if (INTEL_GEN(i915) < 6)
>>>                        return -ENODEV;
>>>                break;
>>> @@ -526,6 +544,12 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
>>>                           div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_REQ].cur,
>>>                                   FREQUENCY);
>>>                        break;
>>> +             case I915_PMU_ACTUAL_CLOCK:
>>> +                     val = i915->pmu.sample[__I915_SAMPLE_CLOCK_ACT].cur;
>>> +                     break;
>>> +             case I915_PMU_REQUESTED_CLOCK:
>>> +                     val = i915->pmu.sample[__I915_SAMPLE_CLOCK_REQ].cur;
>>> +                     break;
>>>                case I915_PMU_INTERRUPTS:
>>>                        val = count_interrupts(i915);
>>>                        break;
>>> @@ -803,6 +827,8 @@ create_event_attributes(struct drm_i915_private *i915)
>>>                __event(I915_PMU_REQUESTED_FREQUENCY, "requested-frequency", "MHz"),
>>>                __event(I915_PMU_INTERRUPTS, "interrupts", NULL),
>>>                __event(I915_PMU_RC6_RESIDENCY, "rc6-residency", "ns"),
>>> +             __event(I915_PMU_ACTUAL_CLOCK, "actual-clock", "Mcycles"),
>>> +             __event(I915_PMU_REQUESTED_CLOCK, "requested-clock", "Mcycles"),
>>>        };
>>>        static const struct {
>>>                enum drm_i915_pmu_engine_sample sample;
>>> diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
>>> index 2ba735299f7c..9c4252d85b5e 100644
>>> --- a/drivers/gpu/drm/i915/i915_pmu.h
>>> +++ b/drivers/gpu/drm/i915/i915_pmu.h
>>> @@ -17,6 +17,8 @@ struct drm_i915_private;
>>>    enum {
>>>        __I915_SAMPLE_FREQ_ACT = 0,
>>>        __I915_SAMPLE_FREQ_REQ,
>>> +     __I915_SAMPLE_CLOCK_ACT,
>>> +     __I915_SAMPLE_CLOCK_REQ,
>>>        __I915_SAMPLE_RC6,
>>>        __I915_SAMPLE_RC6_ESTIMATED,
>>>        __I915_NUM_PMU_SAMPLERS
>>> @@ -52,6 +54,10 @@ struct i915_pmu {
>>>         * @timer: Timer for internal i915 PMU sampling.
>>>         */
>>>        struct hrtimer timer;
>>> +     /**
>>> +      * @timestamp: Timestamp of last internal i915 PMU sampling.
>>> +      */
>>> +     ktime_t timestamp;
>>>        /**
>>>         * @enable: Bitmask of all currently enabled events.
>>>         *
>>> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
>>> index 7f5634ce8e88..61ab71986274 100644
>>> --- a/include/uapi/drm/i915_drm.h
>>> +++ b/include/uapi/drm/i915_drm.h
>>> @@ -139,6 +139,8 @@ enum drm_i915_pmu_engine_sample {
>>>    #define I915_PMU_REQUESTED_FREQUENCY        __I915_PMU_OTHER(1)
>>>    #define I915_PMU_INTERRUPTS         __I915_PMU_OTHER(2)
>>>    #define I915_PMU_RC6_RESIDENCY              __I915_PMU_OTHER(3)
>>> +#define I915_PMU_ACTUAL_CLOCK                __I915_PMU_OTHER(4)
>>> +#define I915_PMU_REQUESTED_CLOCK     __I915_PMU_OTHER(5)
>>>    
>>>    #define I915_PMU_LAST I915_PMU_RC6_RESIDENCY
>>
>> Bump this one.
>>
>> I want to know if we could get away without introducing a new pair of
>> counters.
> 
> I wouldn't mind having my cycle counters back in the api ;)
> They just seem to be easier for me to work with in userspace.
> 
> Or do you mean if we can just redefine the existing sampler?

Keep the existing semantics but improve the implementation sufficiently so 
that they survive the IVB hrtimer problem.
>> For instance, would a running average over a period do for the
>> frequency readout? It depends on what kind of error we are facing. Or a
>> moving average over some period? I would explore that but don't have an
>> Ivybridge, so could you have a look?
> 
> The crucial part is that we don't define the period, the user does.
> 
> Heh, once again we have two different ideas about what we want to
> measure :) I'ld take cycles, with your suggestion we may as well do
> instantaneous frequency and sample from within perf_event_read (no
> averaging, or just a short ewma)?
For me the key question is how unstable the IVB clock is. Is it random 
jitter (and by how much), or something else? If we keep an x ms moving 
average from frequency_sample, and use that to convert to Hz on the 
output, would it be good enough?

To me that is preferable to adding new counters, especially if the error 
is so small that no one notices _and_ it occurs only on IVB.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2] drm/i915/pmu: Measure sampler intervals
  2018-05-30 15:27       ` Tvrtko Ursulin
@ 2018-05-30 15:37         ` Chris Wilson
  2018-05-30 17:21           ` Tvrtko Ursulin
  0 siblings, 1 reply; 15+ messages in thread
From: Chris Wilson @ 2018-05-30 15:37 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-05-30 16:27:02)
> 
> On 30/05/2018 15:55, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2018-05-30 15:37:18)
> >>
> >> On 30/05/2018 12:55, Chris Wilson wrote:
> >>> hrtimer is not reliable enough to assume fixed intervals, and so for even
> >>> coarse accuracy (in the face of kasan and similar heavy debugging) we
> >>> need to measure the actual interval between samples.
> >>>
> >>> While using a single timestamp to compute the interval does not allow
> >>> very fine accuracy (consider the impact of a slow forcewake between
> >>> different samples after the timestamp is read), it is much better than
> >>> assuming the interval.
> >>>
> >>> v2: Make use of the sample period for estimating the GPU clock cycles,
> >>> leaving the frequency calculation (the averaging) to the caller.
> >>> Introduce new samplers for reporting cycles instead of Hz.
> >>>
> >>> Testcase: igt/perf_pmu #ivb
> >>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>> ---
> >>>    drivers/gpu/drm/i915/i915_pmu.c | 44 ++++++++++++++++++++++++++-------
> >>>    drivers/gpu/drm/i915/i915_pmu.h |  6 +++++
> >>>    include/uapi/drm/i915_drm.h     |  2 ++
> >>>    3 files changed, 43 insertions(+), 9 deletions(-)
> >>>
> >>> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> >>> index dc87797db500..12033e47e3b4 100644
> >>> --- a/drivers/gpu/drm/i915/i915_pmu.c
> >>> +++ b/drivers/gpu/drm/i915/i915_pmu.c
> >>> @@ -86,6 +86,8 @@ static bool pmu_needs_timer(struct drm_i915_private *i915, bool gpu_active)
> >>>         */
> >>>        enable &= config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY) |
> >>>                  config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY) |
> >>> +               config_enabled_mask(I915_PMU_ACTUAL_CLOCK) |
> >>> +               config_enabled_mask(I915_PMU_REQUESTED_CLOCK) |
> >>>                  ENGINE_SAMPLE_MASK;
> >>>    
> >>>        /*
> >>> @@ -127,6 +129,7 @@ static void __i915_pmu_maybe_start_timer(struct drm_i915_private *i915)
> >>>    {
> >>>        if (!i915->pmu.timer_enabled && pmu_needs_timer(i915, true)) {
> >>>                i915->pmu.timer_enabled = true;
> >>> +             i915->pmu.timestamp = ktime_get();
> >>>                hrtimer_start_range_ns(&i915->pmu.timer,
> >>>                                       ns_to_ktime(PERIOD), 0,
> >>>                                       HRTIMER_MODE_REL_PINNED);
> >>> @@ -160,7 +163,7 @@ update_sample(struct i915_pmu_sample *sample, u32 unit, u32 val)
> >>>        sample->cur += mul_u32_u32(val, unit);
> >>>    }
> >>>    
> >>> -static void engines_sample(struct drm_i915_private *dev_priv)
> >>> +static void engines_sample(struct drm_i915_private *dev_priv, u64 period)
> >>>    {
> >>>        struct intel_engine_cs *engine;
> >>>        enum intel_engine_id id;
> >>> @@ -183,7 +186,7 @@ static void engines_sample(struct drm_i915_private *dev_priv)
> >>>                val = !i915_seqno_passed(current_seqno, last_seqno);
> >>>    
> >>>                update_sample(&engine->pmu.sample[I915_SAMPLE_BUSY],
> >>> -                           PERIOD, val);
> >>> +                           period, val);
> >>>    
> >>>                if (val && (engine->pmu.enable &
> >>>                    (BIT(I915_SAMPLE_WAIT) | BIT(I915_SAMPLE_SEMA)))) {
> >>> @@ -195,10 +198,10 @@ static void engines_sample(struct drm_i915_private *dev_priv)
> >>>                }
> >>>    
> >>>                update_sample(&engine->pmu.sample[I915_SAMPLE_WAIT],
> >>> -                           PERIOD, !!(val & RING_WAIT));
> >>> +                           period, !!(val & RING_WAIT));
> >>>    
> >>>                update_sample(&engine->pmu.sample[I915_SAMPLE_SEMA],
> >>> -                           PERIOD, !!(val & RING_WAIT_SEMAPHORE));
> >>> +                           period, !!(val & RING_WAIT_SEMAPHORE));
> >>>        }
> >>>    
> >>>        if (fw)
> >>> @@ -207,10 +210,11 @@ static void engines_sample(struct drm_i915_private *dev_priv)
> >>>        intel_runtime_pm_put(dev_priv);
> >>>    }
> >>>    
> >>> -static void frequency_sample(struct drm_i915_private *dev_priv)
> >>> +static void frequency_sample(struct drm_i915_private *dev_priv, u64 period)
> >>>    {
> >>>        if (dev_priv->pmu.enable &
> >>> -         config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) {
> >>> +         (config_enabled_mask(I915_PMU_ACTUAL_CLOCK) |
> >>> +          config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY))) {
> >>>                u32 val;
> >>>    
> >>>                val = dev_priv->gt_pm.rps.cur_freq;
> >>> @@ -223,13 +227,20 @@ static void frequency_sample(struct drm_i915_private *dev_priv)
> >>>    
> >>>                update_sample(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_ACT],
> >>>                              1, intel_gpu_freq(dev_priv, val));
> >>> +             update_sample(&dev_priv->pmu.sample[__I915_SAMPLE_CLOCK_ACT],
> >>> +                           period, intel_gpu_freq(dev_priv, val));
> >>
> >> Cache intel_gpu_freq in a local.
> >>
> >>>        }
> >>>    
> >>>        if (dev_priv->pmu.enable &
> >>> -         config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)) {
> >>> +         (config_enabled_mask(I915_PMU_REQUESTED_CLOCK) |
> >>> +          config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY))) {
> >>>                update_sample(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_REQ], 1,
> >>>                              intel_gpu_freq(dev_priv,
> >>>                                             dev_priv->gt_pm.rps.cur_freq));
> >>> +             update_sample(&dev_priv->pmu.sample[__I915_SAMPLE_CLOCK_REQ],
> >>> +                           period,
> >>> +                           intel_gpu_freq(dev_priv,
> >>> +                                          dev_priv->gt_pm.rps.cur_freq));
> >>
> >> Same here.
> >>
> >>>        }
> >>>    }
> >>>    
> >>> @@ -237,12 +248,17 @@ static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
> >>>    {
> >>>        struct drm_i915_private *i915 =
> >>>                container_of(hrtimer, struct drm_i915_private, pmu.timer);
> >>> +     ktime_t now, period;
> >>>    
> >>>        if (!READ_ONCE(i915->pmu.timer_enabled))
> >>>                return HRTIMER_NORESTART;
> >>>    
> >>> -     engines_sample(i915);
> >>> -     frequency_sample(i915);
> >>> +     now = ktime_get();
> >>> +     period = ktime_sub(now, i915->pmu.timestamp);
> >>> +     i915->pmu.timestamp = now;
> >>> +
> >>> +     engines_sample(i915, period);
> >>> +     frequency_sample(i915, period);
> >>>    
> >>>        hrtimer_forward_now(hrtimer, ns_to_ktime(PERIOD));
> >>>        return HRTIMER_RESTART;
> >>> @@ -313,11 +329,13 @@ config_status(struct drm_i915_private *i915, u64 config)
> >>>    {
> >>>        switch (config) {
> >>>        case I915_PMU_ACTUAL_FREQUENCY:
> >>> +     case I915_PMU_ACTUAL_CLOCK:
> >>>                if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
> >>>                        /* Requires a mutex for sampling! */
> >>>                        return -ENODEV;
> >>>                /* Fall-through. */
> >>>        case I915_PMU_REQUESTED_FREQUENCY:
> >>> +     case I915_PMU_REQUESTED_CLOCK:
> >>>                if (INTEL_GEN(i915) < 6)
> >>>                        return -ENODEV;
> >>>                break;
> >>> @@ -526,6 +544,12 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
> >>>                           div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_REQ].cur,
> >>>                                   FREQUENCY);
> >>>                        break;
> >>> +             case I915_PMU_ACTUAL_CLOCK:
> >>> +                     val = i915->pmu.sample[__I915_SAMPLE_CLOCK_ACT].cur;
> >>> +                     break;
> >>> +             case I915_PMU_REQUESTED_CLOCK:
> >>> +                     val = i915->pmu.sample[__I915_SAMPLE_CLOCK_REQ].cur;
> >>> +                     break;
> >>>                case I915_PMU_INTERRUPTS:
> >>>                        val = count_interrupts(i915);
> >>>                        break;
> >>> @@ -803,6 +827,8 @@ create_event_attributes(struct drm_i915_private *i915)
> >>>                __event(I915_PMU_REQUESTED_FREQUENCY, "requested-frequency", "MHz"),
> >>>                __event(I915_PMU_INTERRUPTS, "interrupts", NULL),
> >>>                __event(I915_PMU_RC6_RESIDENCY, "rc6-residency", "ns"),
> >>> +             __event(I915_PMU_ACTUAL_CLOCK, "actual-clock", "Mcycles"),
> >>> +             __event(I915_PMU_REQUESTED_CLOCK, "requested-clock", "Mcycles"),
> >>>        };
> >>>        static const struct {
> >>>                enum drm_i915_pmu_engine_sample sample;
> >>> diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
> >>> index 2ba735299f7c..9c4252d85b5e 100644
> >>> --- a/drivers/gpu/drm/i915/i915_pmu.h
> >>> +++ b/drivers/gpu/drm/i915/i915_pmu.h
> >>> @@ -17,6 +17,8 @@ struct drm_i915_private;
> >>>    enum {
> >>>        __I915_SAMPLE_FREQ_ACT = 0,
> >>>        __I915_SAMPLE_FREQ_REQ,
> >>> +     __I915_SAMPLE_CLOCK_ACT,
> >>> +     __I915_SAMPLE_CLOCK_REQ,
> >>>        __I915_SAMPLE_RC6,
> >>>        __I915_SAMPLE_RC6_ESTIMATED,
> >>>        __I915_NUM_PMU_SAMPLERS
> >>> @@ -52,6 +54,10 @@ struct i915_pmu {
> >>>         * @timer: Timer for internal i915 PMU sampling.
> >>>         */
> >>>        struct hrtimer timer;
> >>> +     /**
> >>> +      * @timestamp: Timestamp of last internal i915 PMU sampling.
> >>> +      */
> >>> +     ktime_t timestamp;
> >>>        /**
> >>>         * @enable: Bitmask of all currently enabled events.
> >>>         *
> >>> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> >>> index 7f5634ce8e88..61ab71986274 100644
> >>> --- a/include/uapi/drm/i915_drm.h
> >>> +++ b/include/uapi/drm/i915_drm.h
> >>> @@ -139,6 +139,8 @@ enum drm_i915_pmu_engine_sample {
> >>>    #define I915_PMU_REQUESTED_FREQUENCY        __I915_PMU_OTHER(1)
> >>>    #define I915_PMU_INTERRUPTS         __I915_PMU_OTHER(2)
> >>>    #define I915_PMU_RC6_RESIDENCY              __I915_PMU_OTHER(3)
> >>> +#define I915_PMU_ACTUAL_CLOCK                __I915_PMU_OTHER(4)
> >>> +#define I915_PMU_REQUESTED_CLOCK     __I915_PMU_OTHER(5)
> >>>    
> >>>    #define I915_PMU_LAST I915_PMU_RC6_RESIDENCY
> >>
> >> Bump this one.
> >>
> >> I want to know if we could get away without introducing a new pair of
> >> counter.
> > 
> > I wouldn't mind having my cycle counters back in the api ;)
> > They just seem to be easier for me to work with in userspace.
> > 
> > Or do you mean if we can just redefine the existing sampler?
> 
> Keep the existing semantics but improve implementation sufficiently so 
> they survive the IVB hrtimer problem.

(Now that I've started, bring back the cycle counters!)

> >> For instance would running average of a period do for frequency
> >> readout? It depends on what kind of error we are facing. Or a moving
> >> average for some period? I would explore that but don't have an
> >> Ivybridge so could you have a look?
> > 
> > The crucial part is that we don't define the period, the user does.
> > 
> > Heh, once again we have two different ideas about what we want to
> > measure :) I'ld take cycles, with your suggestion we may as well do
> > instantaneous frequency and sample from within perf_event_read (no
> > averaging, or just a short ewma)?
> For me the key question is how unstable is the IVB clock? Is it random 
> jitter and by how much, or what. If we keep x ms moving average from 
> frequency_sample, and use that to convert to Hz on the output, would it 
> be good enough?

We don't have that sort of test in perf_pmu. The closest we have was for
gem_ctx_freq, and there the frequency sampler was not very accurate
(those -100/+100 tolerances were not for thermal throttling). We can try
doing the same sawtooths -- there is just the challenge of systematic
errors in the timer, the rps worker and the hw.
 
> To me it is preferable to adding new counters. Especially if the error 
> is so small that no one notices _and_ only on IVB.

I don't think it's fair to say that only IVB has a problem with the
hrtimer, it's just where it's most visible. Or to rule it out being a
problem for the future. The code is using hrtimer_forward() so it
already assumes it can and will miss samples :)
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2] drm/i915/pmu: Measure sampler intervals
  2018-05-30 15:37         ` Chris Wilson
@ 2018-05-30 17:21           ` Tvrtko Ursulin
  0 siblings, 0 replies; 15+ messages in thread
From: Tvrtko Ursulin @ 2018-05-30 17:21 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 30/05/2018 16:37, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-05-30 16:27:02)
>>
>> On 30/05/2018 15:55, Chris Wilson wrote:
>>> Quoting Tvrtko Ursulin (2018-05-30 15:37:18)
>>>>
>>>> On 30/05/2018 12:55, Chris Wilson wrote:
>>>>> hrtimer is not reliable enough to assume fixed intervals, and so even
>>>>> coarse accuracy (in the face of kasan and similar heavy debugging) we
>>>>> need to measure the actual interval between sample.
>>>>>
>>>>> While using a single timestamp to compute the interval does not allow
>>>>> very fine accuracy (consider the impact of a slow forcewake between
>>>>> different samples after the timestamp is read) is much better than
>>>>> assuming the interval.
>>>>>
>>>>> v2: Make use of the sample period for estimating the GPU clock cycles,
>>>>> leaving the frequency calculation (the averaging) to the caller.
>>>>> Introduce new samplers for reporting cycles instead of Hz.
>>>>>
>>>>> Testcase: igt/perf_pmu #ivb
>>>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>>> ---
>>>>>     drivers/gpu/drm/i915/i915_pmu.c | 44 ++++++++++++++++++++++++++-------
>>>>>     drivers/gpu/drm/i915/i915_pmu.h |  6 +++++
>>>>>     include/uapi/drm/i915_drm.h     |  2 ++
>>>>>     3 files changed, 43 insertions(+), 9 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
>>>>> index dc87797db500..12033e47e3b4 100644
>>>>> --- a/drivers/gpu/drm/i915/i915_pmu.c
>>>>> +++ b/drivers/gpu/drm/i915/i915_pmu.c
>>>>> @@ -86,6 +86,8 @@ static bool pmu_needs_timer(struct drm_i915_private *i915, bool gpu_active)
>>>>>          */
>>>>>         enable &= config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY) |
>>>>>                   config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY) |
>>>>> +               config_enabled_mask(I915_PMU_ACTUAL_CLOCK) |
>>>>> +               config_enabled_mask(I915_PMU_REQUESTED_CLOCK) |
>>>>>                   ENGINE_SAMPLE_MASK;
>>>>>     
>>>>>         /*
>>>>> @@ -127,6 +129,7 @@ static void __i915_pmu_maybe_start_timer(struct drm_i915_private *i915)
>>>>>     {
>>>>>         if (!i915->pmu.timer_enabled && pmu_needs_timer(i915, true)) {
>>>>>                 i915->pmu.timer_enabled = true;
>>>>> +             i915->pmu.timestamp = ktime_get();
>>>>>                 hrtimer_start_range_ns(&i915->pmu.timer,
>>>>>                                        ns_to_ktime(PERIOD), 0,
>>>>>                                        HRTIMER_MODE_REL_PINNED);
>>>>> @@ -160,7 +163,7 @@ update_sample(struct i915_pmu_sample *sample, u32 unit, u32 val)
>>>>>         sample->cur += mul_u32_u32(val, unit);
>>>>>     }
>>>>>     
>>>>> -static void engines_sample(struct drm_i915_private *dev_priv)
>>>>> +static void engines_sample(struct drm_i915_private *dev_priv, u64 period)
>>>>>     {
>>>>>         struct intel_engine_cs *engine;
>>>>>         enum intel_engine_id id;
>>>>> @@ -183,7 +186,7 @@ static void engines_sample(struct drm_i915_private *dev_priv)
>>>>>                 val = !i915_seqno_passed(current_seqno, last_seqno);
>>>>>     
>>>>>                 update_sample(&engine->pmu.sample[I915_SAMPLE_BUSY],
>>>>> -                           PERIOD, val);
>>>>> +                           period, val);
>>>>>     
>>>>>                 if (val && (engine->pmu.enable &
>>>>>                     (BIT(I915_SAMPLE_WAIT) | BIT(I915_SAMPLE_SEMA)))) {
>>>>> @@ -195,10 +198,10 @@ static void engines_sample(struct drm_i915_private *dev_priv)
>>>>>                 }
>>>>>     
>>>>>                 update_sample(&engine->pmu.sample[I915_SAMPLE_WAIT],
>>>>> -                           PERIOD, !!(val & RING_WAIT));
>>>>> +                           period, !!(val & RING_WAIT));
>>>>>     
>>>>>                 update_sample(&engine->pmu.sample[I915_SAMPLE_SEMA],
>>>>> -                           PERIOD, !!(val & RING_WAIT_SEMAPHORE));
>>>>> +                           period, !!(val & RING_WAIT_SEMAPHORE));
>>>>>         }
>>>>>     
>>>>>         if (fw)
>>>>> @@ -207,10 +210,11 @@ static void engines_sample(struct drm_i915_private *dev_priv)
>>>>>         intel_runtime_pm_put(dev_priv);
>>>>>     }
>>>>>     
>>>>> -static void frequency_sample(struct drm_i915_private *dev_priv)
>>>>> +static void frequency_sample(struct drm_i915_private *dev_priv, u64 period)
>>>>>     {
>>>>>         if (dev_priv->pmu.enable &
>>>>> -         config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) {
>>>>> +         (config_enabled_mask(I915_PMU_ACTUAL_CLOCK) |
>>>>> +          config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY))) {
>>>>>                 u32 val;
>>>>>     
>>>>>                 val = dev_priv->gt_pm.rps.cur_freq;
>>>>> @@ -223,13 +227,20 @@ static void frequency_sample(struct drm_i915_private *dev_priv)
>>>>>     
>>>>>                 update_sample(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_ACT],
>>>>>                               1, intel_gpu_freq(dev_priv, val));
>>>>> +             update_sample(&dev_priv->pmu.sample[__I915_SAMPLE_CLOCK_ACT],
>>>>> +                           period, intel_gpu_freq(dev_priv, val));
>>>>
>>>> Cache intel_gpu_freq in a local.
>>>>
>>>>>         }
>>>>>     
>>>>>         if (dev_priv->pmu.enable &
>>>>> -         config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)) {
>>>>> +         (config_enabled_mask(I915_PMU_REQUESTED_CLOCK) |
>>>>> +          config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY))) {
>>>>>                 update_sample(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_REQ], 1,
>>>>>                               intel_gpu_freq(dev_priv,
>>>>>                                              dev_priv->gt_pm.rps.cur_freq));
>>>>> +             update_sample(&dev_priv->pmu.sample[__I915_SAMPLE_CLOCK_REQ],
>>>>> +                           period,
>>>>> +                           intel_gpu_freq(dev_priv,
>>>>> +                                          dev_priv->gt_pm.rps.cur_freq));
>>>>
>>>> Same here.
>>>>
>>>>>         }
>>>>>     }
>>>>>     
>>>>> @@ -237,12 +248,17 @@ static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
>>>>>     {
>>>>>         struct drm_i915_private *i915 =
>>>>>                 container_of(hrtimer, struct drm_i915_private, pmu.timer);
>>>>> +     ktime_t now, period;
>>>>>     
>>>>>         if (!READ_ONCE(i915->pmu.timer_enabled))
>>>>>                 return HRTIMER_NORESTART;
>>>>>     
>>>>> -     engines_sample(i915);
>>>>> -     frequency_sample(i915);
>>>>> +     now = ktime_get();
>>>>> +     period = ktime_sub(now, i915->pmu.timestamp);
>>>>> +     i915->pmu.timestamp = now;
>>>>> +
>>>>> +     engines_sample(i915, period);
>>>>> +     frequency_sample(i915, period);
>>>>>     
>>>>>         hrtimer_forward_now(hrtimer, ns_to_ktime(PERIOD));
>>>>>         return HRTIMER_RESTART;
>>>>> @@ -313,11 +329,13 @@ config_status(struct drm_i915_private *i915, u64 config)
>>>>>     {
>>>>>         switch (config) {
>>>>>         case I915_PMU_ACTUAL_FREQUENCY:
>>>>> +     case I915_PMU_ACTUAL_CLOCK:
>>>>>                 if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
>>>>>                         /* Requires a mutex for sampling! */
>>>>>                         return -ENODEV;
>>>>>                 /* Fall-through. */
>>>>>         case I915_PMU_REQUESTED_FREQUENCY:
>>>>> +     case I915_PMU_REQUESTED_CLOCK:
>>>>>                 if (INTEL_GEN(i915) < 6)
>>>>>                         return -ENODEV;
>>>>>                 break;
>>>>> @@ -526,6 +544,12 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
>>>>>                            div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_REQ].cur,
>>>>>                                    FREQUENCY);
>>>>>                         break;
>>>>> +             case I915_PMU_ACTUAL_CLOCK:
>>>>> +                     val = i915->pmu.sample[__I915_SAMPLE_CLOCK_ACT].cur;
>>>>> +                     break;
>>>>> +             case I915_PMU_REQUESTED_CLOCK:
>>>>> +                     val = i915->pmu.sample[__I915_SAMPLE_CLOCK_REQ].cur;
>>>>> +                     break;
>>>>>                 case I915_PMU_INTERRUPTS:
>>>>>                         val = count_interrupts(i915);
>>>>>                         break;
>>>>> @@ -803,6 +827,8 @@ create_event_attributes(struct drm_i915_private *i915)
>>>>>                 __event(I915_PMU_REQUESTED_FREQUENCY, "requested-frequency", "MHz"),
>>>>>                 __event(I915_PMU_INTERRUPTS, "interrupts", NULL),
>>>>>                 __event(I915_PMU_RC6_RESIDENCY, "rc6-residency", "ns"),
>>>>> +             __event(I915_PMU_ACTUAL_CLOCK, "actual-clock", "Mcycles"),
>>>>> +             __event(I915_PMU_REQUESTED_CLOCK, "requested-clock", "Mcycles"),
>>>>>         };
>>>>>         static const struct {
>>>>>                 enum drm_i915_pmu_engine_sample sample;
>>>>> diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
>>>>> index 2ba735299f7c..9c4252d85b5e 100644
>>>>> --- a/drivers/gpu/drm/i915/i915_pmu.h
>>>>> +++ b/drivers/gpu/drm/i915/i915_pmu.h
>>>>> @@ -17,6 +17,8 @@ struct drm_i915_private;
>>>>>     enum {
>>>>>         __I915_SAMPLE_FREQ_ACT = 0,
>>>>>         __I915_SAMPLE_FREQ_REQ,
>>>>> +     __I915_SAMPLE_CLOCK_ACT,
>>>>> +     __I915_SAMPLE_CLOCK_REQ,
>>>>>         __I915_SAMPLE_RC6,
>>>>>         __I915_SAMPLE_RC6_ESTIMATED,
>>>>>         __I915_NUM_PMU_SAMPLERS
>>>>> @@ -52,6 +54,10 @@ struct i915_pmu {
>>>>>          * @timer: Timer for internal i915 PMU sampling.
>>>>>          */
>>>>>         struct hrtimer timer;
>>>>> +     /**
>>>>> +      * @timestamp: Timestamp of last internal i915 PMU sampling.
>>>>> +      */
>>>>> +     ktime_t timestamp;
>>>>>         /**
>>>>>          * @enable: Bitmask of all currently enabled events.
>>>>>          *
>>>>> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
>>>>> index 7f5634ce8e88..61ab71986274 100644
>>>>> --- a/include/uapi/drm/i915_drm.h
>>>>> +++ b/include/uapi/drm/i915_drm.h
>>>>> @@ -139,6 +139,8 @@ enum drm_i915_pmu_engine_sample {
>>>>>     #define I915_PMU_REQUESTED_FREQUENCY        __I915_PMU_OTHER(1)
>>>>>     #define I915_PMU_INTERRUPTS         __I915_PMU_OTHER(2)
>>>>>     #define I915_PMU_RC6_RESIDENCY              __I915_PMU_OTHER(3)
>>>>> +#define I915_PMU_ACTUAL_CLOCK                __I915_PMU_OTHER(4)
>>>>> +#define I915_PMU_REQUESTED_CLOCK     __I915_PMU_OTHER(5)
>>>>>     
>>>>>     #define I915_PMU_LAST I915_PMU_RC6_RESIDENCY
>>>>
>>>> Bump this one.
>>>>
>>>> I want to know if we could get away without introducing a new pair of
>>>> counter.
>>>
>>> I wouldn't mind having my cycle counters back in the api ;)
>>> They just seem to be easier for me to work with in userspace.
>>>
>>> Or do you mean if we can just redefine the existing sampler?
>>
>> Keep the existing semantics but improve implementation sufficiently so
>> they survive the IVB hrtimer problem.
> 
> (Now that I've started, bring back the cycle counters!)

I don't know when we had them. In some older version of your PMU 
prototype? Why are they so useful?

>>>> For instance would running average of a period do for frequency
>>>> readout? It depends on what kind of error we are facing. Or a moving
>>>> average for some period? I would explore that but don't have an
>>>> Ivybridge so could you have a look?
>>>
>>> The crucial part is that we don't define the period, the user does.
>>>
>>> Heh, once again we have two different ideas about what we want to
>>> measure :) I'ld take cycles, with your suggestion we may as well do
>>> instantaneous frequency and sample from within perf_event_read (no
>>> averaging, or just a short ewma)?
>> For me the key question is how unstable is the IVB clock? Is it random
>> jitter and by how much, or what. If we keep x ms moving average from
>> frequency_sample, and use that to convert to Hz on the output, would it
>> be good enough?
> 
> We don't have that sort of test in perf_pmu. The closest we have was for
> gem_ctx_freq, and there the frequency sampler was not very accurate
> those (-100/+100 tolerances were not for thermal throttling). We can try
> doing the same sawtooths -- just the challenge of systematic errors in
> both the timer, the rps worker and the hw.
>   
>> To me it is preferable to adding new counters. Especially if the error
>> is so small that no one notices _and_ only on IVB.
> 
> I don't think it's fair to say that only IVB has a problem with the
> hrtimer, it's just where it's most visible. Or to rule it out being a

I agree it is a good idea in general to measure the period but am still 
thinking if we can avoid adding new counters.

> problem for the future. The code is using hrtimer_forward() so it
> already assumes it can and will miss samples :)

Hm yeah, we are drifting by however long it takes to calculate our 
stuff. It can be improved by taking time at the beginning of the 
callback and forwarding the timer relative to that (hrtimer_forward). I 
wonder if that would help on IVB. I'll check if it makes a difference on 
SKL for existing accuracy tests.

But I never considered a completely missed sample. I'll trybot a patch to 
see if it happens anywhere.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2018-05-30 17:21 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-05-25 17:11 [PATCH] drm/i915/pmu: Measure sampler intervals Chris Wilson
2018-05-25 17:31 ` Tvrtko Ursulin
2018-05-25 17:45   ` Chris Wilson
2018-05-30 10:57     ` Tvrtko Ursulin
2018-05-30 11:07       ` Chris Wilson
2018-05-25 18:06 ` ✓ Fi.CI.BAT: success for " Patchwork
2018-05-26  3:50 ` ✓ Fi.CI.IGT: " Patchwork
2018-05-30 11:55 ` [PATCH v2] " Chris Wilson
2018-05-30 14:37   ` Tvrtko Ursulin
2018-05-30 14:55     ` Chris Wilson
2018-05-30 15:27       ` Tvrtko Ursulin
2018-05-30 15:37         ` Chris Wilson
2018-05-30 17:21           ` Tvrtko Ursulin
2018-05-30 13:57 ` ✓ Fi.CI.BAT: success for drm/i915/pmu: Measure sampler intervals (rev2) Patchwork
2018-05-30 15:11 ` ✗ Fi.CI.IGT: failure " Patchwork

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.