[PATCH i-g-t 1/2] igt/perf_pmu: Aim for a fixed number of iterations for calibrating accuracy

All of lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH i-g-t 1/2] igt/perf_pmu: Aim for a fixed number of iterations for calibrating accuracy
@ 2018-08-08 14:59 ` Chris Wilson
  0 siblings, 0 replies; 16+ messages in thread
From: Chris Wilson @ 2018-08-08 14:59 UTC (permalink / raw)
  To: intel-gfx; +Cc: igt-dev

Our observation is that the systematic error is proportional to the
number of iterations we perform; the suspicion is that it directly
correlates with the number of sleeps. Reduce the number of iterations,
to try and keep the error in check.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 tests/perf_pmu.c | 34 +++++++++++++++++++++-------------
 1 file changed, 21 insertions(+), 13 deletions(-)

diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
index 9a20abb6b..5a26d5272 100644
--- a/tests/perf_pmu.c
+++ b/tests/perf_pmu.c
@@ -1521,14 +1521,13 @@ static void __rearm_spin_batch(igt_spin_t *spin)
 
 static void
 accuracy(int gem_fd, const struct intel_execution_engine2 *e,
-	 unsigned long target_busy_pct)
+	 unsigned long target_busy_pct,
+	 unsigned long target_iters)
 {
-	unsigned long busy_us = 10000 - 100 * (1 + abs(50 - target_busy_pct));
-	unsigned long idle_us = 100 * (busy_us - target_busy_pct *
-				busy_us / 100) / target_busy_pct;
 	const unsigned long min_test_us = 1e6;
-	const unsigned long pwm_calibration_us = min_test_us;
-	const unsigned long test_us = min_test_us;
+	unsigned long pwm_calibration_us;
+	unsigned long test_us;
+	unsigned long cycle_us, busy_us, idle_us;
 	double busy_r, expected;
 	uint64_t val[2];
 	uint64_t ts[2];
@@ -1538,18 +1537,27 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
 	/* Sampling platforms cannot reach the high accuracy criteria. */
 	igt_require(gem_has_execlists(gem_fd));
 
-	while (idle_us < 2500) {
+	/* Aim for approximately 100 iterations for calibration */
+	cycle_us = min_test_us / target_iters;
+	busy_us = cycle_us * target_busy_pct / 100;
+	idle_us = cycle_us - busy_us;
+
+	while (idle_us < 2500 || busy_us < 2500) {
 		busy_us *= 2;
 		idle_us *= 2;
 	}
+	cycle_us = busy_us + idle_us;
+	pwm_calibration_us = target_iters * cycle_us / 2;
+	test_us = target_iters * cycle_us;
 
-	igt_info("calibration=%lums, test=%lums; ratio=%.2f%% (%luus/%luus)\n",
-		 pwm_calibration_us / 1000, test_us / 1000,
-		 (double)busy_us / (busy_us + idle_us) * 100.0,
+	igt_info("calibration=%lums, test=%lums, cycle=%lums; ratio=%.2f%% (%luus/%luus)\n",
+		 pwm_calibration_us / 1000, test_us / 1000, cycle_us / 1000,
+		 (double)busy_us / cycle_us * 100.0,
 		 busy_us, idle_us);
 
-	assert_within_epsilon((double)busy_us / (busy_us + idle_us),
-				(double)target_busy_pct / 100.0, tolerance);
+	assert_within_epsilon((double)busy_us / cycle_us,
+			      (double)target_busy_pct / 100.0,
+			      tolerance);
 
 	igt_assert(pipe(link) == 0);
 
@@ -1796,7 +1804,7 @@ igt_main
 			for (i = 0; i < ARRAY_SIZE(pct); i++) {
 				igt_subtest_f("busy-accuracy-%u-%s",
 					      pct[i], e->name)
-					accuracy(fd, e, pct[i]);
+					accuracy(fd, e, pct[i], 10);
 			}
 
 			igt_subtest_f("busy-hang-%s", e->name)
-- 
2.18.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [igt-dev] [PATCH i-g-t 1/2] igt/perf_pmu: Aim for a fixed number of iterations for calibrating accuracy
@ 2018-08-08 14:59 ` Chris Wilson
  0 siblings, 0 replies; 16+ messages in thread
From: Chris Wilson @ 2018-08-08 14:59 UTC (permalink / raw)
  To: intel-gfx; +Cc: igt-dev, Tvrtko Ursulin

Our observation is that the systematic error is proportional to the
number of iterations we perform; the suspicion is that it directly
correlates with the number of sleeps. Reduce the number of iterations,
to try and keep the error in check.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 tests/perf_pmu.c | 34 +++++++++++++++++++++-------------
 1 file changed, 21 insertions(+), 13 deletions(-)

diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
index 9a20abb6b..5a26d5272 100644
--- a/tests/perf_pmu.c
+++ b/tests/perf_pmu.c
@@ -1521,14 +1521,13 @@ static void __rearm_spin_batch(igt_spin_t *spin)
 
 static void
 accuracy(int gem_fd, const struct intel_execution_engine2 *e,
-	 unsigned long target_busy_pct)
+	 unsigned long target_busy_pct,
+	 unsigned long target_iters)
 {
-	unsigned long busy_us = 10000 - 100 * (1 + abs(50 - target_busy_pct));
-	unsigned long idle_us = 100 * (busy_us - target_busy_pct *
-				busy_us / 100) / target_busy_pct;
 	const unsigned long min_test_us = 1e6;
-	const unsigned long pwm_calibration_us = min_test_us;
-	const unsigned long test_us = min_test_us;
+	unsigned long pwm_calibration_us;
+	unsigned long test_us;
+	unsigned long cycle_us, busy_us, idle_us;
 	double busy_r, expected;
 	uint64_t val[2];
 	uint64_t ts[2];
@@ -1538,18 +1537,27 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
 	/* Sampling platforms cannot reach the high accuracy criteria. */
 	igt_require(gem_has_execlists(gem_fd));
 
-	while (idle_us < 2500) {
+	/* Aim for approximately 100 iterations for calibration */
+	cycle_us = min_test_us / target_iters;
+	busy_us = cycle_us * target_busy_pct / 100;
+	idle_us = cycle_us - busy_us;
+
+	while (idle_us < 2500 || busy_us < 2500) {
 		busy_us *= 2;
 		idle_us *= 2;
 	}
+	cycle_us = busy_us + idle_us;
+	pwm_calibration_us = target_iters * cycle_us / 2;
+	test_us = target_iters * cycle_us;
 
-	igt_info("calibration=%lums, test=%lums; ratio=%.2f%% (%luus/%luus)\n",
-		 pwm_calibration_us / 1000, test_us / 1000,
-		 (double)busy_us / (busy_us + idle_us) * 100.0,
+	igt_info("calibration=%lums, test=%lums, cycle=%lums; ratio=%.2f%% (%luus/%luus)\n",
+		 pwm_calibration_us / 1000, test_us / 1000, cycle_us / 1000,
+		 (double)busy_us / cycle_us * 100.0,
 		 busy_us, idle_us);
 
-	assert_within_epsilon((double)busy_us / (busy_us + idle_us),
-				(double)target_busy_pct / 100.0, tolerance);
+	assert_within_epsilon((double)busy_us / cycle_us,
+			      (double)target_busy_pct / 100.0,
+			      tolerance);
 
 	igt_assert(pipe(link) == 0);
 
@@ -1796,7 +1804,7 @@ igt_main
 			for (i = 0; i < ARRAY_SIZE(pct); i++) {
 				igt_subtest_f("busy-accuracy-%u-%s",
 					      pct[i], e->name)
-					accuracy(fd, e, pct[i]);
+					accuracy(fd, e, pct[i], 10);
 			}
 
 			igt_subtest_f("busy-hang-%s", e->name)
-- 
2.18.0

_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH i-g-t 2/2] igt/perf_pmu: Improve the presentation of the accuracy calibration
  2018-08-08 14:59 ` [igt-dev] " Chris Wilson
@ 2018-08-08 14:59   ` Chris Wilson
  -1 siblings, 0 replies; 16+ messages in thread
From: Chris Wilson @ 2018-08-08 14:59 UTC (permalink / raw)
  To: intel-gfx; +Cc: igt-dev

Normalize the variance to stddev, and remove some redundant steps in
computing the time from itself.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 tests/perf_pmu.c | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
index 5a26d5272..4e8da3d94 100644
--- a/tests/perf_pmu.c
+++ b/tests/perf_pmu.c
@@ -1577,8 +1577,8 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
 		/* 1st pass is calibration, second pass is the test. */
 		for (int pass = 0; pass < ARRAY_SIZE(timeout); pass++) {
 			unsigned int target_idle_us = idle_us;
-			uint64_t busy_ns = 0, idle_ns = 0;
 			struct timespec start = { };
+			uint64_t busy_ns = 0;
 			unsigned long pass_ns = 0;
 			double avg = 0.0, var = 0.0;
 			unsigned int n = 0;
@@ -1589,6 +1589,7 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
 				unsigned long loop_ns, loop_busy;
 				struct timespec _ts = { };
 				double err, tmp;
+				uint64_t now;
 
 				/* PWM idle sleep. */
 				_ts.tv_nsec = target_idle_us * 1000;
@@ -1605,14 +1606,13 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
 				igt_spin_batch_end(spin);
 
 				/* Time accounting. */
-				loop_ns = igt_nsec_elapsed(&start);
-				loop_busy = loop_ns - loop_busy;
-				loop_ns -= pass_ns;
+				now = igt_nsec_elapsed(&start);
+				loop_busy = now - loop_busy;
+				loop_ns = now - pass_ns;
+				pass_ns = now;
 
 				busy_ns += loop_busy;
 				total_busy_ns += loop_busy;
-				idle_ns += loop_ns - loop_busy;
-				pass_ns += loop_ns;
 				total_ns += loop_ns;
 
 				/* Re-calibrate. */
@@ -1628,10 +1628,14 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
 				var += (err - avg) * (err - tmp);
 			} while (pass_ns < timeout[pass]);
 
+			pass_ns = igt_nsec_elapsed(&start);
 			expected = (double)busy_ns / pass_ns;
-			igt_info("%u: busy %"PRIu64"us, idle %"PRIu64"us -> %.2f%% (target: %lu%%; average=%.2f, variance=%f)\n",
-				 pass, busy_ns / 1000, idle_ns / 1000,
-				 100 * expected, target_busy_pct, avg, var / n);
+
+			igt_info("%u: busy %"PRIu64"us, idle %"PRIu64"us -> %.2f%% (target: %lu%%; average=%.2f±%.3f%%)\n",
+				 pass, busy_ns / 1000, (pass_ns - busy_ns) / 1000,
+				 100 * expected, target_busy_pct,
+				 avg, sqrt(var / n));
+
 			write(link[1], &expected, sizeof(expected));
 		}
 
-- 
2.18.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [Intel-gfx] [PATCH i-g-t 2/2] igt/perf_pmu: Improve the presentation of the accuracy calibration
@ 2018-08-08 14:59   ` Chris Wilson
  0 siblings, 0 replies; 16+ messages in thread
From: Chris Wilson @ 2018-08-08 14:59 UTC (permalink / raw)
  To: intel-gfx; +Cc: igt-dev

Normalize the variance to stddev, and remove some redundant steps in
computing the time from itself.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 tests/perf_pmu.c | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
index 5a26d5272..4e8da3d94 100644
--- a/tests/perf_pmu.c
+++ b/tests/perf_pmu.c
@@ -1577,8 +1577,8 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
 		/* 1st pass is calibration, second pass is the test. */
 		for (int pass = 0; pass < ARRAY_SIZE(timeout); pass++) {
 			unsigned int target_idle_us = idle_us;
-			uint64_t busy_ns = 0, idle_ns = 0;
 			struct timespec start = { };
+			uint64_t busy_ns = 0;
 			unsigned long pass_ns = 0;
 			double avg = 0.0, var = 0.0;
 			unsigned int n = 0;
@@ -1589,6 +1589,7 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
 				unsigned long loop_ns, loop_busy;
 				struct timespec _ts = { };
 				double err, tmp;
+				uint64_t now;
 
 				/* PWM idle sleep. */
 				_ts.tv_nsec = target_idle_us * 1000;
@@ -1605,14 +1606,13 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
 				igt_spin_batch_end(spin);
 
 				/* Time accounting. */
-				loop_ns = igt_nsec_elapsed(&start);
-				loop_busy = loop_ns - loop_busy;
-				loop_ns -= pass_ns;
+				now = igt_nsec_elapsed(&start);
+				loop_busy = now - loop_busy;
+				loop_ns = now - pass_ns;
+				pass_ns = now;
 
 				busy_ns += loop_busy;
 				total_busy_ns += loop_busy;
-				idle_ns += loop_ns - loop_busy;
-				pass_ns += loop_ns;
 				total_ns += loop_ns;
 
 				/* Re-calibrate. */
@@ -1628,10 +1628,14 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
 				var += (err - avg) * (err - tmp);
 			} while (pass_ns < timeout[pass]);
 
+			pass_ns = igt_nsec_elapsed(&start);
 			expected = (double)busy_ns / pass_ns;
-			igt_info("%u: busy %"PRIu64"us, idle %"PRIu64"us -> %.2f%% (target: %lu%%; average=%.2f, variance=%f)\n",
-				 pass, busy_ns / 1000, idle_ns / 1000,
-				 100 * expected, target_busy_pct, avg, var / n);
+
+			igt_info("%u: busy %"PRIu64"us, idle %"PRIu64"us -> %.2f%% (target: %lu%%; average=%.2f±%.3f%%)\n",
+				 pass, busy_ns / 1000, (pass_ns - busy_ns) / 1000,
+				 100 * expected, target_busy_pct,
+				 avg, sqrt(var / n));
+
 			write(link[1], &expected, sizeof(expected));
 		}
 
-- 
2.18.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [igt-dev] ✓ Fi.CI.BAT: success for series starting with [i-g-t,1/2] igt/perf_pmu: Aim for a fixed number of iterations for calibrating accuracy
  2018-08-08 14:59 ` [igt-dev] " Chris Wilson
  (?)
  (?)
@ 2018-08-08 15:38 ` Patchwork
  -1 siblings, 0 replies; 16+ messages in thread
From: Patchwork @ 2018-08-08 15:38 UTC (permalink / raw)
  To: Chris Wilson; +Cc: igt-dev

== Series Details ==

Series: series starting with [i-g-t,1/2] igt/perf_pmu: Aim for a fixed number of iterations for calibrating accuracy
URL   : https://patchwork.freedesktop.org/series/47895/
State : success

== Summary ==

= CI Bug Log - changes from CI_DRM_4633 -> IGTPW_1694 =

== Summary - SUCCESS ==

  No regressions found.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/47895/revisions/1/mbox/

== Known issues ==

  Here are the changes found in IGTPW_1694 that come from known issues:

  === IGT changes ===

    ==== Issues hit ====

    igt@drv_selftest@live_workarounds:
      {fi-cfl-8109u}:     PASS -> DMESG-FAIL (fdo#107292)
      {fi-bsw-kefka}:     PASS -> DMESG-FAIL (fdo#107292)
      fi-kbl-7560u:       PASS -> DMESG-FAIL (fdo#107292)

    
    ==== Possible fixes ====

    igt@drv_selftest@live_hangcheck:
      fi-skl-guc:         DMESG-FAIL (fdo#107174) -> PASS

    igt@drv_selftest@live_workarounds:
      fi-whl-u:           DMESG-FAIL (fdo#107292) -> PASS
      fi-kbl-x1275:       DMESG-FAIL (fdo#107292) -> PASS

    igt@kms_frontbuffer_tracking@basic:
      {fi-byt-clapper}:   FAIL (fdo#103167) -> PASS

    
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  fdo#103167 https://bugs.freedesktop.org/show_bug.cgi?id=103167
  fdo#107174 https://bugs.freedesktop.org/show_bug.cgi?id=107174
  fdo#107292 https://bugs.freedesktop.org/show_bug.cgi?id=107292


== Participating hosts (51 -> 46) ==

  Additional (1): fi-bxt-dsi 
  Missing    (6): fi-ilk-m540 fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-ctg-p8600 fi-gdg-551 


== Build changes ==

    * IGT: IGT_4588 -> IGTPW_1694

  CI_DRM_4633: ea6e3f703e4d234c9c8eaec6c533355c7454ecb6 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGTPW_1694: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_1694/
  IGT_4588: 7e5abbe4d9b2129bbbf02be77a70cad3da2ab941 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_1694/issues.html
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [igt-dev] ✓ Fi.CI.IGT: success for series starting with [i-g-t,1/2] igt/perf_pmu: Aim for a fixed number of iterations for calibrating accuracy
  2018-08-08 14:59 ` [igt-dev] " Chris Wilson
                   ` (2 preceding siblings ...)
  (?)
@ 2018-08-08 21:49 ` Patchwork
  -1 siblings, 0 replies; 16+ messages in thread
From: Patchwork @ 2018-08-08 21:49 UTC (permalink / raw)
  To: Chris Wilson; +Cc: igt-dev

== Series Details ==

Series: series starting with [i-g-t,1/2] igt/perf_pmu: Aim for a fixed number of iterations for calibrating accuracy
URL   : https://patchwork.freedesktop.org/series/47895/
State : success

== Summary ==

= CI Bug Log - changes from IGT_4588_full -> IGTPW_1694_full =

== Summary - SUCCESS ==

  No regressions found.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/47895/revisions/1/mbox/

== Known issues ==

  Here are the changes found in IGTPW_1694_full that come from known issues:

  === IGT changes ===

    ==== Issues hit ====

    igt@kms_setmode@basic:
      shard-kbl:          PASS -> FAIL (fdo#99912)

    igt@kms_vblank@pipe-a-ts-continuation-dpms-rpm:
      shard-kbl:          PASS -> FAIL (fdo#106539)
      shard-apl:          PASS -> FAIL (fdo#106539)
      shard-glk:          PASS -> FAIL (fdo#106539)
      shard-hsw:          PASS -> FAIL (fdo#106539)

    igt@perf@blocking:
      shard-hsw:          PASS -> FAIL (fdo#102252)

    
    ==== Possible fixes ====

    igt@drv_suspend@shrink:
      shard-snb:          INCOMPLETE (fdo#106886, fdo#105411) -> PASS

    igt@gem_softpin@evict-snoop-interruptible:
      shard-snb:          INCOMPLETE (fdo#105411) -> SKIP

    igt@kms_setmode@basic:
      shard-apl:          FAIL (fdo#99912) -> PASS

    igt@kms_vblank@pipe-a-ts-continuation-modeset-rpm:
      shard-apl:          FAIL (fdo#106539) -> PASS +1

    igt@pm_rpm@gem-pread:
      shard-glk:          WARN -> PASS

    igt@pm_rpm@modeset-non-lpsp-stress:
      shard-kbl:          FAIL (fdo#106539) -> PASS +1
      shard-hsw:          FAIL (fdo#106539) -> PASS +1
      shard-glk:          FAIL (fdo#106539) -> PASS +1

    
  fdo#102252 https://bugs.freedesktop.org/show_bug.cgi?id=102252
  fdo#105411 https://bugs.freedesktop.org/show_bug.cgi?id=105411
  fdo#106539 https://bugs.freedesktop.org/show_bug.cgi?id=106539
  fdo#106886 https://bugs.freedesktop.org/show_bug.cgi?id=106886
  fdo#99912 https://bugs.freedesktop.org/show_bug.cgi?id=99912


== Participating hosts (5 -> 5) ==

  No changes in participating hosts


== Build changes ==

    * IGT: IGT_4588 -> IGTPW_1694
    * Linux: CI_DRM_4632 -> CI_DRM_4633

  CI_DRM_4632: 648e2ff1094eabf43613f41d4d719c1a1f555dbb @ git://anongit.freedesktop.org/gfx-ci/linux
  CI_DRM_4633: ea6e3f703e4d234c9c8eaec6c533355c7454ecb6 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGTPW_1694: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_1694/
  IGT_4588: 7e5abbe4d9b2129bbbf02be77a70cad3da2ab941 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_1694/shards.html
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [igt-dev] [PATCH i-g-t 1/2] igt/perf_pmu: Aim for a fixed number of iterations for calibrating accuracy
  2018-08-08 14:59 ` [igt-dev] " Chris Wilson
@ 2018-08-09 11:54   ` Tvrtko Ursulin
  -1 siblings, 0 replies; 16+ messages in thread
From: Tvrtko Ursulin @ 2018-08-09 11:54 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: igt-dev


On 08/08/2018 15:59, Chris Wilson wrote:
> Our observation is that the systematic error is proportional to the
> number of iterations we perform; the suspicion is that it directly
> correlates with the number of sleeps. Reduce the number of iterations,
> to try and keep the error in check.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   tests/perf_pmu.c | 34 +++++++++++++++++++++-------------
>   1 file changed, 21 insertions(+), 13 deletions(-)
> 
> diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
> index 9a20abb6b..5a26d5272 100644
> --- a/tests/perf_pmu.c
> +++ b/tests/perf_pmu.c
> @@ -1521,14 +1521,13 @@ static void __rearm_spin_batch(igt_spin_t *spin)
>   
>   static void
>   accuracy(int gem_fd, const struct intel_execution_engine2 *e,
> -	 unsigned long target_busy_pct)
> +	 unsigned long target_busy_pct,
> +	 unsigned long target_iters)
>   {
> -	unsigned long busy_us = 10000 - 100 * (1 + abs(50 - target_busy_pct));
> -	unsigned long idle_us = 100 * (busy_us - target_busy_pct *
> -				busy_us / 100) / target_busy_pct;
>   	const unsigned long min_test_us = 1e6;
> -	const unsigned long pwm_calibration_us = min_test_us;
> -	const unsigned long test_us = min_test_us;
> +	unsigned long pwm_calibration_us;
> +	unsigned long test_us;
> +	unsigned long cycle_us, busy_us, idle_us;
>   	double busy_r, expected;
>   	uint64_t val[2];
>   	uint64_t ts[2];
> @@ -1538,18 +1537,27 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
>   	/* Sampling platforms cannot reach the high accuracy criteria. */
>   	igt_require(gem_has_execlists(gem_fd));
>   
> -	while (idle_us < 2500) {
> +	/* Aim for approximately 100 iterations for calibration */
> +	cycle_us = min_test_us / target_iters;
> +	busy_us = cycle_us * target_busy_pct / 100;
> +	idle_us = cycle_us - busy_us;

2% load, 1s / 10 iters
	cycles_us = 100ms
	busy_us = 2ms
	idle_us = 98ms
...

> +
> +	while (idle_us < 2500 || busy_us < 2500) {
>   		busy_us *= 2;
>   		idle_us *= 2;

...

busy_us = 4ms
idle_us = 196ms

I fear that even sampling timers will get it right with a PWM cycle
this long, so we would fail to notice that GuC mode is inaccurate for
real-world workloads.

Okay, the question is what real-world workloads look like... are they
really typically shorter than 4ms batches? And what PWM cycle do we need
here to notice this?

AFAIR I had this empirically worked out to the values that were
previously used, or perhaps there was some leeway. Hmm.. I think we should
finish the series with a patch to remove the skip on !has_execlists so CI tells us?

Regards,

Tvrtko

>   	}
> +	cycle_us = busy_us + idle_us;
> +	pwm_calibration_us = target_iters * cycle_us / 2;
> +	test_us = target_iters * cycle_us;
>   
> -	igt_info("calibration=%lums, test=%lums; ratio=%.2f%% (%luus/%luus)\n",
> -		 pwm_calibration_us / 1000, test_us / 1000,
> -		 (double)busy_us / (busy_us + idle_us) * 100.0,
> +	igt_info("calibration=%lums, test=%lums, cycle=%lums; ratio=%.2f%% (%luus/%luus)\n",
> +		 pwm_calibration_us / 1000, test_us / 1000, cycle_us / 1000,
> +		 (double)busy_us / cycle_us * 100.0,
>   		 busy_us, idle_us);
>   
> -	assert_within_epsilon((double)busy_us / (busy_us + idle_us),
> -				(double)target_busy_pct / 100.0, tolerance);
> +	assert_within_epsilon((double)busy_us / cycle_us,
> +			      (double)target_busy_pct / 100.0,
> +			      tolerance);
>   
>   	igt_assert(pipe(link) == 0);
>   
> @@ -1796,7 +1804,7 @@ igt_main
>   			for (i = 0; i < ARRAY_SIZE(pct); i++) {
>   				igt_subtest_f("busy-accuracy-%u-%s",
>   					      pct[i], e->name)
> -					accuracy(fd, e, pct[i]);
> +					accuracy(fd, e, pct[i], 10);
>   			}
>   
>   			igt_subtest_f("busy-hang-%s", e->name)
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [igt-dev] [PATCH i-g-t 1/2] igt/perf_pmu: Aim for a fixed number of iterations for calibrating accuracy
@ 2018-08-09 11:54   ` Tvrtko Ursulin
  0 siblings, 0 replies; 16+ messages in thread
From: Tvrtko Ursulin @ 2018-08-09 11:54 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: igt-dev, Tvrtko Ursulin


On 08/08/2018 15:59, Chris Wilson wrote:
> Our observation is that the systematic error is proportional to the
> number of iterations we perform; the suspicion is that it directly
> correlates with the number of sleeps. Reduce the number of iterations,
> to try and keep the error in check.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   tests/perf_pmu.c | 34 +++++++++++++++++++++-------------
>   1 file changed, 21 insertions(+), 13 deletions(-)
> 
> diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
> index 9a20abb6b..5a26d5272 100644
> --- a/tests/perf_pmu.c
> +++ b/tests/perf_pmu.c
> @@ -1521,14 +1521,13 @@ static void __rearm_spin_batch(igt_spin_t *spin)
>   
>   static void
>   accuracy(int gem_fd, const struct intel_execution_engine2 *e,
> -	 unsigned long target_busy_pct)
> +	 unsigned long target_busy_pct,
> +	 unsigned long target_iters)
>   {
> -	unsigned long busy_us = 10000 - 100 * (1 + abs(50 - target_busy_pct));
> -	unsigned long idle_us = 100 * (busy_us - target_busy_pct *
> -				busy_us / 100) / target_busy_pct;
>   	const unsigned long min_test_us = 1e6;
> -	const unsigned long pwm_calibration_us = min_test_us;
> -	const unsigned long test_us = min_test_us;
> +	unsigned long pwm_calibration_us;
> +	unsigned long test_us;
> +	unsigned long cycle_us, busy_us, idle_us;
>   	double busy_r, expected;
>   	uint64_t val[2];
>   	uint64_t ts[2];
> @@ -1538,18 +1537,27 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
>   	/* Sampling platforms cannot reach the high accuracy criteria. */
>   	igt_require(gem_has_execlists(gem_fd));
>   
> -	while (idle_us < 2500) {
> +	/* Aim for approximately 100 iterations for calibration */
> +	cycle_us = min_test_us / target_iters;
> +	busy_us = cycle_us * target_busy_pct / 100;
> +	idle_us = cycle_us - busy_us;

2% load, 1s / 10 iters
	cycles_us = 100ms
	busy_us = 2ms
	idle_us = 98ms
...

> +
> +	while (idle_us < 2500 || busy_us < 2500) {
>   		busy_us *= 2;
>   		idle_us *= 2;

...

busy_us = 4ms
idle_us = 196ms

I fear that even sampling timers will get it right with a PWM cycle
this long, so we would fail to notice that GuC mode is inaccurate for
real-world workloads.

Okay, the question is what real-world workloads look like... are they
really typically shorter than 4ms batches? And what PWM cycle do we need
here to notice this?

AFAIR I had this empirically worked out to the values that were
previously used, or perhaps there was some leeway. Hmm.. I think we should
finish the series with a patch to remove the skip on !has_execlists so CI tells us?

Regards,

Tvrtko

>   	}
> +	cycle_us = busy_us + idle_us;
> +	pwm_calibration_us = target_iters * cycle_us / 2;
> +	test_us = target_iters * cycle_us;
>   
> -	igt_info("calibration=%lums, test=%lums; ratio=%.2f%% (%luus/%luus)\n",
> -		 pwm_calibration_us / 1000, test_us / 1000,
> -		 (double)busy_us / (busy_us + idle_us) * 100.0,
> +	igt_info("calibration=%lums, test=%lums, cycle=%lums; ratio=%.2f%% (%luus/%luus)\n",
> +		 pwm_calibration_us / 1000, test_us / 1000, cycle_us / 1000,
> +		 (double)busy_us / cycle_us * 100.0,
>   		 busy_us, idle_us);
>   
> -	assert_within_epsilon((double)busy_us / (busy_us + idle_us),
> -				(double)target_busy_pct / 100.0, tolerance);
> +	assert_within_epsilon((double)busy_us / cycle_us,
> +			      (double)target_busy_pct / 100.0,
> +			      tolerance);
>   
>   	igt_assert(pipe(link) == 0);
>   
> @@ -1796,7 +1804,7 @@ igt_main
>   			for (i = 0; i < ARRAY_SIZE(pct); i++) {
>   				igt_subtest_f("busy-accuracy-%u-%s",
>   					      pct[i], e->name)
> -					accuracy(fd, e, pct[i]);
> +					accuracy(fd, e, pct[i], 10);
>   			}
>   
>   			igt_subtest_f("busy-hang-%s", e->name)
> 
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [igt-dev] [PATCH i-g-t 1/2] igt/perf_pmu: Aim for a fixed number of iterations for calibrating accuracy
  2018-08-09 11:54   ` Tvrtko Ursulin
@ 2018-08-10 13:25     ` Chris Wilson
  -1 siblings, 0 replies; 16+ messages in thread
From: Chris Wilson @ 2018-08-10 13:25 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx; +Cc: igt-dev

Quoting Tvrtko Ursulin (2018-08-09 12:54:41)
> 
> On 08/08/2018 15:59, Chris Wilson wrote:
> > Our observation is that the systematic error is proportional to the
> > number of iterations we perform; the suspicion is that it directly
> > correlates with the number of sleeps. Reduce the number of iterations,
> > to try and keep the error in check.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > ---
> >   tests/perf_pmu.c | 34 +++++++++++++++++++++-------------
> >   1 file changed, 21 insertions(+), 13 deletions(-)
> > 
> > diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
> > index 9a20abb6b..5a26d5272 100644
> > --- a/tests/perf_pmu.c
> > +++ b/tests/perf_pmu.c
> > @@ -1521,14 +1521,13 @@ static void __rearm_spin_batch(igt_spin_t *spin)
> >   
> >   static void
> >   accuracy(int gem_fd, const struct intel_execution_engine2 *e,
> > -      unsigned long target_busy_pct)
> > +      unsigned long target_busy_pct,
> > +      unsigned long target_iters)
> >   {
> > -     unsigned long busy_us = 10000 - 100 * (1 + abs(50 - target_busy_pct));
> > -     unsigned long idle_us = 100 * (busy_us - target_busy_pct *
> > -                             busy_us / 100) / target_busy_pct;
> >       const unsigned long min_test_us = 1e6;
> > -     const unsigned long pwm_calibration_us = min_test_us;
> > -     const unsigned long test_us = min_test_us;
> > +     unsigned long pwm_calibration_us;
> > +     unsigned long test_us;
> > +     unsigned long cycle_us, busy_us, idle_us;
> >       double busy_r, expected;
> >       uint64_t val[2];
> >       uint64_t ts[2];
> > @@ -1538,18 +1537,27 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
> >       /* Sampling platforms cannot reach the high accuracy criteria. */
> >       igt_require(gem_has_execlists(gem_fd));
> >   
> > -     while (idle_us < 2500) {
> > +     /* Aim for approximately 100 iterations for calibration */
> > +     cycle_us = min_test_us / target_iters;
> > +     busy_us = cycle_us * target_busy_pct / 100;
> > +     idle_us = cycle_us - busy_us;
> 
> 2% load, 1s / 10 iters
>         cycles_us = 100ms
>         busy_us = 2ms
>         idle_us = 98ms
> ...
> 
> > +
> > +     while (idle_us < 2500 || busy_us < 2500) {
> >               busy_us *= 2;
> >               idle_us *= 2;
> 
> ...
> 
> busy_us = 4ms
> idle_us = 196ms

Currently it is 250ms per 98:2 cycle and about 20ms per 50:50 cycle. So
we are only doing 4 and 50 iterations respectively.

10 cycles is strictly an improvement :-p
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [igt-dev] [PATCH i-g-t 1/2] igt/perf_pmu: Aim for a fixed number of iterations for calibrating accuracy
@ 2018-08-10 13:25     ` Chris Wilson
  0 siblings, 0 replies; 16+ messages in thread
From: Chris Wilson @ 2018-08-10 13:25 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx; +Cc: igt-dev, Tvrtko Ursulin

Quoting Tvrtko Ursulin (2018-08-09 12:54:41)
> 
> On 08/08/2018 15:59, Chris Wilson wrote:
> > Our observation is that the systematic error is proportional to the
> > number of iterations we perform; the suspicion is that it directly
> > correlates with the number of sleeps. Reduce the number of iterations,
> > to try and keep the error in check.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > ---
> >   tests/perf_pmu.c | 34 +++++++++++++++++++++-------------
> >   1 file changed, 21 insertions(+), 13 deletions(-)
> > 
> > diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
> > index 9a20abb6b..5a26d5272 100644
> > --- a/tests/perf_pmu.c
> > +++ b/tests/perf_pmu.c
> > @@ -1521,14 +1521,13 @@ static void __rearm_spin_batch(igt_spin_t *spin)
> >   
> >   static void
> >   accuracy(int gem_fd, const struct intel_execution_engine2 *e,
> > -      unsigned long target_busy_pct)
> > +      unsigned long target_busy_pct,
> > +      unsigned long target_iters)
> >   {
> > -     unsigned long busy_us = 10000 - 100 * (1 + abs(50 - target_busy_pct));
> > -     unsigned long idle_us = 100 * (busy_us - target_busy_pct *
> > -                             busy_us / 100) / target_busy_pct;
> >       const unsigned long min_test_us = 1e6;
> > -     const unsigned long pwm_calibration_us = min_test_us;
> > -     const unsigned long test_us = min_test_us;
> > +     unsigned long pwm_calibration_us;
> > +     unsigned long test_us;
> > +     unsigned long cycle_us, busy_us, idle_us;
> >       double busy_r, expected;
> >       uint64_t val[2];
> >       uint64_t ts[2];
> > @@ -1538,18 +1537,27 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
> >       /* Sampling platforms cannot reach the high accuracy criteria. */
> >       igt_require(gem_has_execlists(gem_fd));
> >   
> > -     while (idle_us < 2500) {
> > +     /* Aim for approximately 100 iterations for calibration */
> > +     cycle_us = min_test_us / target_iters;
> > +     busy_us = cycle_us * target_busy_pct / 100;
> > +     idle_us = cycle_us - busy_us;
> 
> 2% load, 1s / 10 iters
>         cycle_us = 100ms
>         busy_us = 2ms
>         idle_us = 98ms
> ...
> 
> > +
> > +     while (idle_us < 2500 || busy_us < 2500) {
> >               busy_us *= 2;
> >               idle_us *= 2;
> 
> ...
> 
> busy_us = 4ms
> idle_us = 196ms

Currently it is 250ms per 98:2 cycle and about 20ms per 50:50 cycle. So
we are only doing 4 and 50 iterations respectively.

10 cycles is strictly an improvement :-p
-Chris
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [igt-dev] [PATCH i-g-t 1/2] igt/perf_pmu: Aim for a fixed number of iterations for calibrating accuracy
  2018-08-10 13:25     ` Chris Wilson
@ 2018-08-13  9:20       ` Tvrtko Ursulin
  -1 siblings, 0 replies; 16+ messages in thread
From: Tvrtko Ursulin @ 2018-08-13  9:20 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: igt-dev


On 10/08/2018 14:25, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-08-09 12:54:41)
>>
>> On 08/08/2018 15:59, Chris Wilson wrote:
>>> Our observation is that the systematic error is proportional to the
>>> number of iterations we perform; the suspicion is that it directly
>>> correlates with the number of sleeps. Reduce the number of iterations,
>>> to try and keep the error in check.
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>> ---
>>>    tests/perf_pmu.c | 34 +++++++++++++++++++++-------------
>>>    1 file changed, 21 insertions(+), 13 deletions(-)
>>>
>>> diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
>>> index 9a20abb6b..5a26d5272 100644
>>> --- a/tests/perf_pmu.c
>>> +++ b/tests/perf_pmu.c
>>> @@ -1521,14 +1521,13 @@ static void __rearm_spin_batch(igt_spin_t *spin)
>>>    
>>>    static void
>>>    accuracy(int gem_fd, const struct intel_execution_engine2 *e,
>>> -      unsigned long target_busy_pct)
>>> +      unsigned long target_busy_pct,
>>> +      unsigned long target_iters)
>>>    {
>>> -     unsigned long busy_us = 10000 - 100 * (1 + abs(50 - target_busy_pct));
>>> -     unsigned long idle_us = 100 * (busy_us - target_busy_pct *
>>> -                             busy_us / 100) / target_busy_pct;
>>>        const unsigned long min_test_us = 1e6;
>>> -     const unsigned long pwm_calibration_us = min_test_us;
>>> -     const unsigned long test_us = min_test_us;
>>> +     unsigned long pwm_calibration_us;
>>> +     unsigned long test_us;
>>> +     unsigned long cycle_us, busy_us, idle_us;
>>>        double busy_r, expected;
>>>        uint64_t val[2];
>>>        uint64_t ts[2];
>>> @@ -1538,18 +1537,27 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
>>>        /* Sampling platforms cannot reach the high accuracy criteria. */
>>>        igt_require(gem_has_execlists(gem_fd));
>>>    
>>> -     while (idle_us < 2500) {
>>> +     /* Aim for approximately 100 iterations for calibration */
>>> +     cycle_us = min_test_us / target_iters;
>>> +     busy_us = cycle_us * target_busy_pct / 100;
>>> +     idle_us = cycle_us - busy_us;
>>
>> 2% load, 1s / 10 iters
>>          cycle_us = 100ms
>>          busy_us = 2ms
>>          idle_us = 98ms
>> ...
>>
>>> +
>>> +     while (idle_us < 2500 || busy_us < 2500) {
>>>                busy_us *= 2;
>>>                idle_us *= 2;
>>
>> ...
>>
>> busy_us = 4ms
>> idle_us = 196ms
> 
> Currently it is 250ms per 98:2 cycle and about 20ms per 50:50 cycle. So
> we are only doing 4 and 50 iterations respectively.
> 
> 10 cycles is strictly an improvement :-p

Hmm indeed. It seems I misremembered how it works. I'll re-read your 
patches.

Regards,

Tvrtko

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [igt-dev] [PATCH i-g-t 1/2] igt/perf_pmu: Aim for a fixed number of iterations for calibrating accuracy
@ 2018-08-13  9:20       ` Tvrtko Ursulin
  0 siblings, 0 replies; 16+ messages in thread
From: Tvrtko Ursulin @ 2018-08-13  9:20 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: igt-dev, Tvrtko Ursulin


On 10/08/2018 14:25, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-08-09 12:54:41)
>>
>> On 08/08/2018 15:59, Chris Wilson wrote:
>>> Our observation is that the systematic error is proportional to the
>>> number of iterations we perform; the suspicion is that it directly
>>> correlates with the number of sleeps. Reduce the number of iterations,
>>> to try and keep the error in check.
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>> ---
>>>    tests/perf_pmu.c | 34 +++++++++++++++++++++-------------
>>>    1 file changed, 21 insertions(+), 13 deletions(-)
>>>
>>> diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
>>> index 9a20abb6b..5a26d5272 100644
>>> --- a/tests/perf_pmu.c
>>> +++ b/tests/perf_pmu.c
>>> @@ -1521,14 +1521,13 @@ static void __rearm_spin_batch(igt_spin_t *spin)
>>>    
>>>    static void
>>>    accuracy(int gem_fd, const struct intel_execution_engine2 *e,
>>> -      unsigned long target_busy_pct)
>>> +      unsigned long target_busy_pct,
>>> +      unsigned long target_iters)
>>>    {
>>> -     unsigned long busy_us = 10000 - 100 * (1 + abs(50 - target_busy_pct));
>>> -     unsigned long idle_us = 100 * (busy_us - target_busy_pct *
>>> -                             busy_us / 100) / target_busy_pct;
>>>        const unsigned long min_test_us = 1e6;
>>> -     const unsigned long pwm_calibration_us = min_test_us;
>>> -     const unsigned long test_us = min_test_us;
>>> +     unsigned long pwm_calibration_us;
>>> +     unsigned long test_us;
>>> +     unsigned long cycle_us, busy_us, idle_us;
>>>        double busy_r, expected;
>>>        uint64_t val[2];
>>>        uint64_t ts[2];
>>> @@ -1538,18 +1537,27 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
>>>        /* Sampling platforms cannot reach the high accuracy criteria. */
>>>        igt_require(gem_has_execlists(gem_fd));
>>>    
>>> -     while (idle_us < 2500) {
>>> +     /* Aim for approximately 100 iterations for calibration */
>>> +     cycle_us = min_test_us / target_iters;
>>> +     busy_us = cycle_us * target_busy_pct / 100;
>>> +     idle_us = cycle_us - busy_us;
>>
>> 2% load, 1s / 10 iters
>>          cycle_us = 100ms
>>          busy_us = 2ms
>>          idle_us = 98ms
>> ...
>>
>>> +
>>> +     while (idle_us < 2500 || busy_us < 2500) {
>>>                busy_us *= 2;
>>>                idle_us *= 2;
>>
>> ...
>>
>> busy_us = 4ms
>> idle_us = 196ms
> 
> Currently it is 250ms per 98:2 cycle and about 20ms per 50:50 cycle. So
> we are only doing 4 and 50 iterations respectively.
> 
> 10 cycles is strictly an improvement :-p

Hmm indeed. It seems I misremembered how it works. I'll re-read your 
patches.

Regards,

Tvrtko

_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH i-g-t 1/2] igt/perf_pmu: Aim for a fixed number of iterations for calibrating accuracy
  2018-08-08 14:59 ` [igt-dev] " Chris Wilson
@ 2018-08-30 16:31   ` Tvrtko Ursulin
  -1 siblings, 0 replies; 16+ messages in thread
From: Tvrtko Ursulin @ 2018-08-30 16:31 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: igt-dev


On 08/08/2018 15:59, Chris Wilson wrote:
> Our observation is that the systematic error is proportional to the
> number of iterations we perform; the suspicion is that it directly
> correlates with the number of sleeps. Reduce the number of iterations,
> to try and keep the error in check.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   tests/perf_pmu.c | 34 +++++++++++++++++++++-------------
>   1 file changed, 21 insertions(+), 13 deletions(-)
> 
> diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
> index 9a20abb6b..5a26d5272 100644
> --- a/tests/perf_pmu.c
> +++ b/tests/perf_pmu.c
> @@ -1521,14 +1521,13 @@ static void __rearm_spin_batch(igt_spin_t *spin)
>   
>   static void
>   accuracy(int gem_fd, const struct intel_execution_engine2 *e,
> -	 unsigned long target_busy_pct)
> +	 unsigned long target_busy_pct,
> +	 unsigned long target_iters)
>   {
> -	unsigned long busy_us = 10000 - 100 * (1 + abs(50 - target_busy_pct));
> -	unsigned long idle_us = 100 * (busy_us - target_busy_pct *
> -				busy_us / 100) / target_busy_pct;
>   	const unsigned long min_test_us = 1e6;
> -	const unsigned long pwm_calibration_us = min_test_us;
> -	const unsigned long test_us = min_test_us;
> +	unsigned long pwm_calibration_us;
> +	unsigned long test_us;
> +	unsigned long cycle_us, busy_us, idle_us;
>   	double busy_r, expected;
>   	uint64_t val[2];
>   	uint64_t ts[2];
> @@ -1538,18 +1537,27 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
>   	/* Sampling platforms cannot reach the high accuracy criteria. */
>   	igt_require(gem_has_execlists(gem_fd));
>   
> -	while (idle_us < 2500) {
> +	/* Aim for approximately 100 iterations for calibration */
> +	cycle_us = min_test_us / target_iters;
> +	busy_us = cycle_us * target_busy_pct / 100;
> +	idle_us = cycle_us - busy_us;
> +
> +	while (idle_us < 2500 || busy_us < 2500) {
>   		busy_us *= 2;
>   		idle_us *= 2;
>   	}
> +	cycle_us = busy_us + idle_us;
> +	pwm_calibration_us = target_iters * cycle_us / 2;

I'd be tempted not to halve the calibration phase, just to minimize the 
number of changes.

> +	test_us = target_iters * cycle_us;
>   
> -	igt_info("calibration=%lums, test=%lums; ratio=%.2f%% (%luus/%luus)\n",
> -		 pwm_calibration_us / 1000, test_us / 1000,
> -		 (double)busy_us / (busy_us + idle_us) * 100.0,
> +	igt_info("calibration=%lums, test=%lums, cycle=%lums; ratio=%.2f%% (%luus/%luus)\n",
> +		 pwm_calibration_us / 1000, test_us / 1000, cycle_us / 1000,
> +		 (double)busy_us / cycle_us * 100.0,
>   		 busy_us, idle_us);
>   
> -	assert_within_epsilon((double)busy_us / (busy_us + idle_us),
> -				(double)target_busy_pct / 100.0, tolerance);
> +	assert_within_epsilon((double)busy_us / cycle_us,
> +			      (double)target_busy_pct / 100.0,
> +			      tolerance);
>   
>   	igt_assert(pipe(link) == 0);
>   
> @@ -1796,7 +1804,7 @@ igt_main
>   			for (i = 0; i < ARRAY_SIZE(pct); i++) {
>   				igt_subtest_f("busy-accuracy-%u-%s",
>   					      pct[i], e->name)
> -					accuracy(fd, e, pct[i]);
> +					accuracy(fd, e, pct[i], 10);
>   			}
>   
>   			igt_subtest_f("busy-hang-%s", e->name)
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [igt-dev] [Intel-gfx] [PATCH i-g-t 1/2] igt/perf_pmu: Aim for a fixed number of iterations for calibrating accuracy
@ 2018-08-30 16:31   ` Tvrtko Ursulin
  0 siblings, 0 replies; 16+ messages in thread
From: Tvrtko Ursulin @ 2018-08-30 16:31 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: igt-dev


On 08/08/2018 15:59, Chris Wilson wrote:
> Our observation is that the systematic error is proportional to the
> number of iterations we perform; the suspicion is that it directly
> correlates with the number of sleeps. Reduce the number of iterations,
> to try and keep the error in check.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   tests/perf_pmu.c | 34 +++++++++++++++++++++-------------
>   1 file changed, 21 insertions(+), 13 deletions(-)
> 
> diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
> index 9a20abb6b..5a26d5272 100644
> --- a/tests/perf_pmu.c
> +++ b/tests/perf_pmu.c
> @@ -1521,14 +1521,13 @@ static void __rearm_spin_batch(igt_spin_t *spin)
>   
>   static void
>   accuracy(int gem_fd, const struct intel_execution_engine2 *e,
> -	 unsigned long target_busy_pct)
> +	 unsigned long target_busy_pct,
> +	 unsigned long target_iters)
>   {
> -	unsigned long busy_us = 10000 - 100 * (1 + abs(50 - target_busy_pct));
> -	unsigned long idle_us = 100 * (busy_us - target_busy_pct *
> -				busy_us / 100) / target_busy_pct;
>   	const unsigned long min_test_us = 1e6;
> -	const unsigned long pwm_calibration_us = min_test_us;
> -	const unsigned long test_us = min_test_us;
> +	unsigned long pwm_calibration_us;
> +	unsigned long test_us;
> +	unsigned long cycle_us, busy_us, idle_us;
>   	double busy_r, expected;
>   	uint64_t val[2];
>   	uint64_t ts[2];
> @@ -1538,18 +1537,27 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
>   	/* Sampling platforms cannot reach the high accuracy criteria. */
>   	igt_require(gem_has_execlists(gem_fd));
>   
> -	while (idle_us < 2500) {
> +	/* Aim for approximately 100 iterations for calibration */
> +	cycle_us = min_test_us / target_iters;
> +	busy_us = cycle_us * target_busy_pct / 100;
> +	idle_us = cycle_us - busy_us;
> +
> +	while (idle_us < 2500 || busy_us < 2500) {
>   		busy_us *= 2;
>   		idle_us *= 2;
>   	}
> +	cycle_us = busy_us + idle_us;
> +	pwm_calibration_us = target_iters * cycle_us / 2;

I'd be tempted not to halve the calibration phase, just to minimize the 
number of changes.

> +	test_us = target_iters * cycle_us;
>   
> -	igt_info("calibration=%lums, test=%lums; ratio=%.2f%% (%luus/%luus)\n",
> -		 pwm_calibration_us / 1000, test_us / 1000,
> -		 (double)busy_us / (busy_us + idle_us) * 100.0,
> +	igt_info("calibration=%lums, test=%lums, cycle=%lums; ratio=%.2f%% (%luus/%luus)\n",
> +		 pwm_calibration_us / 1000, test_us / 1000, cycle_us / 1000,
> +		 (double)busy_us / cycle_us * 100.0,
>   		 busy_us, idle_us);
>   
> -	assert_within_epsilon((double)busy_us / (busy_us + idle_us),
> -				(double)target_busy_pct / 100.0, tolerance);
> +	assert_within_epsilon((double)busy_us / cycle_us,
> +			      (double)target_busy_pct / 100.0,
> +			      tolerance);
>   
>   	igt_assert(pipe(link) == 0);
>   
> @@ -1796,7 +1804,7 @@ igt_main
>   			for (i = 0; i < ARRAY_SIZE(pct); i++) {
>   				igt_subtest_f("busy-accuracy-%u-%s",
>   					      pct[i], e->name)
> -					accuracy(fd, e, pct[i]);
> +					accuracy(fd, e, pct[i], 10);
>   			}
>   
>   			igt_subtest_f("busy-hang-%s", e->name)
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH i-g-t 2/2] igt/perf_pmu: Improve the presentation of the accuracy calibration
  2018-08-08 14:59   ` [Intel-gfx] " Chris Wilson
@ 2018-08-30 16:53     ` Tvrtko Ursulin
  -1 siblings, 0 replies; 16+ messages in thread
From: Tvrtko Ursulin @ 2018-08-30 16:53 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: igt-dev


On 08/08/2018 15:59, Chris Wilson wrote:
> Normalize the variance to stddev, and remove some redundant steps in
> computing the time from itself.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   tests/perf_pmu.c | 22 +++++++++++++---------
>   1 file changed, 13 insertions(+), 9 deletions(-)
> 
> diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
> index 5a26d5272..4e8da3d94 100644
> --- a/tests/perf_pmu.c
> +++ b/tests/perf_pmu.c
> @@ -1577,8 +1577,8 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
>   		/* 1st pass is calibration, second pass is the test. */
>   		for (int pass = 0; pass < ARRAY_SIZE(timeout); pass++) {
>   			unsigned int target_idle_us = idle_us;
> -			uint64_t busy_ns = 0, idle_ns = 0;
>   			struct timespec start = { };
> +			uint64_t busy_ns = 0;
>   			unsigned long pass_ns = 0;
>   			double avg = 0.0, var = 0.0;
>   			unsigned int n = 0;
> @@ -1589,6 +1589,7 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
>   				unsigned long loop_ns, loop_busy;
>   				struct timespec _ts = { };
>   				double err, tmp;
> +				uint64_t now;
>   
>   				/* PWM idle sleep. */
>   				_ts.tv_nsec = target_idle_us * 1000;
> @@ -1605,14 +1606,13 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
>   				igt_spin_batch_end(spin);
>   
>   				/* Time accounting. */
> -				loop_ns = igt_nsec_elapsed(&start);
> -				loop_busy = loop_ns - loop_busy;
> -				loop_ns -= pass_ns;
> +				now = igt_nsec_elapsed(&start);
> +				loop_busy = now - loop_busy;
> +				loop_ns = now - pass_ns;
> +				pass_ns = now;
>   
>   				busy_ns += loop_busy;
>   				total_busy_ns += loop_busy;
> -				idle_ns += loop_ns - loop_busy;
> -				pass_ns += loop_ns;
>   				total_ns += loop_ns;

Looks okay, but ugh... just made me lose ten minutes reconstructing 
before and after for no real benefit. :I

>   
>   				/* Re-calibrate. */
> @@ -1628,10 +1628,14 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
>   				var += (err - avg) * (err - tmp);
>   			} while (pass_ns < timeout[pass]);
>   
> +			pass_ns = igt_nsec_elapsed(&start);
>   			expected = (double)busy_ns / pass_ns;
> -			igt_info("%u: busy %"PRIu64"us, idle %"PRIu64"us -> %.2f%% (target: %lu%%; average=%.2f, variance=%f)\n",
> -				 pass, busy_ns / 1000, idle_ns / 1000,
> -				 100 * expected, target_busy_pct, avg, var / n);
> +
> +			igt_info("%u: busy %"PRIu64"us, idle %"PRIu64"us -> %.2f%% (target: %lu%%; average=%.2f±%.3f%%)\n",
> +				 pass, busy_ns / 1000, (pass_ns - busy_ns) / 1000,
> +				 100 * expected, target_busy_pct,
> +				 avg, sqrt(var / n));
> +
>   			write(link[1], &expected, sizeof(expected));
>   		}
>   
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [Intel-gfx] [PATCH i-g-t 2/2] igt/perf_pmu: Improve the presentation of the accuracy calibration
@ 2018-08-30 16:53     ` Tvrtko Ursulin
  0 siblings, 0 replies; 16+ messages in thread
From: Tvrtko Ursulin @ 2018-08-30 16:53 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: igt-dev


On 08/08/2018 15:59, Chris Wilson wrote:
> Normalize the variance to stddev, and remove some redundant steps in
> computing the time from itself.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   tests/perf_pmu.c | 22 +++++++++++++---------
>   1 file changed, 13 insertions(+), 9 deletions(-)
> 
> diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
> index 5a26d5272..4e8da3d94 100644
> --- a/tests/perf_pmu.c
> +++ b/tests/perf_pmu.c
> @@ -1577,8 +1577,8 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
>   		/* 1st pass is calibration, second pass is the test. */
>   		for (int pass = 0; pass < ARRAY_SIZE(timeout); pass++) {
>   			unsigned int target_idle_us = idle_us;
> -			uint64_t busy_ns = 0, idle_ns = 0;
>   			struct timespec start = { };
> +			uint64_t busy_ns = 0;
>   			unsigned long pass_ns = 0;
>   			double avg = 0.0, var = 0.0;
>   			unsigned int n = 0;
> @@ -1589,6 +1589,7 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
>   				unsigned long loop_ns, loop_busy;
>   				struct timespec _ts = { };
>   				double err, tmp;
> +				uint64_t now;
>   
>   				/* PWM idle sleep. */
>   				_ts.tv_nsec = target_idle_us * 1000;
> @@ -1605,14 +1606,13 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
>   				igt_spin_batch_end(spin);
>   
>   				/* Time accounting. */
> -				loop_ns = igt_nsec_elapsed(&start);
> -				loop_busy = loop_ns - loop_busy;
> -				loop_ns -= pass_ns;
> +				now = igt_nsec_elapsed(&start);
> +				loop_busy = now - loop_busy;
> +				loop_ns = now - pass_ns;
> +				pass_ns = now;
>   
>   				busy_ns += loop_busy;
>   				total_busy_ns += loop_busy;
> -				idle_ns += loop_ns - loop_busy;
> -				pass_ns += loop_ns;
>   				total_ns += loop_ns;

Looks okay, but ugh... just made me lose ten minutes reconstructing 
before and after for no real benefit. :I

>   
>   				/* Re-calibrate. */
> @@ -1628,10 +1628,14 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
>   				var += (err - avg) * (err - tmp);
>   			} while (pass_ns < timeout[pass]);
>   
> +			pass_ns = igt_nsec_elapsed(&start);
>   			expected = (double)busy_ns / pass_ns;
> -			igt_info("%u: busy %"PRIu64"us, idle %"PRIu64"us -> %.2f%% (target: %lu%%; average=%.2f, variance=%f)\n",
> -				 pass, busy_ns / 1000, idle_ns / 1000,
> -				 100 * expected, target_busy_pct, avg, var / n);
> +
> +			igt_info("%u: busy %"PRIu64"us, idle %"PRIu64"us -> %.2f%% (target: %lu%%; average=%.2f±%.3f%%)\n",
> +				 pass, busy_ns / 1000, (pass_ns - busy_ns) / 1000,
> +				 100 * expected, target_busy_pct,
> +				 avg, sqrt(var / n));
> +
>   			write(link[1], &expected, sizeof(expected));
>   		}
>   
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2018-08-30 16:53 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-08-08 14:59 [PATCH i-g-t 1/2] igt/perf_pmu: Aim for a fixed number of iterations for calibrating accuracy Chris Wilson
2018-08-08 14:59 ` [igt-dev] " Chris Wilson
2018-08-08 14:59 ` [PATCH i-g-t 2/2] igt/perf_pmu: Improve the presentation of the accuracy calibration Chris Wilson
2018-08-08 14:59   ` [Intel-gfx] " Chris Wilson
2018-08-30 16:53   ` Tvrtko Ursulin
2018-08-30 16:53     ` [Intel-gfx] " Tvrtko Ursulin
2018-08-08 15:38 ` [igt-dev] ✓ Fi.CI.BAT: success for series starting with [i-g-t,1/2] igt/perf_pmu: Aim for a fixed number of iterations for calibrating accuracy Patchwork
2018-08-08 21:49 ` [igt-dev] ✓ Fi.CI.IGT: " Patchwork
2018-08-09 11:54 ` [igt-dev] [PATCH i-g-t 1/2] " Tvrtko Ursulin
2018-08-09 11:54   ` Tvrtko Ursulin
2018-08-10 13:25   ` Chris Wilson
2018-08-10 13:25     ` Chris Wilson
2018-08-13  9:20     ` Tvrtko Ursulin
2018-08-13  9:20       ` Tvrtko Ursulin
2018-08-30 16:31 ` Tvrtko Ursulin
2018-08-30 16:31   ` [igt-dev] [Intel-gfx] " Tvrtko Ursulin

This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.