All of lore.kernel.org
 help / color / mirror / Atom feed
* [CI i-g-t] tests/perf_pmu: Avoid RT thread for accuracy test
@ 2018-03-26 10:57 ` Tvrtko Ursulin
  0 siblings, 0 replies; 43+ messages in thread
From: Tvrtko Ursulin @ 2018-03-26 10:57 UTC (permalink / raw)
  To: igt-dev; +Cc: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Realtime scheduling interferes with execlists submission (tasklet) so try
to simplify the PWM loop in a few ways:

 * Drop RT.
 * Longer batches for smaller systematic error.
 * More truthful test duration calculation.
 * Fewer clock queries.
 * No self-adjust - instead just report the achieved cycle and let the
   parent check against it.
 * Report absolute cycle error.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 tests/perf_pmu.c | 80 ++++++++++++++++++++------------------------------------
 1 file changed, 29 insertions(+), 51 deletions(-)

diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
index f27b7ec7d2c2..4436a7a49141 100644
--- a/tests/perf_pmu.c
+++ b/tests/perf_pmu.c
@@ -1504,12 +1504,6 @@ test_enable_race(int gem_fd, const struct intel_execution_engine2 *e)
 	gem_quiescent_gpu(gem_fd);
 }
 
-static double __error(double val, double ref)
-{
-	igt_assert(ref > 1e-5 /* smallval */);
-	return (100.0 * val / ref) - 100.0;
-}
-
 static void __rearm_spin_batch(igt_spin_t *spin)
 {
 	const uint32_t mi_arb_chk = 0x5 << 23;
@@ -1532,13 +1526,12 @@ static void
 accuracy(int gem_fd, const struct intel_execution_engine2 *e,
 	 unsigned long target_busy_pct)
 {
-	const unsigned int min_test_loops = 7;
-	const unsigned long min_test_us = 1e6;
-	unsigned long busy_us = 2500;
+	unsigned long busy_us = 10000 - 100 * (1 + abs(50 - target_busy_pct));
 	unsigned long idle_us = 100 * (busy_us - target_busy_pct *
 				busy_us / 100) / target_busy_pct;
-	unsigned long pwm_calibration_us;
-	unsigned long test_us;
+	const unsigned long min_test_us = 1e6;
+	const unsigned long pwm_calibration_us = min_test_us;
+	const unsigned long test_us = min_test_us;
 	double busy_r, expected;
 	uint64_t val[2];
 	uint64_t ts[2];
@@ -1553,13 +1546,6 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
 		idle_us *= 2;
 	}
 
-	pwm_calibration_us = min_test_loops * (busy_us + idle_us);
-	while (pwm_calibration_us < min_test_us)
-		pwm_calibration_us += busy_us + idle_us;
-	test_us = min_test_loops * (idle_us + busy_us);
-	while (test_us < min_test_us)
-		test_us += busy_us + idle_us;
-
 	igt_info("calibration=%lums, test=%lums; ratio=%.2f%% (%luus/%luus)\n",
 		 pwm_calibration_us / 1000, test_us / 1000,
 		 (double)busy_us / (busy_us + idle_us) * 100.0,
@@ -1572,20 +1558,11 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
 
 	/* Emit PWM pattern on the engine from a child. */
 	igt_fork(child, 1) {
-		struct sched_param rt = { .sched_priority = 99 };
 		const unsigned long timeout[] = {
 			pwm_calibration_us * 1000, test_us * 1000
 		};
-		uint64_t total_busy_ns = 0, total_idle_ns = 0;
+		uint64_t busy_ns = 0, idle_ns = 0;
 		igt_spin_t *spin;
-		int ret;
-
-		/* We need the best sleep accuracy we can get. */
-		ret = sched_setscheduler(0,
-					 SCHED_FIFO | SCHED_RESET_ON_FORK,
-					 &rt);
-		if (ret)
-			igt_warn("Failed to set scheduling policy!\n");
 
 		/* Allocate our spin batch and idle it. */
 		spin = igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e), 0);
@@ -1594,37 +1571,38 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
 
 		/* 1st pass is calibration, second pass is the test. */
 		for (int pass = 0; pass < ARRAY_SIZE(timeout); pass++) {
-			uint64_t busy_ns = -total_busy_ns;
-			uint64_t idle_ns = -total_idle_ns;
-			struct timespec test_start = { };
+			struct timespec start = { };
+			unsigned long pass_ns = 0;
+
+			igt_nsec_elapsed(&start);
 
-			igt_nsec_elapsed(&test_start);
 			do {
-				unsigned int target_idle_us, t_busy;
+				unsigned long loop_ns, loop_busy;
+				struct timespec _ts = { };
+
+				/* PWM idle sleep. */
+				_ts.tv_nsec = idle_us * 1000;
+				nanosleep(&_ts, NULL);
 
 				/* Restart the spinbatch. */
 				__rearm_spin_batch(spin);
 				__submit_spin_batch(gem_fd, spin, e, 0);
 
-				/*
-				 * Note that the submission may be delayed to a
-				 * tasklet (ksoftirqd) which cannot run until we
-				 * sleep as we hog the cpu (we are RT).
-				 */
-
-				t_busy = measured_usleep(busy_us);
+				/* PWM busy sleep. */
+				loop_busy = igt_nsec_elapsed(&start);
+				_ts.tv_nsec = busy_us * 1000;
+				nanosleep(&_ts, NULL);
 				igt_spin_batch_end(spin);
-				gem_sync(gem_fd, spin->handle);
-
-				total_busy_ns += t_busy;
 
-				target_idle_us =
-					(100 * total_busy_ns / target_busy_pct - (total_busy_ns + total_idle_ns)) / 1000;
-				total_idle_ns += measured_usleep(target_idle_us);
-			} while (igt_nsec_elapsed(&test_start) < timeout[pass]);
+				/* Time accounting. */
+				loop_ns = igt_nsec_elapsed(&start);
+				loop_busy = loop_ns - loop_busy;
+				loop_ns -= pass_ns;
 
-			busy_ns += total_busy_ns;
-			idle_ns += total_idle_ns;
+				busy_ns += loop_busy;
+				idle_ns += loop_ns - loop_busy;
+				pass_ns += loop_ns;
+			} while (pass_ns < timeout[pass]);
 
 			expected = (double)busy_ns / (busy_ns + idle_ns);
 			igt_info("%u: busy %"PRIu64"us, idle %"PRIu64"us: %.2f%% (target: %lu%%)\n",
@@ -1655,8 +1633,8 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
 
 	busy_r = (double)(val[1] - val[0]) / (ts[1] - ts[0]);
 
-	igt_info("error=%.2f%% (%.2f%% vs %.2f%%)\n",
-		 __error(busy_r, expected), 100 * busy_r, 100 * expected);
+	igt_info("error=%.2f (%.2f%% vs %.2f%%)\n",
+		 (busy_r - expected) * 100, 100 * busy_r, 100 * expected);
 
 	assert_within(100.0 * busy_r, 100.0 * expected, 2);
 }
-- 
2.14.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [Intel-gfx] [CI i-g-t] tests/perf_pmu: Avoid RT thread for accuracy test
@ 2018-03-26 10:57 ` Tvrtko Ursulin
  0 siblings, 0 replies; 43+ messages in thread
From: Tvrtko Ursulin @ 2018-03-26 10:57 UTC (permalink / raw)
  To: igt-dev; +Cc: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Realtime scheduling interferes with execlists submission (tasklet) so try
to simplify the PWM loop in a few ways:

 * Drop RT.
 * Longer batches for smaller systematic error.
 * More truthful test duration calculation.
 * Fewer clock queries.
 * No self-adjust - instead just report the achieved cycle and let the
   parent check against it.
 * Report absolute cycle error.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 tests/perf_pmu.c | 80 ++++++++++++++++++++------------------------------------
 1 file changed, 29 insertions(+), 51 deletions(-)

diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
index f27b7ec7d2c2..4436a7a49141 100644
--- a/tests/perf_pmu.c
+++ b/tests/perf_pmu.c
@@ -1504,12 +1504,6 @@ test_enable_race(int gem_fd, const struct intel_execution_engine2 *e)
 	gem_quiescent_gpu(gem_fd);
 }
 
-static double __error(double val, double ref)
-{
-	igt_assert(ref > 1e-5 /* smallval */);
-	return (100.0 * val / ref) - 100.0;
-}
-
 static void __rearm_spin_batch(igt_spin_t *spin)
 {
 	const uint32_t mi_arb_chk = 0x5 << 23;
@@ -1532,13 +1526,12 @@ static void
 accuracy(int gem_fd, const struct intel_execution_engine2 *e,
 	 unsigned long target_busy_pct)
 {
-	const unsigned int min_test_loops = 7;
-	const unsigned long min_test_us = 1e6;
-	unsigned long busy_us = 2500;
+	unsigned long busy_us = 10000 - 100 * (1 + abs(50 - target_busy_pct));
 	unsigned long idle_us = 100 * (busy_us - target_busy_pct *
 				busy_us / 100) / target_busy_pct;
-	unsigned long pwm_calibration_us;
-	unsigned long test_us;
+	const unsigned long min_test_us = 1e6;
+	const unsigned long pwm_calibration_us = min_test_us;
+	const unsigned long test_us = min_test_us;
 	double busy_r, expected;
 	uint64_t val[2];
 	uint64_t ts[2];
@@ -1553,13 +1546,6 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
 		idle_us *= 2;
 	}
 
-	pwm_calibration_us = min_test_loops * (busy_us + idle_us);
-	while (pwm_calibration_us < min_test_us)
-		pwm_calibration_us += busy_us + idle_us;
-	test_us = min_test_loops * (idle_us + busy_us);
-	while (test_us < min_test_us)
-		test_us += busy_us + idle_us;
-
 	igt_info("calibration=%lums, test=%lums; ratio=%.2f%% (%luus/%luus)\n",
 		 pwm_calibration_us / 1000, test_us / 1000,
 		 (double)busy_us / (busy_us + idle_us) * 100.0,
@@ -1572,20 +1558,11 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
 
 	/* Emit PWM pattern on the engine from a child. */
 	igt_fork(child, 1) {
-		struct sched_param rt = { .sched_priority = 99 };
 		const unsigned long timeout[] = {
 			pwm_calibration_us * 1000, test_us * 1000
 		};
-		uint64_t total_busy_ns = 0, total_idle_ns = 0;
+		uint64_t busy_ns = 0, idle_ns = 0;
 		igt_spin_t *spin;
-		int ret;
-
-		/* We need the best sleep accuracy we can get. */
-		ret = sched_setscheduler(0,
-					 SCHED_FIFO | SCHED_RESET_ON_FORK,
-					 &rt);
-		if (ret)
-			igt_warn("Failed to set scheduling policy!\n");
 
 		/* Allocate our spin batch and idle it. */
 		spin = igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e), 0);
@@ -1594,37 +1571,38 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
 
 		/* 1st pass is calibration, second pass is the test. */
 		for (int pass = 0; pass < ARRAY_SIZE(timeout); pass++) {
-			uint64_t busy_ns = -total_busy_ns;
-			uint64_t idle_ns = -total_idle_ns;
-			struct timespec test_start = { };
+			struct timespec start = { };
+			unsigned long pass_ns = 0;
+
+			igt_nsec_elapsed(&start);
 
-			igt_nsec_elapsed(&test_start);
 			do {
-				unsigned int target_idle_us, t_busy;
+				unsigned long loop_ns, loop_busy;
+				struct timespec _ts = { };
+
+				/* PWM idle sleep. */
+				_ts.tv_nsec = idle_us * 1000;
+				nanosleep(&_ts, NULL);
 
 				/* Restart the spinbatch. */
 				__rearm_spin_batch(spin);
 				__submit_spin_batch(gem_fd, spin, e, 0);
 
-				/*
-				 * Note that the submission may be delayed to a
-				 * tasklet (ksoftirqd) which cannot run until we
-				 * sleep as we hog the cpu (we are RT).
-				 */
-
-				t_busy = measured_usleep(busy_us);
+				/* PWM busy sleep. */
+				loop_busy = igt_nsec_elapsed(&start);
+				_ts.tv_nsec = busy_us * 1000;
+				nanosleep(&_ts, NULL);
 				igt_spin_batch_end(spin);
-				gem_sync(gem_fd, spin->handle);
-
-				total_busy_ns += t_busy;
 
-				target_idle_us =
-					(100 * total_busy_ns / target_busy_pct - (total_busy_ns + total_idle_ns)) / 1000;
-				total_idle_ns += measured_usleep(target_idle_us);
-			} while (igt_nsec_elapsed(&test_start) < timeout[pass]);
+				/* Time accounting. */
+				loop_ns = igt_nsec_elapsed(&start);
+				loop_busy = loop_ns - loop_busy;
+				loop_ns -= pass_ns;
 
-			busy_ns += total_busy_ns;
-			idle_ns += total_idle_ns;
+				busy_ns += loop_busy;
+				idle_ns += loop_ns - loop_busy;
+				pass_ns += loop_ns;
+			} while (pass_ns < timeout[pass]);
 
 			expected = (double)busy_ns / (busy_ns + idle_ns);
 			igt_info("%u: busy %"PRIu64"us, idle %"PRIu64"us: %.2f%% (target: %lu%%)\n",
@@ -1655,8 +1633,8 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
 
 	busy_r = (double)(val[1] - val[0]) / (ts[1] - ts[0]);
 
-	igt_info("error=%.2f%% (%.2f%% vs %.2f%%)\n",
-		 __error(busy_r, expected), 100 * busy_r, 100 * expected);
+	igt_info("error=%.2f (%.2f%% vs %.2f%%)\n",
+		 (busy_r - expected) * 100, 100 * busy_r, 100 * expected);
 
 	assert_within(100.0 * busy_r, 100.0 * expected, 2);
 }
-- 
2.14.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* Re: [igt-dev] [CI i-g-t] tests/perf_pmu: Avoid RT thread for accuracy test
  2018-03-26 10:57 ` [Intel-gfx] " Tvrtko Ursulin
@ 2018-03-26 11:17   ` Chris Wilson
  -1 siblings, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2018-03-26 11:17 UTC (permalink / raw)
  To: Tvrtko Ursulin, igt-dev; +Cc: Intel-gfx

Quoting Tvrtko Ursulin (2018-03-26 11:57:58)
>  * No self-adjust - instead just report the achieved cycle and let the
>    parent check against it.

Sniff, I was rather proud of our achievement. I had it in mind as a
template for future autocalibration routines. Is it really useless
overengineering, or worse, broken?
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [igt-dev] [CI i-g-t] tests/perf_pmu: Avoid RT thread for accuracy test
@ 2018-03-26 11:17   ` Chris Wilson
  0 siblings, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2018-03-26 11:17 UTC (permalink / raw)
  To: Tvrtko Ursulin, igt-dev; +Cc: Intel-gfx, Tvrtko Ursulin

Quoting Tvrtko Ursulin (2018-03-26 11:57:58)
>  * No self-adjust - instead just report the achieved cycle and let the
>    parent check against it.

Sniff, I was rather proud of our achievement. I had it in mind as a
template for future autocalibration routines. Is it really useless
overengineering, or worse, broken?
-Chris
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 43+ messages in thread

* [igt-dev] ✓ Fi.CI.BAT: success for tests/perf_pmu: Avoid RT thread for accuracy test
  2018-03-26 10:57 ` [Intel-gfx] " Tvrtko Ursulin
  (?)
  (?)
@ 2018-03-26 11:23 ` Patchwork
  -1 siblings, 0 replies; 43+ messages in thread
From: Patchwork @ 2018-03-26 11:23 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: igt-dev

== Series Details ==

Series: tests/perf_pmu: Avoid RT thread for accuracy test
URL   : https://patchwork.freedesktop.org/series/40662/
State : success

== Summary ==

IGT patchset tested on top of latest successful build
a9741da52ad1963f7632ef1e852cbe1c3bcc601e tests/perf_pmu: Improve accuracy by waiting on spinner to start

with latest DRM-Tip kernel build CI_DRM_3978
94f5d9189e61 drm-tip: 2018y-03m-23d-23h-41m-40s UTC integration manifest

No testlist changes.

---- Known issues:

Test kms_pipe_crc_basic:
        Subgroup suspend-read-crc-pipe-c:
                incomplete -> PASS       (fi-bxt-dsi) fdo#103927
                incomplete -> PASS       (fi-hsw-4770) fdo#104944

fdo#103927 https://bugs.freedesktop.org/show_bug.cgi?id=103927
fdo#104944 https://bugs.freedesktop.org/show_bug.cgi?id=104944

fi-bdw-5557u     total:285  pass:264  dwarn:0   dfail:0   fail:0   skip:21  time:432s
fi-bdw-gvtdvm    total:285  pass:261  dwarn:0   dfail:0   fail:0   skip:24  time:446s
fi-blb-e6850     total:285  pass:220  dwarn:1   dfail:0   fail:0   skip:64  time:388s
fi-bsw-n3050     total:285  pass:239  dwarn:0   dfail:0   fail:0   skip:46  time:540s
fi-bwr-2160      total:285  pass:180  dwarn:0   dfail:0   fail:0   skip:105 time:301s
fi-bxt-dsi       total:285  pass:255  dwarn:0   dfail:0   fail:0   skip:30  time:527s
fi-bxt-j4205     total:285  pass:256  dwarn:0   dfail:0   fail:0   skip:29  time:516s
fi-byt-j1900     total:285  pass:250  dwarn:0   dfail:0   fail:0   skip:35  time:519s
fi-byt-n2820     total:285  pass:246  dwarn:0   dfail:0   fail:0   skip:39  time:509s
fi-cfl-8700k     total:285  pass:257  dwarn:0   dfail:0   fail:0   skip:28  time:410s
fi-cfl-u         total:285  pass:259  dwarn:0   dfail:0   fail:0   skip:26  time:511s
fi-elk-e7500     total:285  pass:225  dwarn:1   dfail:0   fail:0   skip:59  time:434s
fi-gdg-551       total:285  pass:177  dwarn:0   dfail:0   fail:0   skip:108 time:314s
fi-glk-1         total:285  pass:257  dwarn:0   dfail:0   fail:0   skip:28  time:537s
fi-hsw-4770      total:285  pass:258  dwarn:0   dfail:0   fail:0   skip:27  time:403s
fi-ilk-650       total:285  pass:225  dwarn:0   dfail:0   fail:0   skip:60  time:422s
fi-ivb-3520m     total:285  pass:256  dwarn:0   dfail:0   fail:0   skip:29  time:465s
fi-ivb-3770      total:285  pass:252  dwarn:0   dfail:0   fail:0   skip:33  time:430s
fi-kbl-7500u     total:285  pass:260  dwarn:1   dfail:0   fail:0   skip:24  time:478s
fi-kbl-7567u     total:285  pass:265  dwarn:0   dfail:0   fail:0   skip:20  time:472s
fi-kbl-r         total:285  pass:258  dwarn:0   dfail:0   fail:0   skip:27  time:514s
fi-pnv-d510      total:285  pass:219  dwarn:1   dfail:0   fail:0   skip:65  time:649s
fi-skl-6260u     total:285  pass:265  dwarn:0   dfail:0   fail:0   skip:20  time:443s
fi-skl-6600u     total:285  pass:258  dwarn:0   dfail:0   fail:0   skip:27  time:532s
fi-skl-6700k2    total:285  pass:261  dwarn:0   dfail:0   fail:0   skip:24  time:507s
fi-skl-6770hq    total:285  pass:265  dwarn:0   dfail:0   fail:0   skip:20  time:512s
fi-skl-guc       total:285  pass:257  dwarn:0   dfail:0   fail:0   skip:28  time:430s
fi-skl-gvtdvm    total:285  pass:262  dwarn:0   dfail:0   fail:0   skip:23  time:453s
fi-snb-2520m     total:285  pass:245  dwarn:0   dfail:0   fail:0   skip:40  time:592s
fi-snb-2600      total:285  pass:245  dwarn:0   dfail:0   fail:0   skip:40  time:403s
Blacklisted hosts:
fi-cfl-s3        total:285  pass:259  dwarn:0   dfail:0   fail:0   skip:26  time:568s
fi-cnl-psr       total:224  pass:198  dwarn:0   dfail:0   fail:1   skip:24 
fi-glk-j4005     total:285  pass:256  dwarn:0   dfail:0   fail:0   skip:29  time:488s

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_1191/issues.html
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [igt-dev] [CI i-g-t] tests/perf_pmu: Avoid RT thread for accuracy test
  2018-03-26 11:17   ` Chris Wilson
@ 2018-03-26 12:40     ` Tvrtko Ursulin
  -1 siblings, 0 replies; 43+ messages in thread
From: Tvrtko Ursulin @ 2018-03-26 12:40 UTC (permalink / raw)
  To: Chris Wilson, Tvrtko Ursulin, igt-dev; +Cc: Intel-gfx


On 26/03/2018 12:17, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-03-26 11:57:58)
>>   * No self-adjust - instead just report the achieved cycle and let the
>>     parent check against it.
> 
> Sniff, I was rather proud of our achievement. I had it in mind as a
> template for future autocalibration routines. Is it really useless
> overengineering, or worse broken?

It works fine I think, but the problem is I cannot locate the source of a
systematic error which seems proportional to the number of loop iterations.
:( After battling with trying to improve it for a couple of days I decided
to try to see how the simpler approach will fare on the shards.

There's the tasklet delay, which made me think things could be better 
without RT. And then polling on the spinner makes it worse in all cases 
for me, however I fiddle with it. So again, I wanted to try the 
simplification..

The version from this patch seems super stable on my system, but the 50% 
case still has an apparent +.5-6% systematic error. Maybe on the shards 
it will not be as stable..

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [igt-dev] [Intel-gfx] [CI i-g-t] tests/perf_pmu: Avoid RT thread for accuracy test
@ 2018-03-26 12:40     ` Tvrtko Ursulin
  0 siblings, 0 replies; 43+ messages in thread
From: Tvrtko Ursulin @ 2018-03-26 12:40 UTC (permalink / raw)
  To: Chris Wilson, Tvrtko Ursulin, igt-dev; +Cc: Intel-gfx


On 26/03/2018 12:17, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-03-26 11:57:58)
>>   * No self-adjust - instead just report the achieved cycle and let the
>>     parent check against it.
> 
> Sniff, I was rather proud of our achievement. I had it in mind as a
> template for future autocalibration routines. Is it really useless
> overengineering, or worse broken?

It works fine I think, but the problem is I cannot locate the source of a
systematic error which seems proportional to the number of loop iterations.
:( After battling with trying to improve it for a couple of days I decided
to try to see how the simpler approach will fare on the shards.

There's the tasklet delay, which made me think things could be better 
without RT. And then polling on the spinner makes it worse in all cases 
for me, however I fiddle with it. So again, I wanted to try the 
simplification..

The version from this patch seems super stable on my system, but the 50% 
case still has an apparent +.5-6% systematic error. Maybe on the shards 
it will not be as stable..

Regards,

Tvrtko
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 43+ messages in thread

* [igt-dev] ✗ Fi.CI.IGT: warning for tests/perf_pmu: Avoid RT thread for accuracy test
  2018-03-26 10:57 ` [Intel-gfx] " Tvrtko Ursulin
                   ` (2 preceding siblings ...)
  (?)
@ 2018-03-26 13:04 ` Patchwork
  -1 siblings, 0 replies; 43+ messages in thread
From: Patchwork @ 2018-03-26 13:04 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: igt-dev

== Series Details ==

Series: tests/perf_pmu: Avoid RT thread for accuracy test
URL   : https://patchwork.freedesktop.org/series/40662/
State : warning

== Summary ==

---- Possible new issues:

Test kms_flip:
        Subgroup 2x-single-buffer-flip-vs-dpms-off-vs-modeset:
                pass       -> DMESG-WARN (shard-hsw)

---- Known issues:

Test kms_flip:
        Subgroup 2x-flip-vs-expired-vblank-interruptible:
                fail       -> PASS       (shard-hsw) fdo#102887
        Subgroup 2x-plain-flip-fb-recreate-interruptible:
                pass       -> FAIL       (shard-hsw) fdo#100368 +1
        Subgroup dpms-vs-vblank-race-interruptible:
                fail       -> PASS       (shard-hsw) fdo#103060 +2
Test kms_plane_multiple:
        Subgroup atomic-pipe-a-tiling-x:
                fail       -> PASS       (shard-snb) fdo#103166
Test kms_rotation_crc:
        Subgroup sprite-rotation-180:
                fail       -> PASS       (shard-snb) fdo#103925
Test kms_setmode:
        Subgroup basic:
                pass       -> FAIL       (shard-apl) fdo#99912
Test kms_vblank:
        Subgroup pipe-a-accuracy-idle:
                pass       -> FAIL       (shard-hsw) fdo#102583

fdo#102887 https://bugs.freedesktop.org/show_bug.cgi?id=102887
fdo#100368 https://bugs.freedesktop.org/show_bug.cgi?id=100368
fdo#103060 https://bugs.freedesktop.org/show_bug.cgi?id=103060
fdo#103166 https://bugs.freedesktop.org/show_bug.cgi?id=103166
fdo#103925 https://bugs.freedesktop.org/show_bug.cgi?id=103925
fdo#99912 https://bugs.freedesktop.org/show_bug.cgi?id=99912
fdo#102583 https://bugs.freedesktop.org/show_bug.cgi?id=102583

shard-apl        total:3495 pass:1831 dwarn:1   dfail:0   fail:7   skip:1655 time:12795s
shard-hsw        total:3495 pass:1780 dwarn:2   dfail:0   fail:3   skip:1709 time:11646s
shard-snb        total:3495 pass:1375 dwarn:1   dfail:0   fail:2   skip:2117 time:6961s
Blacklisted hosts:
shard-kbl        total:3477 pass:1937 dwarn:1   dfail:0   fail:9   skip:1529 time:9473s

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_1191/shards.html
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 43+ messages in thread

* [igt-dev] ✗ Fi.CI.BAT: failure for tests/perf_pmu: Avoid RT thread for accuracy test
  2018-03-26 10:57 ` [Intel-gfx] " Tvrtko Ursulin
                   ` (3 preceding siblings ...)
  (?)
@ 2018-03-27 14:31 ` Patchwork
  -1 siblings, 0 replies; 43+ messages in thread
From: Patchwork @ 2018-03-27 14:31 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: igt-dev

== Series Details ==

Series: tests/perf_pmu: Avoid RT thread for accuracy test
URL   : https://patchwork.freedesktop.org/series/40662/
State : failure

== Summary ==

IGT patchset tested on top of latest successful build
a9741da52ad1963f7632ef1e852cbe1c3bcc601e tests/perf_pmu: Improve accuracy by waiting on spinner to start

with latest DRM-Tip kernel build CI_DRM_3988
e39cd9a53fda drm-tip: 2018y-03m-27d-13h-33m-55s UTC integration manifest

No testlist changes.

---- Possible new issues:

Test kms_pipe_crc_basic:
        Subgroup suspend-read-crc-pipe-c:
                pass       -> FAIL       (fi-kbl-7567u)

---- Known issues:

Test gem_mmap_gtt:
        Subgroup basic-small-bo-tiledx:
                pass       -> FAIL       (fi-gdg-551) fdo#102575
Test kms_pipe_crc_basic:
        Subgroup nonblocking-crc-pipe-a-frame-sequence:
                fail       -> PASS       (fi-skl-6770hq) fdo#103481
        Subgroup suspend-read-crc-pipe-c:
                incomplete -> PASS       (fi-bxt-dsi) fdo#103927
                pass       -> INCOMPLETE (fi-skl-6700k2) fdo#104108

fdo#102575 https://bugs.freedesktop.org/show_bug.cgi?id=102575
fdo#103481 https://bugs.freedesktop.org/show_bug.cgi?id=103481
fdo#103927 https://bugs.freedesktop.org/show_bug.cgi?id=103927
fdo#104108 https://bugs.freedesktop.org/show_bug.cgi?id=104108

fi-bdw-5557u     total:285  pass:264  dwarn:0   dfail:0   fail:0   skip:21  time:433s
fi-bdw-gvtdvm    total:285  pass:261  dwarn:0   dfail:0   fail:0   skip:24  time:443s
fi-blb-e6850     total:285  pass:220  dwarn:1   dfail:0   fail:0   skip:64  time:383s
fi-bsw-n3050     total:285  pass:239  dwarn:0   dfail:0   fail:0   skip:46  time:544s
fi-bwr-2160      total:285  pass:180  dwarn:0   dfail:0   fail:0   skip:105 time:299s
fi-bxt-dsi       total:285  pass:255  dwarn:0   dfail:0   fail:0   skip:30  time:517s
fi-bxt-j4205     total:285  pass:256  dwarn:0   dfail:0   fail:0   skip:29  time:519s
fi-byt-j1900     total:285  pass:250  dwarn:0   dfail:0   fail:0   skip:35  time:528s
fi-byt-n2820     total:285  pass:246  dwarn:0   dfail:0   fail:0   skip:39  time:513s
fi-cfl-8700k     total:285  pass:257  dwarn:0   dfail:0   fail:0   skip:28  time:413s
fi-cfl-s3        total:285  pass:259  dwarn:0   dfail:0   fail:0   skip:26  time:568s
fi-cfl-u         total:285  pass:259  dwarn:0   dfail:0   fail:0   skip:26  time:512s
fi-cnl-y3        total:285  pass:259  dwarn:0   dfail:0   fail:0   skip:26  time:587s
fi-elk-e7500     total:285  pass:225  dwarn:1   dfail:0   fail:0   skip:59  time:430s
fi-gdg-551       total:285  pass:176  dwarn:0   dfail:0   fail:1   skip:108 time:327s
fi-glk-1         total:285  pass:257  dwarn:0   dfail:0   fail:0   skip:28  time:540s
fi-hsw-4770      total:285  pass:258  dwarn:0   dfail:0   fail:0   skip:27  time:405s
fi-ilk-650       total:285  pass:225  dwarn:0   dfail:0   fail:0   skip:60  time:420s
fi-ivb-3520m     total:285  pass:256  dwarn:0   dfail:0   fail:0   skip:29  time:474s
fi-ivb-3770      total:285  pass:252  dwarn:0   dfail:0   fail:0   skip:33  time:432s
fi-kbl-7500u     total:285  pass:260  dwarn:1   dfail:0   fail:0   skip:24  time:476s
fi-kbl-7567u     total:285  pass:264  dwarn:0   dfail:0   fail:1   skip:20  time:469s
fi-kbl-r         total:285  pass:258  dwarn:0   dfail:0   fail:0   skip:27  time:519s
fi-pnv-d510      total:285  pass:219  dwarn:1   dfail:0   fail:0   skip:65  time:659s
fi-skl-6260u     total:285  pass:265  dwarn:0   dfail:0   fail:0   skip:20  time:442s
fi-skl-6600u     total:285  pass:258  dwarn:0   dfail:0   fail:0   skip:27  time:534s
fi-skl-6700k2    total:243  pass:222  dwarn:0   dfail:0   fail:0   skip:20 
fi-skl-6770hq    total:285  pass:265  dwarn:0   dfail:0   fail:0   skip:20  time:502s
fi-skl-guc       total:285  pass:257  dwarn:0   dfail:0   fail:0   skip:28  time:433s
fi-skl-gvtdvm    total:285  pass:262  dwarn:0   dfail:0   fail:0   skip:23  time:450s
fi-snb-2520m     total:285  pass:245  dwarn:0   dfail:0   fail:0   skip:40  time:585s
fi-snb-2600      total:285  pass:245  dwarn:0   dfail:0   fail:0   skip:40  time:403s
Blacklisted hosts:
fi-cnl-psr       total:285  pass:256  dwarn:3   dfail:0   fail:0   skip:26  time:516s
fi-glk-j4005     total:285  pass:256  dwarn:0   dfail:0   fail:0   skip:29  time:491s

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_1193/issues.html
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 43+ messages in thread

* [igt-dev] ✗ Fi.CI.BAT: warning for tests/perf_pmu: Avoid RT thread for accuracy test
  2018-03-26 10:57 ` [Intel-gfx] " Tvrtko Ursulin
                   ` (4 preceding siblings ...)
  (?)
@ 2018-03-27 17:08 ` Patchwork
  -1 siblings, 0 replies; 43+ messages in thread
From: Patchwork @ 2018-03-27 17:08 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: igt-dev

== Series Details ==

Series: tests/perf_pmu: Avoid RT thread for accuracy test
URL   : https://patchwork.freedesktop.org/series/40662/
State : warning

== Summary ==

IGT patchset tested on top of latest successful build
a9741da52ad1963f7632ef1e852cbe1c3bcc601e tests/perf_pmu: Improve accuracy by waiting on spinner to start

with latest DRM-Tip kernel build CI_DRM_3989
ff7820832182 drm-tip: 2018y-03m-27d-14h-31m-00s UTC integration manifest

No testlist changes.

---- Possible new issues:

Test drv_module_reload:
        Subgroup basic-reload-inject:
                pass       -> DMESG-WARN (fi-cfl-8700k)

---- Known issues:

Test gem_mmap_gtt:
        Subgroup basic-small-bo-tiledx:
                pass       -> FAIL       (fi-gdg-551) fdo#102575
Test kms_pipe_crc_basic:
        Subgroup suspend-read-crc-pipe-b:
                dmesg-warn -> PASS       (fi-cnl-y3) fdo#104951

fdo#102575 https://bugs.freedesktop.org/show_bug.cgi?id=102575
fdo#104951 https://bugs.freedesktop.org/show_bug.cgi?id=104951

fi-bdw-5557u     total:285  pass:264  dwarn:0   dfail:0   fail:0   skip:21  time:435s
fi-bdw-gvtdvm    total:285  pass:261  dwarn:0   dfail:0   fail:0   skip:24  time:443s
fi-blb-e6850     total:285  pass:220  dwarn:1   dfail:0   fail:0   skip:64  time:380s
fi-bsw-n3050     total:285  pass:239  dwarn:0   dfail:0   fail:0   skip:46  time:540s
fi-bwr-2160      total:285  pass:180  dwarn:0   dfail:0   fail:0   skip:105 time:301s
fi-bxt-dsi       total:285  pass:255  dwarn:0   dfail:0   fail:0   skip:30  time:518s
fi-bxt-j4205     total:285  pass:256  dwarn:0   dfail:0   fail:0   skip:29  time:518s
fi-byt-j1900     total:285  pass:250  dwarn:0   dfail:0   fail:0   skip:35  time:528s
fi-byt-n2820     total:285  pass:246  dwarn:0   dfail:0   fail:0   skip:39  time:518s
fi-cfl-8700k     total:285  pass:256  dwarn:1   dfail:0   fail:0   skip:28  time:411s
fi-cfl-s3        total:285  pass:259  dwarn:0   dfail:0   fail:0   skip:26  time:569s
fi-cfl-u         total:285  pass:259  dwarn:0   dfail:0   fail:0   skip:26  time:513s
fi-cnl-y3        total:285  pass:259  dwarn:0   dfail:0   fail:0   skip:26  time:592s
fi-elk-e7500     total:285  pass:225  dwarn:1   dfail:0   fail:0   skip:59  time:430s
fi-gdg-551       total:285  pass:176  dwarn:0   dfail:0   fail:1   skip:108 time:328s
fi-glk-1         total:285  pass:257  dwarn:0   dfail:0   fail:0   skip:28  time:540s
fi-hsw-4770      total:285  pass:258  dwarn:0   dfail:0   fail:0   skip:27  time:406s
fi-ilk-650       total:285  pass:225  dwarn:0   dfail:0   fail:0   skip:60  time:421s
fi-ivb-3520m     total:285  pass:256  dwarn:0   dfail:0   fail:0   skip:29  time:476s
fi-ivb-3770      total:285  pass:252  dwarn:0   dfail:0   fail:0   skip:33  time:430s
fi-kbl-7500u     total:285  pass:260  dwarn:1   dfail:0   fail:0   skip:24  time:476s
fi-kbl-7567u     total:285  pass:265  dwarn:0   dfail:0   fail:0   skip:20  time:473s
fi-kbl-r         total:285  pass:258  dwarn:0   dfail:0   fail:0   skip:27  time:519s
fi-pnv-d510      total:285  pass:219  dwarn:1   dfail:0   fail:0   skip:65  time:668s
fi-skl-6260u     total:285  pass:265  dwarn:0   dfail:0   fail:0   skip:20  time:449s
fi-skl-6600u     total:285  pass:258  dwarn:0   dfail:0   fail:0   skip:27  time:539s
fi-skl-6700k2    total:285  pass:261  dwarn:0   dfail:0   fail:0   skip:24  time:504s
fi-skl-6770hq    total:285  pass:265  dwarn:0   dfail:0   fail:0   skip:20  time:523s
fi-skl-guc       total:285  pass:257  dwarn:0   dfail:0   fail:0   skip:28  time:430s
fi-skl-gvtdvm    total:285  pass:262  dwarn:0   dfail:0   fail:0   skip:23  time:446s
fi-snb-2520m     total:285  pass:245  dwarn:0   dfail:0   fail:0   skip:40  time:578s
fi-snb-2600      total:285  pass:245  dwarn:0   dfail:0   fail:0   skip:40  time:405s
Blacklisted hosts:
fi-cnl-psr       total:285  pass:255  dwarn:3   dfail:0   fail:1   skip:26  time:530s
fi-glk-j4005     total:285  pass:256  dwarn:0   dfail:0   fail:0   skip:29  time:487s

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_1198/issues.html
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 43+ messages in thread

* [igt-dev] ✓ Fi.CI.BAT: success for tests/perf_pmu: Avoid RT thread for accuracy test
  2018-03-26 10:57 ` [Intel-gfx] " Tvrtko Ursulin
                   ` (5 preceding siblings ...)
  (?)
@ 2018-03-28  9:22 ` Patchwork
  -1 siblings, 0 replies; 43+ messages in thread
From: Patchwork @ 2018-03-28  9:22 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: igt-dev

== Series Details ==

Series: tests/perf_pmu: Avoid RT thread for accuracy test
URL   : https://patchwork.freedesktop.org/series/40662/
State : success

== Summary ==

IGT patchset tested on top of latest successful build
0a393229479d61829a2f8671c1a46f533c76b385 igt/gem_ctx_isolation: Reset a scratch context

with latest DRM-Tip kernel build CI_DRM_3994
23c67dc0cf31 drm-tip: 2018y-03m-28d-06h-36m-40s UTC integration manifest

No testlist changes.

---- Known issues:

Test gem_mmap_gtt:
        Subgroup basic-small-bo-tiledx:
                pass       -> FAIL       (fi-gdg-551) fdo#102575
Test kms_flip:
        Subgroup basic-flip-vs-wf_vblank:
                pass       -> FAIL       (fi-cfl-s3) fdo#100368

fdo#102575 https://bugs.freedesktop.org/show_bug.cgi?id=102575
fdo#100368 https://bugs.freedesktop.org/show_bug.cgi?id=100368

fi-bdw-5557u     total:285  pass:264  dwarn:0   dfail:0   fail:0   skip:21  time:436s
fi-bdw-gvtdvm    total:285  pass:261  dwarn:0   dfail:0   fail:0   skip:24  time:452s
fi-blb-e6850     total:285  pass:220  dwarn:1   dfail:0   fail:0   skip:64  time:381s
fi-bsw-n3050     total:285  pass:239  dwarn:0   dfail:0   fail:0   skip:46  time:542s
fi-bwr-2160      total:285  pass:180  dwarn:0   dfail:0   fail:0   skip:105 time:299s
fi-bxt-dsi       total:285  pass:255  dwarn:0   dfail:0   fail:0   skip:30  time:518s
fi-bxt-j4205     total:285  pass:256  dwarn:0   dfail:0   fail:0   skip:29  time:519s
fi-byt-j1900     total:285  pass:250  dwarn:0   dfail:0   fail:0   skip:35  time:529s
fi-byt-n2820     total:285  pass:246  dwarn:0   dfail:0   fail:0   skip:39  time:513s
fi-cfl-8700k     total:285  pass:257  dwarn:0   dfail:0   fail:0   skip:28  time:410s
fi-cfl-s3        total:285  pass:258  dwarn:0   dfail:0   fail:1   skip:26  time:569s
fi-cfl-u         total:285  pass:259  dwarn:0   dfail:0   fail:0   skip:26  time:514s
fi-cnl-y3        total:285  pass:259  dwarn:0   dfail:0   fail:0   skip:26  time:592s
fi-elk-e7500     total:285  pass:225  dwarn:1   dfail:0   fail:0   skip:59  time:430s
fi-gdg-551       total:285  pass:176  dwarn:0   dfail:0   fail:1   skip:108 time:323s
fi-glk-1         total:285  pass:257  dwarn:0   dfail:0   fail:0   skip:28  time:537s
fi-hsw-4770      total:285  pass:258  dwarn:0   dfail:0   fail:0   skip:27  time:406s
fi-ilk-650       total:285  pass:225  dwarn:0   dfail:0   fail:0   skip:60  time:422s
fi-ivb-3520m     total:285  pass:256  dwarn:0   dfail:0   fail:0   skip:29  time:474s
fi-ivb-3770      total:285  pass:252  dwarn:0   dfail:0   fail:0   skip:33  time:432s
fi-kbl-7500u     total:285  pass:260  dwarn:1   dfail:0   fail:0   skip:24  time:475s
fi-kbl-7567u     total:285  pass:265  dwarn:0   dfail:0   fail:0   skip:20  time:475s
fi-kbl-r         total:285  pass:258  dwarn:0   dfail:0   fail:0   skip:27  time:515s
fi-pnv-d510      total:285  pass:219  dwarn:1   dfail:0   fail:0   skip:65  time:665s
fi-skl-6260u     total:285  pass:265  dwarn:0   dfail:0   fail:0   skip:20  time:445s
fi-skl-6600u     total:285  pass:258  dwarn:0   dfail:0   fail:0   skip:27  time:532s
fi-skl-6700k2    total:285  pass:261  dwarn:0   dfail:0   fail:0   skip:24  time:511s
fi-skl-6770hq    total:285  pass:265  dwarn:0   dfail:0   fail:0   skip:20  time:505s
fi-skl-guc       total:285  pass:257  dwarn:0   dfail:0   fail:0   skip:28  time:435s
fi-skl-gvtdvm    total:285  pass:262  dwarn:0   dfail:0   fail:0   skip:23  time:447s
fi-snb-2520m     total:285  pass:245  dwarn:0   dfail:0   fail:0   skip:40  time:586s
Blacklisted hosts:
fi-cnl-psr       total:285  pass:256  dwarn:3   dfail:0   fail:0   skip:26  time:519s
fi-glk-j4005     total:285  pass:256  dwarn:0   dfail:0   fail:0   skip:29  time:489s

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_1202/issues.html
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 43+ messages in thread

* [igt-dev] ✓ Fi.CI.IGT: success for tests/perf_pmu: Avoid RT thread for accuracy test
  2018-03-26 10:57 ` [Intel-gfx] " Tvrtko Ursulin
                   ` (6 preceding siblings ...)
  (?)
@ 2018-03-28 14:36 ` Patchwork
  2018-03-28 16:56   ` Tvrtko Ursulin
  -1 siblings, 1 reply; 43+ messages in thread
From: Patchwork @ 2018-03-28 14:36 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: igt-dev

== Series Details ==

Series: tests/perf_pmu: Avoid RT thread for accuracy test
URL   : https://patchwork.freedesktop.org/series/40662/
State : success

== Summary ==

---- Known issues:

Test kms_flip:
        Subgroup flip-vs-expired-vblank-interruptible:
                pass       -> FAIL       (shard-hsw) fdo#102887
        Subgroup modeset-vs-vblank-race-interruptible:
                pass       -> FAIL       (shard-hsw) fdo#103060
        Subgroup plain-flip-ts-check-interruptible:
                fail       -> PASS       (shard-hsw) fdo#100368 +1
Test kms_setmode:
        Subgroup basic:
                pass       -> FAIL       (shard-hsw) fdo#99912
Test perf:
        Subgroup blocking:
                pass       -> FAIL       (shard-hsw) fdo#102252

fdo#102887 https://bugs.freedesktop.org/show_bug.cgi?id=102887
fdo#103060 https://bugs.freedesktop.org/show_bug.cgi?id=103060
fdo#100368 https://bugs.freedesktop.org/show_bug.cgi?id=100368
fdo#99912 https://bugs.freedesktop.org/show_bug.cgi?id=99912
fdo#102252 https://bugs.freedesktop.org/show_bug.cgi?id=102252

shard-apl        total:3495 pass:1831 dwarn:1   dfail:0   fail:7   skip:1655 time:12855s
shard-hsw        total:3495 pass:1780 dwarn:1   dfail:0   fail:4   skip:1709 time:11575s
shard-snb        total:3495 pass:1373 dwarn:1   dfail:0   fail:4   skip:2117 time:6892s
Blacklisted hosts:
shard-kbl        total:3425 pass:1910 dwarn:10  dfail:1   fail:9   skip:1494 time:9421s

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_1202/shards.html
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [igt-dev] ✓ Fi.CI.IGT: success for tests/perf_pmu: Avoid RT thread for accuracy test
  2018-03-28 14:36 ` [igt-dev] ✓ Fi.CI.IGT: " Patchwork
@ 2018-03-28 16:56   ` Tvrtko Ursulin
  2018-03-28 17:10     ` Chris Wilson
  0 siblings, 1 reply; 43+ messages in thread
From: Tvrtko Ursulin @ 2018-03-28 16:56 UTC (permalink / raw)
  To: igt-dev, Patchwork, Tvrtko Ursulin


Solid results so far from this simplified version, over two runs. But
it's impossible to know whether it improves (or worsens) the long-term
results on KBL, which had occasional failures, or on GLK, which had more
frequent failures but appears to have disappeared from the farm.

Regards,

Tvrtko


On 28/03/2018 15:36, Patchwork wrote:
> == Series Details ==
> 
> Series: tests/perf_pmu: Avoid RT thread for accuracy test
> URL   : https://patchwork.freedesktop.org/series/40662/
> State : success
> 
> == Summary ==
> 
> ---- Known issues:
> 
> Test kms_flip:
>          Subgroup flip-vs-expired-vblank-interruptible:
>                  pass       -> FAIL       (shard-hsw) fdo#102887
>          Subgroup modeset-vs-vblank-race-interruptible:
>                  pass       -> FAIL       (shard-hsw) fdo#103060
>          Subgroup plain-flip-ts-check-interruptible:
>                  fail       -> PASS       (shard-hsw) fdo#100368 +1
> Test kms_setmode:
>          Subgroup basic:
>                  pass       -> FAIL       (shard-hsw) fdo#99912
> Test perf:
>          Subgroup blocking:
>                  pass       -> FAIL       (shard-hsw) fdo#102252
> 
> fdo#102887 https://bugs.freedesktop.org/show_bug.cgi?id=102887
> fdo#103060 https://bugs.freedesktop.org/show_bug.cgi?id=103060
> fdo#100368 https://bugs.freedesktop.org/show_bug.cgi?id=100368
> fdo#99912 https://bugs.freedesktop.org/show_bug.cgi?id=99912
> fdo#102252 https://bugs.freedesktop.org/show_bug.cgi?id=102252
> 
> shard-apl        total:3495 pass:1831 dwarn:1   dfail:0   fail:7   skip:1655 time:12855s
> shard-hsw        total:3495 pass:1780 dwarn:1   dfail:0   fail:4   skip:1709 time:11575s
> shard-snb        total:3495 pass:1373 dwarn:1   dfail:0   fail:4   skip:2117 time:6892s
> Blacklisted hosts:
> shard-kbl        total:3425 pass:1910 dwarn:10  dfail:1   fail:9   skip:1494 time:9421s
> 
> == Logs ==
> 
> For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_1202/shards.html
> _______________________________________________
> igt-dev mailing list
> igt-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/igt-dev
> 
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [igt-dev] ✓ Fi.CI.IGT: success for tests/perf_pmu: Avoid RT thread for accuracy test
  2018-03-28 16:56   ` Tvrtko Ursulin
@ 2018-03-28 17:10     ` Chris Wilson
  0 siblings, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2018-03-28 17:10 UTC (permalink / raw)
  To: Tvrtko Ursulin, igt-dev, Patchwork, Tvrtko Ursulin

Quoting Tvrtko Ursulin (2018-03-28 17:56:53)
> 
> Solid results so far from this simplified version, over two runs. But
> it's impossible to know whether it improves (or worsens) the long-term
> results on KBL, which had occasional failures, or on GLK, which had more
> frequent failures but appears to have disappeared from the farm.

But I have been noticing that we've been drifting from our target ;)

I'm not opposed, I just grew a little attached to that calibration loop.
I really would like to see if we can just drop RT and keep everything
else :)
-Chris
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH i-g-t v2] tests/perf_pmu: Avoid RT thread for accuracy test
  2018-03-26 10:57 ` [Intel-gfx] " Tvrtko Ursulin
@ 2018-04-03 12:38   ` Tvrtko Ursulin
  -1 siblings, 0 replies; 43+ messages in thread
From: Tvrtko Ursulin @ 2018-04-03 12:38 UTC (permalink / raw)
  To: igt-dev; +Cc: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Realtime scheduling interferes with execlists submission (tasklet) so try
to simplify the PWM loop in a few ways:

 * Drop RT.
 * Longer batches for smaller systematic error.
 * More truthful test duration calculation.
 * Fewer clock queries.
 * No self-adjust - instead just report the achieved cycle and let the
   parent check against it.
 * Report absolute cycle error.

v2:
 * Bring back self-adjust. (Chris Wilson)
   (But slightly fixed version with no overflow.)

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 tests/perf_pmu.c | 97 +++++++++++++++++++++++++-------------------------------
 1 file changed, 43 insertions(+), 54 deletions(-)

diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
index f27b7ec7d2c2..0cfacd4a8fbe 100644
--- a/tests/perf_pmu.c
+++ b/tests/perf_pmu.c
@@ -1504,12 +1504,6 @@ test_enable_race(int gem_fd, const struct intel_execution_engine2 *e)
 	gem_quiescent_gpu(gem_fd);
 }
 
-static double __error(double val, double ref)
-{
-	igt_assert(ref > 1e-5 /* smallval */);
-	return (100.0 * val / ref) - 100.0;
-}
-
 static void __rearm_spin_batch(igt_spin_t *spin)
 {
 	const uint32_t mi_arb_chk = 0x5 << 23;
@@ -1532,13 +1526,12 @@ static void
 accuracy(int gem_fd, const struct intel_execution_engine2 *e,
 	 unsigned long target_busy_pct)
 {
-	const unsigned int min_test_loops = 7;
-	const unsigned long min_test_us = 1e6;
-	unsigned long busy_us = 2500;
+	unsigned long busy_us = 10000 - 100 * (1 + abs(50 - target_busy_pct));
 	unsigned long idle_us = 100 * (busy_us - target_busy_pct *
 				busy_us / 100) / target_busy_pct;
-	unsigned long pwm_calibration_us;
-	unsigned long test_us;
+	const unsigned long min_test_us = 1e6;
+	const unsigned long pwm_calibration_us = min_test_us;
+	const unsigned long test_us = min_test_us;
 	double busy_r, expected;
 	uint64_t val[2];
 	uint64_t ts[2];
@@ -1553,13 +1546,6 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
 		idle_us *= 2;
 	}
 
-	pwm_calibration_us = min_test_loops * (busy_us + idle_us);
-	while (pwm_calibration_us < min_test_us)
-		pwm_calibration_us += busy_us + idle_us;
-	test_us = min_test_loops * (idle_us + busy_us);
-	while (test_us < min_test_us)
-		test_us += busy_us + idle_us;
-
 	igt_info("calibration=%lums, test=%lums; ratio=%.2f%% (%luus/%luus)\n",
 		 pwm_calibration_us / 1000, test_us / 1000,
 		 (double)busy_us / (busy_us + idle_us) * 100.0,
@@ -1572,20 +1558,11 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
 
 	/* Emit PWM pattern on the engine from a child. */
 	igt_fork(child, 1) {
-		struct sched_param rt = { .sched_priority = 99 };
 		const unsigned long timeout[] = {
 			pwm_calibration_us * 1000, test_us * 1000
 		};
-		uint64_t total_busy_ns = 0, total_idle_ns = 0;
+		uint64_t total_busy_ns = 0, total_ns = 0;
 		igt_spin_t *spin;
-		int ret;
-
-		/* We need the best sleep accuracy we can get. */
-		ret = sched_setscheduler(0,
-					 SCHED_FIFO | SCHED_RESET_ON_FORK,
-					 &rt);
-		if (ret)
-			igt_warn("Failed to set scheduling policy!\n");
 
 		/* Allocate our spin batch and idle it. */
 		spin = igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e), 0);
@@ -1594,39 +1571,51 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
 
 		/* 1st pass is calibration, second pass is the test. */
 		for (int pass = 0; pass < ARRAY_SIZE(timeout); pass++) {
-			uint64_t busy_ns = -total_busy_ns;
-			uint64_t idle_ns = -total_idle_ns;
-			struct timespec test_start = { };
+			unsigned int target_idle_us = idle_us;
+			uint64_t busy_ns = 0, idle_ns = 0;
+			struct timespec start = { };
+			unsigned long pass_ns = 0;
+
+			igt_nsec_elapsed(&start);
 
-			igt_nsec_elapsed(&test_start);
 			do {
-				unsigned int target_idle_us, t_busy;
+				unsigned long loop_ns, loop_busy;
+				struct timespec _ts = { };
+				double err;
+
+				/* PWM idle sleep. */
+				_ts.tv_nsec = target_idle_us * 1000;
+				nanosleep(&_ts, NULL);
 
 				/* Restart the spinbatch. */
 				__rearm_spin_batch(spin);
 				__submit_spin_batch(gem_fd, spin, e, 0);
 
-				/*
-				 * Note that the submission may be delayed to a
-				 * tasklet (ksoftirqd) which cannot run until we
-				 * sleep as we hog the cpu (we are RT).
-				 */
-
-				t_busy = measured_usleep(busy_us);
+				/* PWM busy sleep. */
+				loop_busy = igt_nsec_elapsed(&start);
+				_ts.tv_nsec = busy_us * 1000;
+				nanosleep(&_ts, NULL);
 				igt_spin_batch_end(spin);
-				gem_sync(gem_fd, spin->handle);
-
-				total_busy_ns += t_busy;
-
-				target_idle_us =
-					(100 * total_busy_ns / target_busy_pct - (total_busy_ns + total_idle_ns)) / 1000;
-				total_idle_ns += measured_usleep(target_idle_us);
-			} while (igt_nsec_elapsed(&test_start) < timeout[pass]);
-
-			busy_ns += total_busy_ns;
-			idle_ns += total_idle_ns;
 
-			expected = (double)busy_ns / (busy_ns + idle_ns);
+				/* Time accounting. */
+				loop_ns = igt_nsec_elapsed(&start);
+				loop_busy = loop_ns - loop_busy;
+				loop_ns -= pass_ns;
+
+				busy_ns += loop_busy;
+				total_busy_ns += loop_busy;
+				idle_ns += loop_ns - loop_busy;
+				pass_ns += loop_ns;
+				total_ns += loop_ns;
+
+				/* Re-calibrate. */
+				err = (double)total_busy_ns / total_ns -
+				      (double)target_busy_pct / 100.0;
+				target_idle_us = (double)target_idle_us *
+						 (1.0 + err);
+			} while (pass_ns < timeout[pass]);
+
+			expected = (double)busy_ns / pass_ns;
 			igt_info("%u: busy %"PRIu64"us, idle %"PRIu64"us: %.2f%% (target: %lu%%)\n",
 				 pass, busy_ns / 1000, idle_ns / 1000,
 				 100 * expected, target_busy_pct);
@@ -1655,8 +1644,8 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
 
 	busy_r = (double)(val[1] - val[0]) / (ts[1] - ts[0]);
 
-	igt_info("error=%.2f%% (%.2f%% vs %.2f%%)\n",
-		 __error(busy_r, expected), 100 * busy_r, 100 * expected);
+	igt_info("error=%.2f (%.2f%% vs %.2f%%)\n",
+		 (busy_r - expected) * 100, 100 * busy_r, 100 * expected);
 
 	assert_within(100.0 * busy_r, 100.0 * expected, 2);
 }
-- 
2.14.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [Intel-gfx] [PATCH i-g-t v2] tests/perf_pmu: Avoid RT thread for accuracy test
@ 2018-04-03 12:38   ` Tvrtko Ursulin
  0 siblings, 0 replies; 43+ messages in thread
From: Tvrtko Ursulin @ 2018-04-03 12:38 UTC (permalink / raw)
  To: igt-dev; +Cc: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Realtime scheduling interferes with execlists submission (tasklet) so try
to simplify the PWM loop in a few ways:

 * Drop RT.
 * Longer batches for smaller systematic error.
 * More truthful test duration calculation.
 * Fewer clock queries.
 * No self-adjust - instead just report the achieved cycle and let the
   parent check against it.
 * Report absolute cycle error.

v2:
 * Bring back self-adjust. (Chris Wilson)
   (But slightly fixed version with no overflow.)

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 tests/perf_pmu.c | 97 +++++++++++++++++++++++++-------------------------------
 1 file changed, 43 insertions(+), 54 deletions(-)

diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
index f27b7ec7d2c2..0cfacd4a8fbe 100644
--- a/tests/perf_pmu.c
+++ b/tests/perf_pmu.c
@@ -1504,12 +1504,6 @@ test_enable_race(int gem_fd, const struct intel_execution_engine2 *e)
 	gem_quiescent_gpu(gem_fd);
 }
 
-static double __error(double val, double ref)
-{
-	igt_assert(ref > 1e-5 /* smallval */);
-	return (100.0 * val / ref) - 100.0;
-}
-
 static void __rearm_spin_batch(igt_spin_t *spin)
 {
 	const uint32_t mi_arb_chk = 0x5 << 23;
@@ -1532,13 +1526,12 @@ static void
 accuracy(int gem_fd, const struct intel_execution_engine2 *e,
 	 unsigned long target_busy_pct)
 {
-	const unsigned int min_test_loops = 7;
-	const unsigned long min_test_us = 1e6;
-	unsigned long busy_us = 2500;
+	unsigned long busy_us = 10000 - 100 * (1 + abs(50 - target_busy_pct));
 	unsigned long idle_us = 100 * (busy_us - target_busy_pct *
 				busy_us / 100) / target_busy_pct;
-	unsigned long pwm_calibration_us;
-	unsigned long test_us;
+	const unsigned long min_test_us = 1e6;
+	const unsigned long pwm_calibration_us = min_test_us;
+	const unsigned long test_us = min_test_us;
 	double busy_r, expected;
 	uint64_t val[2];
 	uint64_t ts[2];
@@ -1553,13 +1546,6 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
 		idle_us *= 2;
 	}
 
-	pwm_calibration_us = min_test_loops * (busy_us + idle_us);
-	while (pwm_calibration_us < min_test_us)
-		pwm_calibration_us += busy_us + idle_us;
-	test_us = min_test_loops * (idle_us + busy_us);
-	while (test_us < min_test_us)
-		test_us += busy_us + idle_us;
-
 	igt_info("calibration=%lums, test=%lums; ratio=%.2f%% (%luus/%luus)\n",
 		 pwm_calibration_us / 1000, test_us / 1000,
 		 (double)busy_us / (busy_us + idle_us) * 100.0,
@@ -1572,20 +1558,11 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
 
 	/* Emit PWM pattern on the engine from a child. */
 	igt_fork(child, 1) {
-		struct sched_param rt = { .sched_priority = 99 };
 		const unsigned long timeout[] = {
 			pwm_calibration_us * 1000, test_us * 1000
 		};
-		uint64_t total_busy_ns = 0, total_idle_ns = 0;
+		uint64_t total_busy_ns = 0, total_ns = 0;
 		igt_spin_t *spin;
-		int ret;
-
-		/* We need the best sleep accuracy we can get. */
-		ret = sched_setscheduler(0,
-					 SCHED_FIFO | SCHED_RESET_ON_FORK,
-					 &rt);
-		if (ret)
-			igt_warn("Failed to set scheduling policy!\n");
 
 		/* Allocate our spin batch and idle it. */
 		spin = igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e), 0);
@@ -1594,39 +1571,51 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
 
 		/* 1st pass is calibration, second pass is the test. */
 		for (int pass = 0; pass < ARRAY_SIZE(timeout); pass++) {
-			uint64_t busy_ns = -total_busy_ns;
-			uint64_t idle_ns = -total_idle_ns;
-			struct timespec test_start = { };
+			unsigned int target_idle_us = idle_us;
+			uint64_t busy_ns = 0, idle_ns = 0;
+			struct timespec start = { };
+			unsigned long pass_ns = 0;
+
+			igt_nsec_elapsed(&start);
 
-			igt_nsec_elapsed(&test_start);
 			do {
-				unsigned int target_idle_us, t_busy;
+				unsigned long loop_ns, loop_busy;
+				struct timespec _ts = { };
+				double err;
+
+				/* PWM idle sleep. */
+				_ts.tv_nsec = target_idle_us * 1000;
+				nanosleep(&_ts, NULL);
 
 				/* Restart the spinbatch. */
 				__rearm_spin_batch(spin);
 				__submit_spin_batch(gem_fd, spin, e, 0);
 
-				/*
-				 * Note that the submission may be delayed to a
-				 * tasklet (ksoftirqd) which cannot run until we
-				 * sleep as we hog the cpu (we are RT).
-				 */
-
-				t_busy = measured_usleep(busy_us);
+				/* PWM busy sleep. */
+				loop_busy = igt_nsec_elapsed(&start);
+				_ts.tv_nsec = busy_us * 1000;
+				nanosleep(&_ts, NULL);
 				igt_spin_batch_end(spin);
-				gem_sync(gem_fd, spin->handle);
-
-				total_busy_ns += t_busy;
-
-				target_idle_us =
-					(100 * total_busy_ns / target_busy_pct - (total_busy_ns + total_idle_ns)) / 1000;
-				total_idle_ns += measured_usleep(target_idle_us);
-			} while (igt_nsec_elapsed(&test_start) < timeout[pass]);
-
-			busy_ns += total_busy_ns;
-			idle_ns += total_idle_ns;
 
-			expected = (double)busy_ns / (busy_ns + idle_ns);
+				/* Time accounting. */
+				loop_ns = igt_nsec_elapsed(&start);
+				loop_busy = loop_ns - loop_busy;
+				loop_ns -= pass_ns;
+
+				busy_ns += loop_busy;
+				total_busy_ns += loop_busy;
+				idle_ns += loop_ns - loop_busy;
+				pass_ns += loop_ns;
+				total_ns += loop_ns;
+
+				/* Re-calibrate. */
+				err = (double)total_busy_ns / total_ns -
+				      (double)target_busy_pct / 100.0;
+				target_idle_us = (double)target_idle_us *
+						 (1.0 + err);
+			} while (pass_ns < timeout[pass]);
+
+			expected = (double)busy_ns / pass_ns;
 			igt_info("%u: busy %"PRIu64"us, idle %"PRIu64"us: %.2f%% (target: %lu%%)\n",
 				 pass, busy_ns / 1000, idle_ns / 1000,
 				 100 * expected, target_busy_pct);
@@ -1655,8 +1644,8 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
 
 	busy_r = (double)(val[1] - val[0]) / (ts[1] - ts[0]);
 
-	igt_info("error=%.2f%% (%.2f%% vs %.2f%%)\n",
-		 __error(busy_r, expected), 100 * busy_r, 100 * expected);
+	igt_info("error=%.2f (%.2f%% vs %.2f%%)\n",
+		 (busy_r - expected) * 100, 100 * busy_r, 100 * expected);
 
 	assert_within(100.0 * busy_r, 100.0 * expected, 2);
 }
-- 
2.14.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* Re: [PATCH i-g-t v2] tests/perf_pmu: Avoid RT thread for accuracy test
  2018-04-03 12:38   ` [Intel-gfx] " Tvrtko Ursulin
@ 2018-04-03 13:10     ` Chris Wilson
  -1 siblings, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2018-04-03 13:10 UTC (permalink / raw)
  To: Tvrtko Ursulin, igt-dev; +Cc: Intel-gfx

Quoting Tvrtko Ursulin (2018-04-03 13:38:25)
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> Realtime scheduling interferes with execlists submission (tasklet) so try
> to simplify the PWM loop in a few ways:
> 
>  * Drop RT.
>  * Longer batches for smaller systematic error.
>  * More truthful test duration calculation.
>  * Fewer clock queries.
>  * No self-adjust - instead just report the achieved cycle and let the
>    parent check against it.
>  * Report absolute cycle error.
> 
> v2:
>  * Bring back self-adjust. (Chris Wilson)
>    (But slightly fixed version with no overflow.)
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>  tests/perf_pmu.c | 97 +++++++++++++++++++++++++-------------------------------
>  1 file changed, 43 insertions(+), 54 deletions(-)
> 
> diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
> index f27b7ec7d2c2..0cfacd4a8fbe 100644
> --- a/tests/perf_pmu.c
> +++ b/tests/perf_pmu.c
> @@ -1504,12 +1504,6 @@ test_enable_race(int gem_fd, const struct intel_execution_engine2 *e)
>         gem_quiescent_gpu(gem_fd);
>  }
>  
> -static double __error(double val, double ref)
> -{
> -       igt_assert(ref > 1e-5 /* smallval */);
> -       return (100.0 * val / ref) - 100.0;
> -}
> -
>  static void __rearm_spin_batch(igt_spin_t *spin)
>  {
>         const uint32_t mi_arb_chk = 0x5 << 23;
> @@ -1532,13 +1526,12 @@ static void
>  accuracy(int gem_fd, const struct intel_execution_engine2 *e,
>          unsigned long target_busy_pct)
>  {
> -       const unsigned int min_test_loops = 7;
> -       const unsigned long min_test_us = 1e6;
> -       unsigned long busy_us = 2500;
> +       unsigned long busy_us = 10000 - 100 * (1 + abs(50 - target_busy_pct));
>         unsigned long idle_us = 100 * (busy_us - target_busy_pct *
>                                 busy_us / 100) / target_busy_pct;
> -       unsigned long pwm_calibration_us;
> -       unsigned long test_us;
> +       const unsigned long min_test_us = 1e6;
> +       const unsigned long pwm_calibration_us = min_test_us;
> +       const unsigned long test_us = min_test_us;
>         double busy_r, expected;
>         uint64_t val[2];
>         uint64_t ts[2];
> @@ -1553,13 +1546,6 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
>                 idle_us *= 2;
>         }
>  
> -       pwm_calibration_us = min_test_loops * (busy_us + idle_us);
> -       while (pwm_calibration_us < min_test_us)
> -               pwm_calibration_us += busy_us + idle_us;
> -       test_us = min_test_loops * (idle_us + busy_us);
> -       while (test_us < min_test_us)
> -               test_us += busy_us + idle_us;
> -
>         igt_info("calibration=%lums, test=%lums; ratio=%.2f%% (%luus/%luus)\n",
>                  pwm_calibration_us / 1000, test_us / 1000,
>                  (double)busy_us / (busy_us + idle_us) * 100.0,
> @@ -1572,20 +1558,11 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
>  
>         /* Emit PWM pattern on the engine from a child. */
>         igt_fork(child, 1) {
> -               struct sched_param rt = { .sched_priority = 99 };
>                 const unsigned long timeout[] = {
>                         pwm_calibration_us * 1000, test_us * 1000
>                 };
> -               uint64_t total_busy_ns = 0, total_idle_ns = 0;
> +               uint64_t total_busy_ns = 0, total_ns = 0;
>                 igt_spin_t *spin;
> -               int ret;
> -
> -               /* We need the best sleep accuracy we can get. */
> -               ret = sched_setscheduler(0,
> -                                        SCHED_FIFO | SCHED_RESET_ON_FORK,
> -                                        &rt);
> -               if (ret)
> -                       igt_warn("Failed to set scheduling policy!\n");
>  
>                 /* Allocate our spin batch and idle it. */
>                 spin = igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e), 0);
> @@ -1594,39 +1571,51 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
>  
>                 /* 1st pass is calibration, second pass is the test. */
>                 for (int pass = 0; pass < ARRAY_SIZE(timeout); pass++) {
> -                       uint64_t busy_ns = -total_busy_ns;
> -                       uint64_t idle_ns = -total_idle_ns;
> -                       struct timespec test_start = { };
> +                       unsigned int target_idle_us = idle_us;
> +                       uint64_t busy_ns = 0, idle_ns = 0;
> +                       struct timespec start = { };
> +                       unsigned long pass_ns = 0;
> +
> +                       igt_nsec_elapsed(&start);
>  
> -                       igt_nsec_elapsed(&test_start);
>                         do {
> -                               unsigned int target_idle_us, t_busy;
> +                               unsigned long loop_ns, loop_busy;
> +                               struct timespec _ts = { };
> +                               double err;
> +
> +                               /* PWM idle sleep. */
> +                               _ts.tv_nsec = target_idle_us * 1000;
> +                               nanosleep(&_ts, NULL);
>  
>                                 /* Restart the spinbatch. */
>                                 __rearm_spin_batch(spin);
>                                 __submit_spin_batch(gem_fd, spin, e, 0);
>  
> -                               /*
> -                                * Note that the submission may be delayed to a
> -                                * tasklet (ksoftirqd) which cannot run until we
> -                                * sleep as we hog the cpu (we are RT).
> -                                */
> -
> -                               t_busy = measured_usleep(busy_us);
> +                               /* PWM busy sleep. */
> +                               loop_busy = igt_nsec_elapsed(&start);
> +                               _ts.tv_nsec = busy_us * 1000;
> +                               nanosleep(&_ts, NULL);
>                                 igt_spin_batch_end(spin);
> -                               gem_sync(gem_fd, spin->handle);
> -
> -                               total_busy_ns += t_busy;
> -
> -                               target_idle_us =
> -                                       (100 * total_busy_ns / target_busy_pct - (total_busy_ns + total_idle_ns)) / 1000;
> -                               total_idle_ns += measured_usleep(target_idle_us);
> -                       } while (igt_nsec_elapsed(&test_start) < timeout[pass]);
> -
> -                       busy_ns += total_busy_ns;
> -                       idle_ns += total_idle_ns;
>  
> -                       expected = (double)busy_ns / (busy_ns + idle_ns);
> +                               /* Time accounting. */
> +                               loop_ns = igt_nsec_elapsed(&start);
> +                               loop_busy = loop_ns - loop_busy;
> +                               loop_ns -= pass_ns;
> +
> +                               busy_ns += loop_busy;
> +                               total_busy_ns += loop_busy;
> +                               idle_ns += loop_ns - loop_busy;
> +                               pass_ns += loop_ns;
> +                               total_ns += loop_ns;
> +
> +                               /* Re-calibrate. */
> +                               err = (double)total_busy_ns / total_ns -
> +                                     (double)target_busy_pct / 100.0;
> +                               target_idle_us = (double)target_idle_us *
> +                                                (1.0 + err);

Previously the question we answered was how long should I sleep for the
busy:idle ratio to hit the target.

expected_total_ns = 100.0 * total_busy_ns / target_busy_pct;
target_idle_us = (expected_total_ns - current_total_ns) / 1000;

	unsigned long loop_ns, loop_busy;
	struct timespec _ts = { };
	double err;

	/* PWM idle sleep. */
	_ts.tv_nsec = target_idle_us * 1000;
	nanosleep(&_ts, NULL);

Assuming no >1s sleeps (the value goes into tv_nsec, which must stay below one second).
(Ok, so the sleep after recalc is still here.)

	/* Restart the spinbatch. */
	__rearm_spin_batch(spin);
	__submit_spin_batch(gem_fd, spin, e, 0);
  
	/* PWM busy sleep. */
	loop_busy = igt_nsec_elapsed(&start);
	_ts.tv_nsec = busy_us * 1000;
	nanosleep(&_ts, NULL);
	igt_spin_batch_end(spin);

	/* Time accounting. */
	loop_ns = igt_nsec_elapsed(&start);
	loop_busy = loop_ns - loop_busy;
	loop_ns -= pass_ns;

So pass_ns is time from start of calibration, loop_ns is time for this
loop.

	busy_ns += loop_busy;
	total_busy_ns += loop_busy;

So busy_ns covers only the current pass (e.g. calibration), and total_busy_ns covers all passes?

	idle_ns += loop_ns - loop_busy;

And idle is the residual between the time up to this point, and what has
been busy.

	pass_ns += loop_ns;
	total_ns += loop_ns;

	/* Re-calibrate. */
	err = (double)total_busy_ns / total_ns -
	      (double)target_busy_pct / 100.0;

Hmm, I thought you didn't like the run-on calculations, and wanted to
reset between passes? (Have I got total_busy_ns and busy_ns confused?)

	target_idle_us = (double)target_idle_us * (1.0 + err);

Ok, I'm tired, but... So, if busy is 10% larger than expected, sleep 10%
longer to try and compensate, would be the gist.

And this is because you always sleep and spin together and so cannot
just sleep to compensate for the earlier inaccuracy. Which means we
never truly try to correct the error in the same pass, but apply a
correction factor for the next.

To me it seems like the closed system, with each loop being "spin then
adjusted sleep", will autocorrect and is more likely to finish correct (as
we are less reliant on the next loop for the accuracy). It's pretty much
immaterial, as we expect the pmu to match the measurements (and not our
expectations), but I find the "one pass does it all" approach much simpler
to follow.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [igt-dev] [Intel-gfx] [PATCH i-g-t v2] tests/perf_pmu: Avoid RT thread for accuracy test
@ 2018-04-03 13:10     ` Chris Wilson
  0 siblings, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2018-04-03 13:10 UTC (permalink / raw)
  To: Tvrtko Ursulin, igt-dev; +Cc: Intel-gfx

Quoting Tvrtko Ursulin (2018-04-03 13:38:25)
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> Realtime scheduling interferes with execlists submission (tasklet) so try
> to simplify the PWM loop in a few ways:
> 
>  * Drop RT.
>  * Longer batches for smaller systematic error.
>  * More truthful test duration calculation.
>  * Less clock queries.
>  * No self-adjust - instead just report the achieved cycle and let the
>    parent check against it.
>  * Report absolute cycle error.
> 
> v2:
>  * Bring back self-adjust. (Chris Wilson)
>    (But slightly fixed version with no overflow.)
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>  tests/perf_pmu.c | 97 +++++++++++++++++++++++++-------------------------------
>  1 file changed, 43 insertions(+), 54 deletions(-)
> 
> diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
> index f27b7ec7d2c2..0cfacd4a8fbe 100644
> --- a/tests/perf_pmu.c
> +++ b/tests/perf_pmu.c
> @@ -1504,12 +1504,6 @@ test_enable_race(int gem_fd, const struct intel_execution_engine2 *e)
>         gem_quiescent_gpu(gem_fd);
>  }
>  
> -static double __error(double val, double ref)
> -{
> -       igt_assert(ref > 1e-5 /* smallval */);
> -       return (100.0 * val / ref) - 100.0;
> -}
> -
>  static void __rearm_spin_batch(igt_spin_t *spin)
>  {
>         const uint32_t mi_arb_chk = 0x5 << 23;
> @@ -1532,13 +1526,12 @@ static void
>  accuracy(int gem_fd, const struct intel_execution_engine2 *e,
>          unsigned long target_busy_pct)
>  {
> -       const unsigned int min_test_loops = 7;
> -       const unsigned long min_test_us = 1e6;
> -       unsigned long busy_us = 2500;
> +       unsigned long busy_us = 10000 - 100 * (1 + abs(50 - target_busy_pct));
>         unsigned long idle_us = 100 * (busy_us - target_busy_pct *
>                                 busy_us / 100) / target_busy_pct;
> -       unsigned long pwm_calibration_us;
> -       unsigned long test_us;
> +       const unsigned long min_test_us = 1e6;
> +       const unsigned long pwm_calibration_us = min_test_us;
> +       const unsigned long test_us = min_test_us;
>         double busy_r, expected;
>         uint64_t val[2];
>         uint64_t ts[2];
> @@ -1553,13 +1546,6 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
>                 idle_us *= 2;
>         }
>  
> -       pwm_calibration_us = min_test_loops * (busy_us + idle_us);
> -       while (pwm_calibration_us < min_test_us)
> -               pwm_calibration_us += busy_us + idle_us;
> -       test_us = min_test_loops * (idle_us + busy_us);
> -       while (test_us < min_test_us)
> -               test_us += busy_us + idle_us;
> -
>         igt_info("calibration=%lums, test=%lums; ratio=%.2f%% (%luus/%luus)\n",
>                  pwm_calibration_us / 1000, test_us / 1000,
>                  (double)busy_us / (busy_us + idle_us) * 100.0,
> @@ -1572,20 +1558,11 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
>  
>         /* Emit PWM pattern on the engine from a child. */
>         igt_fork(child, 1) {
> -               struct sched_param rt = { .sched_priority = 99 };
>                 const unsigned long timeout[] = {
>                         pwm_calibration_us * 1000, test_us * 1000
>                 };
> -               uint64_t total_busy_ns = 0, total_idle_ns = 0;
> +               uint64_t total_busy_ns = 0, total_ns = 0;
>                 igt_spin_t *spin;
> -               int ret;
> -
> -               /* We need the best sleep accuracy we can get. */
> -               ret = sched_setscheduler(0,
> -                                        SCHED_FIFO | SCHED_RESET_ON_FORK,
> -                                        &rt);
> -               if (ret)
> -                       igt_warn("Failed to set scheduling policy!\n");
>  
>                 /* Allocate our spin batch and idle it. */
>                 spin = igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e), 0);
> @@ -1594,39 +1571,51 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
>  
>                 /* 1st pass is calibration, second pass is the test. */
>                 for (int pass = 0; pass < ARRAY_SIZE(timeout); pass++) {
> -                       uint64_t busy_ns = -total_busy_ns;
> -                       uint64_t idle_ns = -total_idle_ns;
> -                       struct timespec test_start = { };
> +                       unsigned int target_idle_us = idle_us;
> +                       uint64_t busy_ns = 0, idle_ns = 0;
> +                       struct timespec start = { };
> +                       unsigned long pass_ns = 0;
> +
> +                       igt_nsec_elapsed(&start);
>  
> -                       igt_nsec_elapsed(&test_start);
>                         do {
> -                               unsigned int target_idle_us, t_busy;
> +                               unsigned long loop_ns, loop_busy;
> +                               struct timespec _ts = { };
> +                               double err;
> +
> +                               /* PWM idle sleep. */
> +                               _ts.tv_nsec = target_idle_us * 1000;
> +                               nanosleep(&_ts, NULL);
>  
>                                 /* Restart the spinbatch. */
>                                 __rearm_spin_batch(spin);
>                                 __submit_spin_batch(gem_fd, spin, e, 0);
>  
> -                               /*
> -                                * Note that the submission may be delayed to a
> -                                * tasklet (ksoftirqd) which cannot run until we
> -                                * sleep as we hog the cpu (we are RT).
> -                                */
> -
> -                               t_busy = measured_usleep(busy_us);
> +                               /* PWM busy sleep. */
> +                               loop_busy = igt_nsec_elapsed(&start);
> +                               _ts.tv_nsec = busy_us * 1000;
> +                               nanosleep(&_ts, NULL);
>                                 igt_spin_batch_end(spin);
> -                               gem_sync(gem_fd, spin->handle);
> -
> -                               total_busy_ns += t_busy;
> -
> -                               target_idle_us =
> -                                       (100 * total_busy_ns / target_busy_pct - (total_busy_ns + total_idle_ns)) / 1000;
> -                               total_idle_ns += measured_usleep(target_idle_us);
> -                       } while (igt_nsec_elapsed(&test_start) < timeout[pass]);
> -
> -                       busy_ns += total_busy_ns;
> -                       idle_ns += total_idle_ns;
>  
> -                       expected = (double)busy_ns / (busy_ns + idle_ns);
> +                               /* Time accounting. */
> +                               loop_ns = igt_nsec_elapsed(&start);
> +                               loop_busy = loop_ns - loop_busy;
> +                               loop_ns -= pass_ns;
> +
> +                               busy_ns += loop_busy;
> +                               total_busy_ns += loop_busy;
> +                               idle_ns += loop_ns - loop_busy;
> +                               pass_ns += loop_ns;
> +                               total_ns += loop_ns;
> +
> +                               /* Re-calibrate. */
> +                               err = (double)total_busy_ns / total_ns -
> +                                     (double)target_busy_pct / 100.0;
> +                               target_idle_us = (double)target_idle_us *
> +                                                (1.0 + err);

Previously the question we answered was how long should I sleep for the
busy:idle ratio to hit the target.

expected_total_ns = 100.0 * total_busy_ns / target_busy_pct;
target_idle_us = (expected_total_ns - current_total_ns) / 1000;

	unsigned long loop_ns, loop_busy;
	struct timespec _ts = { };
	double err;

	/* PWM idle sleep. */
	_ts.tv_nsec = target_idle_us * 1000;
	nanosleep(&_ts, NULL);

Assuming no >1s sleeps.
(Ok, so the sleep after recalc is still here.)

	/* Restart the spinbatch. */
	__rearm_spin_batch(spin);
	__submit_spin_batch(gem_fd, spin, e, 0);
  
	/* PWM busy sleep. */
	loop_busy = igt_nsec_elapsed(&start);
	_ts.tv_nsec = busy_us * 1000;
	nanosleep(&_ts, NULL);
	igt_spin_batch_end(spin);

	/* Time accounting. */
	loop_ns = igt_nsec_elapsed(&start);
	loop_busy = loop_ns - loop_busy;
	loop_ns -= pass_ns;

So pass_ns is time from start of calibration, loop_ns is time for this
loop.

	busy_ns += loop_busy;
	total_busy_ns += loop_busy;

busy_ns will be calibration pass, total all passes?

	idle_ns += loop_ns - loop_busy;

And idle is the residual between the time up to this point, and what has
been busy.

	pass_ns += loop_ns;
	total_ns += loop_ns;

	/* Re-calibrate. */
	err = (double)total_busy_ns / total_ns -
	      (double)target_busy_pct / 100.0;

Hmm, I thought you didn't like the run on calculations, and wanted to
reset between passes? (Have I got total_busy_ns and busy_ns confused?)

	target_idle_us = (double)target_idle_us * (1.0 + err);

Ok, I'm tired, but... So, if busy is 10% larger than expected, sleep 10%
longer to try and compensate, would be the gist.

And this is because you always sleep and spin together and so cannot
just sleep to compensate for the earlier inaccuracy. Which means we
never truly try to correct the error in the same pass, but apply a
correction factor for the next.

To me it seems like the closed system with each loop being "spin then
adjusted sleep" will autocorrect and is more likely to finish correct (as
we are less reliant on the next loop for the accuracy). It's pretty much
immaterial, as we expect the pmu to match the measurements (and not our
expectations), but I find the one-pass-does-all approach much simpler to
follow.
-Chris
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 43+ messages in thread

* [igt-dev] ✓ Fi.CI.BAT: success for tests/perf_pmu: Avoid RT thread for accuracy test (rev2)
  2018-03-26 10:57 ` [Intel-gfx] " Tvrtko Ursulin
                   ` (8 preceding siblings ...)
  (?)
@ 2018-04-03 14:23 ` Patchwork
  -1 siblings, 0 replies; 43+ messages in thread
From: Patchwork @ 2018-04-03 14:23 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: igt-dev

== Series Details ==

Series: tests/perf_pmu: Avoid RT thread for accuracy test (rev2)
URL   : https://patchwork.freedesktop.org/series/40662/
State : success

== Summary ==

IGT patchset tested on top of latest successful build
da00bf83aba3b516922efa1f338381189461aa4a tests/kms_plane_scaling: fb height to be min 16 for NV12

with latest DRM-Tip kernel build CI_DRM_4017
29940f138482 drm-tip: 2018y-04m-03d-13h-23m-36s UTC integration manifest

No testlist changes.

---- Known issues:

Test kms_pipe_crc_basic:
        Subgroup hang-read-crc-pipe-c:
                fail       -> PASS       (fi-skl-6700k2) fdo#103191
        Subgroup suspend-read-crc-pipe-b:
                pass       -> INCOMPLETE (fi-snb-2520m) fdo#103713

fdo#103191 https://bugs.freedesktop.org/show_bug.cgi?id=103191
fdo#103713 https://bugs.freedesktop.org/show_bug.cgi?id=103713

fi-bdw-5557u     total:285  pass:264  dwarn:0   dfail:0   fail:0   skip:21  time:432s
fi-bdw-gvtdvm    total:285  pass:261  dwarn:0   dfail:0   fail:0   skip:24  time:440s
fi-blb-e6850     total:285  pass:220  dwarn:1   dfail:0   fail:0   skip:64  time:382s
fi-bsw-n3050     total:285  pass:239  dwarn:0   dfail:0   fail:0   skip:46  time:543s
fi-bwr-2160      total:285  pass:180  dwarn:0   dfail:0   fail:0   skip:105 time:298s
fi-bxt-dsi       total:285  pass:255  dwarn:0   dfail:0   fail:0   skip:30  time:520s
fi-bxt-j4205     total:285  pass:256  dwarn:0   dfail:0   fail:0   skip:29  time:515s
fi-byt-j1900     total:285  pass:250  dwarn:0   dfail:0   fail:0   skip:35  time:524s
fi-byt-n2820     total:285  pass:246  dwarn:0   dfail:0   fail:0   skip:39  time:512s
fi-cfl-8700k     total:285  pass:257  dwarn:0   dfail:0   fail:0   skip:28  time:411s
fi-cfl-s3        total:285  pass:259  dwarn:0   dfail:0   fail:0   skip:26  time:561s
fi-cfl-u         total:285  pass:259  dwarn:0   dfail:0   fail:0   skip:26  time:511s
fi-cnl-y3        total:285  pass:259  dwarn:0   dfail:0   fail:0   skip:26  time:587s
fi-elk-e7500     total:285  pass:225  dwarn:1   dfail:0   fail:0   skip:59  time:429s
fi-gdg-551       total:285  pass:176  dwarn:0   dfail:0   fail:1   skip:108 time:316s
fi-glk-1         total:285  pass:257  dwarn:0   dfail:0   fail:0   skip:28  time:539s
fi-hsw-4770      total:285  pass:258  dwarn:0   dfail:0   fail:0   skip:27  time:406s
fi-ilk-650       total:285  pass:225  dwarn:0   dfail:0   fail:0   skip:60  time:424s
fi-ivb-3520m     total:285  pass:256  dwarn:0   dfail:0   fail:0   skip:29  time:459s
fi-ivb-3770      total:285  pass:252  dwarn:0   dfail:0   fail:0   skip:33  time:434s
fi-kbl-7500u     total:285  pass:260  dwarn:1   dfail:0   fail:0   skip:24  time:474s
fi-kbl-7567u     total:285  pass:265  dwarn:0   dfail:0   fail:0   skip:20  time:468s
fi-kbl-r         total:285  pass:258  dwarn:0   dfail:0   fail:0   skip:27  time:510s
fi-pnv-d510      total:285  pass:219  dwarn:1   dfail:0   fail:0   skip:65  time:668s
fi-skl-6260u     total:285  pass:265  dwarn:0   dfail:0   fail:0   skip:20  time:446s
fi-skl-6600u     total:285  pass:258  dwarn:0   dfail:0   fail:0   skip:27  time:535s
fi-skl-6700k2    total:285  pass:261  dwarn:0   dfail:0   fail:0   skip:24  time:504s
fi-skl-6770hq    total:285  pass:265  dwarn:0   dfail:0   fail:0   skip:20  time:505s
fi-skl-guc       total:285  pass:257  dwarn:0   dfail:0   fail:0   skip:28  time:430s
fi-skl-gvtdvm    total:285  pass:262  dwarn:0   dfail:0   fail:0   skip:23  time:450s
fi-snb-2520m     total:242  pass:208  dwarn:0   dfail:0   fail:0   skip:33 
fi-snb-2600      total:285  pass:245  dwarn:0   dfail:0   fail:0   skip:40  time:399s
Blacklisted hosts:
fi-cnl-psr       total:285  pass:256  dwarn:3   dfail:0   fail:0   skip:26  time:529s
fi-glk-j4005     total:285  pass:256  dwarn:0   dfail:0   fail:0   skip:29  time:489s

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_1218/issues.html
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH i-g-t v2] tests/perf_pmu: Avoid RT thread for accuracy test
  2018-04-03 13:10     ` [igt-dev] [Intel-gfx] " Chris Wilson
@ 2018-04-03 16:09       ` Tvrtko Ursulin
  -1 siblings, 0 replies; 43+ messages in thread
From: Tvrtko Ursulin @ 2018-04-03 16:09 UTC (permalink / raw)
  To: Chris Wilson, Tvrtko Ursulin, igt-dev; +Cc: Intel-gfx


On 03/04/2018 14:10, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-04-03 13:38:25)
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> Realtime scheduling interferes with execlists submission (tasklet) so try
>> to simplify the PWM loop in a few ways:
>>
>>   * Drop RT.
>>   * Longer batches for smaller systematic error.
>>   * More truthful test duration calculation.
>>   * Less clock queries.
>>   * No self-adjust - instead just report the achieved cycle and let the
>>     parent check against it.
>>   * Report absolute cycle error.
>>
>> v2:
>>   * Bring back self-adjust. (Chris Wilson)
>>     (But slightly fixed version with no overflow.)
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> ---
>>   tests/perf_pmu.c | 97 +++++++++++++++++++++++++-------------------------------
>>   1 file changed, 43 insertions(+), 54 deletions(-)
>>
>> diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
>> index f27b7ec7d2c2..0cfacd4a8fbe 100644
>> --- a/tests/perf_pmu.c
>> +++ b/tests/perf_pmu.c
>> @@ -1504,12 +1504,6 @@ test_enable_race(int gem_fd, const struct intel_execution_engine2 *e)
>>          gem_quiescent_gpu(gem_fd);
>>   }
>>   
>> -static double __error(double val, double ref)
>> -{
>> -       igt_assert(ref > 1e-5 /* smallval */);
>> -       return (100.0 * val / ref) - 100.0;
>> -}
>> -
>>   static void __rearm_spin_batch(igt_spin_t *spin)
>>   {
>>          const uint32_t mi_arb_chk = 0x5 << 23;
>> @@ -1532,13 +1526,12 @@ static void
>>   accuracy(int gem_fd, const struct intel_execution_engine2 *e,
>>           unsigned long target_busy_pct)
>>   {
>> -       const unsigned int min_test_loops = 7;
>> -       const unsigned long min_test_us = 1e6;
>> -       unsigned long busy_us = 2500;
>> +       unsigned long busy_us = 10000 - 100 * (1 + abs(50 - target_busy_pct));
>>          unsigned long idle_us = 100 * (busy_us - target_busy_pct *
>>                                  busy_us / 100) / target_busy_pct;
>> -       unsigned long pwm_calibration_us;
>> -       unsigned long test_us;
>> +       const unsigned long min_test_us = 1e6;
>> +       const unsigned long pwm_calibration_us = min_test_us;
>> +       const unsigned long test_us = min_test_us;
>>          double busy_r, expected;
>>          uint64_t val[2];
>>          uint64_t ts[2];
>> @@ -1553,13 +1546,6 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
>>                  idle_us *= 2;
>>          }
>>   
>> -       pwm_calibration_us = min_test_loops * (busy_us + idle_us);
>> -       while (pwm_calibration_us < min_test_us)
>> -               pwm_calibration_us += busy_us + idle_us;
>> -       test_us = min_test_loops * (idle_us + busy_us);
>> -       while (test_us < min_test_us)
>> -               test_us += busy_us + idle_us;
>> -
>>          igt_info("calibration=%lums, test=%lums; ratio=%.2f%% (%luus/%luus)\n",
>>                   pwm_calibration_us / 1000, test_us / 1000,
>>                   (double)busy_us / (busy_us + idle_us) * 100.0,
>> @@ -1572,20 +1558,11 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
>>   
>>          /* Emit PWM pattern on the engine from a child. */
>>          igt_fork(child, 1) {
>> -               struct sched_param rt = { .sched_priority = 99 };
>>                  const unsigned long timeout[] = {
>>                          pwm_calibration_us * 1000, test_us * 1000
>>                  };
>> -               uint64_t total_busy_ns = 0, total_idle_ns = 0;
>> +               uint64_t total_busy_ns = 0, total_ns = 0;
>>                  igt_spin_t *spin;
>> -               int ret;
>> -
>> -               /* We need the best sleep accuracy we can get. */
>> -               ret = sched_setscheduler(0,
>> -                                        SCHED_FIFO | SCHED_RESET_ON_FORK,
>> -                                        &rt);
>> -               if (ret)
>> -                       igt_warn("Failed to set scheduling policy!\n");
>>   
>>                  /* Allocate our spin batch and idle it. */
>>                  spin = igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e), 0);
>> @@ -1594,39 +1571,51 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
>>   
>>                  /* 1st pass is calibration, second pass is the test. */
>>                  for (int pass = 0; pass < ARRAY_SIZE(timeout); pass++) {
>> -                       uint64_t busy_ns = -total_busy_ns;
>> -                       uint64_t idle_ns = -total_idle_ns;
>> -                       struct timespec test_start = { };
>> +                       unsigned int target_idle_us = idle_us;
>> +                       uint64_t busy_ns = 0, idle_ns = 0;
>> +                       struct timespec start = { };
>> +                       unsigned long pass_ns = 0;
>> +
>> +                       igt_nsec_elapsed(&start);
>>   
>> -                       igt_nsec_elapsed(&test_start);
>>                          do {
>> -                               unsigned int target_idle_us, t_busy;
>> +                               unsigned long loop_ns, loop_busy;
>> +                               struct timespec _ts = { };
>> +                               double err;
>> +
>> +                               /* PWM idle sleep. */
>> +                               _ts.tv_nsec = target_idle_us * 1000;
>> +                               nanosleep(&_ts, NULL);
>>   
>>                                  /* Restart the spinbatch. */
>>                                  __rearm_spin_batch(spin);
>>                                  __submit_spin_batch(gem_fd, spin, e, 0);
>>   
>> -                               /*
>> -                                * Note that the submission may be delayed to a
>> -                                * tasklet (ksoftirqd) which cannot run until we
>> -                                * sleep as we hog the cpu (we are RT).
>> -                                */
>> -
>> -                               t_busy = measured_usleep(busy_us);
>> +                               /* PWM busy sleep. */
>> +                               loop_busy = igt_nsec_elapsed(&start);
>> +                               _ts.tv_nsec = busy_us * 1000;
>> +                               nanosleep(&_ts, NULL);
>>                                  igt_spin_batch_end(spin);
>> -                               gem_sync(gem_fd, spin->handle);
>> -
>> -                               total_busy_ns += t_busy;
>> -
>> -                               target_idle_us =
>> -                                       (100 * total_busy_ns / target_busy_pct - (total_busy_ns + total_idle_ns)) / 1000;
>> -                               total_idle_ns += measured_usleep(target_idle_us);
>> -                       } while (igt_nsec_elapsed(&test_start) < timeout[pass]);
>> -
>> -                       busy_ns += total_busy_ns;
>> -                       idle_ns += total_idle_ns;
>>   
>> -                       expected = (double)busy_ns / (busy_ns + idle_ns);
>> +                               /* Time accounting. */
>> +                               loop_ns = igt_nsec_elapsed(&start);
>> +                               loop_busy = loop_ns - loop_busy;
>> +                               loop_ns -= pass_ns;
>> +
>> +                               busy_ns += loop_busy;
>> +                               total_busy_ns += loop_busy;
>> +                               idle_ns += loop_ns - loop_busy;
>> +                               pass_ns += loop_ns;
>> +                               total_ns += loop_ns;
>> +
>> +                               /* Re-calibrate. */
>> +                               err = (double)total_busy_ns / total_ns -
>> +                                     (double)target_busy_pct / 100.0;
>> +                               target_idle_us = (double)target_idle_us *
>> +                                                (1.0 + err);
> 
> Previously the question we answered was how long should I sleep for the
> busy:idle ratio to hit the target.
> 
> expected_total_ns = 100.0 * total_busy_ns / target_busy_pct;
> target_idle_us = (expected_total_ns - current_total_ns) / 1000;

Yes, and the overflow (or underflow, depending how you look at it) was 
here. Usually in the first loop iteration for me, when expected_total_ns 
is smaller than current_total_ns.

But mostly I think this should have a minor effect, unless some systems 
can hit it more often.

> 
> 	unsigned long loop_ns, loop_busy;
> 	struct timespec _ts = { };
> 	double err;
> 
> 	/* PWM idle sleep. */
> 	_ts.tv_nsec = target_idle_us * 1000;
> 	nanosleep(&_ts, NULL);
> 
> Assuming no >1s sleeps.
> (Ok, so the sleep after recalc is still here.)
> 
> 	/* Restart the spinbatch. */
> 	__rearm_spin_batch(spin);
> 	__submit_spin_batch(gem_fd, spin, e, 0);
>    
> 	/* PWM busy sleep. */
> 	loop_busy = igt_nsec_elapsed(&start);
> 	_ts.tv_nsec = busy_us * 1000;
> 	nanosleep(&_ts, NULL);
> 	igt_spin_batch_end(spin);
> 
> 	/* Time accounting. */
> 	loop_ns = igt_nsec_elapsed(&start);
> 	loop_busy = loop_ns - loop_busy;
> 	loop_ns -= pass_ns;
> 
> So pass_ns is time from start of calibration, loop_ns is time for this
> loop.
> 
> 	busy_ns += loop_busy;
> 	total_busy_ns += loop_busy;
> 
> busy_ns will be calibration pass, total all passes?

busy_ns/idle_ns are the current pass. There is also total_busy/idle_ns 
at one level up, which are the totals.

> 
> 	idle_ns += loop_ns - loop_busy;
> 
> And idle is the residual between the time up to this point, and what has
> been busy.

Yes, I wanted to simplify and have reduced it to only two clock queries
per loop. Maybe it isn't the easiest to follow. :I

> 	pass_ns += loop_ns;
> 	total_ns += loop_ns;
> 
> 	/* Re-calibrate. */
> 	err = (double)total_busy_ns / total_ns -
> 	      (double)target_busy_pct / 100.0;
> 
> Hmm, I thought you didn't like the run on calculations, and wanted to
> reset between passes? (Have I got total_busy_ns and busy_ns confused?)

No, I did not like the aggregate in igt_info only. For calibration total 
times are better I think.

With an exception that the "expected" ratio, as reported to the parent, 
is based on the 2nd pass. That is so the error of the first pass, the 
initial and hopefully all calibration that is needed, is not included in 
the value we assert against, since it is not the one parent will sample 
PMU busyness either.

> 	target_idle_us = (double)target_idle_us * (1.0 + err);
> 
> Ok, I'm tired, but... So, if busy is 10% larger than expected, sleep 10%
> longer to try and compensate, would be the gist.

Correct.

> And this is because you always sleep and spin together and so cannot
> just sleep to compensate for the earlier inaccuracy. Which means we
> never truly try to correct the error in the same pass, but apply a
> correction factor for the next.

Correct.
> To me it seems like the closed system with each loop being "spin then
> adjusted sleep" will autocorrect and more likely to finish correct (as
> we are less reliant on the next loop for the accuracy). It's pretty much
> immaterial, as we expect the pmu to match the measurements (and not our
> expectations), but I find the one pass does all much simpler to follow.

Since we do a good number of loops, and hope the calibration will 
converge quickly (which it does for me), I don't see that there is an 
issue there.

With this loop, for me locally, it consistently underestimates by ~+0.03
- +0.04% for 2% and 98% tests, and ~+0.20 - +0.60%. I am not so happy
with the fact that the error seems systematic (it seems that each loop
adds ~ +0.008 - +0.012% of error) - but I don't have an idea on how to
improve it further.

More of a problem will be if this still doesn't work that great on the
CI, for instance if the latencies there are more random. An IGT_TRACE
equivalent to GEM_TRACE, to dump the calibration passes, comes to mind. :)

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [Intel-gfx] [PATCH i-g-t v2] tests/perf_pmu: Avoid RT thread for accuracy test
@ 2018-04-03 16:09       ` Tvrtko Ursulin
  0 siblings, 0 replies; 43+ messages in thread
From: Tvrtko Ursulin @ 2018-04-03 16:09 UTC (permalink / raw)
  To: Chris Wilson, Tvrtko Ursulin, igt-dev; +Cc: Intel-gfx


On 03/04/2018 14:10, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-04-03 13:38:25)
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> Realtime scheduling interferes with execlists submission (tasklet) so try
>> to simplify the PWM loop in a few ways:
>>
>>   * Drop RT.
>>   * Longer batches for smaller systematic error.
>>   * More truthful test duration calculation.
>>   * Less clock queries.
>>   * No self-adjust - instead just report the achieved cycle and let the
>>     parent check against it.
>>   * Report absolute cycle error.
>>
>> v2:
>>   * Bring back self-adjust. (Chris Wilson)
>>     (But slightly fixed version with no overflow.)
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> ---
>>   tests/perf_pmu.c | 97 +++++++++++++++++++++++++-------------------------------
>>   1 file changed, 43 insertions(+), 54 deletions(-)
>>
>> diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
>> index f27b7ec7d2c2..0cfacd4a8fbe 100644
>> --- a/tests/perf_pmu.c
>> +++ b/tests/perf_pmu.c
>> @@ -1504,12 +1504,6 @@ test_enable_race(int gem_fd, const struct intel_execution_engine2 *e)
>>          gem_quiescent_gpu(gem_fd);
>>   }
>>   
>> -static double __error(double val, double ref)
>> -{
>> -       igt_assert(ref > 1e-5 /* smallval */);
>> -       return (100.0 * val / ref) - 100.0;
>> -}
>> -
>>   static void __rearm_spin_batch(igt_spin_t *spin)
>>   {
>>          const uint32_t mi_arb_chk = 0x5 << 23;
>> @@ -1532,13 +1526,12 @@ static void
>>   accuracy(int gem_fd, const struct intel_execution_engine2 *e,
>>           unsigned long target_busy_pct)
>>   {
>> -       const unsigned int min_test_loops = 7;
>> -       const unsigned long min_test_us = 1e6;
>> -       unsigned long busy_us = 2500;
>> +       unsigned long busy_us = 10000 - 100 * (1 + abs(50 - target_busy_pct));
>>          unsigned long idle_us = 100 * (busy_us - target_busy_pct *
>>                                  busy_us / 100) / target_busy_pct;
>> -       unsigned long pwm_calibration_us;
>> -       unsigned long test_us;
>> +       const unsigned long min_test_us = 1e6;
>> +       const unsigned long pwm_calibration_us = min_test_us;
>> +       const unsigned long test_us = min_test_us;
>>          double busy_r, expected;
>>          uint64_t val[2];
>>          uint64_t ts[2];
>> @@ -1553,13 +1546,6 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
>>                  idle_us *= 2;
>>          }
>>   
>> -       pwm_calibration_us = min_test_loops * (busy_us + idle_us);
>> -       while (pwm_calibration_us < min_test_us)
>> -               pwm_calibration_us += busy_us + idle_us;
>> -       test_us = min_test_loops * (idle_us + busy_us);
>> -       while (test_us < min_test_us)
>> -               test_us += busy_us + idle_us;
>> -
>>          igt_info("calibration=%lums, test=%lums; ratio=%.2f%% (%luus/%luus)\n",
>>                   pwm_calibration_us / 1000, test_us / 1000,
>>                   (double)busy_us / (busy_us + idle_us) * 100.0,
>> @@ -1572,20 +1558,11 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
>>   
>>          /* Emit PWM pattern on the engine from a child. */
>>          igt_fork(child, 1) {
>> -               struct sched_param rt = { .sched_priority = 99 };
>>                  const unsigned long timeout[] = {
>>                          pwm_calibration_us * 1000, test_us * 1000
>>                  };
>> -               uint64_t total_busy_ns = 0, total_idle_ns = 0;
>> +               uint64_t total_busy_ns = 0, total_ns = 0;
>>                  igt_spin_t *spin;
>> -               int ret;
>> -
>> -               /* We need the best sleep accuracy we can get. */
>> -               ret = sched_setscheduler(0,
>> -                                        SCHED_FIFO | SCHED_RESET_ON_FORK,
>> -                                        &rt);
>> -               if (ret)
>> -                       igt_warn("Failed to set scheduling policy!\n");
>>   
>>                  /* Allocate our spin batch and idle it. */
>>                  spin = igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e), 0);
>> @@ -1594,39 +1571,51 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
>>   
>>                  /* 1st pass is calibration, second pass is the test. */
>>                  for (int pass = 0; pass < ARRAY_SIZE(timeout); pass++) {
>> -                       uint64_t busy_ns = -total_busy_ns;
>> -                       uint64_t idle_ns = -total_idle_ns;
>> -                       struct timespec test_start = { };
>> +                       unsigned int target_idle_us = idle_us;
>> +                       uint64_t busy_ns = 0, idle_ns = 0;
>> +                       struct timespec start = { };
>> +                       unsigned long pass_ns = 0;
>> +
>> +                       igt_nsec_elapsed(&start);
>>   
>> -                       igt_nsec_elapsed(&test_start);
>>                          do {
>> -                               unsigned int target_idle_us, t_busy;
>> +                               unsigned long loop_ns, loop_busy;
>> +                               struct timespec _ts = { };
>> +                               double err;
>> +
>> +                               /* PWM idle sleep. */
>> +                               _ts.tv_nsec = target_idle_us * 1000;
>> +                               nanosleep(&_ts, NULL);
>>   
>>                                  /* Restart the spinbatch. */
>>                                  __rearm_spin_batch(spin);
>>                                  __submit_spin_batch(gem_fd, spin, e, 0);
>>   
>> -                               /*
>> -                                * Note that the submission may be delayed to a
>> -                                * tasklet (ksoftirqd) which cannot run until we
>> -                                * sleep as we hog the cpu (we are RT).
>> -                                */
>> -
>> -                               t_busy = measured_usleep(busy_us);
>> +                               /* PWM busy sleep. */
>> +                               loop_busy = igt_nsec_elapsed(&start);
>> +                               _ts.tv_nsec = busy_us * 1000;
>> +                               nanosleep(&_ts, NULL);
>>                                  igt_spin_batch_end(spin);
>> -                               gem_sync(gem_fd, spin->handle);
>> -
>> -                               total_busy_ns += t_busy;
>> -
>> -                               target_idle_us =
>> -                                       (100 * total_busy_ns / target_busy_pct - (total_busy_ns + total_idle_ns)) / 1000;
>> -                               total_idle_ns += measured_usleep(target_idle_us);
>> -                       } while (igt_nsec_elapsed(&test_start) < timeout[pass]);
>> -
>> -                       busy_ns += total_busy_ns;
>> -                       idle_ns += total_idle_ns;
>>   
>> -                       expected = (double)busy_ns / (busy_ns + idle_ns);
>> +                               /* Time accounting. */
>> +                               loop_ns = igt_nsec_elapsed(&start);
>> +                               loop_busy = loop_ns - loop_busy;
>> +                               loop_ns -= pass_ns;
>> +
>> +                               busy_ns += loop_busy;
>> +                               total_busy_ns += loop_busy;
>> +                               idle_ns += loop_ns - loop_busy;
>> +                               pass_ns += loop_ns;
>> +                               total_ns += loop_ns;
>> +
>> +                               /* Re-calibrate. */
>> +                               err = (double)total_busy_ns / total_ns -
>> +                                     (double)target_busy_pct / 100.0;
>> +                               target_idle_us = (double)target_idle_us *
>> +                                                (1.0 + err);
> 
> Previously the question we answered was how long should I sleep for the
> busy:idle ratio to hit the target.
> 
> expected_total_ns = 100.0 * total_busy_ns / target_busy_pct;
> target_idle_us = (expected_total_ns - current_total_ns) / 1000;

Yes, and the overflow (or underflow, depending on how you look at it) was 
here. For me it usually happens in the first loop iteration, when 
expected_total_ns is smaller than current_total_ns.

But mostly I think this should have a minor effect, unless some systems 
can hit it more often.

> 
> 	unsigned long loop_ns, loop_busy;
> 	struct timespec _ts = { };
> 	double err;
> 
> 	/* PWM idle sleep. */
> 	_ts.tv_nsec = target_idle_us * 1000;
> 	nanosleep(&_ts, NULL);
> 
> Assuming no >1s sleeps.
> (Ok, so the sleep after recalc is still here.)
> 
> 	/* Restart the spinbatch. */
> 	__rearm_spin_batch(spin);
> 	__submit_spin_batch(gem_fd, spin, e, 0);
>    
> 	/* PWM busy sleep. */
> 	loop_busy = igt_nsec_elapsed(&start);
> 	_ts.tv_nsec = busy_us * 1000;
> 	nanosleep(&_ts, NULL);
> 	igt_spin_batch_end(spin);
> 
> 	/* Time accounting. */
> 	loop_ns = igt_nsec_elapsed(&start);
> 	loop_busy = loop_ns - loop_busy;
> 	loop_ns -= pass_ns;
> 
> So pass_ns is time from start of calibration, loop_ns is time for this
> loop.
> 
> 	busy_ns += loop_busy;
> 	total_busy_ns += loop_busy;
> 
> busy_ns will be calibration pass, total all passes?

busy_ns/idle_ns are the current pass. There is also total_busy/idle_ns 
at one level up, which are the totals.

> 
> 	idle_ns += loop_ns - loop_busy;
> 
> And idle is the residual between the time up to this point, and what has
> been busy.

Yes, I wanted to simplify and have reduced it to two clock queries per 
loop only. It maybe isn't the easiest to follow. :I

> 	pass_ns += loop_ns;
> 	total_ns += loop_ns;
> 
> 	/* Re-calibrate. */
> 	err = (double)total_busy_ns / total_ns -
> 	      (double)target_busy_pct / 100.0;
> 
> Hmm, I thought you didn't like the run on calculations, and wanted to
> reset between passes? (Have I got total_busy_ns and busy_ns confused?)

No, I did not like the aggregate in igt_info only. For calibration total 
times are better I think.

The exception is that the "expected" ratio, as reported to the parent, 
is based on the 2nd pass. That is so the error of the first pass, the 
initial and hopefully all calibration that is needed, is not included in 
the value we assert against, since that is not the pass during which the 
parent samples PMU busyness either.

> 	target_idle_us = (double)target_idle_us * (1.0 + err);
> 
> Ok, I'm tired, but... So, if busy is 10% larger than expected, sleep 10%
> longer to try and compensate, would be the gist.

Correct.

> And this is because you always sleep and spin together and so cannot
> just sleep to compensate for the earlier inaccuracy. Which means we
> never truly try to correct the error in the same pass, but apply a
> correction factor for the next.

Correct.
> To me it seems like the closed system with each loop being "spin then
> adjusted sleep" will autocorrect and more likely to finish correct (as
> we are less reliant on the next loop for the accuracy). It's pretty much
> immaterial, as we expect the pmu to match the measurements (and not our
> expectations), but I find the one pass does all much simpler to follow.

Since we do a good number of loops, and hope the calibration will 
converge quickly (which it does for me), I don't see that there is an 
issue there.

With this loop, for me locally, it consistently underestimates by ~+0.03 
- +0.04% for 2% and 98% tests, and ~+0.20 - +0.60%. I am not so happy 
with the fact that the error seems systematic (it seems that each loop adds ~ 
+0.008 - +0.012% of error) - but I don't have an idea on how to improve it 
further.

More of a problem will be if this still doesn't work that great on the 
CI, for instance if the latencies there are more random. An IGT_TRACE 
equivalent to GEM_TRACE, to dump the calibration passes, comes to mind. :)

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH i-g-t v2] tests/perf_pmu: Avoid RT thread for accuracy test
  2018-04-03 16:09       ` [Intel-gfx] " Tvrtko Ursulin
@ 2018-04-03 16:24         ` Chris Wilson
  -1 siblings, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2018-04-03 16:24 UTC (permalink / raw)
  To: Tvrtko Ursulin, Tvrtko Ursulin, igt-dev; +Cc: Intel-gfx

Quoting Tvrtko Ursulin (2018-04-03 17:09:09)
> 
> On 03/04/2018 14:10, Chris Wilson wrote:
> > To me it seems like the closed system with each loop being "spin then
> > adjusted sleep" will autocorrect and more likely to finish correct (as
> > we are less reliant on the next loop for the accuracy). It's pretty much
> > immaterial, as we expect the pmu to match the measurements (and not our
> > expectations), but I find the one pass does all much simpler to follow.
> 
> Since we do a good number of loops, and hope the calibration will 
> converge quickly (which it does for me), I don't see that there is an 
> issue there.

I'm sitting here drinking coffee trying to decide if it does converge ;)
That's the problem here, I need to actually find a pencil, some paper
and remember some basic maths for series convergence. Not happening with
the amount of coffee I need to drink at the moment.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [igt-dev] [Intel-gfx] [PATCH i-g-t v2] tests/perf_pmu: Avoid RT thread for accuracy test
@ 2018-04-03 16:24         ` Chris Wilson
  0 siblings, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2018-04-03 16:24 UTC (permalink / raw)
  To: Tvrtko Ursulin, Tvrtko Ursulin, igt-dev; +Cc: Intel-gfx

Quoting Tvrtko Ursulin (2018-04-03 17:09:09)
> 
> On 03/04/2018 14:10, Chris Wilson wrote:
> > To me it seems like the closed system with each loop being "spin then
> > adjusted sleep" will autocorrect and more likely to finish correct (as
> > we are less reliant on the next loop for the accuracy). It's pretty much
> > immaterial, as we expect the pmu to match the measurements (and not our
> > expectations), but I find the one pass does all much simpler to follow.
> 
> Since we do a good number of loops, and hope the calibration will 
> converge quickly (which it does for me), I don't see that there is an 
> issue there.

I'm sitting here drinking coffee trying to decide if it does converge ;)
That's the problem here, I need to actually find a pencil, some paper
and remember some basic maths for series convergence. Not happening with
the amount of coffee I need to drink at the moment.
-Chris
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH i-g-t v3] tests/perf_pmu: Avoid RT thread for accuracy test
  2018-04-03 12:38   ` [Intel-gfx] " Tvrtko Ursulin
@ 2018-04-03 16:39     ` Tvrtko Ursulin
  -1 siblings, 0 replies; 43+ messages in thread
From: Tvrtko Ursulin @ 2018-04-03 16:39 UTC (permalink / raw)
  To: igt-dev; +Cc: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Realtime scheduling interferes with execlists submission (tasklet) so try
to simplify the PWM loop in a few ways:

 * Drop RT.
 * Longer batches for smaller systematic error.
 * More truthful test duration calculation.
 * Less clock queries.
 * No self-adjust - instead just report the achieved cycle and let the
   parent check against it.
 * Report absolute cycle error.

v2:
 * Bring back self-adjust. (Chris Wilson)
   (But slightly fixed version with no overflow.)

v3:
 * Log average and variance of the calibration for each pass.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 tests/perf_pmu.c | 108 +++++++++++++++++++++++++++----------------------------
 1 file changed, 53 insertions(+), 55 deletions(-)

diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
index 2273ddb9e684..697008c855fd 100644
--- a/tests/perf_pmu.c
+++ b/tests/perf_pmu.c
@@ -1497,12 +1497,6 @@ test_enable_race(int gem_fd, const struct intel_execution_engine2 *e)
 	gem_quiescent_gpu(gem_fd);
 }
 
-static double __error(double val, double ref)
-{
-	igt_assert(ref > 1e-5 /* smallval */);
-	return (100.0 * val / ref) - 100.0;
-}
-
 static void __rearm_spin_batch(igt_spin_t *spin)
 {
 	const uint32_t mi_arb_chk = 0x5 << 23;
@@ -1525,13 +1519,12 @@ static void
 accuracy(int gem_fd, const struct intel_execution_engine2 *e,
 	 unsigned long target_busy_pct)
 {
-	const unsigned int min_test_loops = 7;
-	const unsigned long min_test_us = 1e6;
-	unsigned long busy_us = 2500;
+	unsigned long busy_us = 10000 - 100 * (1 + abs(50 - target_busy_pct));
 	unsigned long idle_us = 100 * (busy_us - target_busy_pct *
 				busy_us / 100) / target_busy_pct;
-	unsigned long pwm_calibration_us;
-	unsigned long test_us;
+	const unsigned long min_test_us = 1e6;
+	const unsigned long pwm_calibration_us = min_test_us;
+	const unsigned long test_us = min_test_us;
 	double busy_r, expected;
 	uint64_t val[2];
 	uint64_t ts[2];
@@ -1546,13 +1539,6 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
 		idle_us *= 2;
 	}
 
-	pwm_calibration_us = min_test_loops * (busy_us + idle_us);
-	while (pwm_calibration_us < min_test_us)
-		pwm_calibration_us += busy_us + idle_us;
-	test_us = min_test_loops * (idle_us + busy_us);
-	while (test_us < min_test_us)
-		test_us += busy_us + idle_us;
-
 	igt_info("calibration=%lums, test=%lums; ratio=%.2f%% (%luus/%luus)\n",
 		 pwm_calibration_us / 1000, test_us / 1000,
 		 (double)busy_us / (busy_us + idle_us) * 100.0,
@@ -1565,20 +1551,11 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
 
 	/* Emit PWM pattern on the engine from a child. */
 	igt_fork(child, 1) {
-		struct sched_param rt = { .sched_priority = 99 };
 		const unsigned long timeout[] = {
 			pwm_calibration_us * 1000, test_us * 1000
 		};
-		uint64_t total_busy_ns = 0, total_idle_ns = 0;
+		uint64_t total_busy_ns = 0, total_ns = 0;
 		igt_spin_t *spin;
-		int ret;
-
-		/* We need the best sleep accuracy we can get. */
-		ret = sched_setscheduler(0,
-					 SCHED_FIFO | SCHED_RESET_ON_FORK,
-					 &rt);
-		if (ret)
-			igt_warn("Failed to set scheduling policy!\n");
 
 		/* Allocate our spin batch and idle it. */
 		spin = igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e), 0);
@@ -1587,42 +1564,63 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
 
 		/* 1st pass is calibration, second pass is the test. */
 		for (int pass = 0; pass < ARRAY_SIZE(timeout); pass++) {
-			uint64_t busy_ns = -total_busy_ns;
-			uint64_t idle_ns = -total_idle_ns;
-			struct timespec test_start = { };
+			unsigned int target_idle_us = idle_us;
+			uint64_t busy_ns = 0, idle_ns = 0;
+			struct timespec start = { };
+			unsigned long pass_ns = 0;
+			double avg = 0.0, var = 0.0;
+			unsigned int n = 0;
+
+			igt_nsec_elapsed(&start);
 
-			igt_nsec_elapsed(&test_start);
 			do {
-				unsigned int target_idle_us, t_busy;
+				unsigned long loop_ns, loop_busy;
+				struct timespec _ts = { };
+				double err, tmp;
+
+				/* PWM idle sleep. */
+				_ts.tv_nsec = target_idle_us * 1000;
+				nanosleep(&_ts, NULL);
 
 				/* Restart the spinbatch. */
 				__rearm_spin_batch(spin);
 				__submit_spin_batch(gem_fd, spin, e, 0);
 
-				/*
-				 * Note that the submission may be delayed to a
-				 * tasklet (ksoftirqd) which cannot run until we
-				 * sleep as we hog the cpu (we are RT).
-				 */
-
-				t_busy = measured_usleep(busy_us);
+				/* PWM busy sleep. */
+				loop_busy = igt_nsec_elapsed(&start);
+				_ts.tv_nsec = busy_us * 1000;
+				nanosleep(&_ts, NULL);
 				igt_spin_batch_end(spin);
-				gem_sync(gem_fd, spin->handle);
-
-				total_busy_ns += t_busy;
-
-				target_idle_us =
-					(100 * total_busy_ns / target_busy_pct - (total_busy_ns + total_idle_ns)) / 1000;
-				total_idle_ns += measured_usleep(target_idle_us);
-			} while (igt_nsec_elapsed(&test_start) < timeout[pass]);
-
-			busy_ns += total_busy_ns;
-			idle_ns += total_idle_ns;
 
-			expected = (double)busy_ns / (busy_ns + idle_ns);
-			igt_info("%u: busy %"PRIu64"us, idle %"PRIu64"us: %.2f%% (target: %lu%%)\n",
+				/* Time accounting. */
+				loop_ns = igt_nsec_elapsed(&start);
+				loop_busy = loop_ns - loop_busy;
+				loop_ns -= pass_ns;
+
+				busy_ns += loop_busy;
+				total_busy_ns += loop_busy;
+				idle_ns += loop_ns - loop_busy;
+				pass_ns += loop_ns;
+				total_ns += loop_ns;
+
+				/* Re-calibrate. */
+				err = (double)total_busy_ns / total_ns -
+				      (double)target_busy_pct / 100.0;
+				target_idle_us = (double)target_idle_us *
+						 (1.0 + err);
+
+				/* Running average and variance for debug. */
+				err = 100.0 * total_busy_ns / total_ns;
+				tmp = avg;
+				avg += (err - avg) / ++n;
+				var += (err - avg) * (err - tmp);
+// printf("%f * %f = %f\n", err - avg, err - tmp, (err - avg) * (err - tmp));
+			} while (pass_ns < timeout[pass]);
+
+			expected = (double)busy_ns / pass_ns;
+			igt_info("%u: busy %"PRIu64"us, idle %"PRIu64"us -> %.2f%% (target: %lu%%; average=%.2f, variance=%f)\n",
 				 pass, busy_ns / 1000, idle_ns / 1000,
-				 100 * expected, target_busy_pct);
+				 100 * expected, target_busy_pct, avg, var);
 			write(link[1], &expected, sizeof(expected));
 		}
 
@@ -1649,7 +1647,7 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
 	busy_r = (double)(val[1] - val[0]) / (ts[1] - ts[0]);
 
 	igt_info("error=%.2f%% (%.2f%% vs %.2f%%)\n",
-		 __error(busy_r, expected), 100 * busy_r, 100 * expected);
+		 (busy_r - expected) * 100, 100 * busy_r, 100 * expected);
 
 	assert_within(100.0 * busy_r, 100.0 * expected, 2);
 }
-- 
2.14.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [igt-dev] [PATCH i-g-t v3] tests/perf_pmu: Avoid RT thread for accuracy test
@ 2018-04-03 16:39     ` Tvrtko Ursulin
  0 siblings, 0 replies; 43+ messages in thread
From: Tvrtko Ursulin @ 2018-04-03 16:39 UTC (permalink / raw)
  To: igt-dev; +Cc: Intel-gfx, Tvrtko Ursulin

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Realtime scheduling interferes with execlists submission (tasklet) so try
to simplify the PWM loop in a few ways:

 * Drop RT.
 * Longer batches for smaller systematic error.
 * More truthful test duration calculation.
 * Less clock queries.
 * No self-adjust - instead just report the achieved cycle and let the
   parent check against it.
 * Report absolute cycle error.

v2:
 * Bring back self-adjust. (Chris Wilson)
   (But slightly fixed version with no overflow.)

v3:
 * Log average and variance of the calibration for each pass.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 tests/perf_pmu.c | 108 +++++++++++++++++++++++++++----------------------------
 1 file changed, 53 insertions(+), 55 deletions(-)

diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
index 2273ddb9e684..697008c855fd 100644
--- a/tests/perf_pmu.c
+++ b/tests/perf_pmu.c
@@ -1497,12 +1497,6 @@ test_enable_race(int gem_fd, const struct intel_execution_engine2 *e)
 	gem_quiescent_gpu(gem_fd);
 }
 
-static double __error(double val, double ref)
-{
-	igt_assert(ref > 1e-5 /* smallval */);
-	return (100.0 * val / ref) - 100.0;
-}
-
 static void __rearm_spin_batch(igt_spin_t *spin)
 {
 	const uint32_t mi_arb_chk = 0x5 << 23;
@@ -1525,13 +1519,12 @@ static void
 accuracy(int gem_fd, const struct intel_execution_engine2 *e,
 	 unsigned long target_busy_pct)
 {
-	const unsigned int min_test_loops = 7;
-	const unsigned long min_test_us = 1e6;
-	unsigned long busy_us = 2500;
+	unsigned long busy_us = 10000 - 100 * (1 + abs(50 - target_busy_pct));
 	unsigned long idle_us = 100 * (busy_us - target_busy_pct *
 				busy_us / 100) / target_busy_pct;
-	unsigned long pwm_calibration_us;
-	unsigned long test_us;
+	const unsigned long min_test_us = 1e6;
+	const unsigned long pwm_calibration_us = min_test_us;
+	const unsigned long test_us = min_test_us;
 	double busy_r, expected;
 	uint64_t val[2];
 	uint64_t ts[2];
@@ -1546,13 +1539,6 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
 		idle_us *= 2;
 	}
 
-	pwm_calibration_us = min_test_loops * (busy_us + idle_us);
-	while (pwm_calibration_us < min_test_us)
-		pwm_calibration_us += busy_us + idle_us;
-	test_us = min_test_loops * (idle_us + busy_us);
-	while (test_us < min_test_us)
-		test_us += busy_us + idle_us;
-
 	igt_info("calibration=%lums, test=%lums; ratio=%.2f%% (%luus/%luus)\n",
 		 pwm_calibration_us / 1000, test_us / 1000,
 		 (double)busy_us / (busy_us + idle_us) * 100.0,
@@ -1565,20 +1551,11 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
 
 	/* Emit PWM pattern on the engine from a child. */
 	igt_fork(child, 1) {
-		struct sched_param rt = { .sched_priority = 99 };
 		const unsigned long timeout[] = {
 			pwm_calibration_us * 1000, test_us * 1000
 		};
-		uint64_t total_busy_ns = 0, total_idle_ns = 0;
+		uint64_t total_busy_ns = 0, total_ns = 0;
 		igt_spin_t *spin;
-		int ret;
-
-		/* We need the best sleep accuracy we can get. */
-		ret = sched_setscheduler(0,
-					 SCHED_FIFO | SCHED_RESET_ON_FORK,
-					 &rt);
-		if (ret)
-			igt_warn("Failed to set scheduling policy!\n");
 
 		/* Allocate our spin batch and idle it. */
 		spin = igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e), 0);
@@ -1587,42 +1564,63 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
 
 		/* 1st pass is calibration, second pass is the test. */
 		for (int pass = 0; pass < ARRAY_SIZE(timeout); pass++) {
-			uint64_t busy_ns = -total_busy_ns;
-			uint64_t idle_ns = -total_idle_ns;
-			struct timespec test_start = { };
+			unsigned int target_idle_us = idle_us;
+			uint64_t busy_ns = 0, idle_ns = 0;
+			struct timespec start = { };
+			unsigned long pass_ns = 0;
+			double avg = 0.0, var = 0.0;
+			unsigned int n = 0;
+
+			igt_nsec_elapsed(&start);
 
-			igt_nsec_elapsed(&test_start);
 			do {
-				unsigned int target_idle_us, t_busy;
+				unsigned long loop_ns, loop_busy;
+				struct timespec _ts = { };
+				double err, tmp;
+
+				/* PWM idle sleep. */
+				_ts.tv_nsec = target_idle_us * 1000;
+				nanosleep(&_ts, NULL);
 
 				/* Restart the spinbatch. */
 				__rearm_spin_batch(spin);
 				__submit_spin_batch(gem_fd, spin, e, 0);
 
-				/*
-				 * Note that the submission may be delayed to a
-				 * tasklet (ksoftirqd) which cannot run until we
-				 * sleep as we hog the cpu (we are RT).
-				 */
-
-				t_busy = measured_usleep(busy_us);
+				/* PWM busy sleep. */
+				loop_busy = igt_nsec_elapsed(&start);
+				_ts.tv_nsec = busy_us * 1000;
+				nanosleep(&_ts, NULL);
 				igt_spin_batch_end(spin);
-				gem_sync(gem_fd, spin->handle);
-
-				total_busy_ns += t_busy;
-
-				target_idle_us =
-					(100 * total_busy_ns / target_busy_pct - (total_busy_ns + total_idle_ns)) / 1000;
-				total_idle_ns += measured_usleep(target_idle_us);
-			} while (igt_nsec_elapsed(&test_start) < timeout[pass]);
-
-			busy_ns += total_busy_ns;
-			idle_ns += total_idle_ns;
 
-			expected = (double)busy_ns / (busy_ns + idle_ns);
-			igt_info("%u: busy %"PRIu64"us, idle %"PRIu64"us: %.2f%% (target: %lu%%)\n",
+				/* Time accounting. */
+				loop_ns = igt_nsec_elapsed(&start);
+				loop_busy = loop_ns - loop_busy;
+				loop_ns -= pass_ns;
+
+				busy_ns += loop_busy;
+				total_busy_ns += loop_busy;
+				idle_ns += loop_ns - loop_busy;
+				pass_ns += loop_ns;
+				total_ns += loop_ns;
+
+				/* Re-calibrate. */
+				err = (double)total_busy_ns / total_ns -
+				      (double)target_busy_pct / 100.0;
+				target_idle_us = (double)target_idle_us *
+						 (1.0 + err);
+
+				/* Running average and variance for debug. */
+				err = 100.0 * total_busy_ns / total_ns;
+				tmp = avg;
+				avg += (err - avg) / ++n;
+				var += (err - avg) * (err - tmp);
+// printf("%f * %f = %f\n", err - avg, err - tmp, (err - avg) * (err - tmp));
+			} while (pass_ns < timeout[pass]);
+
+			expected = (double)busy_ns / pass_ns;
+			igt_info("%u: busy %"PRIu64"us, idle %"PRIu64"us -> %.2f%% (target: %lu%%; average=%.2f, variance=%f)\n",
 				 pass, busy_ns / 1000, idle_ns / 1000,
-				 100 * expected, target_busy_pct);
+				 100 * expected, target_busy_pct, avg, var);
 			write(link[1], &expected, sizeof(expected));
 		}
 
@@ -1649,7 +1647,7 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
 	busy_r = (double)(val[1] - val[0]) / (ts[1] - ts[0]);
 
 	igt_info("error=%.2f%% (%.2f%% vs %.2f%%)\n",
-		 __error(busy_r, expected), 100 * busy_r, 100 * expected);
+		 (busy_r - expected) * 100, 100 * busy_r, 100 * expected);
 
 	assert_within(100.0 * busy_r, 100.0 * expected, 2);
 }
-- 
2.14.1

_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [igt-dev] ✓ Fi.CI.IGT: success for tests/perf_pmu: Avoid RT thread for accuracy test (rev2)
  2018-03-26 10:57 ` [Intel-gfx] " Tvrtko Ursulin
                   ` (9 preceding siblings ...)
  (?)
@ 2018-04-03 16:41 ` Patchwork
  -1 siblings, 0 replies; 43+ messages in thread
From: Patchwork @ 2018-04-03 16:41 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: igt-dev

== Series Details ==

Series: tests/perf_pmu: Avoid RT thread for accuracy test (rev2)
URL   : https://patchwork.freedesktop.org/series/40662/
State : success

== Summary ==

---- Possible new issues:

Test kms_draw_crc:
        Subgroup draw-method-xrgb8888-mmap-wc-untiled:
                skip       -> PASS       (shard-snb)
Test kms_frontbuffer_tracking:
        Subgroup fbc-1p-offscren-pri-indfb-draw-mmap-cpu:
                skip       -> PASS       (shard-snb)
        Subgroup fbc-1p-primscrn-spr-indfb-move:
                skip       -> PASS       (shard-snb)
        Subgroup fbc-stridechange:
                fail       -> PASS       (shard-snb)
        Subgroup fbcpsr-rgb565-draw-blt:
                fail       -> SKIP       (shard-snb)
        Subgroup psr-2p-scndscrn-pri-indfb-draw-render:
                fail       -> SKIP       (shard-snb)
        Subgroup psr-2p-scndscrn-spr-indfb-onoff:
                fail       -> SKIP       (shard-snb)
Test kms_universal_plane:
        Subgroup universal-plane-pipe-a-sanity:
                fail       -> PASS       (shard-snb)
Test prime_vgem:
        Subgroup basic-fence-flip:
                skip       -> PASS       (shard-snb)

---- Known issues:

Test kms_flip:
        Subgroup 2x-flip-vs-blocking-wf-vblank:
                pass       -> FAIL       (shard-hsw) fdo#100368 +2
        Subgroup 2x-flip-vs-expired-vblank-interruptible:
                pass       -> FAIL       (shard-hsw) fdo#102887
        Subgroup modeset-vs-vblank-race:
                pass       -> FAIL       (shard-hsw) fdo#103060
Test kms_frontbuffer_tracking:
        Subgroup fbcpsr-2p-primscrn-shrfb-pgflip-blt:
                fail       -> SKIP       (shard-snb) fdo#103167 +1
Test kms_mmap_write_crc:
                dmesg-warn -> PASS       (shard-hsw) fdo#103286
Test kms_plane_multiple:
        Subgroup atomic-pipe-a-tiling-x:
                pass       -> FAIL       (shard-snb) fdo#103166

fdo#100368 
fdo#102887 
fdo#103060 
fdo#103167 
fdo#103286 
fdo#103166 

shard-apl        total:3496 pass:1832 dwarn:1   dfail:0   fail:7   skip:1655 time:12965s
shard-hsw        total:3496 pass:1779 dwarn:1   dfail:0   fail:5   skip:1710 time:11574s
shard-snb        total:3496 pass:1374 dwarn:1   dfail:0   fail:3   skip:2118 time:7007s
Blacklisted hosts:
shard-kbl        total:3496 pass:1957 dwarn:1   dfail:0   fail:6   skip:1532 time:9202s

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_1218/shards.html
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 43+ messages in thread

* [igt-dev] ✓ Fi.CI.BAT: success for tests/perf_pmu: Avoid RT thread for accuracy test (rev3)
  2018-03-26 10:57 ` [Intel-gfx] " Tvrtko Ursulin
                   ` (10 preceding siblings ...)
  (?)
@ 2018-04-03 17:15 ` Patchwork
  -1 siblings, 0 replies; 43+ messages in thread
From: Patchwork @ 2018-04-03 17:15 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: igt-dev

== Series Details ==

Series: tests/perf_pmu: Avoid RT thread for accuracy test (rev3)
URL   : https://patchwork.freedesktop.org/series/40662/
State : success

== Summary ==

IGT patchset tested on top of latest successful build
cad5fc06f954546042a432202cbe7e5a20fe1132 tests/gem_eio: Add reset and unwedge stress testing

with latest DRM-Tip kernel build CI_DRM_4017
29940f138482 drm-tip: 2018y-04m-03d-13h-23m-36s UTC integration manifest

No testlist changes.

---- Known issues:

Test debugfs_test:
        Subgroup read_all_entries:
                pass       -> INCOMPLETE (fi-snb-2520m) fdo#103713
Test kms_flip:
        Subgroup basic-flip-vs-wf_vblank:
                pass       -> INCOMPLETE (fi-elk-e7500) fdo#103989
Test kms_pipe_crc_basic:
        Subgroup hang-read-crc-pipe-c:
                fail       -> PASS       (fi-skl-6700k2) fdo#103191

fdo#103713 https://bugs.freedesktop.org/show_bug.cgi?id=103713
fdo#103989 https://bugs.freedesktop.org/show_bug.cgi?id=103989
fdo#103191 https://bugs.freedesktop.org/show_bug.cgi?id=103191

fi-bdw-5557u     total:285  pass:264  dwarn:0   dfail:0   fail:0   skip:21  time:433s
fi-bdw-gvtdvm    total:285  pass:261  dwarn:0   dfail:0   fail:0   skip:24  time:446s
fi-blb-e6850     total:285  pass:220  dwarn:1   dfail:0   fail:0   skip:64  time:381s
fi-bsw-n3050     total:285  pass:239  dwarn:0   dfail:0   fail:0   skip:46  time:540s
fi-bwr-2160      total:285  pass:180  dwarn:0   dfail:0   fail:0   skip:105 time:301s
fi-bxt-dsi       total:285  pass:255  dwarn:0   dfail:0   fail:0   skip:30  time:514s
fi-bxt-j4205     total:285  pass:256  dwarn:0   dfail:0   fail:0   skip:29  time:516s
fi-byt-j1900     total:285  pass:250  dwarn:0   dfail:0   fail:0   skip:35  time:521s
fi-byt-n2820     total:285  pass:246  dwarn:0   dfail:0   fail:0   skip:39  time:513s
fi-cfl-8700k     total:285  pass:257  dwarn:0   dfail:0   fail:0   skip:28  time:407s
fi-cfl-s3        total:285  pass:259  dwarn:0   dfail:0   fail:0   skip:26  time:565s
fi-cfl-u         total:285  pass:259  dwarn:0   dfail:0   fail:0   skip:26  time:512s
fi-cnl-y3        total:285  pass:259  dwarn:0   dfail:0   fail:0   skip:26  time:586s
fi-elk-e7500     total:218  pass:171  dwarn:1   dfail:0   fail:0   skip:45 
fi-gdg-551       total:285  pass:176  dwarn:0   dfail:0   fail:1   skip:108 time:321s
fi-glk-1         total:285  pass:257  dwarn:0   dfail:0   fail:0   skip:28  time:536s
fi-hsw-4770      total:285  pass:258  dwarn:0   dfail:0   fail:0   skip:27  time:404s
fi-ilk-650       total:285  pass:225  dwarn:0   dfail:0   fail:0   skip:60  time:424s
fi-ivb-3520m     total:285  pass:256  dwarn:0   dfail:0   fail:0   skip:29  time:468s
fi-ivb-3770      total:285  pass:252  dwarn:0   dfail:0   fail:0   skip:33  time:439s
fi-kbl-7500u     total:285  pass:260  dwarn:1   dfail:0   fail:0   skip:24  time:473s
fi-kbl-7567u     total:285  pass:265  dwarn:0   dfail:0   fail:0   skip:20  time:463s
fi-kbl-r         total:285  pass:258  dwarn:0   dfail:0   fail:0   skip:27  time:510s
fi-pnv-d510      total:285  pass:219  dwarn:1   dfail:0   fail:0   skip:65  time:666s
fi-skl-6260u     total:285  pass:265  dwarn:0   dfail:0   fail:0   skip:20  time:438s
fi-skl-6600u     total:285  pass:258  dwarn:0   dfail:0   fail:0   skip:27  time:542s
fi-skl-6700k2    total:285  pass:261  dwarn:0   dfail:0   fail:0   skip:24  time:505s
fi-skl-6770hq    total:285  pass:265  dwarn:0   dfail:0   fail:0   skip:20  time:502s
fi-skl-guc       total:285  pass:257  dwarn:0   dfail:0   fail:0   skip:28  time:427s
fi-skl-gvtdvm    total:285  pass:262  dwarn:0   dfail:0   fail:0   skip:23  time:446s
fi-snb-2520m     total:3    pass:2    dwarn:0   dfail:0   fail:0   skip:0  
fi-snb-2600      total:285  pass:245  dwarn:0   dfail:0   fail:0   skip:40  time:401s
Blacklisted hosts:
fi-cnl-psr       total:285  pass:256  dwarn:3   dfail:0   fail:0   skip:26  time:531s
fi-glk-j4005     total:285  pass:256  dwarn:0   dfail:0   fail:0   skip:29  time:496s

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_1219/issues.html
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 43+ messages in thread

* [igt-dev] ✗ Fi.CI.IGT: failure for tests/perf_pmu: Avoid RT thread for accuracy test (rev3)
  2018-03-26 10:57 ` [Intel-gfx] " Tvrtko Ursulin
                   ` (11 preceding siblings ...)
  (?)
@ 2018-04-03 18:33 ` Patchwork
  2018-04-04 11:13   ` Tvrtko Ursulin
  -1 siblings, 1 reply; 43+ messages in thread
From: Patchwork @ 2018-04-03 18:33 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: igt-dev

== Series Details ==

Series: tests/perf_pmu: Avoid RT thread for accuracy test (rev3)
URL   : https://patchwork.freedesktop.org/series/40662/
State : failure

== Summary ==

---- Possible new issues:

Test kms_chv_cursor_fail:
        Subgroup pipe-a-64x64-left-edge:
                pass       -> FAIL       (shard-snb)
Test kms_draw_crc:
        Subgroup draw-method-rgb565-pwrite-xtiled:
                pass       -> SKIP       (shard-snb)
        Subgroup draw-method-xrgb8888-mmap-wc-untiled:
                skip       -> PASS       (shard-snb)
Test kms_frontbuffer_tracking:
        Subgroup fbc-1p-offscren-pri-indfb-draw-mmap-cpu:
                skip       -> PASS       (shard-snb)
        Subgroup fbc-1p-offscren-pri-shrfb-draw-mmap-cpu:
                pass       -> FAIL       (shard-snb)
        Subgroup fbc-1p-primscrn-spr-indfb-move:
                skip       -> PASS       (shard-snb)
        Subgroup fbc-2p-scndscrn-spr-indfb-draw-mmap-wc:
                skip       -> FAIL       (shard-snb)
        Subgroup fbc-stridechange:
                fail       -> PASS       (shard-snb)
        Subgroup fbcpsr-1p-primscrn-indfb-pgflip-blt:
                skip       -> FAIL       (shard-snb)
        Subgroup fbcpsr-1p-primscrn-pri-indfb-draw-blt:
                skip       -> FAIL       (shard-snb)
        Subgroup fbcpsr-2p-primscrn-spr-indfb-fullscreen:
                skip       -> FAIL       (shard-snb)
        Subgroup fbcpsr-rgb565-draw-blt:
                fail       -> SKIP       (shard-snb)
        Subgroup psr-1p-primscrn-cur-indfb-draw-render:
                skip       -> FAIL       (shard-snb)
        Subgroup psr-2p-scndscrn-pri-indfb-draw-render:
                fail       -> SKIP       (shard-snb)
        Subgroup psr-2p-scndscrn-spr-indfb-onoff:
                fail       -> SKIP       (shard-snb)
Test kms_universal_plane:
        Subgroup universal-plane-pipe-a-sanity:
                fail       -> PASS       (shard-snb)
Test kms_vblank:
        Subgroup pipe-a-query-forked-busy:
                pass       -> FAIL       (shard-snb)
        Subgroup pipe-a-ts-continuation-modeset-rpm:
                skip       -> FAIL       (shard-snb)
Test prime_vgem:
        Subgroup basic-fence-flip:
                skip       -> PASS       (shard-snb)

---- Known issues:

Test kms_cursor_legacy:
        Subgroup flip-vs-cursor-legacy:
                pass       -> FAIL       (shard-hsw) fdo#102670
Test kms_flip:
        Subgroup plain-flip-fb-recreate-interruptible:
                fail       -> PASS       (shard-hsw) fdo#100368 +2
Test kms_frontbuffer_tracking:
        Subgroup fbc-rgb565-draw-mmap-wc:
                fail       -> PASS       (shard-apl) fdo#103167 +1
Test kms_mmap_write_crc:
                dmesg-warn -> PASS       (shard-hsw) fdo#103286
Test kms_rotation_crc:
        Subgroup primary-rotation-180:
                pass       -> FAIL       (shard-snb) fdo#103925
Test kms_sysfs_edid_timing:
                warn       -> PASS       (shard-apl) fdo#100047

fdo#102670 https://bugs.freedesktop.org/show_bug.cgi?id=102670
fdo#100368 https://bugs.freedesktop.org/show_bug.cgi?id=100368
fdo#103167 https://bugs.freedesktop.org/show_bug.cgi?id=103167
fdo#103286 https://bugs.freedesktop.org/show_bug.cgi?id=103286
fdo#103925 https://bugs.freedesktop.org/show_bug.cgi?id=103925
fdo#100047 https://bugs.freedesktop.org/show_bug.cgi?id=100047

shard-apl        total:3498 pass:1835 dwarn:1   dfail:0   fail:7   skip:1655 time:12922s
shard-hsw        total:3498 pass:1782 dwarn:1   dfail:0   fail:4   skip:1710 time:11552s
shard-snb        total:3498 pass:1372 dwarn:1   dfail:0   fail:16  skip:2109 time:7043s
Blacklisted hosts:
shard-kbl        total:3498 pass:1960 dwarn:1   dfail:0   fail:7   skip:1530 time:9398s

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_1219/shards.html
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH i-g-t v4] tests/perf_pmu: Avoid RT thread for accuracy test
  2018-04-03 16:39     ` [igt-dev] " Tvrtko Ursulin
@ 2018-04-04  9:51       ` Tvrtko Ursulin
  -1 siblings, 0 replies; 43+ messages in thread
From: Tvrtko Ursulin @ 2018-04-04  9:51 UTC (permalink / raw)
  To: igt-dev; +Cc: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Realtime scheduling interferes with execlists submission (tasklet) so try
to simplify the PWM loop in a few ways:

 * Drop RT.
 * Longer batches for smaller systematic error.
 * More truthful test duration calculation.
 * Fewer clock queries.
 * No self-adjust - instead just report the achieved cycle and let the
   parent check against it.
 * Report absolute cycle error.

v2:
 * Bring back self-adjust. (Chris Wilson)
   (But slightly fixed version with no overflow.)

v3:
 * Log average and variance of the calibration for each pass.

v4:
 * Eliminate development leftovers.
 * Fix variance logging.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 tests/perf_pmu.c | 107 +++++++++++++++++++++++++++----------------------------
 1 file changed, 52 insertions(+), 55 deletions(-)

diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
index 2273ddb9e684..590e6526b069 100644
--- a/tests/perf_pmu.c
+++ b/tests/perf_pmu.c
@@ -1497,12 +1497,6 @@ test_enable_race(int gem_fd, const struct intel_execution_engine2 *e)
 	gem_quiescent_gpu(gem_fd);
 }
 
-static double __error(double val, double ref)
-{
-	igt_assert(ref > 1e-5 /* smallval */);
-	return (100.0 * val / ref) - 100.0;
-}
-
 static void __rearm_spin_batch(igt_spin_t *spin)
 {
 	const uint32_t mi_arb_chk = 0x5 << 23;
@@ -1525,13 +1519,12 @@ static void
 accuracy(int gem_fd, const struct intel_execution_engine2 *e,
 	 unsigned long target_busy_pct)
 {
-	const unsigned int min_test_loops = 7;
-	const unsigned long min_test_us = 1e6;
-	unsigned long busy_us = 2500;
+	unsigned long busy_us = 10000 - 100 * (1 + abs(50 - target_busy_pct));
 	unsigned long idle_us = 100 * (busy_us - target_busy_pct *
 				busy_us / 100) / target_busy_pct;
-	unsigned long pwm_calibration_us;
-	unsigned long test_us;
+	const unsigned long min_test_us = 1e6;
+	const unsigned long pwm_calibration_us = min_test_us;
+	const unsigned long test_us = min_test_us;
 	double busy_r, expected;
 	uint64_t val[2];
 	uint64_t ts[2];
@@ -1546,13 +1539,6 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
 		idle_us *= 2;
 	}
 
-	pwm_calibration_us = min_test_loops * (busy_us + idle_us);
-	while (pwm_calibration_us < min_test_us)
-		pwm_calibration_us += busy_us + idle_us;
-	test_us = min_test_loops * (idle_us + busy_us);
-	while (test_us < min_test_us)
-		test_us += busy_us + idle_us;
-
 	igt_info("calibration=%lums, test=%lums; ratio=%.2f%% (%luus/%luus)\n",
 		 pwm_calibration_us / 1000, test_us / 1000,
 		 (double)busy_us / (busy_us + idle_us) * 100.0,
@@ -1565,20 +1551,11 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
 
 	/* Emit PWM pattern on the engine from a child. */
 	igt_fork(child, 1) {
-		struct sched_param rt = { .sched_priority = 99 };
 		const unsigned long timeout[] = {
 			pwm_calibration_us * 1000, test_us * 1000
 		};
-		uint64_t total_busy_ns = 0, total_idle_ns = 0;
+		uint64_t total_busy_ns = 0, total_ns = 0;
 		igt_spin_t *spin;
-		int ret;
-
-		/* We need the best sleep accuracy we can get. */
-		ret = sched_setscheduler(0,
-					 SCHED_FIFO | SCHED_RESET_ON_FORK,
-					 &rt);
-		if (ret)
-			igt_warn("Failed to set scheduling policy!\n");
 
 		/* Allocate our spin batch and idle it. */
 		spin = igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e), 0);
@@ -1587,42 +1564,62 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
 
 		/* 1st pass is calibration, second pass is the test. */
 		for (int pass = 0; pass < ARRAY_SIZE(timeout); pass++) {
-			uint64_t busy_ns = -total_busy_ns;
-			uint64_t idle_ns = -total_idle_ns;
-			struct timespec test_start = { };
+			unsigned int target_idle_us = idle_us;
+			uint64_t busy_ns = 0, idle_ns = 0;
+			struct timespec start = { };
+			unsigned long pass_ns = 0;
+			double avg = 0.0, var = 0.0;
+			unsigned int n = 0;
+
+			igt_nsec_elapsed(&start);
 
-			igt_nsec_elapsed(&test_start);
 			do {
-				unsigned int target_idle_us, t_busy;
+				unsigned long loop_ns, loop_busy;
+				struct timespec _ts = { };
+				double err, tmp;
+
+				/* PWM idle sleep. */
+				_ts.tv_nsec = target_idle_us * 1000;
+				nanosleep(&_ts, NULL);
 
 				/* Restart the spinbatch. */
 				__rearm_spin_batch(spin);
 				__submit_spin_batch(gem_fd, spin, e, 0);
 
-				/*
-				 * Note that the submission may be delayed to a
-				 * tasklet (ksoftirqd) which cannot run until we
-				 * sleep as we hog the cpu (we are RT).
-				 */
-
-				t_busy = measured_usleep(busy_us);
+				/* PWM busy sleep. */
+				loop_busy = igt_nsec_elapsed(&start);
+				_ts.tv_nsec = busy_us * 1000;
+				nanosleep(&_ts, NULL);
 				igt_spin_batch_end(spin);
-				gem_sync(gem_fd, spin->handle);
-
-				total_busy_ns += t_busy;
-
-				target_idle_us =
-					(100 * total_busy_ns / target_busy_pct - (total_busy_ns + total_idle_ns)) / 1000;
-				total_idle_ns += measured_usleep(target_idle_us);
-			} while (igt_nsec_elapsed(&test_start) < timeout[pass]);
-
-			busy_ns += total_busy_ns;
-			idle_ns += total_idle_ns;
 
-			expected = (double)busy_ns / (busy_ns + idle_ns);
-			igt_info("%u: busy %"PRIu64"us, idle %"PRIu64"us: %.2f%% (target: %lu%%)\n",
+				/* Time accounting. */
+				loop_ns = igt_nsec_elapsed(&start);
+				loop_busy = loop_ns - loop_busy;
+				loop_ns -= pass_ns;
+
+				busy_ns += loop_busy;
+				total_busy_ns += loop_busy;
+				idle_ns += loop_ns - loop_busy;
+				pass_ns += loop_ns;
+				total_ns += loop_ns;
+
+				/* Re-calibrate. */
+				err = (double)total_busy_ns / total_ns -
+				      (double)target_busy_pct / 100.0;
+				target_idle_us = (double)target_idle_us *
+						 (1.0 + err);
+
+				/* Running average and variance for debug. */
+				err = 100.0 * total_busy_ns / total_ns;
+				tmp = avg;
+				avg += (err - avg) / ++n;
+				var += (err - avg) * (err - tmp);
+			} while (pass_ns < timeout[pass]);
+
+			expected = (double)busy_ns / pass_ns;
+			igt_info("%u: busy %"PRIu64"us, idle %"PRIu64"us -> %.2f%% (target: %lu%%; average=%.2f, variance=%f)\n",
 				 pass, busy_ns / 1000, idle_ns / 1000,
-				 100 * expected, target_busy_pct);
+				 100 * expected, target_busy_pct, avg, var / n);
 			write(link[1], &expected, sizeof(expected));
 		}
 
@@ -1649,7 +1646,7 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
 	busy_r = (double)(val[1] - val[0]) / (ts[1] - ts[0]);
 
 	igt_info("error=%.2f%% (%.2f%% vs %.2f%%)\n",
-		 __error(busy_r, expected), 100 * busy_r, 100 * expected);
+		 (busy_r - expected) * 100, 100 * busy_r, 100 * expected);
 
 	assert_within(100.0 * busy_r, 100.0 * expected, 2);
 }
-- 
2.14.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [Intel-gfx] [PATCH i-g-t v4] tests/perf_pmu: Avoid RT thread for accuracy test
@ 2018-04-04  9:51       ` Tvrtko Ursulin
  0 siblings, 0 replies; 43+ messages in thread
From: Tvrtko Ursulin @ 2018-04-04  9:51 UTC (permalink / raw)
  To: igt-dev; +Cc: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Realtime scheduling interferes with execlists submission (tasklet) so try
to simplify the PWM loop in a few ways:

 * Drop RT.
 * Longer batches for smaller systematic error.
 * More truthful test duration calculation.
 * Fewer clock queries.
 * No self-adjust - instead just report the achieved cycle and let the
   parent check against it.
 * Report absolute cycle error.

v2:
 * Bring back self-adjust. (Chris Wilson)
   (But slightly fixed version with no overflow.)

v3:
 * Log average and variance of the calibration for each pass.

v4:
 * Eliminate development leftovers.
 * Fix variance logging.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 tests/perf_pmu.c | 107 +++++++++++++++++++++++++++----------------------------
 1 file changed, 52 insertions(+), 55 deletions(-)

diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
index 2273ddb9e684..590e6526b069 100644
--- a/tests/perf_pmu.c
+++ b/tests/perf_pmu.c
@@ -1497,12 +1497,6 @@ test_enable_race(int gem_fd, const struct intel_execution_engine2 *e)
 	gem_quiescent_gpu(gem_fd);
 }
 
-static double __error(double val, double ref)
-{
-	igt_assert(ref > 1e-5 /* smallval */);
-	return (100.0 * val / ref) - 100.0;
-}
-
 static void __rearm_spin_batch(igt_spin_t *spin)
 {
 	const uint32_t mi_arb_chk = 0x5 << 23;
@@ -1525,13 +1519,12 @@ static void
 accuracy(int gem_fd, const struct intel_execution_engine2 *e,
 	 unsigned long target_busy_pct)
 {
-	const unsigned int min_test_loops = 7;
-	const unsigned long min_test_us = 1e6;
-	unsigned long busy_us = 2500;
+	unsigned long busy_us = 10000 - 100 * (1 + abs(50 - target_busy_pct));
 	unsigned long idle_us = 100 * (busy_us - target_busy_pct *
 				busy_us / 100) / target_busy_pct;
-	unsigned long pwm_calibration_us;
-	unsigned long test_us;
+	const unsigned long min_test_us = 1e6;
+	const unsigned long pwm_calibration_us = min_test_us;
+	const unsigned long test_us = min_test_us;
 	double busy_r, expected;
 	uint64_t val[2];
 	uint64_t ts[2];
@@ -1546,13 +1539,6 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
 		idle_us *= 2;
 	}
 
-	pwm_calibration_us = min_test_loops * (busy_us + idle_us);
-	while (pwm_calibration_us < min_test_us)
-		pwm_calibration_us += busy_us + idle_us;
-	test_us = min_test_loops * (idle_us + busy_us);
-	while (test_us < min_test_us)
-		test_us += busy_us + idle_us;
-
 	igt_info("calibration=%lums, test=%lums; ratio=%.2f%% (%luus/%luus)\n",
 		 pwm_calibration_us / 1000, test_us / 1000,
 		 (double)busy_us / (busy_us + idle_us) * 100.0,
@@ -1565,20 +1551,11 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
 
 	/* Emit PWM pattern on the engine from a child. */
 	igt_fork(child, 1) {
-		struct sched_param rt = { .sched_priority = 99 };
 		const unsigned long timeout[] = {
 			pwm_calibration_us * 1000, test_us * 1000
 		};
-		uint64_t total_busy_ns = 0, total_idle_ns = 0;
+		uint64_t total_busy_ns = 0, total_ns = 0;
 		igt_spin_t *spin;
-		int ret;
-
-		/* We need the best sleep accuracy we can get. */
-		ret = sched_setscheduler(0,
-					 SCHED_FIFO | SCHED_RESET_ON_FORK,
-					 &rt);
-		if (ret)
-			igt_warn("Failed to set scheduling policy!\n");
 
 		/* Allocate our spin batch and idle it. */
 		spin = igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e), 0);
@@ -1587,42 +1564,62 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
 
 		/* 1st pass is calibration, second pass is the test. */
 		for (int pass = 0; pass < ARRAY_SIZE(timeout); pass++) {
-			uint64_t busy_ns = -total_busy_ns;
-			uint64_t idle_ns = -total_idle_ns;
-			struct timespec test_start = { };
+			unsigned int target_idle_us = idle_us;
+			uint64_t busy_ns = 0, idle_ns = 0;
+			struct timespec start = { };
+			unsigned long pass_ns = 0;
+			double avg = 0.0, var = 0.0;
+			unsigned int n = 0;
+
+			igt_nsec_elapsed(&start);
 
-			igt_nsec_elapsed(&test_start);
 			do {
-				unsigned int target_idle_us, t_busy;
+				unsigned long loop_ns, loop_busy;
+				struct timespec _ts = { };
+				double err, tmp;
+
+				/* PWM idle sleep. */
+				_ts.tv_nsec = target_idle_us * 1000;
+				nanosleep(&_ts, NULL);
 
 				/* Restart the spinbatch. */
 				__rearm_spin_batch(spin);
 				__submit_spin_batch(gem_fd, spin, e, 0);
 
-				/*
-				 * Note that the submission may be delayed to a
-				 * tasklet (ksoftirqd) which cannot run until we
-				 * sleep as we hog the cpu (we are RT).
-				 */
-
-				t_busy = measured_usleep(busy_us);
+				/* PWM busy sleep. */
+				loop_busy = igt_nsec_elapsed(&start);
+				_ts.tv_nsec = busy_us * 1000;
+				nanosleep(&_ts, NULL);
 				igt_spin_batch_end(spin);
-				gem_sync(gem_fd, spin->handle);
-
-				total_busy_ns += t_busy;
-
-				target_idle_us =
-					(100 * total_busy_ns / target_busy_pct - (total_busy_ns + total_idle_ns)) / 1000;
-				total_idle_ns += measured_usleep(target_idle_us);
-			} while (igt_nsec_elapsed(&test_start) < timeout[pass]);
-
-			busy_ns += total_busy_ns;
-			idle_ns += total_idle_ns;
 
-			expected = (double)busy_ns / (busy_ns + idle_ns);
-			igt_info("%u: busy %"PRIu64"us, idle %"PRIu64"us: %.2f%% (target: %lu%%)\n",
+				/* Time accounting. */
+				loop_ns = igt_nsec_elapsed(&start);
+				loop_busy = loop_ns - loop_busy;
+				loop_ns -= pass_ns;
+
+				busy_ns += loop_busy;
+				total_busy_ns += loop_busy;
+				idle_ns += loop_ns - loop_busy;
+				pass_ns += loop_ns;
+				total_ns += loop_ns;
+
+				/* Re-calibrate. */
+				err = (double)total_busy_ns / total_ns -
+				      (double)target_busy_pct / 100.0;
+				target_idle_us = (double)target_idle_us *
+						 (1.0 + err);
+
+				/* Running average and variance for debug. */
+				err = 100.0 * total_busy_ns / total_ns;
+				tmp = avg;
+				avg += (err - avg) / ++n;
+				var += (err - avg) * (err - tmp);
+			} while (pass_ns < timeout[pass]);
+
+			expected = (double)busy_ns / pass_ns;
+			igt_info("%u: busy %"PRIu64"us, idle %"PRIu64"us -> %.2f%% (target: %lu%%; average=%.2f, variance=%f)\n",
 				 pass, busy_ns / 1000, idle_ns / 1000,
-				 100 * expected, target_busy_pct);
+				 100 * expected, target_busy_pct, avg, var / n);
 			write(link[1], &expected, sizeof(expected));
 		}
 
@@ -1649,7 +1646,7 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
 	busy_r = (double)(val[1] - val[0]) / (ts[1] - ts[0]);
 
 	igt_info("error=%.2f%% (%.2f%% vs %.2f%%)\n",
-		 __error(busy_r, expected), 100 * busy_r, 100 * expected);
+		 (busy_r - expected) * 100, 100 * busy_r, 100 * expected);
 
 	assert_within(100.0 * busy_r, 100.0 * expected, 2);
 }
-- 
2.14.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* Re: [igt-dev] ✗ Fi.CI.IGT: failure for tests/perf_pmu: Avoid RT thread for accuracy test (rev3)
  2018-04-03 18:33 ` [igt-dev] ✗ Fi.CI.IGT: failure " Patchwork
@ 2018-04-04 11:13   ` Tvrtko Ursulin
  0 siblings, 0 replies; 43+ messages in thread
From: Tvrtko Ursulin @ 2018-04-04 11:13 UTC (permalink / raw)
  To: igt-dev, Patchwork, Tvrtko Ursulin


On 03/04/2018 19:33, Patchwork wrote:
> == Series Details ==
> 
> Series: tests/perf_pmu: Avoid RT thread for accuracy test (rev3)
> URL   : https://patchwork.freedesktop.org/series/40662/
> State : failure

[snip]

> shard-apl        total:3498 pass:1835 dwarn:1   dfail:0   fail:7   skip:1655 time:12922s
> shard-hsw        total:3498 pass:1782 dwarn:1   dfail:0   fail:4   skip:1710 time:11552s
> shard-snb        total:3498 pass:1372 dwarn:1   dfail:0   fail:16  skip:2109 time:7043s
> Blacklisted hosts:
> shard-kbl        total:3498 pass:1960 dwarn:1   dfail:0   fail:7   skip:1530 time:9398s

It all passed with quite stable results here.

One thing which is now visible with the added variance reporting (even 
though I forgot to divide it by N) is that KBL suffers from much higher 
variance compared to APL.

APL manages to calibrate the loop to 0.01 - 0.08% variance, while KBL 
only manages 0.15 - 0.53%. (Need to divide by N, but relative comparison 
is fine.)

I don't know what to think since normally I'd expect APL to be more 
jittery. Clock source related messages during boot look similar on both, 
although they both complain about an unstable clock.

Anyway, this much larger variance is perhaps a clue that explains the 
sporadic failures on KBL.

Regards,

Tvrtko
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 43+ messages in thread

* [igt-dev] ✓ Fi.CI.BAT: success for tests/perf_pmu: Avoid RT thread for accuracy test (rev4)
  2018-03-26 10:57 ` [Intel-gfx] " Tvrtko Ursulin
                   ` (12 preceding siblings ...)
  (?)
@ 2018-04-04 13:46 ` Patchwork
  -1 siblings, 0 replies; 43+ messages in thread
From: Patchwork @ 2018-04-04 13:46 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: igt-dev

== Series Details ==

Series: tests/perf_pmu: Avoid RT thread for accuracy test (rev4)
URL   : https://patchwork.freedesktop.org/series/40662/
State : success

== Summary ==

IGT patchset tested on top of latest successful build
cad5fc06f954546042a432202cbe7e5a20fe1132 tests/gem_eio: Add reset and unwedge stress testing

with latest DRM-Tip kernel build CI_DRM_4020
4e6fa0d99f8f drm-tip: 2018y-04m-04d-12h-24m-54s UTC integration manifest

No testlist changes.

---- Known issues:

Test kms_chamelium:
        Subgroup dp-edid-read:
                pass       -> FAIL       (fi-kbl-7500u) fdo#102505
Test kms_flip:
        Subgroup basic-flip-vs-wf_vblank:
                pass       -> FAIL       (fi-glk-j4005) fdo#105644
Test kms_frontbuffer_tracking:
        Subgroup basic:
                fail       -> PASS       (fi-cnl-y3) fdo#103167
Test kms_pipe_crc_basic:
        Subgroup suspend-read-crc-pipe-b:
                pass       -> INCOMPLETE (fi-snb-2520m) fdo#103713
Test prime_vgem:
        Subgroup basic-fence-flip:
                pass       -> FAIL       (fi-ilk-650) fdo#104008

fdo#102505 https://bugs.freedesktop.org/show_bug.cgi?id=102505
fdo#105644 https://bugs.freedesktop.org/show_bug.cgi?id=105644
fdo#103167 https://bugs.freedesktop.org/show_bug.cgi?id=103167
fdo#103713 https://bugs.freedesktop.org/show_bug.cgi?id=103713
fdo#104008 https://bugs.freedesktop.org/show_bug.cgi?id=104008

fi-bdw-5557u     total:285  pass:264  dwarn:0   dfail:0   fail:0   skip:21  time:429s
fi-bdw-gvtdvm    total:285  pass:261  dwarn:0   dfail:0   fail:0   skip:24  time:441s
fi-blb-e6850     total:285  pass:220  dwarn:1   dfail:0   fail:0   skip:64  time:382s
fi-bsw-n3050     total:285  pass:239  dwarn:0   dfail:0   fail:0   skip:46  time:549s
fi-bwr-2160      total:285  pass:180  dwarn:0   dfail:0   fail:0   skip:105 time:298s
fi-bxt-dsi       total:285  pass:255  dwarn:0   dfail:0   fail:0   skip:30  time:521s
fi-bxt-j4205     total:285  pass:256  dwarn:0   dfail:0   fail:0   skip:29  time:516s
fi-byt-j1900     total:285  pass:250  dwarn:0   dfail:0   fail:0   skip:35  time:524s
fi-byt-n2820     total:285  pass:246  dwarn:0   dfail:0   fail:0   skip:39  time:511s
fi-cfl-8700k     total:285  pass:257  dwarn:0   dfail:0   fail:0   skip:28  time:410s
fi-cfl-s3        total:285  pass:259  dwarn:0   dfail:0   fail:0   skip:26  time:561s
fi-cfl-u         total:285  pass:259  dwarn:0   dfail:0   fail:0   skip:26  time:509s
fi-cnl-y3        total:285  pass:259  dwarn:0   dfail:0   fail:0   skip:26  time:582s
fi-elk-e7500     total:285  pass:225  dwarn:1   dfail:0   fail:0   skip:59  time:420s
fi-gdg-551       total:285  pass:176  dwarn:0   dfail:0   fail:1   skip:108 time:316s
fi-glk-1         total:285  pass:257  dwarn:0   dfail:0   fail:0   skip:28  time:540s
fi-glk-j4005     total:285  pass:255  dwarn:0   dfail:0   fail:1   skip:29  time:486s
fi-hsw-4770      total:285  pass:258  dwarn:0   dfail:0   fail:0   skip:27  time:404s
fi-ilk-650       total:285  pass:224  dwarn:0   dfail:0   fail:1   skip:60  time:421s
fi-ivb-3520m     total:285  pass:256  dwarn:0   dfail:0   fail:0   skip:29  time:462s
fi-ivb-3770      total:285  pass:252  dwarn:0   dfail:0   fail:0   skip:33  time:438s
fi-kbl-7500u     total:285  pass:259  dwarn:1   dfail:0   fail:1   skip:24  time:478s
fi-kbl-7567u     total:285  pass:265  dwarn:0   dfail:0   fail:0   skip:20  time:462s
fi-kbl-r         total:285  pass:258  dwarn:0   dfail:0   fail:0   skip:27  time:514s
fi-pnv-d510      total:285  pass:220  dwarn:1   dfail:0   fail:0   skip:64  time:671s
fi-skl-6260u     total:285  pass:265  dwarn:0   dfail:0   fail:0   skip:20  time:442s
fi-skl-6600u     total:285  pass:258  dwarn:0   dfail:0   fail:0   skip:27  time:533s
fi-skl-6700k2    total:285  pass:261  dwarn:0   dfail:0   fail:0   skip:24  time:502s
fi-skl-6770hq    total:285  pass:265  dwarn:0   dfail:0   fail:0   skip:20  time:522s
fi-skl-guc       total:285  pass:257  dwarn:0   dfail:0   fail:0   skip:28  time:427s
fi-skl-gvtdvm    total:285  pass:262  dwarn:0   dfail:0   fail:0   skip:23  time:447s
fi-snb-2520m     total:242  pass:208  dwarn:0   dfail:0   fail:0   skip:33 
fi-snb-2600      total:285  pass:245  dwarn:0   dfail:0   fail:0   skip:40  time:412s
Blacklisted hosts:
fi-cnl-psr       total:285  pass:256  dwarn:3   dfail:0   fail:0   skip:26  time:526s

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_1223/issues.html
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 43+ messages in thread

* [igt-dev] ✓ Fi.CI.IGT: success for tests/perf_pmu: Avoid RT thread for accuracy test (rev4)
  2018-03-26 10:57 ` [Intel-gfx] " Tvrtko Ursulin
                   ` (13 preceding siblings ...)
  (?)
@ 2018-04-04 16:58 ` Patchwork
  -1 siblings, 0 replies; 43+ messages in thread
From: Patchwork @ 2018-04-04 16:58 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: igt-dev

== Series Details ==

Series: tests/perf_pmu: Avoid RT thread for accuracy test (rev4)
URL   : https://patchwork.freedesktop.org/series/40662/
State : success

== Summary ==

---- Possible new issues:

Test gem_exec_parallel:
        Subgroup vebox-contexts:
                fail       -> PASS       (shard-apl)

---- Known issues:

Test kms_flip:
        Subgroup modeset-vs-vblank-race:
                pass       -> FAIL       (shard-apl) fdo#103060 +1
        Subgroup plain-flip-ts-check-interruptible:
                pass       -> FAIL       (shard-hsw) fdo#100368 +1
Test kms_rotation_crc:
        Subgroup sprite-rotation-180:
                pass       -> FAIL       (shard-snb) fdo#103925

fdo#103060 https://bugs.freedesktop.org/show_bug.cgi?id=103060
fdo#100368 https://bugs.freedesktop.org/show_bug.cgi?id=100368
fdo#103925 https://bugs.freedesktop.org/show_bug.cgi?id=103925

shard-apl        total:3498 pass:1833 dwarn:1   dfail:0   fail:8   skip:1655 time:12872s
shard-hsw        total:3498 pass:1782 dwarn:1   dfail:0   fail:4   skip:1710 time:11559s
shard-snb        total:3498 pass:1376 dwarn:1   dfail:0   fail:3   skip:2118 time:7073s

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_1223/shards.html
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH i-g-t v4] tests/perf_pmu: Avoid RT thread for accuracy test
  2018-04-04  9:51       ` [Intel-gfx] " Tvrtko Ursulin
@ 2018-04-11 13:23         ` Chris Wilson
  -1 siblings, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2018-04-11 13:23 UTC (permalink / raw)
  To: Tvrtko Ursulin, igt-dev; +Cc: Intel-gfx

Quoting Tvrtko Ursulin (2018-04-04 10:51:52)
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> Realtime scheduling interferes with execlists submission (tasklet) so try
> to simplify the PWM loop in a few ways:
> 
>  * Drop RT.
>  * Longer batches for smaller systematic error.
>  * More truthful test duration calculation.
>  * Less clock queries.
>  * No self-adjust - instead just report the achieved cycle and let the
>    parent check against it.
>  * Report absolute cycle error.
> 
> v2:
>  * Bring back self-adjust. (Chris Wilson)
>    (But slightly fixed version with no overflow.)
> 
> v3:
>  * Log average and mean calibration for each pass.
> 
> v4:
>  * Eliminate development leftovers.
>  * Fix variance logging.
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

From a pragmatic point of view, there's no point waiting for me to be
happy with the convergence if CI is, and the variance will definitely be
interesting (although you could have used igt_mean to compute the
iterative variance), so

Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [igt-dev] [Intel-gfx] [PATCH i-g-t v4] tests/perf_pmu: Avoid RT thread for accuracy test
@ 2018-04-11 13:23         ` Chris Wilson
  0 siblings, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2018-04-11 13:23 UTC (permalink / raw)
  To: Tvrtko Ursulin, igt-dev; +Cc: Intel-gfx

Quoting Tvrtko Ursulin (2018-04-04 10:51:52)
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> Realtime scheduling interferes with execlists submission (tasklet) so try
> to simplify the PWM loop in a few ways:
> 
>  * Drop RT.
>  * Longer batches for smaller systematic error.
>  * More truthful test duration calculation.
>  * Less clock queries.
>  * No self-adjust - instead just report the achieved cycle and let the
>    parent check against it.
>  * Report absolute cycle error.
> 
> v2:
>  * Bring back self-adjust. (Chris Wilson)
>    (But slightly fixed version with no overflow.)
> 
> v3:
>  * Log average and mean calibration for each pass.
> 
> v4:
>  * Eliminate development leftovers.
>  * Fix variance logging.
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

From a pragmatic point of view, there's no point waiting for me to be
happy with the convergence if CI is, and the variance will definitely be
interesting (although you could have used igt_mean to compute the
iterative variance), so

Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
-Chris
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH i-g-t v4] tests/perf_pmu: Avoid RT thread for accuracy test
  2018-04-11 13:23         ` [igt-dev] [Intel-gfx] " Chris Wilson
@ 2018-04-11 13:52           ` Tvrtko Ursulin
  -1 siblings, 0 replies; 43+ messages in thread
From: Tvrtko Ursulin @ 2018-04-11 13:52 UTC (permalink / raw)
  To: Chris Wilson, Tvrtko Ursulin, igt-dev; +Cc: Intel-gfx


On 11/04/2018 14:23, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-04-04 10:51:52)
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> Realtime scheduling interferes with execlists submission (tasklet) so try
>> to simplify the PWM loop in a few ways:
>>
>>   * Drop RT.
>>   * Longer batches for smaller systematic error.
>>   * More truthful test duration calculation.
>>   * Less clock queries.
>>   * No self-adjust - instead just report the achieved cycle and let the
>>     parent check against it.
>>   * Report absolute cycle error.
>>
>> v2:
>>   * Bring back self-adjust. (Chris Wilson)
>>     (But slightly fixed version with no overflow.)
>>
>> v3:
>>   * Log average and mean calibration for each pass.
>>
>> v4:
>>   * Eliminate development leftovers.
>>   * Fix variance logging.
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
>  From a pragmatic point of view, there's no point waiting for me to be
> happy with the convergence if CI is, and the variance will definitely be
> interesting (although you could have used igt_mean to compute the
> iterative variance), so
> 
> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>

Thanks, I've pushed it and so we'll see.

Regards,

Tvrtko

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [igt-dev] [Intel-gfx] [PATCH i-g-t v4] tests/perf_pmu: Avoid RT thread for accuracy test
@ 2018-04-11 13:52           ` Tvrtko Ursulin
  0 siblings, 0 replies; 43+ messages in thread
From: Tvrtko Ursulin @ 2018-04-11 13:52 UTC (permalink / raw)
  To: Chris Wilson, Tvrtko Ursulin, igt-dev; +Cc: Intel-gfx


On 11/04/2018 14:23, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-04-04 10:51:52)
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> Realtime scheduling interferes with execlists submission (tasklet) so try
>> to simplify the PWM loop in a few ways:
>>
>>   * Drop RT.
>>   * Longer batches for smaller systematic error.
>>   * More truthful test duration calculation.
>>   * Less clock queries.
>>   * No self-adjust - instead just report the achieved cycle and let the
>>     parent check against it.
>>   * Report absolute cycle error.
>>
>> v2:
>>   * Bring back self-adjust. (Chris Wilson)
>>     (But slightly fixed version with no overflow.)
>>
>> v3:
>>   * Log average and mean calibration for each pass.
>>
>> v4:
>>   * Eliminate development leftovers.
>>   * Fix variance logging.
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
>  From a pragmatic point of view, there's no point waiting for me to be
> happy with the convergence if CI is, and the variance will definitely be
> interesting (although you could have used igt_mean to compute the
> iterative variance), so
> 
> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>

Thanks, I've pushed it and so we'll see.

Regards,

Tvrtko

_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH i-g-t v4] tests/perf_pmu: Avoid RT thread for accuracy test
  2018-04-11 13:52           ` [igt-dev] [Intel-gfx] " Tvrtko Ursulin
@ 2018-04-14 11:35             ` Chris Wilson
  -1 siblings, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2018-04-14 11:35 UTC (permalink / raw)
  To: Tvrtko Ursulin, Tvrtko Ursulin, igt-dev; +Cc: Intel-gfx

Quoting Tvrtko Ursulin (2018-04-11 14:52:36)
> 
> On 11/04/2018 14:23, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2018-04-04 10:51:52)
> >> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>
> >> Realtime scheduling interferes with execlists submission (tasklet) so try
> >> to simplify the PWM loop in a few ways:
> >>
> >>   * Drop RT.
> >>   * Longer batches for smaller systematic error.
> >>   * More truthful test duration calculation.
> >>   * Less clock queries.
> >>   * No self-adjust - instead just report the achieved cycle and let the
> >>     parent check against it.
> >>   * Report absolute cycle error.
> >>
> >> v2:
> >>   * Bring back self-adjust. (Chris Wilson)
> >>     (But slightly fixed version with no overflow.)
> >>
> >> v3:
> >>   * Log average and mean calibration for each pass.
> >>
> >> v4:
> >>   * Eliminate development leftovers.
> >>   * Fix variance logging.
> >>
> >> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > 
> >  From a pragmatic point of view, there's no point waiting for me to be
> > happy with the convergence if CI is, and the variance will definitely be
> > interesting (although you could have used igt_mean to compute the
> > iterative variance), so
> > 
> > Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
> 
> Thanks, I've pushed it and so we'll see.

We should resurrect the RT variant in the near future. It's definitely
an issue in our driver that random userspace can impact execution of
unconnected others. (Handling RT starvation of workers is something we
have to be aware of elsewhere, commonly hits oom if we don't have an
escape clause.) Lots of words just to say, we should add a test for RT
to exercise the bad behaviour. Hmm, doesn't need to be pmu, just we need
an assertion that execution latency is bounded and no RT hog will delay
it.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [igt-dev] [Intel-gfx] [PATCH i-g-t v4] tests/perf_pmu: Avoid RT thread for accuracy test
@ 2018-04-14 11:35             ` Chris Wilson
  0 siblings, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2018-04-14 11:35 UTC (permalink / raw)
  To: Tvrtko Ursulin, Tvrtko Ursulin, igt-dev; +Cc: Intel-gfx

Quoting Tvrtko Ursulin (2018-04-11 14:52:36)
> 
> On 11/04/2018 14:23, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2018-04-04 10:51:52)
> >> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>
> >> Realtime scheduling interferes with execlists submission (tasklet) so try
> >> to simplify the PWM loop in a few ways:
> >>
> >>   * Drop RT.
> >>   * Longer batches for smaller systematic error.
> >>   * More truthful test duration calculation.
> >>   * Less clock queries.
> >>   * No self-adjust - instead just report the achieved cycle and let the
> >>     parent check against it.
> >>   * Report absolute cycle error.
> >>
> >> v2:
> >>   * Bring back self-adjust. (Chris Wilson)
> >>     (But slightly fixed version with no overflow.)
> >>
> >> v3:
> >>   * Log average and mean calibration for each pass.
> >>
> >> v4:
> >>   * Eliminate development leftovers.
> >>   * Fix variance logging.
> >>
> >> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > 
> >  From a pragmatic point of view, there's no point waiting for me to be
> > happy with the convergence if CI is, and the variance will definitely be
> > interesting (although you could have used igt_mean to compute the
> > iterative variance), so
> > 
> > Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
> 
> Thanks, I've pushed it and so we'll see.

We should resurrect the RT variant in the near future. It's definitely
an issue in our driver that random userspace can impact execution of
unconnected others. (Handling RT starvation of workers is something we
have to be aware of elsewhere, commonly hits oom if we don't have an
escape clause.) Lots of words just to say, we should add a test for RT
to exercise the bad behaviour. Hmm, doesn't need to be pmu, just we need
an assertion that execution latency is bounded and no RT hog will delay
it.
-Chris
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH i-g-t v4] tests/perf_pmu: Avoid RT thread for accuracy test
  2018-04-14 11:35             ` [igt-dev] [Intel-gfx] " Chris Wilson
@ 2018-04-16  9:55               ` Tvrtko Ursulin
  -1 siblings, 0 replies; 43+ messages in thread
From: Tvrtko Ursulin @ 2018-04-16  9:55 UTC (permalink / raw)
  To: Chris Wilson, Tvrtko Ursulin, igt-dev; +Cc: Intel-gfx


On 14/04/2018 12:35, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-04-11 14:52:36)
>>
>> On 11/04/2018 14:23, Chris Wilson wrote:
>>> Quoting Tvrtko Ursulin (2018-04-04 10:51:52)
>>>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>>
>>>> Realtime scheduling interferes with execlists submission (tasklet) so try
>>>> to simplify the PWM loop in a few ways:
>>>>
>>>>    * Drop RT.
>>>>    * Longer batches for smaller systematic error.
>>>>    * More truthful test duration calculation.
>>>>    * Less clock queries.
>>>>    * No self-adjust - instead just report the achieved cycle and let the
>>>>      parent check against it.
>>>>    * Report absolute cycle error.
>>>>
>>>> v2:
>>>>    * Bring back self-adjust. (Chris Wilson)
>>>>      (But slightly fixed version with no overflow.)
>>>>
>>>> v3:
>>>>    * Log average and mean calibration for each pass.
>>>>
>>>> v4:
>>>>    * Eliminate development leftovers.
>>>>    * Fix variance logging.
>>>>
>>>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>
>>>   From a pragmatic point of view, there's no point waiting for me to be
>>> happy with the convergence if CI is, and the variance will definitely be
>>> interesting (although you could have used igt_mean to compute the
>>> iterative variance), so
>>>
>>> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
>>
>> Thanks, I've pushed it and so we'll see.
> 
> We should resurrect the RT variant in the near future. It's definitely
> an issue in our driver that random userspace can impact execution of
> unconnected others. (Handling RT starvation of workers is something we
> have to be aware of elsewhere, commonly hits oom if we don't have an
> escape clause.) Lots of words just to say, we should add a test for RT
> to exercise the bad behaviour. Hmm, doesn't need to be pmu, just we need
> an assertion that execution latency is bounded and no RT hog will delay
> it.

Agreed, I can add a simple test to gem_exec_latency.

But with regard to how to fix this - re-enabling direct submission (not
only indirect via tasklet) sounds simplest in theory, although I do
remember you raising some issues with this route last time I
mentioned it. It does sound like a conceptually correct thing to do.

As an alternative we could explore the effort of converting to a
threaded irq handler, and the latencies that would result.

You also had a patch to improve tasklet scheduling in some cases now I 
remember. We can try that after I write the test as well. Although I 
have no idea how hard of a sell that would be.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [igt-dev] [Intel-gfx] [PATCH i-g-t v4] tests/perf_pmu: Avoid RT thread for accuracy test
@ 2018-04-16  9:55               ` Tvrtko Ursulin
  0 siblings, 0 replies; 43+ messages in thread
From: Tvrtko Ursulin @ 2018-04-16  9:55 UTC (permalink / raw)
  To: Chris Wilson, Tvrtko Ursulin, igt-dev; +Cc: Intel-gfx


On 14/04/2018 12:35, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-04-11 14:52:36)
>>
>> On 11/04/2018 14:23, Chris Wilson wrote:
>>> Quoting Tvrtko Ursulin (2018-04-04 10:51:52)
>>>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>>
>>>> Realtime scheduling interferes with execlists submission (tasklet) so try
>>>> to simplify the PWM loop in a few ways:
>>>>
>>>>    * Drop RT.
>>>>    * Longer batches for smaller systematic error.
>>>>    * More truthful test duration calculation.
>>>>    * Less clock queries.
>>>>    * No self-adjust - instead just report the achieved cycle and let the
>>>>      parent check against it.
>>>>    * Report absolute cycle error.
>>>>
>>>> v2:
>>>>    * Bring back self-adjust. (Chris Wilson)
>>>>      (But slightly fixed version with no overflow.)
>>>>
>>>> v3:
>>>>    * Log average and mean calibration for each pass.
>>>>
>>>> v4:
>>>>    * Eliminate development leftovers.
>>>>    * Fix variance logging.
>>>>
>>>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>
>>>   From a pragmatic point of view, there's no point waiting for me to be
>>> happy with the convergence if CI is, and the variance will definitely be
>>> interesting (although you could have used igt_mean to compute the
>>> iterative variance), so
>>>
>>> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
>>
>> Thanks, I've pushed it and so we'll see.
> 
> We should resurrect the RT variant in the near future. It's definitely
> an issue in our driver that random userspace can impact execution of
> unconnected others. (Handling RT starvation of workers is something we
> have to be aware of elsewhere, commonly hits oom if we don't have an
> escape clause.) Lots of words just to say, we should add a test for RT
> to exercise the bad behaviour. Hmm, doesn't need to be pmu, just we need
> an assertion that execution latency is bounded and no RT hog will delay
> it.

Agreed, I can add a simple test to gem_exec_latency.

But with regard to how to fix this - re-enabling direct submission (not
only indirect via tasklet) sounds simplest in theory, although I do
remember you raising some issues with this route last time I
mentioned it. It does sound like a conceptually correct thing to do.

As an alternative we could explore the effort of converting to a
threaded irq handler, and the latencies that would result.

You also had a patch to improve tasklet scheduling in some cases now I 
remember. We can try that after I write the test as well. Although I 
have no idea how hard of a sell that would be.

Regards,

Tvrtko
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH i-g-t v4] tests/perf_pmu: Avoid RT thread for accuracy test
  2018-04-16  9:55               ` [igt-dev] [Intel-gfx] " Tvrtko Ursulin
@ 2018-04-16 10:08                 ` Chris Wilson
  -1 siblings, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2018-04-16 10:08 UTC (permalink / raw)
  To: Tvrtko Ursulin, Tvrtko Ursulin, igt-dev; +Cc: Intel-gfx

Quoting Tvrtko Ursulin (2018-04-16 10:55:29)
> 
> On 14/04/2018 12:35, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2018-04-11 14:52:36)
> >>
> >> On 11/04/2018 14:23, Chris Wilson wrote:
> >>> Quoting Tvrtko Ursulin (2018-04-04 10:51:52)
> >>>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>>>
> >>>> Realtime scheduling interferes with execlists submission (tasklet) so try
> >>>> to simplify the PWM loop in a few ways:
> >>>>
> >>>>    * Drop RT.
> >>>>    * Longer batches for smaller systematic error.
> >>>>    * More truthful test duration calculation.
> >>>>    * Less clock queries.
> >>>>    * No self-adjust - instead just report the achieved cycle and let the
> >>>>      parent check against it.
> >>>>    * Report absolute cycle error.
> >>>>
> >>>> v2:
> >>>>    * Bring back self-adjust. (Chris Wilson)
> >>>>      (But slightly fixed version with no overflow.)
> >>>>
> >>>> v3:
> >>>>    * Log average and mean calibration for each pass.
> >>>>
> >>>> v4:
> >>>>    * Eliminate development leftovers.
> >>>>    * Fix variance logging.
> >>>>
> >>>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>>
> >>>   From a pragmatic point of view, there's no point waiting for me to be
> >>> happy with the convergence if CI is, and the variance will definitely be
> >>> interesting (although you could have used igt_mean to compute the
> >>> iterative variance), so
> >>>
> >>> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
> >>
> >> Thanks, I've pushed it and so we'll see.
> > 
> > We should resurrect the RT variant in the near future. It's definitely
> > an issue in our driver that random userspace can impact execution of
> > unconnected others. (Handling RT starvation of workers is something we
> > have to be aware of elsewhere, commonly hits oom if we don't have an
> > escape clause.) Lots of words just to say, we should add a test for RT
> > to exercise the bad behaviour. Hmm, doesn't need to be pmu, just we need
> > an assertion that execution latency is bounded and no RT hog will delay
> > it.
> 
> Agreed, I can add a simple test to gem_exec_latency.
> 
> But with regards on how to fix this - re-enabling direct submission 
> sounds simplest (not only indirect via tasklet) in theory although I do 
> remember you were raising some issues with this route last time I 
> mentioned it. It does sound like a conceptually correct thing to do.

The problem comes down to that we want direct submission from the irq
handler, which the tasklet solves very nicely for us (most of the time).
Finding an alternative hook other than irq_exit() is the challenge,
irq_work might be acceptable.
 
> As an alternative we could explore conversion effort and resulting 
> latencies from conversion to threaded irq handler.

* shivers

Then we have at least consistently bad latency ;) And the sysadmin can
decide how to prioritise, boo.
 
> You also had a patch to improve tasklet scheduling in some cases now I 
> remember. We can try that after I write the test as well. Although I 
> have no idea how hard of a sell that would be.

I think the next plan for upstream tasklets is to try and avoid having
one vector influence the ksoftirqd latency of another. However, that
doesn't solve it for us, where it's likely we've consumed the tasklet
timeslice and so will still be deferred onto ksoftirqd. (It just solves
the case of netdev forcing us to ksoftirqd along with itself.) The hack
I use on top of that to always do at least one immediate execution of
HISOFTIRQ boils down to why just allow that special case, to which there
is no good answer.

Hmm, irq_work, my only concern is if it is run with irqs disabled. We
could live without, but that's an alarmingly big chunk of code.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [igt-dev] [Intel-gfx] [PATCH i-g-t v4] tests/perf_pmu: Avoid RT thread for accuracy test
@ 2018-04-16 10:08                 ` Chris Wilson
  0 siblings, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2018-04-16 10:08 UTC (permalink / raw)
  To: Tvrtko Ursulin, Tvrtko Ursulin, igt-dev; +Cc: Intel-gfx

Quoting Tvrtko Ursulin (2018-04-16 10:55:29)
> 
> On 14/04/2018 12:35, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2018-04-11 14:52:36)
> >>
> >> On 11/04/2018 14:23, Chris Wilson wrote:
> >>> Quoting Tvrtko Ursulin (2018-04-04 10:51:52)
> >>>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>>>
> >>>> Realtime scheduling interferes with execlists submission (tasklet) so try
> >>>> to simplify the PWM loop in a few ways:
> >>>>
> >>>>    * Drop RT.
> >>>>    * Longer batches for smaller systematic error.
> >>>>    * More truthful test duration calculation.
> >>>>    * Less clock queries.
> >>>>    * No self-adjust - instead just report the achieved cycle and let the
> >>>>      parent check against it.
> >>>>    * Report absolute cycle error.
> >>>>
> >>>> v2:
> >>>>    * Bring back self-adjust. (Chris Wilson)
> >>>>      (But slightly fixed version with no overflow.)
> >>>>
> >>>> v3:
> >>>>    * Log average and mean calibration for each pass.
> >>>>
> >>>> v4:
> >>>>    * Eliminate development leftovers.
> >>>>    * Fix variance logging.
> >>>>
> >>>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>>
> >>>   From a pragmatic point of view, there's no point waiting for me to be
> >>> happy with the convergence if CI is, and the variance will definitely be
> >>> interesting (although you could have used igt_mean to compute the
> >>> iterative variance), so
> >>>
> >>> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
> >>
> >> Thanks, I've pushed it and so we'll see.
> > 
> > We should resurrect the RT variant in the near future. It's definitely
> > an issue in our driver that random userspace can impact execution of
> > unconnected others. (Handling RT starvation of workers is something we
> > have to be aware of elsewhere, commonly hits oom if we don't have an
> > escape clause.) Lots of words just to say, we should add a test for RT
> > to exercise the bad behaviour. Hmm, doesn't need to be pmu, just we need
> > an assertion that execution latency is bounded and no RT hog will delay
> > it.
> 
> Agreed, I can add a simple test to gem_exec_latency.
> 
> But with regards on how to fix this - re-enabling direct submission 
> sounds simplest (not only indirect via tasklet) in theory although I do 
> remember you were raising some issues with this route last time I 
> mentioned it. It does sound like a conceptually correct thing to do.

The problem comes down to that we want direct submission from the irq
handler, which the tasklet solves very nicely for us (most of the time).
Finding an alternative hook other than irq_exit() is the challenge,
irq_work might be acceptable.
 
> As an alternative we could explore conversion effort and resulting 
> latencies from conversion to threaded irq handler.

* shivers

Then we have at least consistently bad latency ;) And the sysadmin can
decide how to prioritise, boo.
 
> You also had a patch to improve tasklet scheduling in some cases now I 
> remember. We can try that after I write the test as well. Although I 
> have no idea how hard of a sell that would be.

I think the next plan for upstream tasklets is to try and avoid having
one vector influence the ksoftirqd latency of another. However, that
doesn't solve it for us, where it's likely we've consumed the tasklet
timeslice and so will still be deferred onto ksoftirqd. (It just solves
the case of netdev forcing us to ksoftirqd along with itself.) The hack
I use on top of that to always do at least one immediate execution of
HISOFTIRQ boils down to why just allow that special case, to which there
is no good answer.

Hmm, irq_work, my only concern is if it is run with irqs disabled. We
could live without, but that's an alarmingly big chunk of code.
-Chris
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 43+ messages in thread

end of thread, other threads:[~2018-04-16 10:08 UTC | newest]

Thread overview: 43+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-03-26 10:57 [CI i-g-t] tests/perf_pmu: Avoid RT thread for accuracy test Tvrtko Ursulin
2018-03-26 10:57 ` [Intel-gfx] " Tvrtko Ursulin
2018-03-26 11:17 ` [igt-dev] " Chris Wilson
2018-03-26 11:17   ` Chris Wilson
2018-03-26 12:40   ` Tvrtko Ursulin
2018-03-26 12:40     ` [igt-dev] [Intel-gfx] " Tvrtko Ursulin
2018-03-26 11:23 ` [igt-dev] ✓ Fi.CI.BAT: success for " Patchwork
2018-03-26 13:04 ` [igt-dev] ✗ Fi.CI.IGT: warning " Patchwork
2018-03-27 14:31 ` [igt-dev] ✗ Fi.CI.BAT: failure " Patchwork
2018-03-27 17:08 ` [igt-dev] ✗ Fi.CI.BAT: warning " Patchwork
2018-03-28  9:22 ` [igt-dev] ✓ Fi.CI.BAT: success " Patchwork
2018-03-28 14:36 ` [igt-dev] ✓ Fi.CI.IGT: " Patchwork
2018-03-28 16:56   ` Tvrtko Ursulin
2018-03-28 17:10     ` Chris Wilson
2018-04-03 12:38 ` [PATCH i-g-t v2] " Tvrtko Ursulin
2018-04-03 12:38   ` [Intel-gfx] " Tvrtko Ursulin
2018-04-03 13:10   ` Chris Wilson
2018-04-03 13:10     ` [igt-dev] [Intel-gfx] " Chris Wilson
2018-04-03 16:09     ` Tvrtko Ursulin
2018-04-03 16:09       ` [Intel-gfx] " Tvrtko Ursulin
2018-04-03 16:24       ` Chris Wilson
2018-04-03 16:24         ` [igt-dev] [Intel-gfx] " Chris Wilson
2018-04-03 16:39   ` [PATCH i-g-t v3] " Tvrtko Ursulin
2018-04-03 16:39     ` [igt-dev] " Tvrtko Ursulin
2018-04-04  9:51     ` [PATCH i-g-t v4] " Tvrtko Ursulin
2018-04-04  9:51       ` [Intel-gfx] " Tvrtko Ursulin
2018-04-11 13:23       ` Chris Wilson
2018-04-11 13:23         ` [igt-dev] [Intel-gfx] " Chris Wilson
2018-04-11 13:52         ` Tvrtko Ursulin
2018-04-11 13:52           ` [igt-dev] [Intel-gfx] " Tvrtko Ursulin
2018-04-14 11:35           ` Chris Wilson
2018-04-14 11:35             ` [igt-dev] [Intel-gfx] " Chris Wilson
2018-04-16  9:55             ` Tvrtko Ursulin
2018-04-16  9:55               ` [igt-dev] [Intel-gfx] " Tvrtko Ursulin
2018-04-16 10:08               ` Chris Wilson
2018-04-16 10:08                 ` [igt-dev] [Intel-gfx] " Chris Wilson
2018-04-03 14:23 ` [igt-dev] ✓ Fi.CI.BAT: success for tests/perf_pmu: Avoid RT thread for accuracy test (rev2) Patchwork
2018-04-03 16:41 ` [igt-dev] ✓ Fi.CI.IGT: " Patchwork
2018-04-03 17:15 ` [igt-dev] ✓ Fi.CI.BAT: success for tests/perf_pmu: Avoid RT thread for accuracy test (rev3) Patchwork
2018-04-03 18:33 ` [igt-dev] ✗ Fi.CI.IGT: failure " Patchwork
2018-04-04 11:13   ` Tvrtko Ursulin
2018-04-04 13:46 ` [igt-dev] ✓ Fi.CI.BAT: success for tests/perf_pmu: Avoid RT thread for accuracy test (rev4) Patchwork
2018-04-04 16:58 ` [igt-dev] ✓ Fi.CI.IGT: " Patchwork

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.