[PATCH kvm-unit-tests 13/13] x86: hyperv-v: Rewrite flaky hv_clock_test()

From: Vitaly Kuznetsov <vkuznets@redhat.com>
To: kvm@vger.kernel.org, Paolo Bonzini <pbonzini@redhat.com>
Cc: Metin Kaya <metikaya@amazon.com>
Subject: [PATCH kvm-unit-tests 13/13] x86: hyperv-v: Rewrite flaky hv_clock_test()
Date: Wed,  6 Mar 2024 18:18:23 +0100	[thread overview]
Message-ID: <20240306171823.761647-14-vkuznets@redhat.com> (raw)
In-Reply-To: <20240306171823.761647-1-vkuznets@redhat.com>

hv_clock_test() is reported to be flaky:
https://bugzilla.kernel.org/show_bug.cgi?id=217516

The test tries measuring the divergence between MSR based clock and TSC
page over one second and then expects delta to stay within the measured
range over another two seconds. This works well for a completely idle
system but if tasks get scheduled out, rescheduled to a different CPU,...
the test fails. Widening the expected range helps to certain extent but
even when the expected delta is "max_delta * 1024" sporadic failures still
occur.

Rewrite the test completely to make it stable. Check two things:
- MSR based and TSC page clocksources never go backwards.
- MSR based clocksource read between to TSC page reads stays within the
interval.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
---
 x86/hyperv_clock.c | 55 +++++++++++++++++++++++++++-------------------
 1 file changed, 33 insertions(+), 22 deletions(-)

diff --git a/x86/hyperv_clock.c b/x86/hyperv_clock.c
index d0993bb75ac7..9061da8c6d2c 100644
--- a/x86/hyperv_clock.c
+++ b/x86/hyperv_clock.c
@@ -64,40 +64,51 @@ uint64_t loops[MAX_CPU];
 static void hv_clock_test(void *data)
 {
 	int i = (long)data;
-	uint64_t t = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
-	uint64_t end = t + 3 * TICKS_PER_SEC;
-	uint64_t msr_sample = t + TICKS_PER_SEC;
-	int min_delta = 123456, max_delta = -123456;
+	uint64_t t_msr_prev = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+	uint64_t t_page_prev = hv_clock_read();
+	uint64_t end = t_page_prev + TICKS_PER_SEC;
 	bool got_drift = false;
-	bool got_warp = false;
+	bool got_warp_msr = false;
+	bool got_warp_page = false;
 
 	ok[i] = true;
 	do {
-		uint64_t now = hv_clock_read();
-		int delta = rdmsr(HV_X64_MSR_TIME_REF_COUNT) - now;
-
-		min_delta = delta < min_delta ? delta : min_delta;
-		if (t < msr_sample) {
-			max_delta = delta > max_delta ? delta: max_delta;
-		} else if (delta < 0 || delta > max_delta * 3 / 2) {
-			printf("suspecting drift on CPU %d? delta = %d, acceptable [0, %d)\n", i,
-			       delta, max_delta);
+		uint64_t t_page_1, t_page_2, t_msr;
+
+		t_page_1 = hv_clock_read();
+		barrier();
+		t_msr = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+		barrier();
+		t_page_2 = hv_clock_read();
+
+		if (!got_drift && (t_msr < t_page_1 || t_msr > t_page_2)) {
+			printf("drift on CPU %d, MSR value = %ld, acceptable [%ld, %ld]\n", i,
+			       t_msr, t_page_1, t_page_2);
 			ok[i] = false;
 			got_drift = true;
-			max_delta *= 2;
 		}
 
-		if (now < t && !got_warp) {
-			printf("warp on CPU %d!\n", i);
+		if (!got_warp_msr && t_msr < t_msr_prev) {
+			printf("warp on CPU %d, MSR value = %ld prev MSR value = %ld!\n", i,
+			       t_msr, t_msr_prev);
 			ok[i] = false;
-			got_warp = true;
+			got_warp_msr = true;
 			break;
 		}
-		t = now;
-	} while(t < end);
 
-	if (!got_drift)
-		printf("delta on CPU %d was %d...%d\n", i, min_delta, max_delta);
+		if (!got_warp_page && t_page_1 < t_page_prev) {
+			printf("warp on CPU %d, TSC page value = %ld prev TSC page value = %ld!\n", i,
+			       t_page_1, t_page_prev);
+			ok[i] = false;
+			got_warp_page = true;
+			break;
+		}
+
+		t_page_prev = t_page_1;
+		t_msr_prev = t_msr;
+
+	} while(t_page_prev < end);
+
 	barrier();
 }
 
-- 
2.44.0