All of lore.kernel.org
 help / color / mirror / Atom feed
* [RHEL 8.3 BZ 1768622 PATCH 3/3] KVM: x86: use raw clock values consistently
       [not found] <20200518191516.165550666@fuller.cnet>
@ 2020-05-18 19:15 ` Marcelo Tosatti
  2020-05-19  8:58   ` Frantisek Hrbata
  0 siblings, 1 reply; 4+ messages in thread
From: Marcelo Tosatti @ 2020-05-18 19:15 UTC (permalink / raw)
  To: rhkernel-list
  Cc: Paolo Bonzini, Andrew Jones, Bandan Das, Igor Mammedov,
	Vitaly Kuznetsov, stable

BZ: 1768622 
Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=28587593
commit 8171cd68806bd2fc28ef688e32fb2a3b3deb04e5

Commit 53fafdbb8b21f ("KVM: x86: switch KVMCLOCK base to monotonic raw
clock") changed kvmclock to use tkr_raw instead of tkr_mono.  However,
the default kvmclock_offset for the VM was still based on the monotonic
clock and, if the raw clock drifted enough from the monotonic clock,
this could cause a negative system_time to be written to the guest's
struct pvclock.  RHEL5 does not like it and (if it boots fast enough to
observe a negative time value) it hangs.

There is another thing to be careful about: getboottime64 returns the
host boot time with tkr_mono frequency, and subtracting the tkr_raw-based
kvmclock value will cause the wallclock to be off if tkr_raw drifts
from tkr_mono.  To avoid this, compute the wallclock delta from the
current time instead of being clever and using getboottime64.

Fixes: 53fafdbb8b21f ("KVM: x86: switch KVMCLOCK base to monotonic raw clock")
Cc: stable@vger.kernel.org
Reviewed-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

Index: kernel-rhel/arch/x86/kvm/x86.c
===================================================================
--- kernel-rhel.orig/arch/x86/kvm/x86.c
+++ kernel-rhel/arch/x86/kvm/x86.c
@@ -1595,6 +1595,18 @@ static void update_pvclock_gtod(struct t
 
 	write_seqcount_end(&vdata->seq);
 }
+
+static s64 get_kvmclock_base_ns(void)
+{
+	/* Count up from boot time, but with the frequency of the raw clock.  */
+	return ktime_to_ns(ktime_add(ktime_get_raw(), pvclock_gtod_data.offs_boot));
+}
+#else
+static s64 get_kvmclock_base_ns(void)
+{
+	/* Master clock not used, so we can just use CLOCK_BOOTTIME.  */
+	return ktime_get_boottime_ns();
+}
 #endif
 
 void kvm_set_pending_timer(struct kvm_vcpu *vcpu)
@@ -1608,7 +1620,7 @@ static void kvm_write_wall_clock(struct
 	int version;
 	int r;
 	struct pvclock_wall_clock wc;
-	struct timespec64 boot;
+	u64 wall_nsec;
 
 	if (!wall_clock)
 		return;
@@ -1628,17 +1640,12 @@ static void kvm_write_wall_clock(struct
 	/*
 	 * The guest calculates current wall clock time by adding
 	 * system time (updated by kvm_guest_time_update below) to the
-	 * wall clock specified here.  guest system time equals host
-	 * system time for us, thus we must fill in host boot time here.
+	 * wall clock specified here.  We do the reverse here.
 	 */
-	getboottime64(&boot);
+	wall_nsec = ktime_get_real_ns() - get_kvmclock_ns(kvm);
 
-	if (kvm->arch.kvmclock_offset) {
-		struct timespec64 ts = ns_to_timespec64(kvm->arch.kvmclock_offset);
-		boot = timespec64_sub(boot, ts);
-	}
-	wc.sec = (u32)boot.tv_sec; /* overflow in 2106 guest time */
-	wc.nsec = boot.tv_nsec;
+	wc.nsec = do_div(wall_nsec, 1000000000);
+	wc.sec = (u32)wall_nsec; /* overflow in 2106 guest time */
 	wc.version = version;
 
 	kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc));
@@ -1886,7 +1893,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu
 
 	raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
 	offset = kvm_compute_tsc_offset(vcpu, data);
-	ns = ktime_get_boot_ns();
+	ns = get_kvmclock_base_ns();
 	elapsed = ns - kvm->arch.last_tsc_nsec;
 
 	if (vcpu->arch.virtual_tsc_khz) {
@@ -2224,7 +2231,7 @@ u64 get_kvmclock_ns(struct kvm *kvm)
 	spin_lock(&ka->pvclock_gtod_sync_lock);
 	if (!ka->use_master_clock) {
 		spin_unlock(&ka->pvclock_gtod_sync_lock);
-		return ktime_get_boot_ns() + ka->kvmclock_offset;
+		return get_kvmclock_base_ns() + ka->kvmclock_offset;
 	}
 
 	hv_clock.tsc_timestamp = ka->master_cycle_now;
@@ -2240,7 +2247,7 @@ u64 get_kvmclock_ns(struct kvm *kvm)
 				   &hv_clock.tsc_to_system_mul);
 		ret = __pvclock_read_cycles(&hv_clock, rdtsc());
 	} else
-		ret = ktime_get_boot_ns() + ka->kvmclock_offset;
+		ret = get_kvmclock_base_ns() + ka->kvmclock_offset;
 
 	put_cpu();
 
@@ -2339,7 +2346,7 @@ static int kvm_guest_time_update(struct
 	}
 	if (!use_master_clock) {
 		host_tsc = rdtsc();
-		kernel_ns = ktime_get_boot_ns();
+		kernel_ns = get_kvmclock_base_ns();
 	}
 
 	tsc_timestamp = kvm_read_l1_tsc(v, host_tsc);
@@ -2379,6 +2386,7 @@ static int kvm_guest_time_update(struct
 	vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
 	vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
 	vcpu->last_guest_tsc = tsc_timestamp;
+	WARN_ON(vcpu->hv_clock.system_time < 0);
 
 	/* If the host uses TSC clocksource, then it is stable */
 	pvclock_flags = 0;
@@ -9486,7 +9494,7 @@ int kvm_arch_init_vm(struct kvm *kvm, un
 	mutex_init(&kvm->arch.apic_map_lock);
 	spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);
 
-	kvm->arch.kvmclock_offset = -ktime_get_boot_ns();
+	kvm->arch.kvmclock_offset = -get_kvmclock_base_ns();
 	pvclock_update_vm_gtod_copy(kvm);
 
 	kvm->arch.guest_can_read_msr_platform_info = true;



^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [RHEL 8.3 BZ 1768622 PATCH 3/3] KVM: x86: use raw clock values consistently
  2020-05-18 19:15 ` [RHEL 8.3 BZ 1768622 PATCH 3/3] KVM: x86: use raw clock values consistently Marcelo Tosatti
@ 2020-05-19  8:58   ` Frantisek Hrbata
  2020-05-19  9:18     ` Greg KH
  0 siblings, 1 reply; 4+ messages in thread
From: Frantisek Hrbata @ 2020-05-19  8:58 UTC (permalink / raw)
  To: Marcelo Tosatti
  Cc: rhkernel-list, Andrew Jones, stable, Bandan Das, Igor Mammedov,
	Paolo Bonzini, Vitaly Kuznetsov

Hi Marcelo,

I'm marking this as superseded by pwid 303977

http://patchwork.lab.bos.redhat.com/patch/303977/

which was included in kernel-4.18.0-196.el8

    commit 9751522d92195bc64883c71e2bee8ed0fcbc5007
    Author: Vitaly Kuznetsov <vkuznets@redhat.com>
    Date:   Mon Apr 20 15:20:04 2020 -0400

    [x86] kvm: x86: use raw clock values consistently

    Message-id: <20200420152004.933168-1-vkuznets@redhat.com>
    Patchwork-id: 303977
    Patchwork-instance: patchwork
    O-Subject: [RHEL8.3 virt PATCH v2 312/614] KVM: x86: use raw clock values consistently
    Bugzilla: 1813987
    RH-Acked-by: Andrew Jones <drjones@redhat.com>
    RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
    RH-Acked-by: Tony Camuso <tcamuso@redhat.com>

Note there is a difference between yours and Vitaly's patch in the
following hunk

+@@ -1656,6 +1656,18 @@ static void update_pvclock_gtod(struct timekeeper *tk)

        write_seqcount_end(&vdata->seq);
  }
@@ -14,12 +16,12 @@
 +static s64 get_kvmclock_base_ns(void)
 +{
 +      /* Master clock not used, so we can just use CLOCK_BOOTTIME.  */
-+      return ktime_get_boottime_ns();
++      return ktime_get_boot_ns();
 +}
  #endif


This is in !CONFIG_X86_64 path, so I'm not sure how much we care about
this. Anyway Vitaly's patch corrects this, because
rhel does not have upstream commit 9285ec4c8b61d4930a575081abeba2cd4f449a74

Thank you

On Mon, May 18, 2020 at 04:15:19PM -0300, Marcelo Tosatti wrote:
> BZ: 1768622 
> Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=28587593
> commit 8171cd68806bd2fc28ef688e32fb2a3b3deb04e5
> 
> Commit 53fafdbb8b21f ("KVM: x86: switch KVMCLOCK base to monotonic raw
> clock") changed kvmclock to use tkr_raw instead of tkr_mono.  However,
> the default kvmclock_offset for the VM was still based on the monotonic
> clock and, if the raw clock drifted enough from the monotonic clock,
> this could cause a negative system_time to be written to the guest's
> struct pvclock.  RHEL5 does not like it and (if it boots fast enough to
> observe a negative time value) it hangs.
> 
> There is another thing to be careful about: getboottime64 returns the
> host boot time with tkr_mono frequency, and subtracting the tkr_raw-based
> kvmclock value will cause the wallclock to be off if tkr_raw drifts
> from tkr_mono.  To avoid this, compute the wallclock delta from the
> current time instead of being clever and using getboottime64.
> 
> Fixes: 53fafdbb8b21f ("KVM: x86: switch KVMCLOCK base to monotonic raw clock")
> Cc: stable@vger.kernel.org
> Reviewed-by: Vitaly Kuznetsov <vkuznets@redhat.com>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> 
> Index: kernel-rhel/arch/x86/kvm/x86.c
> ===================================================================
> --- kernel-rhel.orig/arch/x86/kvm/x86.c
> +++ kernel-rhel/arch/x86/kvm/x86.c
> @@ -1595,6 +1595,18 @@ static void update_pvclock_gtod(struct t
>  
>  	write_seqcount_end(&vdata->seq);
>  }
> +
> +static s64 get_kvmclock_base_ns(void)
> +{
> +	/* Count up from boot time, but with the frequency of the raw clock.  */
> +	return ktime_to_ns(ktime_add(ktime_get_raw(), pvclock_gtod_data.offs_boot));
> +}
> +#else
> +static s64 get_kvmclock_base_ns(void)
> +{
> +	/* Master clock not used, so we can just use CLOCK_BOOTTIME.  */
> +	return ktime_get_boottime_ns();
> +}
>  #endif
>  
>  void kvm_set_pending_timer(struct kvm_vcpu *vcpu)
> @@ -1608,7 +1620,7 @@ static void kvm_write_wall_clock(struct
>  	int version;
>  	int r;
>  	struct pvclock_wall_clock wc;
> -	struct timespec64 boot;
> +	u64 wall_nsec;
>  
>  	if (!wall_clock)
>  		return;
> @@ -1628,17 +1640,12 @@ static void kvm_write_wall_clock(struct
>  	/*
>  	 * The guest calculates current wall clock time by adding
>  	 * system time (updated by kvm_guest_time_update below) to the
> -	 * wall clock specified here.  guest system time equals host
> -	 * system time for us, thus we must fill in host boot time here.
> +	 * wall clock specified here.  We do the reverse here.
>  	 */
> -	getboottime64(&boot);
> +	wall_nsec = ktime_get_real_ns() - get_kvmclock_ns(kvm);
>  
> -	if (kvm->arch.kvmclock_offset) {
> -		struct timespec64 ts = ns_to_timespec64(kvm->arch.kvmclock_offset);
> -		boot = timespec64_sub(boot, ts);
> -	}
> -	wc.sec = (u32)boot.tv_sec; /* overflow in 2106 guest time */
> -	wc.nsec = boot.tv_nsec;
> +	wc.nsec = do_div(wall_nsec, 1000000000);
> +	wc.sec = (u32)wall_nsec; /* overflow in 2106 guest time */
>  	wc.version = version;
>  
>  	kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc));
> @@ -1886,7 +1893,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu
>  
>  	raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
>  	offset = kvm_compute_tsc_offset(vcpu, data);
> -	ns = ktime_get_boot_ns();
> +	ns = get_kvmclock_base_ns();
>  	elapsed = ns - kvm->arch.last_tsc_nsec;
>  
>  	if (vcpu->arch.virtual_tsc_khz) {
> @@ -2224,7 +2231,7 @@ u64 get_kvmclock_ns(struct kvm *kvm)
>  	spin_lock(&ka->pvclock_gtod_sync_lock);
>  	if (!ka->use_master_clock) {
>  		spin_unlock(&ka->pvclock_gtod_sync_lock);
> -		return ktime_get_boot_ns() + ka->kvmclock_offset;
> +		return get_kvmclock_base_ns() + ka->kvmclock_offset;
>  	}
>  
>  	hv_clock.tsc_timestamp = ka->master_cycle_now;
> @@ -2240,7 +2247,7 @@ u64 get_kvmclock_ns(struct kvm *kvm)
>  				   &hv_clock.tsc_to_system_mul);
>  		ret = __pvclock_read_cycles(&hv_clock, rdtsc());
>  	} else
> -		ret = ktime_get_boot_ns() + ka->kvmclock_offset;
> +		ret = get_kvmclock_base_ns() + ka->kvmclock_offset;
>  
>  	put_cpu();
>  
> @@ -2339,7 +2346,7 @@ static int kvm_guest_time_update(struct
>  	}
>  	if (!use_master_clock) {
>  		host_tsc = rdtsc();
> -		kernel_ns = ktime_get_boot_ns();
> +		kernel_ns = get_kvmclock_base_ns();
>  	}
>  
>  	tsc_timestamp = kvm_read_l1_tsc(v, host_tsc);
> @@ -2379,6 +2386,7 @@ static int kvm_guest_time_update(struct
>  	vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
>  	vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
>  	vcpu->last_guest_tsc = tsc_timestamp;
> +	WARN_ON(vcpu->hv_clock.system_time < 0);
>  
>  	/* If the host uses TSC clocksource, then it is stable */
>  	pvclock_flags = 0;
> @@ -9486,7 +9494,7 @@ int kvm_arch_init_vm(struct kvm *kvm, un
>  	mutex_init(&kvm->arch.apic_map_lock);
>  	spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);
>  
> -	kvm->arch.kvmclock_offset = -ktime_get_boot_ns();
> +	kvm->arch.kvmclock_offset = -get_kvmclock_base_ns();
>  	pvclock_update_vm_gtod_copy(kvm);
>  
>  	kvm->arch.guest_can_read_msr_platform_info = true;
> 
> 

-- 
Frantisek Hrbata


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [RHEL 8.3 BZ 1768622 PATCH 3/3] KVM: x86: use raw clock values consistently
  2020-05-19  8:58   ` Frantisek Hrbata
@ 2020-05-19  9:18     ` Greg KH
  2020-05-19  9:53       ` Paolo Bonzini
  0 siblings, 1 reply; 4+ messages in thread
From: Greg KH @ 2020-05-19  9:18 UTC (permalink / raw)
  To: Frantisek Hrbata
  Cc: Marcelo Tosatti, rhkernel-list, Andrew Jones, stable, Bandan Das,
	Igor Mammedov, Paolo Bonzini, Vitaly Kuznetsov

On Tue, May 19, 2020 at 10:58:52AM +0200, Frantisek Hrbata wrote:
> Hi Marcelo,
> 
> I'm marking this as superseded by pwid 303977
> 
> http://patchwork.lab.bos.redhat.com/patch/303977/
> 
> which was included in kernel-4.18.0-196.el8
> 
>     commit 9751522d92195bc64883c71e2bee8ed0fcbc5007
>     Author: Vitaly Kuznetsov <vkuznets@redhat.com>
>     Date:   Mon Apr 20 15:20:04 2020 -0400
> 
>     [x86] kvm: x86: use raw clock values consistently
> 
>     Message-id: <20200420152004.933168-1-vkuznets@redhat.com>
>     Patchwork-id: 303977
>     Patchwork-instance: patchwork
>     O-Subject: [RHEL8.3 virt PATCH v2 312/614] KVM: x86: use raw clock values consistently
>     Bugzilla: 1813987
>     RH-Acked-by: Andrew Jones <drjones@redhat.com>
>     RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
>     RH-Acked-by: Tony Camuso <tcamuso@redhat.com>
> 
> Note there is a difference between yours and Vitaly's patch in the
> following hunk
> 
> +@@ -1656,6 +1656,18 @@ static void update_pvclock_gtod(struct timekeeper *tk)
> 
>         write_seqcount_end(&vdata->seq);
>   }
> @@ -14,12 +16,12 @@
>  +static s64 get_kvmclock_base_ns(void)
>  +{
>  +      /* Master clock not used, so we can just use CLOCK_BOOTTIME.  */
> -+      return ktime_get_boottime_ns();
> ++      return ktime_get_boot_ns();
>  +}
>   #endif
> 
> 
> This is in !CONFIG_X86_64 path, so I'm not sure how much we care about
> this. Anyway Vitaly's patch corrects this, because
> rhel does not have upstream commit 9285ec4c8b61d4930a575081abeba2cd4f449a74

Gotta love private git repo discussions on a public mailing list :)

greg k-h

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [RHEL 8.3 BZ 1768622 PATCH 3/3] KVM: x86: use raw clock values consistently
  2020-05-19  9:18     ` Greg KH
@ 2020-05-19  9:53       ` Paolo Bonzini
  0 siblings, 0 replies; 4+ messages in thread
From: Paolo Bonzini @ 2020-05-19  9:53 UTC (permalink / raw)
  To: Greg KH, Frantisek Hrbata
  Cc: Marcelo Tosatti, rhkernel-list, Andrew Jones, stable, Bandan Das,
	Igor Mammedov, Vitaly Kuznetsov

On 19/05/20 11:18, Greg KH wrote:
> Gotta love private git repo discussions on a public mailing list :)

Somebody has forgotten their suppresscc=all.

Paolo


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2020-05-19  9:54 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <20200518191516.165550666@fuller.cnet>
2020-05-18 19:15 ` [RHEL 8.3 BZ 1768622 PATCH 3/3] KVM: x86: use raw clock values consistently Marcelo Tosatti
2020-05-19  8:58   ` Frantisek Hrbata
2020-05-19  9:18     ` Greg KH
2020-05-19  9:53       ` Paolo Bonzini

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.