* [PATCH v4] KVM: x86: Use current rather than snapshotted TSC frequency if it is constant
@ 2022-05-11 1:42 Anton Romanov
2022-05-11 19:49 ` Sean Christopherson
0 siblings, 1 reply; 2+ messages in thread
From: Anton Romanov @ 2022-05-11 1:42 UTC (permalink / raw)
To: kvm, pbonzini; +Cc: seanjc, vkuznets, mlevitsk, Anton Romanov
Don't snapshot tsc_khz into per-cpu cpu_tsc_khz if the host TSC is
constant, in which case the actual TSC frequency will never change and thus
capturing the TSC frequency during initialization is unnecessary; KVM can
simply use tsc_khz. The per-CPU value is snapshotted from
kvm_timer_init->kvmclock_cpu_online->tsc_khz_changed(NULL).
On CPUs with constant TSC, but not a hardware-specified TSC frequency,
snapshotting cpu_tsc_khz and using that to set a VM's target TSC frequency
can lead the VM to think its TSC frequency is not what it actually is if
refining the TSC completes after KVM snapshots tsc_khz. The actual
frequency never changes, only the kernel's calculation of what that
frequency is changes.
Ideally, KVM would not be able to race with TSC refinement, or would have
a hook into tsc_refine_calibration_work() to get an alert when refinement
is complete. Avoiding the race altogether isn't practical as refinement
takes a relative eternity; it's deliberately put on a work queue outside of
the normal boot sequence to avoid unnecessarily delaying boot.
Adding a hook is doable, but somewhat gross due to KVM's ability to be
built as a module. And if the TSC is constant, which is likely the case
for every VMX/SVM-capable CPU produced in the last decade, the race can be
hit if and only if userspace is able to create a VM before TSC refinement
completes; refinement is slow, but not that slow.
For now, punt on a proper fix, as not taking a snapshot can help some use
cases and not taking a snapshot is arguably correct irrespective of the
race with refinement.
Signed-off-by: Anton Romanov <romanton@google.com>
---
v4 :
* minor feedback changes
* skip updating per-cpu tsc in kvm_hyperv_tsc_notifier
arch/x86/kvm/x86.c | 35 ++++++++++++++++++++++++++++-------
1 file changed, 28 insertions(+), 7 deletions(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4790f0d7d40b..f0b4d5ae743b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2907,6 +2907,22 @@ static void kvm_update_masterclock(struct kvm *kvm)
kvm_end_pvclock_update(kvm);
}
+/*
+ * Use the kernel's tsc_khz directly if the TSC is constant, otherwise use KVM's
+ * per-CPU value (which may be zero if a CPU is going offline). Note, tsc_khz
+ * can change during boot even if the TSC is constant, as it's possible for KVM
+ * to be loaded before TSC calibration completes. Ideally, KVM would get a
+ * notification when calibration completes, but practically speaking calibration
+ * will complete before userspace is alive enough to create VMs.
+ */
+static unsigned long get_cpu_tsc_khz(void)
+{
+ if (static_cpu_has(X86_FEATURE_CONSTANT_TSC))
+ return tsc_khz;
+ else
+ return __this_cpu_read(cpu_tsc_khz);
+}
+
/* Called within read_seqcount_begin/retry for kvm->pvclock_sc. */
static void __get_kvmclock(struct kvm *kvm, struct kvm_clock_data *data)
{
@@ -2917,7 +2933,8 @@ static void __get_kvmclock(struct kvm *kvm, struct kvm_clock_data *data)
get_cpu();
data->flags = 0;
- if (ka->use_master_clock && __this_cpu_read(cpu_tsc_khz)) {
+ if (ka->use_master_clock &&
+ (static_cpu_has(X86_FEATURE_CONSTANT_TSC) || __this_cpu_read(cpu_tsc_khz))) {
#ifdef CONFIG_X86_64
struct timespec64 ts;
@@ -2931,7 +2948,7 @@ static void __get_kvmclock(struct kvm *kvm, struct kvm_clock_data *data)
data->flags |= KVM_CLOCK_TSC_STABLE;
hv_clock.tsc_timestamp = ka->master_cycle_now;
hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset;
- kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL,
+ kvm_get_time_scale(NSEC_PER_SEC, get_cpu_tsc_khz() * 1000LL,
&hv_clock.tsc_shift,
&hv_clock.tsc_to_system_mul);
data->clock = __pvclock_read_cycles(&hv_clock, data->host_tsc);
@@ -3049,7 +3066,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
/* Keep irq disabled to prevent changes to the clock */
local_irq_save(flags);
- tgt_tsc_khz = __this_cpu_read(cpu_tsc_khz);
+ tgt_tsc_khz = get_cpu_tsc_khz();
if (unlikely(tgt_tsc_khz == 0)) {
local_irq_restore(flags);
kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
@@ -8646,9 +8663,10 @@ static void tsc_khz_changed(void *data)
struct cpufreq_freqs *freq = data;
unsigned long khz = 0;
+ WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_CONSTANT_TSC));
if (data)
khz = freq->new;
- else if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
+ else
khz = cpufreq_quick_get(raw_smp_processor_id());
if (!khz)
khz = tsc_khz;
@@ -8669,8 +8687,10 @@ static void kvm_hyperv_tsc_notifier(void)
hyperv_stop_tsc_emulation();
/* TSC frequency always matches when on Hyper-V */
- for_each_present_cpu(cpu)
- per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
+ if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
+ for_each_present_cpu(cpu)
+ per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
+ }
kvm_max_guest_tsc_khz = tsc_khz;
list_for_each_entry(kvm, &vm_list, vm_list) {
@@ -8783,7 +8803,8 @@ static struct notifier_block kvmclock_cpufreq_notifier_block = {
static int kvmclock_cpu_online(unsigned int cpu)
{
- tsc_khz_changed(NULL);
+ if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
+ tsc_khz_changed(NULL);
return 0;
}
--
2.36.0.550.gb090851708-goog
^ permalink raw reply related [flat|nested] 2+ messages in thread
* Re: [PATCH v4] KVM: x86: Use current rather than snapshotted TSC frequency if it is constant
2022-05-11 1:42 [PATCH v4] KVM: x86: Use current rather than snapshotted TSC frequency if it is constant Anton Romanov
@ 2022-05-11 19:49 ` Sean Christopherson
0 siblings, 0 replies; 2+ messages in thread
From: Sean Christopherson @ 2022-05-11 19:49 UTC (permalink / raw)
To: Anton Romanov; +Cc: kvm, pbonzini, vkuznets, mlevitsk
On Wed, May 11, 2022, Anton Romanov wrote:
> @@ -8646,9 +8663,10 @@ static void tsc_khz_changed(void *data)
> struct cpufreq_freqs *freq = data;
> unsigned long khz = 0;
>
> + WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_CONSTANT_TSC));
Nit, please add a newline to isolate the WARN.
> if (data)
> khz = freq->new;
> - else if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
> + else
> khz = cpufreq_quick_get(raw_smp_processor_id());
> if (!khz)
> khz = tsc_khz;
> @@ -8669,8 +8687,10 @@ static void kvm_hyperv_tsc_notifier(void)
> hyperv_stop_tsc_emulation();
>
> /* TSC frequency always matches when on Hyper-V */
> - for_each_present_cpu(cpu)
> - per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
> + if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
> + for_each_present_cpu(cpu)
> + per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
> + }
> kvm_max_guest_tsc_khz = tsc_khz;
>
> list_for_each_entry(kvm, &vm_list, vm_list) {
> @@ -8783,7 +8803,8 @@ static struct notifier_block kvmclock_cpufreq_notifier_block = {
>
> static int kvmclock_cpu_online(unsigned int cpu)
> {
> - tsc_khz_changed(NULL);
> + if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
> + tsc_khz_changed(NULL);
Ah rats, I missed something in v3. Rather than handle this in the notifier, KVM
can simply not register the notifier in the first place. The CPUHP_AP_X86_KVM_CLK_ONLINE
hook exists purely to muck with cpu_tsc_khz. And that makes the WARN_ON_ONCE in
tsc_khz_changed() much less redundant (having a caller and its callee check the
same thing in quick succession felt silly).
I.e. instead of modifying kvmclock_cpu_online(), do:
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6567aec84223..e9efb8d00673 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8882,10 +8882,10 @@ static void kvm_timer_init(void)
}
cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
CPUFREQ_TRANSITION_NOTIFIER);
- }
- cpuhp_setup_state(CPUHP_AP_X86_KVM_CLK_ONLINE, "x86/kvm/clk:online",
- kvmclock_cpu_online, kvmclock_cpu_down_prep);
+ cpuhp_setup_state(CPUHP_AP_X86_KVM_CLK_ONLINE, "x86/kvm/clk:online",
+ kvmclock_cpu_online, kvmclock_cpu_down_prep);
+ }
}
#ifdef CONFIG_X86_64
@@ -9038,10 +9038,11 @@ void kvm_arch_exit(void)
#endif
kvm_lapic_exit();
- if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
+ if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
CPUFREQ_TRANSITION_NOTIFIER);
- cpuhp_remove_state_nocalls(CPUHP_AP_X86_KVM_CLK_ONLINE);
+ cpuhp_remove_state_nocalls(CPUHP_AP_X86_KVM_CLK_ONLINE);
+ }
#ifdef CONFIG_X86_64
pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier);
irq_work_sync(&pvclock_irq_work);
> return 0;
> }
>
> --
> 2.36.0.550.gb090851708-goog
>
^ permalink raw reply related [flat|nested] 2+ messages in thread
end of thread, other threads:[~2022-05-11 19:49 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-05-11 1:42 [PATCH v4] KVM: x86: Use current rather than snapshotted TSC frequency if it is constant Anton Romanov
2022-05-11 19:49 ` Sean Christopherson
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.