From mboxrd@z Thu Jan 1 00:00:00 1970 From: Igor Mammedov Subject: Re: x86: kvmclock: abstract save/restore sched_clock_state (v2) Date: Mon, 13 Feb 2012 16:20:24 +0100 Message-ID: <4F392A38.4030808@redhat.com> References: <20120207210542.GC20618@amt.cnet> <20120213130727.GA8052@amt.cnet> Mime-Version: 1.0 Content-Type: text/plain; charset=UTF-8; format=flowed Content-Transfer-Encoding: 7bit Cc: kvm@vger.kernel.org, tglx@linutronix.de, mingo@redhat.com, hpa@zytor.com, x86@kernel.org, johnstul@us.ibm.com, riel@redhat.com, avi@redhat.com, amit.shah@redhat.com To: Marcelo Tosatti Return-path: Received: from mx1.redhat.com ([209.132.183.28]:11232 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1753366Ab2BMPUo (ORCPT ); Mon, 13 Feb 2012 10:20:44 -0500 In-Reply-To: <20120213130727.GA8052@amt.cnet> Sender: kvm-owner@vger.kernel.org List-ID: On 02/13/2012 02:07 PM, Marcelo Tosatti wrote: > > Upon resume from hibernation, CPU 0's hvclock area contains the old > values for system_time and tsc_timestamp. It is necessary for the > hypervisor to update these values with uptodate ones before the CPU uses > them. > > Abstract TSC's save/restore sched_clock_state functions and use > restore_state to write to KVM_SYSTEM_TIME MSR, forcing an update. > > Also move restore_sched_clock_state before __restore_processor_state, > since the later calls CONFIG_LOCK_STAT's lockstat_clock (also for TSC). > Thanks to Igor Mammedov for tracking it down. > > Fixes suspend-to-disk with kvmclock. > > Signed-off-by: Marcelo Tosatti > > diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h > index 15d9915..c91e8b9 100644 > --- a/arch/x86/include/asm/tsc.h > +++ b/arch/x86/include/asm/tsc.h > @@ -61,7 +61,7 @@ extern void check_tsc_sync_source(int cpu); > extern void check_tsc_sync_target(void); > > extern int notsc_setup(char *); > -extern void save_sched_clock_state(void); > -extern void restore_sched_clock_state(void); > +extern void tsc_save_sched_clock_state(void); > +extern void tsc_restore_sched_clock_state(void); > > #endif /* _ASM_X86_TSC_H */ > diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h > index 5d0afac..baaca8d 100644 > --- a/arch/x86/include/asm/x86_init.h > +++ b/arch/x86/include/asm/x86_init.h > @@ -162,6 +162,8 @@ struct x86_cpuinit_ops { > * @is_untracked_pat_range exclude from PAT logic > * @nmi_init enable NMI on cpus > * @i8042_detect pre-detect if i8042 controller exists > + * @save_sched_clock_state: save state for sched_clock() on suspend > + * @restore_sched_clock_state: restore state for sched_clock() on resume > */ > struct x86_platform_ops { > unsigned long (*calibrate_tsc)(void); > @@ -173,6 +175,8 @@ struct x86_platform_ops { > void (*nmi_init)(void); > unsigned char (*get_nmi_reason)(void); > int (*i8042_detect)(void); > + void (*save_sched_clock_state)(void); > + void (*restore_sched_clock_state)(void); > }; > > struct pci_dev; > diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c > index ca4e735..57e6b78 100644 > --- a/arch/x86/kernel/kvmclock.c > +++ b/arch/x86/kernel/kvmclock.c > @@ -136,6 +136,15 @@ int kvm_register_clock(char *txt) > return ret; > } > > +void kvm_save_sched_clock_state(void) > +{ > +} > + > +void kvm_restore_sched_clock_state(void) > +{ > + kvm_register_clock("primary cpu clock, resume"); > +} > + > #ifdef CONFIG_X86_LOCAL_APIC > static void __cpuinit kvm_setup_secondary_clock(void) > { > @@ -195,6 +204,8 @@ void __init kvmclock_init(void) > x86_cpuinit.early_percpu_clock_init = > kvm_setup_secondary_clock; > #endif > + x86_platform.save_sched_clock_state = kvm_save_sched_clock_state; > + x86_platform.restore_sched_clock_state = kvm_restore_sched_clock_state; > machine_ops.shutdown = kvm_shutdown; > #ifdef CONFIG_KEXEC > machine_ops.crash_shutdown = kvm_crash_shutdown; > diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c > index a62c201..aed2aa1 100644 > --- a/arch/x86/kernel/tsc.c > +++ b/arch/x86/kernel/tsc.c > @@ -629,7 +629,7 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) > > static unsigned long long cyc2ns_suspend; > > -void save_sched_clock_state(void) > +void tsc_save_sched_clock_state(void) > { > if (!sched_clock_stable) > return; > @@ -645,7 +645,7 @@ void save_sched_clock_state(void) > * that sched_clock() continues from the point where it was left off during > * suspend. > */ > -void restore_sched_clock_state(void) > +void tsc_restore_sched_clock_state(void) > { > unsigned long long offset; > unsigned long flags; > diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c > index 6f2ec53..e9f265f 100644 > --- a/arch/x86/kernel/x86_init.c > +++ b/arch/x86/kernel/x86_init.c > @@ -108,7 +108,9 @@ struct x86_platform_ops x86_platform = { > .is_untracked_pat_range = is_ISA_range, > .nmi_init = default_nmi_init, > .get_nmi_reason = default_get_nmi_reason, > - .i8042_detect = default_i8042_detect > + .i8042_detect = default_i8042_detect, > + .save_sched_clock_state = tsc_save_sched_clock_state, > + .restore_sched_clock_state = tsc_restore_sched_clock_state, > }; > > EXPORT_SYMBOL_GPL(x86_platform); > diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c > index f10c0af..0e76a28 100644 > --- a/arch/x86/power/cpu.c > +++ b/arch/x86/power/cpu.c > @@ -114,7 +114,7 @@ static void __save_processor_state(struct saved_context *ctxt) > void save_processor_state(void) > { > __save_processor_state(&saved_context); > - save_sched_clock_state(); > + x86_platform.save_sched_clock_state(); > } > #ifdef CONFIG_X86_32 > EXPORT_SYMBOL(save_processor_state); > @@ -230,8 +230,8 @@ static void __restore_processor_state(struct saved_context *ctxt) > /* Needed by apm.c */ > void restore_processor_state(void) > { > + x86_platform.restore_sched_clock_state(); Isn't it too early? It is scarry to say hypervisor to write to some memory location and than completely replace page-tables and half of cpu state in __restore_processor_state. Wouldn't that have a potential of writing into a place that is not restored hv_clock and restored hv_clock might still be stale? > __restore_processor_state(&saved_context); > - restore_sched_clock_state(); > } > #ifdef CONFIG_X86_32 > EXPORT_SYMBOL(restore_processor_state); > > -- > To unsubscribe from this list: send the line "unsubscribe kvm" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- Thanks, Igor