* [qemu patch 0/2] improve kvmclock difference on migration @ 2016-11-14 12:36 ` Marcelo Tosatti 0 siblings, 0 replies; 46+ messages in thread From: Marcelo Tosatti @ 2016-11-14 12:36 UTC (permalink / raw) To: kvm Cc: qemu-devel, Dr. David Alan Gilbert, Paolo Bonzini, Juan Quintela, Radim Krcmar, Eduardo Habkost See patches for details. ^ permalink raw reply [flat|nested] 46+ messages in thread
* [Qemu-devel] [qemu patch 0/2] improve kvmclock difference on migration @ 2016-11-14 12:36 ` Marcelo Tosatti 0 siblings, 0 replies; 46+ messages in thread From: Marcelo Tosatti @ 2016-11-14 12:36 UTC (permalink / raw) To: kvm Cc: qemu-devel, Dr. David Alan Gilbert, Paolo Bonzini, Juan Quintela, Radim Krcmar, Eduardo Habkost See patches for details. ^ permalink raw reply [flat|nested] 46+ messages in thread
* [qemu patch 1/2] kvm: sync linux headers 2016-11-14 12:36 ` [Qemu-devel] " Marcelo Tosatti @ 2016-11-14 12:36 ` Marcelo Tosatti -1 siblings, 0 replies; 46+ messages in thread From: Marcelo Tosatti @ 2016-11-14 12:36 UTC (permalink / raw) To: kvm Cc: qemu-devel, Dr. David Alan Gilbert, Paolo Bonzini, Juan Quintela, Radim Krcmar, Eduardo Habkost, Marcelo Tosatti [-- Attachment #1: sync-linux-headers.patch --] [-- Type: text/plain, Size: 5449 bytes --] Import KVM_CLOCK_TSC_STABLE. Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com> diff --git a/include/standard-headers/linux/input.h b/include/standard-headers/linux/input.h index 7361a16..b472b85 100644 --- a/include/standard-headers/linux/input.h +++ b/include/standard-headers/linux/input.h @@ -245,6 +245,7 @@ struct input_mask { #define BUS_SPI 0x1C #define BUS_RMI 0x1D #define BUS_CEC 0x1E +#define BUS_INTEL_ISHTP 0x1F /* * MT_TOOL types diff --git a/include/standard-headers/linux/pci_regs.h b/include/standard-headers/linux/pci_regs.h index 4040951..e5a2e68 100644 --- a/include/standard-headers/linux/pci_regs.h +++ b/include/standard-headers/linux/pci_regs.h @@ -612,6 +612,8 @@ */ #define PCI_EXP_DEVCAP2 36 /* Device Capabilities 2 */ #define PCI_EXP_DEVCAP2_ARI 0x00000020 /* Alternative Routing-ID */ +#define PCI_EXP_DEVCAP2_ATOMIC_ROUTE 0x00000040 /* Atomic Op routing */ +#define PCI_EXP_DEVCAP2_ATOMIC_COMP64 0x00000100 /* Atomic 64-bit compare */ #define PCI_EXP_DEVCAP2_LTR 0x00000800 /* Latency tolerance reporting */ #define PCI_EXP_DEVCAP2_OBFF_MASK 0x000c0000 /* OBFF support mechanism */ #define PCI_EXP_DEVCAP2_OBFF_MSG 0x00040000 /* New message signaling */ @@ -619,6 +621,7 @@ #define PCI_EXP_DEVCTL2 40 /* Device Control 2 */ #define PCI_EXP_DEVCTL2_COMP_TIMEOUT 0x000f /* Completion Timeout Value */ #define PCI_EXP_DEVCTL2_ARI 0x0020 /* Alternative Routing-ID */ +#define PCI_EXP_DEVCTL2_ATOMIC_REQ 0x0040 /* Set Atomic requests */ #define PCI_EXP_DEVCTL2_IDO_REQ_EN 0x0100 /* Allow IDO for requests */ #define PCI_EXP_DEVCTL2_IDO_CMP_EN 0x0200 /* Allow IDO for completions */ #define PCI_EXP_DEVCTL2_LTR_EN 0x0400 /* Enable LTR mechanism */ @@ -671,7 +674,8 @@ #define PCI_EXT_CAP_ID_PMUX 0x1A /* Protocol Multiplexing */ #define PCI_EXT_CAP_ID_PASID 0x1B /* Process Address Space ID */ #define PCI_EXT_CAP_ID_DPC 0x1D /* Downstream Port Containment */ -#define PCI_EXT_CAP_ID_MAX PCI_EXT_CAP_ID_DPC +#define PCI_EXT_CAP_ID_PTM 0x1F /* Precision Time Measurement */ +#define PCI_EXT_CAP_ID_MAX PCI_EXT_CAP_ID_PTM #define PCI_EXT_CAP_DSN_SIZEOF 12 #define PCI_EXT_CAP_MCAST_ENDPOINT_SIZEOF 40 @@ -964,4 +968,13 @@ #define PCI_EXP_DPC_SOURCE_ID 10 /* DPC Source Identifier */ +/* Precision Time Measurement */ +#define PCI_PTM_CAP 0x04 /* PTM Capability */ +#define PCI_PTM_CAP_REQ 0x00000001 /* Requester capable */ +#define PCI_PTM_CAP_ROOT 0x00000004 /* Root capable */ +#define PCI_PTM_GRANULARITY_MASK 0x0000FF00 /* Clock granularity */ +#define PCI_PTM_CTRL 0x08 /* PTM Control */ +#define PCI_PTM_CTRL_ENABLE 0x00000001 /* PTM enable */ +#define PCI_PTM_CTRL_ROOT 0x00000002 /* Root select */ + #endif /* LINUX_PCI_REGS_H */ diff --git a/linux-headers/asm-arm/kvm.h b/linux-headers/asm-arm/kvm.h index 541268c..2fb7859 100644 --- a/linux-headers/asm-arm/kvm.h +++ b/linux-headers/asm-arm/kvm.h @@ -84,6 +84,13 @@ struct kvm_regs { #define KVM_VGIC_V2_DIST_SIZE 0x1000 #define KVM_VGIC_V2_CPU_SIZE 0x2000 +/* Supported VGICv3 address types */ +#define KVM_VGIC_V3_ADDR_TYPE_DIST 2 +#define KVM_VGIC_V3_ADDR_TYPE_REDIST 3 + +#define KVM_VGIC_V3_DIST_SIZE SZ_64K +#define KVM_VGIC_V3_REDIST_SIZE (2 * SZ_64K) + #define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */ #define KVM_ARM_VCPU_PSCI_0_2 1 /* CPU uses PSCI v0.2 */ diff --git a/linux-headers/asm-x86/unistd_32.h b/linux-headers/asm-x86/unistd_32.h index abeaf40..d45ea28 100644 --- a/linux-headers/asm-x86/unistd_32.h +++ b/linux-headers/asm-x86/unistd_32.h @@ -377,5 +377,8 @@ #define __NR_copy_file_range 377 #define __NR_preadv2 378 #define __NR_pwritev2 379 +#define __NR_pkey_mprotect 380 +#define __NR_pkey_alloc 381 +#define __NR_pkey_free 382 #endif /* _ASM_X86_UNISTD_32_H */ diff --git a/linux-headers/asm-x86/unistd_64.h b/linux-headers/asm-x86/unistd_64.h index 73c3d1f..e22db91 100644 --- a/linux-headers/asm-x86/unistd_64.h +++ b/linux-headers/asm-x86/unistd_64.h @@ -330,5 +330,8 @@ #define __NR_copy_file_range 326 #define __NR_preadv2 327 #define __NR_pwritev2 328 +#define __NR_pkey_mprotect 329 +#define __NR_pkey_alloc 330 +#define __NR_pkey_free 331 #endif /* _ASM_X86_UNISTD_64_H */ diff --git a/linux-headers/asm-x86/unistd_x32.h b/linux-headers/asm-x86/unistd_x32.h index e5aea76..84e58b2 100644 --- a/linux-headers/asm-x86/unistd_x32.h +++ b/linux-headers/asm-x86/unistd_x32.h @@ -283,6 +283,9 @@ #define __NR_membarrier (__X32_SYSCALL_BIT + 324) #define __NR_mlock2 (__X32_SYSCALL_BIT + 325) #define __NR_copy_file_range (__X32_SYSCALL_BIT + 326) +#define __NR_pkey_mprotect (__X32_SYSCALL_BIT + 329) +#define __NR_pkey_alloc (__X32_SYSCALL_BIT + 330) +#define __NR_pkey_free (__X32_SYSCALL_BIT + 331) #define __NR_rt_sigaction (__X32_SYSCALL_BIT + 512) #define __NR_rt_sigreturn (__X32_SYSCALL_BIT + 513) #define __NR_ioctl (__X32_SYSCALL_BIT + 514) diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h index 4806e06..bb0ed71 100644 --- a/linux-headers/linux/kvm.h +++ b/linux-headers/linux/kvm.h @@ -972,12 +972,19 @@ struct kvm_irqfd { __u8 pad[16]; }; +/* For KVM_CAP_ADJUST_CLOCK */ + +/* Do not use 1, KVM_CHECK_EXTENSION returned it before we had flags. */ +#define KVM_CLOCK_TSC_STABLE 2 + struct kvm_clock_data { __u64 clock; __u32 flags; __u32 pad[9]; }; +/* For KVM_CAP_SW_TLB */ + #define KVM_MMU_FSL_BOOKE_NOHV 0 #define KVM_MMU_FSL_BOOKE_HV 1 ^ permalink raw reply related [flat|nested] 46+ messages in thread
* [Qemu-devel] [qemu patch 1/2] kvm: sync linux headers @ 2016-11-14 12:36 ` Marcelo Tosatti 0 siblings, 0 replies; 46+ messages in thread From: Marcelo Tosatti @ 2016-11-14 12:36 UTC (permalink / raw) To: kvm Cc: qemu-devel, Dr. David Alan Gilbert, Paolo Bonzini, Juan Quintela, Radim Krcmar, Eduardo Habkost, Marcelo Tosatti [-- Attachment #1: sync-linux-headers.patch --] [-- Type: text/plain, Size: 5447 bytes --] Import KVM_CLOCK_TSC_STABLE. Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com> diff --git a/include/standard-headers/linux/input.h b/include/standard-headers/linux/input.h index 7361a16..b472b85 100644 --- a/include/standard-headers/linux/input.h +++ b/include/standard-headers/linux/input.h @@ -245,6 +245,7 @@ struct input_mask { #define BUS_SPI 0x1C #define BUS_RMI 0x1D #define BUS_CEC 0x1E +#define BUS_INTEL_ISHTP 0x1F /* * MT_TOOL types diff --git a/include/standard-headers/linux/pci_regs.h b/include/standard-headers/linux/pci_regs.h index 4040951..e5a2e68 100644 --- a/include/standard-headers/linux/pci_regs.h +++ b/include/standard-headers/linux/pci_regs.h @@ -612,6 +612,8 @@ */ #define PCI_EXP_DEVCAP2 36 /* Device Capabilities 2 */ #define PCI_EXP_DEVCAP2_ARI 0x00000020 /* Alternative Routing-ID */ +#define PCI_EXP_DEVCAP2_ATOMIC_ROUTE 0x00000040 /* Atomic Op routing */ +#define PCI_EXP_DEVCAP2_ATOMIC_COMP64 0x00000100 /* Atomic 64-bit compare */ #define PCI_EXP_DEVCAP2_LTR 0x00000800 /* Latency tolerance reporting */ #define PCI_EXP_DEVCAP2_OBFF_MASK 0x000c0000 /* OBFF support mechanism */ #define PCI_EXP_DEVCAP2_OBFF_MSG 0x00040000 /* New message signaling */ @@ -619,6 +621,7 @@ #define PCI_EXP_DEVCTL2 40 /* Device Control 2 */ #define PCI_EXP_DEVCTL2_COMP_TIMEOUT 0x000f /* Completion Timeout Value */ #define PCI_EXP_DEVCTL2_ARI 0x0020 /* Alternative Routing-ID */ +#define PCI_EXP_DEVCTL2_ATOMIC_REQ 0x0040 /* Set Atomic requests */ #define PCI_EXP_DEVCTL2_IDO_REQ_EN 0x0100 /* Allow IDO for requests */ #define PCI_EXP_DEVCTL2_IDO_CMP_EN 0x0200 /* Allow IDO for completions */ #define PCI_EXP_DEVCTL2_LTR_EN 0x0400 /* Enable LTR mechanism */ @@ -671,7 +674,8 @@ #define PCI_EXT_CAP_ID_PMUX 0x1A /* Protocol Multiplexing */ #define PCI_EXT_CAP_ID_PASID 0x1B /* Process Address Space ID */ #define PCI_EXT_CAP_ID_DPC 0x1D /* Downstream Port Containment */ -#define PCI_EXT_CAP_ID_MAX PCI_EXT_CAP_ID_DPC +#define PCI_EXT_CAP_ID_PTM 0x1F /* Precision Time Measurement */ +#define PCI_EXT_CAP_ID_MAX PCI_EXT_CAP_ID_PTM #define PCI_EXT_CAP_DSN_SIZEOF 12 #define PCI_EXT_CAP_MCAST_ENDPOINT_SIZEOF 40 @@ -964,4 +968,13 @@ #define PCI_EXP_DPC_SOURCE_ID 10 /* DPC Source Identifier */ +/* Precision Time Measurement */ +#define PCI_PTM_CAP 0x04 /* PTM Capability */ +#define PCI_PTM_CAP_REQ 0x00000001 /* Requester capable */ +#define PCI_PTM_CAP_ROOT 0x00000004 /* Root capable */ +#define PCI_PTM_GRANULARITY_MASK 0x0000FF00 /* Clock granularity */ +#define PCI_PTM_CTRL 0x08 /* PTM Control */ +#define PCI_PTM_CTRL_ENABLE 0x00000001 /* PTM enable */ +#define PCI_PTM_CTRL_ROOT 0x00000002 /* Root select */ + #endif /* LINUX_PCI_REGS_H */ diff --git a/linux-headers/asm-arm/kvm.h b/linux-headers/asm-arm/kvm.h index 541268c..2fb7859 100644 --- a/linux-headers/asm-arm/kvm.h +++ b/linux-headers/asm-arm/kvm.h @@ -84,6 +84,13 @@ struct kvm_regs { #define KVM_VGIC_V2_DIST_SIZE 0x1000 #define KVM_VGIC_V2_CPU_SIZE 0x2000 +/* Supported VGICv3 address types */ +#define KVM_VGIC_V3_ADDR_TYPE_DIST 2 +#define KVM_VGIC_V3_ADDR_TYPE_REDIST 3 + +#define KVM_VGIC_V3_DIST_SIZE SZ_64K +#define KVM_VGIC_V3_REDIST_SIZE (2 * SZ_64K) + #define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */ #define KVM_ARM_VCPU_PSCI_0_2 1 /* CPU uses PSCI v0.2 */ diff --git a/linux-headers/asm-x86/unistd_32.h b/linux-headers/asm-x86/unistd_32.h index abeaf40..d45ea28 100644 --- a/linux-headers/asm-x86/unistd_32.h +++ b/linux-headers/asm-x86/unistd_32.h @@ -377,5 +377,8 @@ #define __NR_copy_file_range 377 #define __NR_preadv2 378 #define __NR_pwritev2 379 +#define __NR_pkey_mprotect 380 +#define __NR_pkey_alloc 381 +#define __NR_pkey_free 382 #endif /* _ASM_X86_UNISTD_32_H */ diff --git a/linux-headers/asm-x86/unistd_64.h b/linux-headers/asm-x86/unistd_64.h index 73c3d1f..e22db91 100644 --- a/linux-headers/asm-x86/unistd_64.h +++ b/linux-headers/asm-x86/unistd_64.h @@ -330,5 +330,8 @@ #define __NR_copy_file_range 326 #define __NR_preadv2 327 #define __NR_pwritev2 328 +#define __NR_pkey_mprotect 329 +#define __NR_pkey_alloc 330 +#define __NR_pkey_free 331 #endif /* _ASM_X86_UNISTD_64_H */ diff --git a/linux-headers/asm-x86/unistd_x32.h b/linux-headers/asm-x86/unistd_x32.h index e5aea76..84e58b2 100644 --- a/linux-headers/asm-x86/unistd_x32.h +++ b/linux-headers/asm-x86/unistd_x32.h @@ -283,6 +283,9 @@ #define __NR_membarrier (__X32_SYSCALL_BIT + 324) #define __NR_mlock2 (__X32_SYSCALL_BIT + 325) #define __NR_copy_file_range (__X32_SYSCALL_BIT + 326) +#define __NR_pkey_mprotect (__X32_SYSCALL_BIT + 329) +#define __NR_pkey_alloc (__X32_SYSCALL_BIT + 330) +#define __NR_pkey_free (__X32_SYSCALL_BIT + 331) #define __NR_rt_sigaction (__X32_SYSCALL_BIT + 512) #define __NR_rt_sigreturn (__X32_SYSCALL_BIT + 513) #define __NR_ioctl (__X32_SYSCALL_BIT + 514) diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h index 4806e06..bb0ed71 100644 --- a/linux-headers/linux/kvm.h +++ b/linux-headers/linux/kvm.h @@ -972,12 +972,19 @@ struct kvm_irqfd { __u8 pad[16]; }; +/* For KVM_CAP_ADJUST_CLOCK */ + +/* Do not use 1, KVM_CHECK_EXTENSION returned it before we had flags. */ +#define KVM_CLOCK_TSC_STABLE 2 + struct kvm_clock_data { __u64 clock; __u32 flags; __u32 pad[9]; }; +/* For KVM_CAP_SW_TLB */ + #define KVM_MMU_FSL_BOOKE_NOHV 0 #define KVM_MMU_FSL_BOOKE_HV 1 ^ permalink raw reply related [flat|nested] 46+ messages in thread
* [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration 2016-11-14 12:36 ` [Qemu-devel] " Marcelo Tosatti @ 2016-11-14 12:36 ` Marcelo Tosatti -1 siblings, 0 replies; 46+ messages in thread From: Marcelo Tosatti @ 2016-11-14 12:36 UTC (permalink / raw) To: kvm Cc: qemu-devel, Dr. David Alan Gilbert, Paolo Bonzini, Juan Quintela, Radim Krcmar, Eduardo Habkost, Marcelo Tosatti [-- Attachment #1: kvmclock-advance-clock.patch --] [-- Type: text/plain, Size: 6623 bytes --] Check for KVM_CAP_ADJUST_CLOCK capability KVM_CLOCK_TSC_STABLE, which indicates that KVM_GET_CLOCK returns a value as seen by the guest at that moment. For new machine types, use this value rather than reading from guest memory. This reduces kvmclock difference on migration from 5s to 0.1s (when max_downtime == 5s). Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com> --- hw/i386/kvm/clock.c | 88 +++++++++++++++++++++++++++++++++++++++++++++++-- include/hw/i386/pc.h | 5 ++ target-i386/kvm.c | 7 +++ target-i386/kvm_i386.h | 1 4 files changed, 98 insertions(+), 3 deletions(-) Index: qemu-mig-advance-clock/hw/i386/kvm/clock.c =================================================================== --- qemu-mig-advance-clock.orig/hw/i386/kvm/clock.c 2016-11-14 09:07:55.519856329 -0200 +++ qemu-mig-advance-clock/hw/i386/kvm/clock.c 2016-11-14 10:19:45.723254737 -0200 @@ -36,6 +36,12 @@ uint64_t clock; bool clock_valid; + + /* whether machine supports reliable KVM_GET_CLOCK */ + bool mach_use_reliable_get_clock; + + /* whether source host supported reliable KVM_GET_CLOCK */ + bool src_use_reliable_get_clock; } KVMClockState; struct pvclock_vcpu_time_info { @@ -91,15 +97,37 @@ if (running) { struct kvm_clock_data data = {}; - uint64_t time_at_migration = kvmclock_current_nsec(s); + uint64_t time_at_migration = 0; - s->clock_valid = false; + /* local (running VM) restore */ + if (s->clock_valid) { + /* + * if host does not support reliable KVM_GET_CLOCK, + * read kvmclock value from memory + */ + if (!kvm_has_adjust_clock_stable()) { + time_at_migration = kvmclock_current_nsec(s); + } + /* migration/savevm/init restore */ + } else { + /* + * use s->clock in case machine uses reliable + * get clock and host where vm was executing + * supported reliable get clock + */ + if (!s->mach_use_reliable_get_clock || + !s->src_use_reliable_get_clock) { + time_at_migration = kvmclock_current_nsec(s); + } + } - /* We can't rely on the migrated clock value, just discard it */ + /* We can't rely on the saved clock value, just discard it */ if (time_at_migration) { s->clock = time_at_migration; } + s->clock_valid = false; + data.clock = s->clock; ret = kvm_vm_ioctl(kvm_state, KVM_SET_CLOCK, &data); if (ret < 0) { @@ -152,22 +180,76 @@ qemu_add_vm_change_state_handler(kvmclock_vm_state_change, s); } +static bool kvmclock_src_use_reliable_get_clock(void *opaque) +{ + KVMClockState *s = opaque; + + /* + * On machine types that support reliable KVM_GET_CLOCK, + * if host kernel does provide reliable KVM_GET_CLOCK, + * set src_use_reliable_get_clock=true so that destination + * avoids reading kvmclock from memory. + */ + if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) { + s->src_use_reliable_get_clock = true; + } + + return s->src_use_reliable_get_clock; +} + +static const VMStateDescription kvmclock_reliable_get_clock = { + .name = "kvmclock/src_use_reliable_get_clock", + .version_id = 1, + .minimum_version_id = 1, + .needed = kvmclock_src_use_reliable_get_clock, + .fields = (VMStateField[]) { + VMSTATE_BOOL(src_use_reliable_get_clock, KVMClockState), + VMSTATE_END_OF_LIST() + } +}; + +static void kvmclock_pre_save(void *opaque) +{ + KVMClockState *s = opaque; + struct kvm_clock_data data; + int ret; + + ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data); + if (ret < 0) { + fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret)); + abort(); + } + s->clock = data.clock; +} + static const VMStateDescription kvmclock_vmsd = { .name = "kvmclock", .version_id = 1, .minimum_version_id = 1, + .pre_save = kvmclock_pre_save, .fields = (VMStateField[]) { VMSTATE_UINT64(clock, KVMClockState), VMSTATE_END_OF_LIST() + }, + .subsections = (const VMStateDescription * []) { + &kvmclock_reliable_get_clock, + NULL } }; +static Property kvmclock_properties[] = { + DEFINE_PROP_BOOL("mach_use_reliable_get_clock", KVMClockState, + mach_use_reliable_get_clock, true), + DEFINE_PROP_END_OF_LIST(), +}; + static void kvmclock_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); dc->realize = kvmclock_realize; dc->vmsd = &kvmclock_vmsd; + dc->props = kvmclock_properties; } static const TypeInfo kvmclock_info = { Index: qemu-mig-advance-clock/include/hw/i386/pc.h =================================================================== --- qemu-mig-advance-clock.orig/include/hw/i386/pc.h 2016-11-14 09:07:55.519856329 -0200 +++ qemu-mig-advance-clock/include/hw/i386/pc.h 2016-11-14 09:11:47.112200123 -0200 @@ -370,6 +370,11 @@ #define PC_COMPAT_2_7 \ HW_COMPAT_2_7 \ {\ + .driver = "kvmclock",\ + .property = "mach_use_reliable_get_clock",\ + .value = "off",\ + },\ + {\ .driver = TYPE_X86_CPU,\ .property = "l3-cache",\ .value = "off",\ Index: qemu-mig-advance-clock/target-i386/kvm.c =================================================================== --- qemu-mig-advance-clock.orig/target-i386/kvm.c 2016-11-14 09:07:55.520856330 -0200 +++ qemu-mig-advance-clock/target-i386/kvm.c 2016-11-14 09:11:47.125200142 -0200 @@ -117,6 +117,13 @@ return kvm_check_extension(kvm_state, KVM_CAP_X86_SMM); } +bool kvm_has_adjust_clock_stable(void) +{ + int ret = kvm_check_extension(kvm_state, KVM_CAP_ADJUST_CLOCK); + + return (ret == KVM_CLOCK_TSC_STABLE); +} + bool kvm_allows_irq0_override(void) { return !kvm_irqchip_in_kernel() || kvm_has_gsi_routing(); Index: qemu-mig-advance-clock/target-i386/kvm_i386.h =================================================================== --- qemu-mig-advance-clock.orig/target-i386/kvm_i386.h 2016-11-14 09:07:55.520856330 -0200 +++ qemu-mig-advance-clock/target-i386/kvm_i386.h 2016-11-14 09:11:47.125200142 -0200 @@ -17,6 +17,7 @@ bool kvm_allows_irq0_override(void); bool kvm_has_smm(void); +bool kvm_has_adjust_clock_stable(void); void kvm_synchronize_all_tsc(void); void kvm_arch_reset_vcpu(X86CPU *cs); void kvm_arch_do_init_vcpu(X86CPU *cs); ^ permalink raw reply [flat|nested] 46+ messages in thread
* [Qemu-devel] [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration @ 2016-11-14 12:36 ` Marcelo Tosatti 0 siblings, 0 replies; 46+ messages in thread From: Marcelo Tosatti @ 2016-11-14 12:36 UTC (permalink / raw) To: kvm Cc: qemu-devel, Dr. David Alan Gilbert, Paolo Bonzini, Juan Quintela, Radim Krcmar, Eduardo Habkost, Marcelo Tosatti [-- Attachment #1: kvmclock-advance-clock.patch --] [-- Type: text/plain, Size: 6621 bytes --] Check for KVM_CAP_ADJUST_CLOCK capability KVM_CLOCK_TSC_STABLE, which indicates that KVM_GET_CLOCK returns a value as seen by the guest at that moment. For new machine types, use this value rather than reading from guest memory. This reduces kvmclock difference on migration from 5s to 0.1s (when max_downtime == 5s). Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com> --- hw/i386/kvm/clock.c | 88 +++++++++++++++++++++++++++++++++++++++++++++++-- include/hw/i386/pc.h | 5 ++ target-i386/kvm.c | 7 +++ target-i386/kvm_i386.h | 1 4 files changed, 98 insertions(+), 3 deletions(-) Index: qemu-mig-advance-clock/hw/i386/kvm/clock.c =================================================================== --- qemu-mig-advance-clock.orig/hw/i386/kvm/clock.c 2016-11-14 09:07:55.519856329 -0200 +++ qemu-mig-advance-clock/hw/i386/kvm/clock.c 2016-11-14 10:19:45.723254737 -0200 @@ -36,6 +36,12 @@ uint64_t clock; bool clock_valid; + + /* whether machine supports reliable KVM_GET_CLOCK */ + bool mach_use_reliable_get_clock; + + /* whether source host supported reliable KVM_GET_CLOCK */ + bool src_use_reliable_get_clock; } KVMClockState; struct pvclock_vcpu_time_info { @@ -91,15 +97,37 @@ if (running) { struct kvm_clock_data data = {}; - uint64_t time_at_migration = kvmclock_current_nsec(s); + uint64_t time_at_migration = 0; - s->clock_valid = false; + /* local (running VM) restore */ + if (s->clock_valid) { + /* + * if host does not support reliable KVM_GET_CLOCK, + * read kvmclock value from memory + */ + if (!kvm_has_adjust_clock_stable()) { + time_at_migration = kvmclock_current_nsec(s); + } + /* migration/savevm/init restore */ + } else { + /* + * use s->clock in case machine uses reliable + * get clock and host where vm was executing + * supported reliable get clock + */ + if (!s->mach_use_reliable_get_clock || + !s->src_use_reliable_get_clock) { + time_at_migration = kvmclock_current_nsec(s); + } + } - /* We can't rely on the migrated clock value, just discard it */ + /* We can't rely on the saved clock value, just discard it */ if (time_at_migration) { s->clock = time_at_migration; } + s->clock_valid = false; + data.clock = s->clock; ret = kvm_vm_ioctl(kvm_state, KVM_SET_CLOCK, &data); if (ret < 0) { @@ -152,22 +180,76 @@ qemu_add_vm_change_state_handler(kvmclock_vm_state_change, s); } +static bool kvmclock_src_use_reliable_get_clock(void *opaque) +{ + KVMClockState *s = opaque; + + /* + * On machine types that support reliable KVM_GET_CLOCK, + * if host kernel does provide reliable KVM_GET_CLOCK, + * set src_use_reliable_get_clock=true so that destination + * avoids reading kvmclock from memory. + */ + if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) { + s->src_use_reliable_get_clock = true; + } + + return s->src_use_reliable_get_clock; +} + +static const VMStateDescription kvmclock_reliable_get_clock = { + .name = "kvmclock/src_use_reliable_get_clock", + .version_id = 1, + .minimum_version_id = 1, + .needed = kvmclock_src_use_reliable_get_clock, + .fields = (VMStateField[]) { + VMSTATE_BOOL(src_use_reliable_get_clock, KVMClockState), + VMSTATE_END_OF_LIST() + } +}; + +static void kvmclock_pre_save(void *opaque) +{ + KVMClockState *s = opaque; + struct kvm_clock_data data; + int ret; + + ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data); + if (ret < 0) { + fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret)); + abort(); + } + s->clock = data.clock; +} + static const VMStateDescription kvmclock_vmsd = { .name = "kvmclock", .version_id = 1, .minimum_version_id = 1, + .pre_save = kvmclock_pre_save, .fields = (VMStateField[]) { VMSTATE_UINT64(clock, KVMClockState), VMSTATE_END_OF_LIST() + }, + .subsections = (const VMStateDescription * []) { + &kvmclock_reliable_get_clock, + NULL } }; +static Property kvmclock_properties[] = { + DEFINE_PROP_BOOL("mach_use_reliable_get_clock", KVMClockState, + mach_use_reliable_get_clock, true), + DEFINE_PROP_END_OF_LIST(), +}; + static void kvmclock_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); dc->realize = kvmclock_realize; dc->vmsd = &kvmclock_vmsd; + dc->props = kvmclock_properties; } static const TypeInfo kvmclock_info = { Index: qemu-mig-advance-clock/include/hw/i386/pc.h =================================================================== --- qemu-mig-advance-clock.orig/include/hw/i386/pc.h 2016-11-14 09:07:55.519856329 -0200 +++ qemu-mig-advance-clock/include/hw/i386/pc.h 2016-11-14 09:11:47.112200123 -0200 @@ -370,6 +370,11 @@ #define PC_COMPAT_2_7 \ HW_COMPAT_2_7 \ {\ + .driver = "kvmclock",\ + .property = "mach_use_reliable_get_clock",\ + .value = "off",\ + },\ + {\ .driver = TYPE_X86_CPU,\ .property = "l3-cache",\ .value = "off",\ Index: qemu-mig-advance-clock/target-i386/kvm.c =================================================================== --- qemu-mig-advance-clock.orig/target-i386/kvm.c 2016-11-14 09:07:55.520856330 -0200 +++ qemu-mig-advance-clock/target-i386/kvm.c 2016-11-14 09:11:47.125200142 -0200 @@ -117,6 +117,13 @@ return kvm_check_extension(kvm_state, KVM_CAP_X86_SMM); } +bool kvm_has_adjust_clock_stable(void) +{ + int ret = kvm_check_extension(kvm_state, KVM_CAP_ADJUST_CLOCK); + + return (ret == KVM_CLOCK_TSC_STABLE); +} + bool kvm_allows_irq0_override(void) { return !kvm_irqchip_in_kernel() || kvm_has_gsi_routing(); Index: qemu-mig-advance-clock/target-i386/kvm_i386.h =================================================================== --- qemu-mig-advance-clock.orig/target-i386/kvm_i386.h 2016-11-14 09:07:55.520856330 -0200 +++ qemu-mig-advance-clock/target-i386/kvm_i386.h 2016-11-14 09:11:47.125200142 -0200 @@ -17,6 +17,7 @@ bool kvm_allows_irq0_override(void); bool kvm_has_smm(void); +bool kvm_has_adjust_clock_stable(void); void kvm_synchronize_all_tsc(void); void kvm_arch_reset_vcpu(X86CPU *cs); void kvm_arch_do_init_vcpu(X86CPU *cs); ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration 2016-11-14 12:36 ` [Qemu-devel] " Marcelo Tosatti @ 2016-11-14 13:54 ` Paolo Bonzini -1 siblings, 0 replies; 46+ messages in thread From: Paolo Bonzini @ 2016-11-14 13:54 UTC (permalink / raw) To: Marcelo Tosatti, kvm Cc: qemu-devel, Dr. David Alan Gilbert, Juan Quintela, Radim Krcmar, Eduardo Habkost On 14/11/2016 13:36, Marcelo Tosatti wrote: > + /* local (running VM) restore */ > + if (s->clock_valid) { > + /* > + * if host does not support reliable KVM_GET_CLOCK, > + * read kvmclock value from memory > + */ > + if (!kvm_has_adjust_clock_stable()) { > + time_at_migration = kvmclock_current_nsec(s); Just assign to s->clock here... > + } > + /* migration/savevm/init restore */ > + } else { > + /* > + * use s->clock in case machine uses reliable > + * get clock and host where vm was executing > + * supported reliable get clock > + */ > + if (!s->mach_use_reliable_get_clock || > + !s->src_use_reliable_get_clock) { > + time_at_migration = kvmclock_current_nsec(s); ... and here, so that time_at_migration is not needed anymore. Also here it's enough to look at s->src_user_reliable_get_clock, because if s->mach_use_reliable_get_clock is false, s->src_use_reliable_get_clock will be false as well. > + } > + } > > - /* We can't rely on the migrated clock value, just discard it */ > + /* We can't rely on the saved clock value, just discard it */ > if (time_at_migration) { > s->clock = time_at_migration; [...] > > +static bool kvmclock_src_use_reliable_get_clock(void *opaque) > +{ > + KVMClockState *s = opaque; > + > + /* > + * On machine types that support reliable KVM_GET_CLOCK, > + * if host kernel does provide reliable KVM_GET_CLOCK, > + * set src_use_reliable_get_clock=true so that destination > + * avoids reading kvmclock from memory. > + */ > + if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) { > + s->src_use_reliable_get_clock = true; > + } > + > + return s->src_use_reliable_get_clock; > +} Here you can just return s->mach_use_reliable_get_clock. To set s->src_use_reliable_get_clock, after issuing KVM_GET_CLOCK you can look at the KVM_CLOCK_TSC_STABLE bit in the kvm_clock struct's flags. You don't actually need kvm_has_adjust_clock_stable(), but please place the KVM_GET_CLOCK code for kvmclock_pre_save and kvmclock_vm_state_change in a common function. Also, just another small nit: please make your scripts use the "-p" option on diff. :) Thanks, Paolo ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [Qemu-devel] [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration @ 2016-11-14 13:54 ` Paolo Bonzini 0 siblings, 0 replies; 46+ messages in thread From: Paolo Bonzini @ 2016-11-14 13:54 UTC (permalink / raw) To: Marcelo Tosatti, kvm Cc: qemu-devel, Dr. David Alan Gilbert, Juan Quintela, Radim Krcmar, Eduardo Habkost On 14/11/2016 13:36, Marcelo Tosatti wrote: > + /* local (running VM) restore */ > + if (s->clock_valid) { > + /* > + * if host does not support reliable KVM_GET_CLOCK, > + * read kvmclock value from memory > + */ > + if (!kvm_has_adjust_clock_stable()) { > + time_at_migration = kvmclock_current_nsec(s); Just assign to s->clock here... > + } > + /* migration/savevm/init restore */ > + } else { > + /* > + * use s->clock in case machine uses reliable > + * get clock and host where vm was executing > + * supported reliable get clock > + */ > + if (!s->mach_use_reliable_get_clock || > + !s->src_use_reliable_get_clock) { > + time_at_migration = kvmclock_current_nsec(s); ... and here, so that time_at_migration is not needed anymore. Also here it's enough to look at s->src_user_reliable_get_clock, because if s->mach_use_reliable_get_clock is false, s->src_use_reliable_get_clock will be false as well. > + } > + } > > - /* We can't rely on the migrated clock value, just discard it */ > + /* We can't rely on the saved clock value, just discard it */ > if (time_at_migration) { > s->clock = time_at_migration; [...] > > +static bool kvmclock_src_use_reliable_get_clock(void *opaque) > +{ > + KVMClockState *s = opaque; > + > + /* > + * On machine types that support reliable KVM_GET_CLOCK, > + * if host kernel does provide reliable KVM_GET_CLOCK, > + * set src_use_reliable_get_clock=true so that destination > + * avoids reading kvmclock from memory. > + */ > + if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) { > + s->src_use_reliable_get_clock = true; > + } > + > + return s->src_use_reliable_get_clock; > +} Here you can just return s->mach_use_reliable_get_clock. To set s->src_use_reliable_get_clock, after issuing KVM_GET_CLOCK you can look at the KVM_CLOCK_TSC_STABLE bit in the kvm_clock struct's flags. You don't actually need kvm_has_adjust_clock_stable(), but please place the KVM_GET_CLOCK code for kvmclock_pre_save and kvmclock_vm_state_change in a common function. Also, just another small nit: please make your scripts use the "-p" option on diff. :) Thanks, Paolo ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration 2016-11-14 13:54 ` [Qemu-devel] " Paolo Bonzini @ 2016-11-14 14:00 ` Marcelo Tosatti -1 siblings, 0 replies; 46+ messages in thread From: Marcelo Tosatti @ 2016-11-14 14:00 UTC (permalink / raw) To: Paolo Bonzini Cc: kvm, qemu-devel, Dr. David Alan Gilbert, Juan Quintela, Radim Krcmar, Eduardo Habkost On Mon, Nov 14, 2016 at 02:54:38PM +0100, Paolo Bonzini wrote: > > > On 14/11/2016 13:36, Marcelo Tosatti wrote: > > + /* local (running VM) restore */ > > + if (s->clock_valid) { > > + /* > > + * if host does not support reliable KVM_GET_CLOCK, > > + * read kvmclock value from memory > > + */ > > + if (!kvm_has_adjust_clock_stable()) { > > + time_at_migration = kvmclock_current_nsec(s); > > Just assign to s->clock here... If kvmclock is not enabled, you want to use s->clock, rather than 0. > > + } > > + /* migration/savevm/init restore */ > > + } else { > > + /* > > + * use s->clock in case machine uses reliable > > + * get clock and host where vm was executing > > + * supported reliable get clock > > + */ > > + if (!s->mach_use_reliable_get_clock || > > + !s->src_use_reliable_get_clock) { > > + time_at_migration = kvmclock_current_nsec(s); > > ... and here, so that time_at_migration is not needed anymore. Same as above. > Also here it's enough to look at s->src_user_reliable_get_clock, because > if s->mach_use_reliable_get_clock is false, > s->src_use_reliable_get_clock will be false as well. Yes, but i like the code annotation. > > + } > > + } > > > > - /* We can't rely on the migrated clock value, just discard it */ > > + /* We can't rely on the saved clock value, just discard it */ > > if (time_at_migration) { > > s->clock = time_at_migration; > > [...] > > > > > +static bool kvmclock_src_use_reliable_get_clock(void *opaque) > > +{ > > + KVMClockState *s = opaque; > > + > > + /* > > + * On machine types that support reliable KVM_GET_CLOCK, > > + * if host kernel does provide reliable KVM_GET_CLOCK, > > + * set src_use_reliable_get_clock=true so that destination > > + * avoids reading kvmclock from memory. > > + */ > > + if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) { > > + s->src_use_reliable_get_clock = true; > > + } > > + > > + return s->src_use_reliable_get_clock; > > +} > > Here you can just return s->mach_use_reliable_get_clock. mach_use_reliable_get_clock can be true but host might not support it. > To set > s->src_use_reliable_get_clock, after issuing KVM_GET_CLOCK you can look > at the KVM_CLOCK_TSC_STABLE bit in the kvm_clock struct's flags. KVM_CLOCK_TSC_STABLE bit in the kvmclock structure != KVM_GET_CLOCK returns reliable value, right? > You don't actually need kvm_has_adjust_clock_stable(), but please place > the KVM_GET_CLOCK code for kvmclock_pre_save and > kvmclock_vm_state_change in a common function. Sure. > Also, just another small nit: please make your scripts use the "-p" > option on diff. :) Sure. > > Thanks, > > Paolo ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [Qemu-devel] [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration @ 2016-11-14 14:00 ` Marcelo Tosatti 0 siblings, 0 replies; 46+ messages in thread From: Marcelo Tosatti @ 2016-11-14 14:00 UTC (permalink / raw) To: Paolo Bonzini Cc: kvm, qemu-devel, Dr. David Alan Gilbert, Juan Quintela, Radim Krcmar, Eduardo Habkost On Mon, Nov 14, 2016 at 02:54:38PM +0100, Paolo Bonzini wrote: > > > On 14/11/2016 13:36, Marcelo Tosatti wrote: > > + /* local (running VM) restore */ > > + if (s->clock_valid) { > > + /* > > + * if host does not support reliable KVM_GET_CLOCK, > > + * read kvmclock value from memory > > + */ > > + if (!kvm_has_adjust_clock_stable()) { > > + time_at_migration = kvmclock_current_nsec(s); > > Just assign to s->clock here... If kvmclock is not enabled, you want to use s->clock, rather than 0. > > + } > > + /* migration/savevm/init restore */ > > + } else { > > + /* > > + * use s->clock in case machine uses reliable > > + * get clock and host where vm was executing > > + * supported reliable get clock > > + */ > > + if (!s->mach_use_reliable_get_clock || > > + !s->src_use_reliable_get_clock) { > > + time_at_migration = kvmclock_current_nsec(s); > > ... and here, so that time_at_migration is not needed anymore. Same as above. > Also here it's enough to look at s->src_user_reliable_get_clock, because > if s->mach_use_reliable_get_clock is false, > s->src_use_reliable_get_clock will be false as well. Yes, but i like the code annotation. > > + } > > + } > > > > - /* We can't rely on the migrated clock value, just discard it */ > > + /* We can't rely on the saved clock value, just discard it */ > > if (time_at_migration) { > > s->clock = time_at_migration; > > [...] > > > > > +static bool kvmclock_src_use_reliable_get_clock(void *opaque) > > +{ > > + KVMClockState *s = opaque; > > + > > + /* > > + * On machine types that support reliable KVM_GET_CLOCK, > > + * if host kernel does provide reliable KVM_GET_CLOCK, > > + * set src_use_reliable_get_clock=true so that destination > > + * avoids reading kvmclock from memory. > > + */ > > + if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) { > > + s->src_use_reliable_get_clock = true; > > + } > > + > > + return s->src_use_reliable_get_clock; > > +} > > Here you can just return s->mach_use_reliable_get_clock. mach_use_reliable_get_clock can be true but host might not support it. > To set > s->src_use_reliable_get_clock, after issuing KVM_GET_CLOCK you can look > at the KVM_CLOCK_TSC_STABLE bit in the kvm_clock struct's flags. KVM_CLOCK_TSC_STABLE bit in the kvmclock structure != KVM_GET_CLOCK returns reliable value, right? > You don't actually need kvm_has_adjust_clock_stable(), but please place > the KVM_GET_CLOCK code for kvmclock_pre_save and > kvmclock_vm_state_change in a common function. Sure. > Also, just another small nit: please make your scripts use the "-p" > option on diff. :) Sure. > > Thanks, > > Paolo ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration 2016-11-14 14:00 ` [Qemu-devel] " Marcelo Tosatti @ 2016-11-14 14:22 ` Paolo Bonzini -1 siblings, 0 replies; 46+ messages in thread From: Paolo Bonzini @ 2016-11-14 14:22 UTC (permalink / raw) To: Marcelo Tosatti Cc: kvm, qemu-devel, Dr. David Alan Gilbert, Juan Quintela, Radim Krcmar, Eduardo Habkost On 14/11/2016 15:00, Marcelo Tosatti wrote: > On Mon, Nov 14, 2016 at 02:54:38PM +0100, Paolo Bonzini wrote: >> >> >> On 14/11/2016 13:36, Marcelo Tosatti wrote: >>> + /* local (running VM) restore */ >>> + if (s->clock_valid) { >>> + /* >>> + * if host does not support reliable KVM_GET_CLOCK, >>> + * read kvmclock value from memory >>> + */ >>> + if (!kvm_has_adjust_clock_stable()) { >>> + time_at_migration = kvmclock_current_nsec(s); >> >> Just assign to s->clock here... > > If kvmclock is not enabled, you want to use s->clock, > rather than 0. > >>> + } >>> + /* migration/savevm/init restore */ >>> + } else { >>> + /* >>> + * use s->clock in case machine uses reliable >>> + * get clock and host where vm was executing >>> + * supported reliable get clock >>> + */ >>> + if (!s->mach_use_reliable_get_clock || >>> + !s->src_use_reliable_get_clock) { >>> + time_at_migration = kvmclock_current_nsec(s); >> >> ... and here, so that time_at_migration is not needed anymore. > > Same as above. You're right. >> Also here it's enough to look at s->src_user_reliable_get_clock, because >> if s->mach_use_reliable_get_clock is false, >> s->src_use_reliable_get_clock will be false as well. > > Yes, but i like the code annotation. Ah, I think we're looking at it differently. I'm thinking "mach_use_reliable_get_clock is just for migration, src_use_reliable_get_clock is the state". Perhaps you're thinking of enabling/disabling the whole new code for old machines? What is the advantage? >>> + } >>> + } >>> >>> - /* We can't rely on the migrated clock value, just discard it */ >>> + /* We can't rely on the saved clock value, just discard it */ >>> if (time_at_migration) { >>> s->clock = time_at_migration; >> >> [...] >> >>> >>> +static bool kvmclock_src_use_reliable_get_clock(void *opaque) >>> +{ >>> + KVMClockState *s = opaque; >>> + >>> + /* >>> + * On machine types that support reliable KVM_GET_CLOCK, >>> + * if host kernel does provide reliable KVM_GET_CLOCK, >>> + * set src_use_reliable_get_clock=true so that destination >>> + * avoids reading kvmclock from memory. >>> + */ >>> + if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) { >>> + s->src_use_reliable_get_clock = true; >>> + } >>> + >>> + return s->src_use_reliable_get_clock; >>> +} >> >> Here you can just return s->mach_use_reliable_get_clock. > > mach_use_reliable_get_clock can be true but host might not support it. Yes, but the "needed" function is only required to avoid breaking pc-i440fx-2.7 and earlier. If you return true here, you can still migrate a "false" value for src_use_reliable_get_clock. >> To set >> s->src_use_reliable_get_clock, after issuing KVM_GET_CLOCK you can look >> at the KVM_CLOCK_TSC_STABLE bit in the kvm_clock struct's flags. > > KVM_CLOCK_TSC_STABLE bit in the kvmclock structure != > KVM_GET_CLOCK returns reliable value, right? It is the same as "is using masterclock", which is actually a stricter condition than the KVM_CHECK_EXTENSION return value. The right check to use is whether masterclock is in use, and then the idea is to treat clock,src_use_reliable_get_clock as one tuple that is updated atomically. Paolo ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [Qemu-devel] [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration @ 2016-11-14 14:22 ` Paolo Bonzini 0 siblings, 0 replies; 46+ messages in thread From: Paolo Bonzini @ 2016-11-14 14:22 UTC (permalink / raw) To: Marcelo Tosatti Cc: kvm, qemu-devel, Dr. David Alan Gilbert, Juan Quintela, Radim Krcmar, Eduardo Habkost On 14/11/2016 15:00, Marcelo Tosatti wrote: > On Mon, Nov 14, 2016 at 02:54:38PM +0100, Paolo Bonzini wrote: >> >> >> On 14/11/2016 13:36, Marcelo Tosatti wrote: >>> + /* local (running VM) restore */ >>> + if (s->clock_valid) { >>> + /* >>> + * if host does not support reliable KVM_GET_CLOCK, >>> + * read kvmclock value from memory >>> + */ >>> + if (!kvm_has_adjust_clock_stable()) { >>> + time_at_migration = kvmclock_current_nsec(s); >> >> Just assign to s->clock here... > > If kvmclock is not enabled, you want to use s->clock, > rather than 0. > >>> + } >>> + /* migration/savevm/init restore */ >>> + } else { >>> + /* >>> + * use s->clock in case machine uses reliable >>> + * get clock and host where vm was executing >>> + * supported reliable get clock >>> + */ >>> + if (!s->mach_use_reliable_get_clock || >>> + !s->src_use_reliable_get_clock) { >>> + time_at_migration = kvmclock_current_nsec(s); >> >> ... and here, so that time_at_migration is not needed anymore. > > Same as above. You're right. >> Also here it's enough to look at s->src_user_reliable_get_clock, because >> if s->mach_use_reliable_get_clock is false, >> s->src_use_reliable_get_clock will be false as well. > > Yes, but i like the code annotation. Ah, I think we're looking at it differently. I'm thinking "mach_use_reliable_get_clock is just for migration, src_use_reliable_get_clock is the state". Perhaps you're thinking of enabling/disabling the whole new code for old machines? What is the advantage? >>> + } >>> + } >>> >>> - /* We can't rely on the migrated clock value, just discard it */ >>> + /* We can't rely on the saved clock value, just discard it */ >>> if (time_at_migration) { >>> s->clock = time_at_migration; >> >> [...] >> >>> >>> +static bool kvmclock_src_use_reliable_get_clock(void *opaque) >>> +{ >>> + KVMClockState *s = opaque; >>> + >>> + /* >>> + * On machine types that support reliable KVM_GET_CLOCK, >>> + * if host kernel does provide reliable KVM_GET_CLOCK, >>> + * set src_use_reliable_get_clock=true so that destination >>> + * avoids reading kvmclock from memory. >>> + */ >>> + if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) { >>> + s->src_use_reliable_get_clock = true; >>> + } >>> + >>> + return s->src_use_reliable_get_clock; >>> +} >> >> Here you can just return s->mach_use_reliable_get_clock. > > mach_use_reliable_get_clock can be true but host might not support it. Yes, but the "needed" function is only required to avoid breaking pc-i440fx-2.7 and earlier. If you return true here, you can still migrate a "false" value for src_use_reliable_get_clock. >> To set >> s->src_use_reliable_get_clock, after issuing KVM_GET_CLOCK you can look >> at the KVM_CLOCK_TSC_STABLE bit in the kvm_clock struct's flags. > > KVM_CLOCK_TSC_STABLE bit in the kvmclock structure != > KVM_GET_CLOCK returns reliable value, right? It is the same as "is using masterclock", which is actually a stricter condition than the KVM_CHECK_EXTENSION return value. The right check to use is whether masterclock is in use, and then the idea is to treat clock,src_use_reliable_get_clock as one tuple that is updated atomically. Paolo ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration 2016-11-14 14:22 ` [Qemu-devel] " Paolo Bonzini @ 2016-11-14 14:50 ` Marcelo Tosatti -1 siblings, 0 replies; 46+ messages in thread From: Marcelo Tosatti @ 2016-11-14 14:50 UTC (permalink / raw) To: Paolo Bonzini Cc: kvm, qemu-devel, Dr. David Alan Gilbert, Juan Quintela, Radim Krcmar, Eduardo Habkost On Mon, Nov 14, 2016 at 03:22:02PM +0100, Paolo Bonzini wrote: > > > On 14/11/2016 15:00, Marcelo Tosatti wrote: > > On Mon, Nov 14, 2016 at 02:54:38PM +0100, Paolo Bonzini wrote: > >> > >> > >> On 14/11/2016 13:36, Marcelo Tosatti wrote: > >>> + /* local (running VM) restore */ > >>> + if (s->clock_valid) { > >>> + /* > >>> + * if host does not support reliable KVM_GET_CLOCK, > >>> + * read kvmclock value from memory > >>> + */ > >>> + if (!kvm_has_adjust_clock_stable()) { > >>> + time_at_migration = kvmclock_current_nsec(s); > >> > >> Just assign to s->clock here... > > > > If kvmclock is not enabled, you want to use s->clock, > > rather than 0. > > > >>> + } > >>> + /* migration/savevm/init restore */ > >>> + } else { > >>> + /* > >>> + * use s->clock in case machine uses reliable > >>> + * get clock and host where vm was executing > >>> + * supported reliable get clock > >>> + */ > >>> + if (!s->mach_use_reliable_get_clock || > >>> + !s->src_use_reliable_get_clock) { > >>> + time_at_migration = kvmclock_current_nsec(s); > >> > >> ... and here, so that time_at_migration is not needed anymore. > > > > Same as above. > > You're right. > > >> Also here it's enough to look at s->src_user_reliable_get_clock, because > >> if s->mach_use_reliable_get_clock is false, > >> s->src_use_reliable_get_clock will be false as well. > > > > Yes, but i like the code annotation. > > Ah, I think we're looking at it differently. Well, i didnt want to mix the meaning of the variables: + /* whether machine supports reliable KVM_GET_CLOCK */ + bool mach_use_reliable_get_clock; + + /* whether source host supported reliable KVM_GET_CLOCK */ + bool src_use_reliable_get_clock; See the comments on top (later if you look at the variable, then have to think: well it has one name, but its disabled by that other path as well, so its more than its name,etc...). > I'm thinking "mach_use_reliable_get_clock is just for migration, Thats whether the machine supports it. New machines have it enabled, olders don't. > src_use_reliable_get_clock is the state". Thats whether the migration source supported it. > Perhaps you're thinking of > enabling/disabling the whole new code for old machines? source destination behaviour supports supports on migration use s->clock, on stop/cont as well supports ~supports on migration use s->clock, on stop/cont read from guest mem ~support supports on migration read from guest, on stop/cont use kvm_get_clock/kvm_set_clock ~support ~support always read from guest memory Thats what should happen (and thats what the patch should implement). "support" means host supports your new KVM_GET_CLOCK/KVM_SET_CLOCK. > What is the > advantage? Well its necessary to use the correct thing, otherwise you see a time backwards event. > > >>> + } > >>> + } > >>> > >>> - /* We can't rely on the migrated clock value, just discard it */ > >>> + /* We can't rely on the saved clock value, just discard it */ > >>> if (time_at_migration) { > >>> s->clock = time_at_migration; > >> > >> [...] > >> > >>> > >>> +static bool kvmclock_src_use_reliable_get_clock(void *opaque) > >>> +{ > >>> + KVMClockState *s = opaque; > >>> + > >>> + /* > >>> + * On machine types that support reliable KVM_GET_CLOCK, > >>> + * if host kernel does provide reliable KVM_GET_CLOCK, > >>> + * set src_use_reliable_get_clock=true so that destination > >>> + * avoids reading kvmclock from memory. > >>> + */ > >>> + if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) { > >>> + s->src_use_reliable_get_clock = true; > >>> + } > >>> + > >>> + return s->src_use_reliable_get_clock; > >>> +} > >> > >> Here you can just return s->mach_use_reliable_get_clock. > > > > mach_use_reliable_get_clock can be true but host might not support it. > > Yes, but the "needed" function is only required to avoid breaking > pc-i440fx-2.7 and earlier. "needed" is required so that the migration between: SRC DEST BEHAVIOUR ~support supports on migration read from guest, on stop/cont use kvm_get_clock/kvm_set_clock Destination does not use KVM_GET_CLOCK value (which is broken and should not be used). > If you return true here, you can still > migrate a "false" value for src_use_reliable_get_clock. But the source only uses a reliable KVM_GET_CLOCK if both conditions are true. And the subsection is only needed if the source uses a reliable KVM_GET_CLOCK. > >> To set > >> s->src_use_reliable_get_clock, after issuing KVM_GET_CLOCK you can look > >> at the KVM_CLOCK_TSC_STABLE bit in the kvm_clock struct's flags. > > > > KVM_CLOCK_TSC_STABLE bit in the kvmclock structure != > > KVM_GET_CLOCK returns reliable value, right? > > It is the same as "is using masterclock", which is actually a stricter > condition than the KVM_CHECK_EXTENSION return value. The right check to > use is whether masterclock is in use, Actually its "has a reliable KVM_GET_CLOCK" (which returns get_kernel_clock() + (rdtsc() - tsc_timestamp), "broken KVM_GET_CLOCK" = get_kernel_clock() > and then the idea is to treat > clock,src_use_reliable_get_clock as one tuple that is updated atomically. > > Paolo Hum, not sure i get this... ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [Qemu-devel] [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration @ 2016-11-14 14:50 ` Marcelo Tosatti 0 siblings, 0 replies; 46+ messages in thread From: Marcelo Tosatti @ 2016-11-14 14:50 UTC (permalink / raw) To: Paolo Bonzini Cc: kvm, qemu-devel, Dr. David Alan Gilbert, Juan Quintela, Radim Krcmar, Eduardo Habkost On Mon, Nov 14, 2016 at 03:22:02PM +0100, Paolo Bonzini wrote: > > > On 14/11/2016 15:00, Marcelo Tosatti wrote: > > On Mon, Nov 14, 2016 at 02:54:38PM +0100, Paolo Bonzini wrote: > >> > >> > >> On 14/11/2016 13:36, Marcelo Tosatti wrote: > >>> + /* local (running VM) restore */ > >>> + if (s->clock_valid) { > >>> + /* > >>> + * if host does not support reliable KVM_GET_CLOCK, > >>> + * read kvmclock value from memory > >>> + */ > >>> + if (!kvm_has_adjust_clock_stable()) { > >>> + time_at_migration = kvmclock_current_nsec(s); > >> > >> Just assign to s->clock here... > > > > If kvmclock is not enabled, you want to use s->clock, > > rather than 0. > > > >>> + } > >>> + /* migration/savevm/init restore */ > >>> + } else { > >>> + /* > >>> + * use s->clock in case machine uses reliable > >>> + * get clock and host where vm was executing > >>> + * supported reliable get clock > >>> + */ > >>> + if (!s->mach_use_reliable_get_clock || > >>> + !s->src_use_reliable_get_clock) { > >>> + time_at_migration = kvmclock_current_nsec(s); > >> > >> ... and here, so that time_at_migration is not needed anymore. > > > > Same as above. > > You're right. > > >> Also here it's enough to look at s->src_user_reliable_get_clock, because > >> if s->mach_use_reliable_get_clock is false, > >> s->src_use_reliable_get_clock will be false as well. > > > > Yes, but i like the code annotation. > > Ah, I think we're looking at it differently. Well, i didnt want to mix the meaning of the variables: + /* whether machine supports reliable KVM_GET_CLOCK */ + bool mach_use_reliable_get_clock; + + /* whether source host supported reliable KVM_GET_CLOCK */ + bool src_use_reliable_get_clock; See the comments on top (later if you look at the variable, then have to think: well it has one name, but its disabled by that other path as well, so its more than its name,etc...). > I'm thinking "mach_use_reliable_get_clock is just for migration, Thats whether the machine supports it. New machines have it enabled, olders don't. > src_use_reliable_get_clock is the state". Thats whether the migration source supported it. > Perhaps you're thinking of > enabling/disabling the whole new code for old machines? source destination behaviour supports supports on migration use s->clock, on stop/cont as well supports ~supports on migration use s->clock, on stop/cont read from guest mem ~support supports on migration read from guest, on stop/cont use kvm_get_clock/kvm_set_clock ~support ~support always read from guest memory Thats what should happen (and thats what the patch should implement). "support" means host supports your new KVM_GET_CLOCK/KVM_SET_CLOCK. > What is the > advantage? Well its necessary to use the correct thing, otherwise you see a time backwards event. > > >>> + } > >>> + } > >>> > >>> - /* We can't rely on the migrated clock value, just discard it */ > >>> + /* We can't rely on the saved clock value, just discard it */ > >>> if (time_at_migration) { > >>> s->clock = time_at_migration; > >> > >> [...] > >> > >>> > >>> +static bool kvmclock_src_use_reliable_get_clock(void *opaque) > >>> +{ > >>> + KVMClockState *s = opaque; > >>> + > >>> + /* > >>> + * On machine types that support reliable KVM_GET_CLOCK, > >>> + * if host kernel does provide reliable KVM_GET_CLOCK, > >>> + * set src_use_reliable_get_clock=true so that destination > >>> + * avoids reading kvmclock from memory. > >>> + */ > >>> + if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) { > >>> + s->src_use_reliable_get_clock = true; > >>> + } > >>> + > >>> + return s->src_use_reliable_get_clock; > >>> +} > >> > >> Here you can just return s->mach_use_reliable_get_clock. > > > > mach_use_reliable_get_clock can be true but host might not support it. > > Yes, but the "needed" function is only required to avoid breaking > pc-i440fx-2.7 and earlier. "needed" is required so that the migration between: SRC DEST BEHAVIOUR ~support supports on migration read from guest, on stop/cont use kvm_get_clock/kvm_set_clock Destination does not use KVM_GET_CLOCK value (which is broken and should not be used). > If you return true here, you can still > migrate a "false" value for src_use_reliable_get_clock. But the source only uses a reliable KVM_GET_CLOCK if both conditions are true. And the subsection is only needed if the source uses a reliable KVM_GET_CLOCK. > >> To set > >> s->src_use_reliable_get_clock, after issuing KVM_GET_CLOCK you can look > >> at the KVM_CLOCK_TSC_STABLE bit in the kvm_clock struct's flags. > > > > KVM_CLOCK_TSC_STABLE bit in the kvmclock structure != > > KVM_GET_CLOCK returns reliable value, right? > > It is the same as "is using masterclock", which is actually a stricter > condition than the KVM_CHECK_EXTENSION return value. The right check to > use is whether masterclock is in use, Actually its "has a reliable KVM_GET_CLOCK" (which returns get_kernel_clock() + (rdtsc() - tsc_timestamp), "broken KVM_GET_CLOCK" = get_kernel_clock() > and then the idea is to treat > clock,src_use_reliable_get_clock as one tuple that is updated atomically. > > Paolo Hum, not sure i get this... ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration 2016-11-14 14:50 ` [Qemu-devel] " Marcelo Tosatti @ 2016-11-14 15:00 ` Paolo Bonzini -1 siblings, 0 replies; 46+ messages in thread From: Paolo Bonzini @ 2016-11-14 15:00 UTC (permalink / raw) To: Marcelo Tosatti Cc: Eduardo Habkost, kvm, Juan Quintela, Radim Krcmar, qemu-devel, Dr. David Alan Gilbert On 14/11/2016 15:50, Marcelo Tosatti wrote: > Well, i didnt want to mix the meaning of the variables: > > + /* whether machine supports reliable KVM_GET_CLOCK */ > + bool mach_use_reliable_get_clock; > + > + /* whether source host supported reliable KVM_GET_CLOCK */ > + bool src_use_reliable_get_clock; > > See the comments on top (later if you look at the variable, > then have to think: well it has one name, but its disabled > by that other path as well, so its more than its > name,etc...). > >> I'm thinking "mach_use_reliable_get_clock is just for migration, > > Thats whether the machine supports it. New machines have it enabled, > olders don't. Yes. >> src_use_reliable_get_clock is the state". > > Thats whether the migration source supported it. But it's not used only for migration. It's used on every vmstate change (running->stop and stop->running, isn't it? I think that, apart from the migration case, it's better to use s->clock if kvmclock is stable, even on older machine types. >>>>> +static bool kvmclock_src_use_reliable_get_clock(void *opaque) >>>>> +{ >>>>> + KVMClockState *s = opaque; >>>>> + >>>>> + /* >>>>> + * On machine types that support reliable KVM_GET_CLOCK, >>>>> + * if host kernel does provide reliable KVM_GET_CLOCK, >>>>> + * set src_use_reliable_get_clock=true so that destination >>>>> + * avoids reading kvmclock from memory. >>>>> + */ >>>>> + if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) { >>>>> + s->src_use_reliable_get_clock = true; >>>>> + } >>>>> + >>>>> + return s->src_use_reliable_get_clock; >>>>> +} >>>> >>>> Here you can just return s->mach_use_reliable_get_clock. >>> >>> mach_use_reliable_get_clock can be true but host might not support it. >> >> Yes, but the "needed" function is only required to avoid breaking >> pc-i440fx-2.7 and earlier. > > "needed" is required so that the migration between: > > SRC DEST BEHAVIOUR > ~support supports on migration read from guest, > on stop/cont use > kvm_get_clock/kvm_set_clock > > Destination does not use KVM_GET_CLOCK value (which is > broken and should not be used). If needed returns false, the destination will see src_use_reliable_get_clock = false anyway. If needed returns true, the destination can still see src_use_reliable_get_clock = false if that's the value. needed src_use_reliable_get_clock effect false false use kvmclock_current_nsec false true use kvmclock_current_nsec true false use kvmclock_current_nsec true true use s->clock So the idea is: - set s->clock and s->reliable_get_clock on every KVM_GET_CLOCK - on migration source, use subsections and s->mach_use_reliable_get_clock to avoid breaking migration on pre-2.8 machine types - on migration destination, use .pre_load so that s->reliable_get_clock is initialized to false on older machine types >> If you return true here, you can still >> migrate a "false" value for src_use_reliable_get_clock. > > But the source only uses a reliable KVM_GET_CLOCK if > both conditions are true. > > And the subsection is only needed if the source > uses a reliable KVM_GET_CLOCK. Yes, but the point of the subsection is just to avoid breaking migration format. >> It is the same as "is using masterclock", which is actually a stricter >> condition than the KVM_CHECK_EXTENSION return value. The right check to >> use is whether masterclock is in use, > > Actually its "has a reliable KVM_GET_CLOCK" (which returns > get_kernel_clock() + (rdtsc() - tsc_timestamp), > > "broken KVM_GET_CLOCK" = get_kernel_clock() Yes. And these end up being the same. Paolo ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [Qemu-devel] [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration @ 2016-11-14 15:00 ` Paolo Bonzini 0 siblings, 0 replies; 46+ messages in thread From: Paolo Bonzini @ 2016-11-14 15:00 UTC (permalink / raw) To: Marcelo Tosatti Cc: kvm, qemu-devel, Dr. David Alan Gilbert, Juan Quintela, Radim Krcmar, Eduardo Habkost On 14/11/2016 15:50, Marcelo Tosatti wrote: > Well, i didnt want to mix the meaning of the variables: > > + /* whether machine supports reliable KVM_GET_CLOCK */ > + bool mach_use_reliable_get_clock; > + > + /* whether source host supported reliable KVM_GET_CLOCK */ > + bool src_use_reliable_get_clock; > > See the comments on top (later if you look at the variable, > then have to think: well it has one name, but its disabled > by that other path as well, so its more than its > name,etc...). > >> I'm thinking "mach_use_reliable_get_clock is just for migration, > > Thats whether the machine supports it. New machines have it enabled, > olders don't. Yes. >> src_use_reliable_get_clock is the state". > > Thats whether the migration source supported it. But it's not used only for migration. It's used on every vmstate change (running->stop and stop->running, isn't it? I think that, apart from the migration case, it's better to use s->clock if kvmclock is stable, even on older machine types. >>>>> +static bool kvmclock_src_use_reliable_get_clock(void *opaque) >>>>> +{ >>>>> + KVMClockState *s = opaque; >>>>> + >>>>> + /* >>>>> + * On machine types that support reliable KVM_GET_CLOCK, >>>>> + * if host kernel does provide reliable KVM_GET_CLOCK, >>>>> + * set src_use_reliable_get_clock=true so that destination >>>>> + * avoids reading kvmclock from memory. >>>>> + */ >>>>> + if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) { >>>>> + s->src_use_reliable_get_clock = true; >>>>> + } >>>>> + >>>>> + return s->src_use_reliable_get_clock; >>>>> +} >>>> >>>> Here you can just return s->mach_use_reliable_get_clock. >>> >>> mach_use_reliable_get_clock can be true but host might not support it. >> >> Yes, but the "needed" function is only required to avoid breaking >> pc-i440fx-2.7 and earlier. > > "needed" is required so that the migration between: > > SRC DEST BEHAVIOUR > ~support supports on migration read from guest, > on stop/cont use > kvm_get_clock/kvm_set_clock > > Destination does not use KVM_GET_CLOCK value (which is > broken and should not be used). If needed returns false, the destination will see src_use_reliable_get_clock = false anyway. If needed returns true, the destination can still see src_use_reliable_get_clock = false if that's the value. needed src_use_reliable_get_clock effect false false use kvmclock_current_nsec false true use kvmclock_current_nsec true false use kvmclock_current_nsec true true use s->clock So the idea is: - set s->clock and s->reliable_get_clock on every KVM_GET_CLOCK - on migration source, use subsections and s->mach_use_reliable_get_clock to avoid breaking migration on pre-2.8 machine types - on migration destination, use .pre_load so that s->reliable_get_clock is initialized to false on older machine types >> If you return true here, you can still >> migrate a "false" value for src_use_reliable_get_clock. > > But the source only uses a reliable KVM_GET_CLOCK if > both conditions are true. > > And the subsection is only needed if the source > uses a reliable KVM_GET_CLOCK. Yes, but the point of the subsection is just to avoid breaking migration format. >> It is the same as "is using masterclock", which is actually a stricter >> condition than the KVM_CHECK_EXTENSION return value. The right check to >> use is whether masterclock is in use, > > Actually its "has a reliable KVM_GET_CLOCK" (which returns > get_kernel_clock() + (rdtsc() - tsc_timestamp), > > "broken KVM_GET_CLOCK" = get_kernel_clock() Yes. And these end up being the same. Paolo ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration 2016-11-14 15:00 ` [Qemu-devel] " Paolo Bonzini @ 2016-11-14 15:40 ` Marcelo Tosatti -1 siblings, 0 replies; 46+ messages in thread From: Marcelo Tosatti @ 2016-11-14 15:40 UTC (permalink / raw) To: Paolo Bonzini Cc: kvm, qemu-devel, Dr. David Alan Gilbert, Juan Quintela, Radim Krcmar, Eduardo Habkost On Mon, Nov 14, 2016 at 04:00:38PM +0100, Paolo Bonzini wrote: > > > On 14/11/2016 15:50, Marcelo Tosatti wrote: > > Well, i didnt want to mix the meaning of the variables: > > > > + /* whether machine supports reliable KVM_GET_CLOCK */ > > + bool mach_use_reliable_get_clock; > > + > > + /* whether source host supported reliable KVM_GET_CLOCK */ > > + bool src_use_reliable_get_clock; > > > > See the comments on top (later if you look at the variable, > > then have to think: well it has one name, but its disabled > > by that other path as well, so its more than its > > name,etc...). > > > >> I'm thinking "mach_use_reliable_get_clock is just for migration, > > > > Thats whether the machine supports it. New machines have it enabled, > > olders don't. > > Yes. > > >> src_use_reliable_get_clock is the state". > > > > Thats whether the migration source supported it. > > But it's not used only for migration. It's used on every vmstate change > (running->stop and stop->running, isn't it? No. Its used only if s->clock_valid == false, which means either: migration/savevm/init. Now for savevm there is a bug: > I think that, apart from > the migration case, it's better to use s->clock if kvmclock is stable, > even on older machine types. Yes, its already doing that: /* local (running VM) restore */ if (s->clock_valid) { /* * if host does not support reliable KVM_GET_CLOCK, * read kvmclock value from memory */ if (!kvm_has_adjust_clock_stable()) { time_at_migration = kvmclock_current_nsec(s); } It only uses kvmclock_current_nsec() if kvm_has_adjust_clock_stable is false. > >>>>> +static bool kvmclock_src_use_reliable_get_clock(void *opaque) > >>>>> +{ > >>>>> + KVMClockState *s = opaque; > >>>>> + > >>>>> + /* > >>>>> + * On machine types that support reliable KVM_GET_CLOCK, > >>>>> + * if host kernel does provide reliable KVM_GET_CLOCK, > >>>>> + * set src_use_reliable_get_clock=true so that destination > >>>>> + * avoids reading kvmclock from memory. > >>>>> + */ > >>>>> + if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) { > >>>>> + s->src_use_reliable_get_clock = true; > >>>>> + } > >>>>> + > >>>>> + return s->src_use_reliable_get_clock; > >>>>> +} > >>>> > >>>> Here you can just return s->mach_use_reliable_get_clock. > >>> > >>> mach_use_reliable_get_clock can be true but host might not support it. > >> > >> Yes, but the "needed" function is only required to avoid breaking > >> pc-i440fx-2.7 and earlier. > > > > "needed" is required so that the migration between: > > > > SRC DEST BEHAVIOUR > > ~support supports on migration read from guest, > > on stop/cont use > > kvm_get_clock/kvm_set_clock > > > > Destination does not use KVM_GET_CLOCK value (which is > > broken and should not be used). > > If needed returns false, the destination will see > src_use_reliable_get_clock = false anyway. Causing it to read the clock from memory, thats what should happen. > If needed returns true, the destination can still see > src_use_reliable_get_clock = false if that's the value. You are asking me to change from: +static bool kvmclock_src_use_reliable_get_clock(void *opaque) +{ + KVMClockState *s = opaque; + + /* + * On machine types that support reliable KVM_GET_CLOCK, + * if host kernel does provide reliable KVM_GET_CLOCK, + * set src_use_reliable_get_clock=true so that destination + * avoids reading kvmclock from memory. + */ + if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) { + s->src_use_reliable_get_clock = true; + } + + return s->src_use_reliable_get_clock; +} to static bool kvmclock_src_use_reliable_get_clock(void *opaque) { KVMClockState *s = opaque; /* * On machine types that support reliable KVM_GET_CLOCK, * if host kernel does provide reliable KVM_GET_CLOCK, * set src_use_reliable_get_clock=true so that destination * avoids reading kvmclock from memory. */ if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) { s->src_use_reliable_get_clock = true; } return s->mach_use_reliable_get_clock; } Ah, OK, done. > needed src_use_reliable_get_clock effect > false false use kvmclock_current_nsec > false true use kvmclock_current_nsec > true false use kvmclock_current_nsec > true true use s->clock > > > So the idea is: > > - set s->clock and s->reliable_get_clock on every KVM_GET_CLOCK Already do that (apart from s->clock_valid which is necessary). > - on migration source, use subsections and > s->mach_use_reliable_get_clock to avoid breaking migration on pre-2.8 > machine types Migration is already broken when you use different machine types. So s->src_use_reliable_get_clock is only used to indicate to the destination that: "you can use KVM_GET_CLOCK value, its safe". > - on migration destination, use .pre_load so that s->reliable_get_clock > is initialized to false on older machine types Thats what mach_use_reliable_get_clock does already: static Property kvmclock_properties[] = { DEFINE_PROP_BOOL("mach_use_reliable_get_clock", KVMClockState, mach_use_reliable_get_clock, true), DEFINE_PROP_END_OF_LIST(), }; + .driver = "kvmclock",\ + .property = "mach_use_reliable_get_clock",\ + .value = "off",\ + },\ + {\ > > >> If you return true here, you can still > >> migrate a "false" value for src_use_reliable_get_clock. > > > > But the source only uses a reliable KVM_GET_CLOCK if > > both conditions are true. > > > > And the subsection is only needed if the source > > uses a reliable KVM_GET_CLOCK. > > Yes, but the point of the subsection is just to avoid breaking migration > format. Its a new creative use of the subsection. > >> It is the same as "is using masterclock", which is actually a stricter > >> condition than the KVM_CHECK_EXTENSION return value. The right check to > >> use is whether masterclock is in use, > > > > Actually its "has a reliable KVM_GET_CLOCK" (which returns > > get_kernel_clock() + (rdtsc() - tsc_timestamp), > > > > "broken KVM_GET_CLOCK" = get_kernel_clock() > > Yes. And these end up being the same. No. You can have masterclock enabled, KVM_TSC_STABLE set in pvclock flags, and unreliable KVM_GET_CLOCK (without your patch to KVM_GET_CLOCK). Paolo not sure if there is anything further you want me to change at this point ? Index: qemu-mig-advance-clock/hw/i386/kvm/clock.c =================================================================== --- qemu-mig-advance-clock.orig/hw/i386/kvm/clock.c 2016-11-14 10:40:39.748116312 -0200 +++ qemu-mig-advance-clock/hw/i386/kvm/clock.c 2016-11-14 13:38:29.299955042 -0200 @@ -36,6 +36,12 @@ uint64_t clock; bool clock_valid; + + /* whether machine supports reliable KVM_GET_CLOCK */ + bool mach_use_reliable_get_clock; + + /* whether source host supported reliable KVM_GET_CLOCK */ + bool src_use_reliable_get_clock; } KVMClockState; struct pvclock_vcpu_time_info { @@ -81,6 +87,19 @@ return nsec + time.system_time; } +static uint64_t kvm_get_clock(void) +{ + struct kvm_clock_data data; + int ret; + + ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data); + if (ret < 0) { + fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret)); + abort(); + } + return data.clock; +} + static void kvmclock_vm_state_change(void *opaque, int running, RunState state) { @@ -91,15 +110,37 @@ if (running) { struct kvm_clock_data data = {}; - uint64_t time_at_migration = kvmclock_current_nsec(s); + uint64_t pvclock_via_mem = 0; - s->clock_valid = false; + /* local (running VM) restore */ + if (s->clock_valid) { + /* + * if host does not support reliable KVM_GET_CLOCK, + * read kvmclock value from memory + */ + if (!kvm_has_adjust_clock_stable()) { + pvclock_via_mem = kvmclock_current_nsec(s); + } + /* migration/savevm/init restore */ + } else { + /* + * use s->clock in case machine uses reliable + * get clock and source host supported + * reliable get clock + */ + if (!(s->mach_use_reliable_get_clock && + s->src_use_reliable_get_clock)) { + pvclock_via_mem = kvmclock_current_nsec(s); + } + } - /* We can't rely on the migrated clock value, just discard it */ - if (time_at_migration) { - s->clock = time_at_migration; + /* We can't rely on the saved clock value, just discard it */ + if (pvclock_via_mem) { + s->clock = pvclock_via_mem; } + s->clock_valid = false; + data.clock = s->clock; ret = kvm_vm_ioctl(kvm_state, KVM_SET_CLOCK, &data); if (ret < 0) { @@ -120,8 +161,6 @@ } } } else { - struct kvm_clock_data data; - int ret; if (s->clock_valid) { return; @@ -129,13 +168,7 @@ kvm_synchronize_all_tsc(); - ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data); - if (ret < 0) { - fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret)); - abort(); - } - s->clock = data.clock; - + s->clock = kvm_get_clock(); /* * If the VM is stopped, declare the clock state valid to * avoid re-reading it on next vmsave (which would return @@ -152,22 +185,69 @@ qemu_add_vm_change_state_handler(kvmclock_vm_state_change, s); } +static bool kvmclock_src_use_reliable_get_clock(void *opaque) +{ + KVMClockState *s = opaque; + + /* + * On machine types that support reliable KVM_GET_CLOCK, + * if host kernel does provide reliable KVM_GET_CLOCK, + * set src_use_reliable_get_clock=true so that destination + * avoids reading kvmclock from memory. + */ + if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) { + s->src_use_reliable_get_clock = true; + } + + return s->mach_use_reliable_get_clock; +} + +static const VMStateDescription kvmclock_reliable_get_clock = { + .name = "kvmclock/src_use_reliable_get_clock", + .version_id = 1, + .minimum_version_id = 1, + .needed = kvmclock_src_use_reliable_get_clock, + .fields = (VMStateField[]) { + VMSTATE_BOOL(src_use_reliable_get_clock, KVMClockState), + VMSTATE_END_OF_LIST() + } +}; + +static void kvmclock_pre_save(void *opaque) +{ + KVMClockState *s = opaque; + + s->clock = kvm_get_clock(); +} + static const VMStateDescription kvmclock_vmsd = { .name = "kvmclock", .version_id = 1, .minimum_version_id = 1, + .pre_save = kvmclock_pre_save, .fields = (VMStateField[]) { VMSTATE_UINT64(clock, KVMClockState), VMSTATE_END_OF_LIST() + }, + .subsections = (const VMStateDescription * []) { + &kvmclock_reliable_get_clock, + NULL } }; +static Property kvmclock_properties[] = { + DEFINE_PROP_BOOL("mach_use_reliable_get_clock", KVMClockState, + mach_use_reliable_get_clock, true), + DEFINE_PROP_END_OF_LIST(), +}; + static void kvmclock_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); dc->realize = kvmclock_realize; dc->vmsd = &kvmclock_vmsd; + dc->props = kvmclock_properties; } static const TypeInfo kvmclock_info = { Index: qemu-mig-advance-clock/include/hw/i386/pc.h =================================================================== --- qemu-mig-advance-clock.orig/include/hw/i386/pc.h 2016-11-14 10:40:39.748116312 -0200 +++ qemu-mig-advance-clock/include/hw/i386/pc.h 2016-11-14 10:40:48.059128649 -0200 @@ -370,6 +370,11 @@ #define PC_COMPAT_2_7 \ HW_COMPAT_2_7 \ {\ + .driver = "kvmclock",\ + .property = "mach_use_reliable_get_clock",\ + .value = "off",\ + },\ + {\ .driver = TYPE_X86_CPU,\ .property = "l3-cache",\ .value = "off",\ Index: qemu-mig-advance-clock/target-i386/kvm.c =================================================================== --- qemu-mig-advance-clock.orig/target-i386/kvm.c 2016-11-14 10:40:39.750116314 -0200 +++ qemu-mig-advance-clock/target-i386/kvm.c 2016-11-14 10:40:48.061128652 -0200 @@ -117,6 +117,13 @@ return kvm_check_extension(kvm_state, KVM_CAP_X86_SMM); } +bool kvm_has_adjust_clock_stable(void) +{ + int ret = kvm_check_extension(kvm_state, KVM_CAP_ADJUST_CLOCK); + + return (ret == KVM_CLOCK_TSC_STABLE); +} + bool kvm_allows_irq0_override(void) { return !kvm_irqchip_in_kernel() || kvm_has_gsi_routing(); Index: qemu-mig-advance-clock/target-i386/kvm_i386.h =================================================================== --- qemu-mig-advance-clock.orig/target-i386/kvm_i386.h 2016-11-14 10:40:39.751116316 -0200 +++ qemu-mig-advance-clock/target-i386/kvm_i386.h 2016-11-14 10:40:48.061128652 -0200 @@ -17,6 +17,7 @@ bool kvm_allows_irq0_override(void); bool kvm_has_smm(void); +bool kvm_has_adjust_clock_stable(void); void kvm_synchronize_all_tsc(void); void kvm_arch_reset_vcpu(X86CPU *cs); void kvm_arch_do_init_vcpu(X86CPU *cs); ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [Qemu-devel] [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration @ 2016-11-14 15:40 ` Marcelo Tosatti 0 siblings, 0 replies; 46+ messages in thread From: Marcelo Tosatti @ 2016-11-14 15:40 UTC (permalink / raw) To: Paolo Bonzini Cc: kvm, qemu-devel, Dr. David Alan Gilbert, Juan Quintela, Radim Krcmar, Eduardo Habkost On Mon, Nov 14, 2016 at 04:00:38PM +0100, Paolo Bonzini wrote: > > > On 14/11/2016 15:50, Marcelo Tosatti wrote: > > Well, i didnt want to mix the meaning of the variables: > > > > + /* whether machine supports reliable KVM_GET_CLOCK */ > > + bool mach_use_reliable_get_clock; > > + > > + /* whether source host supported reliable KVM_GET_CLOCK */ > > + bool src_use_reliable_get_clock; > > > > See the comments on top (later if you look at the variable, > > then have to think: well it has one name, but its disabled > > by that other path as well, so its more than its > > name,etc...). > > > >> I'm thinking "mach_use_reliable_get_clock is just for migration, > > > > Thats whether the machine supports it. New machines have it enabled, > > olders don't. > > Yes. > > >> src_use_reliable_get_clock is the state". > > > > Thats whether the migration source supported it. > > But it's not used only for migration. It's used on every vmstate change > (running->stop and stop->running, isn't it? No. Its used only if s->clock_valid == false, which means either: migration/savevm/init. Now for savevm there is a bug: > I think that, apart from > the migration case, it's better to use s->clock if kvmclock is stable, > even on older machine types. Yes, its already doing that: /* local (running VM) restore */ if (s->clock_valid) { /* * if host does not support reliable KVM_GET_CLOCK, * read kvmclock value from memory */ if (!kvm_has_adjust_clock_stable()) { time_at_migration = kvmclock_current_nsec(s); } It only uses kvmclock_current_nsec() if kvm_has_adjust_clock_stable is false. > >>>>> +static bool kvmclock_src_use_reliable_get_clock(void *opaque) > >>>>> +{ > >>>>> + KVMClockState *s = opaque; > >>>>> + > >>>>> + /* > >>>>> + * On machine types that support reliable KVM_GET_CLOCK, > >>>>> + * if host kernel does provide reliable KVM_GET_CLOCK, > >>>>> + * set src_use_reliable_get_clock=true so that destination > >>>>> + * avoids reading kvmclock from memory. > >>>>> + */ > >>>>> + if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) { > >>>>> + s->src_use_reliable_get_clock = true; > >>>>> + } > >>>>> + > >>>>> + return s->src_use_reliable_get_clock; > >>>>> +} > >>>> > >>>> Here you can just return s->mach_use_reliable_get_clock. > >>> > >>> mach_use_reliable_get_clock can be true but host might not support it. > >> > >> Yes, but the "needed" function is only required to avoid breaking > >> pc-i440fx-2.7 and earlier. > > > > "needed" is required so that the migration between: > > > > SRC DEST BEHAVIOUR > > ~support supports on migration read from guest, > > on stop/cont use > > kvm_get_clock/kvm_set_clock > > > > Destination does not use KVM_GET_CLOCK value (which is > > broken and should not be used). > > If needed returns false, the destination will see > src_use_reliable_get_clock = false anyway. Causing it to read the clock from memory, thats what should happen. > If needed returns true, the destination can still see > src_use_reliable_get_clock = false if that's the value. You are asking me to change from: +static bool kvmclock_src_use_reliable_get_clock(void *opaque) +{ + KVMClockState *s = opaque; + + /* + * On machine types that support reliable KVM_GET_CLOCK, + * if host kernel does provide reliable KVM_GET_CLOCK, + * set src_use_reliable_get_clock=true so that destination + * avoids reading kvmclock from memory. + */ + if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) { + s->src_use_reliable_get_clock = true; + } + + return s->src_use_reliable_get_clock; +} to static bool kvmclock_src_use_reliable_get_clock(void *opaque) { KVMClockState *s = opaque; /* * On machine types that support reliable KVM_GET_CLOCK, * if host kernel does provide reliable KVM_GET_CLOCK, * set src_use_reliable_get_clock=true so that destination * avoids reading kvmclock from memory. */ if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) { s->src_use_reliable_get_clock = true; } return s->mach_use_reliable_get_clock; } Ah, OK, done. > needed src_use_reliable_get_clock effect > false false use kvmclock_current_nsec > false true use kvmclock_current_nsec > true false use kvmclock_current_nsec > true true use s->clock > > > So the idea is: > > - set s->clock and s->reliable_get_clock on every KVM_GET_CLOCK Already do that (apart from s->clock_valid which is necessary). > - on migration source, use subsections and > s->mach_use_reliable_get_clock to avoid breaking migration on pre-2.8 > machine types Migration is already broken when you use different machine types. So s->src_use_reliable_get_clock is only used to indicate to the destination that: "you can use KVM_GET_CLOCK value, its safe". > - on migration destination, use .pre_load so that s->reliable_get_clock > is initialized to false on older machine types Thats what mach_use_reliable_get_clock does already: static Property kvmclock_properties[] = { DEFINE_PROP_BOOL("mach_use_reliable_get_clock", KVMClockState, mach_use_reliable_get_clock, true), DEFINE_PROP_END_OF_LIST(), }; + .driver = "kvmclock",\ + .property = "mach_use_reliable_get_clock",\ + .value = "off",\ + },\ + {\ > > >> If you return true here, you can still > >> migrate a "false" value for src_use_reliable_get_clock. > > > > But the source only uses a reliable KVM_GET_CLOCK if > > both conditions are true. > > > > And the subsection is only needed if the source > > uses a reliable KVM_GET_CLOCK. > > Yes, but the point of the subsection is just to avoid breaking migration > format. Its a new creative use of the subsection. > >> It is the same as "is using masterclock", which is actually a stricter > >> condition than the KVM_CHECK_EXTENSION return value. The right check to > >> use is whether masterclock is in use, > > > > Actually its "has a reliable KVM_GET_CLOCK" (which returns > > get_kernel_clock() + (rdtsc() - tsc_timestamp), > > > > "broken KVM_GET_CLOCK" = get_kernel_clock() > > Yes. And these end up being the same. No. You can have masterclock enabled, KVM_TSC_STABLE set in pvclock flags, and unreliable KVM_GET_CLOCK (without your patch to KVM_GET_CLOCK). Paolo not sure if there is anything further you want me to change at this point ? Index: qemu-mig-advance-clock/hw/i386/kvm/clock.c =================================================================== --- qemu-mig-advance-clock.orig/hw/i386/kvm/clock.c 2016-11-14 10:40:39.748116312 -0200 +++ qemu-mig-advance-clock/hw/i386/kvm/clock.c 2016-11-14 13:38:29.299955042 -0200 @@ -36,6 +36,12 @@ uint64_t clock; bool clock_valid; + + /* whether machine supports reliable KVM_GET_CLOCK */ + bool mach_use_reliable_get_clock; + + /* whether source host supported reliable KVM_GET_CLOCK */ + bool src_use_reliable_get_clock; } KVMClockState; struct pvclock_vcpu_time_info { @@ -81,6 +87,19 @@ return nsec + time.system_time; } +static uint64_t kvm_get_clock(void) +{ + struct kvm_clock_data data; + int ret; + + ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data); + if (ret < 0) { + fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret)); + abort(); + } + return data.clock; +} + static void kvmclock_vm_state_change(void *opaque, int running, RunState state) { @@ -91,15 +110,37 @@ if (running) { struct kvm_clock_data data = {}; - uint64_t time_at_migration = kvmclock_current_nsec(s); + uint64_t pvclock_via_mem = 0; - s->clock_valid = false; + /* local (running VM) restore */ + if (s->clock_valid) { + /* + * if host does not support reliable KVM_GET_CLOCK, + * read kvmclock value from memory + */ + if (!kvm_has_adjust_clock_stable()) { + pvclock_via_mem = kvmclock_current_nsec(s); + } + /* migration/savevm/init restore */ + } else { + /* + * use s->clock in case machine uses reliable + * get clock and source host supported + * reliable get clock + */ + if (!(s->mach_use_reliable_get_clock && + s->src_use_reliable_get_clock)) { + pvclock_via_mem = kvmclock_current_nsec(s); + } + } - /* We can't rely on the migrated clock value, just discard it */ - if (time_at_migration) { - s->clock = time_at_migration; + /* We can't rely on the saved clock value, just discard it */ + if (pvclock_via_mem) { + s->clock = pvclock_via_mem; } + s->clock_valid = false; + data.clock = s->clock; ret = kvm_vm_ioctl(kvm_state, KVM_SET_CLOCK, &data); if (ret < 0) { @@ -120,8 +161,6 @@ } } } else { - struct kvm_clock_data data; - int ret; if (s->clock_valid) { return; @@ -129,13 +168,7 @@ kvm_synchronize_all_tsc(); - ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data); - if (ret < 0) { - fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret)); - abort(); - } - s->clock = data.clock; - + s->clock = kvm_get_clock(); /* * If the VM is stopped, declare the clock state valid to * avoid re-reading it on next vmsave (which would return @@ -152,22 +185,69 @@ qemu_add_vm_change_state_handler(kvmclock_vm_state_change, s); } +static bool kvmclock_src_use_reliable_get_clock(void *opaque) +{ + KVMClockState *s = opaque; + + /* + * On machine types that support reliable KVM_GET_CLOCK, + * if host kernel does provide reliable KVM_GET_CLOCK, + * set src_use_reliable_get_clock=true so that destination + * avoids reading kvmclock from memory. + */ + if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) { + s->src_use_reliable_get_clock = true; + } + + return s->mach_use_reliable_get_clock; +} + +static const VMStateDescription kvmclock_reliable_get_clock = { + .name = "kvmclock/src_use_reliable_get_clock", + .version_id = 1, + .minimum_version_id = 1, + .needed = kvmclock_src_use_reliable_get_clock, + .fields = (VMStateField[]) { + VMSTATE_BOOL(src_use_reliable_get_clock, KVMClockState), + VMSTATE_END_OF_LIST() + } +}; + +static void kvmclock_pre_save(void *opaque) +{ + KVMClockState *s = opaque; + + s->clock = kvm_get_clock(); +} + static const VMStateDescription kvmclock_vmsd = { .name = "kvmclock", .version_id = 1, .minimum_version_id = 1, + .pre_save = kvmclock_pre_save, .fields = (VMStateField[]) { VMSTATE_UINT64(clock, KVMClockState), VMSTATE_END_OF_LIST() + }, + .subsections = (const VMStateDescription * []) { + &kvmclock_reliable_get_clock, + NULL } }; +static Property kvmclock_properties[] = { + DEFINE_PROP_BOOL("mach_use_reliable_get_clock", KVMClockState, + mach_use_reliable_get_clock, true), + DEFINE_PROP_END_OF_LIST(), +}; + static void kvmclock_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); dc->realize = kvmclock_realize; dc->vmsd = &kvmclock_vmsd; + dc->props = kvmclock_properties; } static const TypeInfo kvmclock_info = { Index: qemu-mig-advance-clock/include/hw/i386/pc.h =================================================================== --- qemu-mig-advance-clock.orig/include/hw/i386/pc.h 2016-11-14 10:40:39.748116312 -0200 +++ qemu-mig-advance-clock/include/hw/i386/pc.h 2016-11-14 10:40:48.059128649 -0200 @@ -370,6 +370,11 @@ #define PC_COMPAT_2_7 \ HW_COMPAT_2_7 \ {\ + .driver = "kvmclock",\ + .property = "mach_use_reliable_get_clock",\ + .value = "off",\ + },\ + {\ .driver = TYPE_X86_CPU,\ .property = "l3-cache",\ .value = "off",\ Index: qemu-mig-advance-clock/target-i386/kvm.c =================================================================== --- qemu-mig-advance-clock.orig/target-i386/kvm.c 2016-11-14 10:40:39.750116314 -0200 +++ qemu-mig-advance-clock/target-i386/kvm.c 2016-11-14 10:40:48.061128652 -0200 @@ -117,6 +117,13 @@ return kvm_check_extension(kvm_state, KVM_CAP_X86_SMM); } +bool kvm_has_adjust_clock_stable(void) +{ + int ret = kvm_check_extension(kvm_state, KVM_CAP_ADJUST_CLOCK); + + return (ret == KVM_CLOCK_TSC_STABLE); +} + bool kvm_allows_irq0_override(void) { return !kvm_irqchip_in_kernel() || kvm_has_gsi_routing(); Index: qemu-mig-advance-clock/target-i386/kvm_i386.h =================================================================== --- qemu-mig-advance-clock.orig/target-i386/kvm_i386.h 2016-11-14 10:40:39.751116316 -0200 +++ qemu-mig-advance-clock/target-i386/kvm_i386.h 2016-11-14 10:40:48.061128652 -0200 @@ -17,6 +17,7 @@ bool kvm_allows_irq0_override(void); bool kvm_has_smm(void); +bool kvm_has_adjust_clock_stable(void); void kvm_synchronize_all_tsc(void); void kvm_arch_reset_vcpu(X86CPU *cs); void kvm_arch_do_init_vcpu(X86CPU *cs); ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration 2016-11-14 15:40 ` [Qemu-devel] " Marcelo Tosatti @ 2016-11-14 16:43 ` Paolo Bonzini -1 siblings, 0 replies; 46+ messages in thread From: Paolo Bonzini @ 2016-11-14 16:43 UTC (permalink / raw) To: Marcelo Tosatti Cc: kvm, qemu-devel, Dr. David Alan Gilbert, Juan Quintela, Radim Krcmar, Eduardo Habkost On 14/11/2016 16:40, Marcelo Tosatti wrote: > static bool kvmclock_src_use_reliable_get_clock(void *opaque) > { > KVMClockState *s = opaque; > > /* > * On machine types that support reliable KVM_GET_CLOCK, > * if host kernel does provide reliable KVM_GET_CLOCK, > * set src_use_reliable_get_clock=true so that destination > * avoids reading kvmclock from memory. > */ > if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) > { > s->src_use_reliable_get_clock = true; > } > > return s->mach_use_reliable_get_clock; > } > > > Ah, OK, done. s->src_use_reliable_get_clock should not be set with KVM_CHECK_EXTENSION, but rather from the flags returned by KVM_GET_CLOCK. > So s->src_use_reliable_get_clock is only used to indicate > to the destination that: "you can use KVM_GET_CLOCK value, > its safe". Yes, we agree. I was listing all the points, not just those where we disagree. Actually I'm not sure where we disagree, except on using flags from KVM_CHECK_EXTENSION vs. flags from KVM_GET_CLOCK... Paolo ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [Qemu-devel] [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration @ 2016-11-14 16:43 ` Paolo Bonzini 0 siblings, 0 replies; 46+ messages in thread From: Paolo Bonzini @ 2016-11-14 16:43 UTC (permalink / raw) To: Marcelo Tosatti Cc: kvm, qemu-devel, Dr. David Alan Gilbert, Juan Quintela, Radim Krcmar, Eduardo Habkost On 14/11/2016 16:40, Marcelo Tosatti wrote: > static bool kvmclock_src_use_reliable_get_clock(void *opaque) > { > KVMClockState *s = opaque; > > /* > * On machine types that support reliable KVM_GET_CLOCK, > * if host kernel does provide reliable KVM_GET_CLOCK, > * set src_use_reliable_get_clock=true so that destination > * avoids reading kvmclock from memory. > */ > if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) > { > s->src_use_reliable_get_clock = true; > } > > return s->mach_use_reliable_get_clock; > } > > > Ah, OK, done. s->src_use_reliable_get_clock should not be set with KVM_CHECK_EXTENSION, but rather from the flags returned by KVM_GET_CLOCK. > So s->src_use_reliable_get_clock is only used to indicate > to the destination that: "you can use KVM_GET_CLOCK value, > its safe". Yes, we agree. I was listing all the points, not just those where we disagree. Actually I'm not sure where we disagree, except on using flags from KVM_CHECK_EXTENSION vs. flags from KVM_GET_CLOCK... Paolo ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration 2016-11-14 16:43 ` [Qemu-devel] " Paolo Bonzini @ 2016-11-14 17:13 ` Marcelo Tosatti -1 siblings, 0 replies; 46+ messages in thread From: Marcelo Tosatti @ 2016-11-14 17:13 UTC (permalink / raw) To: Paolo Bonzini Cc: kvm, qemu-devel, Dr. David Alan Gilbert, Juan Quintela, Radim Krcmar, Eduardo Habkost On Mon, Nov 14, 2016 at 05:43:33PM +0100, Paolo Bonzini wrote: > > > On 14/11/2016 16:40, Marcelo Tosatti wrote: > > static bool kvmclock_src_use_reliable_get_clock(void *opaque) > > { > > KVMClockState *s = opaque; > > > > /* > > * On machine types that support reliable KVM_GET_CLOCK, > > * if host kernel does provide reliable KVM_GET_CLOCK, > > * set src_use_reliable_get_clock=true so that destination > > * avoids reading kvmclock from memory. > > */ > > if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) > > { > > s->src_use_reliable_get_clock = true; > > } > > > > return s->mach_use_reliable_get_clock; > > } > > > > > > Ah, OK, done. > > s->src_use_reliable_get_clock should not be set with > KVM_CHECK_EXTENSION, but rather from the flags returned by KVM_GET_CLOCK. Well, thats not right: What matters is the presence of get_kvmclock_ns which returns a value that the guest sees. get_kernel_monotonic_clock() + kvmclock_offset + (rdtsc() - tsc_timestamp) IOW what the guest sees. And you changed that in commit 108b249c453dd7132599ab6dc7e435a7036c193f Author: Paolo Bonzini <pbonzini@redhat.com> Date: Thu Sep 1 14:21:03 2016 +0200 KVM: x86: introduce get_kvmclock_ns And the correct behaviour (once KVM_GET_CLOCK is fixed per previous message to return rdtsc - tsc_timestamp for the non masterclock case) depends on this commit above, not on masterclock. > > So s->src_use_reliable_get_clock is only used to indicate > > to the destination that: "you can use KVM_GET_CLOCK value, > > its safe". > > Yes, we agree. I was listing all the points, not just those where we > disagree. Actually I'm not sure where we disagree, except on using > flags from KVM_CHECK_EXTENSION vs. flags from KVM_GET_CLOCK... > > Paolo ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [Qemu-devel] [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration @ 2016-11-14 17:13 ` Marcelo Tosatti 0 siblings, 0 replies; 46+ messages in thread From: Marcelo Tosatti @ 2016-11-14 17:13 UTC (permalink / raw) To: Paolo Bonzini Cc: kvm, qemu-devel, Dr. David Alan Gilbert, Juan Quintela, Radim Krcmar, Eduardo Habkost On Mon, Nov 14, 2016 at 05:43:33PM +0100, Paolo Bonzini wrote: > > > On 14/11/2016 16:40, Marcelo Tosatti wrote: > > static bool kvmclock_src_use_reliable_get_clock(void *opaque) > > { > > KVMClockState *s = opaque; > > > > /* > > * On machine types that support reliable KVM_GET_CLOCK, > > * if host kernel does provide reliable KVM_GET_CLOCK, > > * set src_use_reliable_get_clock=true so that destination > > * avoids reading kvmclock from memory. > > */ > > if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) > > { > > s->src_use_reliable_get_clock = true; > > } > > > > return s->mach_use_reliable_get_clock; > > } > > > > > > Ah, OK, done. > > s->src_use_reliable_get_clock should not be set with > KVM_CHECK_EXTENSION, but rather from the flags returned by KVM_GET_CLOCK. Well, thats not right: What matters is the presence of get_kvmclock_ns which returns a value that the guest sees. get_kernel_monotonic_clock() + kvmclock_offset + (rdtsc() - tsc_timestamp) IOW what the guest sees. And you changed that in commit 108b249c453dd7132599ab6dc7e435a7036c193f Author: Paolo Bonzini <pbonzini@redhat.com> Date: Thu Sep 1 14:21:03 2016 +0200 KVM: x86: introduce get_kvmclock_ns And the correct behaviour (once KVM_GET_CLOCK is fixed per previous message to return rdtsc - tsc_timestamp for the non masterclock case) depends on this commit above, not on masterclock. > > So s->src_use_reliable_get_clock is only used to indicate > > to the destination that: "you can use KVM_GET_CLOCK value, > > its safe". > > Yes, we agree. I was listing all the points, not just those where we > disagree. Actually I'm not sure where we disagree, except on using > flags from KVM_CHECK_EXTENSION vs. flags from KVM_GET_CLOCK... > > Paolo ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration 2016-11-14 17:13 ` [Qemu-devel] " Marcelo Tosatti @ 2016-11-14 17:20 ` Paolo Bonzini -1 siblings, 0 replies; 46+ messages in thread From: Paolo Bonzini @ 2016-11-14 17:20 UTC (permalink / raw) To: Marcelo Tosatti Cc: kvm, qemu-devel, Dr. David Alan Gilbert, Juan Quintela, Radim Krcmar, Eduardo Habkost On 14/11/2016 18:13, Marcelo Tosatti wrote: > On Mon, Nov 14, 2016 at 05:43:33PM +0100, Paolo Bonzini wrote: >> >> >> On 14/11/2016 16:40, Marcelo Tosatti wrote: >>> static bool kvmclock_src_use_reliable_get_clock(void *opaque) >>> { >>> KVMClockState *s = opaque; >>> >>> /* >>> * On machine types that support reliable KVM_GET_CLOCK, >>> * if host kernel does provide reliable KVM_GET_CLOCK, >>> * set src_use_reliable_get_clock=true so that destination >>> * avoids reading kvmclock from memory. >>> */ >>> if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) >>> { >>> s->src_use_reliable_get_clock = true; >>> } >>> >>> return s->mach_use_reliable_get_clock; >>> } >>> >>> >>> Ah, OK, done. >> >> s->src_use_reliable_get_clock should not be set with >> KVM_CHECK_EXTENSION, but rather from the flags returned by KVM_GET_CLOCK. > > Well, thats not right: What matters is the presence of get_kvmclock_ns > which returns a value that the guest sees. > > get_kernel_monotonic_clock() + kvmclock_offset + > (rdtsc() - tsc_timestamp) > > IOW what the guest sees. And you changed that in > > commit 108b249c453dd7132599ab6dc7e435a7036c193f > Author: Paolo Bonzini <pbonzini@redhat.com> > Date: Thu Sep 1 14:21:03 2016 +0200 > > KVM: x86: introduce get_kvmclock_ns > > And the correct behaviour (once KVM_GET_CLOCK is fixed per > previous message to return rdtsc - tsc_timestamp for the > non masterclock case) depends on this commit above, > not on masterclock. This commit in turn only gets the correct behavior if "vcpu->arch.hv_clock.flags & PVCLOCK_TSC_STABLE_BIT" (and it will be changed soon to ka->use_masterclock). KVM_CHECK_EXTENSION can still return KVM_CLOCK_TSC_STABLE even if the masterclock is disabled, because KVM_CHECK_EXTENSION only tells you which flags are known for this version of the KVM module. To see if the masterclock is enabled _now_, you need to check what KVM_GET_CLOCK sets in the flags. From the KVM_CLOCK_TSC_STABLE patch: user_ns.flags = kvm->arch.use_master_clock ? KVM_CLOCK_TSC_STABLE : 0; Thanks, Paolo >>> So s->src_use_reliable_get_clock is only used to indicate >>> to the destination that: "you can use KVM_GET_CLOCK value, >>> its safe". >> >> Yes, we agree. I was listing all the points, not just those where we >> disagree. Actually I'm not sure where we disagree, except on using >> flags from KVM_CHECK_EXTENSION vs. flags from KVM_GET_CLOCK... >> >> Paolo ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [Qemu-devel] [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration @ 2016-11-14 17:20 ` Paolo Bonzini 0 siblings, 0 replies; 46+ messages in thread From: Paolo Bonzini @ 2016-11-14 17:20 UTC (permalink / raw) To: Marcelo Tosatti Cc: kvm, qemu-devel, Dr. David Alan Gilbert, Juan Quintela, Radim Krcmar, Eduardo Habkost On 14/11/2016 18:13, Marcelo Tosatti wrote: > On Mon, Nov 14, 2016 at 05:43:33PM +0100, Paolo Bonzini wrote: >> >> >> On 14/11/2016 16:40, Marcelo Tosatti wrote: >>> static bool kvmclock_src_use_reliable_get_clock(void *opaque) >>> { >>> KVMClockState *s = opaque; >>> >>> /* >>> * On machine types that support reliable KVM_GET_CLOCK, >>> * if host kernel does provide reliable KVM_GET_CLOCK, >>> * set src_use_reliable_get_clock=true so that destination >>> * avoids reading kvmclock from memory. >>> */ >>> if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) >>> { >>> s->src_use_reliable_get_clock = true; >>> } >>> >>> return s->mach_use_reliable_get_clock; >>> } >>> >>> >>> Ah, OK, done. >> >> s->src_use_reliable_get_clock should not be set with >> KVM_CHECK_EXTENSION, but rather from the flags returned by KVM_GET_CLOCK. > > Well, thats not right: What matters is the presence of get_kvmclock_ns > which returns a value that the guest sees. > > get_kernel_monotonic_clock() + kvmclock_offset + > (rdtsc() - tsc_timestamp) > > IOW what the guest sees. And you changed that in > > commit 108b249c453dd7132599ab6dc7e435a7036c193f > Author: Paolo Bonzini <pbonzini@redhat.com> > Date: Thu Sep 1 14:21:03 2016 +0200 > > KVM: x86: introduce get_kvmclock_ns > > And the correct behaviour (once KVM_GET_CLOCK is fixed per > previous message to return rdtsc - tsc_timestamp for the > non masterclock case) depends on this commit above, > not on masterclock. This commit in turn only gets the correct behavior if "vcpu->arch.hv_clock.flags & PVCLOCK_TSC_STABLE_BIT" (and it will be changed soon to ka->use_masterclock). KVM_CHECK_EXTENSION can still return KVM_CLOCK_TSC_STABLE even if the masterclock is disabled, because KVM_CHECK_EXTENSION only tells you which flags are known for this version of the KVM module. To see if the masterclock is enabled _now_, you need to check what KVM_GET_CLOCK sets in the flags. From the KVM_CLOCK_TSC_STABLE patch: user_ns.flags = kvm->arch.use_master_clock ? KVM_CLOCK_TSC_STABLE : 0; Thanks, Paolo >>> So s->src_use_reliable_get_clock is only used to indicate >>> to the destination that: "you can use KVM_GET_CLOCK value, >>> its safe". >> >> Yes, we agree. I was listing all the points, not just those where we >> disagree. Actually I'm not sure where we disagree, except on using >> flags from KVM_CHECK_EXTENSION vs. flags from KVM_GET_CLOCK... >> >> Paolo ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration 2016-11-14 17:20 ` [Qemu-devel] " Paolo Bonzini @ 2016-11-14 18:15 ` Marcelo Tosatti -1 siblings, 0 replies; 46+ messages in thread From: Marcelo Tosatti @ 2016-11-14 18:15 UTC (permalink / raw) To: Paolo Bonzini Cc: kvm, qemu-devel, Dr. David Alan Gilbert, Juan Quintela, Radim Krcmar, Eduardo Habkost On Mon, Nov 14, 2016 at 06:20:29PM +0100, Paolo Bonzini wrote: > > > On 14/11/2016 18:13, Marcelo Tosatti wrote: > > On Mon, Nov 14, 2016 at 05:43:33PM +0100, Paolo Bonzini wrote: > >> > >> > >> On 14/11/2016 16:40, Marcelo Tosatti wrote: > >>> static bool kvmclock_src_use_reliable_get_clock(void *opaque) > >>> { > >>> KVMClockState *s = opaque; > >>> > >>> /* > >>> * On machine types that support reliable KVM_GET_CLOCK, > >>> * if host kernel does provide reliable KVM_GET_CLOCK, > >>> * set src_use_reliable_get_clock=true so that destination > >>> * avoids reading kvmclock from memory. > >>> */ > >>> if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) > >>> { > >>> s->src_use_reliable_get_clock = true; > >>> } > >>> > >>> return s->mach_use_reliable_get_clock; > >>> } > >>> > >>> > >>> Ah, OK, done. > >> > >> s->src_use_reliable_get_clock should not be set with > >> KVM_CHECK_EXTENSION, but rather from the flags returned by KVM_GET_CLOCK. > > > > Well, thats not right: What matters is the presence of get_kvmclock_ns > > which returns a value that the guest sees. > > > > get_kernel_monotonic_clock() + kvmclock_offset + > > (rdtsc() - tsc_timestamp) > > > > IOW what the guest sees. And you changed that in > > > > commit 108b249c453dd7132599ab6dc7e435a7036c193f > > Author: Paolo Bonzini <pbonzini@redhat.com> > > Date: Thu Sep 1 14:21:03 2016 +0200 > > > > KVM: x86: introduce get_kvmclock_ns > > > > And the correct behaviour (once KVM_GET_CLOCK is fixed per > > previous message to return rdtsc - tsc_timestamp for the > > non masterclock case) depends on this commit above, > > not on masterclock. > > This commit in turn only gets the correct behavior if > "vcpu->arch.hv_clock.flags & PVCLOCK_TSC_STABLE_BIT" (and it will be > changed soon to ka->use_masterclock). KVM_CHECK_EXTENSION can still > return KVM_CLOCK_TSC_STABLE even if the masterclock is disabled, > because KVM_CHECK_EXTENSION only tells you which flags are known for > this version of the KVM module. What QEMU wants is to use KVM_GET_CLOCK at pre_save independently of whether masterclock is enabled or not... it just depends on KVM_GET_CLOCK being correct for the masterclock case (108b249c453dd7132599ab6dc7e435a7036c193f). So a "reliable KVM_GET_CLOCK" (that does not timebackward when masterclock is enabled) is much simpler to userspace than "whether masterclock is enabled or not". If you have a reason why that should not be the case, let me know. > To see if the masterclock is enabled _now_, you need to check what > KVM_GET_CLOCK sets in the flags. From the KVM_CLOCK_TSC_STABLE patch: > > user_ns.flags = kvm->arch.use_master_clock ? KVM_CLOCK_TSC_STABLE : 0; Again, whether masterclock is enable is independent of being able to use KVM_GET_CLOCK at pre_save. ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [Qemu-devel] [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration @ 2016-11-14 18:15 ` Marcelo Tosatti 0 siblings, 0 replies; 46+ messages in thread From: Marcelo Tosatti @ 2016-11-14 18:15 UTC (permalink / raw) To: Paolo Bonzini Cc: kvm, qemu-devel, Dr. David Alan Gilbert, Juan Quintela, Radim Krcmar, Eduardo Habkost On Mon, Nov 14, 2016 at 06:20:29PM +0100, Paolo Bonzini wrote: > > > On 14/11/2016 18:13, Marcelo Tosatti wrote: > > On Mon, Nov 14, 2016 at 05:43:33PM +0100, Paolo Bonzini wrote: > >> > >> > >> On 14/11/2016 16:40, Marcelo Tosatti wrote: > >>> static bool kvmclock_src_use_reliable_get_clock(void *opaque) > >>> { > >>> KVMClockState *s = opaque; > >>> > >>> /* > >>> * On machine types that support reliable KVM_GET_CLOCK, > >>> * if host kernel does provide reliable KVM_GET_CLOCK, > >>> * set src_use_reliable_get_clock=true so that destination > >>> * avoids reading kvmclock from memory. > >>> */ > >>> if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) > >>> { > >>> s->src_use_reliable_get_clock = true; > >>> } > >>> > >>> return s->mach_use_reliable_get_clock; > >>> } > >>> > >>> > >>> Ah, OK, done. > >> > >> s->src_use_reliable_get_clock should not be set with > >> KVM_CHECK_EXTENSION, but rather from the flags returned by KVM_GET_CLOCK. > > > > Well, thats not right: What matters is the presence of get_kvmclock_ns > > which returns a value that the guest sees. > > > > get_kernel_monotonic_clock() + kvmclock_offset + > > (rdtsc() - tsc_timestamp) > > > > IOW what the guest sees. And you changed that in > > > > commit 108b249c453dd7132599ab6dc7e435a7036c193f > > Author: Paolo Bonzini <pbonzini@redhat.com> > > Date: Thu Sep 1 14:21:03 2016 +0200 > > > > KVM: x86: introduce get_kvmclock_ns > > > > And the correct behaviour (once KVM_GET_CLOCK is fixed per > > previous message to return rdtsc - tsc_timestamp for the > > non masterclock case) depends on this commit above, > > not on masterclock. > > This commit in turn only gets the correct behavior if > "vcpu->arch.hv_clock.flags & PVCLOCK_TSC_STABLE_BIT" (and it will be > changed soon to ka->use_masterclock). KVM_CHECK_EXTENSION can still > return KVM_CLOCK_TSC_STABLE even if the masterclock is disabled, > because KVM_CHECK_EXTENSION only tells you which flags are known for > this version of the KVM module. What QEMU wants is to use KVM_GET_CLOCK at pre_save independently of whether masterclock is enabled or not... it just depends on KVM_GET_CLOCK being correct for the masterclock case (108b249c453dd7132599ab6dc7e435a7036c193f). So a "reliable KVM_GET_CLOCK" (that does not timebackward when masterclock is enabled) is much simpler to userspace than "whether masterclock is enabled or not". If you have a reason why that should not be the case, let me know. > To see if the masterclock is enabled _now_, you need to check what > KVM_GET_CLOCK sets in the flags. From the KVM_CLOCK_TSC_STABLE patch: > > user_ns.flags = kvm->arch.use_master_clock ? KVM_CLOCK_TSC_STABLE : 0; Again, whether masterclock is enable is independent of being able to use KVM_GET_CLOCK at pre_save. ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration 2016-11-14 18:15 ` [Qemu-devel] " Marcelo Tosatti @ 2016-11-17 12:16 ` Marcelo Tosatti -1 siblings, 0 replies; 46+ messages in thread From: Marcelo Tosatti @ 2016-11-17 12:16 UTC (permalink / raw) To: Paolo Bonzini Cc: Eduardo Habkost, kvm, Juan Quintela, Radim Krcmar, qemu-devel, Dr. David Alan Gilbert On Mon, Nov 14, 2016 at 04:15:18PM -0200, Marcelo Tosatti wrote: > On Mon, Nov 14, 2016 at 06:20:29PM +0100, Paolo Bonzini wrote: > > > > > > On 14/11/2016 18:13, Marcelo Tosatti wrote: > > > On Mon, Nov 14, 2016 at 05:43:33PM +0100, Paolo Bonzini wrote: > > >> > > >> > > >> On 14/11/2016 16:40, Marcelo Tosatti wrote: > > >>> static bool kvmclock_src_use_reliable_get_clock(void *opaque) > > >>> { > > >>> KVMClockState *s = opaque; > > >>> > > >>> /* > > >>> * On machine types that support reliable KVM_GET_CLOCK, > > >>> * if host kernel does provide reliable KVM_GET_CLOCK, > > >>> * set src_use_reliable_get_clock=true so that destination > > >>> * avoids reading kvmclock from memory. > > >>> */ > > >>> if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) > > >>> { > > >>> s->src_use_reliable_get_clock = true; > > >>> } > > >>> > > >>> return s->mach_use_reliable_get_clock; > > >>> } > > >>> > > >>> > > >>> Ah, OK, done. > > >> > > >> s->src_use_reliable_get_clock should not be set with > > >> KVM_CHECK_EXTENSION, but rather from the flags returned by KVM_GET_CLOCK. > > > > > > Well, thats not right: What matters is the presence of get_kvmclock_ns > > > which returns a value that the guest sees. > > > > > > get_kernel_monotonic_clock() + kvmclock_offset + > > > (rdtsc() - tsc_timestamp) > > > > > > IOW what the guest sees. And you changed that in > > > > > > commit 108b249c453dd7132599ab6dc7e435a7036c193f > > > Author: Paolo Bonzini <pbonzini@redhat.com> > > > Date: Thu Sep 1 14:21:03 2016 +0200 > > > > > > KVM: x86: introduce get_kvmclock_ns > > > > > > And the correct behaviour (once KVM_GET_CLOCK is fixed per > > > previous message to return rdtsc - tsc_timestamp for the > > > non masterclock case) depends on this commit above, > > > not on masterclock. > > > > This commit in turn only gets the correct behavior if > > "vcpu->arch.hv_clock.flags & PVCLOCK_TSC_STABLE_BIT" (and it will be > > changed soon to ka->use_masterclock). KVM_CHECK_EXTENSION can still > > return KVM_CLOCK_TSC_STABLE even if the masterclock is disabled, > > because KVM_CHECK_EXTENSION only tells you which flags are known for > > this version of the KVM module. > > What QEMU wants is to use KVM_GET_CLOCK at pre_save independently > of whether masterclock is enabled or not... it just depends > on KVM_GET_CLOCK being correct for the masterclock case > (108b249c453dd7132599ab6dc7e435a7036c193f). > > So a "reliable KVM_GET_CLOCK" (that does not timebackward > when masterclock is enabled) is much simpler to userspace > than "whether masterclock is enabled or not". > > If you have a reason why that should not be the case, > let me know. > > > To see if the masterclock is enabled _now_, you need to check what > > KVM_GET_CLOCK sets in the flags. From the KVM_CLOCK_TSC_STABLE patch: > > > > user_ns.flags = kvm->arch.use_master_clock ? KVM_CLOCK_TSC_STABLE : 0; > > Again, whether masterclock is enable is independent of > being able to use KVM_GET_CLOCK at pre_save. Is this point OK ? Using break; + case KVM_CAP_ADJUST_CLOCK: + r = KVM_CLOCK_TSC_STABLE; + break; To infer whether KVM_GET_CLOCK is fixed for the monotonic case. ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [Qemu-devel] [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration @ 2016-11-17 12:16 ` Marcelo Tosatti 0 siblings, 0 replies; 46+ messages in thread From: Marcelo Tosatti @ 2016-11-17 12:16 UTC (permalink / raw) To: Paolo Bonzini Cc: kvm, qemu-devel, Dr. David Alan Gilbert, Juan Quintela, Radim Krcmar, Eduardo Habkost On Mon, Nov 14, 2016 at 04:15:18PM -0200, Marcelo Tosatti wrote: > On Mon, Nov 14, 2016 at 06:20:29PM +0100, Paolo Bonzini wrote: > > > > > > On 14/11/2016 18:13, Marcelo Tosatti wrote: > > > On Mon, Nov 14, 2016 at 05:43:33PM +0100, Paolo Bonzini wrote: > > >> > > >> > > >> On 14/11/2016 16:40, Marcelo Tosatti wrote: > > >>> static bool kvmclock_src_use_reliable_get_clock(void *opaque) > > >>> { > > >>> KVMClockState *s = opaque; > > >>> > > >>> /* > > >>> * On machine types that support reliable KVM_GET_CLOCK, > > >>> * if host kernel does provide reliable KVM_GET_CLOCK, > > >>> * set src_use_reliable_get_clock=true so that destination > > >>> * avoids reading kvmclock from memory. > > >>> */ > > >>> if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) > > >>> { > > >>> s->src_use_reliable_get_clock = true; > > >>> } > > >>> > > >>> return s->mach_use_reliable_get_clock; > > >>> } > > >>> > > >>> > > >>> Ah, OK, done. > > >> > > >> s->src_use_reliable_get_clock should not be set with > > >> KVM_CHECK_EXTENSION, but rather from the flags returned by KVM_GET_CLOCK. > > > > > > Well, thats not right: What matters is the presence of get_kvmclock_ns > > > which returns a value that the guest sees. > > > > > > get_kernel_monotonic_clock() + kvmclock_offset + > > > (rdtsc() - tsc_timestamp) > > > > > > IOW what the guest sees. And you changed that in > > > > > > commit 108b249c453dd7132599ab6dc7e435a7036c193f > > > Author: Paolo Bonzini <pbonzini@redhat.com> > > > Date: Thu Sep 1 14:21:03 2016 +0200 > > > > > > KVM: x86: introduce get_kvmclock_ns > > > > > > And the correct behaviour (once KVM_GET_CLOCK is fixed per > > > previous message to return rdtsc - tsc_timestamp for the > > > non masterclock case) depends on this commit above, > > > not on masterclock. > > > > This commit in turn only gets the correct behavior if > > "vcpu->arch.hv_clock.flags & PVCLOCK_TSC_STABLE_BIT" (and it will be > > changed soon to ka->use_masterclock). KVM_CHECK_EXTENSION can still > > return KVM_CLOCK_TSC_STABLE even if the masterclock is disabled, > > because KVM_CHECK_EXTENSION only tells you which flags are known for > > this version of the KVM module. > > What QEMU wants is to use KVM_GET_CLOCK at pre_save independently > of whether masterclock is enabled or not... it just depends > on KVM_GET_CLOCK being correct for the masterclock case > (108b249c453dd7132599ab6dc7e435a7036c193f). > > So a "reliable KVM_GET_CLOCK" (that does not timebackward > when masterclock is enabled) is much simpler to userspace > than "whether masterclock is enabled or not". > > If you have a reason why that should not be the case, > let me know. > > > To see if the masterclock is enabled _now_, you need to check what > > KVM_GET_CLOCK sets in the flags. From the KVM_CLOCK_TSC_STABLE patch: > > > > user_ns.flags = kvm->arch.use_master_clock ? KVM_CLOCK_TSC_STABLE : 0; > > Again, whether masterclock is enable is independent of > being able to use KVM_GET_CLOCK at pre_save. Is this point OK ? Using break; + case KVM_CAP_ADJUST_CLOCK: + r = KVM_CLOCK_TSC_STABLE; + break; To infer whether KVM_GET_CLOCK is fixed for the monotonic case. ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration 2016-11-17 12:16 ` [Qemu-devel] " Marcelo Tosatti @ 2016-11-17 13:03 ` Paolo Bonzini -1 siblings, 0 replies; 46+ messages in thread From: Paolo Bonzini @ 2016-11-17 13:03 UTC (permalink / raw) To: Marcelo Tosatti Cc: Eduardo Habkost, kvm, Radim Krcmar, Juan Quintela, qemu-devel, Dr. David Alan Gilbert On 17/11/2016 13:16, Marcelo Tosatti wrote: > On Mon, Nov 14, 2016 at 04:15:18PM -0200, Marcelo Tosatti wrote: >> On Mon, Nov 14, 2016 at 06:20:29PM +0100, Paolo Bonzini wrote: >>> >>> >>> On 14/11/2016 18:13, Marcelo Tosatti wrote: >>>> On Mon, Nov 14, 2016 at 05:43:33PM +0100, Paolo Bonzini wrote: >>>>> >>>>> >>>>> On 14/11/2016 16:40, Marcelo Tosatti wrote: >>>>>> static bool kvmclock_src_use_reliable_get_clock(void *opaque) >>>>>> { >>>>>> KVMClockState *s = opaque; >>>>>> >>>>>> /* >>>>>> * On machine types that support reliable KVM_GET_CLOCK, >>>>>> * if host kernel does provide reliable KVM_GET_CLOCK, >>>>>> * set src_use_reliable_get_clock=true so that destination >>>>>> * avoids reading kvmclock from memory. >>>>>> */ >>>>>> if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) >>>>>> { >>>>>> s->src_use_reliable_get_clock = true; >>>>>> } >>>>>> >>>>>> return s->mach_use_reliable_get_clock; >>>>>> } >>>>>> >>>>>> >>>>>> Ah, OK, done. >>>>> >>>>> s->src_use_reliable_get_clock should not be set with >>>>> KVM_CHECK_EXTENSION, but rather from the flags returned by KVM_GET_CLOCK. >>>> >>>> Well, thats not right: What matters is the presence of get_kvmclock_ns >>>> which returns a value that the guest sees. >>>> >>>> get_kernel_monotonic_clock() + kvmclock_offset + >>>> (rdtsc() - tsc_timestamp) >>>> >>>> IOW what the guest sees. And you changed that in >>>> >>>> commit 108b249c453dd7132599ab6dc7e435a7036c193f >>>> Author: Paolo Bonzini <pbonzini@redhat.com> >>>> Date: Thu Sep 1 14:21:03 2016 +0200 >>>> >>>> KVM: x86: introduce get_kvmclock_ns >>>> >>>> And the correct behaviour (once KVM_GET_CLOCK is fixed per >>>> previous message to return rdtsc - tsc_timestamp for the >>>> non masterclock case) depends on this commit above, >>>> not on masterclock. >>> >>> This commit in turn only gets the correct behavior if >>> "vcpu->arch.hv_clock.flags & PVCLOCK_TSC_STABLE_BIT" (and it will be >>> changed soon to ka->use_masterclock). KVM_CHECK_EXTENSION can still >>> return KVM_CLOCK_TSC_STABLE even if the masterclock is disabled, >>> because KVM_CHECK_EXTENSION only tells you which flags are known for >>> this version of the KVM module. >> >> What QEMU wants is to use KVM_GET_CLOCK at pre_save independently >> of whether masterclock is enabled or not... it just depends >> on KVM_GET_CLOCK being correct for the masterclock case >> (108b249c453dd7132599ab6dc7e435a7036c193f). >> >> So a "reliable KVM_GET_CLOCK" (that does not timebackward >> when masterclock is enabled) is much simpler to userspace >> than "whether masterclock is enabled or not". >> >> If you have a reason why that should not be the case, >> let me know. >> >>> To see if the masterclock is enabled _now_, you need to check what >>> KVM_GET_CLOCK sets in the flags. From the KVM_CLOCK_TSC_STABLE patch: >>> >>> user_ns.flags = kvm->arch.use_master_clock ? KVM_CLOCK_TSC_STABLE : 0; >> >> Again, whether masterclock is enable is independent of >> being able to use KVM_GET_CLOCK at pre_save. > > Is this point OK ? > > Using > > break; > + case KVM_CAP_ADJUST_CLOCK: > + r = KVM_CLOCK_TSC_STABLE; > + break; > > To infer whether KVM_GET_CLOCK is fixed for the monotonic case. Yes, I still haven't digested why it is correct (I need to read again what you wrote), but it is indeed correct to use KVM_CAP_ADJUST_CLOCK this way. Paolo ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [Qemu-devel] [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration @ 2016-11-17 13:03 ` Paolo Bonzini 0 siblings, 0 replies; 46+ messages in thread From: Paolo Bonzini @ 2016-11-17 13:03 UTC (permalink / raw) To: Marcelo Tosatti Cc: Eduardo Habkost, kvm, Juan Quintela, Radim Krcmar, qemu-devel, Dr. David Alan Gilbert On 17/11/2016 13:16, Marcelo Tosatti wrote: > On Mon, Nov 14, 2016 at 04:15:18PM -0200, Marcelo Tosatti wrote: >> On Mon, Nov 14, 2016 at 06:20:29PM +0100, Paolo Bonzini wrote: >>> >>> >>> On 14/11/2016 18:13, Marcelo Tosatti wrote: >>>> On Mon, Nov 14, 2016 at 05:43:33PM +0100, Paolo Bonzini wrote: >>>>> >>>>> >>>>> On 14/11/2016 16:40, Marcelo Tosatti wrote: >>>>>> static bool kvmclock_src_use_reliable_get_clock(void *opaque) >>>>>> { >>>>>> KVMClockState *s = opaque; >>>>>> >>>>>> /* >>>>>> * On machine types that support reliable KVM_GET_CLOCK, >>>>>> * if host kernel does provide reliable KVM_GET_CLOCK, >>>>>> * set src_use_reliable_get_clock=true so that destination >>>>>> * avoids reading kvmclock from memory. >>>>>> */ >>>>>> if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) >>>>>> { >>>>>> s->src_use_reliable_get_clock = true; >>>>>> } >>>>>> >>>>>> return s->mach_use_reliable_get_clock; >>>>>> } >>>>>> >>>>>> >>>>>> Ah, OK, done. >>>>> >>>>> s->src_use_reliable_get_clock should not be set with >>>>> KVM_CHECK_EXTENSION, but rather from the flags returned by KVM_GET_CLOCK. >>>> >>>> Well, thats not right: What matters is the presence of get_kvmclock_ns >>>> which returns a value that the guest sees. >>>> >>>> get_kernel_monotonic_clock() + kvmclock_offset + >>>> (rdtsc() - tsc_timestamp) >>>> >>>> IOW what the guest sees. And you changed that in >>>> >>>> commit 108b249c453dd7132599ab6dc7e435a7036c193f >>>> Author: Paolo Bonzini <pbonzini@redhat.com> >>>> Date: Thu Sep 1 14:21:03 2016 +0200 >>>> >>>> KVM: x86: introduce get_kvmclock_ns >>>> >>>> And the correct behaviour (once KVM_GET_CLOCK is fixed per >>>> previous message to return rdtsc - tsc_timestamp for the >>>> non masterclock case) depends on this commit above, >>>> not on masterclock. >>> >>> This commit in turn only gets the correct behavior if >>> "vcpu->arch.hv_clock.flags & PVCLOCK_TSC_STABLE_BIT" (and it will be >>> changed soon to ka->use_masterclock). KVM_CHECK_EXTENSION can still >>> return KVM_CLOCK_TSC_STABLE even if the masterclock is disabled, >>> because KVM_CHECK_EXTENSION only tells you which flags are known for >>> this version of the KVM module. >> >> What QEMU wants is to use KVM_GET_CLOCK at pre_save independently >> of whether masterclock is enabled or not... it just depends >> on KVM_GET_CLOCK being correct for the masterclock case >> (108b249c453dd7132599ab6dc7e435a7036c193f). >> >> So a "reliable KVM_GET_CLOCK" (that does not timebackward >> when masterclock is enabled) is much simpler to userspace >> than "whether masterclock is enabled or not". >> >> If you have a reason why that should not be the case, >> let me know. >> >>> To see if the masterclock is enabled _now_, you need to check what >>> KVM_GET_CLOCK sets in the flags. From the KVM_CLOCK_TSC_STABLE patch: >>> >>> user_ns.flags = kvm->arch.use_master_clock ? KVM_CLOCK_TSC_STABLE : 0; >> >> Again, whether masterclock is enable is independent of >> being able to use KVM_GET_CLOCK at pre_save. > > Is this point OK ? > > Using > > break; > + case KVM_CAP_ADJUST_CLOCK: > + r = KVM_CLOCK_TSC_STABLE; > + break; > > To infer whether KVM_GET_CLOCK is fixed for the monotonic case. Yes, I still haven't digested why it is correct (I need to read again what you wrote), but it is indeed correct to use KVM_CAP_ADJUST_CLOCK this way. Paolo ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration 2016-11-17 12:16 ` [Qemu-devel] " Marcelo Tosatti @ 2016-11-28 13:47 ` Paolo Bonzini -1 siblings, 0 replies; 46+ messages in thread From: Paolo Bonzini @ 2016-11-28 13:47 UTC (permalink / raw) To: Marcelo Tosatti Cc: kvm, qemu-devel, Dr. David Alan Gilbert, Juan Quintela, Radim Krcmar, Eduardo Habkost On 17/11/2016 13:16, Marcelo Tosatti wrote: > What QEMU wants is to use KVM_GET_CLOCK at pre_save independently > of whether masterclock is enabled or not... it just depends > on KVM_GET_CLOCK being correct for the masterclock case > (108b249c453dd7132599ab6dc7e435a7036c193f). > > So a "reliable KVM_GET_CLOCK" (that does not timebackward > when masterclock is enabled) is much simpler to userspace > than "whether masterclock is enabled or not". > > If you have a reason why that should not be the case, > let me know. I still cannot understand this case. If the source has masterclock _disabled_, shouldn't it read kvmclock from memory? But that's not what your patch does. To be clear, what I mean is: diff --git a/hw/i386/kvm/clock.c b/hw/i386/kvm/clock.c index 653b0b4..6f6e2dc 100644 --- a/hw/i386/kvm/clock.c +++ b/hw/i386/kvm/clock.c @@ -97,6 +97,7 @@ static uint64_t kvm_get_clock(void) fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret)); abort(); } + s->src_use_reliable_get_clock = data.flags & KVM_CLOCK_TSC_STABLE; return data.clock; } @@ -110,34 +111,19 @@ static void kvmclock_vm_state_change(void *opaque, int running, if (running) { struct kvm_clock_data data = {}; - uint64_t pvclock_via_mem = 0; - /* local (running VM) restore */ - if (s->clock_valid) { - /* - * if host does not support reliable KVM_GET_CLOCK, - * read kvmclock value from memory - */ - if (!kvm_has_adjust_clock_stable()) { - pvclock_via_mem = kvmclock_current_nsec(s); - } - /* migration/savevm/init restore */ - } else { - /* - * use s->clock in case machine uses reliable - * get clock and source host supported - * reliable get clock - */ - if (!s->src_use_reliable_get_clock) { - pvclock_via_mem = kvmclock_current_nsec(s); + /* + * if last KVM_GET_CLOCK did not retrieve a reliable value, + * we can't rely on the saved clock value. Just discard it and + * read kvmclock value from memory + */ + if (!s->src_use_reliable_get_clock) { + uint64_t pvclock_via_mem = kvmclock_current_nsec(s); + if (pvclock_via_mem) { + s->clock = pvclock_via_mem; } } - /* We can't rely on the saved clock value, just discard it */ - if (pvclock_via_mem) { - s->clock = pvclock_via_mem; - } - s->clock_valid = false; data.clock = s->clock; @@ -181,16 +168,6 @@ static void kvmclock_realize(DeviceState *dev, Error **errp) { KVMClockState *s = KVM_CLOCK(dev); - /* - * On machine types that support reliable KVM_GET_CLOCK, - * if host kernel does provide reliable KVM_GET_CLOCK, - * set src_use_reliable_get_clock=true so that destination - * avoids reading kvmclock from memory. - */ - if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) { - s->src_use_reliable_get_clock = true; - } - qemu_add_vm_change_state_handler(kvmclock_vm_state_change, s); } ^ permalink raw reply related [flat|nested] 46+ messages in thread
* Re: [Qemu-devel] [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration @ 2016-11-28 13:47 ` Paolo Bonzini 0 siblings, 0 replies; 46+ messages in thread From: Paolo Bonzini @ 2016-11-28 13:47 UTC (permalink / raw) To: Marcelo Tosatti Cc: kvm, qemu-devel, Dr. David Alan Gilbert, Juan Quintela, Radim Krcmar, Eduardo Habkost On 17/11/2016 13:16, Marcelo Tosatti wrote: > What QEMU wants is to use KVM_GET_CLOCK at pre_save independently > of whether masterclock is enabled or not... it just depends > on KVM_GET_CLOCK being correct for the masterclock case > (108b249c453dd7132599ab6dc7e435a7036c193f). > > So a "reliable KVM_GET_CLOCK" (that does not timebackward > when masterclock is enabled) is much simpler to userspace > than "whether masterclock is enabled or not". > > If you have a reason why that should not be the case, > let me know. I still cannot understand this case. If the source has masterclock _disabled_, shouldn't it read kvmclock from memory? But that's not what your patch does. To be clear, what I mean is: diff --git a/hw/i386/kvm/clock.c b/hw/i386/kvm/clock.c index 653b0b4..6f6e2dc 100644 --- a/hw/i386/kvm/clock.c +++ b/hw/i386/kvm/clock.c @@ -97,6 +97,7 @@ static uint64_t kvm_get_clock(void) fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret)); abort(); } + s->src_use_reliable_get_clock = data.flags & KVM_CLOCK_TSC_STABLE; return data.clock; } @@ -110,34 +111,19 @@ static void kvmclock_vm_state_change(void *opaque, int running, if (running) { struct kvm_clock_data data = {}; - uint64_t pvclock_via_mem = 0; - /* local (running VM) restore */ - if (s->clock_valid) { - /* - * if host does not support reliable KVM_GET_CLOCK, - * read kvmclock value from memory - */ - if (!kvm_has_adjust_clock_stable()) { - pvclock_via_mem = kvmclock_current_nsec(s); - } - /* migration/savevm/init restore */ - } else { - /* - * use s->clock in case machine uses reliable - * get clock and source host supported - * reliable get clock - */ - if (!s->src_use_reliable_get_clock) { - pvclock_via_mem = kvmclock_current_nsec(s); + /* + * if last KVM_GET_CLOCK did not retrieve a reliable value, + * we can't rely on the saved clock value. Just discard it and + * read kvmclock value from memory + */ + if (!s->src_use_reliable_get_clock) { + uint64_t pvclock_via_mem = kvmclock_current_nsec(s); + if (pvclock_via_mem) { + s->clock = pvclock_via_mem; } } - /* We can't rely on the saved clock value, just discard it */ - if (pvclock_via_mem) { - s->clock = pvclock_via_mem; - } - s->clock_valid = false; data.clock = s->clock; @@ -181,16 +168,6 @@ static void kvmclock_realize(DeviceState *dev, Error **errp) { KVMClockState *s = KVM_CLOCK(dev); - /* - * On machine types that support reliable KVM_GET_CLOCK, - * if host kernel does provide reliable KVM_GET_CLOCK, - * set src_use_reliable_get_clock=true so that destination - * avoids reading kvmclock from memory. - */ - if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) { - s->src_use_reliable_get_clock = true; - } - qemu_add_vm_change_state_handler(kvmclock_vm_state_change, s); } ^ permalink raw reply related [flat|nested] 46+ messages in thread
* Re: [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration 2016-11-28 13:47 ` [Qemu-devel] " Paolo Bonzini @ 2016-11-28 14:28 ` Eduardo Habkost -1 siblings, 0 replies; 46+ messages in thread From: Eduardo Habkost @ 2016-11-28 14:28 UTC (permalink / raw) To: Paolo Bonzini Cc: Marcelo Tosatti, kvm, qemu-devel, Dr. David Alan Gilbert, Juan Quintela, Radim Krcmar On Mon, Nov 28, 2016 at 02:47:18PM +0100, Paolo Bonzini wrote: > > > On 17/11/2016 13:16, Marcelo Tosatti wrote: > > What QEMU wants is to use KVM_GET_CLOCK at pre_save independently > > of whether masterclock is enabled or not... it just depends > > on KVM_GET_CLOCK being correct for the masterclock case > > (108b249c453dd7132599ab6dc7e435a7036c193f). > > > > So a "reliable KVM_GET_CLOCK" (that does not timebackward > > when masterclock is enabled) is much simpler to userspace > > than "whether masterclock is enabled or not". > > > > If you have a reason why that should not be the case, > > let me know. > > I still cannot understand this case. > > If the source has masterclock _disabled_, shouldn't it read kvmclock > from memory? But that's not what your patch does. To be clear, what > I mean is: > > diff --git a/hw/i386/kvm/clock.c b/hw/i386/kvm/clock.c > index 653b0b4..6f6e2dc 100644 > --- a/hw/i386/kvm/clock.c > +++ b/hw/i386/kvm/clock.c > @@ -97,6 +97,7 @@ static uint64_t kvm_get_clock(void) > fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret)); > abort(); > } > + s->src_use_reliable_get_clock = data.flags & KVM_CLOCK_TSC_STABLE; I still don't understand the reasoning behind kvm_has_adjust_clock_stable() vs (flags & KVM_CLOCK_TSC_STABLE), but on either case, updating src_use_reliable_get_clock inside kvm_get_clock() looks like the right thing to do. > return data.clock; > } > > @@ -110,34 +111,19 @@ static void kvmclock_vm_state_change(void *opaque, int running, > > if (running) { > struct kvm_clock_data data = {}; > - uint64_t pvclock_via_mem = 0; > > - /* local (running VM) restore */ > - if (s->clock_valid) { > - /* > - * if host does not support reliable KVM_GET_CLOCK, > - * read kvmclock value from memory > - */ > - if (!kvm_has_adjust_clock_stable()) { > - pvclock_via_mem = kvmclock_current_nsec(s); > - } > - /* migration/savevm/init restore */ > - } else { > - /* > - * use s->clock in case machine uses reliable > - * get clock and source host supported > - * reliable get clock > - */ > - if (!s->src_use_reliable_get_clock) { > - pvclock_via_mem = kvmclock_current_nsec(s); > + /* > + * if last KVM_GET_CLOCK did not retrieve a reliable value, > + * we can't rely on the saved clock value. Just discard it and > + * read kvmclock value from memory > + */ > + if (!s->src_use_reliable_get_clock) { > + uint64_t pvclock_via_mem = kvmclock_current_nsec(s); > + if (pvclock_via_mem) { > + s->clock = pvclock_via_mem; > } > } This is equivalent to the logic I suggested on my reply to v3. Much simpler. > > - /* We can't rely on the saved clock value, just discard it */ > - if (pvclock_via_mem) { > - s->clock = pvclock_via_mem; > - } > - > s->clock_valid = false; > > data.clock = s->clock; > @@ -181,16 +168,6 @@ static void kvmclock_realize(DeviceState *dev, Error **errp) > { > KVMClockState *s = KVM_CLOCK(dev); > > - /* > - * On machine types that support reliable KVM_GET_CLOCK, > - * if host kernel does provide reliable KVM_GET_CLOCK, > - * set src_use_reliable_get_clock=true so that destination > - * avoids reading kvmclock from memory. > - */ > - if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) { > - s->src_use_reliable_get_clock = true; > - } > - > qemu_add_vm_change_state_handler(kvmclock_vm_state_change, s); > } > -- Eduardo ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [Qemu-devel] [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration @ 2016-11-28 14:28 ` Eduardo Habkost 0 siblings, 0 replies; 46+ messages in thread From: Eduardo Habkost @ 2016-11-28 14:28 UTC (permalink / raw) To: Paolo Bonzini Cc: Marcelo Tosatti, kvm, qemu-devel, Dr. David Alan Gilbert, Juan Quintela, Radim Krcmar On Mon, Nov 28, 2016 at 02:47:18PM +0100, Paolo Bonzini wrote: > > > On 17/11/2016 13:16, Marcelo Tosatti wrote: > > What QEMU wants is to use KVM_GET_CLOCK at pre_save independently > > of whether masterclock is enabled or not... it just depends > > on KVM_GET_CLOCK being correct for the masterclock case > > (108b249c453dd7132599ab6dc7e435a7036c193f). > > > > So a "reliable KVM_GET_CLOCK" (that does not timebackward > > when masterclock is enabled) is much simpler to userspace > > than "whether masterclock is enabled or not". > > > > If you have a reason why that should not be the case, > > let me know. > > I still cannot understand this case. > > If the source has masterclock _disabled_, shouldn't it read kvmclock > from memory? But that's not what your patch does. To be clear, what > I mean is: > > diff --git a/hw/i386/kvm/clock.c b/hw/i386/kvm/clock.c > index 653b0b4..6f6e2dc 100644 > --- a/hw/i386/kvm/clock.c > +++ b/hw/i386/kvm/clock.c > @@ -97,6 +97,7 @@ static uint64_t kvm_get_clock(void) > fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret)); > abort(); > } > + s->src_use_reliable_get_clock = data.flags & KVM_CLOCK_TSC_STABLE; I still don't understand the reasoning behind kvm_has_adjust_clock_stable() vs (flags & KVM_CLOCK_TSC_STABLE), but on either case, updating src_use_reliable_get_clock inside kvm_get_clock() looks like the right thing to do. > return data.clock; > } > > @@ -110,34 +111,19 @@ static void kvmclock_vm_state_change(void *opaque, int running, > > if (running) { > struct kvm_clock_data data = {}; > - uint64_t pvclock_via_mem = 0; > > - /* local (running VM) restore */ > - if (s->clock_valid) { > - /* > - * if host does not support reliable KVM_GET_CLOCK, > - * read kvmclock value from memory > - */ > - if (!kvm_has_adjust_clock_stable()) { > - pvclock_via_mem = kvmclock_current_nsec(s); > - } > - /* migration/savevm/init restore */ > - } else { > - /* > - * use s->clock in case machine uses reliable > - * get clock and source host supported > - * reliable get clock > - */ > - if (!s->src_use_reliable_get_clock) { > - pvclock_via_mem = kvmclock_current_nsec(s); > + /* > + * if last KVM_GET_CLOCK did not retrieve a reliable value, > + * we can't rely on the saved clock value. Just discard it and > + * read kvmclock value from memory > + */ > + if (!s->src_use_reliable_get_clock) { > + uint64_t pvclock_via_mem = kvmclock_current_nsec(s); > + if (pvclock_via_mem) { > + s->clock = pvclock_via_mem; > } > } This is equivalent to the logic I suggested on my reply to v3. Much simpler. > > - /* We can't rely on the saved clock value, just discard it */ > - if (pvclock_via_mem) { > - s->clock = pvclock_via_mem; > - } > - > s->clock_valid = false; > > data.clock = s->clock; > @@ -181,16 +168,6 @@ static void kvmclock_realize(DeviceState *dev, Error **errp) > { > KVMClockState *s = KVM_CLOCK(dev); > > - /* > - * On machine types that support reliable KVM_GET_CLOCK, > - * if host kernel does provide reliable KVM_GET_CLOCK, > - * set src_use_reliable_get_clock=true so that destination > - * avoids reading kvmclock from memory. > - */ > - if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) { > - s->src_use_reliable_get_clock = true; > - } > - > qemu_add_vm_change_state_handler(kvmclock_vm_state_change, s); > } > -- Eduardo ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration 2016-11-28 14:28 ` [Qemu-devel] " Eduardo Habkost @ 2016-11-28 15:12 ` Paolo Bonzini -1 siblings, 0 replies; 46+ messages in thread From: Paolo Bonzini @ 2016-11-28 15:12 UTC (permalink / raw) To: Eduardo Habkost Cc: Marcelo Tosatti, kvm, qemu-devel, Dr. David Alan Gilbert, Juan Quintela, Radim Krcmar On 28/11/2016 15:28, Eduardo Habkost wrote: > > + s->src_use_reliable_get_clock = data.flags & KVM_CLOCK_TSC_STABLE; > > I still don't understand the reasoning behind > kvm_has_adjust_clock_stable() vs (flags & KVM_CLOCK_TSC_STABLE), > but on either case, updating src_use_reliable_get_clock inside > kvm_get_clock() looks like the right thing to do. There are three possibility: the kernel tells you the clock is stable, the kernel tells you the clock is unstable, the kernel is too old and doesn't tell you anything. Then: kvm_has_adjust_clock_stable() == true: if the clock is stable, KVM_CLOCK_TSC_STABLE will be set in "flags" if the clock is unstable, KVM_CLOCK_TSC_STABLE will be unset kvm_has_adjust_clock_stable() == false: you cannot know if the clock is stable Paolo ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [Qemu-devel] [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration @ 2016-11-28 15:12 ` Paolo Bonzini 0 siblings, 0 replies; 46+ messages in thread From: Paolo Bonzini @ 2016-11-28 15:12 UTC (permalink / raw) To: Eduardo Habkost Cc: Marcelo Tosatti, kvm, qemu-devel, Dr. David Alan Gilbert, Juan Quintela, Radim Krcmar On 28/11/2016 15:28, Eduardo Habkost wrote: > > + s->src_use_reliable_get_clock = data.flags & KVM_CLOCK_TSC_STABLE; > > I still don't understand the reasoning behind > kvm_has_adjust_clock_stable() vs (flags & KVM_CLOCK_TSC_STABLE), > but on either case, updating src_use_reliable_get_clock inside > kvm_get_clock() looks like the right thing to do. There are three possibility: the kernel tells you the clock is stable, the kernel tells you the clock is unstable, the kernel is too old and doesn't tell you anything. Then: kvm_has_adjust_clock_stable() == true: if the clock is stable, KVM_CLOCK_TSC_STABLE will be set in "flags" if the clock is unstable, KVM_CLOCK_TSC_STABLE will be unset kvm_has_adjust_clock_stable() == false: you cannot know if the clock is stable Paolo ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration 2016-11-28 13:47 ` [Qemu-devel] " Paolo Bonzini @ 2016-11-28 16:36 ` Marcelo Tosatti -1 siblings, 0 replies; 46+ messages in thread From: Marcelo Tosatti @ 2016-11-28 16:36 UTC (permalink / raw) To: Paolo Bonzini Cc: kvm, qemu-devel, Dr. David Alan Gilbert, Juan Quintela, Radim Krcmar, Eduardo Habkost On Mon, Nov 28, 2016 at 02:47:18PM +0100, Paolo Bonzini wrote: > > > On 17/11/2016 13:16, Marcelo Tosatti wrote: > > What QEMU wants is to use KVM_GET_CLOCK at pre_save independently > > of whether masterclock is enabled or not... it just depends > > on KVM_GET_CLOCK being correct for the masterclock case > > (108b249c453dd7132599ab6dc7e435a7036c193f). > > > > So a "reliable KVM_GET_CLOCK" (that does not timebackward > > when masterclock is enabled) is much simpler to userspace > > than "whether masterclock is enabled or not". > > > > If you have a reason why that should not be the case, > > let me know. > > I still cannot understand this case. > > If the source has masterclock _disabled_, shouldn't it read kvmclock > from memory? If the source masterclock is disabled, then the guest does not enable the optimization to not use a global variable to guarantee monotonicity. Therefore there will be no time backwards events (the timer backwards events crashed guests, and are the reason for reading from guest memory). So if there are no flaws in the reasoning above, no, there is no need to read from memory if masterclock is disabled. Can you state the reasons why you think it should be enabled? > But that's not what your patch does. Its on purpose with the data available. > To be clear, what > I mean is: > > diff --git a/hw/i386/kvm/clock.c b/hw/i386/kvm/clock.c > index 653b0b4..6f6e2dc 100644 > --- a/hw/i386/kvm/clock.c > +++ b/hw/i386/kvm/clock.c > @@ -97,6 +97,7 @@ static uint64_t kvm_get_clock(void) > fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret)); > abort(); > } > + s->src_use_reliable_get_clock = data.flags & KVM_CLOCK_TSC_STABLE; > return data.clock; > } > > @@ -110,34 +111,19 @@ static void kvmclock_vm_state_change(void *opaque, int running, > > if (running) { > struct kvm_clock_data data = {}; > - uint64_t pvclock_via_mem = 0; > > - /* local (running VM) restore */ > - if (s->clock_valid) { > - /* > - * if host does not support reliable KVM_GET_CLOCK, > - * read kvmclock value from memory > - */ > - if (!kvm_has_adjust_clock_stable()) { > - pvclock_via_mem = kvmclock_current_nsec(s); > - } > - /* migration/savevm/init restore */ > - } else { > - /* > - * use s->clock in case machine uses reliable > - * get clock and source host supported > - * reliable get clock > - */ > - if (!s->src_use_reliable_get_clock) { > - pvclock_via_mem = kvmclock_current_nsec(s); > + /* > + * if last KVM_GET_CLOCK did not retrieve a reliable value, > + * we can't rely on the saved clock value. Just discard it and > + * read kvmclock value from memory > + */ > + if (!s->src_use_reliable_get_clock) { > + uint64_t pvclock_via_mem = kvmclock_current_nsec(s); > + if (pvclock_via_mem) { > + s->clock = pvclock_via_mem; > } > } > > - /* We can't rely on the saved clock value, just discard it */ > - if (pvclock_via_mem) { > - s->clock = pvclock_via_mem; > - } > - > s->clock_valid = false; > > data.clock = s->clock; > @@ -181,16 +168,6 @@ static void kvmclock_realize(DeviceState *dev, Error **errp) > { > KVMClockState *s = KVM_CLOCK(dev); > > - /* > - * On machine types that support reliable KVM_GET_CLOCK, > - * if host kernel does provide reliable KVM_GET_CLOCK, > - * set src_use_reliable_get_clock=true so that destination > - * avoids reading kvmclock from memory. > - */ > - if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) { > - s->src_use_reliable_get_clock = true; > - } > - > qemu_add_vm_change_state_handler(kvmclock_vm_state_change, s); > } > ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [Qemu-devel] [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration @ 2016-11-28 16:36 ` Marcelo Tosatti 0 siblings, 0 replies; 46+ messages in thread From: Marcelo Tosatti @ 2016-11-28 16:36 UTC (permalink / raw) To: Paolo Bonzini Cc: kvm, qemu-devel, Dr. David Alan Gilbert, Juan Quintela, Radim Krcmar, Eduardo Habkost On Mon, Nov 28, 2016 at 02:47:18PM +0100, Paolo Bonzini wrote: > > > On 17/11/2016 13:16, Marcelo Tosatti wrote: > > What QEMU wants is to use KVM_GET_CLOCK at pre_save independently > > of whether masterclock is enabled or not... it just depends > > on KVM_GET_CLOCK being correct for the masterclock case > > (108b249c453dd7132599ab6dc7e435a7036c193f). > > > > So a "reliable KVM_GET_CLOCK" (that does not timebackward > > when masterclock is enabled) is much simpler to userspace > > than "whether masterclock is enabled or not". > > > > If you have a reason why that should not be the case, > > let me know. > > I still cannot understand this case. > > If the source has masterclock _disabled_, shouldn't it read kvmclock > from memory? If the source masterclock is disabled, then the guest does not enable the optimization to not use a global variable to guarantee monotonicity. Therefore there will be no time backwards events (the timer backwards events crashed guests, and are the reason for reading from guest memory). So if there are no flaws in the reasoning above, no, there is no need to read from memory if masterclock is disabled. Can you state the reasons why you think it should be enabled? > But that's not what your patch does. Its on purpose with the data available. > To be clear, what > I mean is: > > diff --git a/hw/i386/kvm/clock.c b/hw/i386/kvm/clock.c > index 653b0b4..6f6e2dc 100644 > --- a/hw/i386/kvm/clock.c > +++ b/hw/i386/kvm/clock.c > @@ -97,6 +97,7 @@ static uint64_t kvm_get_clock(void) > fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret)); > abort(); > } > + s->src_use_reliable_get_clock = data.flags & KVM_CLOCK_TSC_STABLE; > return data.clock; > } > > @@ -110,34 +111,19 @@ static void kvmclock_vm_state_change(void *opaque, int running, > > if (running) { > struct kvm_clock_data data = {}; > - uint64_t pvclock_via_mem = 0; > > - /* local (running VM) restore */ > - if (s->clock_valid) { > - /* > - * if host does not support reliable KVM_GET_CLOCK, > - * read kvmclock value from memory > - */ > - if (!kvm_has_adjust_clock_stable()) { > - pvclock_via_mem = kvmclock_current_nsec(s); > - } > - /* migration/savevm/init restore */ > - } else { > - /* > - * use s->clock in case machine uses reliable > - * get clock and source host supported > - * reliable get clock > - */ > - if (!s->src_use_reliable_get_clock) { > - pvclock_via_mem = kvmclock_current_nsec(s); > + /* > + * if last KVM_GET_CLOCK did not retrieve a reliable value, > + * we can't rely on the saved clock value. Just discard it and > + * read kvmclock value from memory > + */ > + if (!s->src_use_reliable_get_clock) { > + uint64_t pvclock_via_mem = kvmclock_current_nsec(s); > + if (pvclock_via_mem) { > + s->clock = pvclock_via_mem; > } > } > > - /* We can't rely on the saved clock value, just discard it */ > - if (pvclock_via_mem) { > - s->clock = pvclock_via_mem; > - } > - > s->clock_valid = false; > > data.clock = s->clock; > @@ -181,16 +168,6 @@ static void kvmclock_realize(DeviceState *dev, Error **errp) > { > KVMClockState *s = KVM_CLOCK(dev); > > - /* > - * On machine types that support reliable KVM_GET_CLOCK, > - * if host kernel does provide reliable KVM_GET_CLOCK, > - * set src_use_reliable_get_clock=true so that destination > - * avoids reading kvmclock from memory. > - */ > - if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) { > - s->src_use_reliable_get_clock = true; > - } > - > qemu_add_vm_change_state_handler(kvmclock_vm_state_change, s); > } > ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration 2016-11-28 16:36 ` [Qemu-devel] " Marcelo Tosatti @ 2016-11-28 17:30 ` Paolo Bonzini -1 siblings, 0 replies; 46+ messages in thread From: Paolo Bonzini @ 2016-11-28 17:30 UTC (permalink / raw) To: Marcelo Tosatti Cc: kvm, qemu-devel, Dr. David Alan Gilbert, Juan Quintela, Radim Krcmar, Eduardo Habkost On 28/11/2016 17:36, Marcelo Tosatti wrote: > If the source masterclock is disabled, then the guest does > not enable the optimization to not use a global variable > to guarantee monotonicity. Therefore there will be no > time backwards events (the timer backwards events crashed > guests, and are the reason for reading from guest memory). > > So if there are no flaws in the reasoning above, > no, there is no need to read from memory if > masterclock is disabled. Yeah, the reasoning is sound. So you go from what Eduardo and I were thinking: if last KVM_GET_CLOCK was not reliable then read from memory to this: if last KVM_GET_CLOCK was not reliable && masterclock is enabled read from memory but: - on an old kernel, the left side is always true and the right side is unknown (so we must assume it's true and read from memory) - on a new kernel, the two sides of the "&&" are exactly the opposite, so the result is always false and then it becomes if old kernel then read from memory Got it finally. :) Paolo > Can you state the reasons why you think it should be enabled? > ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [Qemu-devel] [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration @ 2016-11-28 17:30 ` Paolo Bonzini 0 siblings, 0 replies; 46+ messages in thread From: Paolo Bonzini @ 2016-11-28 17:30 UTC (permalink / raw) To: Marcelo Tosatti Cc: kvm, qemu-devel, Dr. David Alan Gilbert, Juan Quintela, Radim Krcmar, Eduardo Habkost On 28/11/2016 17:36, Marcelo Tosatti wrote: > If the source masterclock is disabled, then the guest does > not enable the optimization to not use a global variable > to guarantee monotonicity. Therefore there will be no > time backwards events (the timer backwards events crashed > guests, and are the reason for reading from guest memory). > > So if there are no flaws in the reasoning above, > no, there is no need to read from memory if > masterclock is disabled. Yeah, the reasoning is sound. So you go from what Eduardo and I were thinking: if last KVM_GET_CLOCK was not reliable then read from memory to this: if last KVM_GET_CLOCK was not reliable && masterclock is enabled read from memory but: - on an old kernel, the left side is always true and the right side is unknown (so we must assume it's true and read from memory) - on a new kernel, the two sides of the "&&" are exactly the opposite, so the result is always false and then it becomes if old kernel then read from memory Got it finally. :) Paolo > Can you state the reasons why you think it should be enabled? > ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration 2016-11-14 13:54 ` [Qemu-devel] " Paolo Bonzini @ 2016-11-14 14:11 ` Juan Quintela -1 siblings, 0 replies; 46+ messages in thread From: Juan Quintela @ 2016-11-14 14:11 UTC (permalink / raw) To: Paolo Bonzini Cc: Marcelo Tosatti, kvm, qemu-devel, Dr. David Alan Gilbert, Radim Krcmar, Eduardo Habkost Paolo Bonzini <pbonzini@redhat.com> wrote: > On 14/11/2016 13:36, Marcelo Tosatti wrote: >> + /* local (running VM) restore */ >> + if (s->clock_valid) { >> + /* >> + * if host does not support reliable KVM_GET_CLOCK, >> + * read kvmclock value from memory >> + */ >> + if (!kvm_has_adjust_clock_stable()) { >> + time_at_migration = kvmclock_current_nsec(s); > > Just assign to s->clock here... Agreed, I just wanted to make so many changes. > Also, just another small nit: please make your scripts use the "-p" > option on diff. :) YESSSS I was searching what functions the code belonged for :p Thanks, Juan. ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [Qemu-devel] [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration @ 2016-11-14 14:11 ` Juan Quintela 0 siblings, 0 replies; 46+ messages in thread From: Juan Quintela @ 2016-11-14 14:11 UTC (permalink / raw) To: Paolo Bonzini Cc: Marcelo Tosatti, kvm, qemu-devel, Dr. David Alan Gilbert, Radim Krcmar, Eduardo Habkost Paolo Bonzini <pbonzini@redhat.com> wrote: > On 14/11/2016 13:36, Marcelo Tosatti wrote: >> + /* local (running VM) restore */ >> + if (s->clock_valid) { >> + /* >> + * if host does not support reliable KVM_GET_CLOCK, >> + * read kvmclock value from memory >> + */ >> + if (!kvm_has_adjust_clock_stable()) { >> + time_at_migration = kvmclock_current_nsec(s); > > Just assign to s->clock here... Agreed, I just wanted to make so many changes. > Also, just another small nit: please make your scripts use the "-p" > option on diff. :) YESSSS I was searching what functions the code belonged for :p Thanks, Juan. ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration 2016-11-14 12:36 ` [Qemu-devel] " Marcelo Tosatti @ 2016-11-14 14:09 ` Juan Quintela -1 siblings, 0 replies; 46+ messages in thread From: Juan Quintela @ 2016-11-14 14:09 UTC (permalink / raw) To: Marcelo Tosatti Cc: kvm, qemu-devel, Dr. David Alan Gilbert, Paolo Bonzini, Radim Krcmar, Eduardo Habkost Marcelo Tosatti <mtosatti@redhat.com> wrote: > Check for KVM_CAP_ADJUST_CLOCK capability KVM_CLOCK_TSC_STABLE, which > indicates that KVM_GET_CLOCK returns a value as seen by the guest at > that moment. > > For new machine types, use this value rather than reading > from guest memory. > > This reduces kvmclock difference on migration from 5s to 0.1s > (when max_downtime == 5s). > > Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com> Acked-by: Juan Quintela <quintela@redhat.com> But, if you have to respin it .... > + /* whether machine supports reliable KVM_GET_CLOCK */ > + bool mach_use_reliable_get_clock; > + > + /* whether source host supported reliable KVM_GET_CLOCK */ > + bool src_use_reliable_get_clock; This two names are really long, but I don't have better suggesitons :-() > if (running) { > struct kvm_clock_data data = {}; > - uint64_t time_at_migration = kvmclock_current_nsec(s); > + uint64_t time_at_migration = 0; This was not "time_at_migration", it was not already before, but just now it looks really weird. (as it was already faulty, this is why it is only a suggestion.) > > - s->clock_valid = false; > + /* local (running VM) restore */ > + if (s->clock_valid) { > + /* > + * if host does not support reliable KVM_GET_CLOCK, > + * read kvmclock value from memory > + */ > + if (!kvm_has_adjust_clock_stable()) { > + time_at_migration = kvmclock_current_nsec(s); > + } > + /* migration/savevm/init restore */ > + } else { > + /* > + * use s->clock in case machine uses reliable > + * get clock and host where vm was executing > + * supported reliable get clock > + */ This comment is just weird. Simplifying /* If A and B do C */ if (!A and || !B) { then D(); } Doing the opposite comment? Migration code looks rigth. Once said that, I continue hating clocks. Later, Juan. ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [Qemu-devel] [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration @ 2016-11-14 14:09 ` Juan Quintela 0 siblings, 0 replies; 46+ messages in thread From: Juan Quintela @ 2016-11-14 14:09 UTC (permalink / raw) To: Marcelo Tosatti Cc: kvm, qemu-devel, Dr. David Alan Gilbert, Paolo Bonzini, Radim Krcmar, Eduardo Habkost Marcelo Tosatti <mtosatti@redhat.com> wrote: > Check for KVM_CAP_ADJUST_CLOCK capability KVM_CLOCK_TSC_STABLE, which > indicates that KVM_GET_CLOCK returns a value as seen by the guest at > that moment. > > For new machine types, use this value rather than reading > from guest memory. > > This reduces kvmclock difference on migration from 5s to 0.1s > (when max_downtime == 5s). > > Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com> Acked-by: Juan Quintela <quintela@redhat.com> But, if you have to respin it .... > + /* whether machine supports reliable KVM_GET_CLOCK */ > + bool mach_use_reliable_get_clock; > + > + /* whether source host supported reliable KVM_GET_CLOCK */ > + bool src_use_reliable_get_clock; This two names are really long, but I don't have better suggesitons :-() > if (running) { > struct kvm_clock_data data = {}; > - uint64_t time_at_migration = kvmclock_current_nsec(s); > + uint64_t time_at_migration = 0; This was not "time_at_migration", it was not already before, but just now it looks really weird. (as it was already faulty, this is why it is only a suggestion.) > > - s->clock_valid = false; > + /* local (running VM) restore */ > + if (s->clock_valid) { > + /* > + * if host does not support reliable KVM_GET_CLOCK, > + * read kvmclock value from memory > + */ > + if (!kvm_has_adjust_clock_stable()) { > + time_at_migration = kvmclock_current_nsec(s); > + } > + /* migration/savevm/init restore */ > + } else { > + /* > + * use s->clock in case machine uses reliable > + * get clock and host where vm was executing > + * supported reliable get clock > + */ This comment is just weird. Simplifying /* If A and B do C */ if (!A and || !B) { then D(); } Doing the opposite comment? Migration code looks rigth. Once said that, I continue hating clocks. Later, Juan. ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration 2016-11-14 14:09 ` [Qemu-devel] " Juan Quintela @ 2016-11-14 15:37 ` Marcelo Tosatti -1 siblings, 0 replies; 46+ messages in thread From: Marcelo Tosatti @ 2016-11-14 15:37 UTC (permalink / raw) To: Juan Quintela Cc: kvm, qemu-devel, Dr. David Alan Gilbert, Paolo Bonzini, Radim Krcmar, Eduardo Habkost On Mon, Nov 14, 2016 at 03:09:46PM +0100, Juan Quintela wrote: > Marcelo Tosatti <mtosatti@redhat.com> wrote: > > Check for KVM_CAP_ADJUST_CLOCK capability KVM_CLOCK_TSC_STABLE, which > > indicates that KVM_GET_CLOCK returns a value as seen by the guest at > > that moment. > > > > For new machine types, use this value rather than reading > > from guest memory. > > > > This reduces kvmclock difference on migration from 5s to 0.1s > > (when max_downtime == 5s). > > > > Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com> > > Acked-by: Juan Quintela <quintela@redhat.com> > > But, if you have to respin it .... > > > > + /* whether machine supports reliable KVM_GET_CLOCK */ > > + bool mach_use_reliable_get_clock; > > + > > + /* whether source host supported reliable KVM_GET_CLOCK */ > > + bool src_use_reliable_get_clock; > > This two names are really long, but I don't have better suggesitons :-() > > if (running) { > > struct kvm_clock_data data = {}; > > - uint64_t time_at_migration = kvmclock_current_nsec(s); > > + uint64_t time_at_migration = 0; > > This was not "time_at_migration", it was not already before, but just > now it looks really weird. (as it was already faulty, this is why it is > only a suggestion.) > > > > > - s->clock_valid = false; > > + /* local (running VM) restore */ > > + if (s->clock_valid) { > > + /* > > + * if host does not support reliable KVM_GET_CLOCK, > > + * read kvmclock value from memory > > + */ > > + if (!kvm_has_adjust_clock_stable()) { > > + time_at_migration = kvmclock_current_nsec(s); > > + } > > + /* migration/savevm/init restore */ > > + } else { > > + /* > > + * use s->clock in case machine uses reliable > > + * get clock and host where vm was executing > > + * supported reliable get clock > > + */ > > This comment is just weird. Simplifying > /* If A and B do C */ > if (!A and || !B) { > then D(); > } > > Doing the opposite comment? > > Migration code looks rigth. Fixed those. > Once said that, I continue hating clocks. Me too, especially the biological one. ^ permalink raw reply [flat|nested] 46+ messages in thread
* Re: [Qemu-devel] [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration @ 2016-11-14 15:37 ` Marcelo Tosatti 0 siblings, 0 replies; 46+ messages in thread From: Marcelo Tosatti @ 2016-11-14 15:37 UTC (permalink / raw) To: Juan Quintela Cc: kvm, qemu-devel, Dr. David Alan Gilbert, Paolo Bonzini, Radim Krcmar, Eduardo Habkost On Mon, Nov 14, 2016 at 03:09:46PM +0100, Juan Quintela wrote: > Marcelo Tosatti <mtosatti@redhat.com> wrote: > > Check for KVM_CAP_ADJUST_CLOCK capability KVM_CLOCK_TSC_STABLE, which > > indicates that KVM_GET_CLOCK returns a value as seen by the guest at > > that moment. > > > > For new machine types, use this value rather than reading > > from guest memory. > > > > This reduces kvmclock difference on migration from 5s to 0.1s > > (when max_downtime == 5s). > > > > Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com> > > Acked-by: Juan Quintela <quintela@redhat.com> > > But, if you have to respin it .... > > > > + /* whether machine supports reliable KVM_GET_CLOCK */ > > + bool mach_use_reliable_get_clock; > > + > > + /* whether source host supported reliable KVM_GET_CLOCK */ > > + bool src_use_reliable_get_clock; > > This two names are really long, but I don't have better suggesitons :-() > > if (running) { > > struct kvm_clock_data data = {}; > > - uint64_t time_at_migration = kvmclock_current_nsec(s); > > + uint64_t time_at_migration = 0; > > This was not "time_at_migration", it was not already before, but just > now it looks really weird. (as it was already faulty, this is why it is > only a suggestion.) > > > > > - s->clock_valid = false; > > + /* local (running VM) restore */ > > + if (s->clock_valid) { > > + /* > > + * if host does not support reliable KVM_GET_CLOCK, > > + * read kvmclock value from memory > > + */ > > + if (!kvm_has_adjust_clock_stable()) { > > + time_at_migration = kvmclock_current_nsec(s); > > + } > > + /* migration/savevm/init restore */ > > + } else { > > + /* > > + * use s->clock in case machine uses reliable > > + * get clock and host where vm was executing > > + * supported reliable get clock > > + */ > > This comment is just weird. Simplifying > /* If A and B do C */ > if (!A and || !B) { > then D(); > } > > Doing the opposite comment? > > Migration code looks rigth. Fixed those. > Once said that, I continue hating clocks. Me too, especially the biological one. ^ permalink raw reply [flat|nested] 46+ messages in thread
end of thread, other threads:[~2016-11-28 17:30 UTC | newest] Thread overview: 46+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2016-11-14 12:36 [qemu patch 0/2] improve kvmclock difference on migration Marcelo Tosatti 2016-11-14 12:36 ` [Qemu-devel] " Marcelo Tosatti 2016-11-14 12:36 ` [qemu patch 1/2] kvm: sync linux headers Marcelo Tosatti 2016-11-14 12:36 ` [Qemu-devel] " Marcelo Tosatti 2016-11-14 12:36 ` [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration Marcelo Tosatti 2016-11-14 12:36 ` [Qemu-devel] " Marcelo Tosatti 2016-11-14 13:54 ` Paolo Bonzini 2016-11-14 13:54 ` [Qemu-devel] " Paolo Bonzini 2016-11-14 14:00 ` Marcelo Tosatti 2016-11-14 14:00 ` [Qemu-devel] " Marcelo Tosatti 2016-11-14 14:22 ` Paolo Bonzini 2016-11-14 14:22 ` [Qemu-devel] " Paolo Bonzini 2016-11-14 14:50 ` Marcelo Tosatti 2016-11-14 14:50 ` [Qemu-devel] " Marcelo Tosatti 2016-11-14 15:00 ` Paolo Bonzini 2016-11-14 15:00 ` [Qemu-devel] " Paolo Bonzini 2016-11-14 15:40 ` Marcelo Tosatti 2016-11-14 15:40 ` [Qemu-devel] " Marcelo Tosatti 2016-11-14 16:43 ` Paolo Bonzini 2016-11-14 16:43 ` [Qemu-devel] " Paolo Bonzini 2016-11-14 17:13 ` Marcelo Tosatti 2016-11-14 17:13 ` [Qemu-devel] " Marcelo Tosatti 2016-11-14 17:20 ` Paolo Bonzini 2016-11-14 17:20 ` [Qemu-devel] " Paolo Bonzini 2016-11-14 18:15 ` Marcelo Tosatti 2016-11-14 18:15 ` [Qemu-devel] " Marcelo Tosatti 2016-11-17 12:16 ` Marcelo Tosatti 2016-11-17 12:16 ` [Qemu-devel] " Marcelo Tosatti 2016-11-17 13:03 ` Paolo Bonzini 2016-11-17 13:03 ` [Qemu-devel] " Paolo Bonzini 2016-11-28 13:47 ` Paolo Bonzini 2016-11-28 13:47 ` [Qemu-devel] " Paolo Bonzini 2016-11-28 14:28 ` Eduardo Habkost 2016-11-28 14:28 ` [Qemu-devel] " Eduardo Habkost 2016-11-28 15:12 ` Paolo Bonzini 2016-11-28 15:12 ` [Qemu-devel] " Paolo Bonzini 2016-11-28 16:36 ` Marcelo Tosatti 2016-11-28 16:36 ` [Qemu-devel] " Marcelo Tosatti 2016-11-28 17:30 ` Paolo Bonzini 2016-11-28 17:30 ` [Qemu-devel] " Paolo Bonzini 2016-11-14 14:11 ` Juan Quintela 2016-11-14 14:11 ` [Qemu-devel] " Juan Quintela 2016-11-14 14:09 ` Juan Quintela 2016-11-14 14:09 ` [Qemu-devel] " Juan Quintela 2016-11-14 15:37 ` Marcelo Tosatti 2016-11-14 15:37 ` [Qemu-devel] " Marcelo Tosatti
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.