All of lore.kernel.org
 help / color / mirror / Atom feed
* [qemu patch 0/2] improve kvmclock difference on migration
@ 2016-11-14 12:36 ` Marcelo Tosatti
  0 siblings, 0 replies; 46+ messages in thread
From: Marcelo Tosatti @ 2016-11-14 12:36 UTC (permalink / raw)
  To: kvm
  Cc: qemu-devel, Dr. David Alan Gilbert, Paolo Bonzini, Juan Quintela,
	Radim Krcmar, Eduardo Habkost

See patches for details.



^ permalink raw reply	[flat|nested] 46+ messages in thread

* [Qemu-devel] [qemu patch 0/2] improve kvmclock difference on migration
@ 2016-11-14 12:36 ` Marcelo Tosatti
  0 siblings, 0 replies; 46+ messages in thread
From: Marcelo Tosatti @ 2016-11-14 12:36 UTC (permalink / raw)
  To: kvm
  Cc: qemu-devel, Dr. David Alan Gilbert, Paolo Bonzini, Juan Quintela,
	Radim Krcmar, Eduardo Habkost

See patches for details.

^ permalink raw reply	[flat|nested] 46+ messages in thread

* [qemu patch 1/2] kvm: sync linux headers
  2016-11-14 12:36 ` [Qemu-devel] " Marcelo Tosatti
@ 2016-11-14 12:36   ` Marcelo Tosatti
  -1 siblings, 0 replies; 46+ messages in thread
From: Marcelo Tosatti @ 2016-11-14 12:36 UTC (permalink / raw)
  To: kvm
  Cc: qemu-devel, Dr. David Alan Gilbert, Paolo Bonzini, Juan Quintela,
	Radim Krcmar, Eduardo Habkost, Marcelo Tosatti

[-- Attachment #1: sync-linux-headers.patch --]
[-- Type: text/plain, Size: 5449 bytes --]

Import KVM_CLOCK_TSC_STABLE.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

diff --git a/include/standard-headers/linux/input.h b/include/standard-headers/linux/input.h
index 7361a16..b472b85 100644
--- a/include/standard-headers/linux/input.h
+++ b/include/standard-headers/linux/input.h
@@ -245,6 +245,7 @@ struct input_mask {
 #define BUS_SPI			0x1C
 #define BUS_RMI			0x1D
 #define BUS_CEC			0x1E
+#define BUS_INTEL_ISHTP		0x1F
 
 /*
  * MT_TOOL types
diff --git a/include/standard-headers/linux/pci_regs.h b/include/standard-headers/linux/pci_regs.h
index 4040951..e5a2e68 100644
--- a/include/standard-headers/linux/pci_regs.h
+++ b/include/standard-headers/linux/pci_regs.h
@@ -612,6 +612,8 @@
  */
 #define PCI_EXP_DEVCAP2		36	/* Device Capabilities 2 */
 #define  PCI_EXP_DEVCAP2_ARI		0x00000020 /* Alternative Routing-ID */
+#define  PCI_EXP_DEVCAP2_ATOMIC_ROUTE	0x00000040 /* Atomic Op routing */
+#define PCI_EXP_DEVCAP2_ATOMIC_COMP64	0x00000100 /* Atomic 64-bit compare */
 #define  PCI_EXP_DEVCAP2_LTR		0x00000800 /* Latency tolerance reporting */
 #define  PCI_EXP_DEVCAP2_OBFF_MASK	0x000c0000 /* OBFF support mechanism */
 #define  PCI_EXP_DEVCAP2_OBFF_MSG	0x00040000 /* New message signaling */
@@ -619,6 +621,7 @@
 #define PCI_EXP_DEVCTL2		40	/* Device Control 2 */
 #define  PCI_EXP_DEVCTL2_COMP_TIMEOUT	0x000f	/* Completion Timeout Value */
 #define  PCI_EXP_DEVCTL2_ARI		0x0020	/* Alternative Routing-ID */
+#define PCI_EXP_DEVCTL2_ATOMIC_REQ	0x0040	/* Set Atomic requests */
 #define  PCI_EXP_DEVCTL2_IDO_REQ_EN	0x0100	/* Allow IDO for requests */
 #define  PCI_EXP_DEVCTL2_IDO_CMP_EN	0x0200	/* Allow IDO for completions */
 #define  PCI_EXP_DEVCTL2_LTR_EN		0x0400	/* Enable LTR mechanism */
@@ -671,7 +674,8 @@
 #define PCI_EXT_CAP_ID_PMUX	0x1A	/* Protocol Multiplexing */
 #define PCI_EXT_CAP_ID_PASID	0x1B	/* Process Address Space ID */
 #define PCI_EXT_CAP_ID_DPC	0x1D	/* Downstream Port Containment */
-#define PCI_EXT_CAP_ID_MAX	PCI_EXT_CAP_ID_DPC
+#define PCI_EXT_CAP_ID_PTM	0x1F	/* Precision Time Measurement */
+#define PCI_EXT_CAP_ID_MAX	PCI_EXT_CAP_ID_PTM
 
 #define PCI_EXT_CAP_DSN_SIZEOF	12
 #define PCI_EXT_CAP_MCAST_ENDPOINT_SIZEOF 40
@@ -964,4 +968,13 @@
 
 #define PCI_EXP_DPC_SOURCE_ID		10	/* DPC Source Identifier */
 
+/* Precision Time Measurement */
+#define PCI_PTM_CAP			0x04	    /* PTM Capability */
+#define  PCI_PTM_CAP_REQ		0x00000001  /* Requester capable */
+#define  PCI_PTM_CAP_ROOT		0x00000004  /* Root capable */
+#define  PCI_PTM_GRANULARITY_MASK	0x0000FF00  /* Clock granularity */
+#define PCI_PTM_CTRL			0x08	    /* PTM Control */
+#define  PCI_PTM_CTRL_ENABLE		0x00000001  /* PTM enable */
+#define  PCI_PTM_CTRL_ROOT		0x00000002  /* Root select */
+
 #endif /* LINUX_PCI_REGS_H */
diff --git a/linux-headers/asm-arm/kvm.h b/linux-headers/asm-arm/kvm.h
index 541268c..2fb7859 100644
--- a/linux-headers/asm-arm/kvm.h
+++ b/linux-headers/asm-arm/kvm.h
@@ -84,6 +84,13 @@ struct kvm_regs {
 #define KVM_VGIC_V2_DIST_SIZE		0x1000
 #define KVM_VGIC_V2_CPU_SIZE		0x2000
 
+/* Supported VGICv3 address types  */
+#define KVM_VGIC_V3_ADDR_TYPE_DIST	2
+#define KVM_VGIC_V3_ADDR_TYPE_REDIST	3
+
+#define KVM_VGIC_V3_DIST_SIZE		SZ_64K
+#define KVM_VGIC_V3_REDIST_SIZE		(2 * SZ_64K)
+
 #define KVM_ARM_VCPU_POWER_OFF		0 /* CPU is started in OFF state */
 #define KVM_ARM_VCPU_PSCI_0_2		1 /* CPU uses PSCI v0.2 */
 
diff --git a/linux-headers/asm-x86/unistd_32.h b/linux-headers/asm-x86/unistd_32.h
index abeaf40..d45ea28 100644
--- a/linux-headers/asm-x86/unistd_32.h
+++ b/linux-headers/asm-x86/unistd_32.h
@@ -377,5 +377,8 @@
 #define __NR_copy_file_range 377
 #define __NR_preadv2 378
 #define __NR_pwritev2 379
+#define __NR_pkey_mprotect 380
+#define __NR_pkey_alloc 381
+#define __NR_pkey_free 382
 
 #endif /* _ASM_X86_UNISTD_32_H */
diff --git a/linux-headers/asm-x86/unistd_64.h b/linux-headers/asm-x86/unistd_64.h
index 73c3d1f..e22db91 100644
--- a/linux-headers/asm-x86/unistd_64.h
+++ b/linux-headers/asm-x86/unistd_64.h
@@ -330,5 +330,8 @@
 #define __NR_copy_file_range 326
 #define __NR_preadv2 327
 #define __NR_pwritev2 328
+#define __NR_pkey_mprotect 329
+#define __NR_pkey_alloc 330
+#define __NR_pkey_free 331
 
 #endif /* _ASM_X86_UNISTD_64_H */
diff --git a/linux-headers/asm-x86/unistd_x32.h b/linux-headers/asm-x86/unistd_x32.h
index e5aea76..84e58b2 100644
--- a/linux-headers/asm-x86/unistd_x32.h
+++ b/linux-headers/asm-x86/unistd_x32.h
@@ -283,6 +283,9 @@
 #define __NR_membarrier (__X32_SYSCALL_BIT + 324)
 #define __NR_mlock2 (__X32_SYSCALL_BIT + 325)
 #define __NR_copy_file_range (__X32_SYSCALL_BIT + 326)
+#define __NR_pkey_mprotect (__X32_SYSCALL_BIT + 329)
+#define __NR_pkey_alloc (__X32_SYSCALL_BIT + 330)
+#define __NR_pkey_free (__X32_SYSCALL_BIT + 331)
 #define __NR_rt_sigaction (__X32_SYSCALL_BIT + 512)
 #define __NR_rt_sigreturn (__X32_SYSCALL_BIT + 513)
 #define __NR_ioctl (__X32_SYSCALL_BIT + 514)
diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
index 4806e06..bb0ed71 100644
--- a/linux-headers/linux/kvm.h
+++ b/linux-headers/linux/kvm.h
@@ -972,12 +972,19 @@ struct kvm_irqfd {
 	__u8  pad[16];
 };
 
+/* For KVM_CAP_ADJUST_CLOCK */
+
+/* Do not use 1, KVM_CHECK_EXTENSION returned it before we had flags.  */
+#define KVM_CLOCK_TSC_STABLE		2
+
 struct kvm_clock_data {
 	__u64 clock;
 	__u32 flags;
 	__u32 pad[9];
 };
 
+/* For KVM_CAP_SW_TLB */
+
 #define KVM_MMU_FSL_BOOKE_NOHV		0
 #define KVM_MMU_FSL_BOOKE_HV		1
 



^ permalink raw reply related	[flat|nested] 46+ messages in thread

* [Qemu-devel] [qemu patch 1/2] kvm: sync linux headers
@ 2016-11-14 12:36   ` Marcelo Tosatti
  0 siblings, 0 replies; 46+ messages in thread
From: Marcelo Tosatti @ 2016-11-14 12:36 UTC (permalink / raw)
  To: kvm
  Cc: qemu-devel, Dr. David Alan Gilbert, Paolo Bonzini, Juan Quintela,
	Radim Krcmar, Eduardo Habkost, Marcelo Tosatti

[-- Attachment #1: sync-linux-headers.patch --]
[-- Type: text/plain, Size: 5447 bytes --]

Import KVM_CLOCK_TSC_STABLE.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

diff --git a/include/standard-headers/linux/input.h b/include/standard-headers/linux/input.h
index 7361a16..b472b85 100644
--- a/include/standard-headers/linux/input.h
+++ b/include/standard-headers/linux/input.h
@@ -245,6 +245,7 @@ struct input_mask {
 #define BUS_SPI			0x1C
 #define BUS_RMI			0x1D
 #define BUS_CEC			0x1E
+#define BUS_INTEL_ISHTP		0x1F
 
 /*
  * MT_TOOL types
diff --git a/include/standard-headers/linux/pci_regs.h b/include/standard-headers/linux/pci_regs.h
index 4040951..e5a2e68 100644
--- a/include/standard-headers/linux/pci_regs.h
+++ b/include/standard-headers/linux/pci_regs.h
@@ -612,6 +612,8 @@
  */
 #define PCI_EXP_DEVCAP2		36	/* Device Capabilities 2 */
 #define  PCI_EXP_DEVCAP2_ARI		0x00000020 /* Alternative Routing-ID */
+#define  PCI_EXP_DEVCAP2_ATOMIC_ROUTE	0x00000040 /* Atomic Op routing */
+#define PCI_EXP_DEVCAP2_ATOMIC_COMP64	0x00000100 /* Atomic 64-bit compare */
 #define  PCI_EXP_DEVCAP2_LTR		0x00000800 /* Latency tolerance reporting */
 #define  PCI_EXP_DEVCAP2_OBFF_MASK	0x000c0000 /* OBFF support mechanism */
 #define  PCI_EXP_DEVCAP2_OBFF_MSG	0x00040000 /* New message signaling */
@@ -619,6 +621,7 @@
 #define PCI_EXP_DEVCTL2		40	/* Device Control 2 */
 #define  PCI_EXP_DEVCTL2_COMP_TIMEOUT	0x000f	/* Completion Timeout Value */
 #define  PCI_EXP_DEVCTL2_ARI		0x0020	/* Alternative Routing-ID */
+#define PCI_EXP_DEVCTL2_ATOMIC_REQ	0x0040	/* Set Atomic requests */
 #define  PCI_EXP_DEVCTL2_IDO_REQ_EN	0x0100	/* Allow IDO for requests */
 #define  PCI_EXP_DEVCTL2_IDO_CMP_EN	0x0200	/* Allow IDO for completions */
 #define  PCI_EXP_DEVCTL2_LTR_EN		0x0400	/* Enable LTR mechanism */
@@ -671,7 +674,8 @@
 #define PCI_EXT_CAP_ID_PMUX	0x1A	/* Protocol Multiplexing */
 #define PCI_EXT_CAP_ID_PASID	0x1B	/* Process Address Space ID */
 #define PCI_EXT_CAP_ID_DPC	0x1D	/* Downstream Port Containment */
-#define PCI_EXT_CAP_ID_MAX	PCI_EXT_CAP_ID_DPC
+#define PCI_EXT_CAP_ID_PTM	0x1F	/* Precision Time Measurement */
+#define PCI_EXT_CAP_ID_MAX	PCI_EXT_CAP_ID_PTM
 
 #define PCI_EXT_CAP_DSN_SIZEOF	12
 #define PCI_EXT_CAP_MCAST_ENDPOINT_SIZEOF 40
@@ -964,4 +968,13 @@
 
 #define PCI_EXP_DPC_SOURCE_ID		10	/* DPC Source Identifier */
 
+/* Precision Time Measurement */
+#define PCI_PTM_CAP			0x04	    /* PTM Capability */
+#define  PCI_PTM_CAP_REQ		0x00000001  /* Requester capable */
+#define  PCI_PTM_CAP_ROOT		0x00000004  /* Root capable */
+#define  PCI_PTM_GRANULARITY_MASK	0x0000FF00  /* Clock granularity */
+#define PCI_PTM_CTRL			0x08	    /* PTM Control */
+#define  PCI_PTM_CTRL_ENABLE		0x00000001  /* PTM enable */
+#define  PCI_PTM_CTRL_ROOT		0x00000002  /* Root select */
+
 #endif /* LINUX_PCI_REGS_H */
diff --git a/linux-headers/asm-arm/kvm.h b/linux-headers/asm-arm/kvm.h
index 541268c..2fb7859 100644
--- a/linux-headers/asm-arm/kvm.h
+++ b/linux-headers/asm-arm/kvm.h
@@ -84,6 +84,13 @@ struct kvm_regs {
 #define KVM_VGIC_V2_DIST_SIZE		0x1000
 #define KVM_VGIC_V2_CPU_SIZE		0x2000
 
+/* Supported VGICv3 address types  */
+#define KVM_VGIC_V3_ADDR_TYPE_DIST	2
+#define KVM_VGIC_V3_ADDR_TYPE_REDIST	3
+
+#define KVM_VGIC_V3_DIST_SIZE		SZ_64K
+#define KVM_VGIC_V3_REDIST_SIZE		(2 * SZ_64K)
+
 #define KVM_ARM_VCPU_POWER_OFF		0 /* CPU is started in OFF state */
 #define KVM_ARM_VCPU_PSCI_0_2		1 /* CPU uses PSCI v0.2 */
 
diff --git a/linux-headers/asm-x86/unistd_32.h b/linux-headers/asm-x86/unistd_32.h
index abeaf40..d45ea28 100644
--- a/linux-headers/asm-x86/unistd_32.h
+++ b/linux-headers/asm-x86/unistd_32.h
@@ -377,5 +377,8 @@
 #define __NR_copy_file_range 377
 #define __NR_preadv2 378
 #define __NR_pwritev2 379
+#define __NR_pkey_mprotect 380
+#define __NR_pkey_alloc 381
+#define __NR_pkey_free 382
 
 #endif /* _ASM_X86_UNISTD_32_H */
diff --git a/linux-headers/asm-x86/unistd_64.h b/linux-headers/asm-x86/unistd_64.h
index 73c3d1f..e22db91 100644
--- a/linux-headers/asm-x86/unistd_64.h
+++ b/linux-headers/asm-x86/unistd_64.h
@@ -330,5 +330,8 @@
 #define __NR_copy_file_range 326
 #define __NR_preadv2 327
 #define __NR_pwritev2 328
+#define __NR_pkey_mprotect 329
+#define __NR_pkey_alloc 330
+#define __NR_pkey_free 331
 
 #endif /* _ASM_X86_UNISTD_64_H */
diff --git a/linux-headers/asm-x86/unistd_x32.h b/linux-headers/asm-x86/unistd_x32.h
index e5aea76..84e58b2 100644
--- a/linux-headers/asm-x86/unistd_x32.h
+++ b/linux-headers/asm-x86/unistd_x32.h
@@ -283,6 +283,9 @@
 #define __NR_membarrier (__X32_SYSCALL_BIT + 324)
 #define __NR_mlock2 (__X32_SYSCALL_BIT + 325)
 #define __NR_copy_file_range (__X32_SYSCALL_BIT + 326)
+#define __NR_pkey_mprotect (__X32_SYSCALL_BIT + 329)
+#define __NR_pkey_alloc (__X32_SYSCALL_BIT + 330)
+#define __NR_pkey_free (__X32_SYSCALL_BIT + 331)
 #define __NR_rt_sigaction (__X32_SYSCALL_BIT + 512)
 #define __NR_rt_sigreturn (__X32_SYSCALL_BIT + 513)
 #define __NR_ioctl (__X32_SYSCALL_BIT + 514)
diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
index 4806e06..bb0ed71 100644
--- a/linux-headers/linux/kvm.h
+++ b/linux-headers/linux/kvm.h
@@ -972,12 +972,19 @@ struct kvm_irqfd {
 	__u8  pad[16];
 };
 
+/* For KVM_CAP_ADJUST_CLOCK */
+
+/* Do not use 1, KVM_CHECK_EXTENSION returned it before we had flags.  */
+#define KVM_CLOCK_TSC_STABLE		2
+
 struct kvm_clock_data {
 	__u64 clock;
 	__u32 flags;
 	__u32 pad[9];
 };
 
+/* For KVM_CAP_SW_TLB */
+
 #define KVM_MMU_FSL_BOOKE_NOHV		0
 #define KVM_MMU_FSL_BOOKE_HV		1
 

^ permalink raw reply related	[flat|nested] 46+ messages in thread

* [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration
  2016-11-14 12:36 ` [Qemu-devel] " Marcelo Tosatti
@ 2016-11-14 12:36   ` Marcelo Tosatti
  -1 siblings, 0 replies; 46+ messages in thread
From: Marcelo Tosatti @ 2016-11-14 12:36 UTC (permalink / raw)
  To: kvm
  Cc: qemu-devel, Dr. David Alan Gilbert, Paolo Bonzini, Juan Quintela,
	Radim Krcmar, Eduardo Habkost, Marcelo Tosatti

[-- Attachment #1: kvmclock-advance-clock.patch --]
[-- Type: text/plain, Size: 6623 bytes --]

Check for KVM_CAP_ADJUST_CLOCK capability KVM_CLOCK_TSC_STABLE, which
indicates that KVM_GET_CLOCK returns a value as seen by the guest at
that moment.

For new machine types, use this value rather than reading 
from guest memory.

This reduces kvmclock difference on migration from 5s to 0.1s
(when max_downtime == 5s).

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

---
 hw/i386/kvm/clock.c    |   88 +++++++++++++++++++++++++++++++++++++++++++++++--
 include/hw/i386/pc.h   |    5 ++
 target-i386/kvm.c      |    7 +++
 target-i386/kvm_i386.h |    1 
 4 files changed, 98 insertions(+), 3 deletions(-)

Index: qemu-mig-advance-clock/hw/i386/kvm/clock.c
===================================================================
--- qemu-mig-advance-clock.orig/hw/i386/kvm/clock.c	2016-11-14 09:07:55.519856329 -0200
+++ qemu-mig-advance-clock/hw/i386/kvm/clock.c	2016-11-14 10:19:45.723254737 -0200
@@ -36,6 +36,12 @@
 
     uint64_t clock;
     bool clock_valid;
+
+    /* whether machine supports reliable KVM_GET_CLOCK */
+    bool mach_use_reliable_get_clock;
+
+    /* whether source host supported reliable KVM_GET_CLOCK */
+    bool src_use_reliable_get_clock;
 } KVMClockState;
 
 struct pvclock_vcpu_time_info {
@@ -91,15 +97,37 @@
 
     if (running) {
         struct kvm_clock_data data = {};
-        uint64_t time_at_migration = kvmclock_current_nsec(s);
+        uint64_t time_at_migration = 0;
 
-        s->clock_valid = false;
+        /* local (running VM) restore */
+        if (s->clock_valid) {
+            /*
+             * if host does not support reliable KVM_GET_CLOCK,
+             * read kvmclock value from memory
+             */
+            if (!kvm_has_adjust_clock_stable()) {
+                time_at_migration = kvmclock_current_nsec(s);
+            }
+        /* migration/savevm/init restore */
+        } else {
+            /*
+             * use s->clock in case machine uses reliable
+             * get clock and host where vm was executing
+             * supported reliable get clock
+             */
+            if (!s->mach_use_reliable_get_clock ||
+                !s->src_use_reliable_get_clock) {
+                time_at_migration = kvmclock_current_nsec(s);
+            }
+        }
 
-        /* We can't rely on the migrated clock value, just discard it */
+        /* We can't rely on the saved clock value, just discard it */
         if (time_at_migration) {
             s->clock = time_at_migration;
         }
 
+        s->clock_valid = false;
+
         data.clock = s->clock;
         ret = kvm_vm_ioctl(kvm_state, KVM_SET_CLOCK, &data);
         if (ret < 0) {
@@ -152,22 +180,76 @@
     qemu_add_vm_change_state_handler(kvmclock_vm_state_change, s);
 }
 
+static bool kvmclock_src_use_reliable_get_clock(void *opaque)
+{
+    KVMClockState *s = opaque;
+
+    /*
+     * On machine types that support reliable KVM_GET_CLOCK,
+     * if host kernel does provide reliable KVM_GET_CLOCK,
+     * set src_use_reliable_get_clock=true so that destination
+     * avoids reading kvmclock from memory.
+     */
+    if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) {
+        s->src_use_reliable_get_clock = true;
+    }
+
+    return s->src_use_reliable_get_clock;
+}
+
+static const VMStateDescription kvmclock_reliable_get_clock = {
+    .name = "kvmclock/src_use_reliable_get_clock",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = kvmclock_src_use_reliable_get_clock,
+    .fields = (VMStateField[]) {
+        VMSTATE_BOOL(src_use_reliable_get_clock, KVMClockState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static void kvmclock_pre_save(void *opaque)
+{
+    KVMClockState *s = opaque;
+    struct kvm_clock_data data;
+    int ret;
+
+    ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data);
+    if (ret < 0) {
+        fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret));
+                abort();
+    }
+    s->clock = data.clock;
+}
+
 static const VMStateDescription kvmclock_vmsd = {
     .name = "kvmclock",
     .version_id = 1,
     .minimum_version_id = 1,
+    .pre_save = kvmclock_pre_save,
     .fields = (VMStateField[]) {
         VMSTATE_UINT64(clock, KVMClockState),
         VMSTATE_END_OF_LIST()
+    },
+    .subsections = (const VMStateDescription * []) {
+        &kvmclock_reliable_get_clock,
+        NULL
     }
 };
 
+static Property kvmclock_properties[] = {
+    DEFINE_PROP_BOOL("mach_use_reliable_get_clock", KVMClockState,
+                      mach_use_reliable_get_clock, true),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
 static void kvmclock_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
 
     dc->realize = kvmclock_realize;
     dc->vmsd = &kvmclock_vmsd;
+    dc->props = kvmclock_properties;
 }
 
 static const TypeInfo kvmclock_info = {
Index: qemu-mig-advance-clock/include/hw/i386/pc.h
===================================================================
--- qemu-mig-advance-clock.orig/include/hw/i386/pc.h	2016-11-14 09:07:55.519856329 -0200
+++ qemu-mig-advance-clock/include/hw/i386/pc.h	2016-11-14 09:11:47.112200123 -0200
@@ -370,6 +370,11 @@
 #define PC_COMPAT_2_7 \
     HW_COMPAT_2_7 \
     {\
+        .driver   = "kvmclock",\
+        .property = "mach_use_reliable_get_clock",\
+        .value    = "off",\
+    },\
+    {\
         .driver   = TYPE_X86_CPU,\
         .property = "l3-cache",\
         .value    = "off",\
Index: qemu-mig-advance-clock/target-i386/kvm.c
===================================================================
--- qemu-mig-advance-clock.orig/target-i386/kvm.c	2016-11-14 09:07:55.520856330 -0200
+++ qemu-mig-advance-clock/target-i386/kvm.c	2016-11-14 09:11:47.125200142 -0200
@@ -117,6 +117,13 @@
     return kvm_check_extension(kvm_state, KVM_CAP_X86_SMM);
 }
 
+bool kvm_has_adjust_clock_stable(void)
+{
+    int ret = kvm_check_extension(kvm_state, KVM_CAP_ADJUST_CLOCK);
+
+    return (ret == KVM_CLOCK_TSC_STABLE);
+}
+
 bool kvm_allows_irq0_override(void)
 {
     return !kvm_irqchip_in_kernel() || kvm_has_gsi_routing();
Index: qemu-mig-advance-clock/target-i386/kvm_i386.h
===================================================================
--- qemu-mig-advance-clock.orig/target-i386/kvm_i386.h	2016-11-14 09:07:55.520856330 -0200
+++ qemu-mig-advance-clock/target-i386/kvm_i386.h	2016-11-14 09:11:47.125200142 -0200
@@ -17,6 +17,7 @@
 
 bool kvm_allows_irq0_override(void);
 bool kvm_has_smm(void);
+bool kvm_has_adjust_clock_stable(void);
 void kvm_synchronize_all_tsc(void);
 void kvm_arch_reset_vcpu(X86CPU *cs);
 void kvm_arch_do_init_vcpu(X86CPU *cs);



^ permalink raw reply	[flat|nested] 46+ messages in thread

* [Qemu-devel] [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration
@ 2016-11-14 12:36   ` Marcelo Tosatti
  0 siblings, 0 replies; 46+ messages in thread
From: Marcelo Tosatti @ 2016-11-14 12:36 UTC (permalink / raw)
  To: kvm
  Cc: qemu-devel, Dr. David Alan Gilbert, Paolo Bonzini, Juan Quintela,
	Radim Krcmar, Eduardo Habkost, Marcelo Tosatti

[-- Attachment #1: kvmclock-advance-clock.patch --]
[-- Type: text/plain, Size: 6621 bytes --]

Check for KVM_CAP_ADJUST_CLOCK capability KVM_CLOCK_TSC_STABLE, which
indicates that KVM_GET_CLOCK returns a value as seen by the guest at
that moment.

For new machine types, use this value rather than reading 
from guest memory.

This reduces kvmclock difference on migration from 5s to 0.1s
(when max_downtime == 5s).

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

---
 hw/i386/kvm/clock.c    |   88 +++++++++++++++++++++++++++++++++++++++++++++++--
 include/hw/i386/pc.h   |    5 ++
 target-i386/kvm.c      |    7 +++
 target-i386/kvm_i386.h |    1 
 4 files changed, 98 insertions(+), 3 deletions(-)

Index: qemu-mig-advance-clock/hw/i386/kvm/clock.c
===================================================================
--- qemu-mig-advance-clock.orig/hw/i386/kvm/clock.c	2016-11-14 09:07:55.519856329 -0200
+++ qemu-mig-advance-clock/hw/i386/kvm/clock.c	2016-11-14 10:19:45.723254737 -0200
@@ -36,6 +36,12 @@
 
     uint64_t clock;
     bool clock_valid;
+
+    /* whether machine supports reliable KVM_GET_CLOCK */
+    bool mach_use_reliable_get_clock;
+
+    /* whether source host supported reliable KVM_GET_CLOCK */
+    bool src_use_reliable_get_clock;
 } KVMClockState;
 
 struct pvclock_vcpu_time_info {
@@ -91,15 +97,37 @@
 
     if (running) {
         struct kvm_clock_data data = {};
-        uint64_t time_at_migration = kvmclock_current_nsec(s);
+        uint64_t time_at_migration = 0;
 
-        s->clock_valid = false;
+        /* local (running VM) restore */
+        if (s->clock_valid) {
+            /*
+             * if host does not support reliable KVM_GET_CLOCK,
+             * read kvmclock value from memory
+             */
+            if (!kvm_has_adjust_clock_stable()) {
+                time_at_migration = kvmclock_current_nsec(s);
+            }
+        /* migration/savevm/init restore */
+        } else {
+            /*
+             * use s->clock in case machine uses reliable
+             * get clock and host where vm was executing
+             * supported reliable get clock
+             */
+            if (!s->mach_use_reliable_get_clock ||
+                !s->src_use_reliable_get_clock) {
+                time_at_migration = kvmclock_current_nsec(s);
+            }
+        }
 
-        /* We can't rely on the migrated clock value, just discard it */
+        /* We can't rely on the saved clock value, just discard it */
         if (time_at_migration) {
             s->clock = time_at_migration;
         }
 
+        s->clock_valid = false;
+
         data.clock = s->clock;
         ret = kvm_vm_ioctl(kvm_state, KVM_SET_CLOCK, &data);
         if (ret < 0) {
@@ -152,22 +180,76 @@
     qemu_add_vm_change_state_handler(kvmclock_vm_state_change, s);
 }
 
+static bool kvmclock_src_use_reliable_get_clock(void *opaque)
+{
+    KVMClockState *s = opaque;
+
+    /*
+     * On machine types that support reliable KVM_GET_CLOCK,
+     * if host kernel does provide reliable KVM_GET_CLOCK,
+     * set src_use_reliable_get_clock=true so that destination
+     * avoids reading kvmclock from memory.
+     */
+    if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) {
+        s->src_use_reliable_get_clock = true;
+    }
+
+    return s->src_use_reliable_get_clock;
+}
+
+static const VMStateDescription kvmclock_reliable_get_clock = {
+    .name = "kvmclock/src_use_reliable_get_clock",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = kvmclock_src_use_reliable_get_clock,
+    .fields = (VMStateField[]) {
+        VMSTATE_BOOL(src_use_reliable_get_clock, KVMClockState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static void kvmclock_pre_save(void *opaque)
+{
+    KVMClockState *s = opaque;
+    struct kvm_clock_data data;
+    int ret;
+
+    ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data);
+    if (ret < 0) {
+        fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret));
+                abort();
+    }
+    s->clock = data.clock;
+}
+
 static const VMStateDescription kvmclock_vmsd = {
     .name = "kvmclock",
     .version_id = 1,
     .minimum_version_id = 1,
+    .pre_save = kvmclock_pre_save,
     .fields = (VMStateField[]) {
         VMSTATE_UINT64(clock, KVMClockState),
         VMSTATE_END_OF_LIST()
+    },
+    .subsections = (const VMStateDescription * []) {
+        &kvmclock_reliable_get_clock,
+        NULL
     }
 };
 
+static Property kvmclock_properties[] = {
+    DEFINE_PROP_BOOL("mach_use_reliable_get_clock", KVMClockState,
+                      mach_use_reliable_get_clock, true),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
 static void kvmclock_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
 
     dc->realize = kvmclock_realize;
     dc->vmsd = &kvmclock_vmsd;
+    dc->props = kvmclock_properties;
 }
 
 static const TypeInfo kvmclock_info = {
Index: qemu-mig-advance-clock/include/hw/i386/pc.h
===================================================================
--- qemu-mig-advance-clock.orig/include/hw/i386/pc.h	2016-11-14 09:07:55.519856329 -0200
+++ qemu-mig-advance-clock/include/hw/i386/pc.h	2016-11-14 09:11:47.112200123 -0200
@@ -370,6 +370,11 @@
 #define PC_COMPAT_2_7 \
     HW_COMPAT_2_7 \
     {\
+        .driver   = "kvmclock",\
+        .property = "mach_use_reliable_get_clock",\
+        .value    = "off",\
+    },\
+    {\
         .driver   = TYPE_X86_CPU,\
         .property = "l3-cache",\
         .value    = "off",\
Index: qemu-mig-advance-clock/target-i386/kvm.c
===================================================================
--- qemu-mig-advance-clock.orig/target-i386/kvm.c	2016-11-14 09:07:55.520856330 -0200
+++ qemu-mig-advance-clock/target-i386/kvm.c	2016-11-14 09:11:47.125200142 -0200
@@ -117,6 +117,13 @@
     return kvm_check_extension(kvm_state, KVM_CAP_X86_SMM);
 }
 
+bool kvm_has_adjust_clock_stable(void)
+{
+    int ret = kvm_check_extension(kvm_state, KVM_CAP_ADJUST_CLOCK);
+
+    return (ret == KVM_CLOCK_TSC_STABLE);
+}
+
 bool kvm_allows_irq0_override(void)
 {
     return !kvm_irqchip_in_kernel() || kvm_has_gsi_routing();
Index: qemu-mig-advance-clock/target-i386/kvm_i386.h
===================================================================
--- qemu-mig-advance-clock.orig/target-i386/kvm_i386.h	2016-11-14 09:07:55.520856330 -0200
+++ qemu-mig-advance-clock/target-i386/kvm_i386.h	2016-11-14 09:11:47.125200142 -0200
@@ -17,6 +17,7 @@
 
 bool kvm_allows_irq0_override(void);
 bool kvm_has_smm(void);
+bool kvm_has_adjust_clock_stable(void);
 void kvm_synchronize_all_tsc(void);
 void kvm_arch_reset_vcpu(X86CPU *cs);
 void kvm_arch_do_init_vcpu(X86CPU *cs);

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration
  2016-11-14 12:36   ` [Qemu-devel] " Marcelo Tosatti
@ 2016-11-14 13:54     ` Paolo Bonzini
  -1 siblings, 0 replies; 46+ messages in thread
From: Paolo Bonzini @ 2016-11-14 13:54 UTC (permalink / raw)
  To: Marcelo Tosatti, kvm
  Cc: qemu-devel, Dr. David Alan Gilbert, Juan Quintela, Radim Krcmar,
	Eduardo Habkost



On 14/11/2016 13:36, Marcelo Tosatti wrote:
> +        /* local (running VM) restore */
> +        if (s->clock_valid) {
> +            /*
> +             * if host does not support reliable KVM_GET_CLOCK,
> +             * read kvmclock value from memory
> +             */
> +            if (!kvm_has_adjust_clock_stable()) {
> +                time_at_migration = kvmclock_current_nsec(s);

Just assign to s->clock here...


> +            }
> +        /* migration/savevm/init restore */
> +        } else {
> +            /*
> +             * use s->clock in case machine uses reliable
> +             * get clock and host where vm was executing
> +             * supported reliable get clock
> +             */
> +            if (!s->mach_use_reliable_get_clock ||
> +                !s->src_use_reliable_get_clock) {
> +                time_at_migration = kvmclock_current_nsec(s);

... and here, so that time_at_migration is not needed anymore.

Also here it's enough to look at s->src_user_reliable_get_clock, because
if s->mach_use_reliable_get_clock is false,
s->src_use_reliable_get_clock will be false as well.

> +            }
> +        }
>  
> -        /* We can't rely on the migrated clock value, just discard it */
> +        /* We can't rely on the saved clock value, just discard it */
>          if (time_at_migration) {
>              s->clock = time_at_migration;

[...]

> 
> +static bool kvmclock_src_use_reliable_get_clock(void *opaque)
> +{
> +    KVMClockState *s = opaque;
> +
> +    /*
> +     * On machine types that support reliable KVM_GET_CLOCK,
> +     * if host kernel does provide reliable KVM_GET_CLOCK,
> +     * set src_use_reliable_get_clock=true so that destination
> +     * avoids reading kvmclock from memory.
> +     */
> +    if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) {
> +        s->src_use_reliable_get_clock = true;
> +    }
> +
> +    return s->src_use_reliable_get_clock;
> +}

Here you can just return s->mach_use_reliable_get_clock.  To set
s->src_use_reliable_get_clock, after issuing KVM_GET_CLOCK you can look
at the KVM_CLOCK_TSC_STABLE bit in the kvm_clock struct's flags.

You don't actually need kvm_has_adjust_clock_stable(), but please place
the KVM_GET_CLOCK code for kvmclock_pre_save and
kvmclock_vm_state_change in a common function.

Also, just another small nit: please make your scripts use the "-p"
option on diff. :)

Thanks,

Paolo

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [Qemu-devel] [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration
@ 2016-11-14 13:54     ` Paolo Bonzini
  0 siblings, 0 replies; 46+ messages in thread
From: Paolo Bonzini @ 2016-11-14 13:54 UTC (permalink / raw)
  To: Marcelo Tosatti, kvm
  Cc: qemu-devel, Dr. David Alan Gilbert, Juan Quintela, Radim Krcmar,
	Eduardo Habkost



On 14/11/2016 13:36, Marcelo Tosatti wrote:
> +        /* local (running VM) restore */
> +        if (s->clock_valid) {
> +            /*
> +             * if host does not support reliable KVM_GET_CLOCK,
> +             * read kvmclock value from memory
> +             */
> +            if (!kvm_has_adjust_clock_stable()) {
> +                time_at_migration = kvmclock_current_nsec(s);

Just assign to s->clock here...


> +            }
> +        /* migration/savevm/init restore */
> +        } else {
> +            /*
> +             * use s->clock in case machine uses reliable
> +             * get clock and host where vm was executing
> +             * supported reliable get clock
> +             */
> +            if (!s->mach_use_reliable_get_clock ||
> +                !s->src_use_reliable_get_clock) {
> +                time_at_migration = kvmclock_current_nsec(s);

... and here, so that time_at_migration is not needed anymore.

Also here it's enough to look at s->src_user_reliable_get_clock, because
if s->mach_use_reliable_get_clock is false,
s->src_use_reliable_get_clock will be false as well.

> +            }
> +        }
>  
> -        /* We can't rely on the migrated clock value, just discard it */
> +        /* We can't rely on the saved clock value, just discard it */
>          if (time_at_migration) {
>              s->clock = time_at_migration;

[...]

> 
> +static bool kvmclock_src_use_reliable_get_clock(void *opaque)
> +{
> +    KVMClockState *s = opaque;
> +
> +    /*
> +     * On machine types that support reliable KVM_GET_CLOCK,
> +     * if host kernel does provide reliable KVM_GET_CLOCK,
> +     * set src_use_reliable_get_clock=true so that destination
> +     * avoids reading kvmclock from memory.
> +     */
> +    if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) {
> +        s->src_use_reliable_get_clock = true;
> +    }
> +
> +    return s->src_use_reliable_get_clock;
> +}

Here you can just return s->mach_use_reliable_get_clock.  To set
s->src_use_reliable_get_clock, after issuing KVM_GET_CLOCK you can look
at the KVM_CLOCK_TSC_STABLE bit in the kvm_clock struct's flags.

You don't actually need kvm_has_adjust_clock_stable(), but please place
the KVM_GET_CLOCK code for kvmclock_pre_save and
kvmclock_vm_state_change in a common function.

Also, just another small nit: please make your scripts use the "-p"
option on diff. :)

Thanks,

Paolo

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration
  2016-11-14 13:54     ` [Qemu-devel] " Paolo Bonzini
@ 2016-11-14 14:00       ` Marcelo Tosatti
  -1 siblings, 0 replies; 46+ messages in thread
From: Marcelo Tosatti @ 2016-11-14 14:00 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: kvm, qemu-devel, Dr. David Alan Gilbert, Juan Quintela,
	Radim Krcmar, Eduardo Habkost

On Mon, Nov 14, 2016 at 02:54:38PM +0100, Paolo Bonzini wrote:
> 
> 
> On 14/11/2016 13:36, Marcelo Tosatti wrote:
> > +        /* local (running VM) restore */
> > +        if (s->clock_valid) {
> > +            /*
> > +             * if host does not support reliable KVM_GET_CLOCK,
> > +             * read kvmclock value from memory
> > +             */
> > +            if (!kvm_has_adjust_clock_stable()) {
> > +                time_at_migration = kvmclock_current_nsec(s);
> 
> Just assign to s->clock here...

If kvmclock is not enabled, you want to use s->clock,
rather than 0.

> > +            }
> > +        /* migration/savevm/init restore */
> > +        } else {
> > +            /*
> > +             * use s->clock in case machine uses reliable
> > +             * get clock and host where vm was executing
> > +             * supported reliable get clock
> > +             */
> > +            if (!s->mach_use_reliable_get_clock ||
> > +                !s->src_use_reliable_get_clock) {
> > +                time_at_migration = kvmclock_current_nsec(s);
> 
> ... and here, so that time_at_migration is not needed anymore.

Same as above.

> Also here it's enough to look at s->src_user_reliable_get_clock, because
> if s->mach_use_reliable_get_clock is false,
> s->src_use_reliable_get_clock will be false as well.

Yes, but i like the code annotation.

> > +            }
> > +        }
> >  
> > -        /* We can't rely on the migrated clock value, just discard it */
> > +        /* We can't rely on the saved clock value, just discard it */
> >          if (time_at_migration) {
> >              s->clock = time_at_migration;
> 
> [...]
> 
> > 
> > +static bool kvmclock_src_use_reliable_get_clock(void *opaque)
> > +{
> > +    KVMClockState *s = opaque;
> > +
> > +    /*
> > +     * On machine types that support reliable KVM_GET_CLOCK,
> > +     * if host kernel does provide reliable KVM_GET_CLOCK,
> > +     * set src_use_reliable_get_clock=true so that destination
> > +     * avoids reading kvmclock from memory.
> > +     */
> > +    if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) {
> > +        s->src_use_reliable_get_clock = true;
> > +    }
> > +
> > +    return s->src_use_reliable_get_clock;
> > +}
> 
> Here you can just return s->mach_use_reliable_get_clock. 

mach_use_reliable_get_clock can be true but host might not support it.

>  To set
> s->src_use_reliable_get_clock, after issuing KVM_GET_CLOCK you can look
> at the KVM_CLOCK_TSC_STABLE bit in the kvm_clock struct's flags.

KVM_CLOCK_TSC_STABLE bit in the kvmclock structure != 
KVM_GET_CLOCK returns reliable value, right?


> You don't actually need kvm_has_adjust_clock_stable(), but please place
> the KVM_GET_CLOCK code for kvmclock_pre_save and
> kvmclock_vm_state_change in a common function.

Sure.

> Also, just another small nit: please make your scripts use the "-p"
> option on diff. :)

Sure.

> 
> Thanks,
> 
> Paolo

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [Qemu-devel] [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration
@ 2016-11-14 14:00       ` Marcelo Tosatti
  0 siblings, 0 replies; 46+ messages in thread
From: Marcelo Tosatti @ 2016-11-14 14:00 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: kvm, qemu-devel, Dr. David Alan Gilbert, Juan Quintela,
	Radim Krcmar, Eduardo Habkost

On Mon, Nov 14, 2016 at 02:54:38PM +0100, Paolo Bonzini wrote:
> 
> 
> On 14/11/2016 13:36, Marcelo Tosatti wrote:
> > +        /* local (running VM) restore */
> > +        if (s->clock_valid) {
> > +            /*
> > +             * if host does not support reliable KVM_GET_CLOCK,
> > +             * read kvmclock value from memory
> > +             */
> > +            if (!kvm_has_adjust_clock_stable()) {
> > +                time_at_migration = kvmclock_current_nsec(s);
> 
> Just assign to s->clock here...

If kvmclock is not enabled, you want to use s->clock,
rather than 0.

> > +            }
> > +        /* migration/savevm/init restore */
> > +        } else {
> > +            /*
> > +             * use s->clock in case machine uses reliable
> > +             * get clock and host where vm was executing
> > +             * supported reliable get clock
> > +             */
> > +            if (!s->mach_use_reliable_get_clock ||
> > +                !s->src_use_reliable_get_clock) {
> > +                time_at_migration = kvmclock_current_nsec(s);
> 
> ... and here, so that time_at_migration is not needed anymore.

Same as above.

> Also here it's enough to look at s->src_user_reliable_get_clock, because
> if s->mach_use_reliable_get_clock is false,
> s->src_use_reliable_get_clock will be false as well.

Yes, but i like the code annotation.

> > +            }
> > +        }
> >  
> > -        /* We can't rely on the migrated clock value, just discard it */
> > +        /* We can't rely on the saved clock value, just discard it */
> >          if (time_at_migration) {
> >              s->clock = time_at_migration;
> 
> [...]
> 
> > 
> > +static bool kvmclock_src_use_reliable_get_clock(void *opaque)
> > +{
> > +    KVMClockState *s = opaque;
> > +
> > +    /*
> > +     * On machine types that support reliable KVM_GET_CLOCK,
> > +     * if host kernel does provide reliable KVM_GET_CLOCK,
> > +     * set src_use_reliable_get_clock=true so that destination
> > +     * avoids reading kvmclock from memory.
> > +     */
> > +    if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) {
> > +        s->src_use_reliable_get_clock = true;
> > +    }
> > +
> > +    return s->src_use_reliable_get_clock;
> > +}
> 
> Here you can just return s->mach_use_reliable_get_clock. 

mach_use_reliable_get_clock can be true but host might not support it.

>  To set
> s->src_use_reliable_get_clock, after issuing KVM_GET_CLOCK you can look
> at the KVM_CLOCK_TSC_STABLE bit in the kvm_clock struct's flags.

KVM_CLOCK_TSC_STABLE bit in the kvmclock structure != 
KVM_GET_CLOCK returns reliable value, right?


> You don't actually need kvm_has_adjust_clock_stable(), but please place
> the KVM_GET_CLOCK code for kvmclock_pre_save and
> kvmclock_vm_state_change in a common function.

Sure.

> Also, just another small nit: please make your scripts use the "-p"
> option on diff. :)

Sure.

> 
> Thanks,
> 
> Paolo

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration
  2016-11-14 12:36   ` [Qemu-devel] " Marcelo Tosatti
@ 2016-11-14 14:09     ` Juan Quintela
  -1 siblings, 0 replies; 46+ messages in thread
From: Juan Quintela @ 2016-11-14 14:09 UTC (permalink / raw)
  To: Marcelo Tosatti
  Cc: kvm, qemu-devel, Dr. David Alan Gilbert, Paolo Bonzini,
	Radim Krcmar, Eduardo Habkost

Marcelo Tosatti <mtosatti@redhat.com> wrote:
> Check for KVM_CAP_ADJUST_CLOCK capability KVM_CLOCK_TSC_STABLE, which
> indicates that KVM_GET_CLOCK returns a value as seen by the guest at
> that moment.
>
> For new machine types, use this value rather than reading 
> from guest memory.
>
> This reduces kvmclock difference on migration from 5s to 0.1s
> (when max_downtime == 5s).
>
> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Acked-by: Juan Quintela <quintela@redhat.com>

But, if you have to respin it ....


> +    /* whether machine supports reliable KVM_GET_CLOCK */
> +    bool mach_use_reliable_get_clock;
> +
> +    /* whether source host supported reliable KVM_GET_CLOCK */
> +    bool src_use_reliable_get_clock;

This two names are really long, but I don't have better suggesitons :-()
>      if (running) {
>          struct kvm_clock_data data = {};
> -        uint64_t time_at_migration = kvmclock_current_nsec(s);
> +        uint64_t time_at_migration = 0;

This was not "time_at_migration", it was not already before, but just
now it looks really weird. (as it was already faulty, this is why it is
only a suggestion.)

>  
> -        s->clock_valid = false;
> +        /* local (running VM) restore */
> +        if (s->clock_valid) {
> +            /*
> +             * if host does not support reliable KVM_GET_CLOCK,
> +             * read kvmclock value from memory
> +             */
> +            if (!kvm_has_adjust_clock_stable()) {
> +                time_at_migration = kvmclock_current_nsec(s);
> +            }
> +        /* migration/savevm/init restore */
> +        } else {
> +            /*
> +             * use s->clock in case machine uses reliable
> +             * get clock and host where vm was executing
> +             * supported reliable get clock
> +             */

This comment is just weird.  Simplifying
    /* If A and B do C */
    if (!A and || !B) {
       then D();
    }

Doing the opposite comment?

Migration code looks rigth.

Once said that, I continue hating clocks.

Later, Juan.

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [Qemu-devel] [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration
@ 2016-11-14 14:09     ` Juan Quintela
  0 siblings, 0 replies; 46+ messages in thread
From: Juan Quintela @ 2016-11-14 14:09 UTC (permalink / raw)
  To: Marcelo Tosatti
  Cc: kvm, qemu-devel, Dr. David Alan Gilbert, Paolo Bonzini,
	Radim Krcmar, Eduardo Habkost

Marcelo Tosatti <mtosatti@redhat.com> wrote:
> Check for KVM_CAP_ADJUST_CLOCK capability KVM_CLOCK_TSC_STABLE, which
> indicates that KVM_GET_CLOCK returns a value as seen by the guest at
> that moment.
>
> For new machine types, use this value rather than reading 
> from guest memory.
>
> This reduces kvmclock difference on migration from 5s to 0.1s
> (when max_downtime == 5s).
>
> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Acked-by: Juan Quintela <quintela@redhat.com>

But, if you have to respin it ....


> +    /* whether machine supports reliable KVM_GET_CLOCK */
> +    bool mach_use_reliable_get_clock;
> +
> +    /* whether source host supported reliable KVM_GET_CLOCK */
> +    bool src_use_reliable_get_clock;

This two names are really long, but I don't have better suggesitons :-()
>      if (running) {
>          struct kvm_clock_data data = {};
> -        uint64_t time_at_migration = kvmclock_current_nsec(s);
> +        uint64_t time_at_migration = 0;

This was not "time_at_migration", it was not already before, but just
now it looks really weird. (as it was already faulty, this is why it is
only a suggestion.)

>  
> -        s->clock_valid = false;
> +        /* local (running VM) restore */
> +        if (s->clock_valid) {
> +            /*
> +             * if host does not support reliable KVM_GET_CLOCK,
> +             * read kvmclock value from memory
> +             */
> +            if (!kvm_has_adjust_clock_stable()) {
> +                time_at_migration = kvmclock_current_nsec(s);
> +            }
> +        /* migration/savevm/init restore */
> +        } else {
> +            /*
> +             * use s->clock in case machine uses reliable
> +             * get clock and host where vm was executing
> +             * supported reliable get clock
> +             */

This comment is just weird.  Simplifying
    /* If A and B do C */
    if (!A and || !B) {
       then D();
    }

Doing the opposite comment?

Migration code looks rigth.

Once said that, I continue hating clocks.

Later, Juan.

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration
  2016-11-14 13:54     ` [Qemu-devel] " Paolo Bonzini
@ 2016-11-14 14:11       ` Juan Quintela
  -1 siblings, 0 replies; 46+ messages in thread
From: Juan Quintela @ 2016-11-14 14:11 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: Marcelo Tosatti, kvm, qemu-devel, Dr. David Alan Gilbert,
	Radim Krcmar, Eduardo Habkost

Paolo Bonzini <pbonzini@redhat.com> wrote:
> On 14/11/2016 13:36, Marcelo Tosatti wrote:
>> +        /* local (running VM) restore */
>> +        if (s->clock_valid) {
>> +            /*
>> +             * if host does not support reliable KVM_GET_CLOCK,
>> +             * read kvmclock value from memory
>> +             */
>> +            if (!kvm_has_adjust_clock_stable()) {
>> +                time_at_migration = kvmclock_current_nsec(s);
>
> Just assign to s->clock here...

Agreed, I just wanted to make so many changes.


> Also, just another small nit: please make your scripts use the "-p"
> option on diff. :)

YESSSS

I was searching what functions the code belonged for :p

Thanks, Juan.

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [Qemu-devel] [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration
@ 2016-11-14 14:11       ` Juan Quintela
  0 siblings, 0 replies; 46+ messages in thread
From: Juan Quintela @ 2016-11-14 14:11 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: Marcelo Tosatti, kvm, qemu-devel, Dr. David Alan Gilbert,
	Radim Krcmar, Eduardo Habkost

Paolo Bonzini <pbonzini@redhat.com> wrote:
> On 14/11/2016 13:36, Marcelo Tosatti wrote:
>> +        /* local (running VM) restore */
>> +        if (s->clock_valid) {
>> +            /*
>> +             * if host does not support reliable KVM_GET_CLOCK,
>> +             * read kvmclock value from memory
>> +             */
>> +            if (!kvm_has_adjust_clock_stable()) {
>> +                time_at_migration = kvmclock_current_nsec(s);
>
> Just assign to s->clock here...

Agreed, I just wanted to make so many changes.


> Also, just another small nit: please make your scripts use the "-p"
> option on diff. :)

YESSSS

I was searching what functions the code belonged for :p

Thanks, Juan.

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration
  2016-11-14 14:00       ` [Qemu-devel] " Marcelo Tosatti
@ 2016-11-14 14:22         ` Paolo Bonzini
  -1 siblings, 0 replies; 46+ messages in thread
From: Paolo Bonzini @ 2016-11-14 14:22 UTC (permalink / raw)
  To: Marcelo Tosatti
  Cc: kvm, qemu-devel, Dr. David Alan Gilbert, Juan Quintela,
	Radim Krcmar, Eduardo Habkost



On 14/11/2016 15:00, Marcelo Tosatti wrote:
> On Mon, Nov 14, 2016 at 02:54:38PM +0100, Paolo Bonzini wrote:
>>
>>
>> On 14/11/2016 13:36, Marcelo Tosatti wrote:
>>> +        /* local (running VM) restore */
>>> +        if (s->clock_valid) {
>>> +            /*
>>> +             * if host does not support reliable KVM_GET_CLOCK,
>>> +             * read kvmclock value from memory
>>> +             */
>>> +            if (!kvm_has_adjust_clock_stable()) {
>>> +                time_at_migration = kvmclock_current_nsec(s);
>>
>> Just assign to s->clock here...
> 
> If kvmclock is not enabled, you want to use s->clock,
> rather than 0.
> 
>>> +            }
>>> +        /* migration/savevm/init restore */
>>> +        } else {
>>> +            /*
>>> +             * use s->clock in case machine uses reliable
>>> +             * get clock and host where vm was executing
>>> +             * supported reliable get clock
>>> +             */
>>> +            if (!s->mach_use_reliable_get_clock ||
>>> +                !s->src_use_reliable_get_clock) {
>>> +                time_at_migration = kvmclock_current_nsec(s);
>>
>> ... and here, so that time_at_migration is not needed anymore.
> 
> Same as above.

You're right.

>> Also here it's enough to look at s->src_user_reliable_get_clock, because
>> if s->mach_use_reliable_get_clock is false,
>> s->src_use_reliable_get_clock will be false as well.
> 
> Yes, but i like the code annotation.

Ah, I think we're looking at it differently.

I'm thinking "mach_use_reliable_get_clock is just for migration,
src_use_reliable_get_clock is the state".  Perhaps you're thinking of
enabling/disabling the whole new code for old machines?  What is the
advantage?

>>> +            }
>>> +        }
>>>  
>>> -        /* We can't rely on the migrated clock value, just discard it */
>>> +        /* We can't rely on the saved clock value, just discard it */
>>>          if (time_at_migration) {
>>>              s->clock = time_at_migration;
>>
>> [...]
>>
>>>
>>> +static bool kvmclock_src_use_reliable_get_clock(void *opaque)
>>> +{
>>> +    KVMClockState *s = opaque;
>>> +
>>> +    /*
>>> +     * On machine types that support reliable KVM_GET_CLOCK,
>>> +     * if host kernel does provide reliable KVM_GET_CLOCK,
>>> +     * set src_use_reliable_get_clock=true so that destination
>>> +     * avoids reading kvmclock from memory.
>>> +     */
>>> +    if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) {
>>> +        s->src_use_reliable_get_clock = true;
>>> +    }
>>> +
>>> +    return s->src_use_reliable_get_clock;
>>> +}
>>
>> Here you can just return s->mach_use_reliable_get_clock. 
> 
> mach_use_reliable_get_clock can be true but host might not support it.

Yes, but the "needed" function is only required to avoid breaking
pc-i440fx-2.7 and earlier.  If you return true here, you can still
migrate a "false" value for src_use_reliable_get_clock.

>>  To set
>> s->src_use_reliable_get_clock, after issuing KVM_GET_CLOCK you can look
>> at the KVM_CLOCK_TSC_STABLE bit in the kvm_clock struct's flags.
> 
> KVM_CLOCK_TSC_STABLE bit in the kvmclock structure != 
> KVM_GET_CLOCK returns reliable value, right?

It is the same as "is using masterclock", which is actually a stricter
condition than the KVM_CHECK_EXTENSION return value.  The right check to
use is whether masterclock is in use, and then the idea is to treat
clock,src_use_reliable_get_clock as one tuple that is updated atomically.

Paolo

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [Qemu-devel] [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration
@ 2016-11-14 14:22         ` Paolo Bonzini
  0 siblings, 0 replies; 46+ messages in thread
From: Paolo Bonzini @ 2016-11-14 14:22 UTC (permalink / raw)
  To: Marcelo Tosatti
  Cc: kvm, qemu-devel, Dr. David Alan Gilbert, Juan Quintela,
	Radim Krcmar, Eduardo Habkost



On 14/11/2016 15:00, Marcelo Tosatti wrote:
> On Mon, Nov 14, 2016 at 02:54:38PM +0100, Paolo Bonzini wrote:
>>
>>
>> On 14/11/2016 13:36, Marcelo Tosatti wrote:
>>> +        /* local (running VM) restore */
>>> +        if (s->clock_valid) {
>>> +            /*
>>> +             * if host does not support reliable KVM_GET_CLOCK,
>>> +             * read kvmclock value from memory
>>> +             */
>>> +            if (!kvm_has_adjust_clock_stable()) {
>>> +                time_at_migration = kvmclock_current_nsec(s);
>>
>> Just assign to s->clock here...
> 
> If kvmclock is not enabled, you want to use s->clock,
> rather than 0.
> 
>>> +            }
>>> +        /* migration/savevm/init restore */
>>> +        } else {
>>> +            /*
>>> +             * use s->clock in case machine uses reliable
>>> +             * get clock and host where vm was executing
>>> +             * supported reliable get clock
>>> +             */
>>> +            if (!s->mach_use_reliable_get_clock ||
>>> +                !s->src_use_reliable_get_clock) {
>>> +                time_at_migration = kvmclock_current_nsec(s);
>>
>> ... and here, so that time_at_migration is not needed anymore.
> 
> Same as above.

You're right.

>> Also here it's enough to look at s->src_user_reliable_get_clock, because
>> if s->mach_use_reliable_get_clock is false,
>> s->src_use_reliable_get_clock will be false as well.
> 
> Yes, but i like the code annotation.

Ah, I think we're looking at it differently.

I'm thinking "mach_use_reliable_get_clock is just for migration,
src_use_reliable_get_clock is the state".  Perhaps you're thinking of
enabling/disabling the whole new code for old machines?  What is the
advantage?

>>> +            }
>>> +        }
>>>  
>>> -        /* We can't rely on the migrated clock value, just discard it */
>>> +        /* We can't rely on the saved clock value, just discard it */
>>>          if (time_at_migration) {
>>>              s->clock = time_at_migration;
>>
>> [...]
>>
>>>
>>> +static bool kvmclock_src_use_reliable_get_clock(void *opaque)
>>> +{
>>> +    KVMClockState *s = opaque;
>>> +
>>> +    /*
>>> +     * On machine types that support reliable KVM_GET_CLOCK,
>>> +     * if host kernel does provide reliable KVM_GET_CLOCK,
>>> +     * set src_use_reliable_get_clock=true so that destination
>>> +     * avoids reading kvmclock from memory.
>>> +     */
>>> +    if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) {
>>> +        s->src_use_reliable_get_clock = true;
>>> +    }
>>> +
>>> +    return s->src_use_reliable_get_clock;
>>> +}
>>
>> Here you can just return s->mach_use_reliable_get_clock. 
> 
> mach_use_reliable_get_clock can be true but host might not support it.

Yes, but the "needed" function is only required to avoid breaking
pc-i440fx-2.7 and earlier.  If you return true here, you can still
migrate a "false" value for src_use_reliable_get_clock.

>>  To set
>> s->src_use_reliable_get_clock, after issuing KVM_GET_CLOCK you can look
>> at the KVM_CLOCK_TSC_STABLE bit in the kvm_clock struct's flags.
> 
> KVM_CLOCK_TSC_STABLE bit in the kvmclock structure != 
> KVM_GET_CLOCK returns reliable value, right?

It is the same as "is using masterclock", which is actually a stricter
condition than the KVM_CHECK_EXTENSION return value.  The right check to
use is whether masterclock is in use, and then the idea is to treat
clock,src_use_reliable_get_clock as one tuple that is updated atomically.

Paolo

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration
  2016-11-14 14:22         ` [Qemu-devel] " Paolo Bonzini
@ 2016-11-14 14:50           ` Marcelo Tosatti
  -1 siblings, 0 replies; 46+ messages in thread
From: Marcelo Tosatti @ 2016-11-14 14:50 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: kvm, qemu-devel, Dr. David Alan Gilbert, Juan Quintela,
	Radim Krcmar, Eduardo Habkost

On Mon, Nov 14, 2016 at 03:22:02PM +0100, Paolo Bonzini wrote:
> 
> 
> On 14/11/2016 15:00, Marcelo Tosatti wrote:
> > On Mon, Nov 14, 2016 at 02:54:38PM +0100, Paolo Bonzini wrote:
> >>
> >>
> >> On 14/11/2016 13:36, Marcelo Tosatti wrote:
> >>> +        /* local (running VM) restore */
> >>> +        if (s->clock_valid) {
> >>> +            /*
> >>> +             * if host does not support reliable KVM_GET_CLOCK,
> >>> +             * read kvmclock value from memory
> >>> +             */
> >>> +            if (!kvm_has_adjust_clock_stable()) {
> >>> +                time_at_migration = kvmclock_current_nsec(s);
> >>
> >> Just assign to s->clock here...
> > 
> > If kvmclock is not enabled, you want to use s->clock,
> > rather than 0.
> > 
> >>> +            }
> >>> +        /* migration/savevm/init restore */
> >>> +        } else {
> >>> +            /*
> >>> +             * use s->clock in case machine uses reliable
> >>> +             * get clock and host where vm was executing
> >>> +             * supported reliable get clock
> >>> +             */
> >>> +            if (!s->mach_use_reliable_get_clock ||
> >>> +                !s->src_use_reliable_get_clock) {
> >>> +                time_at_migration = kvmclock_current_nsec(s);
> >>
> >> ... and here, so that time_at_migration is not needed anymore.
> > 
> > Same as above.
> 
> You're right.
> 
> >> Also here it's enough to look at s->src_user_reliable_get_clock, because
> >> if s->mach_use_reliable_get_clock is false,
> >> s->src_use_reliable_get_clock will be false as well.
> > 
> > Yes, but i like the code annotation.
> 
> Ah, I think we're looking at it differently.

Well, i didnt want to mix the meaning of the variables:

+    /* whether machine supports reliable KVM_GET_CLOCK */
+    bool mach_use_reliable_get_clock;
+
+    /* whether source host supported reliable KVM_GET_CLOCK */
+    bool src_use_reliable_get_clock;

See the comments on top (later if you look at the variable, 
then have to think: well it has one name, but its disabled 
by that other path as well, so its more than its 
name,etc...).

> I'm thinking "mach_use_reliable_get_clock is just for migration,

Thats whether the machine supports it. New machines have it enabled,
olders don't.
> src_use_reliable_get_clock is the state". 

Thats whether the migration source supported it.

> Perhaps you're thinking of
> enabling/disabling the whole new code for old machines? 

source          destination         behaviour
supports        supports            on migration use s->clock,
                                    on stop/cont as well

supports        ~supports
                                    on migration use s->clock,
                                    on stop/cont read from guest mem

~support        supports            on migration read from guest,
                                    on stop/cont use
                                    kvm_get_clock/kvm_set_clock

~support        ~support            always read from guest memory


Thats what should happen (and thats what the patch should implement).


"support" means host supports your new KVM_GET_CLOCK/KVM_SET_CLOCK.

>  What is the
> advantage?

Well its necessary to use the correct thing, otherwise
you see a time backwards event.

> 
> >>> +            }
> >>> +        }
> >>>  
> >>> -        /* We can't rely on the migrated clock value, just discard it */
> >>> +        /* We can't rely on the saved clock value, just discard it */
> >>>          if (time_at_migration) {
> >>>              s->clock = time_at_migration;
> >>
> >> [...]
> >>
> >>>
> >>> +static bool kvmclock_src_use_reliable_get_clock(void *opaque)
> >>> +{
> >>> +    KVMClockState *s = opaque;
> >>> +
> >>> +    /*
> >>> +     * On machine types that support reliable KVM_GET_CLOCK,
> >>> +     * if host kernel does provide reliable KVM_GET_CLOCK,
> >>> +     * set src_use_reliable_get_clock=true so that destination
> >>> +     * avoids reading kvmclock from memory.
> >>> +     */
> >>> +    if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) {
> >>> +        s->src_use_reliable_get_clock = true;
> >>> +    }
> >>> +
> >>> +    return s->src_use_reliable_get_clock;
> >>> +}
> >>
> >> Here you can just return s->mach_use_reliable_get_clock. 
> > 
> > mach_use_reliable_get_clock can be true but host might not support it.
> 
> Yes, but the "needed" function is only required to avoid breaking
> pc-i440fx-2.7 and earlier. 

"needed" is required so that the migration between:

SRC             DEST                BEHAVIOUR
~support        supports            on migration read from guest,
                                    on stop/cont use
                                    kvm_get_clock/kvm_set_clock

Destination does not use KVM_GET_CLOCK value (which is
broken and should not be used).

> If you return true here, you can still
> migrate a "false" value for src_use_reliable_get_clock.

But the source only uses a reliable KVM_GET_CLOCK if 
both conditions are true.

And the subsection is only needed if the source
uses a reliable KVM_GET_CLOCK.

> >>  To set
> >> s->src_use_reliable_get_clock, after issuing KVM_GET_CLOCK you can look
> >> at the KVM_CLOCK_TSC_STABLE bit in the kvm_clock struct's flags.
> > 
> > KVM_CLOCK_TSC_STABLE bit in the kvmclock structure != 
> > KVM_GET_CLOCK returns reliable value, right?
> 
> It is the same as "is using masterclock", which is actually a stricter
> condition than the KVM_CHECK_EXTENSION return value.  The right check to
> use is whether masterclock is in use, 

Actually its "has a reliable KVM_GET_CLOCK" (which returns 
get_kernel_clock() + (rdtsc() - tsc_timestamp), 

"broken KVM_GET_CLOCK" =  get_kernel_clock()

> and then the idea is to treat
> clock,src_use_reliable_get_clock as one tuple that is updated atomically.
> 
> Paolo

Hum, not sure i get this...


^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [Qemu-devel] [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration
@ 2016-11-14 14:50           ` Marcelo Tosatti
  0 siblings, 0 replies; 46+ messages in thread
From: Marcelo Tosatti @ 2016-11-14 14:50 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: kvm, qemu-devel, Dr. David Alan Gilbert, Juan Quintela,
	Radim Krcmar, Eduardo Habkost

On Mon, Nov 14, 2016 at 03:22:02PM +0100, Paolo Bonzini wrote:
> 
> 
> On 14/11/2016 15:00, Marcelo Tosatti wrote:
> > On Mon, Nov 14, 2016 at 02:54:38PM +0100, Paolo Bonzini wrote:
> >>
> >>
> >> On 14/11/2016 13:36, Marcelo Tosatti wrote:
> >>> +        /* local (running VM) restore */
> >>> +        if (s->clock_valid) {
> >>> +            /*
> >>> +             * if host does not support reliable KVM_GET_CLOCK,
> >>> +             * read kvmclock value from memory
> >>> +             */
> >>> +            if (!kvm_has_adjust_clock_stable()) {
> >>> +                time_at_migration = kvmclock_current_nsec(s);
> >>
> >> Just assign to s->clock here...
> > 
> > If kvmclock is not enabled, you want to use s->clock,
> > rather than 0.
> > 
> >>> +            }
> >>> +        /* migration/savevm/init restore */
> >>> +        } else {
> >>> +            /*
> >>> +             * use s->clock in case machine uses reliable
> >>> +             * get clock and host where vm was executing
> >>> +             * supported reliable get clock
> >>> +             */
> >>> +            if (!s->mach_use_reliable_get_clock ||
> >>> +                !s->src_use_reliable_get_clock) {
> >>> +                time_at_migration = kvmclock_current_nsec(s);
> >>
> >> ... and here, so that time_at_migration is not needed anymore.
> > 
> > Same as above.
> 
> You're right.
> 
> >> Also here it's enough to look at s->src_user_reliable_get_clock, because
> >> if s->mach_use_reliable_get_clock is false,
> >> s->src_use_reliable_get_clock will be false as well.
> > 
> > Yes, but i like the code annotation.
> 
> Ah, I think we're looking at it differently.

Well, i didnt want to mix the meaning of the variables:

+    /* whether machine supports reliable KVM_GET_CLOCK */
+    bool mach_use_reliable_get_clock;
+
+    /* whether source host supported reliable KVM_GET_CLOCK */
+    bool src_use_reliable_get_clock;

See the comments on top (later if you look at the variable, 
then have to think: well it has one name, but its disabled 
by that other path as well, so its more than its 
name,etc...).

> I'm thinking "mach_use_reliable_get_clock is just for migration,

Thats whether the machine supports it. New machines have it enabled,
olders don't.
> src_use_reliable_get_clock is the state". 

Thats whether the migration source supported it.

> Perhaps you're thinking of
> enabling/disabling the whole new code for old machines? 

source          destination         behaviour
supports        supports            on migration use s->clock,
                                    on stop/cont as well

supports        ~supports
                                    on migration use s->clock,
                                    on stop/cont read from guest mem

~support        supports            on migration read from guest,
                                    on stop/cont use
                                    kvm_get_clock/kvm_set_clock

~support        ~support            always read from guest memory


Thats what should happen (and thats what the patch should implement).


"support" means host supports your new KVM_GET_CLOCK/KVM_SET_CLOCK.

>  What is the
> advantage?

Well its necessary to use the correct thing, otherwise
you see a time backwards event.

> 
> >>> +            }
> >>> +        }
> >>>  
> >>> -        /* We can't rely on the migrated clock value, just discard it */
> >>> +        /* We can't rely on the saved clock value, just discard it */
> >>>          if (time_at_migration) {
> >>>              s->clock = time_at_migration;
> >>
> >> [...]
> >>
> >>>
> >>> +static bool kvmclock_src_use_reliable_get_clock(void *opaque)
> >>> +{
> >>> +    KVMClockState *s = opaque;
> >>> +
> >>> +    /*
> >>> +     * On machine types that support reliable KVM_GET_CLOCK,
> >>> +     * if host kernel does provide reliable KVM_GET_CLOCK,
> >>> +     * set src_use_reliable_get_clock=true so that destination
> >>> +     * avoids reading kvmclock from memory.
> >>> +     */
> >>> +    if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) {
> >>> +        s->src_use_reliable_get_clock = true;
> >>> +    }
> >>> +
> >>> +    return s->src_use_reliable_get_clock;
> >>> +}
> >>
> >> Here you can just return s->mach_use_reliable_get_clock. 
> > 
> > mach_use_reliable_get_clock can be true but host might not support it.
> 
> Yes, but the "needed" function is only required to avoid breaking
> pc-i440fx-2.7 and earlier. 

"needed" is required so that the migration between:

SRC             DEST                BEHAVIOUR
~support        supports            on migration read from guest,
                                    on stop/cont use
                                    kvm_get_clock/kvm_set_clock

Destination does not use KVM_GET_CLOCK value (which is
broken and should not be used).

> If you return true here, you can still
> migrate a "false" value for src_use_reliable_get_clock.

But the source only uses a reliable KVM_GET_CLOCK if 
both conditions are true.

And the subsection is only needed if the source
uses a reliable KVM_GET_CLOCK.

> >>  To set
> >> s->src_use_reliable_get_clock, after issuing KVM_GET_CLOCK you can look
> >> at the KVM_CLOCK_TSC_STABLE bit in the kvm_clock struct's flags.
> > 
> > KVM_CLOCK_TSC_STABLE bit in the kvmclock structure != 
> > KVM_GET_CLOCK returns reliable value, right?
> 
> It is the same as "is using masterclock", which is actually a stricter
> condition than the KVM_CHECK_EXTENSION return value.  The right check to
> use is whether masterclock is in use, 

Actually its "has a reliable KVM_GET_CLOCK" (which returns 
get_kernel_clock() + (rdtsc() - tsc_timestamp), 

"broken KVM_GET_CLOCK" =  get_kernel_clock()

> and then the idea is to treat
> clock,src_use_reliable_get_clock as one tuple that is updated atomically.
> 
> Paolo

Hum, not sure i get this...

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration
  2016-11-14 14:50           ` [Qemu-devel] " Marcelo Tosatti
@ 2016-11-14 15:00             ` Paolo Bonzini
  -1 siblings, 0 replies; 46+ messages in thread
From: Paolo Bonzini @ 2016-11-14 15:00 UTC (permalink / raw)
  To: Marcelo Tosatti
  Cc: Eduardo Habkost, kvm, Juan Quintela, Radim Krcmar, qemu-devel,
	Dr. David Alan Gilbert



On 14/11/2016 15:50, Marcelo Tosatti wrote:
> Well, i didnt want to mix the meaning of the variables:
> 
> +    /* whether machine supports reliable KVM_GET_CLOCK */
> +    bool mach_use_reliable_get_clock;
> +
> +    /* whether source host supported reliable KVM_GET_CLOCK */
> +    bool src_use_reliable_get_clock;
> 
> See the comments on top (later if you look at the variable, 
> then have to think: well it has one name, but its disabled 
> by that other path as well, so its more than its 
> name,etc...).
> 
>> I'm thinking "mach_use_reliable_get_clock is just for migration,
> 
> Thats whether the machine supports it. New machines have it enabled,
> olders don't.

Yes.

>> src_use_reliable_get_clock is the state". 
> 
> Thats whether the migration source supported it.

But it's not used only for migration.  It's used on every vmstate change
(running->stop and stop->running, isn't it?  I think that, apart from
the migration case, it's better to use s->clock if kvmclock is stable,
even on older machine types.

>>>>> +static bool kvmclock_src_use_reliable_get_clock(void *opaque)
>>>>> +{
>>>>> +    KVMClockState *s = opaque;
>>>>> +
>>>>> +    /*
>>>>> +     * On machine types that support reliable KVM_GET_CLOCK,
>>>>> +     * if host kernel does provide reliable KVM_GET_CLOCK,
>>>>> +     * set src_use_reliable_get_clock=true so that destination
>>>>> +     * avoids reading kvmclock from memory.
>>>>> +     */
>>>>> +    if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) {
>>>>> +        s->src_use_reliable_get_clock = true;
>>>>> +    }
>>>>> +
>>>>> +    return s->src_use_reliable_get_clock;
>>>>> +}
>>>>
>>>> Here you can just return s->mach_use_reliable_get_clock. 
>>>
>>> mach_use_reliable_get_clock can be true but host might not support it.
>>
>> Yes, but the "needed" function is only required to avoid breaking
>> pc-i440fx-2.7 and earlier. 
> 
> "needed" is required so that the migration between:
> 
> SRC             DEST                BEHAVIOUR
> ~support        supports            on migration read from guest,
>                                     on stop/cont use
>                                     kvm_get_clock/kvm_set_clock
> 
> Destination does not use KVM_GET_CLOCK value (which is
> broken and should not be used).

If needed returns false, the destination will see
src_use_reliable_get_clock = false anyway.

If needed returns true, the destination can still see
src_use_reliable_get_clock = false if that's the value.

needed	src_use_reliable_get_clock	effect
false	false				use kvmclock_current_nsec
false	true				use kvmclock_current_nsec
true	false				use kvmclock_current_nsec
true	true				use s->clock


So the idea is:

- set s->clock and s->reliable_get_clock on every KVM_GET_CLOCK

- on migration source, use subsections and
s->mach_use_reliable_get_clock to avoid breaking migration on pre-2.8
machine types

- on migration destination, use .pre_load so that s->reliable_get_clock
is initialized to false on older machine types

>> If you return true here, you can still
>> migrate a "false" value for src_use_reliable_get_clock.
> 
> But the source only uses a reliable KVM_GET_CLOCK if 
> both conditions are true.
> 
> And the subsection is only needed if the source
> uses a reliable KVM_GET_CLOCK.

Yes, but the point of the subsection is just to avoid breaking migration
format.

>> It is the same as "is using masterclock", which is actually a stricter
>> condition than the KVM_CHECK_EXTENSION return value.  The right check to
>> use is whether masterclock is in use, 
> 
> Actually its "has a reliable KVM_GET_CLOCK" (which returns 
> get_kernel_clock() + (rdtsc() - tsc_timestamp), 
> 
> "broken KVM_GET_CLOCK" =  get_kernel_clock()

Yes.  And these end up being the same.

Paolo

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [Qemu-devel] [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration
@ 2016-11-14 15:00             ` Paolo Bonzini
  0 siblings, 0 replies; 46+ messages in thread
From: Paolo Bonzini @ 2016-11-14 15:00 UTC (permalink / raw)
  To: Marcelo Tosatti
  Cc: kvm, qemu-devel, Dr. David Alan Gilbert, Juan Quintela,
	Radim Krcmar, Eduardo Habkost



On 14/11/2016 15:50, Marcelo Tosatti wrote:
> Well, i didnt want to mix the meaning of the variables:
> 
> +    /* whether machine supports reliable KVM_GET_CLOCK */
> +    bool mach_use_reliable_get_clock;
> +
> +    /* whether source host supported reliable KVM_GET_CLOCK */
> +    bool src_use_reliable_get_clock;
> 
> See the comments on top (later if you look at the variable, 
> then have to think: well it has one name, but its disabled 
> by that other path as well, so its more than its 
> name,etc...).
> 
>> I'm thinking "mach_use_reliable_get_clock is just for migration,
> 
> Thats whether the machine supports it. New machines have it enabled,
> olders don't.

Yes.

>> src_use_reliable_get_clock is the state". 
> 
> Thats whether the migration source supported it.

But it's not used only for migration.  It's used on every vmstate change
(running->stop and stop->running, isn't it?  I think that, apart from
the migration case, it's better to use s->clock if kvmclock is stable,
even on older machine types.

>>>>> +static bool kvmclock_src_use_reliable_get_clock(void *opaque)
>>>>> +{
>>>>> +    KVMClockState *s = opaque;
>>>>> +
>>>>> +    /*
>>>>> +     * On machine types that support reliable KVM_GET_CLOCK,
>>>>> +     * if host kernel does provide reliable KVM_GET_CLOCK,
>>>>> +     * set src_use_reliable_get_clock=true so that destination
>>>>> +     * avoids reading kvmclock from memory.
>>>>> +     */
>>>>> +    if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) {
>>>>> +        s->src_use_reliable_get_clock = true;
>>>>> +    }
>>>>> +
>>>>> +    return s->src_use_reliable_get_clock;
>>>>> +}
>>>>
>>>> Here you can just return s->mach_use_reliable_get_clock. 
>>>
>>> mach_use_reliable_get_clock can be true but host might not support it.
>>
>> Yes, but the "needed" function is only required to avoid breaking
>> pc-i440fx-2.7 and earlier. 
> 
> "needed" is required so that the migration between:
> 
> SRC             DEST                BEHAVIOUR
> ~support        supports            on migration read from guest,
>                                     on stop/cont use
>                                     kvm_get_clock/kvm_set_clock
> 
> Destination does not use KVM_GET_CLOCK value (which is
> broken and should not be used).

If needed returns false, the destination will see
src_use_reliable_get_clock = false anyway.

If needed returns true, the destination can still see
src_use_reliable_get_clock = false if that's the value.

needed	src_use_reliable_get_clock	effect
false	false				use kvmclock_current_nsec
false	true				use kvmclock_current_nsec
true	false				use kvmclock_current_nsec
true	true				use s->clock


So the idea is:

- set s->clock and s->reliable_get_clock on every KVM_GET_CLOCK

- on migration source, use subsections and
s->mach_use_reliable_get_clock to avoid breaking migration on pre-2.8
machine types

- on migration destination, use .pre_load so that s->reliable_get_clock
is initialized to false on older machine types

>> If you return true here, you can still
>> migrate a "false" value for src_use_reliable_get_clock.
> 
> But the source only uses a reliable KVM_GET_CLOCK if 
> both conditions are true.
> 
> And the subsection is only needed if the source
> uses a reliable KVM_GET_CLOCK.

Yes, but the point of the subsection is just to avoid breaking migration
format.

>> It is the same as "is using masterclock", which is actually a stricter
>> condition than the KVM_CHECK_EXTENSION return value.  The right check to
>> use is whether masterclock is in use, 
> 
> Actually its "has a reliable KVM_GET_CLOCK" (which returns 
> get_kernel_clock() + (rdtsc() - tsc_timestamp), 
> 
> "broken KVM_GET_CLOCK" =  get_kernel_clock()

Yes.  And these end up being the same.

Paolo

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration
  2016-11-14 14:09     ` [Qemu-devel] " Juan Quintela
@ 2016-11-14 15:37       ` Marcelo Tosatti
  -1 siblings, 0 replies; 46+ messages in thread
From: Marcelo Tosatti @ 2016-11-14 15:37 UTC (permalink / raw)
  To: Juan Quintela
  Cc: kvm, qemu-devel, Dr. David Alan Gilbert, Paolo Bonzini,
	Radim Krcmar, Eduardo Habkost

On Mon, Nov 14, 2016 at 03:09:46PM +0100, Juan Quintela wrote:
> Marcelo Tosatti <mtosatti@redhat.com> wrote:
> > Check for KVM_CAP_ADJUST_CLOCK capability KVM_CLOCK_TSC_STABLE, which
> > indicates that KVM_GET_CLOCK returns a value as seen by the guest at
> > that moment.
> >
> > For new machine types, use this value rather than reading 
> > from guest memory.
> >
> > This reduces kvmclock difference on migration from 5s to 0.1s
> > (when max_downtime == 5s).
> >
> > Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
> 
> Acked-by: Juan Quintela <quintela@redhat.com>
> 
> But, if you have to respin it ....
> 
> 
> > +    /* whether machine supports reliable KVM_GET_CLOCK */
> > +    bool mach_use_reliable_get_clock;
> > +
> > +    /* whether source host supported reliable KVM_GET_CLOCK */
> > +    bool src_use_reliable_get_clock;
> 
> This two names are really long, but I don't have better suggesitons :-()
> >      if (running) {
> >          struct kvm_clock_data data = {};
> > -        uint64_t time_at_migration = kvmclock_current_nsec(s);
> > +        uint64_t time_at_migration = 0;
> 
> This was not "time_at_migration", it was not already before, but just
> now it looks really weird. (as it was already faulty, this is why it is
> only a suggestion.)
> 
> >  
> > -        s->clock_valid = false;
> > +        /* local (running VM) restore */
> > +        if (s->clock_valid) {
> > +            /*
> > +             * if host does not support reliable KVM_GET_CLOCK,
> > +             * read kvmclock value from memory
> > +             */
> > +            if (!kvm_has_adjust_clock_stable()) {
> > +                time_at_migration = kvmclock_current_nsec(s);
> > +            }
> > +        /* migration/savevm/init restore */
> > +        } else {
> > +            /*
> > +             * use s->clock in case machine uses reliable
> > +             * get clock and host where vm was executing
> > +             * supported reliable get clock
> > +             */
> 
> This comment is just weird.  Simplifying
>     /* If A and B do C */
>     if (!A and || !B) {
>        then D();
>     }
> 
> Doing the opposite comment?
> 
> Migration code looks rigth.

Fixed those.

> Once said that, I continue hating clocks.

Me too, especially the biological one.


^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [Qemu-devel] [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration
@ 2016-11-14 15:37       ` Marcelo Tosatti
  0 siblings, 0 replies; 46+ messages in thread
From: Marcelo Tosatti @ 2016-11-14 15:37 UTC (permalink / raw)
  To: Juan Quintela
  Cc: kvm, qemu-devel, Dr. David Alan Gilbert, Paolo Bonzini,
	Radim Krcmar, Eduardo Habkost

On Mon, Nov 14, 2016 at 03:09:46PM +0100, Juan Quintela wrote:
> Marcelo Tosatti <mtosatti@redhat.com> wrote:
> > Check for KVM_CAP_ADJUST_CLOCK capability KVM_CLOCK_TSC_STABLE, which
> > indicates that KVM_GET_CLOCK returns a value as seen by the guest at
> > that moment.
> >
> > For new machine types, use this value rather than reading 
> > from guest memory.
> >
> > This reduces kvmclock difference on migration from 5s to 0.1s
> > (when max_downtime == 5s).
> >
> > Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
> 
> Acked-by: Juan Quintela <quintela@redhat.com>
> 
> But, if you have to respin it ....
> 
> 
> > +    /* whether machine supports reliable KVM_GET_CLOCK */
> > +    bool mach_use_reliable_get_clock;
> > +
> > +    /* whether source host supported reliable KVM_GET_CLOCK */
> > +    bool src_use_reliable_get_clock;
> 
> This two names are really long, but I don't have better suggesitons :-()
> >      if (running) {
> >          struct kvm_clock_data data = {};
> > -        uint64_t time_at_migration = kvmclock_current_nsec(s);
> > +        uint64_t time_at_migration = 0;
> 
> This was not "time_at_migration", it was not already before, but just
> now it looks really weird. (as it was already faulty, this is why it is
> only a suggestion.)
> 
> >  
> > -        s->clock_valid = false;
> > +        /* local (running VM) restore */
> > +        if (s->clock_valid) {
> > +            /*
> > +             * if host does not support reliable KVM_GET_CLOCK,
> > +             * read kvmclock value from memory
> > +             */
> > +            if (!kvm_has_adjust_clock_stable()) {
> > +                time_at_migration = kvmclock_current_nsec(s);
> > +            }
> > +        /* migration/savevm/init restore */
> > +        } else {
> > +            /*
> > +             * use s->clock in case machine uses reliable
> > +             * get clock and host where vm was executing
> > +             * supported reliable get clock
> > +             */
> 
> This comment is just weird.  Simplifying
>     /* If A and B do C */
>     if (!A and || !B) {
>        then D();
>     }
> 
> Doing the opposite comment?
> 
> Migration code looks rigth.

Fixed those.

> Once said that, I continue hating clocks.

Me too, especially the biological one.

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration
  2016-11-14 15:00             ` [Qemu-devel] " Paolo Bonzini
@ 2016-11-14 15:40               ` Marcelo Tosatti
  -1 siblings, 0 replies; 46+ messages in thread
From: Marcelo Tosatti @ 2016-11-14 15:40 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: kvm, qemu-devel, Dr. David Alan Gilbert, Juan Quintela,
	Radim Krcmar, Eduardo Habkost

On Mon, Nov 14, 2016 at 04:00:38PM +0100, Paolo Bonzini wrote:
> 
> 
> On 14/11/2016 15:50, Marcelo Tosatti wrote:
> > Well, i didnt want to mix the meaning of the variables:
> > 
> > +    /* whether machine supports reliable KVM_GET_CLOCK */
> > +    bool mach_use_reliable_get_clock;
> > +
> > +    /* whether source host supported reliable KVM_GET_CLOCK */
> > +    bool src_use_reliable_get_clock;
> > 
> > See the comments on top (later if you look at the variable, 
> > then have to think: well it has one name, but its disabled 
> > by that other path as well, so its more than its 
> > name,etc...).
> > 
> >> I'm thinking "mach_use_reliable_get_clock is just for migration,
> > 
> > Thats whether the machine supports it. New machines have it enabled,
> > olders don't.
> 
> Yes.
> 
> >> src_use_reliable_get_clock is the state". 
> > 
> > Thats whether the migration source supported it.
> 
> But it's not used only for migration.  It's used on every vmstate change
> (running->stop and stop->running, isn't it?  

No. Its used only if s->clock_valid == false, which means either:

migration/savevm/init.

Now for savevm there is a bug: 

> I think that, apart from
> the migration case, it's better to use s->clock if kvmclock is stable,
> even on older machine types.

Yes, its already doing that:

        /* local (running VM) restore */
        if (s->clock_valid) {
            /*
             * if host does not support reliable KVM_GET_CLOCK,
             * read kvmclock value from memory
             */
            if (!kvm_has_adjust_clock_stable()) {
                time_at_migration = kvmclock_current_nsec(s);
            }

It only uses kvmclock_current_nsec() if kvm_has_adjust_clock_stable is 
false.

> >>>>> +static bool kvmclock_src_use_reliable_get_clock(void *opaque)
> >>>>> +{
> >>>>> +    KVMClockState *s = opaque;
> >>>>> +
> >>>>> +    /*
> >>>>> +     * On machine types that support reliable KVM_GET_CLOCK,
> >>>>> +     * if host kernel does provide reliable KVM_GET_CLOCK,
> >>>>> +     * set src_use_reliable_get_clock=true so that destination
> >>>>> +     * avoids reading kvmclock from memory.
> >>>>> +     */
> >>>>> +    if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) {
> >>>>> +        s->src_use_reliable_get_clock = true;
> >>>>> +    }
> >>>>> +
> >>>>> +    return s->src_use_reliable_get_clock;
> >>>>> +}
> >>>>
> >>>> Here you can just return s->mach_use_reliable_get_clock. 
> >>>
> >>> mach_use_reliable_get_clock can be true but host might not support it.
> >>
> >> Yes, but the "needed" function is only required to avoid breaking
> >> pc-i440fx-2.7 and earlier. 
> > 
> > "needed" is required so that the migration between:
> > 
> > SRC             DEST                BEHAVIOUR
> > ~support        supports            on migration read from guest,
> >                                     on stop/cont use
> >                                     kvm_get_clock/kvm_set_clock
> > 
> > Destination does not use KVM_GET_CLOCK value (which is
> > broken and should not be used).
> 
> If needed returns false, the destination will see
> src_use_reliable_get_clock = false anyway.

Causing it to read the clock from memory, thats what 
should happen.

> If needed returns true, the destination can still see
> src_use_reliable_get_clock = false if that's the value.

You are asking me to change from:

+static bool kvmclock_src_use_reliable_get_clock(void *opaque)
+{
+    KVMClockState *s = opaque;
+
+    /*
+     * On machine types that support reliable KVM_GET_CLOCK,
+     * if host kernel does provide reliable KVM_GET_CLOCK,
+     * set src_use_reliable_get_clock=true so that destination
+     * avoids reading kvmclock from memory.
+     */
+    if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) {
+        s->src_use_reliable_get_clock = true;
+    }
+
+    return s->src_use_reliable_get_clock;
+}

to

static bool kvmclock_src_use_reliable_get_clock(void *opaque)
{
    KVMClockState *s = opaque;

    /*
     * On machine types that support reliable KVM_GET_CLOCK,
     * if host kernel does provide reliable KVM_GET_CLOCK,
     * set src_use_reliable_get_clock=true so that destination
     * avoids reading kvmclock from memory.
     */
    if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable())
    {
        s->src_use_reliable_get_clock = true;
    }

    return s->mach_use_reliable_get_clock;
}


Ah, OK, done.


> needed	src_use_reliable_get_clock	effect
> false	false				use kvmclock_current_nsec
> false	true				use kvmclock_current_nsec
> true	false				use kvmclock_current_nsec
> true	true				use s->clock
> 
> 
> So the idea is:
> 
> - set s->clock and s->reliable_get_clock on every KVM_GET_CLOCK

Already do that (apart from s->clock_valid which is necessary).

> - on migration source, use subsections and
> s->mach_use_reliable_get_clock to avoid breaking migration on pre-2.8
> machine types

Migration is already broken when you use different machine types.

So s->src_use_reliable_get_clock is only used to indicate 
to the destination that: "you can use KVM_GET_CLOCK value, 
its safe".

> - on migration destination, use .pre_load so that s->reliable_get_clock
> is initialized to false on older machine types

Thats what mach_use_reliable_get_clock does already:

static Property kvmclock_properties[] = {
    DEFINE_PROP_BOOL("mach_use_reliable_get_clock", KVMClockState,
                      mach_use_reliable_get_clock, true),
    DEFINE_PROP_END_OF_LIST(),
};

+        .driver   = "kvmclock",\
+        .property = "mach_use_reliable_get_clock",\
+        .value    = "off",\
+    },\
+    {\

> 
> >> If you return true here, you can still
> >> migrate a "false" value for src_use_reliable_get_clock.
> > 
> > But the source only uses a reliable KVM_GET_CLOCK if 
> > both conditions are true.
> > 
> > And the subsection is only needed if the source
> > uses a reliable KVM_GET_CLOCK.
> 
> Yes, but the point of the subsection is just to avoid breaking migration
> format.

Its a new creative use of the subsection. 

> >> It is the same as "is using masterclock", which is actually a stricter
> >> condition than the KVM_CHECK_EXTENSION return value.  The right check to
> >> use is whether masterclock is in use, 
> > 
> > Actually its "has a reliable KVM_GET_CLOCK" (which returns 
> > get_kernel_clock() + (rdtsc() - tsc_timestamp), 
> > 
> > "broken KVM_GET_CLOCK" =  get_kernel_clock()
> 
> Yes.  And these end up being the same.

No. You can have masterclock enabled, KVM_TSC_STABLE set in pvclock
flags, and unreliable KVM_GET_CLOCK (without your patch to
KVM_GET_CLOCK).


Paolo not sure if there is anything further you want me to 
change at this point ? 


Index: qemu-mig-advance-clock/hw/i386/kvm/clock.c
===================================================================
--- qemu-mig-advance-clock.orig/hw/i386/kvm/clock.c	2016-11-14 10:40:39.748116312 -0200
+++ qemu-mig-advance-clock/hw/i386/kvm/clock.c	2016-11-14 13:38:29.299955042 -0200
@@ -36,6 +36,12 @@
 
     uint64_t clock;
     bool clock_valid;
+
+    /* whether machine supports reliable KVM_GET_CLOCK */
+    bool mach_use_reliable_get_clock;
+
+    /* whether source host supported reliable KVM_GET_CLOCK */
+    bool src_use_reliable_get_clock;
 } KVMClockState;
 
 struct pvclock_vcpu_time_info {
@@ -81,6 +87,19 @@
     return nsec + time.system_time;
 }
 
+static uint64_t kvm_get_clock(void)
+{
+    struct kvm_clock_data data;
+    int ret;
+
+    ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data);
+    if (ret < 0) {
+        fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret));
+                abort();
+    }
+    return data.clock;
+}
+
 static void kvmclock_vm_state_change(void *opaque, int running,
                                      RunState state)
 {
@@ -91,15 +110,37 @@
 
     if (running) {
         struct kvm_clock_data data = {};
-        uint64_t time_at_migration = kvmclock_current_nsec(s);
+        uint64_t pvclock_via_mem = 0;
 
-        s->clock_valid = false;
+        /* local (running VM) restore */
+        if (s->clock_valid) {
+            /*
+             * if host does not support reliable KVM_GET_CLOCK,
+             * read kvmclock value from memory
+             */
+            if (!kvm_has_adjust_clock_stable()) {
+                pvclock_via_mem = kvmclock_current_nsec(s);
+            }
+        /* migration/savevm/init restore */
+        } else {
+            /*
+             * use s->clock in case machine uses reliable
+             * get clock and source host supported
+             * reliable get clock
+             */
+            if (!(s->mach_use_reliable_get_clock &&
+                  s->src_use_reliable_get_clock)) {
+                pvclock_via_mem = kvmclock_current_nsec(s);
+            }
+        }
 
-        /* We can't rely on the migrated clock value, just discard it */
-        if (time_at_migration) {
-            s->clock = time_at_migration;
+        /* We can't rely on the saved clock value, just discard it */
+        if (pvclock_via_mem) {
+            s->clock = pvclock_via_mem;
         }
 
+        s->clock_valid = false;
+
         data.clock = s->clock;
         ret = kvm_vm_ioctl(kvm_state, KVM_SET_CLOCK, &data);
         if (ret < 0) {
@@ -120,8 +161,6 @@
             }
         }
     } else {
-        struct kvm_clock_data data;
-        int ret;
 
         if (s->clock_valid) {
             return;
@@ -129,13 +168,7 @@
 
         kvm_synchronize_all_tsc();
 
-        ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data);
-        if (ret < 0) {
-            fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret));
-            abort();
-        }
-        s->clock = data.clock;
-
+        s->clock = kvm_get_clock();
         /*
          * If the VM is stopped, declare the clock state valid to
          * avoid re-reading it on next vmsave (which would return
@@ -152,22 +185,69 @@
     qemu_add_vm_change_state_handler(kvmclock_vm_state_change, s);
 }
 
+static bool kvmclock_src_use_reliable_get_clock(void *opaque)
+{
+    KVMClockState *s = opaque;
+
+    /*
+     * On machine types that support reliable KVM_GET_CLOCK,
+     * if host kernel does provide reliable KVM_GET_CLOCK,
+     * set src_use_reliable_get_clock=true so that destination
+     * avoids reading kvmclock from memory.
+     */
+    if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) {
+        s->src_use_reliable_get_clock = true;
+    }
+
+    return s->mach_use_reliable_get_clock;
+}
+
+static const VMStateDescription kvmclock_reliable_get_clock = {
+    .name = "kvmclock/src_use_reliable_get_clock",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = kvmclock_src_use_reliable_get_clock,
+    .fields = (VMStateField[]) {
+        VMSTATE_BOOL(src_use_reliable_get_clock, KVMClockState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static void kvmclock_pre_save(void *opaque)
+{
+    KVMClockState *s = opaque;
+
+    s->clock = kvm_get_clock();
+}
+
 static const VMStateDescription kvmclock_vmsd = {
     .name = "kvmclock",
     .version_id = 1,
     .minimum_version_id = 1,
+    .pre_save = kvmclock_pre_save,
     .fields = (VMStateField[]) {
         VMSTATE_UINT64(clock, KVMClockState),
         VMSTATE_END_OF_LIST()
+    },
+    .subsections = (const VMStateDescription * []) {
+        &kvmclock_reliable_get_clock,
+        NULL
     }
 };
 
+static Property kvmclock_properties[] = {
+    DEFINE_PROP_BOOL("mach_use_reliable_get_clock", KVMClockState,
+                      mach_use_reliable_get_clock, true),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
 static void kvmclock_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
 
     dc->realize = kvmclock_realize;
     dc->vmsd = &kvmclock_vmsd;
+    dc->props = kvmclock_properties;
 }
 
 static const TypeInfo kvmclock_info = {
Index: qemu-mig-advance-clock/include/hw/i386/pc.h
===================================================================
--- qemu-mig-advance-clock.orig/include/hw/i386/pc.h	2016-11-14 10:40:39.748116312 -0200
+++ qemu-mig-advance-clock/include/hw/i386/pc.h	2016-11-14 10:40:48.059128649 -0200
@@ -370,6 +370,11 @@
 #define PC_COMPAT_2_7 \
     HW_COMPAT_2_7 \
     {\
+        .driver   = "kvmclock",\
+        .property = "mach_use_reliable_get_clock",\
+        .value    = "off",\
+    },\
+    {\
         .driver   = TYPE_X86_CPU,\
         .property = "l3-cache",\
         .value    = "off",\
Index: qemu-mig-advance-clock/target-i386/kvm.c
===================================================================
--- qemu-mig-advance-clock.orig/target-i386/kvm.c	2016-11-14 10:40:39.750116314 -0200
+++ qemu-mig-advance-clock/target-i386/kvm.c	2016-11-14 10:40:48.061128652 -0200
@@ -117,6 +117,13 @@
     return kvm_check_extension(kvm_state, KVM_CAP_X86_SMM);
 }
 
+bool kvm_has_adjust_clock_stable(void)
+{
+    int ret = kvm_check_extension(kvm_state, KVM_CAP_ADJUST_CLOCK);
+
+    return (ret == KVM_CLOCK_TSC_STABLE);
+}
+
 bool kvm_allows_irq0_override(void)
 {
     return !kvm_irqchip_in_kernel() || kvm_has_gsi_routing();
Index: qemu-mig-advance-clock/target-i386/kvm_i386.h
===================================================================
--- qemu-mig-advance-clock.orig/target-i386/kvm_i386.h	2016-11-14 10:40:39.751116316 -0200
+++ qemu-mig-advance-clock/target-i386/kvm_i386.h	2016-11-14 10:40:48.061128652 -0200
@@ -17,6 +17,7 @@
 
 bool kvm_allows_irq0_override(void);
 bool kvm_has_smm(void);
+bool kvm_has_adjust_clock_stable(void);
 void kvm_synchronize_all_tsc(void);
 void kvm_arch_reset_vcpu(X86CPU *cs);
 void kvm_arch_do_init_vcpu(X86CPU *cs);

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [Qemu-devel] [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration
@ 2016-11-14 15:40               ` Marcelo Tosatti
  0 siblings, 0 replies; 46+ messages in thread
From: Marcelo Tosatti @ 2016-11-14 15:40 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: kvm, qemu-devel, Dr. David Alan Gilbert, Juan Quintela,
	Radim Krcmar, Eduardo Habkost

On Mon, Nov 14, 2016 at 04:00:38PM +0100, Paolo Bonzini wrote:
> 
> 
> On 14/11/2016 15:50, Marcelo Tosatti wrote:
> > Well, i didnt want to mix the meaning of the variables:
> > 
> > +    /* whether machine supports reliable KVM_GET_CLOCK */
> > +    bool mach_use_reliable_get_clock;
> > +
> > +    /* whether source host supported reliable KVM_GET_CLOCK */
> > +    bool src_use_reliable_get_clock;
> > 
> > See the comments on top (later if you look at the variable, 
> > then have to think: well it has one name, but its disabled 
> > by that other path as well, so its more than its 
> > name,etc...).
> > 
> >> I'm thinking "mach_use_reliable_get_clock is just for migration,
> > 
> > Thats whether the machine supports it. New machines have it enabled,
> > olders don't.
> 
> Yes.
> 
> >> src_use_reliable_get_clock is the state". 
> > 
> > Thats whether the migration source supported it.
> 
> But it's not used only for migration.  It's used on every vmstate change
> (running->stop and stop->running, isn't it?  

No. Its used only if s->clock_valid == false, which means either:

migration/savevm/init.

Now for savevm there is a bug: 

> I think that, apart from
> the migration case, it's better to use s->clock if kvmclock is stable,
> even on older machine types.

Yes, its already doing that:

        /* local (running VM) restore */
        if (s->clock_valid) {
            /*
             * if host does not support reliable KVM_GET_CLOCK,
             * read kvmclock value from memory
             */
            if (!kvm_has_adjust_clock_stable()) {
                time_at_migration = kvmclock_current_nsec(s);
            }

It only uses kvmclock_current_nsec() if kvm_has_adjust_clock_stable is 
false.

> >>>>> +static bool kvmclock_src_use_reliable_get_clock(void *opaque)
> >>>>> +{
> >>>>> +    KVMClockState *s = opaque;
> >>>>> +
> >>>>> +    /*
> >>>>> +     * On machine types that support reliable KVM_GET_CLOCK,
> >>>>> +     * if host kernel does provide reliable KVM_GET_CLOCK,
> >>>>> +     * set src_use_reliable_get_clock=true so that destination
> >>>>> +     * avoids reading kvmclock from memory.
> >>>>> +     */
> >>>>> +    if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) {
> >>>>> +        s->src_use_reliable_get_clock = true;
> >>>>> +    }
> >>>>> +
> >>>>> +    return s->src_use_reliable_get_clock;
> >>>>> +}
> >>>>
> >>>> Here you can just return s->mach_use_reliable_get_clock. 
> >>>
> >>> mach_use_reliable_get_clock can be true but host might not support it.
> >>
> >> Yes, but the "needed" function is only required to avoid breaking
> >> pc-i440fx-2.7 and earlier. 
> > 
> > "needed" is required so that the migration between:
> > 
> > SRC             DEST                BEHAVIOUR
> > ~support        supports            on migration read from guest,
> >                                     on stop/cont use
> >                                     kvm_get_clock/kvm_set_clock
> > 
> > Destination does not use KVM_GET_CLOCK value (which is
> > broken and should not be used).
> 
> If needed returns false, the destination will see
> src_use_reliable_get_clock = false anyway.

Causing it to read the clock from memory, thats what 
should happen.

> If needed returns true, the destination can still see
> src_use_reliable_get_clock = false if that's the value.

You are asking me to change from:

+static bool kvmclock_src_use_reliable_get_clock(void *opaque)
+{
+    KVMClockState *s = opaque;
+
+    /*
+     * On machine types that support reliable KVM_GET_CLOCK,
+     * if host kernel does provide reliable KVM_GET_CLOCK,
+     * set src_use_reliable_get_clock=true so that destination
+     * avoids reading kvmclock from memory.
+     */
+    if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) {
+        s->src_use_reliable_get_clock = true;
+    }
+
+    return s->src_use_reliable_get_clock;
+}

to

static bool kvmclock_src_use_reliable_get_clock(void *opaque)
{
    KVMClockState *s = opaque;

    /*
     * On machine types that support reliable KVM_GET_CLOCK,
     * if host kernel does provide reliable KVM_GET_CLOCK,
     * set src_use_reliable_get_clock=true so that destination
     * avoids reading kvmclock from memory.
     */
    if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable())
    {
        s->src_use_reliable_get_clock = true;
    }

    return s->mach_use_reliable_get_clock;
}


Ah, OK, done.


> needed	src_use_reliable_get_clock	effect
> false	false				use kvmclock_current_nsec
> false	true				use kvmclock_current_nsec
> true	false				use kvmclock_current_nsec
> true	true				use s->clock
> 
> 
> So the idea is:
> 
> - set s->clock and s->reliable_get_clock on every KVM_GET_CLOCK

Already do that (apart from s->clock_valid which is necessary).

> - on migration source, use subsections and
> s->mach_use_reliable_get_clock to avoid breaking migration on pre-2.8
> machine types

Migration is already broken when you use different machine types.

So s->src_use_reliable_get_clock is only used to indicate 
to the destination that: "you can use KVM_GET_CLOCK value, 
its safe".

> - on migration destination, use .pre_load so that s->reliable_get_clock
> is initialized to false on older machine types

Thats what mach_use_reliable_get_clock does already:

static Property kvmclock_properties[] = {
    DEFINE_PROP_BOOL("mach_use_reliable_get_clock", KVMClockState,
                      mach_use_reliable_get_clock, true),
    DEFINE_PROP_END_OF_LIST(),
};

+        .driver   = "kvmclock",\
+        .property = "mach_use_reliable_get_clock",\
+        .value    = "off",\
+    },\
+    {\

> 
> >> If you return true here, you can still
> >> migrate a "false" value for src_use_reliable_get_clock.
> > 
> > But the source only uses a reliable KVM_GET_CLOCK if 
> > both conditions are true.
> > 
> > And the subsection is only needed if the source
> > uses a reliable KVM_GET_CLOCK.
> 
> Yes, but the point of the subsection is just to avoid breaking migration
> format.

Its a new creative use of the subsection. 

> >> It is the same as "is using masterclock", which is actually a stricter
> >> condition than the KVM_CHECK_EXTENSION return value.  The right check to
> >> use is whether masterclock is in use, 
> > 
> > Actually its "has a reliable KVM_GET_CLOCK" (which returns 
> > get_kernel_clock() + (rdtsc() - tsc_timestamp), 
> > 
> > "broken KVM_GET_CLOCK" =  get_kernel_clock()
> 
> Yes.  And these end up being the same.

No. You can have masterclock enabled, KVM_TSC_STABLE set in pvclock
flags, and unreliable KVM_GET_CLOCK (without your patch to
KVM_GET_CLOCK).


Paolo not sure if there is anything further you want me to 
change at this point ? 


Index: qemu-mig-advance-clock/hw/i386/kvm/clock.c
===================================================================
--- qemu-mig-advance-clock.orig/hw/i386/kvm/clock.c	2016-11-14 10:40:39.748116312 -0200
+++ qemu-mig-advance-clock/hw/i386/kvm/clock.c	2016-11-14 13:38:29.299955042 -0200
@@ -36,6 +36,12 @@
 
     uint64_t clock;
     bool clock_valid;
+
+    /* whether machine supports reliable KVM_GET_CLOCK */
+    bool mach_use_reliable_get_clock;
+
+    /* whether source host supported reliable KVM_GET_CLOCK */
+    bool src_use_reliable_get_clock;
 } KVMClockState;
 
 struct pvclock_vcpu_time_info {
@@ -81,6 +87,19 @@
     return nsec + time.system_time;
 }
 
+static uint64_t kvm_get_clock(void)
+{
+    struct kvm_clock_data data;
+    int ret;
+
+    ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data);
+    if (ret < 0) {
+        fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret));
+                abort();
+    }
+    return data.clock;
+}
+
 static void kvmclock_vm_state_change(void *opaque, int running,
                                      RunState state)
 {
@@ -91,15 +110,37 @@
 
     if (running) {
         struct kvm_clock_data data = {};
-        uint64_t time_at_migration = kvmclock_current_nsec(s);
+        uint64_t pvclock_via_mem = 0;
 
-        s->clock_valid = false;
+        /* local (running VM) restore */
+        if (s->clock_valid) {
+            /*
+             * if host does not support reliable KVM_GET_CLOCK,
+             * read kvmclock value from memory
+             */
+            if (!kvm_has_adjust_clock_stable()) {
+                pvclock_via_mem = kvmclock_current_nsec(s);
+            }
+        /* migration/savevm/init restore */
+        } else {
+            /*
+             * use s->clock in case machine uses reliable
+             * get clock and source host supported
+             * reliable get clock
+             */
+            if (!(s->mach_use_reliable_get_clock &&
+                  s->src_use_reliable_get_clock)) {
+                pvclock_via_mem = kvmclock_current_nsec(s);
+            }
+        }
 
-        /* We can't rely on the migrated clock value, just discard it */
-        if (time_at_migration) {
-            s->clock = time_at_migration;
+        /* We can't rely on the saved clock value, just discard it */
+        if (pvclock_via_mem) {
+            s->clock = pvclock_via_mem;
         }
 
+        s->clock_valid = false;
+
         data.clock = s->clock;
         ret = kvm_vm_ioctl(kvm_state, KVM_SET_CLOCK, &data);
         if (ret < 0) {
@@ -120,8 +161,6 @@
             }
         }
     } else {
-        struct kvm_clock_data data;
-        int ret;
 
         if (s->clock_valid) {
             return;
@@ -129,13 +168,7 @@
 
         kvm_synchronize_all_tsc();
 
-        ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data);
-        if (ret < 0) {
-            fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret));
-            abort();
-        }
-        s->clock = data.clock;
-
+        s->clock = kvm_get_clock();
         /*
          * If the VM is stopped, declare the clock state valid to
          * avoid re-reading it on next vmsave (which would return
@@ -152,22 +185,69 @@
     qemu_add_vm_change_state_handler(kvmclock_vm_state_change, s);
 }
 
+static bool kvmclock_src_use_reliable_get_clock(void *opaque)
+{
+    KVMClockState *s = opaque;
+
+    /*
+     * On machine types that support reliable KVM_GET_CLOCK,
+     * if host kernel does provide reliable KVM_GET_CLOCK,
+     * set src_use_reliable_get_clock=true so that destination
+     * avoids reading kvmclock from memory.
+     */
+    if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) {
+        s->src_use_reliable_get_clock = true;
+    }
+
+    return s->mach_use_reliable_get_clock;
+}
+
+static const VMStateDescription kvmclock_reliable_get_clock = {
+    .name = "kvmclock/src_use_reliable_get_clock",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = kvmclock_src_use_reliable_get_clock,
+    .fields = (VMStateField[]) {
+        VMSTATE_BOOL(src_use_reliable_get_clock, KVMClockState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static void kvmclock_pre_save(void *opaque)
+{
+    KVMClockState *s = opaque;
+
+    s->clock = kvm_get_clock();
+}
+
 static const VMStateDescription kvmclock_vmsd = {
     .name = "kvmclock",
     .version_id = 1,
     .minimum_version_id = 1,
+    .pre_save = kvmclock_pre_save,
     .fields = (VMStateField[]) {
         VMSTATE_UINT64(clock, KVMClockState),
         VMSTATE_END_OF_LIST()
+    },
+    .subsections = (const VMStateDescription * []) {
+        &kvmclock_reliable_get_clock,
+        NULL
     }
 };
 
+static Property kvmclock_properties[] = {
+    DEFINE_PROP_BOOL("mach_use_reliable_get_clock", KVMClockState,
+                      mach_use_reliable_get_clock, true),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
 static void kvmclock_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
 
     dc->realize = kvmclock_realize;
     dc->vmsd = &kvmclock_vmsd;
+    dc->props = kvmclock_properties;
 }
 
 static const TypeInfo kvmclock_info = {
Index: qemu-mig-advance-clock/include/hw/i386/pc.h
===================================================================
--- qemu-mig-advance-clock.orig/include/hw/i386/pc.h	2016-11-14 10:40:39.748116312 -0200
+++ qemu-mig-advance-clock/include/hw/i386/pc.h	2016-11-14 10:40:48.059128649 -0200
@@ -370,6 +370,11 @@
 #define PC_COMPAT_2_7 \
     HW_COMPAT_2_7 \
     {\
+        .driver   = "kvmclock",\
+        .property = "mach_use_reliable_get_clock",\
+        .value    = "off",\
+    },\
+    {\
         .driver   = TYPE_X86_CPU,\
         .property = "l3-cache",\
         .value    = "off",\
Index: qemu-mig-advance-clock/target-i386/kvm.c
===================================================================
--- qemu-mig-advance-clock.orig/target-i386/kvm.c	2016-11-14 10:40:39.750116314 -0200
+++ qemu-mig-advance-clock/target-i386/kvm.c	2016-11-14 10:40:48.061128652 -0200
@@ -117,6 +117,13 @@
     return kvm_check_extension(kvm_state, KVM_CAP_X86_SMM);
 }
 
+bool kvm_has_adjust_clock_stable(void)
+{
+    int ret = kvm_check_extension(kvm_state, KVM_CAP_ADJUST_CLOCK);
+
+    return (ret == KVM_CLOCK_TSC_STABLE);
+}
+
 bool kvm_allows_irq0_override(void)
 {
     return !kvm_irqchip_in_kernel() || kvm_has_gsi_routing();
Index: qemu-mig-advance-clock/target-i386/kvm_i386.h
===================================================================
--- qemu-mig-advance-clock.orig/target-i386/kvm_i386.h	2016-11-14 10:40:39.751116316 -0200
+++ qemu-mig-advance-clock/target-i386/kvm_i386.h	2016-11-14 10:40:48.061128652 -0200
@@ -17,6 +17,7 @@
 
 bool kvm_allows_irq0_override(void);
 bool kvm_has_smm(void);
+bool kvm_has_adjust_clock_stable(void);
 void kvm_synchronize_all_tsc(void);
 void kvm_arch_reset_vcpu(X86CPU *cs);
 void kvm_arch_do_init_vcpu(X86CPU *cs);

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration
  2016-11-14 15:40               ` [Qemu-devel] " Marcelo Tosatti
@ 2016-11-14 16:43                 ` Paolo Bonzini
  -1 siblings, 0 replies; 46+ messages in thread
From: Paolo Bonzini @ 2016-11-14 16:43 UTC (permalink / raw)
  To: Marcelo Tosatti
  Cc: kvm, qemu-devel, Dr. David Alan Gilbert, Juan Quintela,
	Radim Krcmar, Eduardo Habkost



On 14/11/2016 16:40, Marcelo Tosatti wrote:
> static bool kvmclock_src_use_reliable_get_clock(void *opaque)
> {
>     KVMClockState *s = opaque;
> 
>     /*
>      * On machine types that support reliable KVM_GET_CLOCK,
>      * if host kernel does provide reliable KVM_GET_CLOCK,
>      * set src_use_reliable_get_clock=true so that destination
>      * avoids reading kvmclock from memory.
>      */
>     if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable())
>     {
>         s->src_use_reliable_get_clock = true;
>     }
> 
>     return s->mach_use_reliable_get_clock;
> }
> 
> 
> Ah, OK, done.

s->src_use_reliable_get_clock should not be set with
KVM_CHECK_EXTENSION, but rather from the flags returned by KVM_GET_CLOCK.

> So s->src_use_reliable_get_clock is only used to indicate 
> to the destination that: "you can use KVM_GET_CLOCK value, 
> its safe".

Yes, we agree.  I was listing all the points, not just those where we
disagree.  Actually I'm not sure where we disagree, except on using
flags from KVM_CHECK_EXTENSION vs. flags from KVM_GET_CLOCK...

Paolo

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [Qemu-devel] [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration
@ 2016-11-14 16:43                 ` Paolo Bonzini
  0 siblings, 0 replies; 46+ messages in thread
From: Paolo Bonzini @ 2016-11-14 16:43 UTC (permalink / raw)
  To: Marcelo Tosatti
  Cc: kvm, qemu-devel, Dr. David Alan Gilbert, Juan Quintela,
	Radim Krcmar, Eduardo Habkost



On 14/11/2016 16:40, Marcelo Tosatti wrote:
> static bool kvmclock_src_use_reliable_get_clock(void *opaque)
> {
>     KVMClockState *s = opaque;
> 
>     /*
>      * On machine types that support reliable KVM_GET_CLOCK,
>      * if host kernel does provide reliable KVM_GET_CLOCK,
>      * set src_use_reliable_get_clock=true so that destination
>      * avoids reading kvmclock from memory.
>      */
>     if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable())
>     {
>         s->src_use_reliable_get_clock = true;
>     }
> 
>     return s->mach_use_reliable_get_clock;
> }
> 
> 
> Ah, OK, done.

s->src_use_reliable_get_clock should not be set with
KVM_CHECK_EXTENSION, but rather from the flags returned by KVM_GET_CLOCK.

> So s->src_use_reliable_get_clock is only used to indicate 
> to the destination that: "you can use KVM_GET_CLOCK value, 
> its safe".

Yes, we agree.  I was listing all the points, not just those where we
disagree.  Actually I'm not sure where we disagree, except on using
flags from KVM_CHECK_EXTENSION vs. flags from KVM_GET_CLOCK...

Paolo

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration
  2016-11-14 16:43                 ` [Qemu-devel] " Paolo Bonzini
@ 2016-11-14 17:13                   ` Marcelo Tosatti
  -1 siblings, 0 replies; 46+ messages in thread
From: Marcelo Tosatti @ 2016-11-14 17:13 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: kvm, qemu-devel, Dr. David Alan Gilbert, Juan Quintela,
	Radim Krcmar, Eduardo Habkost

On Mon, Nov 14, 2016 at 05:43:33PM +0100, Paolo Bonzini wrote:
> 
> 
> On 14/11/2016 16:40, Marcelo Tosatti wrote:
> > static bool kvmclock_src_use_reliable_get_clock(void *opaque)
> > {
> >     KVMClockState *s = opaque;
> > 
> >     /*
> >      * On machine types that support reliable KVM_GET_CLOCK,
> >      * if host kernel does provide reliable KVM_GET_CLOCK,
> >      * set src_use_reliable_get_clock=true so that destination
> >      * avoids reading kvmclock from memory.
> >      */
> >     if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable())
> >     {
> >         s->src_use_reliable_get_clock = true;
> >     }
> > 
> >     return s->mach_use_reliable_get_clock;
> > }
> > 
> > 
> > Ah, OK, done.
> 
> s->src_use_reliable_get_clock should not be set with
> KVM_CHECK_EXTENSION, but rather from the flags returned by KVM_GET_CLOCK.

Well, thats not right: What matters is the presence of get_kvmclock_ns 
which returns a value that the guest sees. 

                get_kernel_monotonic_clock() + kvmclock_offset +
                (rdtsc() - tsc_timestamp)

IOW what the guest sees. And you changed that in 

commit 108b249c453dd7132599ab6dc7e435a7036c193f
Author: Paolo Bonzini <pbonzini@redhat.com>
Date:   Thu Sep 1 14:21:03 2016 +0200

    KVM: x86: introduce get_kvmclock_ns

And the correct behaviour (once KVM_GET_CLOCK is fixed per 
previous message to return rdtsc - tsc_timestamp for the 
non masterclock case) depends on this commit above, 
not on masterclock.

> > So s->src_use_reliable_get_clock is only used to indicate 
> > to the destination that: "you can use KVM_GET_CLOCK value, 
> > its safe".
> 
> Yes, we agree.  I was listing all the points, not just those where we
> disagree.  Actually I'm not sure where we disagree, except on using
> flags from KVM_CHECK_EXTENSION vs. flags from KVM_GET_CLOCK...
> 
> Paolo

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [Qemu-devel] [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration
@ 2016-11-14 17:13                   ` Marcelo Tosatti
  0 siblings, 0 replies; 46+ messages in thread
From: Marcelo Tosatti @ 2016-11-14 17:13 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: kvm, qemu-devel, Dr. David Alan Gilbert, Juan Quintela,
	Radim Krcmar, Eduardo Habkost

On Mon, Nov 14, 2016 at 05:43:33PM +0100, Paolo Bonzini wrote:
> 
> 
> On 14/11/2016 16:40, Marcelo Tosatti wrote:
> > static bool kvmclock_src_use_reliable_get_clock(void *opaque)
> > {
> >     KVMClockState *s = opaque;
> > 
> >     /*
> >      * On machine types that support reliable KVM_GET_CLOCK,
> >      * if host kernel does provide reliable KVM_GET_CLOCK,
> >      * set src_use_reliable_get_clock=true so that destination
> >      * avoids reading kvmclock from memory.
> >      */
> >     if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable())
> >     {
> >         s->src_use_reliable_get_clock = true;
> >     }
> > 
> >     return s->mach_use_reliable_get_clock;
> > }
> > 
> > 
> > Ah, OK, done.
> 
> s->src_use_reliable_get_clock should not be set with
> KVM_CHECK_EXTENSION, but rather from the flags returned by KVM_GET_CLOCK.

Well, thats not right: What matters is the presence of get_kvmclock_ns 
which returns a value that the guest sees. 

                get_kernel_monotonic_clock() + kvmclock_offset +
                (rdtsc() - tsc_timestamp)

IOW what the guest sees. And you changed that in 

commit 108b249c453dd7132599ab6dc7e435a7036c193f
Author: Paolo Bonzini <pbonzini@redhat.com>
Date:   Thu Sep 1 14:21:03 2016 +0200

    KVM: x86: introduce get_kvmclock_ns

And the correct behaviour (once KVM_GET_CLOCK is fixed per 
previous message to return rdtsc - tsc_timestamp for the 
non masterclock case) depends on this commit above, 
not on masterclock.

> > So s->src_use_reliable_get_clock is only used to indicate 
> > to the destination that: "you can use KVM_GET_CLOCK value, 
> > its safe".
> 
> Yes, we agree.  I was listing all the points, not just those where we
> disagree.  Actually I'm not sure where we disagree, except on using
> flags from KVM_CHECK_EXTENSION vs. flags from KVM_GET_CLOCK...
> 
> Paolo

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration
  2016-11-14 17:13                   ` [Qemu-devel] " Marcelo Tosatti
@ 2016-11-14 17:20                     ` Paolo Bonzini
  -1 siblings, 0 replies; 46+ messages in thread
From: Paolo Bonzini @ 2016-11-14 17:20 UTC (permalink / raw)
  To: Marcelo Tosatti
  Cc: kvm, qemu-devel, Dr. David Alan Gilbert, Juan Quintela,
	Radim Krcmar, Eduardo Habkost



On 14/11/2016 18:13, Marcelo Tosatti wrote:
> On Mon, Nov 14, 2016 at 05:43:33PM +0100, Paolo Bonzini wrote:
>>
>>
>> On 14/11/2016 16:40, Marcelo Tosatti wrote:
>>> static bool kvmclock_src_use_reliable_get_clock(void *opaque)
>>> {
>>>     KVMClockState *s = opaque;
>>>
>>>     /*
>>>      * On machine types that support reliable KVM_GET_CLOCK,
>>>      * if host kernel does provide reliable KVM_GET_CLOCK,
>>>      * set src_use_reliable_get_clock=true so that destination
>>>      * avoids reading kvmclock from memory.
>>>      */
>>>     if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable())
>>>     {
>>>         s->src_use_reliable_get_clock = true;
>>>     }
>>>
>>>     return s->mach_use_reliable_get_clock;
>>> }
>>>
>>>
>>> Ah, OK, done.
>>
>> s->src_use_reliable_get_clock should not be set with
>> KVM_CHECK_EXTENSION, but rather from the flags returned by KVM_GET_CLOCK.
> 
> Well, thats not right: What matters is the presence of get_kvmclock_ns 
> which returns a value that the guest sees. 
> 
>                 get_kernel_monotonic_clock() + kvmclock_offset +
>                 (rdtsc() - tsc_timestamp)
> 
> IOW what the guest sees. And you changed that in 
> 
> commit 108b249c453dd7132599ab6dc7e435a7036c193f
> Author: Paolo Bonzini <pbonzini@redhat.com>
> Date:   Thu Sep 1 14:21:03 2016 +0200
> 
>     KVM: x86: introduce get_kvmclock_ns
> 
> And the correct behaviour (once KVM_GET_CLOCK is fixed per 
> previous message to return rdtsc - tsc_timestamp for the 
> non masterclock case) depends on this commit above, 
> not on masterclock.

This commit in turn only gets the correct behavior if 
"vcpu->arch.hv_clock.flags & PVCLOCK_TSC_STABLE_BIT" (and it will be 
changed soon to ka->use_masterclock).  KVM_CHECK_EXTENSION can still 
return KVM_CLOCK_TSC_STABLE even if the masterclock is disabled, 
because KVM_CHECK_EXTENSION only tells you which flags are known for
this version of the KVM module.

To see if the masterclock is enabled _now_, you need to check what
KVM_GET_CLOCK sets in the flags.  From the KVM_CLOCK_TSC_STABLE patch:

		user_ns.flags = kvm->arch.use_master_clock ? KVM_CLOCK_TSC_STABLE : 0;

Thanks,

Paolo

>>> So s->src_use_reliable_get_clock is only used to indicate 
>>> to the destination that: "you can use KVM_GET_CLOCK value, 
>>> its safe".
>>
>> Yes, we agree.  I was listing all the points, not just those where we
>> disagree.  Actually I'm not sure where we disagree, except on using
>> flags from KVM_CHECK_EXTENSION vs. flags from KVM_GET_CLOCK...
>>
>> Paolo

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [Qemu-devel] [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration
@ 2016-11-14 17:20                     ` Paolo Bonzini
  0 siblings, 0 replies; 46+ messages in thread
From: Paolo Bonzini @ 2016-11-14 17:20 UTC (permalink / raw)
  To: Marcelo Tosatti
  Cc: kvm, qemu-devel, Dr. David Alan Gilbert, Juan Quintela,
	Radim Krcmar, Eduardo Habkost



On 14/11/2016 18:13, Marcelo Tosatti wrote:
> On Mon, Nov 14, 2016 at 05:43:33PM +0100, Paolo Bonzini wrote:
>>
>>
>> On 14/11/2016 16:40, Marcelo Tosatti wrote:
>>> static bool kvmclock_src_use_reliable_get_clock(void *opaque)
>>> {
>>>     KVMClockState *s = opaque;
>>>
>>>     /*
>>>      * On machine types that support reliable KVM_GET_CLOCK,
>>>      * if host kernel does provide reliable KVM_GET_CLOCK,
>>>      * set src_use_reliable_get_clock=true so that destination
>>>      * avoids reading kvmclock from memory.
>>>      */
>>>     if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable())
>>>     {
>>>         s->src_use_reliable_get_clock = true;
>>>     }
>>>
>>>     return s->mach_use_reliable_get_clock;
>>> }
>>>
>>>
>>> Ah, OK, done.
>>
>> s->src_use_reliable_get_clock should not be set with
>> KVM_CHECK_EXTENSION, but rather from the flags returned by KVM_GET_CLOCK.
> 
> Well, thats not right: What matters is the presence of get_kvmclock_ns 
> which returns a value that the guest sees. 
> 
>                 get_kernel_monotonic_clock() + kvmclock_offset +
>                 (rdtsc() - tsc_timestamp)
> 
> IOW what the guest sees. And you changed that in 
> 
> commit 108b249c453dd7132599ab6dc7e435a7036c193f
> Author: Paolo Bonzini <pbonzini@redhat.com>
> Date:   Thu Sep 1 14:21:03 2016 +0200
> 
>     KVM: x86: introduce get_kvmclock_ns
> 
> And the correct behaviour (once KVM_GET_CLOCK is fixed per 
> previous message to return rdtsc - tsc_timestamp for the 
> non masterclock case) depends on this commit above, 
> not on masterclock.

This commit in turn only gets the correct behavior if 
"vcpu->arch.hv_clock.flags & PVCLOCK_TSC_STABLE_BIT" (and it will be 
changed soon to ka->use_masterclock).  KVM_CHECK_EXTENSION can still 
return KVM_CLOCK_TSC_STABLE even if the masterclock is disabled, 
because KVM_CHECK_EXTENSION only tells you which flags are known for
this version of the KVM module.

To see if the masterclock is enabled _now_, you need to check what
KVM_GET_CLOCK sets in the flags.  From the KVM_CLOCK_TSC_STABLE patch:

		user_ns.flags = kvm->arch.use_master_clock ? KVM_CLOCK_TSC_STABLE : 0;

Thanks,

Paolo

>>> So s->src_use_reliable_get_clock is only used to indicate 
>>> to the destination that: "you can use KVM_GET_CLOCK value, 
>>> its safe".
>>
>> Yes, we agree.  I was listing all the points, not just those where we
>> disagree.  Actually I'm not sure where we disagree, except on using
>> flags from KVM_CHECK_EXTENSION vs. flags from KVM_GET_CLOCK...
>>
>> Paolo

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration
  2016-11-14 17:20                     ` [Qemu-devel] " Paolo Bonzini
@ 2016-11-14 18:15                       ` Marcelo Tosatti
  -1 siblings, 0 replies; 46+ messages in thread
From: Marcelo Tosatti @ 2016-11-14 18:15 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: kvm, qemu-devel, Dr. David Alan Gilbert, Juan Quintela,
	Radim Krcmar, Eduardo Habkost

On Mon, Nov 14, 2016 at 06:20:29PM +0100, Paolo Bonzini wrote:
> 
> 
> On 14/11/2016 18:13, Marcelo Tosatti wrote:
> > On Mon, Nov 14, 2016 at 05:43:33PM +0100, Paolo Bonzini wrote:
> >>
> >>
> >> On 14/11/2016 16:40, Marcelo Tosatti wrote:
> >>> static bool kvmclock_src_use_reliable_get_clock(void *opaque)
> >>> {
> >>>     KVMClockState *s = opaque;
> >>>
> >>>     /*
> >>>      * On machine types that support reliable KVM_GET_CLOCK,
> >>>      * if host kernel does provide reliable KVM_GET_CLOCK,
> >>>      * set src_use_reliable_get_clock=true so that destination
> >>>      * avoids reading kvmclock from memory.
> >>>      */
> >>>     if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable())
> >>>     {
> >>>         s->src_use_reliable_get_clock = true;
> >>>     }
> >>>
> >>>     return s->mach_use_reliable_get_clock;
> >>> }
> >>>
> >>>
> >>> Ah, OK, done.
> >>
> >> s->src_use_reliable_get_clock should not be set with
> >> KVM_CHECK_EXTENSION, but rather from the flags returned by KVM_GET_CLOCK.
> > 
> > Well, thats not right: What matters is the presence of get_kvmclock_ns 
> > which returns a value that the guest sees. 
> > 
> >                 get_kernel_monotonic_clock() + kvmclock_offset +
> >                 (rdtsc() - tsc_timestamp)
> > 
> > IOW what the guest sees. And you changed that in 
> > 
> > commit 108b249c453dd7132599ab6dc7e435a7036c193f
> > Author: Paolo Bonzini <pbonzini@redhat.com>
> > Date:   Thu Sep 1 14:21:03 2016 +0200
> > 
> >     KVM: x86: introduce get_kvmclock_ns
> > 
> > And the correct behaviour (once KVM_GET_CLOCK is fixed per 
> > previous message to return rdtsc - tsc_timestamp for the 
> > non masterclock case) depends on this commit above, 
> > not on masterclock.
> 
> This commit in turn only gets the correct behavior if 
> "vcpu->arch.hv_clock.flags & PVCLOCK_TSC_STABLE_BIT" (and it will be 
> changed soon to ka->use_masterclock).  KVM_CHECK_EXTENSION can still 
> return KVM_CLOCK_TSC_STABLE even if the masterclock is disabled, 
> because KVM_CHECK_EXTENSION only tells you which flags are known for
> this version of the KVM module.

What QEMU wants is to use KVM_GET_CLOCK at pre_save independently
of whether masterclock is enabled or not... it just depends
on KVM_GET_CLOCK being correct for the masterclock case
(108b249c453dd7132599ab6dc7e435a7036c193f).

So a "reliable KVM_GET_CLOCK" (that does not timebackward
when masterclock is enabled) is much simpler to userspace
than "whether masterclock is enabled or not".

If you have a reason why that should not be the case,
let me know.

> To see if the masterclock is enabled _now_, you need to check what
> KVM_GET_CLOCK sets in the flags.  From the KVM_CLOCK_TSC_STABLE patch:
> 
> 		user_ns.flags = kvm->arch.use_master_clock ? KVM_CLOCK_TSC_STABLE : 0;

Again, whether masterclock is enable is independent of 
being able to use KVM_GET_CLOCK at pre_save.



^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [Qemu-devel] [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration
@ 2016-11-14 18:15                       ` Marcelo Tosatti
  0 siblings, 0 replies; 46+ messages in thread
From: Marcelo Tosatti @ 2016-11-14 18:15 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: kvm, qemu-devel, Dr. David Alan Gilbert, Juan Quintela,
	Radim Krcmar, Eduardo Habkost

On Mon, Nov 14, 2016 at 06:20:29PM +0100, Paolo Bonzini wrote:
> 
> 
> On 14/11/2016 18:13, Marcelo Tosatti wrote:
> > On Mon, Nov 14, 2016 at 05:43:33PM +0100, Paolo Bonzini wrote:
> >>
> >>
> >> On 14/11/2016 16:40, Marcelo Tosatti wrote:
> >>> static bool kvmclock_src_use_reliable_get_clock(void *opaque)
> >>> {
> >>>     KVMClockState *s = opaque;
> >>>
> >>>     /*
> >>>      * On machine types that support reliable KVM_GET_CLOCK,
> >>>      * if host kernel does provide reliable KVM_GET_CLOCK,
> >>>      * set src_use_reliable_get_clock=true so that destination
> >>>      * avoids reading kvmclock from memory.
> >>>      */
> >>>     if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable())
> >>>     {
> >>>         s->src_use_reliable_get_clock = true;
> >>>     }
> >>>
> >>>     return s->mach_use_reliable_get_clock;
> >>> }
> >>>
> >>>
> >>> Ah, OK, done.
> >>
> >> s->src_use_reliable_get_clock should not be set with
> >> KVM_CHECK_EXTENSION, but rather from the flags returned by KVM_GET_CLOCK.
> > 
> > Well, thats not right: What matters is the presence of get_kvmclock_ns 
> > which returns a value that the guest sees. 
> > 
> >                 get_kernel_monotonic_clock() + kvmclock_offset +
> >                 (rdtsc() - tsc_timestamp)
> > 
> > IOW what the guest sees. And you changed that in 
> > 
> > commit 108b249c453dd7132599ab6dc7e435a7036c193f
> > Author: Paolo Bonzini <pbonzini@redhat.com>
> > Date:   Thu Sep 1 14:21:03 2016 +0200
> > 
> >     KVM: x86: introduce get_kvmclock_ns
> > 
> > And the correct behaviour (once KVM_GET_CLOCK is fixed per 
> > previous message to return rdtsc - tsc_timestamp for the 
> > non masterclock case) depends on this commit above, 
> > not on masterclock.
> 
> This commit in turn only gets the correct behavior if 
> "vcpu->arch.hv_clock.flags & PVCLOCK_TSC_STABLE_BIT" (and it will be 
> changed soon to ka->use_masterclock).  KVM_CHECK_EXTENSION can still 
> return KVM_CLOCK_TSC_STABLE even if the masterclock is disabled, 
> because KVM_CHECK_EXTENSION only tells you which flags are known for
> this version of the KVM module.

What QEMU wants is to use KVM_GET_CLOCK at pre_save independently
of whether masterclock is enabled or not... it just depends
on KVM_GET_CLOCK being correct for the masterclock case
(108b249c453dd7132599ab6dc7e435a7036c193f).

So a "reliable KVM_GET_CLOCK" (that does not timebackward
when masterclock is enabled) is much simpler to userspace
than "whether masterclock is enabled or not".

If you have a reason why that should not be the case,
let me know.

> To see if the masterclock is enabled _now_, you need to check what
> KVM_GET_CLOCK sets in the flags.  From the KVM_CLOCK_TSC_STABLE patch:
> 
> 		user_ns.flags = kvm->arch.use_master_clock ? KVM_CLOCK_TSC_STABLE : 0;

Again, whether masterclock is enable is independent of 
being able to use KVM_GET_CLOCK at pre_save.

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration
  2016-11-14 18:15                       ` [Qemu-devel] " Marcelo Tosatti
@ 2016-11-17 12:16                         ` Marcelo Tosatti
  -1 siblings, 0 replies; 46+ messages in thread
From: Marcelo Tosatti @ 2016-11-17 12:16 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: Eduardo Habkost, kvm, Juan Quintela, Radim Krcmar, qemu-devel,
	Dr. David Alan Gilbert

On Mon, Nov 14, 2016 at 04:15:18PM -0200, Marcelo Tosatti wrote:
> On Mon, Nov 14, 2016 at 06:20:29PM +0100, Paolo Bonzini wrote:
> > 
> > 
> > On 14/11/2016 18:13, Marcelo Tosatti wrote:
> > > On Mon, Nov 14, 2016 at 05:43:33PM +0100, Paolo Bonzini wrote:
> > >>
> > >>
> > >> On 14/11/2016 16:40, Marcelo Tosatti wrote:
> > >>> static bool kvmclock_src_use_reliable_get_clock(void *opaque)
> > >>> {
> > >>>     KVMClockState *s = opaque;
> > >>>
> > >>>     /*
> > >>>      * On machine types that support reliable KVM_GET_CLOCK,
> > >>>      * if host kernel does provide reliable KVM_GET_CLOCK,
> > >>>      * set src_use_reliable_get_clock=true so that destination
> > >>>      * avoids reading kvmclock from memory.
> > >>>      */
> > >>>     if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable())
> > >>>     {
> > >>>         s->src_use_reliable_get_clock = true;
> > >>>     }
> > >>>
> > >>>     return s->mach_use_reliable_get_clock;
> > >>> }
> > >>>
> > >>>
> > >>> Ah, OK, done.
> > >>
> > >> s->src_use_reliable_get_clock should not be set with
> > >> KVM_CHECK_EXTENSION, but rather from the flags returned by KVM_GET_CLOCK.
> > > 
> > > Well, thats not right: What matters is the presence of get_kvmclock_ns 
> > > which returns a value that the guest sees. 
> > > 
> > >                 get_kernel_monotonic_clock() + kvmclock_offset +
> > >                 (rdtsc() - tsc_timestamp)
> > > 
> > > IOW what the guest sees. And you changed that in 
> > > 
> > > commit 108b249c453dd7132599ab6dc7e435a7036c193f
> > > Author: Paolo Bonzini <pbonzini@redhat.com>
> > > Date:   Thu Sep 1 14:21:03 2016 +0200
> > > 
> > >     KVM: x86: introduce get_kvmclock_ns
> > > 
> > > And the correct behaviour (once KVM_GET_CLOCK is fixed per 
> > > previous message to return rdtsc - tsc_timestamp for the 
> > > non masterclock case) depends on this commit above, 
> > > not on masterclock.
> > 
> > This commit in turn only gets the correct behavior if 
> > "vcpu->arch.hv_clock.flags & PVCLOCK_TSC_STABLE_BIT" (and it will be 
> > changed soon to ka->use_masterclock).  KVM_CHECK_EXTENSION can still 
> > return KVM_CLOCK_TSC_STABLE even if the masterclock is disabled, 
> > because KVM_CHECK_EXTENSION only tells you which flags are known for
> > this version of the KVM module.
> 
> What QEMU wants is to use KVM_GET_CLOCK at pre_save independently
> of whether masterclock is enabled or not... it just depends
> on KVM_GET_CLOCK being correct for the masterclock case
> (108b249c453dd7132599ab6dc7e435a7036c193f).
> 
> So a "reliable KVM_GET_CLOCK" (that does not timebackward
> when masterclock is enabled) is much simpler to userspace
> than "whether masterclock is enabled or not".
> 
> If you have a reason why that should not be the case,
> let me know.
> 
> > To see if the masterclock is enabled _now_, you need to check what
> > KVM_GET_CLOCK sets in the flags.  From the KVM_CLOCK_TSC_STABLE patch:
> > 
> > 		user_ns.flags = kvm->arch.use_master_clock ? KVM_CLOCK_TSC_STABLE : 0;
> 
> Again, whether masterclock is enable is independent of 
> being able to use KVM_GET_CLOCK at pre_save.

Is this point OK ?

Using 

                break;
+   case KVM_CAP_ADJUST_CLOCK:
+               r = KVM_CLOCK_TSC_STABLE;
+               break;

To infer whether KVM_GET_CLOCK is fixed for the monotonic case.

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [Qemu-devel] [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration
@ 2016-11-17 12:16                         ` Marcelo Tosatti
  0 siblings, 0 replies; 46+ messages in thread
From: Marcelo Tosatti @ 2016-11-17 12:16 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: kvm, qemu-devel, Dr. David Alan Gilbert, Juan Quintela,
	Radim Krcmar, Eduardo Habkost

On Mon, Nov 14, 2016 at 04:15:18PM -0200, Marcelo Tosatti wrote:
> On Mon, Nov 14, 2016 at 06:20:29PM +0100, Paolo Bonzini wrote:
> > 
> > 
> > On 14/11/2016 18:13, Marcelo Tosatti wrote:
> > > On Mon, Nov 14, 2016 at 05:43:33PM +0100, Paolo Bonzini wrote:
> > >>
> > >>
> > >> On 14/11/2016 16:40, Marcelo Tosatti wrote:
> > >>> static bool kvmclock_src_use_reliable_get_clock(void *opaque)
> > >>> {
> > >>>     KVMClockState *s = opaque;
> > >>>
> > >>>     /*
> > >>>      * On machine types that support reliable KVM_GET_CLOCK,
> > >>>      * if host kernel does provide reliable KVM_GET_CLOCK,
> > >>>      * set src_use_reliable_get_clock=true so that destination
> > >>>      * avoids reading kvmclock from memory.
> > >>>      */
> > >>>     if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable())
> > >>>     {
> > >>>         s->src_use_reliable_get_clock = true;
> > >>>     }
> > >>>
> > >>>     return s->mach_use_reliable_get_clock;
> > >>> }
> > >>>
> > >>>
> > >>> Ah, OK, done.
> > >>
> > >> s->src_use_reliable_get_clock should not be set with
> > >> KVM_CHECK_EXTENSION, but rather from the flags returned by KVM_GET_CLOCK.
> > > 
> > > Well, thats not right: What matters is the presence of get_kvmclock_ns 
> > > which returns a value that the guest sees. 
> > > 
> > >                 get_kernel_monotonic_clock() + kvmclock_offset +
> > >                 (rdtsc() - tsc_timestamp)
> > > 
> > > IOW what the guest sees. And you changed that in 
> > > 
> > > commit 108b249c453dd7132599ab6dc7e435a7036c193f
> > > Author: Paolo Bonzini <pbonzini@redhat.com>
> > > Date:   Thu Sep 1 14:21:03 2016 +0200
> > > 
> > >     KVM: x86: introduce get_kvmclock_ns
> > > 
> > > And the correct behaviour (once KVM_GET_CLOCK is fixed per 
> > > previous message to return rdtsc - tsc_timestamp for the 
> > > non masterclock case) depends on this commit above, 
> > > not on masterclock.
> > 
> > This commit in turn only gets the correct behavior if 
> > "vcpu->arch.hv_clock.flags & PVCLOCK_TSC_STABLE_BIT" (and it will be 
> > changed soon to ka->use_masterclock).  KVM_CHECK_EXTENSION can still 
> > return KVM_CLOCK_TSC_STABLE even if the masterclock is disabled, 
> > because KVM_CHECK_EXTENSION only tells you which flags are known for
> > this version of the KVM module.
> 
> What QEMU wants is to use KVM_GET_CLOCK at pre_save independently
> of whether masterclock is enabled or not... it just depends
> on KVM_GET_CLOCK being correct for the masterclock case
> (108b249c453dd7132599ab6dc7e435a7036c193f).
> 
> So a "reliable KVM_GET_CLOCK" (that does not timebackward
> when masterclock is enabled) is much simpler to userspace
> than "whether masterclock is enabled or not".
> 
> If you have a reason why that should not be the case,
> let me know.
> 
> > To see if the masterclock is enabled _now_, you need to check what
> > KVM_GET_CLOCK sets in the flags.  From the KVM_CLOCK_TSC_STABLE patch:
> > 
> > 		user_ns.flags = kvm->arch.use_master_clock ? KVM_CLOCK_TSC_STABLE : 0;
> 
> Again, whether masterclock is enable is independent of 
> being able to use KVM_GET_CLOCK at pre_save.

Is this point OK ?

Using 

                break;
+   case KVM_CAP_ADJUST_CLOCK:
+               r = KVM_CLOCK_TSC_STABLE;
+               break;

To infer whether KVM_GET_CLOCK is fixed for the monotonic case.

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration
  2016-11-17 12:16                         ` [Qemu-devel] " Marcelo Tosatti
@ 2016-11-17 13:03                           ` Paolo Bonzini
  -1 siblings, 0 replies; 46+ messages in thread
From: Paolo Bonzini @ 2016-11-17 13:03 UTC (permalink / raw)
  To: Marcelo Tosatti
  Cc: Eduardo Habkost, kvm, Radim Krcmar, Juan Quintela, qemu-devel,
	Dr. David Alan Gilbert



On 17/11/2016 13:16, Marcelo Tosatti wrote:
> On Mon, Nov 14, 2016 at 04:15:18PM -0200, Marcelo Tosatti wrote:
>> On Mon, Nov 14, 2016 at 06:20:29PM +0100, Paolo Bonzini wrote:
>>>
>>>
>>> On 14/11/2016 18:13, Marcelo Tosatti wrote:
>>>> On Mon, Nov 14, 2016 at 05:43:33PM +0100, Paolo Bonzini wrote:
>>>>>
>>>>>
>>>>> On 14/11/2016 16:40, Marcelo Tosatti wrote:
>>>>>> static bool kvmclock_src_use_reliable_get_clock(void *opaque)
>>>>>> {
>>>>>>     KVMClockState *s = opaque;
>>>>>>
>>>>>>     /*
>>>>>>      * On machine types that support reliable KVM_GET_CLOCK,
>>>>>>      * if host kernel does provide reliable KVM_GET_CLOCK,
>>>>>>      * set src_use_reliable_get_clock=true so that destination
>>>>>>      * avoids reading kvmclock from memory.
>>>>>>      */
>>>>>>     if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable())
>>>>>>     {
>>>>>>         s->src_use_reliable_get_clock = true;
>>>>>>     }
>>>>>>
>>>>>>     return s->mach_use_reliable_get_clock;
>>>>>> }
>>>>>>
>>>>>>
>>>>>> Ah, OK, done.
>>>>>
>>>>> s->src_use_reliable_get_clock should not be set with
>>>>> KVM_CHECK_EXTENSION, but rather from the flags returned by KVM_GET_CLOCK.
>>>>
>>>> Well, thats not right: What matters is the presence of get_kvmclock_ns 
>>>> which returns a value that the guest sees. 
>>>>
>>>>                 get_kernel_monotonic_clock() + kvmclock_offset +
>>>>                 (rdtsc() - tsc_timestamp)
>>>>
>>>> IOW what the guest sees. And you changed that in 
>>>>
>>>> commit 108b249c453dd7132599ab6dc7e435a7036c193f
>>>> Author: Paolo Bonzini <pbonzini@redhat.com>
>>>> Date:   Thu Sep 1 14:21:03 2016 +0200
>>>>
>>>>     KVM: x86: introduce get_kvmclock_ns
>>>>
>>>> And the correct behaviour (once KVM_GET_CLOCK is fixed per 
>>>> previous message to return rdtsc - tsc_timestamp for the 
>>>> non masterclock case) depends on this commit above, 
>>>> not on masterclock.
>>>
>>> This commit in turn only gets the correct behavior if 
>>> "vcpu->arch.hv_clock.flags & PVCLOCK_TSC_STABLE_BIT" (and it will be 
>>> changed soon to ka->use_masterclock).  KVM_CHECK_EXTENSION can still 
>>> return KVM_CLOCK_TSC_STABLE even if the masterclock is disabled, 
>>> because KVM_CHECK_EXTENSION only tells you which flags are known for
>>> this version of the KVM module.
>>
>> What QEMU wants is to use KVM_GET_CLOCK at pre_save independently
>> of whether masterclock is enabled or not... it just depends
>> on KVM_GET_CLOCK being correct for the masterclock case
>> (108b249c453dd7132599ab6dc7e435a7036c193f).
>>
>> So a "reliable KVM_GET_CLOCK" (that does not timebackward
>> when masterclock is enabled) is much simpler to userspace
>> than "whether masterclock is enabled or not".
>>
>> If you have a reason why that should not be the case,
>> let me know.
>>
>>> To see if the masterclock is enabled _now_, you need to check what
>>> KVM_GET_CLOCK sets in the flags.  From the KVM_CLOCK_TSC_STABLE patch:
>>>
>>> 		user_ns.flags = kvm->arch.use_master_clock ? KVM_CLOCK_TSC_STABLE : 0;
>>
>> Again, whether masterclock is enable is independent of 
>> being able to use KVM_GET_CLOCK at pre_save.
> 
> Is this point OK ?
> 
> Using 
> 
>                 break;
> +   case KVM_CAP_ADJUST_CLOCK:
> +               r = KVM_CLOCK_TSC_STABLE;
> +               break;
> 
> To infer whether KVM_GET_CLOCK is fixed for the monotonic case.

Yes, I still haven't digested why it is correct (I need to read again
what you wrote), but it is indeed correct to use KVM_CAP_ADJUST_CLOCK
this way.

Paolo

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [Qemu-devel] [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration
@ 2016-11-17 13:03                           ` Paolo Bonzini
  0 siblings, 0 replies; 46+ messages in thread
From: Paolo Bonzini @ 2016-11-17 13:03 UTC (permalink / raw)
  To: Marcelo Tosatti
  Cc: Eduardo Habkost, kvm, Juan Quintela, Radim Krcmar, qemu-devel,
	Dr. David Alan Gilbert



On 17/11/2016 13:16, Marcelo Tosatti wrote:
> On Mon, Nov 14, 2016 at 04:15:18PM -0200, Marcelo Tosatti wrote:
>> On Mon, Nov 14, 2016 at 06:20:29PM +0100, Paolo Bonzini wrote:
>>>
>>>
>>> On 14/11/2016 18:13, Marcelo Tosatti wrote:
>>>> On Mon, Nov 14, 2016 at 05:43:33PM +0100, Paolo Bonzini wrote:
>>>>>
>>>>>
>>>>> On 14/11/2016 16:40, Marcelo Tosatti wrote:
>>>>>> static bool kvmclock_src_use_reliable_get_clock(void *opaque)
>>>>>> {
>>>>>>     KVMClockState *s = opaque;
>>>>>>
>>>>>>     /*
>>>>>>      * On machine types that support reliable KVM_GET_CLOCK,
>>>>>>      * if host kernel does provide reliable KVM_GET_CLOCK,
>>>>>>      * set src_use_reliable_get_clock=true so that destination
>>>>>>      * avoids reading kvmclock from memory.
>>>>>>      */
>>>>>>     if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable())
>>>>>>     {
>>>>>>         s->src_use_reliable_get_clock = true;
>>>>>>     }
>>>>>>
>>>>>>     return s->mach_use_reliable_get_clock;
>>>>>> }
>>>>>>
>>>>>>
>>>>>> Ah, OK, done.
>>>>>
>>>>> s->src_use_reliable_get_clock should not be set with
>>>>> KVM_CHECK_EXTENSION, but rather from the flags returned by KVM_GET_CLOCK.
>>>>
>>>> Well, thats not right: What matters is the presence of get_kvmclock_ns 
>>>> which returns a value that the guest sees. 
>>>>
>>>>                 get_kernel_monotonic_clock() + kvmclock_offset +
>>>>                 (rdtsc() - tsc_timestamp)
>>>>
>>>> IOW what the guest sees. And you changed that in 
>>>>
>>>> commit 108b249c453dd7132599ab6dc7e435a7036c193f
>>>> Author: Paolo Bonzini <pbonzini@redhat.com>
>>>> Date:   Thu Sep 1 14:21:03 2016 +0200
>>>>
>>>>     KVM: x86: introduce get_kvmclock_ns
>>>>
>>>> And the correct behaviour (once KVM_GET_CLOCK is fixed per 
>>>> previous message to return rdtsc - tsc_timestamp for the 
>>>> non masterclock case) depends on this commit above, 
>>>> not on masterclock.
>>>
>>> This commit in turn only gets the correct behavior if 
>>> "vcpu->arch.hv_clock.flags & PVCLOCK_TSC_STABLE_BIT" (and it will be 
>>> changed soon to ka->use_masterclock).  KVM_CHECK_EXTENSION can still 
>>> return KVM_CLOCK_TSC_STABLE even if the masterclock is disabled, 
>>> because KVM_CHECK_EXTENSION only tells you which flags are known for
>>> this version of the KVM module.
>>
>> What QEMU wants is to use KVM_GET_CLOCK at pre_save independently
>> of whether masterclock is enabled or not... it just depends
>> on KVM_GET_CLOCK being correct for the masterclock case
>> (108b249c453dd7132599ab6dc7e435a7036c193f).
>>
>> So a "reliable KVM_GET_CLOCK" (that does not timebackward
>> when masterclock is enabled) is much simpler to userspace
>> than "whether masterclock is enabled or not".
>>
>> If you have a reason why that should not be the case,
>> let me know.
>>
>>> To see if the masterclock is enabled _now_, you need to check what
>>> KVM_GET_CLOCK sets in the flags.  From the KVM_CLOCK_TSC_STABLE patch:
>>>
>>> 		user_ns.flags = kvm->arch.use_master_clock ? KVM_CLOCK_TSC_STABLE : 0;
>>
>> Again, whether masterclock is enable is independent of 
>> being able to use KVM_GET_CLOCK at pre_save.
> 
> Is this point OK ?
> 
> Using 
> 
>                 break;
> +   case KVM_CAP_ADJUST_CLOCK:
> +               r = KVM_CLOCK_TSC_STABLE;
> +               break;
> 
> To infer whether KVM_GET_CLOCK is fixed for the monotonic case.

Yes, I still haven't digested why it is correct (I need to read again
what you wrote), but it is indeed correct to use KVM_CAP_ADJUST_CLOCK
this way.

Paolo

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration
  2016-11-17 12:16                         ` [Qemu-devel] " Marcelo Tosatti
@ 2016-11-28 13:47                           ` Paolo Bonzini
  -1 siblings, 0 replies; 46+ messages in thread
From: Paolo Bonzini @ 2016-11-28 13:47 UTC (permalink / raw)
  To: Marcelo Tosatti
  Cc: kvm, qemu-devel, Dr. David Alan Gilbert, Juan Quintela,
	Radim Krcmar, Eduardo Habkost



On 17/11/2016 13:16, Marcelo Tosatti wrote:
> What QEMU wants is to use KVM_GET_CLOCK at pre_save independently
> of whether masterclock is enabled or not... it just depends
> on KVM_GET_CLOCK being correct for the masterclock case
> (108b249c453dd7132599ab6dc7e435a7036c193f).
> 
> So a "reliable KVM_GET_CLOCK" (that does not timebackward
> when masterclock is enabled) is much simpler to userspace
> than "whether masterclock is enabled or not".
> 
> If you have a reason why that should not be the case,
> let me know.

I still cannot understand this case.

If the source has masterclock _disabled_, shouldn't it read kvmclock 
from memory?  But that's not what your patch does.  To be clear, what
I mean is:

diff --git a/hw/i386/kvm/clock.c b/hw/i386/kvm/clock.c
index 653b0b4..6f6e2dc 100644
--- a/hw/i386/kvm/clock.c
+++ b/hw/i386/kvm/clock.c
@@ -97,6 +97,7 @@ static uint64_t kvm_get_clock(void)
         fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret));
                 abort();
     }
+    s->src_use_reliable_get_clock = data.flags & KVM_CLOCK_TSC_STABLE;
     return data.clock;
 }
 
@@ -110,34 +111,19 @@ static void kvmclock_vm_state_change(void *opaque, int running,
 
     if (running) {
         struct kvm_clock_data data = {};
-        uint64_t pvclock_via_mem = 0;
 
-        /* local (running VM) restore */
-        if (s->clock_valid) {
-            /*
-             * if host does not support reliable KVM_GET_CLOCK,
-             * read kvmclock value from memory
-             */
-            if (!kvm_has_adjust_clock_stable()) {
-                pvclock_via_mem = kvmclock_current_nsec(s);
-            }
-        /* migration/savevm/init restore */
-        } else {
-            /*
-             * use s->clock in case machine uses reliable
-             * get clock and source host supported
-             * reliable get clock
-             */
-            if (!s->src_use_reliable_get_clock) {
-                pvclock_via_mem = kvmclock_current_nsec(s);
+        /*
+         * if last KVM_GET_CLOCK did not retrieve a reliable value,
+         * we can't rely on the saved clock value.  Just discard it and
+         * read kvmclock value from memory
+         */
+        if (!s->src_use_reliable_get_clock) {
+            uint64_t pvclock_via_mem = kvmclock_current_nsec(s);
+            if (pvclock_via_mem) {
+                s->clock = pvclock_via_mem;
             }
         }
 
-        /* We can't rely on the saved clock value, just discard it */
-        if (pvclock_via_mem) {
-            s->clock = pvclock_via_mem;
-        }
-
         s->clock_valid = false;
 
         data.clock = s->clock;
@@ -181,16 +168,6 @@ static void kvmclock_realize(DeviceState *dev, Error **errp)
 {
     KVMClockState *s = KVM_CLOCK(dev);
 
-    /*
-     * On machine types that support reliable KVM_GET_CLOCK,
-     * if host kernel does provide reliable KVM_GET_CLOCK,
-     * set src_use_reliable_get_clock=true so that destination
-     * avoids reading kvmclock from memory.
-     */
-    if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) {
-        s->src_use_reliable_get_clock = true;
-    }
-
     qemu_add_vm_change_state_handler(kvmclock_vm_state_change, s);
 }
 

^ permalink raw reply related	[flat|nested] 46+ messages in thread

* Re: [Qemu-devel] [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration
@ 2016-11-28 13:47                           ` Paolo Bonzini
  0 siblings, 0 replies; 46+ messages in thread
From: Paolo Bonzini @ 2016-11-28 13:47 UTC (permalink / raw)
  To: Marcelo Tosatti
  Cc: kvm, qemu-devel, Dr. David Alan Gilbert, Juan Quintela,
	Radim Krcmar, Eduardo Habkost



On 17/11/2016 13:16, Marcelo Tosatti wrote:
> What QEMU wants is to use KVM_GET_CLOCK at pre_save independently
> of whether masterclock is enabled or not... it just depends
> on KVM_GET_CLOCK being correct for the masterclock case
> (108b249c453dd7132599ab6dc7e435a7036c193f).
> 
> So a "reliable KVM_GET_CLOCK" (that does not timebackward
> when masterclock is enabled) is much simpler to userspace
> than "whether masterclock is enabled or not".
> 
> If you have a reason why that should not be the case,
> let me know.

I still cannot understand this case.

If the source has masterclock _disabled_, shouldn't it read kvmclock 
from memory?  But that's not what your patch does.  To be clear, what
I mean is:

diff --git a/hw/i386/kvm/clock.c b/hw/i386/kvm/clock.c
index 653b0b4..6f6e2dc 100644
--- a/hw/i386/kvm/clock.c
+++ b/hw/i386/kvm/clock.c
@@ -97,6 +97,7 @@ static uint64_t kvm_get_clock(void)
         fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret));
                 abort();
     }
+    s->src_use_reliable_get_clock = data.flags & KVM_CLOCK_TSC_STABLE;
     return data.clock;
 }
 
@@ -110,34 +111,19 @@ static void kvmclock_vm_state_change(void *opaque, int running,
 
     if (running) {
         struct kvm_clock_data data = {};
-        uint64_t pvclock_via_mem = 0;
 
-        /* local (running VM) restore */
-        if (s->clock_valid) {
-            /*
-             * if host does not support reliable KVM_GET_CLOCK,
-             * read kvmclock value from memory
-             */
-            if (!kvm_has_adjust_clock_stable()) {
-                pvclock_via_mem = kvmclock_current_nsec(s);
-            }
-        /* migration/savevm/init restore */
-        } else {
-            /*
-             * use s->clock in case machine uses reliable
-             * get clock and source host supported
-             * reliable get clock
-             */
-            if (!s->src_use_reliable_get_clock) {
-                pvclock_via_mem = kvmclock_current_nsec(s);
+        /*
+         * if last KVM_GET_CLOCK did not retrieve a reliable value,
+         * we can't rely on the saved clock value.  Just discard it and
+         * read kvmclock value from memory
+         */
+        if (!s->src_use_reliable_get_clock) {
+            uint64_t pvclock_via_mem = kvmclock_current_nsec(s);
+            if (pvclock_via_mem) {
+                s->clock = pvclock_via_mem;
             }
         }
 
-        /* We can't rely on the saved clock value, just discard it */
-        if (pvclock_via_mem) {
-            s->clock = pvclock_via_mem;
-        }
-
         s->clock_valid = false;
 
         data.clock = s->clock;
@@ -181,16 +168,6 @@ static void kvmclock_realize(DeviceState *dev, Error **errp)
 {
     KVMClockState *s = KVM_CLOCK(dev);
 
-    /*
-     * On machine types that support reliable KVM_GET_CLOCK,
-     * if host kernel does provide reliable KVM_GET_CLOCK,
-     * set src_use_reliable_get_clock=true so that destination
-     * avoids reading kvmclock from memory.
-     */
-    if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) {
-        s->src_use_reliable_get_clock = true;
-    }
-
     qemu_add_vm_change_state_handler(kvmclock_vm_state_change, s);
 }
 

^ permalink raw reply related	[flat|nested] 46+ messages in thread

* Re: [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration
  2016-11-28 13:47                           ` [Qemu-devel] " Paolo Bonzini
@ 2016-11-28 14:28                             ` Eduardo Habkost
  -1 siblings, 0 replies; 46+ messages in thread
From: Eduardo Habkost @ 2016-11-28 14:28 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: Marcelo Tosatti, kvm, qemu-devel, Dr. David Alan Gilbert,
	Juan Quintela, Radim Krcmar

On Mon, Nov 28, 2016 at 02:47:18PM +0100, Paolo Bonzini wrote:
> 
> 
> On 17/11/2016 13:16, Marcelo Tosatti wrote:
> > What QEMU wants is to use KVM_GET_CLOCK at pre_save independently
> > of whether masterclock is enabled or not... it just depends
> > on KVM_GET_CLOCK being correct for the masterclock case
> > (108b249c453dd7132599ab6dc7e435a7036c193f).
> > 
> > So a "reliable KVM_GET_CLOCK" (that does not timebackward
> > when masterclock is enabled) is much simpler to userspace
> > than "whether masterclock is enabled or not".
> > 
> > If you have a reason why that should not be the case,
> > let me know.
> 
> I still cannot understand this case.
> 
> If the source has masterclock _disabled_, shouldn't it read kvmclock 
> from memory?  But that's not what your patch does.  To be clear, what
> I mean is:
> 
> diff --git a/hw/i386/kvm/clock.c b/hw/i386/kvm/clock.c
> index 653b0b4..6f6e2dc 100644
> --- a/hw/i386/kvm/clock.c
> +++ b/hw/i386/kvm/clock.c
> @@ -97,6 +97,7 @@ static uint64_t kvm_get_clock(void)
>          fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret));
>                  abort();
>      }
> +    s->src_use_reliable_get_clock = data.flags & KVM_CLOCK_TSC_STABLE;

I still don't understand the reasoning behind
kvm_has_adjust_clock_stable() vs (flags & KVM_CLOCK_TSC_STABLE),
but on either case, updating src_use_reliable_get_clock inside
kvm_get_clock() looks like the right thing to do.

>      return data.clock;
>  }
>  
> @@ -110,34 +111,19 @@ static void kvmclock_vm_state_change(void *opaque, int running,
>  
>      if (running) {
>          struct kvm_clock_data data = {};
> -        uint64_t pvclock_via_mem = 0;
>  
> -        /* local (running VM) restore */
> -        if (s->clock_valid) {
> -            /*
> -             * if host does not support reliable KVM_GET_CLOCK,
> -             * read kvmclock value from memory
> -             */
> -            if (!kvm_has_adjust_clock_stable()) {
> -                pvclock_via_mem = kvmclock_current_nsec(s);
> -            }
> -        /* migration/savevm/init restore */
> -        } else {
> -            /*
> -             * use s->clock in case machine uses reliable
> -             * get clock and source host supported
> -             * reliable get clock
> -             */
> -            if (!s->src_use_reliable_get_clock) {
> -                pvclock_via_mem = kvmclock_current_nsec(s);
> +        /*
> +         * if last KVM_GET_CLOCK did not retrieve a reliable value,
> +         * we can't rely on the saved clock value.  Just discard it and
> +         * read kvmclock value from memory
> +         */
> +        if (!s->src_use_reliable_get_clock) {
> +            uint64_t pvclock_via_mem = kvmclock_current_nsec(s);
> +            if (pvclock_via_mem) {
> +                s->clock = pvclock_via_mem;
>              }
>          }

This is equivalent to the logic I suggested on my reply to v3.
Much simpler.

>  
> -        /* We can't rely on the saved clock value, just discard it */
> -        if (pvclock_via_mem) {
> -            s->clock = pvclock_via_mem;
> -        }
> -
>          s->clock_valid = false;
>  
>          data.clock = s->clock;
> @@ -181,16 +168,6 @@ static void kvmclock_realize(DeviceState *dev, Error **errp)
>  {
>      KVMClockState *s = KVM_CLOCK(dev);
>  
> -    /*
> -     * On machine types that support reliable KVM_GET_CLOCK,
> -     * if host kernel does provide reliable KVM_GET_CLOCK,
> -     * set src_use_reliable_get_clock=true so that destination
> -     * avoids reading kvmclock from memory.
> -     */
> -    if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) {
> -        s->src_use_reliable_get_clock = true;
> -    }
> -
>      qemu_add_vm_change_state_handler(kvmclock_vm_state_change, s);
>  }
>  

-- 
Eduardo

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [Qemu-devel] [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration
@ 2016-11-28 14:28                             ` Eduardo Habkost
  0 siblings, 0 replies; 46+ messages in thread
From: Eduardo Habkost @ 2016-11-28 14:28 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: Marcelo Tosatti, kvm, qemu-devel, Dr. David Alan Gilbert,
	Juan Quintela, Radim Krcmar

On Mon, Nov 28, 2016 at 02:47:18PM +0100, Paolo Bonzini wrote:
> 
> 
> On 17/11/2016 13:16, Marcelo Tosatti wrote:
> > What QEMU wants is to use KVM_GET_CLOCK at pre_save independently
> > of whether masterclock is enabled or not... it just depends
> > on KVM_GET_CLOCK being correct for the masterclock case
> > (108b249c453dd7132599ab6dc7e435a7036c193f).
> > 
> > So a "reliable KVM_GET_CLOCK" (that does not timebackward
> > when masterclock is enabled) is much simpler to userspace
> > than "whether masterclock is enabled or not".
> > 
> > If you have a reason why that should not be the case,
> > let me know.
> 
> I still cannot understand this case.
> 
> If the source has masterclock _disabled_, shouldn't it read kvmclock 
> from memory?  But that's not what your patch does.  To be clear, what
> I mean is:
> 
> diff --git a/hw/i386/kvm/clock.c b/hw/i386/kvm/clock.c
> index 653b0b4..6f6e2dc 100644
> --- a/hw/i386/kvm/clock.c
> +++ b/hw/i386/kvm/clock.c
> @@ -97,6 +97,7 @@ static uint64_t kvm_get_clock(void)
>          fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret));
>                  abort();
>      }
> +    s->src_use_reliable_get_clock = data.flags & KVM_CLOCK_TSC_STABLE;

I still don't understand the reasoning behind
kvm_has_adjust_clock_stable() vs (flags & KVM_CLOCK_TSC_STABLE),
but on either case, updating src_use_reliable_get_clock inside
kvm_get_clock() looks like the right thing to do.

>      return data.clock;
>  }
>  
> @@ -110,34 +111,19 @@ static void kvmclock_vm_state_change(void *opaque, int running,
>  
>      if (running) {
>          struct kvm_clock_data data = {};
> -        uint64_t pvclock_via_mem = 0;
>  
> -        /* local (running VM) restore */
> -        if (s->clock_valid) {
> -            /*
> -             * if host does not support reliable KVM_GET_CLOCK,
> -             * read kvmclock value from memory
> -             */
> -            if (!kvm_has_adjust_clock_stable()) {
> -                pvclock_via_mem = kvmclock_current_nsec(s);
> -            }
> -        /* migration/savevm/init restore */
> -        } else {
> -            /*
> -             * use s->clock in case machine uses reliable
> -             * get clock and source host supported
> -             * reliable get clock
> -             */
> -            if (!s->src_use_reliable_get_clock) {
> -                pvclock_via_mem = kvmclock_current_nsec(s);
> +        /*
> +         * if last KVM_GET_CLOCK did not retrieve a reliable value,
> +         * we can't rely on the saved clock value.  Just discard it and
> +         * read kvmclock value from memory
> +         */
> +        if (!s->src_use_reliable_get_clock) {
> +            uint64_t pvclock_via_mem = kvmclock_current_nsec(s);
> +            if (pvclock_via_mem) {
> +                s->clock = pvclock_via_mem;
>              }
>          }

This is equivalent to the logic I suggested on my reply to v3.
Much simpler.

>  
> -        /* We can't rely on the saved clock value, just discard it */
> -        if (pvclock_via_mem) {
> -            s->clock = pvclock_via_mem;
> -        }
> -
>          s->clock_valid = false;
>  
>          data.clock = s->clock;
> @@ -181,16 +168,6 @@ static void kvmclock_realize(DeviceState *dev, Error **errp)
>  {
>      KVMClockState *s = KVM_CLOCK(dev);
>  
> -    /*
> -     * On machine types that support reliable KVM_GET_CLOCK,
> -     * if host kernel does provide reliable KVM_GET_CLOCK,
> -     * set src_use_reliable_get_clock=true so that destination
> -     * avoids reading kvmclock from memory.
> -     */
> -    if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) {
> -        s->src_use_reliable_get_clock = true;
> -    }
> -
>      qemu_add_vm_change_state_handler(kvmclock_vm_state_change, s);
>  }
>  

-- 
Eduardo

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration
  2016-11-28 14:28                             ` [Qemu-devel] " Eduardo Habkost
@ 2016-11-28 15:12                               ` Paolo Bonzini
  -1 siblings, 0 replies; 46+ messages in thread
From: Paolo Bonzini @ 2016-11-28 15:12 UTC (permalink / raw)
  To: Eduardo Habkost
  Cc: Marcelo Tosatti, kvm, qemu-devel, Dr. David Alan Gilbert,
	Juan Quintela, Radim Krcmar



On 28/11/2016 15:28, Eduardo Habkost wrote:
> > +    s->src_use_reliable_get_clock = data.flags & KVM_CLOCK_TSC_STABLE;
> 
> I still don't understand the reasoning behind
> kvm_has_adjust_clock_stable() vs (flags & KVM_CLOCK_TSC_STABLE),
> but on either case, updating src_use_reliable_get_clock inside
> kvm_get_clock() looks like the right thing to do.

There are three possibility: the kernel tells you the clock is stable,
the kernel tells you the clock is unstable, the kernel is too old and
doesn't tell you anything.  Then:

    kvm_has_adjust_clock_stable() == true:
	if the clock is stable, KVM_CLOCK_TSC_STABLE will be set in "flags"
	if the clock is unstable, KVM_CLOCK_TSC_STABLE will be unset

    kvm_has_adjust_clock_stable() == false:
	you cannot know if the clock is stable

Paolo

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [Qemu-devel] [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration
@ 2016-11-28 15:12                               ` Paolo Bonzini
  0 siblings, 0 replies; 46+ messages in thread
From: Paolo Bonzini @ 2016-11-28 15:12 UTC (permalink / raw)
  To: Eduardo Habkost
  Cc: Marcelo Tosatti, kvm, qemu-devel, Dr. David Alan Gilbert,
	Juan Quintela, Radim Krcmar



On 28/11/2016 15:28, Eduardo Habkost wrote:
> > +    s->src_use_reliable_get_clock = data.flags & KVM_CLOCK_TSC_STABLE;
> 
> I still don't understand the reasoning behind
> kvm_has_adjust_clock_stable() vs (flags & KVM_CLOCK_TSC_STABLE),
> but on either case, updating src_use_reliable_get_clock inside
> kvm_get_clock() looks like the right thing to do.

There are three possibility: the kernel tells you the clock is stable,
the kernel tells you the clock is unstable, the kernel is too old and
doesn't tell you anything.  Then:

    kvm_has_adjust_clock_stable() == true:
	if the clock is stable, KVM_CLOCK_TSC_STABLE will be set in "flags"
	if the clock is unstable, KVM_CLOCK_TSC_STABLE will be unset

    kvm_has_adjust_clock_stable() == false:
	you cannot know if the clock is stable

Paolo

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration
  2016-11-28 13:47                           ` [Qemu-devel] " Paolo Bonzini
@ 2016-11-28 16:36                             ` Marcelo Tosatti
  -1 siblings, 0 replies; 46+ messages in thread
From: Marcelo Tosatti @ 2016-11-28 16:36 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: kvm, qemu-devel, Dr. David Alan Gilbert, Juan Quintela,
	Radim Krcmar, Eduardo Habkost

On Mon, Nov 28, 2016 at 02:47:18PM +0100, Paolo Bonzini wrote:
> 
> 
> On 17/11/2016 13:16, Marcelo Tosatti wrote:
> > What QEMU wants is to use KVM_GET_CLOCK at pre_save independently
> > of whether masterclock is enabled or not... it just depends
> > on KVM_GET_CLOCK being correct for the masterclock case
> > (108b249c453dd7132599ab6dc7e435a7036c193f).
> > 
> > So a "reliable KVM_GET_CLOCK" (that does not timebackward
> > when masterclock is enabled) is much simpler to userspace
> > than "whether masterclock is enabled or not".
> > 
> > If you have a reason why that should not be the case,
> > let me know.
> 
> I still cannot understand this case.
> 
> If the source has masterclock _disabled_, shouldn't it read kvmclock 
> from memory?  

If the source masterclock is disabled, then the guest does
not enable the optimization to not use a global variable 
to guarantee monotonicity. Therefore there will be no 
time backwards events (the timer backwards events crashed 
guests, and are the reason for reading from guest memory).

So if there are no flaws in the reasoning above, 
no, there is no need to read from memory if 
masterclock is disabled.

Can you state the reasons why you think it should be enabled?

> But that's not what your patch does.  

Its on purpose with the data available.

> To be clear, what
> I mean is:
> 
> diff --git a/hw/i386/kvm/clock.c b/hw/i386/kvm/clock.c
> index 653b0b4..6f6e2dc 100644
> --- a/hw/i386/kvm/clock.c
> +++ b/hw/i386/kvm/clock.c
> @@ -97,6 +97,7 @@ static uint64_t kvm_get_clock(void)
>          fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret));
>                  abort();
>      }
> +    s->src_use_reliable_get_clock = data.flags & KVM_CLOCK_TSC_STABLE;
>      return data.clock;
>  }
>  
> @@ -110,34 +111,19 @@ static void kvmclock_vm_state_change(void *opaque, int running,
>  
>      if (running) {
>          struct kvm_clock_data data = {};
> -        uint64_t pvclock_via_mem = 0;
>  
> -        /* local (running VM) restore */
> -        if (s->clock_valid) {
> -            /*
> -             * if host does not support reliable KVM_GET_CLOCK,
> -             * read kvmclock value from memory
> -             */
> -            if (!kvm_has_adjust_clock_stable()) {
> -                pvclock_via_mem = kvmclock_current_nsec(s);
> -            }
> -        /* migration/savevm/init restore */
> -        } else {
> -            /*
> -             * use s->clock in case machine uses reliable
> -             * get clock and source host supported
> -             * reliable get clock
> -             */
> -            if (!s->src_use_reliable_get_clock) {
> -                pvclock_via_mem = kvmclock_current_nsec(s);
> +        /*
> +         * if last KVM_GET_CLOCK did not retrieve a reliable value,
> +         * we can't rely on the saved clock value.  Just discard it and
> +         * read kvmclock value from memory
> +         */
> +        if (!s->src_use_reliable_get_clock) {
> +            uint64_t pvclock_via_mem = kvmclock_current_nsec(s);
> +            if (pvclock_via_mem) {
> +                s->clock = pvclock_via_mem;
>              }
>          }
>  
> -        /* We can't rely on the saved clock value, just discard it */
> -        if (pvclock_via_mem) {
> -            s->clock = pvclock_via_mem;
> -        }
> -
>          s->clock_valid = false;
>  
>          data.clock = s->clock;
> @@ -181,16 +168,6 @@ static void kvmclock_realize(DeviceState *dev, Error **errp)
>  {
>      KVMClockState *s = KVM_CLOCK(dev);
>  
> -    /*
> -     * On machine types that support reliable KVM_GET_CLOCK,
> -     * if host kernel does provide reliable KVM_GET_CLOCK,
> -     * set src_use_reliable_get_clock=true so that destination
> -     * avoids reading kvmclock from memory.
> -     */
> -    if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) {
> -        s->src_use_reliable_get_clock = true;
> -    }
> -
>      qemu_add_vm_change_state_handler(kvmclock_vm_state_change, s);
>  }
>  

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [Qemu-devel] [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration
@ 2016-11-28 16:36                             ` Marcelo Tosatti
  0 siblings, 0 replies; 46+ messages in thread
From: Marcelo Tosatti @ 2016-11-28 16:36 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: kvm, qemu-devel, Dr. David Alan Gilbert, Juan Quintela,
	Radim Krcmar, Eduardo Habkost

On Mon, Nov 28, 2016 at 02:47:18PM +0100, Paolo Bonzini wrote:
> 
> 
> On 17/11/2016 13:16, Marcelo Tosatti wrote:
> > What QEMU wants is to use KVM_GET_CLOCK at pre_save independently
> > of whether masterclock is enabled or not... it just depends
> > on KVM_GET_CLOCK being correct for the masterclock case
> > (108b249c453dd7132599ab6dc7e435a7036c193f).
> > 
> > So a "reliable KVM_GET_CLOCK" (that does not timebackward
> > when masterclock is enabled) is much simpler to userspace
> > than "whether masterclock is enabled or not".
> > 
> > If you have a reason why that should not be the case,
> > let me know.
> 
> I still cannot understand this case.
> 
> If the source has masterclock _disabled_, shouldn't it read kvmclock 
> from memory?  

If the source masterclock is disabled, then the guest does
not enable the optimization to not use a global variable 
to guarantee monotonicity. Therefore there will be no 
time backwards events (the timer backwards events crashed 
guests, and are the reason for reading from guest memory).

So if there are no flaws in the reasoning above, 
no, there is no need to read from memory if 
masterclock is disabled.

Can you state the reasons why you think it should be enabled?

> But that's not what your patch does.  

Its on purpose with the data available.

> To be clear, what
> I mean is:
> 
> diff --git a/hw/i386/kvm/clock.c b/hw/i386/kvm/clock.c
> index 653b0b4..6f6e2dc 100644
> --- a/hw/i386/kvm/clock.c
> +++ b/hw/i386/kvm/clock.c
> @@ -97,6 +97,7 @@ static uint64_t kvm_get_clock(void)
>          fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret));
>                  abort();
>      }
> +    s->src_use_reliable_get_clock = data.flags & KVM_CLOCK_TSC_STABLE;
>      return data.clock;
>  }
>  
> @@ -110,34 +111,19 @@ static void kvmclock_vm_state_change(void *opaque, int running,
>  
>      if (running) {
>          struct kvm_clock_data data = {};
> -        uint64_t pvclock_via_mem = 0;
>  
> -        /* local (running VM) restore */
> -        if (s->clock_valid) {
> -            /*
> -             * if host does not support reliable KVM_GET_CLOCK,
> -             * read kvmclock value from memory
> -             */
> -            if (!kvm_has_adjust_clock_stable()) {
> -                pvclock_via_mem = kvmclock_current_nsec(s);
> -            }
> -        /* migration/savevm/init restore */
> -        } else {
> -            /*
> -             * use s->clock in case machine uses reliable
> -             * get clock and source host supported
> -             * reliable get clock
> -             */
> -            if (!s->src_use_reliable_get_clock) {
> -                pvclock_via_mem = kvmclock_current_nsec(s);
> +        /*
> +         * if last KVM_GET_CLOCK did not retrieve a reliable value,
> +         * we can't rely on the saved clock value.  Just discard it and
> +         * read kvmclock value from memory
> +         */
> +        if (!s->src_use_reliable_get_clock) {
> +            uint64_t pvclock_via_mem = kvmclock_current_nsec(s);
> +            if (pvclock_via_mem) {
> +                s->clock = pvclock_via_mem;
>              }
>          }
>  
> -        /* We can't rely on the saved clock value, just discard it */
> -        if (pvclock_via_mem) {
> -            s->clock = pvclock_via_mem;
> -        }
> -
>          s->clock_valid = false;
>  
>          data.clock = s->clock;
> @@ -181,16 +168,6 @@ static void kvmclock_realize(DeviceState *dev, Error **errp)
>  {
>      KVMClockState *s = KVM_CLOCK(dev);
>  
> -    /*
> -     * On machine types that support reliable KVM_GET_CLOCK,
> -     * if host kernel does provide reliable KVM_GET_CLOCK,
> -     * set src_use_reliable_get_clock=true so that destination
> -     * avoids reading kvmclock from memory.
> -     */
> -    if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) {
> -        s->src_use_reliable_get_clock = true;
> -    }
> -
>      qemu_add_vm_change_state_handler(kvmclock_vm_state_change, s);
>  }
>  

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration
  2016-11-28 16:36                             ` [Qemu-devel] " Marcelo Tosatti
@ 2016-11-28 17:30                               ` Paolo Bonzini
  -1 siblings, 0 replies; 46+ messages in thread
From: Paolo Bonzini @ 2016-11-28 17:30 UTC (permalink / raw)
  To: Marcelo Tosatti
  Cc: kvm, qemu-devel, Dr. David Alan Gilbert, Juan Quintela,
	Radim Krcmar, Eduardo Habkost



On 28/11/2016 17:36, Marcelo Tosatti wrote:
> If the source masterclock is disabled, then the guest does
> not enable the optimization to not use a global variable 
> to guarantee monotonicity. Therefore there will be no 
> time backwards events (the timer backwards events crashed 
> guests, and are the reason for reading from guest memory).
> 
> So if there are no flaws in the reasoning above, 
> no, there is no need to read from memory if 
> masterclock is disabled.

Yeah, the reasoning is sound.  So you go from what Eduardo and I were
thinking:

    if last KVM_GET_CLOCK was not reliable then
        read from memory

to this:

    if last KVM_GET_CLOCK was not reliable && masterclock is enabled
        read from memory

but:

- on an old kernel, the left side is always true and the right side is
unknown (so we must assume it's true and read from memory)

- on a new kernel, the two sides of the "&&" are exactly the opposite,
so the result is always false

and then it becomes

    if old kernel then
        read from memory

Got it finally. :)

Paolo

> Can you state the reasons why you think it should be enabled?
> 

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [Qemu-devel] [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration
@ 2016-11-28 17:30                               ` Paolo Bonzini
  0 siblings, 0 replies; 46+ messages in thread
From: Paolo Bonzini @ 2016-11-28 17:30 UTC (permalink / raw)
  To: Marcelo Tosatti
  Cc: kvm, qemu-devel, Dr. David Alan Gilbert, Juan Quintela,
	Radim Krcmar, Eduardo Habkost



On 28/11/2016 17:36, Marcelo Tosatti wrote:
> If the source masterclock is disabled, then the guest does
> not enable the optimization to not use a global variable 
> to guarantee monotonicity. Therefore there will be no 
> time backwards events (the timer backwards events crashed 
> guests, and are the reason for reading from guest memory).
> 
> So if there are no flaws in the reasoning above, 
> no, there is no need to read from memory if 
> masterclock is disabled.

Yeah, the reasoning is sound.  So you go from what Eduardo and I were
thinking:

    if last KVM_GET_CLOCK was not reliable then
        read from memory

to this:

    if last KVM_GET_CLOCK was not reliable && masterclock is enabled
        read from memory

but:

- on an old kernel, the left side is always true and the right side is
unknown (so we must assume it's true and read from memory)

- on a new kernel, the two sides of the "&&" are exactly the opposite,
so the result is always false

and then it becomes

    if old kernel then
        read from memory

Got it finally. :)

Paolo

> Can you state the reasons why you think it should be enabled?
> 

^ permalink raw reply	[flat|nested] 46+ messages in thread

end of thread, other threads:[~2016-11-28 17:30 UTC | newest]

Thread overview: 46+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-11-14 12:36 [qemu patch 0/2] improve kvmclock difference on migration Marcelo Tosatti
2016-11-14 12:36 ` [Qemu-devel] " Marcelo Tosatti
2016-11-14 12:36 ` [qemu patch 1/2] kvm: sync linux headers Marcelo Tosatti
2016-11-14 12:36   ` [Qemu-devel] " Marcelo Tosatti
2016-11-14 12:36 ` [qemu patch 2/2] kvmclock: reduce kvmclock difference on migration Marcelo Tosatti
2016-11-14 12:36   ` [Qemu-devel] " Marcelo Tosatti
2016-11-14 13:54   ` Paolo Bonzini
2016-11-14 13:54     ` [Qemu-devel] " Paolo Bonzini
2016-11-14 14:00     ` Marcelo Tosatti
2016-11-14 14:00       ` [Qemu-devel] " Marcelo Tosatti
2016-11-14 14:22       ` Paolo Bonzini
2016-11-14 14:22         ` [Qemu-devel] " Paolo Bonzini
2016-11-14 14:50         ` Marcelo Tosatti
2016-11-14 14:50           ` [Qemu-devel] " Marcelo Tosatti
2016-11-14 15:00           ` Paolo Bonzini
2016-11-14 15:00             ` [Qemu-devel] " Paolo Bonzini
2016-11-14 15:40             ` Marcelo Tosatti
2016-11-14 15:40               ` [Qemu-devel] " Marcelo Tosatti
2016-11-14 16:43               ` Paolo Bonzini
2016-11-14 16:43                 ` [Qemu-devel] " Paolo Bonzini
2016-11-14 17:13                 ` Marcelo Tosatti
2016-11-14 17:13                   ` [Qemu-devel] " Marcelo Tosatti
2016-11-14 17:20                   ` Paolo Bonzini
2016-11-14 17:20                     ` [Qemu-devel] " Paolo Bonzini
2016-11-14 18:15                     ` Marcelo Tosatti
2016-11-14 18:15                       ` [Qemu-devel] " Marcelo Tosatti
2016-11-17 12:16                       ` Marcelo Tosatti
2016-11-17 12:16                         ` [Qemu-devel] " Marcelo Tosatti
2016-11-17 13:03                         ` Paolo Bonzini
2016-11-17 13:03                           ` [Qemu-devel] " Paolo Bonzini
2016-11-28 13:47                         ` Paolo Bonzini
2016-11-28 13:47                           ` [Qemu-devel] " Paolo Bonzini
2016-11-28 14:28                           ` Eduardo Habkost
2016-11-28 14:28                             ` [Qemu-devel] " Eduardo Habkost
2016-11-28 15:12                             ` Paolo Bonzini
2016-11-28 15:12                               ` [Qemu-devel] " Paolo Bonzini
2016-11-28 16:36                           ` Marcelo Tosatti
2016-11-28 16:36                             ` [Qemu-devel] " Marcelo Tosatti
2016-11-28 17:30                             ` Paolo Bonzini
2016-11-28 17:30                               ` [Qemu-devel] " Paolo Bonzini
2016-11-14 14:11     ` Juan Quintela
2016-11-14 14:11       ` [Qemu-devel] " Juan Quintela
2016-11-14 14:09   ` Juan Quintela
2016-11-14 14:09     ` [Qemu-devel] " Juan Quintela
2016-11-14 15:37     ` Marcelo Tosatti
2016-11-14 15:37       ` [Qemu-devel] " Marcelo Tosatti

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.