All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC PATCH v2 0/2]  Hyper-V timers
@ 2013-05-19  7:06 Vadim Rozenfeld
  2013-05-19  7:06 ` [RFC PATCH v2 1/2] add support for Hyper-V reference time counter Vadim Rozenfeld
  2013-05-19  7:06 ` [RFC PATCH v2 2/2] add support for Hyper-V invariant TSC Vadim Rozenfeld
  0 siblings, 2 replies; 44+ messages in thread
From: Vadim Rozenfeld @ 2013-05-19  7:06 UTC (permalink / raw)
  To: kvm; +Cc: gleb, mtosatti, pl, Vadim Rozenfeld

This RFC series adds support for two Hyper-V timer services -
a per-partition reference time counter, and a  partition reference 
time enlightenment.

Vadim Rozenfeld (2):
  add support for Hyper-V reference time counter
  add support for Hyper-V invariant TSC

 arch/x86/include/asm/kvm_host.h    |  2 ++
 arch/x86/include/uapi/asm/hyperv.h | 14 +++++++++++++
 arch/x86/kvm/x86.c                 | 43 +++++++++++++++++++++++++++++++++++++-
 include/uapi/linux/kvm.h           |  1 +
 4 files changed, 59 insertions(+), 1 deletion(-)

-- 
1.8.1.2


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [RFC PATCH v2 1/2] add support for Hyper-V reference time counter
  2013-05-19  7:06 [RFC PATCH v2 0/2] Hyper-V timers Vadim Rozenfeld
@ 2013-05-19  7:06 ` Vadim Rozenfeld
  2013-05-19 13:47   ` Gleb Natapov
                     ` (2 more replies)
  2013-05-19  7:06 ` [RFC PATCH v2 2/2] add support for Hyper-V invariant TSC Vadim Rozenfeld
  1 sibling, 3 replies; 44+ messages in thread
From: Vadim Rozenfeld @ 2013-05-19  7:06 UTC (permalink / raw)
  To: kvm; +Cc: gleb, mtosatti, pl, Vadim Rozenfeld

Signed-off: Peter Lieven <pl@dlh.net>
Signed-off: Gleb Natapov <gleb@redhat.com>
Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com>

v1 -> v2
1. mark TSC page dirty as suggested by 
    Eric Northup <digitaleric@google.com> and Gleb
2. disable local irq when calling get_kernel_ns, 
    as it was done by Peter Lieven <pl@dlhnet.de>
3. move check for TSC page enable from second patch
    to this one.
 
---
 arch/x86/include/asm/kvm_host.h    |  2 ++
 arch/x86/include/uapi/asm/hyperv.h | 14 ++++++++++++++
 arch/x86/kvm/x86.c                 | 39 +++++++++++++++++++++++++++++++++++++-
 include/uapi/linux/kvm.h           |  1 +
 4 files changed, 55 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3741c65..f0fee35 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -575,6 +575,8 @@ struct kvm_arch {
 	/* fields used by HYPER-V emulation */
 	u64 hv_guest_os_id;
 	u64 hv_hypercall;
+	u64 hv_ref_count;
+	u64 hv_tsc_page;
 
 	#ifdef CONFIG_KVM_MMU_AUDIT
 	int audit_point;
diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
index b80420b..890dfc3 100644
--- a/arch/x86/include/uapi/asm/hyperv.h
+++ b/arch/x86/include/uapi/asm/hyperv.h
@@ -136,6 +136,9 @@
 /* MSR used to read the per-partition time reference counter */
 #define HV_X64_MSR_TIME_REF_COUNT		0x40000020
 
+/* A partition's reference time stamp counter (TSC) page */
+#define HV_X64_MSR_REFERENCE_TSC		0x40000021
+
 /* Define the virtual APIC registers */
 #define HV_X64_MSR_EOI				0x40000070
 #define HV_X64_MSR_ICR				0x40000071
@@ -179,6 +182,9 @@
 #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK	\
 		(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
 
+#define HV_X64_MSR_TSC_REFERENCE_ENABLE			0x00000001
+#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT	12
+
 #define HV_PROCESSOR_POWER_STATE_C0		0
 #define HV_PROCESSOR_POWER_STATE_C1		1
 #define HV_PROCESSOR_POWER_STATE_C2		2
@@ -191,4 +197,12 @@
 #define HV_STATUS_INVALID_ALIGNMENT		4
 #define HV_STATUS_INSUFFICIENT_BUFFERS		19
 
+typedef struct _HV_REFERENCE_TSC_PAGE {
+	__u32 TscSequence;
+	__u32 Rserved1;
+	__u64 TscScale;
+	__s64  TscOffset;
+} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
+
+
 #endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8d28810..9645dab 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -843,7 +843,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
 static u32 msrs_to_save[] = {
 	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
 	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
-	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
+	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT,
 	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
 	MSR_KVM_PV_EOI_EN,
 	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
@@ -1788,6 +1788,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
 	switch (msr) {
 	case HV_X64_MSR_GUEST_OS_ID:
 	case HV_X64_MSR_HYPERCALL:
+	case HV_X64_MSR_REFERENCE_TSC:
+	case HV_X64_MSR_TIME_REF_COUNT:
 		r = true;
 		break;
 	}
@@ -1827,6 +1829,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 		if (__copy_to_user((void __user *)addr, instructions, 4))
 			return 1;
 		kvm->arch.hv_hypercall = data;
+		local_irq_disable();
+		kvm->arch.hv_ref_count = get_kernel_ns();
+		local_irq_enable();
+		break;
+	}
+	case HV_X64_MSR_REFERENCE_TSC: {
+		u64 gfn;
+		unsigned long addr;
+		HV_REFERENCE_TSC_PAGE tsc_ref;
+		tsc_ref.TscSequence = 0;
+		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) {
+			kvm->arch.hv_tsc_page = data;
+			break;
+		}
+		gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
+		addr = gfn_to_hva(kvm, data >>
+				HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
+		if (kvm_is_error_hva(addr))
+			return 1;
+		if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref)))
+			return 1;
+		mark_page_dirty(kvm, gfn);
+		kvm->arch.hv_tsc_page = data;
 		break;
 	}
 	default:
@@ -2253,6 +2278,17 @@ static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 	case HV_X64_MSR_HYPERCALL:
 		data = kvm->arch.hv_hypercall;
 		break;
+	case HV_X64_MSR_TIME_REF_COUNT: {
+		u64 now_ns;
+		local_irq_disable();
+		now_ns = get_kernel_ns() + kvm->arch.kvmclock_offset;
+		data = div_u64(now_ns - kvm->arch.hv_ref_count, 100);
+		local_irq_enable();
+		break;
+	}
+	case HV_X64_MSR_REFERENCE_TSC:
+		data = kvm->arch.hv_tsc_page;
+		break;
 	default:
 		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
 		return 1;
@@ -2566,6 +2602,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_ASSIGN_DEV_IRQ:
 	case KVM_CAP_PCI_2_3:
 #endif
+	case KVM_CAP_HYPERV_TIME:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index a5c86fc..0810763 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -666,6 +666,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_IRQ_MPIC 90
 #define KVM_CAP_PPC_RTAS 91
 #define KVM_CAP_IRQ_XICS 92
+#define KVM_CAP_HYPERV_TIME 93
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
-- 
1.8.1.2


^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [RFC PATCH v2 2/2] add support for Hyper-V invariant TSC
  2013-05-19  7:06 [RFC PATCH v2 0/2] Hyper-V timers Vadim Rozenfeld
  2013-05-19  7:06 ` [RFC PATCH v2 1/2] add support for Hyper-V reference time counter Vadim Rozenfeld
@ 2013-05-19  7:06 ` Vadim Rozenfeld
  2013-05-22  0:50   ` Marcelo Tosatti
  2013-05-23 16:44   ` Paolo Bonzini
  1 sibling, 2 replies; 44+ messages in thread
From: Vadim Rozenfeld @ 2013-05-19  7:06 UTC (permalink / raw)
  To: kvm; +Cc: gleb, mtosatti, pl, Vadim Rozenfeld

The following patch allows to activate a partition reference 
time enlightenment that is based on the host platform's support 
for an Invariant Time Stamp Counter (iTSC).
NOTE: This code will survive migration due to lack of VM stop/resume
handlers, when offset, scale and sequence should be
readjusted. 

---
 arch/x86/kvm/x86.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9645dab..b423fe4 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1838,7 +1838,6 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 		u64 gfn;
 		unsigned long addr;
 		HV_REFERENCE_TSC_PAGE tsc_ref;
-		tsc_ref.TscSequence = 0;
 		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) {
 			kvm->arch.hv_tsc_page = data;
 			break;
@@ -1848,6 +1847,11 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 				HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
 		if (kvm_is_error_hva(addr))
 			return 1;
+		tsc_ref.TscSequence =
+				boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? 1 : 0;
+		tsc_ref.TscScale =
+				((10000LL << 32) / vcpu->arch.virtual_tsc_khz) << 32;
+		tsc_ref.TscOffset = 0;
 		if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref)))
 			return 1;
 		mark_page_dirty(kvm, gfn);
-- 
1.8.1.2


^ permalink raw reply related	[flat|nested] 44+ messages in thread

* Re: [RFC PATCH v2 1/2] add support for Hyper-V reference time counter
  2013-05-19  7:06 ` [RFC PATCH v2 1/2] add support for Hyper-V reference time counter Vadim Rozenfeld
@ 2013-05-19 13:47   ` Gleb Natapov
  2013-05-19 14:37   ` Paolo Bonzini
  2013-05-22  0:46   ` Marcelo Tosatti
  2 siblings, 0 replies; 44+ messages in thread
From: Gleb Natapov @ 2013-05-19 13:47 UTC (permalink / raw)
  To: Vadim Rozenfeld; +Cc: kvm, mtosatti, pl

On Sun, May 19, 2013 at 05:06:36PM +1000, Vadim Rozenfeld wrote:
> Signed-off: Peter Lieven <pl@dlh.net>
> Signed-off: Gleb Natapov <gleb@redhat.com>
> Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com>
> 
> v1 -> v2
> 1. mark TSC page dirty as suggested by 
>     Eric Northup <digitaleric@google.com> and Gleb
> 2. disable local irq when calling get_kernel_ns, 
>     as it was done by Peter Lieven <pl@dlhnet.de>
> 3. move check for TSC page enable from second patch
>     to this one.
>  
> ---
>  arch/x86/include/asm/kvm_host.h    |  2 ++
>  arch/x86/include/uapi/asm/hyperv.h | 14 ++++++++++++++
>  arch/x86/kvm/x86.c                 | 39 +++++++++++++++++++++++++++++++++++++-
>  include/uapi/linux/kvm.h           |  1 +
>  4 files changed, 55 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 3741c65..f0fee35 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -575,6 +575,8 @@ struct kvm_arch {
>  	/* fields used by HYPER-V emulation */
>  	u64 hv_guest_os_id;
>  	u64 hv_hypercall;
> +	u64 hv_ref_count;
> +	u64 hv_tsc_page;
>  
>  	#ifdef CONFIG_KVM_MMU_AUDIT
>  	int audit_point;
> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
> index b80420b..890dfc3 100644
> --- a/arch/x86/include/uapi/asm/hyperv.h
> +++ b/arch/x86/include/uapi/asm/hyperv.h
> @@ -136,6 +136,9 @@
>  /* MSR used to read the per-partition time reference counter */
>  #define HV_X64_MSR_TIME_REF_COUNT		0x40000020
>  
> +/* A partition's reference time stamp counter (TSC) page */
> +#define HV_X64_MSR_REFERENCE_TSC		0x40000021
> +
>  /* Define the virtual APIC registers */
>  #define HV_X64_MSR_EOI				0x40000070
>  #define HV_X64_MSR_ICR				0x40000071
> @@ -179,6 +182,9 @@
>  #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK	\
>  		(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
>  
> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE			0x00000001
> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT	12
> +
>  #define HV_PROCESSOR_POWER_STATE_C0		0
>  #define HV_PROCESSOR_POWER_STATE_C1		1
>  #define HV_PROCESSOR_POWER_STATE_C2		2
> @@ -191,4 +197,12 @@
>  #define HV_STATUS_INVALID_ALIGNMENT		4
>  #define HV_STATUS_INSUFFICIENT_BUFFERS		19
>  
> +typedef struct _HV_REFERENCE_TSC_PAGE {
> +	__u32 TscSequence;
> +	__u32 Rserved1;
> +	__u64 TscScale;
> +	__s64  TscOffset;
> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
> +
I understand that you want to keep structure definition as close to MS
docs as possible, but this breaks all the rules of Linux kernel code
unfortunately. Lest define it like that:

struct hv_reference_tsc_page {
	__u32 tsc_sequence;
	__u32 reserved1;
	__u64 tsc_scale;
	__s64 tsc_offset;
};

And drop typedef. If you'll look at include/linux/hyperv.h this is what
Linux hyper-v guest support does.

> +
>  #endif
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 8d28810..9645dab 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -843,7 +843,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
>  static u32 msrs_to_save[] = {
>  	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
>  	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
> -	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
> +	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT,
>  	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
>  	MSR_KVM_PV_EOI_EN,
>  	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
> @@ -1788,6 +1788,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
>  	switch (msr) {
>  	case HV_X64_MSR_GUEST_OS_ID:
>  	case HV_X64_MSR_HYPERCALL:
> +	case HV_X64_MSR_REFERENCE_TSC:
> +	case HV_X64_MSR_TIME_REF_COUNT:
>  		r = true;
>  		break;
>  	}
> @@ -1827,6 +1829,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>  		if (__copy_to_user((void __user *)addr, instructions, 4))
>  			return 1;
>  		kvm->arch.hv_hypercall = data;
> +		local_irq_disable();
> +		kvm->arch.hv_ref_count = get_kernel_ns();
> +		local_irq_enable();
> +		break;
> +	}
> +	case HV_X64_MSR_REFERENCE_TSC: {
> +		u64 gfn;
> +		unsigned long addr;
> +		HV_REFERENCE_TSC_PAGE tsc_ref;
> +		tsc_ref.TscSequence = 0;
> +		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) {
> +			kvm->arch.hv_tsc_page = data;
> +			break;
> +		}
> +		gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
> +		addr = gfn_to_hva(kvm, data >>
> +				HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);

		addr = gfn_to_hva(kvm, gfn);

You already calculated gfn :)

> +		if (kvm_is_error_hva(addr))
> +			return 1;
> +		if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref)))
> +			return 1;
> +		mark_page_dirty(kvm, gfn);
> +		kvm->arch.hv_tsc_page = data;
>  		break;
>  	}
>  	default:
> @@ -2253,6 +2278,17 @@ static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
>  	case HV_X64_MSR_HYPERCALL:
>  		data = kvm->arch.hv_hypercall;
>  		break;
> +	case HV_X64_MSR_TIME_REF_COUNT: {
> +		u64 now_ns;
> +		local_irq_disable();
> +		now_ns = get_kernel_ns() + kvm->arch.kvmclock_offset;
> +		data = div_u64(now_ns - kvm->arch.hv_ref_count, 100);
> +		local_irq_enable();
> +		break;
> +	}
> +	case HV_X64_MSR_REFERENCE_TSC:
> +		data = kvm->arch.hv_tsc_page;
> +		break;
>  	default:
>  		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
>  		return 1;
> @@ -2566,6 +2602,7 @@ int kvm_dev_ioctl_check_extension(long ext)
>  	case KVM_CAP_ASSIGN_DEV_IRQ:
>  	case KVM_CAP_PCI_2_3:
>  #endif
> +	case KVM_CAP_HYPERV_TIME:
>  		r = 1;
>  		break;
>  	case KVM_CAP_COALESCED_MMIO:
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index a5c86fc..0810763 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -666,6 +666,7 @@ struct kvm_ppc_smmu_info {
>  #define KVM_CAP_IRQ_MPIC 90
>  #define KVM_CAP_PPC_RTAS 91
>  #define KVM_CAP_IRQ_XICS 92
> +#define KVM_CAP_HYPERV_TIME 93
>  
>  #ifdef KVM_CAP_IRQ_ROUTING
>  
> -- 
> 1.8.1.2

--
			Gleb.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [RFC PATCH v2 1/2] add support for Hyper-V reference time counter
  2013-05-19  7:06 ` [RFC PATCH v2 1/2] add support for Hyper-V reference time counter Vadim Rozenfeld
  2013-05-19 13:47   ` Gleb Natapov
@ 2013-05-19 14:37   ` Paolo Bonzini
  2013-05-22  0:46   ` Marcelo Tosatti
  2 siblings, 0 replies; 44+ messages in thread
From: Paolo Bonzini @ 2013-05-19 14:37 UTC (permalink / raw)
  To: Vadim Rozenfeld; +Cc: kvm, gleb, mtosatti, pl

Il 19/05/2013 09:06, Vadim Rozenfeld ha scritto:
> Signed-off: Peter Lieven <pl@dlh.net>
> Signed-off: Gleb Natapov <gleb@redhat.com>
> Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com>
> 
> v1 -> v2
> 1. mark TSC page dirty as suggested by 
>     Eric Northup <digitaleric@google.com> and Gleb
> 2. disable local irq when calling get_kernel_ns, 
>     as it was done by Peter Lieven <pl@dlhnet.de>
> 3. move check for TSC page enable from second patch
>     to this one.
>  
> ---
>  arch/x86/include/asm/kvm_host.h    |  2 ++
>  arch/x86/include/uapi/asm/hyperv.h | 14 ++++++++++++++
>  arch/x86/kvm/x86.c                 | 39 +++++++++++++++++++++++++++++++++++++-
>  include/uapi/linux/kvm.h           |  1 +
>  4 files changed, 55 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 3741c65..f0fee35 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -575,6 +575,8 @@ struct kvm_arch {
>  	/* fields used by HYPER-V emulation */
>  	u64 hv_guest_os_id;
>  	u64 hv_hypercall;
> +	u64 hv_ref_count;
> +	u64 hv_tsc_page;
>  
>  	#ifdef CONFIG_KVM_MMU_AUDIT
>  	int audit_point;
> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
> index b80420b..890dfc3 100644
> --- a/arch/x86/include/uapi/asm/hyperv.h
> +++ b/arch/x86/include/uapi/asm/hyperv.h
> @@ -136,6 +136,9 @@
>  /* MSR used to read the per-partition time reference counter */
>  #define HV_X64_MSR_TIME_REF_COUNT		0x40000020
>  
> +/* A partition's reference time stamp counter (TSC) page */
> +#define HV_X64_MSR_REFERENCE_TSC		0x40000021
> +
>  /* Define the virtual APIC registers */
>  #define HV_X64_MSR_EOI				0x40000070
>  #define HV_X64_MSR_ICR				0x40000071
> @@ -179,6 +182,9 @@
>  #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK	\
>  		(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
>  
> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE			0x00000001
> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT	12
> +
>  #define HV_PROCESSOR_POWER_STATE_C0		0
>  #define HV_PROCESSOR_POWER_STATE_C1		1
>  #define HV_PROCESSOR_POWER_STATE_C2		2
> @@ -191,4 +197,12 @@
>  #define HV_STATUS_INVALID_ALIGNMENT		4
>  #define HV_STATUS_INSUFFICIENT_BUFFERS		19
>  
> +typedef struct _HV_REFERENCE_TSC_PAGE {
> +	__u32 TscSequence;
> +	__u32 Rserved1;
> +	__u64 TscScale;
> +	__s64  TscOffset;
> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
> +
> +
>  #endif
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 8d28810..9645dab 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -843,7 +843,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
>  static u32 msrs_to_save[] = {
>  	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
>  	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
> -	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
> +	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT,
>  	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
>  	MSR_KVM_PV_EOI_EN,
>  	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
> @@ -1788,6 +1788,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
>  	switch (msr) {
>  	case HV_X64_MSR_GUEST_OS_ID:
>  	case HV_X64_MSR_HYPERCALL:
> +	case HV_X64_MSR_REFERENCE_TSC:
> +	case HV_X64_MSR_TIME_REF_COUNT:
>  		r = true;
>  		break;
>  	}
> @@ -1827,6 +1829,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>  		if (__copy_to_user((void __user *)addr, instructions, 4))
>  			return 1;
>  		kvm->arch.hv_hypercall = data;
> +		local_irq_disable();
> +		kvm->arch.hv_ref_count = get_kernel_ns();
> +		local_irq_enable();
> +		break;
> +	}
> +	case HV_X64_MSR_REFERENCE_TSC: {
> +		u64 gfn;
> +		unsigned long addr;
> +		HV_REFERENCE_TSC_PAGE tsc_ref;
> +		tsc_ref.TscSequence = 0;

Again, please explain why this is not 0xFFFFFFFF.

Paolo

> +		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) {
> +			kvm->arch.hv_tsc_page = data;
> +			break;
> +		}
> +		gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
> +		addr = gfn_to_hva(kvm, data >>
> +				HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
> +		if (kvm_is_error_hva(addr))
> +			return 1;
> +		if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref)))
> +			return 1;
> +		mark_page_dirty(kvm, gfn);
> +		kvm->arch.hv_tsc_page = data;
>  		break;
>  	}
>  	default:
> @@ -2253,6 +2278,17 @@ static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
>  	case HV_X64_MSR_HYPERCALL:
>  		data = kvm->arch.hv_hypercall;
>  		break;
> +	case HV_X64_MSR_TIME_REF_COUNT: {
> +		u64 now_ns;
> +		local_irq_disable();
> +		now_ns = get_kernel_ns() + kvm->arch.kvmclock_offset;
> +		data = div_u64(now_ns - kvm->arch.hv_ref_count, 100);
> +		local_irq_enable();
> +		break;
> +	}
> +	case HV_X64_MSR_REFERENCE_TSC:
> +		data = kvm->arch.hv_tsc_page;
> +		break;
>  	default:
>  		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
>  		return 1;
> @@ -2566,6 +2602,7 @@ int kvm_dev_ioctl_check_extension(long ext)
>  	case KVM_CAP_ASSIGN_DEV_IRQ:
>  	case KVM_CAP_PCI_2_3:
>  #endif
> +	case KVM_CAP_HYPERV_TIME:
>  		r = 1;
>  		break;
>  	case KVM_CAP_COALESCED_MMIO:
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index a5c86fc..0810763 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -666,6 +666,7 @@ struct kvm_ppc_smmu_info {
>  #define KVM_CAP_IRQ_MPIC 90
>  #define KVM_CAP_PPC_RTAS 91
>  #define KVM_CAP_IRQ_XICS 92
> +#define KVM_CAP_HYPERV_TIME 93
>  
>  #ifdef KVM_CAP_IRQ_ROUTING
>  
> 


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [RFC PATCH v2 1/2] add support for Hyper-V reference time counter
  2013-05-19  7:06 ` [RFC PATCH v2 1/2] add support for Hyper-V reference time counter Vadim Rozenfeld
  2013-05-19 13:47   ` Gleb Natapov
  2013-05-19 14:37   ` Paolo Bonzini
@ 2013-05-22  0:46   ` Marcelo Tosatti
  2013-05-22  3:28     ` Gleb Natapov
  2013-05-22  7:32     ` Vadim Rozenfeld
  2 siblings, 2 replies; 44+ messages in thread
From: Marcelo Tosatti @ 2013-05-22  0:46 UTC (permalink / raw)
  To: Vadim Rozenfeld; +Cc: kvm, gleb, pl

On Sun, May 19, 2013 at 05:06:36PM +1000, Vadim Rozenfeld wrote:
> Signed-off: Peter Lieven <pl@dlh.net>
> Signed-off: Gleb Natapov <gleb@redhat.com>
> Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com>
> 
> v1 -> v2
> 1. mark TSC page dirty as suggested by 
>     Eric Northup <digitaleric@google.com> and Gleb
> 2. disable local irq when calling get_kernel_ns, 
>     as it was done by Peter Lieven <pl@dlhnet.de>
> 3. move check for TSC page enable from second patch
>     to this one.
>  
> ---
>  arch/x86/include/asm/kvm_host.h    |  2 ++
>  arch/x86/include/uapi/asm/hyperv.h | 14 ++++++++++++++
>  arch/x86/kvm/x86.c                 | 39 +++++++++++++++++++++++++++++++++++++-
>  include/uapi/linux/kvm.h           |  1 +
>  4 files changed, 55 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 3741c65..f0fee35 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -575,6 +575,8 @@ struct kvm_arch {
>  	/* fields used by HYPER-V emulation */
>  	u64 hv_guest_os_id;
>  	u64 hv_hypercall;
> +	u64 hv_ref_count;
> +	u64 hv_tsc_page;
>  
>  	#ifdef CONFIG_KVM_MMU_AUDIT
>  	int audit_point;
> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
> index b80420b..890dfc3 100644
> --- a/arch/x86/include/uapi/asm/hyperv.h
> +++ b/arch/x86/include/uapi/asm/hyperv.h
> @@ -136,6 +136,9 @@
>  /* MSR used to read the per-partition time reference counter */
>  #define HV_X64_MSR_TIME_REF_COUNT		0x40000020
>  
> +/* A partition's reference time stamp counter (TSC) page */
> +#define HV_X64_MSR_REFERENCE_TSC		0x40000021
> +
>  /* Define the virtual APIC registers */
>  #define HV_X64_MSR_EOI				0x40000070
>  #define HV_X64_MSR_ICR				0x40000071
> @@ -179,6 +182,9 @@
>  #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK	\
>  		(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
>  
> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE			0x00000001
> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT	12
> +
>  #define HV_PROCESSOR_POWER_STATE_C0		0
>  #define HV_PROCESSOR_POWER_STATE_C1		1
>  #define HV_PROCESSOR_POWER_STATE_C2		2
> @@ -191,4 +197,12 @@
>  #define HV_STATUS_INVALID_ALIGNMENT		4
>  #define HV_STATUS_INSUFFICIENT_BUFFERS		19
>  
> +typedef struct _HV_REFERENCE_TSC_PAGE {
> +	__u32 TscSequence;
> +	__u32 Rserved1;
> +	__u64 TscScale;
> +	__s64  TscOffset;
> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
> +
> +
>  #endif
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 8d28810..9645dab 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -843,7 +843,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
>  static u32 msrs_to_save[] = {
>  	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
>  	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, not needed.
> -	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
> +	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT,
>  	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
>  	MSR_KVM_PV_EOI_EN,
>  	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
> @@ -1788,6 +1788,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
>  	switch (msr) {
>  	case HV_X64_MSR_GUEST_OS_ID:
>  	case HV_X64_MSR_HYPERCALL:
> +	case HV_X64_MSR_REFERENCE_TSC:
> +	case HV_X64_MSR_TIME_REF_COUNT:
>  		r = true;
>  		break;
>  	}
> @@ -1827,6 +1829,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>  		if (__copy_to_user((void __user *)addr, instructions, 4))
>  			return 1;
>  		kvm->arch.hv_hypercall = data;
> +		local_irq_disable();
> +		kvm->arch.hv_ref_count = get_kernel_ns();
> +		local_irq_enable();
> +		break;

local_irq_disable/local_irq_enable not needed.

What is the reasoning behind reading this time value at msr write time?

> +	}
> +	case HV_X64_MSR_REFERENCE_TSC: {
> +		u64 gfn;
> +		unsigned long addr;
> +		HV_REFERENCE_TSC_PAGE tsc_ref;
> +		tsc_ref.TscSequence = 0;
> +		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) {
> +			kvm->arch.hv_tsc_page = data;
> +			break;
> +		}
> +		gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
> +		addr = gfn_to_hva(kvm, data >>
> +				HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
> +		if (kvm_is_error_hva(addr))
> +			return 1;
> +		if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref)))
> +			return 1;
> +		mark_page_dirty(kvm, gfn);
> +		kvm->arch.hv_tsc_page = data;
>  		break;
>  	}
>  	default:
> @@ -2253,6 +2278,17 @@ static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
>  	case HV_X64_MSR_HYPERCALL:
>  		data = kvm->arch.hv_hypercall;
>  		break;
> +	case HV_X64_MSR_TIME_REF_COUNT: {
> +		u64 now_ns;
> +		local_irq_disable();
> +		now_ns = get_kernel_ns() + kvm->arch.kvmclock_offset;
> +		data = div_u64(now_ns - kvm->arch.hv_ref_count, 100);
> +		local_irq_enable();
> +		break;
> +	}

local_irq_disable/enable not needed.

It would be nice to have a testcase to compare reference tsc versus MSR.


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [RFC PATCH v2 2/2] add support for Hyper-V invariant TSC
  2013-05-19  7:06 ` [RFC PATCH v2 2/2] add support for Hyper-V invariant TSC Vadim Rozenfeld
@ 2013-05-22  0:50   ` Marcelo Tosatti
  2013-05-22  7:22     ` Vadim Rozenfeld
  2013-05-23 16:44   ` Paolo Bonzini
  1 sibling, 1 reply; 44+ messages in thread
From: Marcelo Tosatti @ 2013-05-22  0:50 UTC (permalink / raw)
  To: Vadim Rozenfeld; +Cc: kvm, gleb, pl

On Sun, May 19, 2013 at 05:06:37PM +1000, Vadim Rozenfeld wrote:
> The following patch allows to activate a partition reference 
> time enlightenment that is based on the host platform's support 
> for an Invariant Time Stamp Counter (iTSC).
> NOTE: This code will survive migration due to lack of VM stop/resume
> handlers, when offset, scale and sequence should be
> readjusted. 
> 
> ---
>  arch/x86/kvm/x86.c | 6 +++++-
>  1 file changed, 5 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 9645dab..b423fe4 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -1838,7 +1838,6 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>  		u64 gfn;
>  		unsigned long addr;
>  		HV_REFERENCE_TSC_PAGE tsc_ref;
> -		tsc_ref.TscSequence = 0;
>  		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) {
>  			kvm->arch.hv_tsc_page = data;
>  			break;
> @@ -1848,6 +1847,11 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>  				HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
>  		if (kvm_is_error_hva(addr))
>  			return 1;
> +		tsc_ref.TscSequence =
> +				boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? 1 : 0;

1) You want NONSTOP_TSC (see 40fb1715 commit) which matches INVARIANT TSC.
2) TscSequence should increase? 
"This field serves as a sequence number that is incremented whenever..."
3) 0xFFFFFFFF is the value for invalid source of reference time?

> +		tsc_ref.TscScale =
> +				((10000LL << 32) / vcpu->arch.virtual_tsc_khz) << 32;
> +		tsc_ref.TscOffset = 0;
>  		if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref)))
>  			return 1;
>  		mark_page_dirty(kvm, gfn);
> -- 
> 1.8.1.2

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [RFC PATCH v2 1/2] add support for Hyper-V reference time counter
  2013-05-22  0:46   ` Marcelo Tosatti
@ 2013-05-22  3:28     ` Gleb Natapov
  2013-05-22  3:32       ` Marcelo Tosatti
  2013-05-22  7:32     ` Vadim Rozenfeld
  1 sibling, 1 reply; 44+ messages in thread
From: Gleb Natapov @ 2013-05-22  3:28 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: Vadim Rozenfeld, kvm, pl

On Tue, May 21, 2013 at 09:46:14PM -0300, Marcelo Tosatti wrote:
> On Sun, May 19, 2013 at 05:06:36PM +1000, Vadim Rozenfeld wrote:
> > Signed-off: Peter Lieven <pl@dlh.net>
> > Signed-off: Gleb Natapov <gleb@redhat.com>
> > Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com>
> > 
> > v1 -> v2
> > 1. mark TSC page dirty as suggested by 
> >     Eric Northup <digitaleric@google.com> and Gleb
> > 2. disable local irq when calling get_kernel_ns, 
> >     as it was done by Peter Lieven <pl@dlhnet.de>
> > 3. move check for TSC page enable from second patch
> >     to this one.
> >  
> > ---
> >  arch/x86/include/asm/kvm_host.h    |  2 ++
> >  arch/x86/include/uapi/asm/hyperv.h | 14 ++++++++++++++
> >  arch/x86/kvm/x86.c                 | 39 +++++++++++++++++++++++++++++++++++++-
> >  include/uapi/linux/kvm.h           |  1 +
> >  4 files changed, 55 insertions(+), 1 deletion(-)
> > 
> > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > index 3741c65..f0fee35 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -575,6 +575,8 @@ struct kvm_arch {
> >  	/* fields used by HYPER-V emulation */
> >  	u64 hv_guest_os_id;
> >  	u64 hv_hypercall;
> > +	u64 hv_ref_count;
> > +	u64 hv_tsc_page;
> >  
> >  	#ifdef CONFIG_KVM_MMU_AUDIT
> >  	int audit_point;
> > diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
> > index b80420b..890dfc3 100644
> > --- a/arch/x86/include/uapi/asm/hyperv.h
> > +++ b/arch/x86/include/uapi/asm/hyperv.h
> > @@ -136,6 +136,9 @@
> >  /* MSR used to read the per-partition time reference counter */
> >  #define HV_X64_MSR_TIME_REF_COUNT		0x40000020
> >  
> > +/* A partition's reference time stamp counter (TSC) page */
> > +#define HV_X64_MSR_REFERENCE_TSC		0x40000021
> > +
> >  /* Define the virtual APIC registers */
> >  #define HV_X64_MSR_EOI				0x40000070
> >  #define HV_X64_MSR_ICR				0x40000071
> > @@ -179,6 +182,9 @@
> >  #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK	\
> >  		(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
> >  
> > +#define HV_X64_MSR_TSC_REFERENCE_ENABLE			0x00000001
> > +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT	12
> > +
> >  #define HV_PROCESSOR_POWER_STATE_C0		0
> >  #define HV_PROCESSOR_POWER_STATE_C1		1
> >  #define HV_PROCESSOR_POWER_STATE_C2		2
> > @@ -191,4 +197,12 @@
> >  #define HV_STATUS_INVALID_ALIGNMENT		4
> >  #define HV_STATUS_INSUFFICIENT_BUFFERS		19
> >  
> > +typedef struct _HV_REFERENCE_TSC_PAGE {
> > +	__u32 TscSequence;
> > +	__u32 Rserved1;
> > +	__u64 TscScale;
> > +	__s64  TscOffset;
> > +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
> > +
> > +
> >  #endif
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index 8d28810..9645dab 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -843,7 +843,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
> >  static u32 msrs_to_save[] = {
> >  	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
> >  	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, not needed.
> > -	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
> > +	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT,
> >  	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
> >  	MSR_KVM_PV_EOI_EN,
> >  	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
> > @@ -1788,6 +1788,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
> >  	switch (msr) {
> >  	case HV_X64_MSR_GUEST_OS_ID:
> >  	case HV_X64_MSR_HYPERCALL:
> > +	case HV_X64_MSR_REFERENCE_TSC:
> > +	case HV_X64_MSR_TIME_REF_COUNT:
> >  		r = true;
> >  		break;
> >  	}
> > @@ -1827,6 +1829,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
> >  		if (__copy_to_user((void __user *)addr, instructions, 4))
> >  			return 1;
> >  		kvm->arch.hv_hypercall = data;
> > +		local_irq_disable();
> > +		kvm->arch.hv_ref_count = get_kernel_ns();
> > +		local_irq_enable();
> > +		break;
> 
> local_irq_disable/local_irq_enable not needed.
>
get_kernel_ns has WARN_ON(preemptible()) so at least preempt_disable()
is needed, but all callers of the function disable interrupts.

> What is the reasoning behind reading this time value at msr write time?
> 
> > +	}
> > +	case HV_X64_MSR_REFERENCE_TSC: {
> > +		u64 gfn;
> > +		unsigned long addr;
> > +		HV_REFERENCE_TSC_PAGE tsc_ref;
> > +		tsc_ref.TscSequence = 0;
> > +		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) {
> > +			kvm->arch.hv_tsc_page = data;
> > +			break;
> > +		}
> > +		gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
> > +		addr = gfn_to_hva(kvm, data >>
> > +				HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
> > +		if (kvm_is_error_hva(addr))
> > +			return 1;
> > +		if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref)))
> > +			return 1;
> > +		mark_page_dirty(kvm, gfn);
> > +		kvm->arch.hv_tsc_page = data;
> >  		break;
> >  	}
> >  	default:
> > @@ -2253,6 +2278,17 @@ static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
> >  	case HV_X64_MSR_HYPERCALL:
> >  		data = kvm->arch.hv_hypercall;
> >  		break;
> > +	case HV_X64_MSR_TIME_REF_COUNT: {
> > +		u64 now_ns;
> > +		local_irq_disable();
> > +		now_ns = get_kernel_ns() + kvm->arch.kvmclock_offset;
> > +		data = div_u64(now_ns - kvm->arch.hv_ref_count, 100);
> > +		local_irq_enable();
> > +		break;
> > +	}
> 
> local_irq_disable/enable not needed.
> 
> It would be nice to have a testcase to compare reference tsc versus MSR.

--
			Gleb.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [RFC PATCH v2 1/2] add support for Hyper-V reference time counter
  2013-05-22  3:28     ` Gleb Natapov
@ 2013-05-22  3:32       ` Marcelo Tosatti
  2013-05-22  3:38         ` Gleb Natapov
  0 siblings, 1 reply; 44+ messages in thread
From: Marcelo Tosatti @ 2013-05-22  3:32 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: Vadim Rozenfeld, kvm, pl

On Wed, May 22, 2013 at 06:28:47AM +0300, Gleb Natapov wrote
> > > +	case HV_X64_MSR_TIME_REF_COUNT:
> > >  		r = true;
> > >  		break;
> > >  	}
> > > @@ -1827,6 +1829,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
> > >  		if (__copy_to_user((void __user *)addr, instructions, 4))
> > >  			return 1;
> > >  		kvm->arch.hv_hypercall = data;
> > > +		local_irq_disable();
> > > +		kvm->arch.hv_ref_count = get_kernel_ns();
> > > +		local_irq_enable();
> > > +		break;
> > 
> > local_irq_disable/local_irq_enable not needed.
> >
> get_kernel_ns has WARN_ON(preemptible()) so at least preempt_disable()
> is needed, but all callers of the function disable interrupts.

OK. Its not necessary to disable interrupts (this is a left over of
kvm_guest_update_time).

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [RFC PATCH v2 1/2] add support for Hyper-V reference time counter
  2013-05-22  3:32       ` Marcelo Tosatti
@ 2013-05-22  3:38         ` Gleb Natapov
  2013-05-22 14:31           ` Marcelo Tosatti
  0 siblings, 1 reply; 44+ messages in thread
From: Gleb Natapov @ 2013-05-22  3:38 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: Vadim Rozenfeld, kvm, pl

On Wed, May 22, 2013 at 12:32:57AM -0300, Marcelo Tosatti wrote:
> On Wed, May 22, 2013 at 06:28:47AM +0300, Gleb Natapov wrote
> > > > +	case HV_X64_MSR_TIME_REF_COUNT:
> > > >  		r = true;
> > > >  		break;
> > > >  	}
> > > > @@ -1827,6 +1829,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
> > > >  		if (__copy_to_user((void __user *)addr, instructions, 4))
> > > >  			return 1;
> > > >  		kvm->arch.hv_hypercall = data;
> > > > +		local_irq_disable();
> > > > +		kvm->arch.hv_ref_count = get_kernel_ns();
> > > > +		local_irq_enable();
> > > > +		break;
> > > 
> > > local_irq_disable/local_irq_enable not needed.
> > >
> > get_kernel_ns has WARN_ON(preemptible()) so at least preempt_disable()
> > is needed, but all callers of the function disable interrupts.
> 
> OK. Its not necessary to disable interrupts (this is a left over of
> kvm_guest_update_time).
So preempt_disable() then? Should we change other callers?

--
			Gleb.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [RFC PATCH v2 2/2] add support for Hyper-V invariant TSC
  2013-05-22  0:50   ` Marcelo Tosatti
@ 2013-05-22  7:22     ` Vadim Rozenfeld
  2013-05-22 21:23       ` Marcelo Tosatti
  0 siblings, 1 reply; 44+ messages in thread
From: Vadim Rozenfeld @ 2013-05-22  7:22 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: kvm, gleb, pl



----- Original Message -----
From: "Marcelo Tosatti" <mtosatti@redhat.com>
To: "Vadim Rozenfeld" <vrozenfe@redhat.com>
Cc: kvm@vger.kernel.org, gleb@redhat.com, pl@dlh.net
Sent: Wednesday, May 22, 2013 10:50:46 AM
Subject: Re: [RFC PATCH v2 2/2] add support for Hyper-V invariant TSC

On Sun, May 19, 2013 at 05:06:37PM +1000, Vadim Rozenfeld wrote:
> The following patch allows to activate a partition reference 
> time enlightenment that is based on the host platform's support 
> for an Invariant Time Stamp Counter (iTSC).
> NOTE: This code will survive migration due to lack of VM stop/resume
> handlers, when offset, scale and sequence should be
> readjusted. 
> 
> ---
>  arch/x86/kvm/x86.c | 6 +++++-
>  1 file changed, 5 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 9645dab..b423fe4 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -1838,7 +1838,6 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>  		u64 gfn;
>  		unsigned long addr;
>  		HV_REFERENCE_TSC_PAGE tsc_ref;
> -		tsc_ref.TscSequence = 0;
>  		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) {
>  			kvm->arch.hv_tsc_page = data;
>  			break;
> @@ -1848,6 +1847,11 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>  				HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
>  		if (kvm_is_error_hva(addr))
>  			return 1;
> +		tsc_ref.TscSequence =
> +				boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? 1 : 0;

1) You want NONSTOP_TSC (see 40fb1715 commit) which matches INVARIANT TSC.
[VR]
Thank you for reviewing. Will fix it.
2) TscSequence should increase? 
"This field serves as a sequence number that is incremented whenever..."
[VR]
Yes, on every VM resume, including migration. After migration we also need
to recalculate scale and adjust offset. 
3) 0xFFFFFFFF is the value for invalid source of reference time?
[VR] Yes, on boot-up. In this case guest will go with PMTimer (not sure about HPET
but I can check). But if we set sequence to 0xFFFFFFFF after migration - it's probably will not work.

> +		tsc_ref.TscScale =
> +				((10000LL << 32) / vcpu->arch.virtual_tsc_khz) << 32;
> +		tsc_ref.TscOffset = 0;
>  		if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref)))
>  			return 1;
>  		mark_page_dirty(kvm, gfn);
> -- 
> 1.8.1.2

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [RFC PATCH v2 1/2] add support for Hyper-V reference time counter
  2013-05-22  0:46   ` Marcelo Tosatti
  2013-05-22  3:28     ` Gleb Natapov
@ 2013-05-22  7:32     ` Vadim Rozenfeld
  2013-05-22 21:55       ` Paolo Bonzini
  1 sibling, 1 reply; 44+ messages in thread
From: Vadim Rozenfeld @ 2013-05-22  7:32 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: kvm, gleb, pl



----- Original Message -----
From: "Marcelo Tosatti" <mtosatti@redhat.com>
To: "Vadim Rozenfeld" <vrozenfe@redhat.com>
Cc: kvm@vger.kernel.org, gleb@redhat.com, pl@dlh.net
Sent: Wednesday, May 22, 2013 10:46:14 AM
Subject: Re: [RFC PATCH v2 1/2] add support for Hyper-V reference time counter

On Sun, May 19, 2013 at 05:06:36PM +1000, Vadim Rozenfeld wrote:
> Signed-off: Peter Lieven <pl@dlh.net>
> Signed-off: Gleb Natapov <gleb@redhat.com>
> Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com>
> 
> v1 -> v2
> 1. mark TSC page dirty as suggested by 
>     Eric Northup <digitaleric@google.com> and Gleb
> 2. disable local irq when calling get_kernel_ns, 
>     as it was done by Peter Lieven <pl@dlhnet.de>
> 3. move check for TSC page enable from second patch
>     to this one.
>  
> ---
>  arch/x86/include/asm/kvm_host.h    |  2 ++
>  arch/x86/include/uapi/asm/hyperv.h | 14 ++++++++++++++
>  arch/x86/kvm/x86.c                 | 39 +++++++++++++++++++++++++++++++++++++-
>  include/uapi/linux/kvm.h           |  1 +
>  4 files changed, 55 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 3741c65..f0fee35 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -575,6 +575,8 @@ struct kvm_arch {
>  	/* fields used by HYPER-V emulation */
>  	u64 hv_guest_os_id;
>  	u64 hv_hypercall;
> +	u64 hv_ref_count;
> +	u64 hv_tsc_page;
>  
>  	#ifdef CONFIG_KVM_MMU_AUDIT
>  	int audit_point;
> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
> index b80420b..890dfc3 100644
> --- a/arch/x86/include/uapi/asm/hyperv.h
> +++ b/arch/x86/include/uapi/asm/hyperv.h
> @@ -136,6 +136,9 @@
>  /* MSR used to read the per-partition time reference counter */
>  #define HV_X64_MSR_TIME_REF_COUNT		0x40000020
>  
> +/* A partition's reference time stamp counter (TSC) page */
> +#define HV_X64_MSR_REFERENCE_TSC		0x40000021
> +
>  /* Define the virtual APIC registers */
>  #define HV_X64_MSR_EOI				0x40000070
>  #define HV_X64_MSR_ICR				0x40000071
> @@ -179,6 +182,9 @@
>  #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK	\
>  		(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
>  
> +#define HV_X64_MSR_TSC_REFERENCE_ENABLE			0x00000001
> +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT	12
> +
>  #define HV_PROCESSOR_POWER_STATE_C0		0
>  #define HV_PROCESSOR_POWER_STATE_C1		1
>  #define HV_PROCESSOR_POWER_STATE_C2		2
> @@ -191,4 +197,12 @@
>  #define HV_STATUS_INVALID_ALIGNMENT		4
>  #define HV_STATUS_INSUFFICIENT_BUFFERS		19
>  
> +typedef struct _HV_REFERENCE_TSC_PAGE {
> +	__u32 TscSequence;
> +	__u32 Rserved1;
> +	__u64 TscScale;
> +	__s64  TscOffset;
> +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
> +
> +
>  #endif
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 8d28810..9645dab 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -843,7 +843,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
>  static u32 msrs_to_save[] = {
>  	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
>  	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, not needed.
> -	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
> +	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT,
>  	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
>  	MSR_KVM_PV_EOI_EN,
>  	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
> @@ -1788,6 +1788,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
>  	switch (msr) {
>  	case HV_X64_MSR_GUEST_OS_ID:
>  	case HV_X64_MSR_HYPERCALL:
> +	case HV_X64_MSR_REFERENCE_TSC:
> +	case HV_X64_MSR_TIME_REF_COUNT:
>  		r = true;
>  		break;
>  	}
> @@ -1827,6 +1829,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>  		if (__copy_to_user((void __user *)addr, instructions, 4))
>  			return 1;
>  		kvm->arch.hv_hypercall = data;
> +		local_irq_disable();
> +		kvm->arch.hv_ref_count = get_kernel_ns();
> +		local_irq_enable();
> +		break;

local_irq_disable/local_irq_enable not needed.


What is the reasoning behind reading this time value at msr write time?
[VR] Windows writs this MSR only once, during HAL initialization.
So, I decided to treat this call as a partition crate event.

> +	}
> +	case HV_X64_MSR_REFERENCE_TSC: {
> +		u64 gfn;
> +		unsigned long addr;
> +		HV_REFERENCE_TSC_PAGE tsc_ref;
> +		tsc_ref.TscSequence = 0;
> +		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) {
> +			kvm->arch.hv_tsc_page = data;
> +			break;
> +		}
> +		gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
> +		addr = gfn_to_hva(kvm, data >>
> +				HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
> +		if (kvm_is_error_hva(addr))
> +			return 1;
> +		if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref)))
> +			return 1;
> +		mark_page_dirty(kvm, gfn);
> +		kvm->arch.hv_tsc_page = data;
>  		break;
>  	}
>  	default:
> @@ -2253,6 +2278,17 @@ static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
>  	case HV_X64_MSR_HYPERCALL:
>  		data = kvm->arch.hv_hypercall;
>  		break;
> +	case HV_X64_MSR_TIME_REF_COUNT: {
> +		u64 now_ns;
> +		local_irq_disable();
> +		now_ns = get_kernel_ns() + kvm->arch.kvmclock_offset;
> +		data = div_u64(now_ns - kvm->arch.hv_ref_count, 100);
> +		local_irq_enable();
> +		break;
> +	}

local_irq_disable/enable not needed.

It would be nice to have a testcase to compare reference tsc versus MSR.


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [RFC PATCH v2 1/2] add support for Hyper-V reference time counter
  2013-05-22  3:38         ` Gleb Natapov
@ 2013-05-22 14:31           ` Marcelo Tosatti
  0 siblings, 0 replies; 44+ messages in thread
From: Marcelo Tosatti @ 2013-05-22 14:31 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: Vadim Rozenfeld, kvm, pl

On Wed, May 22, 2013 at 06:38:47AM +0300, Gleb Natapov wrote:
> On Wed, May 22, 2013 at 12:32:57AM -0300, Marcelo Tosatti wrote:
> > On Wed, May 22, 2013 at 06:28:47AM +0300, Gleb Natapov wrote
> > > > > +	case HV_X64_MSR_TIME_REF_COUNT:
> > > > >  		r = true;
> > > > >  		break;
> > > > >  	}
> > > > > @@ -1827,6 +1829,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
> > > > >  		if (__copy_to_user((void __user *)addr, instructions, 4))
> > > > >  			return 1;
> > > > >  		kvm->arch.hv_hypercall = data;
> > > > > +		local_irq_disable();
> > > > > +		kvm->arch.hv_ref_count = get_kernel_ns();
> > > > > +		local_irq_enable();
> > > > > +		break;
> > > > 
> > > > local_irq_disable/local_irq_enable not needed.
> > > >
> > > get_kernel_ns has WARN_ON(preemptible()) so at least preempt_disable()
> > > is needed, but all callers of the function disable interrupts.
> > 
> > OK. Its not necessary to disable interrupts (this is a left over of
> > kvm_guest_update_time).
> So preempt_disable() then? Should we change other callers?

Yes. Should only disable interrupts if necessary (the case in
kvm_write_guest_time is necessary).



^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [RFC PATCH v2 2/2] add support for Hyper-V invariant TSC
  2013-05-22  7:22     ` Vadim Rozenfeld
@ 2013-05-22 21:23       ` Marcelo Tosatti
  2013-05-23  6:18         ` Peter Lieven
                           ` (2 more replies)
  0 siblings, 3 replies; 44+ messages in thread
From: Marcelo Tosatti @ 2013-05-22 21:23 UTC (permalink / raw)
  To: Vadim Rozenfeld; +Cc: kvm, gleb, pl

On Wed, May 22, 2013 at 03:22:55AM -0400, Vadim Rozenfeld wrote:
> 
> 
> ----- Original Message -----
> From: "Marcelo Tosatti" <mtosatti@redhat.com>
> To: "Vadim Rozenfeld" <vrozenfe@redhat.com>
> Cc: kvm@vger.kernel.org, gleb@redhat.com, pl@dlh.net
> Sent: Wednesday, May 22, 2013 10:50:46 AM
> Subject: Re: [RFC PATCH v2 2/2] add support for Hyper-V invariant TSC
> 
> On Sun, May 19, 2013 at 05:06:37PM +1000, Vadim Rozenfeld wrote:
> > The following patch allows to activate a partition reference 
> > time enlightenment that is based on the host platform's support 
> > for an Invariant Time Stamp Counter (iTSC).
> > NOTE: This code will survive migration due to lack of VM stop/resume
> > handlers, when offset, scale and sequence should be
> > readjusted. 
> > 
> > ---
> >  arch/x86/kvm/x86.c | 6 +++++-
> >  1 file changed, 5 insertions(+), 1 deletion(-)
> > 
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index 9645dab..b423fe4 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -1838,7 +1838,6 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
> >  		u64 gfn;
> >  		unsigned long addr;
> >  		HV_REFERENCE_TSC_PAGE tsc_ref;
> > -		tsc_ref.TscSequence = 0;
> >  		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) {
> >  			kvm->arch.hv_tsc_page = data;
> >  			break;
> > @@ -1848,6 +1847,11 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
> >  				HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
> >  		if (kvm_is_error_hva(addr))
> >  			return 1;
> > +		tsc_ref.TscSequence =
> > +				boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? 1 : 0;
> 
> 1) You want NONSTOP_TSC (see 40fb1715 commit) which matches INVARIANT TSC.
> [VR]
> Thank you for reviewing. Will fix it.
> 2) TscSequence should increase? 
> "This field serves as a sequence number that is incremented whenever..."
> [VR]
> Yes, on every VM resume, including migration. After migration we also need
> to recalculate scale and adjust offset. 
> 3) 0xFFFFFFFF is the value for invalid source of reference time?
> [VR] Yes, on boot-up. In this case guest will go with PMTimer (not sure about HPET
> but I can check). But if we set sequence to 0xFFFFFFFF after migration - it's probably will not work.

"Reference TSC during Save and Restore and Migration

To address migration scenarios to physical platforms that do not support
iTSC, the TscSequence field is used. In the event that a guest partition
is  migrated from an iTSC capable host to a non-iTSC capable host, the
hypervisor sets TscSequence to the special value of 0xFFFFFFFF, which
directs the guest operating system to fall back to a different clock
source (for example, the virtual PM timer)."

Why it would not/does not work after migration?



^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [RFC PATCH v2 1/2] add support for Hyper-V reference time counter
  2013-05-22  7:32     ` Vadim Rozenfeld
@ 2013-05-22 21:55       ` Paolo Bonzini
  2013-05-23  6:17         ` Peter Lieven
  0 siblings, 1 reply; 44+ messages in thread
From: Paolo Bonzini @ 2013-05-22 21:55 UTC (permalink / raw)
  To: Vadim Rozenfeld; +Cc: Marcelo Tosatti, kvm, gleb, pl

Il 22/05/2013 09:32, Vadim Rozenfeld ha scritto:
>> > @@ -1827,6 +1829,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>> >  		if (__copy_to_user((void __user *)addr, instructions, 4))
>> >  			return 1;
>> >  		kvm->arch.hv_hypercall = data;
>> > +		local_irq_disable();
>> > +		kvm->arch.hv_ref_count = get_kernel_ns();
>> > +		local_irq_enable();
>> > +		break;
> local_irq_disable/local_irq_enable not needed.
> 
> 
> What is the reasoning behind reading this time value at msr write time?
> [VR] Windows writs this MSR only once, during HAL initialization.
> So, I decided to treat this call as a partition crate event.
> 

But is it expected by Windows that the reference count starts counting
up from 0 at partition creation time?  If you could just use
(get_kernel_ns() + kvm->arch.kvmclock_offset) / 100, it would also be
simpler for migration purposes.

Paolo

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [RFC PATCH v2 1/2] add support for Hyper-V reference time counter
  2013-05-22 21:55       ` Paolo Bonzini
@ 2013-05-23  6:17         ` Peter Lieven
  2013-05-23  9:54           ` Paolo Bonzini
  2013-05-23 12:25           ` Vadim Rozenfeld
  0 siblings, 2 replies; 44+ messages in thread
From: Peter Lieven @ 2013-05-23  6:17 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: Vadim Rozenfeld, Marcelo Tosatti, kvm, gleb, pl

On 22.05.2013 23:55, Paolo Bonzini wrote:
> Il 22/05/2013 09:32, Vadim Rozenfeld ha scritto:
>>>> @@ -1827,6 +1829,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>>>>   		if (__copy_to_user((void __user *)addr, instructions, 4))
>>>>   			return 1;
>>>>   		kvm->arch.hv_hypercall = data;
>>>> +		local_irq_disable();
>>>> +		kvm->arch.hv_ref_count = get_kernel_ns();
>>>> +		local_irq_enable();
>>>> +		break;
>> local_irq_disable/local_irq_enable not needed.
>>
>>
>> What is the reasoning behind reading this time value at msr write time?
>> [VR] Windows writs this MSR only once, during HAL initialization.
>> So, I decided to treat this call as a partition crate event.
>>
>
> But is it expected by Windows that the reference count starts counting
> up from 0 at partition creation time?  If you could just use
> (get_kernel_ns() + kvm->arch.kvmclock_offset) / 100, it would also be
> simpler for migration purposes.

I can just report, that I have used the patch that does it that way and it works.
Maybe Windows is calculating the uptime by the reference counter?

Peter


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [RFC PATCH v2 2/2] add support for Hyper-V invariant TSC
  2013-05-22 21:23       ` Marcelo Tosatti
@ 2013-05-23  6:18         ` Peter Lieven
  2013-05-23  9:13           ` Gleb Natapov
  2013-05-23 12:33           ` Vadim Rozenfeld
  2013-05-23  9:12         ` Gleb Natapov
  2013-05-23 12:21         ` Vadim Rozenfeld
  2 siblings, 2 replies; 44+ messages in thread
From: Peter Lieven @ 2013-05-23  6:18 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: Vadim Rozenfeld, kvm, gleb, pl

On 22.05.2013 23:23, Marcelo Tosatti wrote:
> On Wed, May 22, 2013 at 03:22:55AM -0400, Vadim Rozenfeld wrote:
>>
>>
>> ----- Original Message -----
>> From: "Marcelo Tosatti" <mtosatti@redhat.com>
>> To: "Vadim Rozenfeld" <vrozenfe@redhat.com>
>> Cc: kvm@vger.kernel.org, gleb@redhat.com, pl@dlh.net
>> Sent: Wednesday, May 22, 2013 10:50:46 AM
>> Subject: Re: [RFC PATCH v2 2/2] add support for Hyper-V invariant TSC
>>
>> On Sun, May 19, 2013 at 05:06:37PM +1000, Vadim Rozenfeld wrote:
>>> The following patch allows to activate a partition reference
>>> time enlightenment that is based on the host platform's support
>>> for an Invariant Time Stamp Counter (iTSC).
>>> NOTE: This code will survive migration due to lack of VM stop/resume
>>> handlers, when offset, scale and sequence should be
>>> readjusted.
>>>
>>> ---
>>>   arch/x86/kvm/x86.c | 6 +++++-
>>>   1 file changed, 5 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>>> index 9645dab..b423fe4 100644
>>> --- a/arch/x86/kvm/x86.c
>>> +++ b/arch/x86/kvm/x86.c
>>> @@ -1838,7 +1838,6 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>>>   		u64 gfn;
>>>   		unsigned long addr;
>>>   		HV_REFERENCE_TSC_PAGE tsc_ref;
>>> -		tsc_ref.TscSequence = 0;
>>>   		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) {
>>>   			kvm->arch.hv_tsc_page = data;
>>>   			break;
>>> @@ -1848,6 +1847,11 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>>>   				HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
>>>   		if (kvm_is_error_hva(addr))
>>>   			return 1;
>>> +		tsc_ref.TscSequence =
>>> +				boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? 1 : 0;
>>
>> 1) You want NONSTOP_TSC (see 40fb1715 commit) which matches INVARIANT TSC.
>> [VR]
>> Thank you for reviewing. Will fix it.
>> 2) TscSequence should increase?
>> "This field serves as a sequence number that is incremented whenever..."
>> [VR]
>> Yes, on every VM resume, including migration. After migration we also need
>> to recalculate scale and adjust offset.
>> 3) 0xFFFFFFFF is the value for invalid source of reference time?
>> [VR] Yes, on boot-up. In this case guest will go with PMTimer (not sure about HPET
>> but I can check). But if we set sequence to 0xFFFFFFFF after migration - it's probably will not work.
>
> "Reference TSC during Save and Restore and Migration
>
> To address migration scenarios to physical platforms that do not support
> iTSC, the TscSequence field is used. In the event that a guest partition
> is  migrated from an iTSC capable host to a non-iTSC capable host, the
> hypervisor sets TscSequence to the special value of 0xFFFFFFFF, which
> directs the guest operating system to fall back to a different clock
> source (for example, the virtual PM timer)."
>
> Why it would not/does not work after migration?
>
>

what exactly do we heed the reference TSC for? the reference counter alone works great and it seems
that there is a lot of trouble and crash possibilities involved with the referece tsc.

Peter


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [RFC PATCH v2 2/2] add support for Hyper-V invariant TSC
  2013-05-22 21:23       ` Marcelo Tosatti
  2013-05-23  6:18         ` Peter Lieven
@ 2013-05-23  9:12         ` Gleb Natapov
  2013-05-23 13:53           ` Marcelo Tosatti
  2013-05-23 12:21         ` Vadim Rozenfeld
  2 siblings, 1 reply; 44+ messages in thread
From: Gleb Natapov @ 2013-05-23  9:12 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: Vadim Rozenfeld, kvm, pl

On Wed, May 22, 2013 at 06:23:30PM -0300, Marcelo Tosatti wrote:
> On Wed, May 22, 2013 at 03:22:55AM -0400, Vadim Rozenfeld wrote:
> > 
> > 
> > ----- Original Message -----
> > From: "Marcelo Tosatti" <mtosatti@redhat.com>
> > To: "Vadim Rozenfeld" <vrozenfe@redhat.com>
> > Cc: kvm@vger.kernel.org, gleb@redhat.com, pl@dlh.net
> > Sent: Wednesday, May 22, 2013 10:50:46 AM
> > Subject: Re: [RFC PATCH v2 2/2] add support for Hyper-V invariant TSC
> > 
> > On Sun, May 19, 2013 at 05:06:37PM +1000, Vadim Rozenfeld wrote:
> > > The following patch allows to activate a partition reference 
> > > time enlightenment that is based on the host platform's support 
> > > for an Invariant Time Stamp Counter (iTSC).
> > > NOTE: This code will survive migration due to lack of VM stop/resume
> > > handlers, when offset, scale and sequence should be
> > > readjusted. 
> > > 
> > > ---
> > >  arch/x86/kvm/x86.c | 6 +++++-
> > >  1 file changed, 5 insertions(+), 1 deletion(-)
> > > 
> > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > > index 9645dab..b423fe4 100644
> > > --- a/arch/x86/kvm/x86.c
> > > +++ b/arch/x86/kvm/x86.c
> > > @@ -1838,7 +1838,6 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
> > >  		u64 gfn;
> > >  		unsigned long addr;
> > >  		HV_REFERENCE_TSC_PAGE tsc_ref;
> > > -		tsc_ref.TscSequence = 0;
> > >  		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) {
> > >  			kvm->arch.hv_tsc_page = data;
> > >  			break;
> > > @@ -1848,6 +1847,11 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
> > >  				HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
> > >  		if (kvm_is_error_hva(addr))
> > >  			return 1;
> > > +		tsc_ref.TscSequence =
> > > +				boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? 1 : 0;
> > 
> > 1) You want NONSTOP_TSC (see 40fb1715 commit) which matches INVARIANT TSC.
> > [VR]
> > Thank you for reviewing. Will fix it.
> > 2) TscSequence should increase? 
> > "This field serves as a sequence number that is incremented whenever..."
> > [VR]
> > Yes, on every VM resume, including migration. After migration we also need
> > to recalculate scale and adjust offset. 
> > 3) 0xFFFFFFFF is the value for invalid source of reference time?
> > [VR] Yes, on boot-up. In this case guest will go with PMTimer (not sure about HPET
> > but I can check). But if we set sequence to 0xFFFFFFFF after migration - it's probably will not work.
> 
> "Reference TSC during Save and Restore and Migration
> 
> To address migration scenarios to physical platforms that do not support
> iTSC, the TscSequence field is used. In the event that a guest partition
> is  migrated from an iTSC capable host to a non-iTSC capable host, the
> hypervisor sets TscSequence to the special value of 0xFFFFFFFF, which
> directs the guest operating system to fall back to a different clock
> source (for example, the virtual PM timer)."
> 
> Why it would not/does not work after migration?
> 
Please read the whole discussion, we talked about it already. We
definitely do not want to fall back to PM timer either, we want to use
reference counter instead.

--
			Gleb.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [RFC PATCH v2 2/2] add support for Hyper-V invariant TSC
  2013-05-23  6:18         ` Peter Lieven
@ 2013-05-23  9:13           ` Gleb Natapov
  2013-05-23 13:35             ` Marcelo Tosatti
  2013-05-23 12:33           ` Vadim Rozenfeld
  1 sibling, 1 reply; 44+ messages in thread
From: Gleb Natapov @ 2013-05-23  9:13 UTC (permalink / raw)
  To: Peter Lieven; +Cc: Marcelo Tosatti, Vadim Rozenfeld, kvm, pl

On Thu, May 23, 2013 at 08:18:55AM +0200, Peter Lieven wrote:
> On 22.05.2013 23:23, Marcelo Tosatti wrote:
> >On Wed, May 22, 2013 at 03:22:55AM -0400, Vadim Rozenfeld wrote:
> >>
> >>
> >>----- Original Message -----
> >>From: "Marcelo Tosatti" <mtosatti@redhat.com>
> >>To: "Vadim Rozenfeld" <vrozenfe@redhat.com>
> >>Cc: kvm@vger.kernel.org, gleb@redhat.com, pl@dlh.net
> >>Sent: Wednesday, May 22, 2013 10:50:46 AM
> >>Subject: Re: [RFC PATCH v2 2/2] add support for Hyper-V invariant TSC
> >>
> >>On Sun, May 19, 2013 at 05:06:37PM +1000, Vadim Rozenfeld wrote:
> >>>The following patch allows to activate a partition reference
> >>>time enlightenment that is based on the host platform's support
> >>>for an Invariant Time Stamp Counter (iTSC).
> >>>NOTE: This code will survive migration due to lack of VM stop/resume
> >>>handlers, when offset, scale and sequence should be
> >>>readjusted.
> >>>
> >>>---
> >>>  arch/x86/kvm/x86.c | 6 +++++-
> >>>  1 file changed, 5 insertions(+), 1 deletion(-)
> >>>
> >>>diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> >>>index 9645dab..b423fe4 100644
> >>>--- a/arch/x86/kvm/x86.c
> >>>+++ b/arch/x86/kvm/x86.c
> >>>@@ -1838,7 +1838,6 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
> >>>  		u64 gfn;
> >>>  		unsigned long addr;
> >>>  		HV_REFERENCE_TSC_PAGE tsc_ref;
> >>>-		tsc_ref.TscSequence = 0;
> >>>  		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) {
> >>>  			kvm->arch.hv_tsc_page = data;
> >>>  			break;
> >>>@@ -1848,6 +1847,11 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
> >>>  				HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
> >>>  		if (kvm_is_error_hva(addr))
> >>>  			return 1;
> >>>+		tsc_ref.TscSequence =
> >>>+				boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? 1 : 0;
> >>
> >>1) You want NONSTOP_TSC (see 40fb1715 commit) which matches INVARIANT TSC.
> >>[VR]
> >>Thank you for reviewing. Will fix it.
> >>2) TscSequence should increase?
> >>"This field serves as a sequence number that is incremented whenever..."
> >>[VR]
> >>Yes, on every VM resume, including migration. After migration we also need
> >>to recalculate scale and adjust offset.
> >>3) 0xFFFFFFFF is the value for invalid source of reference time?
> >>[VR] Yes, on boot-up. In this case guest will go with PMTimer (not sure about HPET
> >>but I can check). But if we set sequence to 0xFFFFFFFF after migration - it's probably will not work.
> >
> >"Reference TSC during Save and Restore and Migration
> >
> >To address migration scenarios to physical platforms that do not support
> >iTSC, the TscSequence field is used. In the event that a guest partition
> >is  migrated from an iTSC capable host to a non-iTSC capable host, the
> >hypervisor sets TscSequence to the special value of 0xFFFFFFFF, which
> >directs the guest operating system to fall back to a different clock
> >source (for example, the virtual PM timer)."
> >
> >Why it would not/does not work after migration?
> >
> >
> 
> what exactly do we heed the reference TSC for? the reference counter alone works great and it seems
> that there is a lot of trouble and crash possibilities involved with the referece tsc.
> 
Reference TSC is even faster. There should be no crashed with proper
implementation.

--
			Gleb.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [RFC PATCH v2 1/2] add support for Hyper-V reference time counter
  2013-05-23  6:17         ` Peter Lieven
@ 2013-05-23  9:54           ` Paolo Bonzini
  2013-05-23 10:45             ` Peter Lieven
  2013-05-23 12:25           ` Vadim Rozenfeld
  1 sibling, 1 reply; 44+ messages in thread
From: Paolo Bonzini @ 2013-05-23  9:54 UTC (permalink / raw)
  To: Peter Lieven; +Cc: Vadim Rozenfeld, Marcelo Tosatti, kvm, gleb, pl

Il 23/05/2013 08:17, Peter Lieven ha scritto:
> On 22.05.2013 23:55, Paolo Bonzini wrote:
>> Il 22/05/2013 09:32, Vadim Rozenfeld ha scritto:
>>>>> @@ -1827,6 +1829,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu
>>>>> *vcpu, u32 msr, u64 data)
>>>>>           if (__copy_to_user((void __user *)addr, instructions, 4))
>>>>>               return 1;
>>>>>           kvm->arch.hv_hypercall = data;
>>>>> +        local_irq_disable();
>>>>> +        kvm->arch.hv_ref_count = get_kernel_ns();
>>>>> +        local_irq_enable();
>>>>> +        break;
>>> local_irq_disable/local_irq_enable not needed.
>>>
>>>
>>> What is the reasoning behind reading this time value at msr write time?
>>> [VR] Windows writs this MSR only once, during HAL initialization.
>>> So, I decided to treat this call as a partition crate event.
>>>
>>
>> But is it expected by Windows that the reference count starts counting
>> up from 0 at partition creation time?  If you could just use
>> (get_kernel_ns() + kvm->arch.kvmclock_offset) / 100, it would also be
>> simpler for migration purposes.
> 
> I can just report, that I have used the patch that does it that way and
> it works.

What do you mean by "that way"? :)

Paolo

> Maybe Windows is calculating the uptime by the reference counter?
> 
> Peter
> 


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [RFC PATCH v2 1/2] add support for Hyper-V reference time counter
  2013-05-23  9:54           ` Paolo Bonzini
@ 2013-05-23 10:45             ` Peter Lieven
  0 siblings, 0 replies; 44+ messages in thread
From: Peter Lieven @ 2013-05-23 10:45 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: Vadim Rozenfeld, Marcelo Tosatti, kvm, gleb, pl

On 23.05.2013 11:54, Paolo Bonzini wrote:
> Il 23/05/2013 08:17, Peter Lieven ha scritto:
>> On 22.05.2013 23:55, Paolo Bonzini wrote:
>>> Il 22/05/2013 09:32, Vadim Rozenfeld ha scritto:
>>>>>> @@ -1827,6 +1829,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu
>>>>>> *vcpu, u32 msr, u64 data)
>>>>>>            if (__copy_to_user((void __user *)addr, instructions, 4))
>>>>>>                return 1;
>>>>>>            kvm->arch.hv_hypercall = data;
>>>>>> +        local_irq_disable();
>>>>>> +        kvm->arch.hv_ref_count = get_kernel_ns();
>>>>>> +        local_irq_enable();
>>>>>> +        break;
>>>> local_irq_disable/local_irq_enable not needed.
>>>>
>>>>
>>>> What is the reasoning behind reading this time value at msr write time?
>>>> [VR] Windows writs this MSR only once, during HAL initialization.
>>>> So, I decided to treat this call as a partition crate event.
>>>>
>>>
>>> But is it expected by Windows that the reference count starts counting
>>> up from 0 at partition creation time?  If you could just use
>>> (get_kernel_ns() + kvm->arch.kvmclock_offset) / 100, it would also be
>>> simpler for migration purposes.
>>
>> I can just report, that I have used the patch that does it that way and
>> it works.
>
> What do you mean by "that way"? :)

Ups sorry… I meant the way it was implemented in the old patch (I sent a few days ago).

@@ -1426,6 +1428,21 @@ static int set_msr_hyperv_pw(struct kvm_
		if (__copy_to_user((void   *)addr, instructions, 4))
			return 1;
		kvm->arch.hv_hypercall = data;
+		kvm->arch.hv_ref_count = get_kernel_ns();
+ 		break;
+	}
+	case HV_X64_MSR_REFERENCE_TSC: {
+		u64 gfn;
+		unsigned long addr;
+		u32 hv_tsc_sequence;
+		gfn = data >> HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT;
+		addr = gfn_to_hva(kvm, gfn);
+		if (kvm_is_error_hva(addr))
+			return 1;
+		hv_tsc_sequence = 0x0; //invalid
+		if (__copy_to_user((void   *)addr, (void __user *) &hv_tsc_sequence, sizeof(hv_tsc_sequence)))
+			return 1;		
+		kvm->arch.hv_reference_tsc = data;
		break;
	}
	default:
@@ -1826,6 +1843,17 @@ static int get_msr_hyperv_pw(struct kvm_
	case HV_X64_MSR_HYPERCALL:
		data = kvm->arch.hv_hypercall;
		break;
+	case HV_X64_MSR_TIME_REF_COUNT: {
+		u64 now_ns;
+		local_irq_disable();
+		now_ns = get_kernel_ns();
+		data = div_u64(now_ns + kvm->arch.kvmclock_offset - kvm->arch.hv_ref_count,100);
+		local_irq_enable();
+		break;
+	}
+	case HV_X64_MSR_REFERENCE_TSC:
+		data = kvm->arch.hv_reference_tsc;
+		break;
	default:
		pr_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
		return 1;


Peter

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [RFC PATCH v2 2/2] add support for Hyper-V invariant TSC
  2013-05-22 21:23       ` Marcelo Tosatti
  2013-05-23  6:18         ` Peter Lieven
  2013-05-23  9:12         ` Gleb Natapov
@ 2013-05-23 12:21         ` Vadim Rozenfeld
  2013-05-23 13:47           ` Marcelo Tosatti
  2 siblings, 1 reply; 44+ messages in thread
From: Vadim Rozenfeld @ 2013-05-23 12:21 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: kvm, gleb, pl



----- Original Message -----
From: "Marcelo Tosatti" <mtosatti@redhat.com>
To: "Vadim Rozenfeld" <vrozenfe@redhat.com>
Cc: kvm@vger.kernel.org, gleb@redhat.com, pl@dlh.net
Sent: Thursday, May 23, 2013 7:23:30 AM
Subject: Re: [RFC PATCH v2 2/2] add support for Hyper-V invariant TSC

On Wed, May 22, 2013 at 03:22:55AM -0400, Vadim Rozenfeld wrote:
> 
> 
> ----- Original Message -----
> From: "Marcelo Tosatti" <mtosatti@redhat.com>
> To: "Vadim Rozenfeld" <vrozenfe@redhat.com>
> Cc: kvm@vger.kernel.org, gleb@redhat.com, pl@dlh.net
> Sent: Wednesday, May 22, 2013 10:50:46 AM
> Subject: Re: [RFC PATCH v2 2/2] add support for Hyper-V invariant TSC
> 
> On Sun, May 19, 2013 at 05:06:37PM +1000, Vadim Rozenfeld wrote:
> > The following patch allows to activate a partition reference 
> > time enlightenment that is based on the host platform's support 
> > for an Invariant Time Stamp Counter (iTSC).
> > NOTE: This code will survive migration due to lack of VM stop/resume
> > handlers, when offset, scale and sequence should be
> > readjusted. 
> > 
> > ---
> >  arch/x86/kvm/x86.c | 6 +++++-
> >  1 file changed, 5 insertions(+), 1 deletion(-)
> > 
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index 9645dab..b423fe4 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -1838,7 +1838,6 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
> >  		u64 gfn;
> >  		unsigned long addr;
> >  		HV_REFERENCE_TSC_PAGE tsc_ref;
> > -		tsc_ref.TscSequence = 0;
> >  		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) {
> >  			kvm->arch.hv_tsc_page = data;
> >  			break;
> > @@ -1848,6 +1847,11 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
> >  				HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
> >  		if (kvm_is_error_hva(addr))
> >  			return 1;
> > +		tsc_ref.TscSequence =
> > +				boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? 1 : 0;
> 
> 1) You want NONSTOP_TSC (see 40fb1715 commit) which matches INVARIANT TSC.
> [VR]
> Thank you for reviewing. Will fix it.
> 2) TscSequence should increase? 
> "This field serves as a sequence number that is incremented whenever..."
> [VR]
> Yes, on every VM resume, including migration. After migration we also need
> to recalculate scale and adjust offset. 
> 3) 0xFFFFFFFF is the value for invalid source of reference time?
> [VR] Yes, on boot-up. In this case guest will go with PMTimer (not sure about HPET
> but I can check). But if we set sequence to 0xFFFFFFFF after migration - it's probably will not work.

"Reference TSC during Save and Restore and Migration

To address migration scenarios to physical platforms that do not support
iTSC, the TscSequence field is used. In the event that a guest partition
is  migrated from an iTSC capable host to a non-iTSC capable host, the
hypervisor sets TscSequence to the special value of 0xFFFFFFFF, which
directs the guest operating system to fall back to a different clock
source (for example, the virtual PM timer)."

Why it would not/does not work after migration?

[VR]
Because of different frequencies, I think. 
Hyper-V reference counters and iTSC report
performance frequency equal to 10MHz,
which is obviously is not true for PM and HPET timers.
Windows calibrates timers on boot-up and you probably 
have no chance to do it after or during resume.  



^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [RFC PATCH v2 1/2] add support for Hyper-V reference time counter
  2013-05-23  6:17         ` Peter Lieven
  2013-05-23  9:54           ` Paolo Bonzini
@ 2013-05-23 12:25           ` Vadim Rozenfeld
  2013-05-23 13:18             ` Paolo Bonzini
  1 sibling, 1 reply; 44+ messages in thread
From: Vadim Rozenfeld @ 2013-05-23 12:25 UTC (permalink / raw)
  To: Peter Lieven; +Cc: Paolo Bonzini, Marcelo Tosatti, kvm, gleb, pl



----- Original Message -----
From: "Peter Lieven" <pl@dlhnet.de>
To: "Paolo Bonzini" <pbonzini@redhat.com>
Cc: "Vadim Rozenfeld" <vrozenfe@redhat.com>, "Marcelo Tosatti" <mtosatti@redhat.com>, kvm@vger.kernel.org, gleb@redhat.com, pl@dlh.net
Sent: Thursday, May 23, 2013 4:17:57 PM
Subject: Re: [RFC PATCH v2 1/2] add support for Hyper-V reference time counter

On 22.05.2013 23:55, Paolo Bonzini wrote:
> Il 22/05/2013 09:32, Vadim Rozenfeld ha scritto:
>>>> @@ -1827,6 +1829,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>>>>   		if (__copy_to_user((void __user *)addr, instructions, 4))
>>>>   			return 1;
>>>>   		kvm->arch.hv_hypercall = data;
>>>> +		local_irq_disable();
>>>> +		kvm->arch.hv_ref_count = get_kernel_ns();
>>>> +		local_irq_enable();
>>>> +		break;
>> local_irq_disable/local_irq_enable not needed.
>>
>>
>> What is the reasoning behind reading this time value at msr write time?
>> [VR] Windows writs this MSR only once, during HAL initialization.
>> So, I decided to treat this call as a partition crate event.
>>
>
> But is it expected by Windows that the reference count starts counting
> up from 0 at partition creation time?  If you could just use
> (get_kernel_ns() + kvm->arch.kvmclock_offset) / 100, it would also be
> simpler for migration purposes.

I can just report, that I have used the patch that does it that way and it works.
Maybe Windows is calculating the uptime by the reference counter?

[VR] 
Windows use it (reference counters/iTSC/PMTimer/HPET) as a time-stamp source
for (Ke)QueryPerformanceCounter function. 

Peter


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [RFC PATCH v2 2/2] add support for Hyper-V invariant TSC
  2013-05-23  6:18         ` Peter Lieven
  2013-05-23  9:13           ` Gleb Natapov
@ 2013-05-23 12:33           ` Vadim Rozenfeld
  2013-05-23 12:44             ` Peter Lieven
  1 sibling, 1 reply; 44+ messages in thread
From: Vadim Rozenfeld @ 2013-05-23 12:33 UTC (permalink / raw)
  To: Peter Lieven; +Cc: Marcelo Tosatti, kvm, gleb, pl



----- Original Message -----
From: "Peter Lieven" <pl@dlhnet.de>
To: "Marcelo Tosatti" <mtosatti@redhat.com>
Cc: "Vadim Rozenfeld" <vrozenfe@redhat.com>, kvm@vger.kernel.org, gleb@redhat.com, pl@dlh.net
Sent: Thursday, May 23, 2013 4:18:55 PM
Subject: Re: [RFC PATCH v2 2/2] add support for Hyper-V invariant TSC

On 22.05.2013 23:23, Marcelo Tosatti wrote:
> On Wed, May 22, 2013 at 03:22:55AM -0400, Vadim Rozenfeld wrote:
>>
>>
>> ----- Original Message -----
>> From: "Marcelo Tosatti" <mtosatti@redhat.com>
>> To: "Vadim Rozenfeld" <vrozenfe@redhat.com>
>> Cc: kvm@vger.kernel.org, gleb@redhat.com, pl@dlh.net
>> Sent: Wednesday, May 22, 2013 10:50:46 AM
>> Subject: Re: [RFC PATCH v2 2/2] add support for Hyper-V invariant TSC
>>
>> On Sun, May 19, 2013 at 05:06:37PM +1000, Vadim Rozenfeld wrote:
>>> The following patch allows to activate a partition reference
>>> time enlightenment that is based on the host platform's support
>>> for an Invariant Time Stamp Counter (iTSC).
>>> NOTE: This code will survive migration due to lack of VM stop/resume
>>> handlers, when offset, scale and sequence should be
>>> readjusted.
>>>
>>> ---
>>>   arch/x86/kvm/x86.c | 6 +++++-
>>>   1 file changed, 5 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>>> index 9645dab..b423fe4 100644
>>> --- a/arch/x86/kvm/x86.c
>>> +++ b/arch/x86/kvm/x86.c
>>> @@ -1838,7 +1838,6 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>>>   		u64 gfn;
>>>   		unsigned long addr;
>>>   		HV_REFERENCE_TSC_PAGE tsc_ref;
>>> -		tsc_ref.TscSequence = 0;
>>>   		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) {
>>>   			kvm->arch.hv_tsc_page = data;
>>>   			break;
>>> @@ -1848,6 +1847,11 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>>>   				HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
>>>   		if (kvm_is_error_hva(addr))
>>>   			return 1;
>>> +		tsc_ref.TscSequence =
>>> +				boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? 1 : 0;
>>
>> 1) You want NONSTOP_TSC (see 40fb1715 commit) which matches INVARIANT TSC.
>> [VR]
>> Thank you for reviewing. Will fix it.
>> 2) TscSequence should increase?
>> "This field serves as a sequence number that is incremented whenever..."
>> [VR]
>> Yes, on every VM resume, including migration. After migration we also need
>> to recalculate scale and adjust offset.
>> 3) 0xFFFFFFFF is the value for invalid source of reference time?
>> [VR] Yes, on boot-up. In this case guest will go with PMTimer (not sure about HPET
>> but I can check). But if we set sequence to 0xFFFFFFFF after migration - it's probably will not work.
>
> "Reference TSC during Save and Restore and Migration
>
> To address migration scenarios to physical platforms that do not support
> iTSC, the TscSequence field is used. In the event that a guest partition
> is  migrated from an iTSC capable host to a non-iTSC capable host, the
> hypervisor sets TscSequence to the special value of 0xFFFFFFFF, which
> directs the guest operating system to fall back to a different clock
> source (for example, the virtual PM timer)."
>
> Why it would not/does not work after migration?
>
>

what exactly do we heed the reference TSC for? the reference counter alone works great and it seems
that there is a lot of trouble and crash possibilities involved with the referece tsc.

[VR]
Because it is incredibly light and fast.
The simple test which calls QueryPerformanceCounter in a 
loop 10 millions times gives we the following results:
PMTimer     32269 ms
HPET        38466 ms
Ref Count   6499 ms
iTSC        1169 ms

Vadim.

Peter


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [RFC PATCH v2 2/2] add support for Hyper-V invariant TSC
  2013-05-23 12:33           ` Vadim Rozenfeld
@ 2013-05-23 12:44             ` Peter Lieven
  2013-05-23 12:45               ` Gleb Natapov
  2013-05-23 12:54               ` Vadim Rozenfeld
  0 siblings, 2 replies; 44+ messages in thread
From: Peter Lieven @ 2013-05-23 12:44 UTC (permalink / raw)
  To: Vadim Rozenfeld; +Cc: Marcelo Tosatti, kvm, gleb, pl

On 23.05.2013 14:33, Vadim Rozenfeld wrote:
>
>
> ----- Original Message -----
> From: "Peter Lieven" <pl@dlhnet.de>
> To: "Marcelo Tosatti" <mtosatti@redhat.com>
> Cc: "Vadim Rozenfeld" <vrozenfe@redhat.com>, kvm@vger.kernel.org, gleb@redhat.com, pl@dlh.net
> Sent: Thursday, May 23, 2013 4:18:55 PM
> Subject: Re: [RFC PATCH v2 2/2] add support for Hyper-V invariant TSC
>
> On 22.05.2013 23:23, Marcelo Tosatti wrote:
>> On Wed, May 22, 2013 at 03:22:55AM -0400, Vadim Rozenfeld wrote:
>>>
>>>
>>> ----- Original Message -----
>>> From: "Marcelo Tosatti" <mtosatti@redhat.com>
>>> To: "Vadim Rozenfeld" <vrozenfe@redhat.com>
>>> Cc: kvm@vger.kernel.org, gleb@redhat.com, pl@dlh.net
>>> Sent: Wednesday, May 22, 2013 10:50:46 AM
>>> Subject: Re: [RFC PATCH v2 2/2] add support for Hyper-V invariant TSC
>>>
>>> On Sun, May 19, 2013 at 05:06:37PM +1000, Vadim Rozenfeld wrote:
>>>> The following patch allows to activate a partition reference
>>>> time enlightenment that is based on the host platform's support
>>>> for an Invariant Time Stamp Counter (iTSC).
>>>> NOTE: This code will survive migration due to lack of VM stop/resume
>>>> handlers, when offset, scale and sequence should be
>>>> readjusted.
>>>>
>>>> ---
>>>>    arch/x86/kvm/x86.c | 6 +++++-
>>>>    1 file changed, 5 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>>>> index 9645dab..b423fe4 100644
>>>> --- a/arch/x86/kvm/x86.c
>>>> +++ b/arch/x86/kvm/x86.c
>>>> @@ -1838,7 +1838,6 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>>>>    		u64 gfn;
>>>>    		unsigned long addr;
>>>>    		HV_REFERENCE_TSC_PAGE tsc_ref;
>>>> -		tsc_ref.TscSequence = 0;
>>>>    		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) {
>>>>    			kvm->arch.hv_tsc_page = data;
>>>>    			break;
>>>> @@ -1848,6 +1847,11 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>>>>    				HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
>>>>    		if (kvm_is_error_hva(addr))
>>>>    			return 1;
>>>> +		tsc_ref.TscSequence =
>>>> +				boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? 1 : 0;
>>>
>>> 1) You want NONSTOP_TSC (see 40fb1715 commit) which matches INVARIANT TSC.
>>> [VR]
>>> Thank you for reviewing. Will fix it.
>>> 2) TscSequence should increase?
>>> "This field serves as a sequence number that is incremented whenever..."
>>> [VR]
>>> Yes, on every VM resume, including migration. After migration we also need
>>> to recalculate scale and adjust offset.
>>> 3) 0xFFFFFFFF is the value for invalid source of reference time?
>>> [VR] Yes, on boot-up. In this case guest will go with PMTimer (not sure about HPET
>>> but I can check). But if we set sequence to 0xFFFFFFFF after migration - it's probably will not work.
>>
>> "Reference TSC during Save and Restore and Migration
>>
>> To address migration scenarios to physical platforms that do not support
>> iTSC, the TscSequence field is used. In the event that a guest partition
>> is  migrated from an iTSC capable host to a non-iTSC capable host, the
>> hypervisor sets TscSequence to the special value of 0xFFFFFFFF, which
>> directs the guest operating system to fall back to a different clock
>> source (for example, the virtual PM timer)."
>>
>> Why it would not/does not work after migration?
>>
>>
>
> what exactly do we heed the reference TSC for? the reference counter alone works great and it seems
> that there is a lot of trouble and crash possibilities involved with the referece tsc.
>
> [VR]
> Because it is incredibly light and fast.
> The simple test which calls QueryPerformanceCounter in a
> loop 10 millions times gives we the following results:
> PMTimer     32269 ms
> HPET        38466 ms
> Ref Count   6499 ms
> iTSC        1169 ms

is the ref_count with local_irq_disable or preempt_disable?

Peter


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [RFC PATCH v2 2/2] add support for Hyper-V invariant TSC
  2013-05-23 12:44             ` Peter Lieven
@ 2013-05-23 12:45               ` Gleb Natapov
  2013-05-23 12:54               ` Vadim Rozenfeld
  1 sibling, 0 replies; 44+ messages in thread
From: Gleb Natapov @ 2013-05-23 12:45 UTC (permalink / raw)
  To: Peter Lieven; +Cc: Vadim Rozenfeld, Marcelo Tosatti, kvm, pl

On Thu, May 23, 2013 at 02:44:14PM +0200, Peter Lieven wrote:
> On 23.05.2013 14:33, Vadim Rozenfeld wrote:
> >
> >
> >----- Original Message -----
> >From: "Peter Lieven" <pl@dlhnet.de>
> >To: "Marcelo Tosatti" <mtosatti@redhat.com>
> >Cc: "Vadim Rozenfeld" <vrozenfe@redhat.com>, kvm@vger.kernel.org, gleb@redhat.com, pl@dlh.net
> >Sent: Thursday, May 23, 2013 4:18:55 PM
> >Subject: Re: [RFC PATCH v2 2/2] add support for Hyper-V invariant TSC
> >
> >On 22.05.2013 23:23, Marcelo Tosatti wrote:
> >>On Wed, May 22, 2013 at 03:22:55AM -0400, Vadim Rozenfeld wrote:
> >>>
> >>>
> >>>----- Original Message -----
> >>>From: "Marcelo Tosatti" <mtosatti@redhat.com>
> >>>To: "Vadim Rozenfeld" <vrozenfe@redhat.com>
> >>>Cc: kvm@vger.kernel.org, gleb@redhat.com, pl@dlh.net
> >>>Sent: Wednesday, May 22, 2013 10:50:46 AM
> >>>Subject: Re: [RFC PATCH v2 2/2] add support for Hyper-V invariant TSC
> >>>
> >>>On Sun, May 19, 2013 at 05:06:37PM +1000, Vadim Rozenfeld wrote:
> >>>>The following patch allows to activate a partition reference
> >>>>time enlightenment that is based on the host platform's support
> >>>>for an Invariant Time Stamp Counter (iTSC).
> >>>>NOTE: This code will survive migration due to lack of VM stop/resume
> >>>>handlers, when offset, scale and sequence should be
> >>>>readjusted.
> >>>>
> >>>>---
> >>>>   arch/x86/kvm/x86.c | 6 +++++-
> >>>>   1 file changed, 5 insertions(+), 1 deletion(-)
> >>>>
> >>>>diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> >>>>index 9645dab..b423fe4 100644
> >>>>--- a/arch/x86/kvm/x86.c
> >>>>+++ b/arch/x86/kvm/x86.c
> >>>>@@ -1838,7 +1838,6 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
> >>>>   		u64 gfn;
> >>>>   		unsigned long addr;
> >>>>   		HV_REFERENCE_TSC_PAGE tsc_ref;
> >>>>-		tsc_ref.TscSequence = 0;
> >>>>   		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) {
> >>>>   			kvm->arch.hv_tsc_page = data;
> >>>>   			break;
> >>>>@@ -1848,6 +1847,11 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
> >>>>   				HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
> >>>>   		if (kvm_is_error_hva(addr))
> >>>>   			return 1;
> >>>>+		tsc_ref.TscSequence =
> >>>>+				boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? 1 : 0;
> >>>
> >>>1) You want NONSTOP_TSC (see 40fb1715 commit) which matches INVARIANT TSC.
> >>>[VR]
> >>>Thank you for reviewing. Will fix it.
> >>>2) TscSequence should increase?
> >>>"This field serves as a sequence number that is incremented whenever..."
> >>>[VR]
> >>>Yes, on every VM resume, including migration. After migration we also need
> >>>to recalculate scale and adjust offset.
> >>>3) 0xFFFFFFFF is the value for invalid source of reference time?
> >>>[VR] Yes, on boot-up. In this case guest will go with PMTimer (not sure about HPET
> >>>but I can check). But if we set sequence to 0xFFFFFFFF after migration - it's probably will not work.
> >>
> >>"Reference TSC during Save and Restore and Migration
> >>
> >>To address migration scenarios to physical platforms that do not support
> >>iTSC, the TscSequence field is used. In the event that a guest partition
> >>is  migrated from an iTSC capable host to a non-iTSC capable host, the
> >>hypervisor sets TscSequence to the special value of 0xFFFFFFFF, which
> >>directs the guest operating system to fall back to a different clock
> >>source (for example, the virtual PM timer)."
> >>
> >>Why it would not/does not work after migration?
> >>
> >>
> >
> >what exactly do we heed the reference TSC for? the reference counter alone works great and it seems
> >that there is a lot of trouble and crash possibilities involved with the referece tsc.
> >
> >[VR]
> >Because it is incredibly light and fast.
> >The simple test which calls QueryPerformanceCounter in a
> >loop 10 millions times gives we the following results:
> >PMTimer     32269 ms
> >HPET        38466 ms
> >Ref Count   6499 ms
> >iTSC        1169 ms
> 
> is the ref_count with local_irq_disable or preempt_disable?
> 
irq disabling cannot account for such big difference.

--
			Gleb.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [RFC PATCH v2 2/2] add support for Hyper-V invariant TSC
  2013-05-23 12:44             ` Peter Lieven
  2013-05-23 12:45               ` Gleb Natapov
@ 2013-05-23 12:54               ` Vadim Rozenfeld
  1 sibling, 0 replies; 44+ messages in thread
From: Vadim Rozenfeld @ 2013-05-23 12:54 UTC (permalink / raw)
  To: Peter Lieven; +Cc: Marcelo Tosatti, kvm, gleb, pl



----- Original Message -----
From: "Peter Lieven" <pl@dlhnet.de>
To: "Vadim Rozenfeld" <vrozenfe@redhat.com>
Cc: "Marcelo Tosatti" <mtosatti@redhat.com>, kvm@vger.kernel.org, gleb@redhat.com, pl@dlh.net
Sent: Thursday, May 23, 2013 10:44:14 PM
Subject: Re: [RFC PATCH v2 2/2] add support for Hyper-V invariant TSC

On 23.05.2013 14:33, Vadim Rozenfeld wrote:
>
>
> ----- Original Message -----
> From: "Peter Lieven" <pl@dlhnet.de>
> To: "Marcelo Tosatti" <mtosatti@redhat.com>
> Cc: "Vadim Rozenfeld" <vrozenfe@redhat.com>, kvm@vger.kernel.org, gleb@redhat.com, pl@dlh.net
> Sent: Thursday, May 23, 2013 4:18:55 PM
> Subject: Re: [RFC PATCH v2 2/2] add support for Hyper-V invariant TSC
>
> On 22.05.2013 23:23, Marcelo Tosatti wrote:
>> On Wed, May 22, 2013 at 03:22:55AM -0400, Vadim Rozenfeld wrote:
>>>
>>>
>>> ----- Original Message -----
>>> From: "Marcelo Tosatti" <mtosatti@redhat.com>
>>> To: "Vadim Rozenfeld" <vrozenfe@redhat.com>
>>> Cc: kvm@vger.kernel.org, gleb@redhat.com, pl@dlh.net
>>> Sent: Wednesday, May 22, 2013 10:50:46 AM
>>> Subject: Re: [RFC PATCH v2 2/2] add support for Hyper-V invariant TSC
>>>
>>> On Sun, May 19, 2013 at 05:06:37PM +1000, Vadim Rozenfeld wrote:
>>>> The following patch allows to activate a partition reference
>>>> time enlightenment that is based on the host platform's support
>>>> for an Invariant Time Stamp Counter (iTSC).
>>>> NOTE: This code will survive migration due to lack of VM stop/resume
>>>> handlers, when offset, scale and sequence should be
>>>> readjusted.
>>>>
>>>> ---
>>>>    arch/x86/kvm/x86.c | 6 +++++-
>>>>    1 file changed, 5 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>>>> index 9645dab..b423fe4 100644
>>>> --- a/arch/x86/kvm/x86.c
>>>> +++ b/arch/x86/kvm/x86.c
>>>> @@ -1838,7 +1838,6 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>>>>    		u64 gfn;
>>>>    		unsigned long addr;
>>>>    		HV_REFERENCE_TSC_PAGE tsc_ref;
>>>> -		tsc_ref.TscSequence = 0;
>>>>    		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) {
>>>>    			kvm->arch.hv_tsc_page = data;
>>>>    			break;
>>>> @@ -1848,6 +1847,11 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>>>>    				HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
>>>>    		if (kvm_is_error_hva(addr))
>>>>    			return 1;
>>>> +		tsc_ref.TscSequence =
>>>> +				boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? 1 : 0;
>>>
>>> 1) You want NONSTOP_TSC (see 40fb1715 commit) which matches INVARIANT TSC.
>>> [VR]
>>> Thank you for reviewing. Will fix it.
>>> 2) TscSequence should increase?
>>> "This field serves as a sequence number that is incremented whenever..."
>>> [VR]
>>> Yes, on every VM resume, including migration. After migration we also need
>>> to recalculate scale and adjust offset.
>>> 3) 0xFFFFFFFF is the value for invalid source of reference time?
>>> [VR] Yes, on boot-up. In this case guest will go with PMTimer (not sure about HPET
>>> but I can check). But if we set sequence to 0xFFFFFFFF after migration - it's probably will not work.
>>
>> "Reference TSC during Save and Restore and Migration
>>
>> To address migration scenarios to physical platforms that do not support
>> iTSC, the TscSequence field is used. In the event that a guest partition
>> is  migrated from an iTSC capable host to a non-iTSC capable host, the
>> hypervisor sets TscSequence to the special value of 0xFFFFFFFF, which
>> directs the guest operating system to fall back to a different clock
>> source (for example, the virtual PM timer)."
>>
>> Why it would not/does not work after migration?
>>
>>
>
> what exactly do we heed the reference TSC for? the reference counter alone works great and it seems
> that there is a lot of trouble and crash possibilities involved with the referece tsc.
>
> [VR]
> Because it is incredibly light and fast.
> The simple test which calls QueryPerformanceCounter in a
> loop 10 millions times gives we the following results:
> PMTimer     32269 ms
> HPET        38466 ms
> Ref Count   6499 ms
> iTSC        1169 ms

is the ref_count with local_irq_disable or preempt_disable?
[VR]
local_irq_disable

Peter


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [RFC PATCH v2 1/2] add support for Hyper-V reference time counter
  2013-05-23 12:25           ` Vadim Rozenfeld
@ 2013-05-23 13:18             ` Paolo Bonzini
  2013-05-23 13:20               ` Peter Lieven
  0 siblings, 1 reply; 44+ messages in thread
From: Paolo Bonzini @ 2013-05-23 13:18 UTC (permalink / raw)
  To: Vadim Rozenfeld; +Cc: Peter Lieven, Marcelo Tosatti, kvm, gleb, pl

Il 23/05/2013 14:25, Vadim Rozenfeld ha scritto:
> 
> 
> ----- Original Message -----
> From: "Peter Lieven" <pl@dlhnet.de>
> To: "Paolo Bonzini" <pbonzini@redhat.com>
> Cc: "Vadim Rozenfeld" <vrozenfe@redhat.com>, "Marcelo Tosatti" <mtosatti@redhat.com>, kvm@vger.kernel.org, gleb@redhat.com, pl@dlh.net
> Sent: Thursday, May 23, 2013 4:17:57 PM
> Subject: Re: [RFC PATCH v2 1/2] add support for Hyper-V reference time counter
> 
> On 22.05.2013 23:55, Paolo Bonzini wrote:
>> Il 22/05/2013 09:32, Vadim Rozenfeld ha scritto:
>>>>> @@ -1827,6 +1829,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>>>>>   		if (__copy_to_user((void __user *)addr, instructions, 4))
>>>>>   			return 1;
>>>>>   		kvm->arch.hv_hypercall = data;
>>>>> +		local_irq_disable();
>>>>> +		kvm->arch.hv_ref_count = get_kernel_ns();
>>>>> +		local_irq_enable();
>>>>> +		break;
>>> local_irq_disable/local_irq_enable not needed.
>>>
>>>
>>> What is the reasoning behind reading this time value at msr write time?
>>> [VR] Windows writs this MSR only once, during HAL initialization.
>>> So, I decided to treat this call as a partition crate event.
>>>
>>
>> But is it expected by Windows that the reference count starts counting
>> up from 0 at partition creation time?  If you could just use
>> (get_kernel_ns() + kvm->arch.kvmclock_offset) / 100, it would also be
>> simpler for migration purposes.
> 
> I can just report, that I have used the patch that does it that way and it works.
> Maybe Windows is calculating the uptime by the reference counter?
> 
> [VR] 
> Windows use it (reference counters/iTSC/PMTimer/HPET) as a time-stamp source
> for (Ke)QueryPerformanceCounter function. 

So I would prefer to remove kvm->arch.hv_ref_count altogether.

Paolo


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [RFC PATCH v2 1/2] add support for Hyper-V reference time counter
  2013-05-23 13:18             ` Paolo Bonzini
@ 2013-05-23 13:20               ` Peter Lieven
  2013-05-23 13:23                 ` Paolo Bonzini
  0 siblings, 1 reply; 44+ messages in thread
From: Peter Lieven @ 2013-05-23 13:20 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: Vadim Rozenfeld, Marcelo Tosatti, kvm, gleb, pl

On 23.05.2013 15:18, Paolo Bonzini wrote:
> Il 23/05/2013 14:25, Vadim Rozenfeld ha scritto:
>>
>>
>> ----- Original Message -----
>> From: "Peter Lieven" <pl@dlhnet.de>
>> To: "Paolo Bonzini" <pbonzini@redhat.com>
>> Cc: "Vadim Rozenfeld" <vrozenfe@redhat.com>, "Marcelo Tosatti" <mtosatti@redhat.com>, kvm@vger.kernel.org, gleb@redhat.com, pl@dlh.net
>> Sent: Thursday, May 23, 2013 4:17:57 PM
>> Subject: Re: [RFC PATCH v2 1/2] add support for Hyper-V reference time counter
>>
>> On 22.05.2013 23:55, Paolo Bonzini wrote:
>>> Il 22/05/2013 09:32, Vadim Rozenfeld ha scritto:
>>>>>> @@ -1827,6 +1829,29 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>>>>>>    		if (__copy_to_user((void __user *)addr, instructions, 4))
>>>>>>    			return 1;
>>>>>>    		kvm->arch.hv_hypercall = data;
>>>>>> +		local_irq_disable();
>>>>>> +		kvm->arch.hv_ref_count = get_kernel_ns();
>>>>>> +		local_irq_enable();
>>>>>> +		break;
>>>> local_irq_disable/local_irq_enable not needed.
>>>>
>>>>
>>>> What is the reasoning behind reading this time value at msr write time?
>>>> [VR] Windows writs this MSR only once, during HAL initialization.
>>>> So, I decided to treat this call as a partition crate event.
>>>>
>>>
>>> But is it expected by Windows that the reference count starts counting
>>> up from 0 at partition creation time?  If you could just use
>>> (get_kernel_ns() + kvm->arch.kvmclock_offset) / 100, it would also be
>>> simpler for migration purposes.
>>
>> I can just report, that I have used the patch that does it that way and it works.
>> Maybe Windows is calculating the uptime by the reference counter?
>>
>> [VR]
>> Windows use it (reference counters/iTSC/PMTimer/HPET) as a time-stamp source
>> for (Ke)QueryPerformanceCounter function.
>
> So I would prefer to remove kvm->arch.hv_ref_count altogether.

But only if the migration support is guaranteed.
And what if we have a host which lacks invariant TSC support?

Peter


>
> Paolo
>


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [RFC PATCH v2 1/2] add support for Hyper-V reference time counter
  2013-05-23 13:20               ` Peter Lieven
@ 2013-05-23 13:23                 ` Paolo Bonzini
  2013-05-23 13:30                   ` Peter Lieven
  0 siblings, 1 reply; 44+ messages in thread
From: Paolo Bonzini @ 2013-05-23 13:23 UTC (permalink / raw)
  To: Peter Lieven; +Cc: Vadim Rozenfeld, Marcelo Tosatti, kvm, gleb, pl

Il 23/05/2013 15:20, Peter Lieven ha scritto:
> On 23.05.2013 15:18, Paolo Bonzini wrote:
>> Il 23/05/2013 14:25, Vadim Rozenfeld ha scritto:
>>>
>>>
>>> ----- Original Message -----
>>> From: "Peter Lieven" <pl@dlhnet.de>
>>> To: "Paolo Bonzini" <pbonzini@redhat.com>
>>> Cc: "Vadim Rozenfeld" <vrozenfe@redhat.com>, "Marcelo Tosatti"
>>> <mtosatti@redhat.com>, kvm@vger.kernel.org, gleb@redhat.com, pl@dlh.net
>>> Sent: Thursday, May 23, 2013 4:17:57 PM
>>> Subject: Re: [RFC PATCH v2 1/2] add support for Hyper-V reference
>>> time counter
>>>
>>> On 22.05.2013 23:55, Paolo Bonzini wrote:
>>>> Il 22/05/2013 09:32, Vadim Rozenfeld ha scritto:
>>>>>>> @@ -1827,6 +1829,29 @@ static int set_msr_hyperv_pw(struct
>>>>>>> kvm_vcpu *vcpu, u32 msr, u64 data)
>>>>>>>            if (__copy_to_user((void __user *)addr, instructions, 4))
>>>>>>>                return 1;
>>>>>>>            kvm->arch.hv_hypercall = data;
>>>>>>> +        local_irq_disable();
>>>>>>> +        kvm->arch.hv_ref_count = get_kernel_ns();
>>>>>>> +        local_irq_enable();
>>>>>>> +        break;
>>>>> local_irq_disable/local_irq_enable not needed.
>>>>>
>>>>>
>>>>> What is the reasoning behind reading this time value at msr write
>>>>> time?
>>>>> [VR] Windows writs this MSR only once, during HAL initialization.
>>>>> So, I decided to treat this call as a partition crate event.
>>>>>
>>>>
>>>> But is it expected by Windows that the reference count starts counting
>>>> up from 0 at partition creation time?  If you could just use
>>>> (get_kernel_ns() + kvm->arch.kvmclock_offset) / 100, it would also be
>>>> simpler for migration purposes.
>>>
>>> I can just report, that I have used the patch that does it that way
>>> and it works.
>>> Maybe Windows is calculating the uptime by the reference counter?
>>>
>>> [VR]
>>> Windows use it (reference counters/iTSC/PMTimer/HPET) as a time-stamp
>>> source
>>> for (Ke)QueryPerformanceCounter function.
>>
>> So I would prefer to remove kvm->arch.hv_ref_count altogether.
> 
> But only if the migration support is guaranteed.

Migration support wouldn't work yet anyway, you need to recompute the
scale and sequence.  But that could be done by KVM_SET_CLOCK.

> And what if we have a host which lacks invariant TSC support?

Then the sequence must be set to 0 or 0xFFFFFFFF, I still haven't
understood. :)

Paolo

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [RFC PATCH v2 1/2] add support for Hyper-V reference time counter
  2013-05-23 13:23                 ` Paolo Bonzini
@ 2013-05-23 13:30                   ` Peter Lieven
  2013-05-23 13:40                     ` Paolo Bonzini
  0 siblings, 1 reply; 44+ messages in thread
From: Peter Lieven @ 2013-05-23 13:30 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: Vadim Rozenfeld, Marcelo Tosatti, kvm, gleb, pl

On 23.05.2013 15:23, Paolo Bonzini wrote:
> Il 23/05/2013 15:20, Peter Lieven ha scritto:
>> On 23.05.2013 15:18, Paolo Bonzini wrote:
>>> Il 23/05/2013 14:25, Vadim Rozenfeld ha scritto:
>>>>
>>>>
>>>> ----- Original Message -----
>>>> From: "Peter Lieven" <pl@dlhnet.de>
>>>> To: "Paolo Bonzini" <pbonzini@redhat.com>
>>>> Cc: "Vadim Rozenfeld" <vrozenfe@redhat.com>, "Marcelo Tosatti"
>>>> <mtosatti@redhat.com>, kvm@vger.kernel.org, gleb@redhat.com, pl@dlh.net
>>>> Sent: Thursday, May 23, 2013 4:17:57 PM
>>>> Subject: Re: [RFC PATCH v2 1/2] add support for Hyper-V reference
>>>> time counter
>>>>
>>>> On 22.05.2013 23:55, Paolo Bonzini wrote:
>>>>> Il 22/05/2013 09:32, Vadim Rozenfeld ha scritto:
>>>>>>>> @@ -1827,6 +1829,29 @@ static int set_msr_hyperv_pw(struct
>>>>>>>> kvm_vcpu *vcpu, u32 msr, u64 data)
>>>>>>>>             if (__copy_to_user((void __user *)addr, instructions, 4))
>>>>>>>>                 return 1;
>>>>>>>>             kvm->arch.hv_hypercall = data;
>>>>>>>> +        local_irq_disable();
>>>>>>>> +        kvm->arch.hv_ref_count = get_kernel_ns();
>>>>>>>> +        local_irq_enable();
>>>>>>>> +        break;
>>>>>> local_irq_disable/local_irq_enable not needed.
>>>>>>
>>>>>>
>>>>>> What is the reasoning behind reading this time value at msr write
>>>>>> time?
>>>>>> [VR] Windows writs this MSR only once, during HAL initialization.
>>>>>> So, I decided to treat this call as a partition crate event.
>>>>>>
>>>>>
>>>>> But is it expected by Windows that the reference count starts counting
>>>>> up from 0 at partition creation time?  If you could just use
>>>>> (get_kernel_ns() + kvm->arch.kvmclock_offset) / 100, it would also be
>>>>> simpler for migration purposes.
>>>>
>>>> I can just report, that I have used the patch that does it that way
>>>> and it works.
>>>> Maybe Windows is calculating the uptime by the reference counter?
>>>>
>>>> [VR]
>>>> Windows use it (reference counters/iTSC/PMTimer/HPET) as a time-stamp
>>>> source
>>>> for (Ke)QueryPerformanceCounter function.
>>>
>>> So I would prefer to remove kvm->arch.hv_ref_count altogether.
>>
>> But only if the migration support is guaranteed.
>
> Migration support wouldn't work yet anyway, you need to recompute the
> scale and sequence.  But that could be done by KVM_SET_CLOCK.

hv_ref_counter does work out of the box. what I was trying to say is
even it is slower than iTSC, it is significantly faster than hpet
or pmtimer and I can confirm it works flawlessly with migration.

>
>> And what if we have a host which lacks invariant TSC support?
>
> Then the sequence must be set to 0 or 0xFFFFFFFF, I still haven't
> understood. :)

yes, but windows does then fall back to pmtimer or hpet which is much slower
then reference counter.

Peter

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [RFC PATCH v2 2/2] add support for Hyper-V invariant TSC
  2013-05-23  9:13           ` Gleb Natapov
@ 2013-05-23 13:35             ` Marcelo Tosatti
  2013-05-23 15:14               ` Gleb Natapov
  2013-05-24  9:57               ` Vadim Rozenfeld
  0 siblings, 2 replies; 44+ messages in thread
From: Marcelo Tosatti @ 2013-05-23 13:35 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: Peter Lieven, Vadim Rozenfeld, kvm, pl

On Thu, May 23, 2013 at 12:13:16PM +0300, Gleb Natapov wrote:
> > >
> > >"Reference TSC during Save and Restore and Migration
> > >
> > >To address migration scenarios to physical platforms that do not support
> > >iTSC, the TscSequence field is used. In the event that a guest partition
> > >is  migrated from an iTSC capable host to a non-iTSC capable host, the
> > >hypervisor sets TscSequence to the special value of 0xFFFFFFFF, which
> > >directs the guest operating system to fall back to a different clock
> > >source (for example, the virtual PM timer)."
> > >
> > >Why it would not/does not work after migration?
> > >
> > >
> > 
> > what exactly do we heed the reference TSC for? the reference counter alone works great and it seems
> > that there is a lot of trouble and crash possibilities involved with the referece tsc.
> > 
> Reference TSC is even faster. There should be no crashed with proper
> implementation.
> 
> --
> 			Gleb.

Lack of invariant TSC support in the host.


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [RFC PATCH v2 1/2] add support for Hyper-V reference time counter
  2013-05-23 13:30                   ` Peter Lieven
@ 2013-05-23 13:40                     ` Paolo Bonzini
  0 siblings, 0 replies; 44+ messages in thread
From: Paolo Bonzini @ 2013-05-23 13:40 UTC (permalink / raw)
  To: Peter Lieven; +Cc: Vadim Rozenfeld, Marcelo Tosatti, kvm, gleb, pl

Il 23/05/2013 15:30, Peter Lieven ha scritto:
>>>> So I would prefer to remove kvm->arch.hv_ref_count altogether.
>>>
>>> But only if the migration support is guaranteed.
>>
>> Migration support wouldn't work yet anyway, you need to recompute the
>> scale and sequence.  But that could be done by KVM_SET_CLOCK.
> 
> hv_ref_counter does work out of the box. what I was trying to say is
> even it is slower than iTSC, it is significantly faster than hpet
> or pmtimer and I can confirm it works flawlessly with migration.

I can't see how without a userspace patch to migrate hv_ref_count.

>>> And what if we have a host which lacks invariant TSC support?
>>
>> Then the sequence must be set to 0 or 0xFFFFFFFF, I still haven't
>> understood. :)
> 
> yes, but windows does then fall back to pmtimer or hpet which is much
> slower then reference counter.

IIUC, with 0 it doesn't.

Paolo


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [RFC PATCH v2 2/2] add support for Hyper-V invariant TSC
  2013-05-23 12:21         ` Vadim Rozenfeld
@ 2013-05-23 13:47           ` Marcelo Tosatti
  2013-05-24 10:01             ` Vadim Rozenfeld
  0 siblings, 1 reply; 44+ messages in thread
From: Marcelo Tosatti @ 2013-05-23 13:47 UTC (permalink / raw)
  To: Vadim Rozenfeld; +Cc: kvm, gleb, pl

On Thu, May 23, 2013 at 08:21:29AM -0400, Vadim Rozenfeld wrote:
> > > @@ -1848,6 +1847,11 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
> > >  				HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
> > >  		if (kvm_is_error_hva(addr))
> > >  			return 1;
> > > +		tsc_ref.TscSequence =
> > > +				boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? 1 : 0;
> > 
> > 1) You want NONSTOP_TSC (see 40fb1715 commit) which matches INVARIANT TSC.
> > [VR]
> > Thank you for reviewing. Will fix it.
> > 2) TscSequence should increase? 
> > "This field serves as a sequence number that is incremented whenever..."
> > [VR]
> > Yes, on every VM resume, including migration. After migration we also need
> > to recalculate scale and adjust offset. 
> > 3) 0xFFFFFFFF is the value for invalid source of reference time?
> > [VR] Yes, on boot-up. In this case guest will go with PMTimer (not sure about HPET
> > but I can check). But if we set sequence to 0xFFFFFFFF after migration - it's probably will not work.
> 
> "Reference TSC during Save and Restore and Migration
> 
> To address migration scenarios to physical platforms that do not support
> iTSC, the TscSequence field is used. In the event that a guest partition
> is  migrated from an iTSC capable host to a non-iTSC capable host, the
> hypervisor sets TscSequence to the special value of 0xFFFFFFFF, which
> directs the guest operating system to fall back to a different clock
> source (for example, the virtual PM timer)."
> 
> Why it would not/does not work after migration?
> 
> [VR]
> Because of different frequencies, I think. 
> Hyper-V reference counters and iTSC report
> performance frequency equal to 10MHz,
> which is obviously is not true for PM and HPET timers.

Windows has to convert from the native hardware clock frequency to
internal system frequency, so i don't believe this is a problem.

> Windows calibrates timers on boot-up and you probably 
> have no chance to do it after or during resume.  

It is documented as such, it has been designed to fallback
to other hardware clock devices. Is there evidence for any 
problem on fallback?

Earlier you said:

"> What if you put 0xFFFFFFFF as a sequence? Or is this another case where
> the spec is wrong.
>
it will use PMTimer (maybe HPET if you have it) if you specify it on
VM's start up. But I'm not sure if it will work if you migrate from TSC
or reference counter to 0xFFFFFFFF
"



^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [RFC PATCH v2 2/2] add support for Hyper-V invariant TSC
  2013-05-23  9:12         ` Gleb Natapov
@ 2013-05-23 13:53           ` Marcelo Tosatti
  2013-05-23 15:31             ` Gleb Natapov
  0 siblings, 1 reply; 44+ messages in thread
From: Marcelo Tosatti @ 2013-05-23 13:53 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: Vadim Rozenfeld, kvm, pl

On Thu, May 23, 2013 at 12:12:29PM +0300, Gleb Natapov wrote:
> > To address migration scenarios to physical platforms that do not support
> > iTSC, the TscSequence field is used. In the event that a guest partition
> > is  migrated from an iTSC capable host to a non-iTSC capable host, the
> > hypervisor sets TscSequence to the special value of 0xFFFFFFFF, which
> > directs the guest operating system to fall back to a different clock
> > source (for example, the virtual PM timer)."
> > 
> > Why it would not/does not work after migration?
> > 
> Please read the whole discussion, we talked about it already. We
> definitely do not want to fall back to PM timer either, we want to use
> reference counter instead.

Case 1) On migration of TSC page enabled Windows guest, from invariant TSC host,
to non-invariant TSC host, Windows guests fallback to PMTimer
and not to reference timer via MSR. 

This is suboptimal because pmtimer emulation is excessively slow.

Is there a better option?

Case 2)
Reference timer (via MSR) support is interesting for the case of non invariant TSC
host.


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [RFC PATCH v2 2/2] add support for Hyper-V invariant TSC
  2013-05-23 13:35             ` Marcelo Tosatti
@ 2013-05-23 15:14               ` Gleb Natapov
  2013-05-24  9:57               ` Vadim Rozenfeld
  1 sibling, 0 replies; 44+ messages in thread
From: Gleb Natapov @ 2013-05-23 15:14 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: Peter Lieven, Vadim Rozenfeld, kvm, pl

On Thu, May 23, 2013 at 10:35:59AM -0300, Marcelo Tosatti wrote:
> On Thu, May 23, 2013 at 12:13:16PM +0300, Gleb Natapov wrote:
> > > >
> > > >"Reference TSC during Save and Restore and Migration
> > > >
> > > >To address migration scenarios to physical platforms that do not support
> > > >iTSC, the TscSequence field is used. In the event that a guest partition
> > > >is  migrated from an iTSC capable host to a non-iTSC capable host, the
> > > >hypervisor sets TscSequence to the special value of 0xFFFFFFFF, which
> > > >directs the guest operating system to fall back to a different clock
> > > >source (for example, the virtual PM timer)."
> > > >
> > > >Why it would not/does not work after migration?
> > > >
> > > >
> > > 
> > > what exactly do we heed the reference TSC for? the reference counter alone works great and it seems
> > > that there is a lot of trouble and crash possibilities involved with the referece tsc.
> > > 
> > Reference TSC is even faster. There should be no crashed with proper
> > implementation.
> > 
> > --
> > 			Gleb.
> 
> Lack of invariant TSC support in the host.
Proper implementation will disable reference TSC in this case.

--
			Gleb.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [RFC PATCH v2 2/2] add support for Hyper-V invariant TSC
  2013-05-23 13:53           ` Marcelo Tosatti
@ 2013-05-23 15:31             ` Gleb Natapov
  2013-05-24 10:11               ` Vadim Rozenfeld
  0 siblings, 1 reply; 44+ messages in thread
From: Gleb Natapov @ 2013-05-23 15:31 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: Vadim Rozenfeld, kvm, pl

On Thu, May 23, 2013 at 10:53:38AM -0300, Marcelo Tosatti wrote:
> On Thu, May 23, 2013 at 12:12:29PM +0300, Gleb Natapov wrote:
> > > To address migration scenarios to physical platforms that do not support
> > > iTSC, the TscSequence field is used. In the event that a guest partition
> > > is  migrated from an iTSC capable host to a non-iTSC capable host, the
> > > hypervisor sets TscSequence to the special value of 0xFFFFFFFF, which
> > > directs the guest operating system to fall back to a different clock
> > > source (for example, the virtual PM timer)."
> > > 
> > > Why it would not/does not work after migration?
> > > 
> > Please read the whole discussion, we talked about it already. We
> > definitely do not want to fall back to PM timer either, we want to use
> > reference counter instead.
> 
> Case 1) On migration of TSC page enabled Windows guest, from invariant TSC host,
> to non-invariant TSC host, Windows guests fallback to PMTimer
> and not to reference timer via MSR. 
> 
> This is suboptimal because pmtimer emulation is excessively slow.
> 
> Is there a better option?
> 
If setting TscSequence to zero makes Windows fall back to the MSR this is a
better option.

> Case 2)
> Reference timer (via MSR) support is interesting for the case of non invariant TSC
> host.

--
			Gleb.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [RFC PATCH v2 2/2] add support for Hyper-V invariant TSC
  2013-05-19  7:06 ` [RFC PATCH v2 2/2] add support for Hyper-V invariant TSC Vadim Rozenfeld
  2013-05-22  0:50   ` Marcelo Tosatti
@ 2013-05-23 16:44   ` Paolo Bonzini
  2013-05-24 10:16     ` Vadim Rozenfeld
  1 sibling, 1 reply; 44+ messages in thread
From: Paolo Bonzini @ 2013-05-23 16:44 UTC (permalink / raw)
  To: Vadim Rozenfeld; +Cc: kvm, gleb, mtosatti, pl

Il 19/05/2013 09:06, Vadim Rozenfeld ha scritto:
> The following patch allows to activate a partition reference 
> time enlightenment that is based on the host platform's support 
> for an Invariant Time Stamp Counter (iTSC).
> NOTE: This code will survive migration due to lack of VM stop/resume
> handlers, when offset, scale and sequence should be
> readjusted. 
> 
> ---
>  arch/x86/kvm/x86.c | 6 +++++-
>  1 file changed, 5 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 9645dab..b423fe4 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -1838,7 +1838,6 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>  		u64 gfn;
>  		unsigned long addr;
>  		HV_REFERENCE_TSC_PAGE tsc_ref;
> -		tsc_ref.TscSequence = 0;
>  		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) {
>  			kvm->arch.hv_tsc_page = data;
>  			break;
> @@ -1848,6 +1847,11 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>  				HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
>  		if (kvm_is_error_hva(addr))
>  			return 1;
> +		tsc_ref.TscSequence =
> +				boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? 1 : 0;

Thinking more of migration, could we increment whatever sequence value
we found (or better, do (x|3)+2 to skip over 0 and 0xFFFFFFFF), instead
of forcing it to 1?

Add HV_X64_MSR_REFERENCE_TSC to msrs_to_save, and migration should just
work.

Paolo

> +		tsc_ref.TscScale =
> +				((10000LL << 32) / vcpu->arch.virtual_tsc_khz) << 32;
> +		tsc_ref.TscOffset = 0;
>  		if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref)))
>  			return 1;
>  		mark_page_dirty(kvm, gfn);
> 


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [RFC PATCH v2 2/2] add support for Hyper-V invariant TSC
  2013-05-23 13:35             ` Marcelo Tosatti
  2013-05-23 15:14               ` Gleb Natapov
@ 2013-05-24  9:57               ` Vadim Rozenfeld
  1 sibling, 0 replies; 44+ messages in thread
From: Vadim Rozenfeld @ 2013-05-24  9:57 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: Gleb Natapov, Peter Lieven, kvm, pl



----- Original Message -----
From: "Marcelo Tosatti" <mtosatti@redhat.com>
To: "Gleb Natapov" <gleb@redhat.com>
Cc: "Peter Lieven" <pl@dlhnet.de>, "Vadim Rozenfeld" <vrozenfe@redhat.com>, kvm@vger.kernel.org, pl@dlh.net
Sent: Thursday, May 23, 2013 11:35:59 PM
Subject: Re: [RFC PATCH v2 2/2] add support for Hyper-V invariant TSC

On Thu, May 23, 2013 at 12:13:16PM +0300, Gleb Natapov wrote:
> > >
> > >"Reference TSC during Save and Restore and Migration
> > >
> > >To address migration scenarios to physical platforms that do not support
> > >iTSC, the TscSequence field is used. In the event that a guest partition
> > >is  migrated from an iTSC capable host to a non-iTSC capable host, the
> > >hypervisor sets TscSequence to the special value of 0xFFFFFFFF, which
> > >directs the guest operating system to fall back to a different clock
> > >source (for example, the virtual PM timer)."
> > >
> > >Why it would not/does not work after migration?
> > >
> > >
> > 
> > what exactly do we heed the reference TSC for? the reference counter alone works great and it seems
> > that there is a lot of trouble and crash possibilities involved with the referece tsc.
> > 
> Reference TSC is even faster. There should be no crashed with proper
> implementation.
> 
> --
> 			Gleb.

Lack of invariant TSC support in the host.

if there is no iTSC in the host -> set sequence to 0 and go with
reference counter. It is why they both scaled to 10 MHz, and it's
why reference counters is a fall-back for iTSC. 


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [RFC PATCH v2 2/2] add support for Hyper-V invariant TSC
  2013-05-23 13:47           ` Marcelo Tosatti
@ 2013-05-24 10:01             ` Vadim Rozenfeld
  0 siblings, 0 replies; 44+ messages in thread
From: Vadim Rozenfeld @ 2013-05-24 10:01 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: kvm, gleb, pl



----- Original Message -----
From: "Marcelo Tosatti" <mtosatti@redhat.com>
To: "Vadim Rozenfeld" <vrozenfe@redhat.com>
Cc: kvm@vger.kernel.org, gleb@redhat.com, pl@dlh.net
Sent: Thursday, May 23, 2013 11:47:46 PM
Subject: Re: [RFC PATCH v2 2/2] add support for Hyper-V invariant TSC

On Thu, May 23, 2013 at 08:21:29AM -0400, Vadim Rozenfeld wrote:
> > > @@ -1848,6 +1847,11 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
> > >  				HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
> > >  		if (kvm_is_error_hva(addr))
> > >  			return 1;
> > > +		tsc_ref.TscSequence =
> > > +				boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? 1 : 0;
> > 
> > 1) You want NONSTOP_TSC (see 40fb1715 commit) which matches INVARIANT TSC.
> > [VR]
> > Thank you for reviewing. Will fix it.
> > 2) TscSequence should increase? 
> > "This field serves as a sequence number that is incremented whenever..."
> > [VR]
> > Yes, on every VM resume, including migration. After migration we also need
> > to recalculate scale and adjust offset. 
> > 3) 0xFFFFFFFF is the value for invalid source of reference time?
> > [VR] Yes, on boot-up. In this case guest will go with PMTimer (not sure about HPET
> > but I can check). But if we set sequence to 0xFFFFFFFF after migration - it's probably will not work.
> 
> "Reference TSC during Save and Restore and Migration
> 
> To address migration scenarios to physical platforms that do not support
> iTSC, the TscSequence field is used. In the event that a guest partition
> is  migrated from an iTSC capable host to a non-iTSC capable host, the
> hypervisor sets TscSequence to the special value of 0xFFFFFFFF, which
> directs the guest operating system to fall back to a different clock
> source (for example, the virtual PM timer)."
> 
> Why it would not/does not work after migration?
> 
> [VR]
> Because of different frequencies, I think. 
> Hyper-V reference counters and iTSC report
> performance frequency equal to 10MHz,
> which is obviously is not true for PM and HPET timers.

Windows has to convert from the native hardware clock frequency to
internal system frequency, so i don't believe this is a problem.

> Windows calibrates timers on boot-up and you probably 
> have no chance to do it after or during resume.  

It is documented as such, it has been designed to fallback
to other hardware clock devices. Is there evidence for any 
problem on fallback?

Earlier you said:

"> What if you put 0xFFFFFFFF as a sequence? Or is this another case where
> the spec is wrong.
>
it will use PMTimer (maybe HPET if you have it) if you specify it on
VM's start up. But I'm not sure if it will work if you migrate from TSC
or reference counter to 0xFFFFFFFF
"
On startup, not after migration, when you migrate to host w/o iTSC and/or 
reference counters support.



^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [RFC PATCH v2 2/2] add support for Hyper-V invariant TSC
  2013-05-23 15:31             ` Gleb Natapov
@ 2013-05-24 10:11               ` Vadim Rozenfeld
  2013-05-24 19:41                 ` Marcelo Tosatti
  0 siblings, 1 reply; 44+ messages in thread
From: Vadim Rozenfeld @ 2013-05-24 10:11 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: Marcelo Tosatti, kvm, pl



----- Original Message -----
From: "Gleb Natapov" <gleb@redhat.com>
To: "Marcelo Tosatti" <mtosatti@redhat.com>
Cc: "Vadim Rozenfeld" <vrozenfe@redhat.com>, kvm@vger.kernel.org, pl@dlh.net
Sent: Friday, May 24, 2013 1:31:10 AM
Subject: Re: [RFC PATCH v2 2/2] add support for Hyper-V invariant TSC

On Thu, May 23, 2013 at 10:53:38AM -0300, Marcelo Tosatti wrote:
> On Thu, May 23, 2013 at 12:12:29PM +0300, Gleb Natapov wrote:
> > > To address migration scenarios to physical platforms that do not support
> > > iTSC, the TscSequence field is used. In the event that a guest partition
> > > is  migrated from an iTSC capable host to a non-iTSC capable host, the
> > > hypervisor sets TscSequence to the special value of 0xFFFFFFFF, which
> > > directs the guest operating system to fall back to a different clock
> > > source (for example, the virtual PM timer)."
> > > 
> > > Why it would not/does not work after migration?
> > > 
> > Please read the whole discussion, we talked about it already. We
> > definitely do not want to fall back to PM timer either, we want to use
> > reference counter instead.
> 
> Case 1) On migration of TSC page enabled Windows guest, from invariant TSC host,
> to non-invariant TSC host, Windows guests fallback to PMTimer
> and not to reference timer via MSR. 
> 
> This is suboptimal because pmtimer emulation is excessively slow.
> 
> Is there a better option?
> 
If setting TscSequence to zero makes Windows fall back to the MSR this is a
better option.

+1 
This is why MS has two different mechanisms:
iTSC as a primary, reference counters as a fall-back.

  
> Case 2)
> Reference timer (via MSR) support is interesting for the case of non invariant TSC
> host.

--
			Gleb.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [RFC PATCH v2 2/2] add support for Hyper-V invariant TSC
  2013-05-23 16:44   ` Paolo Bonzini
@ 2013-05-24 10:16     ` Vadim Rozenfeld
  0 siblings, 0 replies; 44+ messages in thread
From: Vadim Rozenfeld @ 2013-05-24 10:16 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: kvm, gleb, mtosatti, pl



----- Original Message -----
From: "Paolo Bonzini" <pbonzini@redhat.com>
To: "Vadim Rozenfeld" <vrozenfe@redhat.com>
Cc: kvm@vger.kernel.org, gleb@redhat.com, mtosatti@redhat.com, pl@dlh.net
Sent: Friday, May 24, 2013 2:44:50 AM
Subject: Re: [RFC PATCH v2 2/2] add support for Hyper-V invariant TSC

Il 19/05/2013 09:06, Vadim Rozenfeld ha scritto:
> The following patch allows to activate a partition reference 
> time enlightenment that is based on the host platform's support 
> for an Invariant Time Stamp Counter (iTSC).
> NOTE: This code will survive migration due to lack of VM stop/resume
> handlers, when offset, scale and sequence should be
> readjusted. 
> 
> ---
>  arch/x86/kvm/x86.c | 6 +++++-
>  1 file changed, 5 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 9645dab..b423fe4 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -1838,7 +1838,6 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>  		u64 gfn;
>  		unsigned long addr;
>  		HV_REFERENCE_TSC_PAGE tsc_ref;
> -		tsc_ref.TscSequence = 0;
>  		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) {
>  			kvm->arch.hv_tsc_page = data;
>  			break;
> @@ -1848,6 +1847,11 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>  				HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
>  		if (kvm_is_error_hva(addr))
>  			return 1;
> +		tsc_ref.TscSequence =
> +				boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? 1 : 0;

Thinking more of migration, could we increment whatever sequence value
we found (or better, do (x|3)+2 to skip over 0 and 0xFFFFFFFF), instead
of forcing it to 1?

[VR]
Yes, it should work.
We need to keep sequence between 1 and 0xFFFFFFFF and increment it every time
the VM was migrated or paused/resumed.

Add HV_X64_MSR_REFERENCE_TSC to msrs_to_save, and migration should just
work.

Paolo

> +		tsc_ref.TscScale =
> +				((10000LL << 32) / vcpu->arch.virtual_tsc_khz) << 32;
> +		tsc_ref.TscOffset = 0;
>  		if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref)))
>  			return 1;
>  		mark_page_dirty(kvm, gfn);
> 


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [RFC PATCH v2 2/2] add support for Hyper-V invariant TSC
  2013-05-24 10:11               ` Vadim Rozenfeld
@ 2013-05-24 19:41                 ` Marcelo Tosatti
  2013-05-27 12:33                   ` Vadim Rozenfeld
  0 siblings, 1 reply; 44+ messages in thread
From: Marcelo Tosatti @ 2013-05-24 19:41 UTC (permalink / raw)
  To: Vadim Rozenfeld; +Cc: Gleb Natapov, kvm, pl

On Fri, May 24, 2013 at 06:11:16AM -0400, Vadim Rozenfeld wrote:
> > Is there a better option?
> > 
> If setting TscSequence to zero makes Windows fall back to the MSR this is a
> better option.
> 
> +1 
> This is why MS has two different mechanisms:
> iTSC as a primary, reference counters as a fall-back.

Ok, is it documented that transition

iTSC valid (Sequence != 0 and != 0xFFFFFFFF) -> 
iTSC not valid but ref MSR valid (Sequence = 0), 

is a valid transition?

It was not obvious for me. Can you point to documentation?



^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [RFC PATCH v2 2/2] add support for Hyper-V invariant TSC
  2013-05-24 19:41                 ` Marcelo Tosatti
@ 2013-05-27 12:33                   ` Vadim Rozenfeld
  0 siblings, 0 replies; 44+ messages in thread
From: Vadim Rozenfeld @ 2013-05-27 12:33 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: Gleb Natapov, kvm, pl

On Fri, 2013-05-24 at 16:41 -0300, Marcelo Tosatti wrote:
> On Fri, May 24, 2013 at 06:11:16AM -0400, Vadim Rozenfeld wrote:
> > > Is there a better option?
> > > 
> > If setting TscSequence to zero makes Windows fall back to the MSR this is a
> > better option.
> > 
> > +1 
> > This is why MS has two different mechanisms:
> > iTSC as a primary, reference counters as a fall-back.
> 
> Ok, is it documented that transition
> 
> iTSC valid (Sequence != 0 and != 0xFFFFFFFF) -> 
> iTSC not valid but ref MSR valid (Sequence = 0), 
> 
> is a valid transition?
> 
Yes, it's true.
> It was not obvious for me. Can you point to documentation?
> 
> 
Hypervisor Functional Specification v2.0a: For Windows Server 2008 R2
http://www.microsoft.com/download/en/details.aspx?displaylang=en&id=18673

"15.4.3.3 Reference TSC during Save/Restore and Migration
To address migration scenarios to physical platforms which do not
support iTSC, the TscSequence field is used. In the event a guest
partition is migrated from an iTSC capable host to a non-iTSC capable
host, the hypervisor sets TscSequence to the special value of 0x0, which
directs the guest operating system to fall back to a different clock
source (the virtual PM timer)."

Now what the virtual PM timer is - if hypervisor provides PM Timer
assist support (HvPartitionPropertyPmTimerAssist partition property),
it will use partition reference counters to calculate PM Timer value.
If partition has no HvPartitionPropertyPmTimerAssist - guest will use
reference counters MSR directly.
Currently we don't support PM timer assist, so TscSequence 0x0 means
fallback to reference counters.







^ permalink raw reply	[flat|nested] 44+ messages in thread

end of thread, other threads:[~2013-05-27 12:34 UTC | newest]

Thread overview: 44+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-05-19  7:06 [RFC PATCH v2 0/2] Hyper-V timers Vadim Rozenfeld
2013-05-19  7:06 ` [RFC PATCH v2 1/2] add support for Hyper-V reference time counter Vadim Rozenfeld
2013-05-19 13:47   ` Gleb Natapov
2013-05-19 14:37   ` Paolo Bonzini
2013-05-22  0:46   ` Marcelo Tosatti
2013-05-22  3:28     ` Gleb Natapov
2013-05-22  3:32       ` Marcelo Tosatti
2013-05-22  3:38         ` Gleb Natapov
2013-05-22 14:31           ` Marcelo Tosatti
2013-05-22  7:32     ` Vadim Rozenfeld
2013-05-22 21:55       ` Paolo Bonzini
2013-05-23  6:17         ` Peter Lieven
2013-05-23  9:54           ` Paolo Bonzini
2013-05-23 10:45             ` Peter Lieven
2013-05-23 12:25           ` Vadim Rozenfeld
2013-05-23 13:18             ` Paolo Bonzini
2013-05-23 13:20               ` Peter Lieven
2013-05-23 13:23                 ` Paolo Bonzini
2013-05-23 13:30                   ` Peter Lieven
2013-05-23 13:40                     ` Paolo Bonzini
2013-05-19  7:06 ` [RFC PATCH v2 2/2] add support for Hyper-V invariant TSC Vadim Rozenfeld
2013-05-22  0:50   ` Marcelo Tosatti
2013-05-22  7:22     ` Vadim Rozenfeld
2013-05-22 21:23       ` Marcelo Tosatti
2013-05-23  6:18         ` Peter Lieven
2013-05-23  9:13           ` Gleb Natapov
2013-05-23 13:35             ` Marcelo Tosatti
2013-05-23 15:14               ` Gleb Natapov
2013-05-24  9:57               ` Vadim Rozenfeld
2013-05-23 12:33           ` Vadim Rozenfeld
2013-05-23 12:44             ` Peter Lieven
2013-05-23 12:45               ` Gleb Natapov
2013-05-23 12:54               ` Vadim Rozenfeld
2013-05-23  9:12         ` Gleb Natapov
2013-05-23 13:53           ` Marcelo Tosatti
2013-05-23 15:31             ` Gleb Natapov
2013-05-24 10:11               ` Vadim Rozenfeld
2013-05-24 19:41                 ` Marcelo Tosatti
2013-05-27 12:33                   ` Vadim Rozenfeld
2013-05-23 12:21         ` Vadim Rozenfeld
2013-05-23 13:47           ` Marcelo Tosatti
2013-05-24 10:01             ` Vadim Rozenfeld
2013-05-23 16:44   ` Paolo Bonzini
2013-05-24 10:16     ` Vadim Rozenfeld

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.