All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v1] kvm/x86: Hyper-V tsc page setup
@ 2015-12-24  9:33 ` Andrey Smetanin
  0 siblings, 0 replies; 42+ messages in thread
From: Andrey Smetanin @ 2015-12-24  9:33 UTC (permalink / raw)
  To: kvm; +Cc: Paolo Bonzini, Gleb Natapov, Roman Kagan, Denis V. Lunev, qemu-devel

Lately tsc page was implemented but filled with empty
values. This patch setup tsc page scale and offset based
on vcpu tsc, tsc_khz and  HV_X64_MSR_TIME_REF_COUNT value.

The valid tsc page drops HV_X64_MSR_TIME_REF_COUNT msr
reads count to zero which potentially improves performance.

The patch applies on top of
'kvm: Make vcpu->requests as 64 bit bitmap'
previously sent.

Signed-off-by: Andrey Smetanin <asmetanin@virtuozzo.com>
CC: Paolo Bonzini <pbonzini@redhat.com>
CC: Gleb Natapov <gleb@kernel.org>
CC: Roman Kagan <rkagan@virtuozzo.com>
CC: Denis V. Lunev <den@openvz.org>
CC: qemu-devel@nongnu.org

---
 arch/x86/kvm/hyperv.c    | 117 +++++++++++++++++++++++++++++++++++++++++------
 arch/x86/kvm/hyperv.h    |   2 +
 arch/x86/kvm/x86.c       |  12 +++++
 include/linux/kvm_host.h |   1 +
 4 files changed, 117 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index d50675a..504fdc7 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -753,6 +753,105 @@ static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
+static u64 calc_tsc_page_scale(u32 tsc_khz)
+{
+	/*
+	 * reftime (in 100ns) = tsc * tsc_scale / 2^64 + tsc_offset
+	 * so reftime_delta = (tsc_delta * tsc_scale) / 2^64
+	 * so tsc_scale = (2^64 * reftime_delta)/tsc_delta
+	 * so tsc_scale = (2^64 * 10 * 10^6) / tsc_hz = (2^64 * 10000) / tsc_khz
+	 * so tsc_scale = (2^63 * 2 * 10000) / tsc_khz
+	 */
+	return mul_u64_u32_div(1ULL << 63, 2 * 10000, tsc_khz);
+}
+
+static int write_tsc_page(struct kvm *kvm, u64 gfn,
+			  PHV_REFERENCE_TSC_PAGE tsc_ref)
+{
+	if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
+			    tsc_ref, sizeof(*tsc_ref)))
+		return 1;
+	mark_page_dirty(kvm, gfn);
+	return 0;
+}
+
+static int read_tsc_page(struct kvm *kvm, u64 gfn,
+			 PHV_REFERENCE_TSC_PAGE tsc_ref)
+{
+	if (kvm_read_guest(kvm, gfn_to_gpa(gfn),
+			   tsc_ref, sizeof(*tsc_ref)))
+		return 1;
+	return 0;
+}
+
+static u64 calc_tsc_page_time(struct kvm_vcpu *vcpu,
+			      PHV_REFERENCE_TSC_PAGE tsc_ref)
+{
+
+	u64 tsc = kvm_read_l1_tsc(vcpu, rdtsc());
+
+	return mul_u64_u64_shr(tsc, tsc_ref->tsc_scale, 64)
+		+ tsc_ref->tsc_offset;
+}
+
+static int setup_blank_tsc_page(struct kvm_vcpu *vcpu, u64 gfn)
+{
+	HV_REFERENCE_TSC_PAGE tsc_ref;
+
+	memset(&tsc_ref, 0, sizeof(tsc_ref));
+	return write_tsc_page(vcpu->kvm, gfn, &tsc_ref);
+}
+
+int kvm_hv_setup_tsc_page(struct kvm_vcpu *vcpu)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_hv *hv = &kvm->arch.hyperv;
+	HV_REFERENCE_TSC_PAGE tsc_ref;
+	u32 tsc_khz;
+	int r;
+	u64 gfn, ref_time, tsc_scale, tsc_offset, tsc;
+
+	if (WARN_ON_ONCE(!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)))
+		return -EINVAL;
+
+	gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
+	vcpu_debug(vcpu, "tsc page gfn 0x%llx\n", gfn);
+
+	tsc_khz = vcpu->arch.virtual_tsc_khz;
+	if (!tsc_khz) {
+		vcpu_unimpl(vcpu, "no tsc khz\n");
+		return setup_blank_tsc_page(vcpu, gfn);
+	}
+
+	r = read_tsc_page(kvm, gfn, &tsc_ref);
+	if (r) {
+		vcpu_err(vcpu, "can't access tsc page gfn 0x%llx\n", gfn);
+		return r;
+	}
+
+	tsc_scale = calc_tsc_page_scale(tsc_khz);
+	ref_time = get_time_ref_counter(kvm);
+	tsc = kvm_read_l1_tsc(vcpu, rdtsc());
+
+	/* tsc_offset = reftime - tsc * tsc_scale / 2^64 */
+	tsc_offset = ref_time - mul_u64_u64_shr(tsc, tsc_scale, 64);
+	vcpu_debug(vcpu, "tsc khz %u tsc %llu scale %llu offset %llu\n",
+		   tsc_khz, tsc, tsc_scale, tsc_offset);
+
+	tsc_ref.tsc_sequence++;
+	if (tsc_ref.tsc_sequence == 0)
+		tsc_ref.tsc_sequence = 1;
+
+	tsc_ref.tsc_scale = tsc_scale;
+	tsc_ref.tsc_offset = tsc_offset;
+
+	vcpu_debug(vcpu, "tsc page calibration time %llu vs. reftime %llu\n",
+		   calc_tsc_page_time(vcpu, &tsc_ref),
+		   get_time_ref_counter(kvm));
+
+	return write_tsc_page(kvm, gfn, &tsc_ref);
+}
+
 static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
 			     bool host)
 {
@@ -790,23 +889,11 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
 		mark_page_dirty(kvm, gfn);
 		break;
 	}
-	case HV_X64_MSR_REFERENCE_TSC: {
-		u64 gfn;
-		HV_REFERENCE_TSC_PAGE tsc_ref;
-
-		memset(&tsc_ref, 0, sizeof(tsc_ref));
+	case HV_X64_MSR_REFERENCE_TSC:
 		hv->hv_tsc_page = data;
-		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE))
-			break;
-		gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
-		if (kvm_write_guest(
-				kvm,
-				gfn << HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT,
-				&tsc_ref, sizeof(tsc_ref)))
-			return 1;
-		mark_page_dirty(kvm, gfn);
+		if (hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)
+			kvm_make_request(KVM_REQ_HV_TSC_PAGE, vcpu);
 		break;
-	}
 	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
 		return kvm_hv_msr_set_crash_data(vcpu,
 						 msr - HV_X64_MSR_CRASH_P0,
diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
index 60eccd4..e7cc446 100644
--- a/arch/x86/kvm/hyperv.h
+++ b/arch/x86/kvm/hyperv.h
@@ -84,4 +84,6 @@ static inline bool kvm_hv_has_stimer_pending(struct kvm_vcpu *vcpu)
 
 void kvm_hv_process_stimers(struct kvm_vcpu *vcpu);
 
+int kvm_hv_setup_tsc_page(struct kvm_vcpu *vcpu);
+
 #endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index aedb1a0..1d821f6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6500,6 +6500,18 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		 */
 		if (kvm_check_request(KVM_REQ_HV_STIMER, vcpu))
 			kvm_hv_process_stimers(vcpu);
+
+		/*
+		 * KVM_REQ_HV_TSC_PAGE setup should be after
+		 * KVM_REQ_CLOCK_UPDATE processing because
+		 * Hyper-V tsc page content depends
+		 * on kvm->arch.kvmclock_offset value.
+		 */
+		if (kvm_check_request(KVM_REQ_HV_TSC_PAGE, vcpu)) {
+			r = kvm_hv_setup_tsc_page(vcpu);
+			if (unlikely(r))
+				goto out;
+		}
 	}
 
 	/*
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 6877b4d7e..93c9e25 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -145,6 +145,7 @@ static inline bool is_error_page(struct page *page)
 #define KVM_REQ_HV_RESET          29
 #define KVM_REQ_HV_EXIT           30
 #define KVM_REQ_HV_STIMER         31
+#define KVM_REQ_HV_TSC_PAGE       32
 
 #define KVM_REQ_MAX               64
 
-- 
2.4.3


^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [Qemu-devel] [PATCH v1] kvm/x86: Hyper-V tsc page setup
@ 2015-12-24  9:33 ` Andrey Smetanin
  0 siblings, 0 replies; 42+ messages in thread
From: Andrey Smetanin @ 2015-12-24  9:33 UTC (permalink / raw)
  To: kvm; +Cc: Gleb Natapov, Paolo Bonzini, qemu-devel, Roman Kagan, Denis V. Lunev

Lately tsc page was implemented but filled with empty
values. This patch setup tsc page scale and offset based
on vcpu tsc, tsc_khz and  HV_X64_MSR_TIME_REF_COUNT value.

The valid tsc page drops HV_X64_MSR_TIME_REF_COUNT msr
reads count to zero which potentially improves performance.

The patch applies on top of
'kvm: Make vcpu->requests as 64 bit bitmap'
previously sent.

Signed-off-by: Andrey Smetanin <asmetanin@virtuozzo.com>
CC: Paolo Bonzini <pbonzini@redhat.com>
CC: Gleb Natapov <gleb@kernel.org>
CC: Roman Kagan <rkagan@virtuozzo.com>
CC: Denis V. Lunev <den@openvz.org>
CC: qemu-devel@nongnu.org

---
 arch/x86/kvm/hyperv.c    | 117 +++++++++++++++++++++++++++++++++++++++++------
 arch/x86/kvm/hyperv.h    |   2 +
 arch/x86/kvm/x86.c       |  12 +++++
 include/linux/kvm_host.h |   1 +
 4 files changed, 117 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index d50675a..504fdc7 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -753,6 +753,105 @@ static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
+static u64 calc_tsc_page_scale(u32 tsc_khz)
+{
+	/*
+	 * reftime (in 100ns) = tsc * tsc_scale / 2^64 + tsc_offset
+	 * so reftime_delta = (tsc_delta * tsc_scale) / 2^64
+	 * so tsc_scale = (2^64 * reftime_delta)/tsc_delta
+	 * so tsc_scale = (2^64 * 10 * 10^6) / tsc_hz = (2^64 * 10000) / tsc_khz
+	 * so tsc_scale = (2^63 * 2 * 10000) / tsc_khz
+	 */
+	return mul_u64_u32_div(1ULL << 63, 2 * 10000, tsc_khz);
+}
+
+static int write_tsc_page(struct kvm *kvm, u64 gfn,
+			  PHV_REFERENCE_TSC_PAGE tsc_ref)
+{
+	if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
+			    tsc_ref, sizeof(*tsc_ref)))
+		return 1;
+	mark_page_dirty(kvm, gfn);
+	return 0;
+}
+
+static int read_tsc_page(struct kvm *kvm, u64 gfn,
+			 PHV_REFERENCE_TSC_PAGE tsc_ref)
+{
+	if (kvm_read_guest(kvm, gfn_to_gpa(gfn),
+			   tsc_ref, sizeof(*tsc_ref)))
+		return 1;
+	return 0;
+}
+
+static u64 calc_tsc_page_time(struct kvm_vcpu *vcpu,
+			      PHV_REFERENCE_TSC_PAGE tsc_ref)
+{
+
+	u64 tsc = kvm_read_l1_tsc(vcpu, rdtsc());
+
+	return mul_u64_u64_shr(tsc, tsc_ref->tsc_scale, 64)
+		+ tsc_ref->tsc_offset;
+}
+
+static int setup_blank_tsc_page(struct kvm_vcpu *vcpu, u64 gfn)
+{
+	HV_REFERENCE_TSC_PAGE tsc_ref;
+
+	memset(&tsc_ref, 0, sizeof(tsc_ref));
+	return write_tsc_page(vcpu->kvm, gfn, &tsc_ref);
+}
+
+int kvm_hv_setup_tsc_page(struct kvm_vcpu *vcpu)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_hv *hv = &kvm->arch.hyperv;
+	HV_REFERENCE_TSC_PAGE tsc_ref;
+	u32 tsc_khz;
+	int r;
+	u64 gfn, ref_time, tsc_scale, tsc_offset, tsc;
+
+	if (WARN_ON_ONCE(!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)))
+		return -EINVAL;
+
+	gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
+	vcpu_debug(vcpu, "tsc page gfn 0x%llx\n", gfn);
+
+	tsc_khz = vcpu->arch.virtual_tsc_khz;
+	if (!tsc_khz) {
+		vcpu_unimpl(vcpu, "no tsc khz\n");
+		return setup_blank_tsc_page(vcpu, gfn);
+	}
+
+	r = read_tsc_page(kvm, gfn, &tsc_ref);
+	if (r) {
+		vcpu_err(vcpu, "can't access tsc page gfn 0x%llx\n", gfn);
+		return r;
+	}
+
+	tsc_scale = calc_tsc_page_scale(tsc_khz);
+	ref_time = get_time_ref_counter(kvm);
+	tsc = kvm_read_l1_tsc(vcpu, rdtsc());
+
+	/* tsc_offset = reftime - tsc * tsc_scale / 2^64 */
+	tsc_offset = ref_time - mul_u64_u64_shr(tsc, tsc_scale, 64);
+	vcpu_debug(vcpu, "tsc khz %u tsc %llu scale %llu offset %llu\n",
+		   tsc_khz, tsc, tsc_scale, tsc_offset);
+
+	tsc_ref.tsc_sequence++;
+	if (tsc_ref.tsc_sequence == 0)
+		tsc_ref.tsc_sequence = 1;
+
+	tsc_ref.tsc_scale = tsc_scale;
+	tsc_ref.tsc_offset = tsc_offset;
+
+	vcpu_debug(vcpu, "tsc page calibration time %llu vs. reftime %llu\n",
+		   calc_tsc_page_time(vcpu, &tsc_ref),
+		   get_time_ref_counter(kvm));
+
+	return write_tsc_page(kvm, gfn, &tsc_ref);
+}
+
 static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
 			     bool host)
 {
@@ -790,23 +889,11 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
 		mark_page_dirty(kvm, gfn);
 		break;
 	}
-	case HV_X64_MSR_REFERENCE_TSC: {
-		u64 gfn;
-		HV_REFERENCE_TSC_PAGE tsc_ref;
-
-		memset(&tsc_ref, 0, sizeof(tsc_ref));
+	case HV_X64_MSR_REFERENCE_TSC:
 		hv->hv_tsc_page = data;
-		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE))
-			break;
-		gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
-		if (kvm_write_guest(
-				kvm,
-				gfn << HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT,
-				&tsc_ref, sizeof(tsc_ref)))
-			return 1;
-		mark_page_dirty(kvm, gfn);
+		if (hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)
+			kvm_make_request(KVM_REQ_HV_TSC_PAGE, vcpu);
 		break;
-	}
 	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
 		return kvm_hv_msr_set_crash_data(vcpu,
 						 msr - HV_X64_MSR_CRASH_P0,
diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
index 60eccd4..e7cc446 100644
--- a/arch/x86/kvm/hyperv.h
+++ b/arch/x86/kvm/hyperv.h
@@ -84,4 +84,6 @@ static inline bool kvm_hv_has_stimer_pending(struct kvm_vcpu *vcpu)
 
 void kvm_hv_process_stimers(struct kvm_vcpu *vcpu);
 
+int kvm_hv_setup_tsc_page(struct kvm_vcpu *vcpu);
+
 #endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index aedb1a0..1d821f6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6500,6 +6500,18 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		 */
 		if (kvm_check_request(KVM_REQ_HV_STIMER, vcpu))
 			kvm_hv_process_stimers(vcpu);
+
+		/*
+		 * KVM_REQ_HV_TSC_PAGE setup should be after
+		 * KVM_REQ_CLOCK_UPDATE processing because
+		 * Hyper-V tsc page content depends
+		 * on kvm->arch.kvmclock_offset value.
+		 */
+		if (kvm_check_request(KVM_REQ_HV_TSC_PAGE, vcpu)) {
+			r = kvm_hv_setup_tsc_page(vcpu);
+			if (unlikely(r))
+				goto out;
+		}
 	}
 
 	/*
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 6877b4d7e..93c9e25 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -145,6 +145,7 @@ static inline bool is_error_page(struct page *page)
 #define KVM_REQ_HV_RESET          29
 #define KVM_REQ_HV_EXIT           30
 #define KVM_REQ_HV_STIMER         31
+#define KVM_REQ_HV_TSC_PAGE       32
 
 #define KVM_REQ_MAX               64
 
-- 
2.4.3

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* Re: [PATCH v1] kvm/x86: Hyper-V tsc page setup
  2015-12-24  9:33 ` [Qemu-devel] " Andrey Smetanin
@ 2016-01-05 21:48   ` Peter Hornyack
  -1 siblings, 0 replies; 42+ messages in thread
From: Peter Hornyack @ 2016-01-05 21:48 UTC (permalink / raw)
  To: Andrey Smetanin
  Cc: kvm list, Paolo Bonzini, Gleb Natapov, Roman Kagan,
	Denis V. Lunev, qemu-devel, Feng Min, Mike Waychison

On Thu, Dec 24, 2015 at 1:33 AM, Andrey Smetanin
<asmetanin@virtuozzo.com> wrote:
> Lately tsc page was implemented but filled with empty
> values. This patch setup tsc page scale and offset based
> on vcpu tsc, tsc_khz and  HV_X64_MSR_TIME_REF_COUNT value.
>
> The valid tsc page drops HV_X64_MSR_TIME_REF_COUNT msr
> reads count to zero which potentially improves performance.
>
> The patch applies on top of
> 'kvm: Make vcpu->requests as 64 bit bitmap'
> previously sent.
>
> Signed-off-by: Andrey Smetanin <asmetanin@virtuozzo.com>
> CC: Paolo Bonzini <pbonzini@redhat.com>
> CC: Gleb Natapov <gleb@kernel.org>
> CC: Roman Kagan <rkagan@virtuozzo.com>
> CC: Denis V. Lunev <den@openvz.org>
> CC: qemu-devel@nongnu.org
Reviewed-by: Peter Hornyack <peterhornyack@google.com>

>
> ---
>  arch/x86/kvm/hyperv.c    | 117 +++++++++++++++++++++++++++++++++++++++++------
>  arch/x86/kvm/hyperv.h    |   2 +
>  arch/x86/kvm/x86.c       |  12 +++++
>  include/linux/kvm_host.h |   1 +
>  4 files changed, 117 insertions(+), 15 deletions(-)
>
> diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
> index d50675a..504fdc7 100644
> --- a/arch/x86/kvm/hyperv.c
> +++ b/arch/x86/kvm/hyperv.c
> @@ -753,6 +753,105 @@ static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu,
>         return 0;
>  }
>
> +static u64 calc_tsc_page_scale(u32 tsc_khz)
> +{
> +       /*
> +        * reftime (in 100ns) = tsc * tsc_scale / 2^64 + tsc_offset
> +        * so reftime_delta = (tsc_delta * tsc_scale) / 2^64
> +        * so tsc_scale = (2^64 * reftime_delta)/tsc_delta
> +        * so tsc_scale = (2^64 * 10 * 10^6) / tsc_hz = (2^64 * 10000) / tsc_khz
> +        * so tsc_scale = (2^63 * 2 * 10000) / tsc_khz
> +        */
> +       return mul_u64_u32_div(1ULL << 63, 2 * 10000, tsc_khz);
> +}
> +
> +static int write_tsc_page(struct kvm *kvm, u64 gfn,
> +                         PHV_REFERENCE_TSC_PAGE tsc_ref)
> +{
> +       if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
> +                           tsc_ref, sizeof(*tsc_ref)))
> +               return 1;
> +       mark_page_dirty(kvm, gfn);
> +       return 0;
> +}
> +
> +static int read_tsc_page(struct kvm *kvm, u64 gfn,
> +                        PHV_REFERENCE_TSC_PAGE tsc_ref)
> +{
> +       if (kvm_read_guest(kvm, gfn_to_gpa(gfn),
> +                          tsc_ref, sizeof(*tsc_ref)))
> +               return 1;
> +       return 0;
> +}
> +
> +static u64 calc_tsc_page_time(struct kvm_vcpu *vcpu,
> +                             PHV_REFERENCE_TSC_PAGE tsc_ref)
> +{
> +
> +       u64 tsc = kvm_read_l1_tsc(vcpu, rdtsc());
> +
> +       return mul_u64_u64_shr(tsc, tsc_ref->tsc_scale, 64)
> +               + tsc_ref->tsc_offset;
> +}
> +
> +static int setup_blank_tsc_page(struct kvm_vcpu *vcpu, u64 gfn)
> +{
> +       HV_REFERENCE_TSC_PAGE tsc_ref;
> +
> +       memset(&tsc_ref, 0, sizeof(tsc_ref));
> +       return write_tsc_page(vcpu->kvm, gfn, &tsc_ref);
> +}
> +
> +int kvm_hv_setup_tsc_page(struct kvm_vcpu *vcpu)
> +{
> +       struct kvm *kvm = vcpu->kvm;
> +       struct kvm_hv *hv = &kvm->arch.hyperv;
> +       HV_REFERENCE_TSC_PAGE tsc_ref;
> +       u32 tsc_khz;
> +       int r;
> +       u64 gfn, ref_time, tsc_scale, tsc_offset, tsc;
> +
> +       if (WARN_ON_ONCE(!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)))
> +               return -EINVAL;
> +
> +       gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
> +       vcpu_debug(vcpu, "tsc page gfn 0x%llx\n", gfn);
> +
> +       tsc_khz = vcpu->arch.virtual_tsc_khz;
> +       if (!tsc_khz) {
> +               vcpu_unimpl(vcpu, "no tsc khz\n");
> +               return setup_blank_tsc_page(vcpu, gfn);
> +       }
> +
> +       r = read_tsc_page(kvm, gfn, &tsc_ref);
> +       if (r) {
> +               vcpu_err(vcpu, "can't access tsc page gfn 0x%llx\n", gfn);
> +               return r;
> +       }
> +
> +       tsc_scale = calc_tsc_page_scale(tsc_khz);
> +       ref_time = get_time_ref_counter(kvm);
> +       tsc = kvm_read_l1_tsc(vcpu, rdtsc());
> +
> +       /* tsc_offset = reftime - tsc * tsc_scale / 2^64 */
> +       tsc_offset = ref_time - mul_u64_u64_shr(tsc, tsc_scale, 64);
> +       vcpu_debug(vcpu, "tsc khz %u tsc %llu scale %llu offset %llu\n",
> +                  tsc_khz, tsc, tsc_scale, tsc_offset);
> +
> +       tsc_ref.tsc_sequence++;
> +       if (tsc_ref.tsc_sequence == 0)

Also avoid tsc_sequence == 0xffffffff here. In the Hyper-V TLFS 4.0
(Win2012 R2) 0xffffffff is the special sequence number to disable the
reference TSC page.

> +               tsc_ref.tsc_sequence = 1;
> +
> +       tsc_ref.tsc_scale = tsc_scale;
> +       tsc_ref.tsc_offset = tsc_offset;
> +
> +       vcpu_debug(vcpu, "tsc page calibration time %llu vs. reftime %llu\n",
> +                  calc_tsc_page_time(vcpu, &tsc_ref),
> +                  get_time_ref_counter(kvm));
> +
> +       return write_tsc_page(kvm, gfn, &tsc_ref);
> +}
> +
>  static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
>                              bool host)
>  {
> @@ -790,23 +889,11 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
>                 mark_page_dirty(kvm, gfn);
>                 break;
>         }
> -       case HV_X64_MSR_REFERENCE_TSC: {
> -               u64 gfn;
> -               HV_REFERENCE_TSC_PAGE tsc_ref;
> -
> -               memset(&tsc_ref, 0, sizeof(tsc_ref));
> +       case HV_X64_MSR_REFERENCE_TSC:
>                 hv->hv_tsc_page = data;
> -               if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE))
> -                       break;
> -               gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
> -               if (kvm_write_guest(
> -                               kvm,
> -                               gfn << HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT,
> -                               &tsc_ref, sizeof(tsc_ref)))
> -                       return 1;
> -               mark_page_dirty(kvm, gfn);
> +               if (hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)
> +                       kvm_make_request(KVM_REQ_HV_TSC_PAGE, vcpu);
>                 break;
> -       }
>         case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
>                 return kvm_hv_msr_set_crash_data(vcpu,
>                                                  msr - HV_X64_MSR_CRASH_P0,
> diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
> index 60eccd4..e7cc446 100644
> --- a/arch/x86/kvm/hyperv.h
> +++ b/arch/x86/kvm/hyperv.h
> @@ -84,4 +84,6 @@ static inline bool kvm_hv_has_stimer_pending(struct kvm_vcpu *vcpu)
>
>  void kvm_hv_process_stimers(struct kvm_vcpu *vcpu);
>
> +int kvm_hv_setup_tsc_page(struct kvm_vcpu *vcpu);
> +
>  #endif
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index aedb1a0..1d821f6 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -6500,6 +6500,18 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>                  */
>                 if (kvm_check_request(KVM_REQ_HV_STIMER, vcpu))
>                         kvm_hv_process_stimers(vcpu);
> +
> +               /*
> +                * KVM_REQ_HV_TSC_PAGE setup should be after
> +                * KVM_REQ_CLOCK_UPDATE processing because
> +                * Hyper-V tsc page content depends
> +                * on kvm->arch.kvmclock_offset value.
> +                */
> +               if (kvm_check_request(KVM_REQ_HV_TSC_PAGE, vcpu)) {
> +                       r = kvm_hv_setup_tsc_page(vcpu);
> +                       if (unlikely(r))
> +                               goto out;
> +               }
>         }
>
>         /*
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 6877b4d7e..93c9e25 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -145,6 +145,7 @@ static inline bool is_error_page(struct page *page)
>  #define KVM_REQ_HV_RESET          29
>  #define KVM_REQ_HV_EXIT           30
>  #define KVM_REQ_HV_STIMER         31
> +#define KVM_REQ_HV_TSC_PAGE       32
>
>  #define KVM_REQ_MAX               64
>
> --
> 2.4.3

Looks good, nice helpful comments.

I will note that HV_X64_MSR_REFERENCE_TSC is a good candidate to be
handled in user space rather than in kvm. With my proposed MSR exits
patches that have had some discussion recently, this code to implement
the reference TSC page could be put in qemu instead of kvm.

Peter

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH v1] kvm/x86: Hyper-V tsc page setup
@ 2016-01-05 21:48   ` Peter Hornyack
  0 siblings, 0 replies; 42+ messages in thread
From: Peter Hornyack @ 2016-01-05 21:48 UTC (permalink / raw)
  To: Andrey Smetanin
  Cc: kvm list, Gleb Natapov, qemu-devel, Mike Waychison, Roman Kagan,
	Denis V. Lunev, Paolo Bonzini, Feng Min

On Thu, Dec 24, 2015 at 1:33 AM, Andrey Smetanin
<asmetanin@virtuozzo.com> wrote:
> Lately tsc page was implemented but filled with empty
> values. This patch setup tsc page scale and offset based
> on vcpu tsc, tsc_khz and  HV_X64_MSR_TIME_REF_COUNT value.
>
> The valid tsc page drops HV_X64_MSR_TIME_REF_COUNT msr
> reads count to zero which potentially improves performance.
>
> The patch applies on top of
> 'kvm: Make vcpu->requests as 64 bit bitmap'
> previously sent.
>
> Signed-off-by: Andrey Smetanin <asmetanin@virtuozzo.com>
> CC: Paolo Bonzini <pbonzini@redhat.com>
> CC: Gleb Natapov <gleb@kernel.org>
> CC: Roman Kagan <rkagan@virtuozzo.com>
> CC: Denis V. Lunev <den@openvz.org>
> CC: qemu-devel@nongnu.org
Reviewed-by: Peter Hornyack <peterhornyack@google.com>

>
> ---
>  arch/x86/kvm/hyperv.c    | 117 +++++++++++++++++++++++++++++++++++++++++------
>  arch/x86/kvm/hyperv.h    |   2 +
>  arch/x86/kvm/x86.c       |  12 +++++
>  include/linux/kvm_host.h |   1 +
>  4 files changed, 117 insertions(+), 15 deletions(-)
>
> diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
> index d50675a..504fdc7 100644
> --- a/arch/x86/kvm/hyperv.c
> +++ b/arch/x86/kvm/hyperv.c
> @@ -753,6 +753,105 @@ static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu,
>         return 0;
>  }
>
> +static u64 calc_tsc_page_scale(u32 tsc_khz)
> +{
> +       /*
> +        * reftime (in 100ns) = tsc * tsc_scale / 2^64 + tsc_offset
> +        * so reftime_delta = (tsc_delta * tsc_scale) / 2^64
> +        * so tsc_scale = (2^64 * reftime_delta)/tsc_delta
> +        * so tsc_scale = (2^64 * 10 * 10^6) / tsc_hz = (2^64 * 10000) / tsc_khz
> +        * so tsc_scale = (2^63 * 2 * 10000) / tsc_khz
> +        */
> +       return mul_u64_u32_div(1ULL << 63, 2 * 10000, tsc_khz);
> +}
> +
> +static int write_tsc_page(struct kvm *kvm, u64 gfn,
> +                         PHV_REFERENCE_TSC_PAGE tsc_ref)
> +{
> +       if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
> +                           tsc_ref, sizeof(*tsc_ref)))
> +               return 1;
> +       mark_page_dirty(kvm, gfn);
> +       return 0;
> +}
> +
> +static int read_tsc_page(struct kvm *kvm, u64 gfn,
> +                        PHV_REFERENCE_TSC_PAGE tsc_ref)
> +{
> +       if (kvm_read_guest(kvm, gfn_to_gpa(gfn),
> +                          tsc_ref, sizeof(*tsc_ref)))
> +               return 1;
> +       return 0;
> +}
> +
> +static u64 calc_tsc_page_time(struct kvm_vcpu *vcpu,
> +                             PHV_REFERENCE_TSC_PAGE tsc_ref)
> +{
> +
> +       u64 tsc = kvm_read_l1_tsc(vcpu, rdtsc());
> +
> +       return mul_u64_u64_shr(tsc, tsc_ref->tsc_scale, 64)
> +               + tsc_ref->tsc_offset;
> +}
> +
> +static int setup_blank_tsc_page(struct kvm_vcpu *vcpu, u64 gfn)
> +{
> +       HV_REFERENCE_TSC_PAGE tsc_ref;
> +
> +       memset(&tsc_ref, 0, sizeof(tsc_ref));
> +       return write_tsc_page(vcpu->kvm, gfn, &tsc_ref);
> +}
> +
> +int kvm_hv_setup_tsc_page(struct kvm_vcpu *vcpu)
> +{
> +       struct kvm *kvm = vcpu->kvm;
> +       struct kvm_hv *hv = &kvm->arch.hyperv;
> +       HV_REFERENCE_TSC_PAGE tsc_ref;
> +       u32 tsc_khz;
> +       int r;
> +       u64 gfn, ref_time, tsc_scale, tsc_offset, tsc;
> +
> +       if (WARN_ON_ONCE(!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)))
> +               return -EINVAL;
> +
> +       gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
> +       vcpu_debug(vcpu, "tsc page gfn 0x%llx\n", gfn);
> +
> +       tsc_khz = vcpu->arch.virtual_tsc_khz;
> +       if (!tsc_khz) {
> +               vcpu_unimpl(vcpu, "no tsc khz\n");
> +               return setup_blank_tsc_page(vcpu, gfn);
> +       }
> +
> +       r = read_tsc_page(kvm, gfn, &tsc_ref);
> +       if (r) {
> +               vcpu_err(vcpu, "can't access tsc page gfn 0x%llx\n", gfn);
> +               return r;
> +       }
> +
> +       tsc_scale = calc_tsc_page_scale(tsc_khz);
> +       ref_time = get_time_ref_counter(kvm);
> +       tsc = kvm_read_l1_tsc(vcpu, rdtsc());
> +
> +       /* tsc_offset = reftime - tsc * tsc_scale / 2^64 */
> +       tsc_offset = ref_time - mul_u64_u64_shr(tsc, tsc_scale, 64);
> +       vcpu_debug(vcpu, "tsc khz %u tsc %llu scale %llu offset %llu\n",
> +                  tsc_khz, tsc, tsc_scale, tsc_offset);
> +
> +       tsc_ref.tsc_sequence++;
> +       if (tsc_ref.tsc_sequence == 0)

Also avoid tsc_sequence == 0xffffffff here. In the Hyper-V TLFS 4.0
(Win2012 R2) 0xffffffff is the special sequence number to disable the
reference TSC page.

> +               tsc_ref.tsc_sequence = 1;
> +
> +       tsc_ref.tsc_scale = tsc_scale;
> +       tsc_ref.tsc_offset = tsc_offset;
> +
> +       vcpu_debug(vcpu, "tsc page calibration time %llu vs. reftime %llu\n",
> +                  calc_tsc_page_time(vcpu, &tsc_ref),
> +                  get_time_ref_counter(kvm));
> +
> +       return write_tsc_page(kvm, gfn, &tsc_ref);
> +}
> +
>  static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
>                              bool host)
>  {
> @@ -790,23 +889,11 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
>                 mark_page_dirty(kvm, gfn);
>                 break;
>         }
> -       case HV_X64_MSR_REFERENCE_TSC: {
> -               u64 gfn;
> -               HV_REFERENCE_TSC_PAGE tsc_ref;
> -
> -               memset(&tsc_ref, 0, sizeof(tsc_ref));
> +       case HV_X64_MSR_REFERENCE_TSC:
>                 hv->hv_tsc_page = data;
> -               if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE))
> -                       break;
> -               gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
> -               if (kvm_write_guest(
> -                               kvm,
> -                               gfn << HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT,
> -                               &tsc_ref, sizeof(tsc_ref)))
> -                       return 1;
> -               mark_page_dirty(kvm, gfn);
> +               if (hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)
> +                       kvm_make_request(KVM_REQ_HV_TSC_PAGE, vcpu);
>                 break;
> -       }
>         case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
>                 return kvm_hv_msr_set_crash_data(vcpu,
>                                                  msr - HV_X64_MSR_CRASH_P0,
> diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
> index 60eccd4..e7cc446 100644
> --- a/arch/x86/kvm/hyperv.h
> +++ b/arch/x86/kvm/hyperv.h
> @@ -84,4 +84,6 @@ static inline bool kvm_hv_has_stimer_pending(struct kvm_vcpu *vcpu)
>
>  void kvm_hv_process_stimers(struct kvm_vcpu *vcpu);
>
> +int kvm_hv_setup_tsc_page(struct kvm_vcpu *vcpu);
> +
>  #endif
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index aedb1a0..1d821f6 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -6500,6 +6500,18 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>                  */
>                 if (kvm_check_request(KVM_REQ_HV_STIMER, vcpu))
>                         kvm_hv_process_stimers(vcpu);
> +
> +               /*
> +                * KVM_REQ_HV_TSC_PAGE setup should be after
> +                * KVM_REQ_CLOCK_UPDATE processing because
> +                * Hyper-V tsc page content depends
> +                * on kvm->arch.kvmclock_offset value.
> +                */
> +               if (kvm_check_request(KVM_REQ_HV_TSC_PAGE, vcpu)) {
> +                       r = kvm_hv_setup_tsc_page(vcpu);
> +                       if (unlikely(r))
> +                               goto out;
> +               }
>         }
>
>         /*
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 6877b4d7e..93c9e25 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -145,6 +145,7 @@ static inline bool is_error_page(struct page *page)
>  #define KVM_REQ_HV_RESET          29
>  #define KVM_REQ_HV_EXIT           30
>  #define KVM_REQ_HV_STIMER         31
> +#define KVM_REQ_HV_TSC_PAGE       32
>
>  #define KVM_REQ_MAX               64
>
> --
> 2.4.3

Looks good, nice helpful comments.

I will note that HV_X64_MSR_REFERENCE_TSC is a good candidate to be
handled in user space rather than in kvm. With my proposed MSR exits
patches that have had some discussion recently, this code to implement
the reference TSC page could be put in qemu instead of kvm.

Peter

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH v1] kvm/x86: Hyper-V tsc page setup
  2016-01-05 21:48   ` [Qemu-devel] " Peter Hornyack
@ 2016-01-06  9:22     ` Andrey Smetanin
  -1 siblings, 0 replies; 42+ messages in thread
From: Andrey Smetanin @ 2016-01-06  9:22 UTC (permalink / raw)
  To: Peter Hornyack
  Cc: kvm list, Paolo Bonzini, Gleb Natapov, Roman Kagan,
	Denis V. Lunev, qemu-devel, Feng Min, Mike Waychison



On 01/06/2016 12:48 AM, Peter Hornyack wrote:
> On Thu, Dec 24, 2015 at 1:33 AM, Andrey Smetanin
> <asmetanin@virtuozzo.com> wrote:
>> Lately tsc page was implemented but filled with empty
>> values. This patch setup tsc page scale and offset based
>> on vcpu tsc, tsc_khz and  HV_X64_MSR_TIME_REF_COUNT value.
>>
>> The valid tsc page drops HV_X64_MSR_TIME_REF_COUNT msr
>> reads count to zero which potentially improves performance.
>>
>> The patch applies on top of
>> 'kvm: Make vcpu->requests as 64 bit bitmap'
>> previously sent.
>>
>> Signed-off-by: Andrey Smetanin <asmetanin@virtuozzo.com>
>> CC: Paolo Bonzini <pbonzini@redhat.com>
>> CC: Gleb Natapov <gleb@kernel.org>
>> CC: Roman Kagan <rkagan@virtuozzo.com>
>> CC: Denis V. Lunev <den@openvz.org>
>> CC: qemu-devel@nongnu.org
> Reviewed-by: Peter Hornyack <peterhornyack@google.com>
>
>>
>> ---
>>   arch/x86/kvm/hyperv.c    | 117 +++++++++++++++++++++++++++++++++++++++++------
>>   arch/x86/kvm/hyperv.h    |   2 +
>>   arch/x86/kvm/x86.c       |  12 +++++
>>   include/linux/kvm_host.h |   1 +
>>   4 files changed, 117 insertions(+), 15 deletions(-)
>>
>> diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
>> index d50675a..504fdc7 100644
>> --- a/arch/x86/kvm/hyperv.c
>> +++ b/arch/x86/kvm/hyperv.c
>> @@ -753,6 +753,105 @@ static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu,
>>          return 0;
>>   }
>>
>> +static u64 calc_tsc_page_scale(u32 tsc_khz)
>> +{
>> +       /*
>> +        * reftime (in 100ns) = tsc * tsc_scale / 2^64 + tsc_offset
>> +        * so reftime_delta = (tsc_delta * tsc_scale) / 2^64
>> +        * so tsc_scale = (2^64 * reftime_delta)/tsc_delta
>> +        * so tsc_scale = (2^64 * 10 * 10^6) / tsc_hz = (2^64 * 10000) / tsc_khz
>> +        * so tsc_scale = (2^63 * 2 * 10000) / tsc_khz
>> +        */
>> +       return mul_u64_u32_div(1ULL << 63, 2 * 10000, tsc_khz);
>> +}
>> +
>> +static int write_tsc_page(struct kvm *kvm, u64 gfn,
>> +                         PHV_REFERENCE_TSC_PAGE tsc_ref)
>> +{
>> +       if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
>> +                           tsc_ref, sizeof(*tsc_ref)))
>> +               return 1;
>> +       mark_page_dirty(kvm, gfn);
>> +       return 0;
>> +}
>> +
>> +static int read_tsc_page(struct kvm *kvm, u64 gfn,
>> +                        PHV_REFERENCE_TSC_PAGE tsc_ref)
>> +{
>> +       if (kvm_read_guest(kvm, gfn_to_gpa(gfn),
>> +                          tsc_ref, sizeof(*tsc_ref)))
>> +               return 1;
>> +       return 0;
>> +}
>> +
>> +static u64 calc_tsc_page_time(struct kvm_vcpu *vcpu,
>> +                             PHV_REFERENCE_TSC_PAGE tsc_ref)
>> +{
>> +
>> +       u64 tsc = kvm_read_l1_tsc(vcpu, rdtsc());
>> +
>> +       return mul_u64_u64_shr(tsc, tsc_ref->tsc_scale, 64)
>> +               + tsc_ref->tsc_offset;
>> +}
>> +
>> +static int setup_blank_tsc_page(struct kvm_vcpu *vcpu, u64 gfn)
>> +{
>> +       HV_REFERENCE_TSC_PAGE tsc_ref;
>> +
>> +       memset(&tsc_ref, 0, sizeof(tsc_ref));
>> +       return write_tsc_page(vcpu->kvm, gfn, &tsc_ref);
>> +}
>> +
>> +int kvm_hv_setup_tsc_page(struct kvm_vcpu *vcpu)
>> +{
>> +       struct kvm *kvm = vcpu->kvm;
>> +       struct kvm_hv *hv = &kvm->arch.hyperv;
>> +       HV_REFERENCE_TSC_PAGE tsc_ref;
>> +       u32 tsc_khz;
>> +       int r;
>> +       u64 gfn, ref_time, tsc_scale, tsc_offset, tsc;
>> +
>> +       if (WARN_ON_ONCE(!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)))
>> +               return -EINVAL;
>> +
>> +       gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
>> +       vcpu_debug(vcpu, "tsc page gfn 0x%llx\n", gfn);
>> +
>> +       tsc_khz = vcpu->arch.virtual_tsc_khz;
>> +       if (!tsc_khz) {
>> +               vcpu_unimpl(vcpu, "no tsc khz\n");
>> +               return setup_blank_tsc_page(vcpu, gfn);
>> +       }
>> +
>> +       r = read_tsc_page(kvm, gfn, &tsc_ref);
>> +       if (r) {
>> +               vcpu_err(vcpu, "can't access tsc page gfn 0x%llx\n", gfn);
>> +               return r;
>> +       }
>> +
>> +       tsc_scale = calc_tsc_page_scale(tsc_khz);
>> +       ref_time = get_time_ref_counter(kvm);
>> +       tsc = kvm_read_l1_tsc(vcpu, rdtsc());
>> +
>> +       /* tsc_offset = reftime - tsc * tsc_scale / 2^64 */
>> +       tsc_offset = ref_time - mul_u64_u64_shr(tsc, tsc_scale, 64);
>> +       vcpu_debug(vcpu, "tsc khz %u tsc %llu scale %llu offset %llu\n",
>> +                  tsc_khz, tsc, tsc_scale, tsc_offset);
>> +
>> +       tsc_ref.tsc_sequence++;
>> +       if (tsc_ref.tsc_sequence == 0)
>
> Also avoid tsc_sequence == 0xffffffff here. In the Hyper-V TLFS 4.0
> (Win2012 R2) 0xffffffff is the special sequence number to disable the
> reference TSC page.
>
we already discussed with Microsoft
that documentation contains wrong sequence number
- 0xffffffff (instead of 0). please take a look into details here:
https://lkml.org/lkml/2015/11/2/655
>> +               tsc_ref.tsc_sequence = 1;
>> +
>> +       tsc_ref.tsc_scale = tsc_scale;
>> +       tsc_ref.tsc_offset = tsc_offset;
>> +
>> +       vcpu_debug(vcpu, "tsc page calibration time %llu vs. reftime %llu\n",
>> +                  calc_tsc_page_time(vcpu, &tsc_ref),
>> +                  get_time_ref_counter(kvm));
>> +
>> +       return write_tsc_page(kvm, gfn, &tsc_ref);
>> +}
>> +
>>   static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
>>                               bool host)
>>   {
>> @@ -790,23 +889,11 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
>>                  mark_page_dirty(kvm, gfn);
>>                  break;
>>          }
>> -       case HV_X64_MSR_REFERENCE_TSC: {
>> -               u64 gfn;
>> -               HV_REFERENCE_TSC_PAGE tsc_ref;
>> -
>> -               memset(&tsc_ref, 0, sizeof(tsc_ref));
>> +       case HV_X64_MSR_REFERENCE_TSC:
>>                  hv->hv_tsc_page = data;
>> -               if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE))
>> -                       break;
>> -               gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
>> -               if (kvm_write_guest(
>> -                               kvm,
>> -                               gfn << HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT,
>> -                               &tsc_ref, sizeof(tsc_ref)))
>> -                       return 1;
>> -               mark_page_dirty(kvm, gfn);
>> +               if (hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)
>> +                       kvm_make_request(KVM_REQ_HV_TSC_PAGE, vcpu);
>>                  break;
>> -       }
>>          case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
>>                  return kvm_hv_msr_set_crash_data(vcpu,
>>                                                   msr - HV_X64_MSR_CRASH_P0,
>> diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
>> index 60eccd4..e7cc446 100644
>> --- a/arch/x86/kvm/hyperv.h
>> +++ b/arch/x86/kvm/hyperv.h
>> @@ -84,4 +84,6 @@ static inline bool kvm_hv_has_stimer_pending(struct kvm_vcpu *vcpu)
>>
>>   void kvm_hv_process_stimers(struct kvm_vcpu *vcpu);
>>
>> +int kvm_hv_setup_tsc_page(struct kvm_vcpu *vcpu);
>> +
>>   #endif
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index aedb1a0..1d821f6 100644
>> --- a/arch/x86/kvm/x86.c
>> +++ b/arch/x86/kvm/x86.c
>> @@ -6500,6 +6500,18 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>>                   */
>>                  if (kvm_check_request(KVM_REQ_HV_STIMER, vcpu))
>>                          kvm_hv_process_stimers(vcpu);
>> +
>> +               /*
>> +                * KVM_REQ_HV_TSC_PAGE setup should be after
>> +                * KVM_REQ_CLOCK_UPDATE processing because
>> +                * Hyper-V tsc page content depends
>> +                * on kvm->arch.kvmclock_offset value.
>> +                */
>> +               if (kvm_check_request(KVM_REQ_HV_TSC_PAGE, vcpu)) {
>> +                       r = kvm_hv_setup_tsc_page(vcpu);
>> +                       if (unlikely(r))
>> +                               goto out;
>> +               }
>>          }
>>
>>          /*
>> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
>> index 6877b4d7e..93c9e25 100644
>> --- a/include/linux/kvm_host.h
>> +++ b/include/linux/kvm_host.h
>> @@ -145,6 +145,7 @@ static inline bool is_error_page(struct page *page)
>>   #define KVM_REQ_HV_RESET          29
>>   #define KVM_REQ_HV_EXIT           30
>>   #define KVM_REQ_HV_STIMER         31
>> +#define KVM_REQ_HV_TSC_PAGE       32
>>
>>   #define KVM_REQ_MAX               64
>>
>> --
>> 2.4.3
>
> Looks good, nice helpful comments.
Thank you for review.
>
> I will note that HV_X64_MSR_REFERENCE_TSC is a good candidate to be
> handled in user space rather than in kvm. With my proposed MSR exits
> patches that have had some discussion recently, this code to implement
> the reference TSC page could be put in qemu instead of kvm.
Yes, this functionality can be implemented in QEMU when MSR exits
API appears inside KVM code.
>
> Peter
>

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH v1] kvm/x86: Hyper-V tsc page setup
@ 2016-01-06  9:22     ` Andrey Smetanin
  0 siblings, 0 replies; 42+ messages in thread
From: Andrey Smetanin @ 2016-01-06  9:22 UTC (permalink / raw)
  To: Peter Hornyack
  Cc: kvm list, Gleb Natapov, qemu-devel, Mike Waychison, Roman Kagan,
	Denis V. Lunev, Paolo Bonzini, Feng Min



On 01/06/2016 12:48 AM, Peter Hornyack wrote:
> On Thu, Dec 24, 2015 at 1:33 AM, Andrey Smetanin
> <asmetanin@virtuozzo.com> wrote:
>> Lately tsc page was implemented but filled with empty
>> values. This patch setup tsc page scale and offset based
>> on vcpu tsc, tsc_khz and  HV_X64_MSR_TIME_REF_COUNT value.
>>
>> The valid tsc page drops HV_X64_MSR_TIME_REF_COUNT msr
>> reads count to zero which potentially improves performance.
>>
>> The patch applies on top of
>> 'kvm: Make vcpu->requests as 64 bit bitmap'
>> previously sent.
>>
>> Signed-off-by: Andrey Smetanin <asmetanin@virtuozzo.com>
>> CC: Paolo Bonzini <pbonzini@redhat.com>
>> CC: Gleb Natapov <gleb@kernel.org>
>> CC: Roman Kagan <rkagan@virtuozzo.com>
>> CC: Denis V. Lunev <den@openvz.org>
>> CC: qemu-devel@nongnu.org
> Reviewed-by: Peter Hornyack <peterhornyack@google.com>
>
>>
>> ---
>>   arch/x86/kvm/hyperv.c    | 117 +++++++++++++++++++++++++++++++++++++++++------
>>   arch/x86/kvm/hyperv.h    |   2 +
>>   arch/x86/kvm/x86.c       |  12 +++++
>>   include/linux/kvm_host.h |   1 +
>>   4 files changed, 117 insertions(+), 15 deletions(-)
>>
>> diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
>> index d50675a..504fdc7 100644
>> --- a/arch/x86/kvm/hyperv.c
>> +++ b/arch/x86/kvm/hyperv.c
>> @@ -753,6 +753,105 @@ static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu,
>>          return 0;
>>   }
>>
>> +static u64 calc_tsc_page_scale(u32 tsc_khz)
>> +{
>> +       /*
>> +        * reftime (in 100ns) = tsc * tsc_scale / 2^64 + tsc_offset
>> +        * so reftime_delta = (tsc_delta * tsc_scale) / 2^64
>> +        * so tsc_scale = (2^64 * reftime_delta)/tsc_delta
>> +        * so tsc_scale = (2^64 * 10 * 10^6) / tsc_hz = (2^64 * 10000) / tsc_khz
>> +        * so tsc_scale = (2^63 * 2 * 10000) / tsc_khz
>> +        */
>> +       return mul_u64_u32_div(1ULL << 63, 2 * 10000, tsc_khz);
>> +}
>> +
>> +static int write_tsc_page(struct kvm *kvm, u64 gfn,
>> +                         PHV_REFERENCE_TSC_PAGE tsc_ref)
>> +{
>> +       if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
>> +                           tsc_ref, sizeof(*tsc_ref)))
>> +               return 1;
>> +       mark_page_dirty(kvm, gfn);
>> +       return 0;
>> +}
>> +
>> +static int read_tsc_page(struct kvm *kvm, u64 gfn,
>> +                        PHV_REFERENCE_TSC_PAGE tsc_ref)
>> +{
>> +       if (kvm_read_guest(kvm, gfn_to_gpa(gfn),
>> +                          tsc_ref, sizeof(*tsc_ref)))
>> +               return 1;
>> +       return 0;
>> +}
>> +
>> +static u64 calc_tsc_page_time(struct kvm_vcpu *vcpu,
>> +                             PHV_REFERENCE_TSC_PAGE tsc_ref)
>> +{
>> +
>> +       u64 tsc = kvm_read_l1_tsc(vcpu, rdtsc());
>> +
>> +       return mul_u64_u64_shr(tsc, tsc_ref->tsc_scale, 64)
>> +               + tsc_ref->tsc_offset;
>> +}
>> +
>> +static int setup_blank_tsc_page(struct kvm_vcpu *vcpu, u64 gfn)
>> +{
>> +       HV_REFERENCE_TSC_PAGE tsc_ref;
>> +
>> +       memset(&tsc_ref, 0, sizeof(tsc_ref));
>> +       return write_tsc_page(vcpu->kvm, gfn, &tsc_ref);
>> +}
>> +
>> +int kvm_hv_setup_tsc_page(struct kvm_vcpu *vcpu)
>> +{
>> +       struct kvm *kvm = vcpu->kvm;
>> +       struct kvm_hv *hv = &kvm->arch.hyperv;
>> +       HV_REFERENCE_TSC_PAGE tsc_ref;
>> +       u32 tsc_khz;
>> +       int r;
>> +       u64 gfn, ref_time, tsc_scale, tsc_offset, tsc;
>> +
>> +       if (WARN_ON_ONCE(!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)))
>> +               return -EINVAL;
>> +
>> +       gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
>> +       vcpu_debug(vcpu, "tsc page gfn 0x%llx\n", gfn);
>> +
>> +       tsc_khz = vcpu->arch.virtual_tsc_khz;
>> +       if (!tsc_khz) {
>> +               vcpu_unimpl(vcpu, "no tsc khz\n");
>> +               return setup_blank_tsc_page(vcpu, gfn);
>> +       }
>> +
>> +       r = read_tsc_page(kvm, gfn, &tsc_ref);
>> +       if (r) {
>> +               vcpu_err(vcpu, "can't access tsc page gfn 0x%llx\n", gfn);
>> +               return r;
>> +       }
>> +
>> +       tsc_scale = calc_tsc_page_scale(tsc_khz);
>> +       ref_time = get_time_ref_counter(kvm);
>> +       tsc = kvm_read_l1_tsc(vcpu, rdtsc());
>> +
>> +       /* tsc_offset = reftime - tsc * tsc_scale / 2^64 */
>> +       tsc_offset = ref_time - mul_u64_u64_shr(tsc, tsc_scale, 64);
>> +       vcpu_debug(vcpu, "tsc khz %u tsc %llu scale %llu offset %llu\n",
>> +                  tsc_khz, tsc, tsc_scale, tsc_offset);
>> +
>> +       tsc_ref.tsc_sequence++;
>> +       if (tsc_ref.tsc_sequence == 0)
>
> Also avoid tsc_sequence == 0xffffffff here. In the Hyper-V TLFS 4.0
> (Win2012 R2) 0xffffffff is the special sequence number to disable the
> reference TSC page.
>
we already discussed with Microsoft
that documentation contains wrong sequence number
- 0xffffffff (instead of 0). please take a look into details here:
https://lkml.org/lkml/2015/11/2/655
>> +               tsc_ref.tsc_sequence = 1;
>> +
>> +       tsc_ref.tsc_scale = tsc_scale;
>> +       tsc_ref.tsc_offset = tsc_offset;
>> +
>> +       vcpu_debug(vcpu, "tsc page calibration time %llu vs. reftime %llu\n",
>> +                  calc_tsc_page_time(vcpu, &tsc_ref),
>> +                  get_time_ref_counter(kvm));
>> +
>> +       return write_tsc_page(kvm, gfn, &tsc_ref);
>> +}
>> +
>>   static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
>>                               bool host)
>>   {
>> @@ -790,23 +889,11 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
>>                  mark_page_dirty(kvm, gfn);
>>                  break;
>>          }
>> -       case HV_X64_MSR_REFERENCE_TSC: {
>> -               u64 gfn;
>> -               HV_REFERENCE_TSC_PAGE tsc_ref;
>> -
>> -               memset(&tsc_ref, 0, sizeof(tsc_ref));
>> +       case HV_X64_MSR_REFERENCE_TSC:
>>                  hv->hv_tsc_page = data;
>> -               if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE))
>> -                       break;
>> -               gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
>> -               if (kvm_write_guest(
>> -                               kvm,
>> -                               gfn << HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT,
>> -                               &tsc_ref, sizeof(tsc_ref)))
>> -                       return 1;
>> -               mark_page_dirty(kvm, gfn);
>> +               if (hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)
>> +                       kvm_make_request(KVM_REQ_HV_TSC_PAGE, vcpu);
>>                  break;
>> -       }
>>          case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
>>                  return kvm_hv_msr_set_crash_data(vcpu,
>>                                                   msr - HV_X64_MSR_CRASH_P0,
>> diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
>> index 60eccd4..e7cc446 100644
>> --- a/arch/x86/kvm/hyperv.h
>> +++ b/arch/x86/kvm/hyperv.h
>> @@ -84,4 +84,6 @@ static inline bool kvm_hv_has_stimer_pending(struct kvm_vcpu *vcpu)
>>
>>   void kvm_hv_process_stimers(struct kvm_vcpu *vcpu);
>>
>> +int kvm_hv_setup_tsc_page(struct kvm_vcpu *vcpu);
>> +
>>   #endif
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index aedb1a0..1d821f6 100644
>> --- a/arch/x86/kvm/x86.c
>> +++ b/arch/x86/kvm/x86.c
>> @@ -6500,6 +6500,18 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>>                   */
>>                  if (kvm_check_request(KVM_REQ_HV_STIMER, vcpu))
>>                          kvm_hv_process_stimers(vcpu);
>> +
>> +               /*
>> +                * KVM_REQ_HV_TSC_PAGE setup should be after
>> +                * KVM_REQ_CLOCK_UPDATE processing because
>> +                * Hyper-V tsc page content depends
>> +                * on kvm->arch.kvmclock_offset value.
>> +                */
>> +               if (kvm_check_request(KVM_REQ_HV_TSC_PAGE, vcpu)) {
>> +                       r = kvm_hv_setup_tsc_page(vcpu);
>> +                       if (unlikely(r))
>> +                               goto out;
>> +               }
>>          }
>>
>>          /*
>> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
>> index 6877b4d7e..93c9e25 100644
>> --- a/include/linux/kvm_host.h
>> +++ b/include/linux/kvm_host.h
>> @@ -145,6 +145,7 @@ static inline bool is_error_page(struct page *page)
>>   #define KVM_REQ_HV_RESET          29
>>   #define KVM_REQ_HV_EXIT           30
>>   #define KVM_REQ_HV_STIMER         31
>> +#define KVM_REQ_HV_TSC_PAGE       32
>>
>>   #define KVM_REQ_MAX               64
>>
>> --
>> 2.4.3
>
> Looks good, nice helpful comments.
Thank you for review.
>
> I will note that HV_X64_MSR_REFERENCE_TSC is a good candidate to be
> handled in user space rather than in kvm. With my proposed MSR exits
> patches that have had some discussion recently, this code to implement
> the reference TSC page could be put in qemu instead of kvm.
Yes, this functionality can be implemented in QEMU when MSR exits
API appears inside KVM code.
>
> Peter
>

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH v1] kvm/x86: Hyper-V tsc page setup
  2015-12-24  9:33 ` [Qemu-devel] " Andrey Smetanin
@ 2016-01-12  7:43   ` Andrey Smetanin
  -1 siblings, 0 replies; 42+ messages in thread
From: Andrey Smetanin @ 2016-01-12  7:43 UTC (permalink / raw)
  To: kvm; +Cc: Paolo Bonzini, Gleb Natapov, Roman Kagan, Denis V. Lunev, qemu-devel

ping
On 12/24/2015 12:33 PM, Andrey Smetanin wrote:
> Lately tsc page was implemented but filled with empty
> values. This patch setup tsc page scale and offset based
> on vcpu tsc, tsc_khz and  HV_X64_MSR_TIME_REF_COUNT value.
>
> The valid tsc page drops HV_X64_MSR_TIME_REF_COUNT msr
> reads count to zero which potentially improves performance.
>
> The patch applies on top of
> 'kvm: Make vcpu->requests as 64 bit bitmap'
> previously sent.
>
> Signed-off-by: Andrey Smetanin <asmetanin@virtuozzo.com>
> CC: Paolo Bonzini <pbonzini@redhat.com>
> CC: Gleb Natapov <gleb@kernel.org>
> CC: Roman Kagan <rkagan@virtuozzo.com>
> CC: Denis V. Lunev <den@openvz.org>
> CC: qemu-devel@nongnu.org
>
> ---
>   arch/x86/kvm/hyperv.c    | 117 +++++++++++++++++++++++++++++++++++++++++------
>   arch/x86/kvm/hyperv.h    |   2 +
>   arch/x86/kvm/x86.c       |  12 +++++
>   include/linux/kvm_host.h |   1 +
>   4 files changed, 117 insertions(+), 15 deletions(-)
>
> diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
> index d50675a..504fdc7 100644
> --- a/arch/x86/kvm/hyperv.c
> +++ b/arch/x86/kvm/hyperv.c
> @@ -753,6 +753,105 @@ static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu,
>   	return 0;
>   }
>   
> +static u64 calc_tsc_page_scale(u32 tsc_khz)
> +{
> +	/*
> +	 * reftime (in 100ns) = tsc * tsc_scale / 2^64 + tsc_offset
> +	 * so reftime_delta = (tsc_delta * tsc_scale) / 2^64
> +	 * so tsc_scale = (2^64 * reftime_delta)/tsc_delta
> +	 * so tsc_scale = (2^64 * 10 * 10^6) / tsc_hz = (2^64 * 10000) / tsc_khz
> +	 * so tsc_scale = (2^63 * 2 * 10000) / tsc_khz
> +	 */
> +	return mul_u64_u32_div(1ULL << 63, 2 * 10000, tsc_khz);
> +}
> +
> +static int write_tsc_page(struct kvm *kvm, u64 gfn,
> +			  PHV_REFERENCE_TSC_PAGE tsc_ref)
> +{
> +	if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
> +			    tsc_ref, sizeof(*tsc_ref)))
> +		return 1;
> +	mark_page_dirty(kvm, gfn);
> +	return 0;
> +}
> +
> +static int read_tsc_page(struct kvm *kvm, u64 gfn,
> +			 PHV_REFERENCE_TSC_PAGE tsc_ref)
> +{
> +	if (kvm_read_guest(kvm, gfn_to_gpa(gfn),
> +			   tsc_ref, sizeof(*tsc_ref)))
> +		return 1;
> +	return 0;
> +}
> +
> +static u64 calc_tsc_page_time(struct kvm_vcpu *vcpu,
> +			      PHV_REFERENCE_TSC_PAGE tsc_ref)
> +{
> +
> +	u64 tsc = kvm_read_l1_tsc(vcpu, rdtsc());
> +
> +	return mul_u64_u64_shr(tsc, tsc_ref->tsc_scale, 64)
> +		+ tsc_ref->tsc_offset;
> +}
> +
> +static int setup_blank_tsc_page(struct kvm_vcpu *vcpu, u64 gfn)
> +{
> +	HV_REFERENCE_TSC_PAGE tsc_ref;
> +
> +	memset(&tsc_ref, 0, sizeof(tsc_ref));
> +	return write_tsc_page(vcpu->kvm, gfn, &tsc_ref);
> +}
> +
> +int kvm_hv_setup_tsc_page(struct kvm_vcpu *vcpu)
> +{
> +	struct kvm *kvm = vcpu->kvm;
> +	struct kvm_hv *hv = &kvm->arch.hyperv;
> +	HV_REFERENCE_TSC_PAGE tsc_ref;
> +	u32 tsc_khz;
> +	int r;
> +	u64 gfn, ref_time, tsc_scale, tsc_offset, tsc;
> +
> +	if (WARN_ON_ONCE(!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)))
> +		return -EINVAL;
> +
> +	gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
> +	vcpu_debug(vcpu, "tsc page gfn 0x%llx\n", gfn);
> +
> +	tsc_khz = vcpu->arch.virtual_tsc_khz;
> +	if (!tsc_khz) {
> +		vcpu_unimpl(vcpu, "no tsc khz\n");
> +		return setup_blank_tsc_page(vcpu, gfn);
> +	}
> +
> +	r = read_tsc_page(kvm, gfn, &tsc_ref);
> +	if (r) {
> +		vcpu_err(vcpu, "can't access tsc page gfn 0x%llx\n", gfn);
> +		return r;
> +	}
> +
> +	tsc_scale = calc_tsc_page_scale(tsc_khz);
> +	ref_time = get_time_ref_counter(kvm);
> +	tsc = kvm_read_l1_tsc(vcpu, rdtsc());
> +
> +	/* tsc_offset = reftime - tsc * tsc_scale / 2^64 */
> +	tsc_offset = ref_time - mul_u64_u64_shr(tsc, tsc_scale, 64);
> +	vcpu_debug(vcpu, "tsc khz %u tsc %llu scale %llu offset %llu\n",
> +		   tsc_khz, tsc, tsc_scale, tsc_offset);
> +
> +	tsc_ref.tsc_sequence++;
> +	if (tsc_ref.tsc_sequence == 0)
> +		tsc_ref.tsc_sequence = 1;
> +
> +	tsc_ref.tsc_scale = tsc_scale;
> +	tsc_ref.tsc_offset = tsc_offset;
> +
> +	vcpu_debug(vcpu, "tsc page calibration time %llu vs. reftime %llu\n",
> +		   calc_tsc_page_time(vcpu, &tsc_ref),
> +		   get_time_ref_counter(kvm));
> +
> +	return write_tsc_page(kvm, gfn, &tsc_ref);
> +}
> +
>   static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
>   			     bool host)
>   {
> @@ -790,23 +889,11 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
>   		mark_page_dirty(kvm, gfn);
>   		break;
>   	}
> -	case HV_X64_MSR_REFERENCE_TSC: {
> -		u64 gfn;
> -		HV_REFERENCE_TSC_PAGE tsc_ref;
> -
> -		memset(&tsc_ref, 0, sizeof(tsc_ref));
> +	case HV_X64_MSR_REFERENCE_TSC:
>   		hv->hv_tsc_page = data;
> -		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE))
> -			break;
> -		gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
> -		if (kvm_write_guest(
> -				kvm,
> -				gfn << HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT,
> -				&tsc_ref, sizeof(tsc_ref)))
> -			return 1;
> -		mark_page_dirty(kvm, gfn);
> +		if (hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)
> +			kvm_make_request(KVM_REQ_HV_TSC_PAGE, vcpu);
>   		break;
> -	}
>   	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
>   		return kvm_hv_msr_set_crash_data(vcpu,
>   						 msr - HV_X64_MSR_CRASH_P0,
> diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
> index 60eccd4..e7cc446 100644
> --- a/arch/x86/kvm/hyperv.h
> +++ b/arch/x86/kvm/hyperv.h
> @@ -84,4 +84,6 @@ static inline bool kvm_hv_has_stimer_pending(struct kvm_vcpu *vcpu)
>   
>   void kvm_hv_process_stimers(struct kvm_vcpu *vcpu);
>   
> +int kvm_hv_setup_tsc_page(struct kvm_vcpu *vcpu);
> +
>   #endif
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index aedb1a0..1d821f6 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -6500,6 +6500,18 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>   		 */
>   		if (kvm_check_request(KVM_REQ_HV_STIMER, vcpu))
>   			kvm_hv_process_stimers(vcpu);
> +
> +		/*
> +		 * KVM_REQ_HV_TSC_PAGE setup should be after
> +		 * KVM_REQ_CLOCK_UPDATE processing because
> +		 * Hyper-V tsc page content depends
> +		 * on kvm->arch.kvmclock_offset value.
> +		 */
> +		if (kvm_check_request(KVM_REQ_HV_TSC_PAGE, vcpu)) {
> +			r = kvm_hv_setup_tsc_page(vcpu);
> +			if (unlikely(r))
> +				goto out;
> +		}
>   	}
>   
>   	/*
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 6877b4d7e..93c9e25 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -145,6 +145,7 @@ static inline bool is_error_page(struct page *page)
>   #define KVM_REQ_HV_RESET          29
>   #define KVM_REQ_HV_EXIT           30
>   #define KVM_REQ_HV_STIMER         31
> +#define KVM_REQ_HV_TSC_PAGE       32
>   
>   #define KVM_REQ_MAX               64
>   


^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH v1] kvm/x86: Hyper-V tsc page setup
@ 2016-01-12  7:43   ` Andrey Smetanin
  0 siblings, 0 replies; 42+ messages in thread
From: Andrey Smetanin @ 2016-01-12  7:43 UTC (permalink / raw)
  To: kvm; +Cc: Gleb Natapov, Paolo Bonzini, qemu-devel, Roman Kagan, Denis V. Lunev

ping
On 12/24/2015 12:33 PM, Andrey Smetanin wrote:
> Lately tsc page was implemented but filled with empty
> values. This patch setup tsc page scale and offset based
> on vcpu tsc, tsc_khz and  HV_X64_MSR_TIME_REF_COUNT value.
>
> The valid tsc page drops HV_X64_MSR_TIME_REF_COUNT msr
> reads count to zero which potentially improves performance.
>
> The patch applies on top of
> 'kvm: Make vcpu->requests as 64 bit bitmap'
> previously sent.
>
> Signed-off-by: Andrey Smetanin <asmetanin@virtuozzo.com>
> CC: Paolo Bonzini <pbonzini@redhat.com>
> CC: Gleb Natapov <gleb@kernel.org>
> CC: Roman Kagan <rkagan@virtuozzo.com>
> CC: Denis V. Lunev <den@openvz.org>
> CC: qemu-devel@nongnu.org
>
> ---
>   arch/x86/kvm/hyperv.c    | 117 +++++++++++++++++++++++++++++++++++++++++------
>   arch/x86/kvm/hyperv.h    |   2 +
>   arch/x86/kvm/x86.c       |  12 +++++
>   include/linux/kvm_host.h |   1 +
>   4 files changed, 117 insertions(+), 15 deletions(-)
>
> diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
> index d50675a..504fdc7 100644
> --- a/arch/x86/kvm/hyperv.c
> +++ b/arch/x86/kvm/hyperv.c
> @@ -753,6 +753,105 @@ static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu,
>   	return 0;
>   }
>   
> +static u64 calc_tsc_page_scale(u32 tsc_khz)
> +{
> +	/*
> +	 * reftime (in 100ns) = tsc * tsc_scale / 2^64 + tsc_offset
> +	 * so reftime_delta = (tsc_delta * tsc_scale) / 2^64
> +	 * so tsc_scale = (2^64 * reftime_delta)/tsc_delta
> +	 * so tsc_scale = (2^64 * 10 * 10^6) / tsc_hz = (2^64 * 10000) / tsc_khz
> +	 * so tsc_scale = (2^63 * 2 * 10000) / tsc_khz
> +	 */
> +	return mul_u64_u32_div(1ULL << 63, 2 * 10000, tsc_khz);
> +}
> +
> +static int write_tsc_page(struct kvm *kvm, u64 gfn,
> +			  PHV_REFERENCE_TSC_PAGE tsc_ref)
> +{
> +	if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
> +			    tsc_ref, sizeof(*tsc_ref)))
> +		return 1;
> +	mark_page_dirty(kvm, gfn);
> +	return 0;
> +}
> +
> +static int read_tsc_page(struct kvm *kvm, u64 gfn,
> +			 PHV_REFERENCE_TSC_PAGE tsc_ref)
> +{
> +	if (kvm_read_guest(kvm, gfn_to_gpa(gfn),
> +			   tsc_ref, sizeof(*tsc_ref)))
> +		return 1;
> +	return 0;
> +}
> +
> +static u64 calc_tsc_page_time(struct kvm_vcpu *vcpu,
> +			      PHV_REFERENCE_TSC_PAGE tsc_ref)
> +{
> +
> +	u64 tsc = kvm_read_l1_tsc(vcpu, rdtsc());
> +
> +	return mul_u64_u64_shr(tsc, tsc_ref->tsc_scale, 64)
> +		+ tsc_ref->tsc_offset;
> +}
> +
> +static int setup_blank_tsc_page(struct kvm_vcpu *vcpu, u64 gfn)
> +{
> +	HV_REFERENCE_TSC_PAGE tsc_ref;
> +
> +	memset(&tsc_ref, 0, sizeof(tsc_ref));
> +	return write_tsc_page(vcpu->kvm, gfn, &tsc_ref);
> +}
> +
> +int kvm_hv_setup_tsc_page(struct kvm_vcpu *vcpu)
> +{
> +	struct kvm *kvm = vcpu->kvm;
> +	struct kvm_hv *hv = &kvm->arch.hyperv;
> +	HV_REFERENCE_TSC_PAGE tsc_ref;
> +	u32 tsc_khz;
> +	int r;
> +	u64 gfn, ref_time, tsc_scale, tsc_offset, tsc;
> +
> +	if (WARN_ON_ONCE(!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)))
> +		return -EINVAL;
> +
> +	gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
> +	vcpu_debug(vcpu, "tsc page gfn 0x%llx\n", gfn);
> +
> +	tsc_khz = vcpu->arch.virtual_tsc_khz;
> +	if (!tsc_khz) {
> +		vcpu_unimpl(vcpu, "no tsc khz\n");
> +		return setup_blank_tsc_page(vcpu, gfn);
> +	}
> +
> +	r = read_tsc_page(kvm, gfn, &tsc_ref);
> +	if (r) {
> +		vcpu_err(vcpu, "can't access tsc page gfn 0x%llx\n", gfn);
> +		return r;
> +	}
> +
> +	tsc_scale = calc_tsc_page_scale(tsc_khz);
> +	ref_time = get_time_ref_counter(kvm);
> +	tsc = kvm_read_l1_tsc(vcpu, rdtsc());
> +
> +	/* tsc_offset = reftime - tsc * tsc_scale / 2^64 */
> +	tsc_offset = ref_time - mul_u64_u64_shr(tsc, tsc_scale, 64);
> +	vcpu_debug(vcpu, "tsc khz %u tsc %llu scale %llu offset %llu\n",
> +		   tsc_khz, tsc, tsc_scale, tsc_offset);
> +
> +	tsc_ref.tsc_sequence++;
> +	if (tsc_ref.tsc_sequence == 0)
> +		tsc_ref.tsc_sequence = 1;
> +
> +	tsc_ref.tsc_scale = tsc_scale;
> +	tsc_ref.tsc_offset = tsc_offset;
> +
> +	vcpu_debug(vcpu, "tsc page calibration time %llu vs. reftime %llu\n",
> +		   calc_tsc_page_time(vcpu, &tsc_ref),
> +		   get_time_ref_counter(kvm));
> +
> +	return write_tsc_page(kvm, gfn, &tsc_ref);
> +}
> +
>   static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
>   			     bool host)
>   {
> @@ -790,23 +889,11 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
>   		mark_page_dirty(kvm, gfn);
>   		break;
>   	}
> -	case HV_X64_MSR_REFERENCE_TSC: {
> -		u64 gfn;
> -		HV_REFERENCE_TSC_PAGE tsc_ref;
> -
> -		memset(&tsc_ref, 0, sizeof(tsc_ref));
> +	case HV_X64_MSR_REFERENCE_TSC:
>   		hv->hv_tsc_page = data;
> -		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE))
> -			break;
> -		gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
> -		if (kvm_write_guest(
> -				kvm,
> -				gfn << HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT,
> -				&tsc_ref, sizeof(tsc_ref)))
> -			return 1;
> -		mark_page_dirty(kvm, gfn);
> +		if (hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)
> +			kvm_make_request(KVM_REQ_HV_TSC_PAGE, vcpu);
>   		break;
> -	}
>   	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
>   		return kvm_hv_msr_set_crash_data(vcpu,
>   						 msr - HV_X64_MSR_CRASH_P0,
> diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
> index 60eccd4..e7cc446 100644
> --- a/arch/x86/kvm/hyperv.h
> +++ b/arch/x86/kvm/hyperv.h
> @@ -84,4 +84,6 @@ static inline bool kvm_hv_has_stimer_pending(struct kvm_vcpu *vcpu)
>   
>   void kvm_hv_process_stimers(struct kvm_vcpu *vcpu);
>   
> +int kvm_hv_setup_tsc_page(struct kvm_vcpu *vcpu);
> +
>   #endif
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index aedb1a0..1d821f6 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -6500,6 +6500,18 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>   		 */
>   		if (kvm_check_request(KVM_REQ_HV_STIMER, vcpu))
>   			kvm_hv_process_stimers(vcpu);
> +
> +		/*
> +		 * KVM_REQ_HV_TSC_PAGE setup should be after
> +		 * KVM_REQ_CLOCK_UPDATE processing because
> +		 * Hyper-V tsc page content depends
> +		 * on kvm->arch.kvmclock_offset value.
> +		 */
> +		if (kvm_check_request(KVM_REQ_HV_TSC_PAGE, vcpu)) {
> +			r = kvm_hv_setup_tsc_page(vcpu);
> +			if (unlikely(r))
> +				goto out;
> +		}
>   	}
>   
>   	/*
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 6877b4d7e..93c9e25 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -145,6 +145,7 @@ static inline bool is_error_page(struct page *page)
>   #define KVM_REQ_HV_RESET          29
>   #define KVM_REQ_HV_EXIT           30
>   #define KVM_REQ_HV_STIMER         31
> +#define KVM_REQ_HV_TSC_PAGE       32
>   
>   #define KVM_REQ_MAX               64
>   

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH v1] kvm/x86: Hyper-V tsc page setup
  2015-12-24  9:33 ` [Qemu-devel] " Andrey Smetanin
@ 2016-01-19  7:48   ` Denis V. Lunev
  -1 siblings, 0 replies; 42+ messages in thread
From: Denis V. Lunev @ 2016-01-19  7:48 UTC (permalink / raw)
  To: Andrey Smetanin, kvm; +Cc: Paolo Bonzini, Gleb Natapov, Roman Kagan, qemu-devel

On 12/24/2015 12:33 PM, Andrey Smetanin wrote:
> Lately tsc page was implemented but filled with empty
> values. This patch setup tsc page scale and offset based
> on vcpu tsc, tsc_khz and  HV_X64_MSR_TIME_REF_COUNT value.
>
> The valid tsc page drops HV_X64_MSR_TIME_REF_COUNT msr
> reads count to zero which potentially improves performance.
>
> The patch applies on top of
> 'kvm: Make vcpu->requests as 64 bit bitmap'
> previously sent.
>
> Signed-off-by: Andrey Smetanin <asmetanin@virtuozzo.com>
> CC: Paolo Bonzini <pbonzini@redhat.com>
> CC: Gleb Natapov <gleb@kernel.org>
> CC: Roman Kagan <rkagan@virtuozzo.com>
> CC: Denis V. Lunev <den@openvz.org>
> CC: qemu-devel@nongnu.org
>
> ---
>   arch/x86/kvm/hyperv.c    | 117 +++++++++++++++++++++++++++++++++++++++++------
>   arch/x86/kvm/hyperv.h    |   2 +
>   arch/x86/kvm/x86.c       |  12 +++++
>   include/linux/kvm_host.h |   1 +
>   4 files changed, 117 insertions(+), 15 deletions(-)
>
> diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
> index d50675a..504fdc7 100644
> --- a/arch/x86/kvm/hyperv.c
> +++ b/arch/x86/kvm/hyperv.c
> @@ -753,6 +753,105 @@ static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu,
>   	return 0;
>   }
>   
> +static u64 calc_tsc_page_scale(u32 tsc_khz)
> +{
> +	/*
> +	 * reftime (in 100ns) = tsc * tsc_scale / 2^64 + tsc_offset
> +	 * so reftime_delta = (tsc_delta * tsc_scale) / 2^64
> +	 * so tsc_scale = (2^64 * reftime_delta)/tsc_delta
> +	 * so tsc_scale = (2^64 * 10 * 10^6) / tsc_hz = (2^64 * 10000) / tsc_khz
> +	 * so tsc_scale = (2^63 * 2 * 10000) / tsc_khz
> +	 */
> +	return mul_u64_u32_div(1ULL << 63, 2 * 10000, tsc_khz);
> +}
> +
> +static int write_tsc_page(struct kvm *kvm, u64 gfn,
> +			  PHV_REFERENCE_TSC_PAGE tsc_ref)
> +{
> +	if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
> +			    tsc_ref, sizeof(*tsc_ref)))
> +		return 1;
> +	mark_page_dirty(kvm, gfn);
> +	return 0;
> +}
> +
> +static int read_tsc_page(struct kvm *kvm, u64 gfn,
> +			 PHV_REFERENCE_TSC_PAGE tsc_ref)
> +{
> +	if (kvm_read_guest(kvm, gfn_to_gpa(gfn),
> +			   tsc_ref, sizeof(*tsc_ref)))
> +		return 1;
> +	return 0;
> +}
> +
> +static u64 calc_tsc_page_time(struct kvm_vcpu *vcpu,
> +			      PHV_REFERENCE_TSC_PAGE tsc_ref)
> +{
> +
> +	u64 tsc = kvm_read_l1_tsc(vcpu, rdtsc());
> +
> +	return mul_u64_u64_shr(tsc, tsc_ref->tsc_scale, 64)
> +		+ tsc_ref->tsc_offset;
> +}
> +
> +static int setup_blank_tsc_page(struct kvm_vcpu *vcpu, u64 gfn)
> +{
> +	HV_REFERENCE_TSC_PAGE tsc_ref;
> +
> +	memset(&tsc_ref, 0, sizeof(tsc_ref));
> +	return write_tsc_page(vcpu->kvm, gfn, &tsc_ref);
> +}
> +
> +int kvm_hv_setup_tsc_page(struct kvm_vcpu *vcpu)
> +{
> +	struct kvm *kvm = vcpu->kvm;
> +	struct kvm_hv *hv = &kvm->arch.hyperv;
> +	HV_REFERENCE_TSC_PAGE tsc_ref;
> +	u32 tsc_khz;
> +	int r;
> +	u64 gfn, ref_time, tsc_scale, tsc_offset, tsc;
> +
> +	if (WARN_ON_ONCE(!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)))
> +		return -EINVAL;
> +
> +	gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
> +	vcpu_debug(vcpu, "tsc page gfn 0x%llx\n", gfn);
> +
> +	tsc_khz = vcpu->arch.virtual_tsc_khz;
> +	if (!tsc_khz) {
> +		vcpu_unimpl(vcpu, "no tsc khz\n");
> +		return setup_blank_tsc_page(vcpu, gfn);
> +	}
> +
> +	r = read_tsc_page(kvm, gfn, &tsc_ref);
> +	if (r) {
> +		vcpu_err(vcpu, "can't access tsc page gfn 0x%llx\n", gfn);
> +		return r;
> +	}
> +
> +	tsc_scale = calc_tsc_page_scale(tsc_khz);
> +	ref_time = get_time_ref_counter(kvm);
> +	tsc = kvm_read_l1_tsc(vcpu, rdtsc());
> +
> +	/* tsc_offset = reftime - tsc * tsc_scale / 2^64 */
> +	tsc_offset = ref_time - mul_u64_u64_shr(tsc, tsc_scale, 64);
> +	vcpu_debug(vcpu, "tsc khz %u tsc %llu scale %llu offset %llu\n",
> +		   tsc_khz, tsc, tsc_scale, tsc_offset);
> +
> +	tsc_ref.tsc_sequence++;
> +	if (tsc_ref.tsc_sequence == 0)
> +		tsc_ref.tsc_sequence = 1;
> +
> +	tsc_ref.tsc_scale = tsc_scale;
> +	tsc_ref.tsc_offset = tsc_offset;
> +
> +	vcpu_debug(vcpu, "tsc page calibration time %llu vs. reftime %llu\n",
> +		   calc_tsc_page_time(vcpu, &tsc_ref),
> +		   get_time_ref_counter(kvm));
> +
> +	return write_tsc_page(kvm, gfn, &tsc_ref);
> +}
> +
>   static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
>   			     bool host)
>   {
> @@ -790,23 +889,11 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
>   		mark_page_dirty(kvm, gfn);
>   		break;
>   	}
> -	case HV_X64_MSR_REFERENCE_TSC: {
> -		u64 gfn;
> -		HV_REFERENCE_TSC_PAGE tsc_ref;
> -
> -		memset(&tsc_ref, 0, sizeof(tsc_ref));
> +	case HV_X64_MSR_REFERENCE_TSC:
>   		hv->hv_tsc_page = data;
> -		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE))
> -			break;
> -		gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
> -		if (kvm_write_guest(
> -				kvm,
> -				gfn << HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT,
> -				&tsc_ref, sizeof(tsc_ref)))
> -			return 1;
> -		mark_page_dirty(kvm, gfn);
> +		if (hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)
> +			kvm_make_request(KVM_REQ_HV_TSC_PAGE, vcpu);
>   		break;
> -	}
>   	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
>   		return kvm_hv_msr_set_crash_data(vcpu,
>   						 msr - HV_X64_MSR_CRASH_P0,
> diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
> index 60eccd4..e7cc446 100644
> --- a/arch/x86/kvm/hyperv.h
> +++ b/arch/x86/kvm/hyperv.h
> @@ -84,4 +84,6 @@ static inline bool kvm_hv_has_stimer_pending(struct kvm_vcpu *vcpu)
>   
>   void kvm_hv_process_stimers(struct kvm_vcpu *vcpu);
>   
> +int kvm_hv_setup_tsc_page(struct kvm_vcpu *vcpu);
> +
>   #endif
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index aedb1a0..1d821f6 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -6500,6 +6500,18 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>   		 */
>   		if (kvm_check_request(KVM_REQ_HV_STIMER, vcpu))
>   			kvm_hv_process_stimers(vcpu);
> +
> +		/*
> +		 * KVM_REQ_HV_TSC_PAGE setup should be after
> +		 * KVM_REQ_CLOCK_UPDATE processing because
> +		 * Hyper-V tsc page content depends
> +		 * on kvm->arch.kvmclock_offset value.
> +		 */
> +		if (kvm_check_request(KVM_REQ_HV_TSC_PAGE, vcpu)) {
> +			r = kvm_hv_setup_tsc_page(vcpu);
> +			if (unlikely(r))
> +				goto out;
> +		}
>   	}
>   
>   	/*
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 6877b4d7e..93c9e25 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -145,6 +145,7 @@ static inline bool is_error_page(struct page *page)
>   #define KVM_REQ_HV_RESET          29
>   #define KVM_REQ_HV_EXIT           30
>   #define KVM_REQ_HV_STIMER         31
> +#define KVM_REQ_HV_TSC_PAGE       32
>   
>   #define KVM_REQ_MAX               64
>   
ping

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH v1] kvm/x86: Hyper-V tsc page setup
@ 2016-01-19  7:48   ` Denis V. Lunev
  0 siblings, 0 replies; 42+ messages in thread
From: Denis V. Lunev @ 2016-01-19  7:48 UTC (permalink / raw)
  To: Andrey Smetanin, kvm; +Cc: Gleb Natapov, Paolo Bonzini, Roman Kagan, qemu-devel

On 12/24/2015 12:33 PM, Andrey Smetanin wrote:
> Lately tsc page was implemented but filled with empty
> values. This patch setup tsc page scale and offset based
> on vcpu tsc, tsc_khz and  HV_X64_MSR_TIME_REF_COUNT value.
>
> The valid tsc page drops HV_X64_MSR_TIME_REF_COUNT msr
> reads count to zero which potentially improves performance.
>
> The patch applies on top of
> 'kvm: Make vcpu->requests as 64 bit bitmap'
> previously sent.
>
> Signed-off-by: Andrey Smetanin <asmetanin@virtuozzo.com>
> CC: Paolo Bonzini <pbonzini@redhat.com>
> CC: Gleb Natapov <gleb@kernel.org>
> CC: Roman Kagan <rkagan@virtuozzo.com>
> CC: Denis V. Lunev <den@openvz.org>
> CC: qemu-devel@nongnu.org
>
> ---
>   arch/x86/kvm/hyperv.c    | 117 +++++++++++++++++++++++++++++++++++++++++------
>   arch/x86/kvm/hyperv.h    |   2 +
>   arch/x86/kvm/x86.c       |  12 +++++
>   include/linux/kvm_host.h |   1 +
>   4 files changed, 117 insertions(+), 15 deletions(-)
>
> diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
> index d50675a..504fdc7 100644
> --- a/arch/x86/kvm/hyperv.c
> +++ b/arch/x86/kvm/hyperv.c
> @@ -753,6 +753,105 @@ static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu,
>   	return 0;
>   }
>   
> +static u64 calc_tsc_page_scale(u32 tsc_khz)
> +{
> +	/*
> +	 * reftime (in 100ns) = tsc * tsc_scale / 2^64 + tsc_offset
> +	 * so reftime_delta = (tsc_delta * tsc_scale) / 2^64
> +	 * so tsc_scale = (2^64 * reftime_delta)/tsc_delta
> +	 * so tsc_scale = (2^64 * 10 * 10^6) / tsc_hz = (2^64 * 10000) / tsc_khz
> +	 * so tsc_scale = (2^63 * 2 * 10000) / tsc_khz
> +	 */
> +	return mul_u64_u32_div(1ULL << 63, 2 * 10000, tsc_khz);
> +}
> +
> +static int write_tsc_page(struct kvm *kvm, u64 gfn,
> +			  PHV_REFERENCE_TSC_PAGE tsc_ref)
> +{
> +	if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
> +			    tsc_ref, sizeof(*tsc_ref)))
> +		return 1;
> +	mark_page_dirty(kvm, gfn);
> +	return 0;
> +}
> +
> +static int read_tsc_page(struct kvm *kvm, u64 gfn,
> +			 PHV_REFERENCE_TSC_PAGE tsc_ref)
> +{
> +	if (kvm_read_guest(kvm, gfn_to_gpa(gfn),
> +			   tsc_ref, sizeof(*tsc_ref)))
> +		return 1;
> +	return 0;
> +}
> +
> +static u64 calc_tsc_page_time(struct kvm_vcpu *vcpu,
> +			      PHV_REFERENCE_TSC_PAGE tsc_ref)
> +{
> +
> +	u64 tsc = kvm_read_l1_tsc(vcpu, rdtsc());
> +
> +	return mul_u64_u64_shr(tsc, tsc_ref->tsc_scale, 64)
> +		+ tsc_ref->tsc_offset;
> +}
> +
> +static int setup_blank_tsc_page(struct kvm_vcpu *vcpu, u64 gfn)
> +{
> +	HV_REFERENCE_TSC_PAGE tsc_ref;
> +
> +	memset(&tsc_ref, 0, sizeof(tsc_ref));
> +	return write_tsc_page(vcpu->kvm, gfn, &tsc_ref);
> +}
> +
> +int kvm_hv_setup_tsc_page(struct kvm_vcpu *vcpu)
> +{
> +	struct kvm *kvm = vcpu->kvm;
> +	struct kvm_hv *hv = &kvm->arch.hyperv;
> +	HV_REFERENCE_TSC_PAGE tsc_ref;
> +	u32 tsc_khz;
> +	int r;
> +	u64 gfn, ref_time, tsc_scale, tsc_offset, tsc;
> +
> +	if (WARN_ON_ONCE(!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)))
> +		return -EINVAL;
> +
> +	gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
> +	vcpu_debug(vcpu, "tsc page gfn 0x%llx\n", gfn);
> +
> +	tsc_khz = vcpu->arch.virtual_tsc_khz;
> +	if (!tsc_khz) {
> +		vcpu_unimpl(vcpu, "no tsc khz\n");
> +		return setup_blank_tsc_page(vcpu, gfn);
> +	}
> +
> +	r = read_tsc_page(kvm, gfn, &tsc_ref);
> +	if (r) {
> +		vcpu_err(vcpu, "can't access tsc page gfn 0x%llx\n", gfn);
> +		return r;
> +	}
> +
> +	tsc_scale = calc_tsc_page_scale(tsc_khz);
> +	ref_time = get_time_ref_counter(kvm);
> +	tsc = kvm_read_l1_tsc(vcpu, rdtsc());
> +
> +	/* tsc_offset = reftime - tsc * tsc_scale / 2^64 */
> +	tsc_offset = ref_time - mul_u64_u64_shr(tsc, tsc_scale, 64);
> +	vcpu_debug(vcpu, "tsc khz %u tsc %llu scale %llu offset %llu\n",
> +		   tsc_khz, tsc, tsc_scale, tsc_offset);
> +
> +	tsc_ref.tsc_sequence++;
> +	if (tsc_ref.tsc_sequence == 0)
> +		tsc_ref.tsc_sequence = 1;
> +
> +	tsc_ref.tsc_scale = tsc_scale;
> +	tsc_ref.tsc_offset = tsc_offset;
> +
> +	vcpu_debug(vcpu, "tsc page calibration time %llu vs. reftime %llu\n",
> +		   calc_tsc_page_time(vcpu, &tsc_ref),
> +		   get_time_ref_counter(kvm));
> +
> +	return write_tsc_page(kvm, gfn, &tsc_ref);
> +}
> +
>   static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
>   			     bool host)
>   {
> @@ -790,23 +889,11 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
>   		mark_page_dirty(kvm, gfn);
>   		break;
>   	}
> -	case HV_X64_MSR_REFERENCE_TSC: {
> -		u64 gfn;
> -		HV_REFERENCE_TSC_PAGE tsc_ref;
> -
> -		memset(&tsc_ref, 0, sizeof(tsc_ref));
> +	case HV_X64_MSR_REFERENCE_TSC:
>   		hv->hv_tsc_page = data;
> -		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE))
> -			break;
> -		gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
> -		if (kvm_write_guest(
> -				kvm,
> -				gfn << HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT,
> -				&tsc_ref, sizeof(tsc_ref)))
> -			return 1;
> -		mark_page_dirty(kvm, gfn);
> +		if (hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)
> +			kvm_make_request(KVM_REQ_HV_TSC_PAGE, vcpu);
>   		break;
> -	}
>   	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
>   		return kvm_hv_msr_set_crash_data(vcpu,
>   						 msr - HV_X64_MSR_CRASH_P0,
> diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
> index 60eccd4..e7cc446 100644
> --- a/arch/x86/kvm/hyperv.h
> +++ b/arch/x86/kvm/hyperv.h
> @@ -84,4 +84,6 @@ static inline bool kvm_hv_has_stimer_pending(struct kvm_vcpu *vcpu)
>   
>   void kvm_hv_process_stimers(struct kvm_vcpu *vcpu);
>   
> +int kvm_hv_setup_tsc_page(struct kvm_vcpu *vcpu);
> +
>   #endif
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index aedb1a0..1d821f6 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -6500,6 +6500,18 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>   		 */
>   		if (kvm_check_request(KVM_REQ_HV_STIMER, vcpu))
>   			kvm_hv_process_stimers(vcpu);
> +
> +		/*
> +		 * KVM_REQ_HV_TSC_PAGE setup should be after
> +		 * KVM_REQ_CLOCK_UPDATE processing because
> +		 * Hyper-V tsc page content depends
> +		 * on kvm->arch.kvmclock_offset value.
> +		 */
> +		if (kvm_check_request(KVM_REQ_HV_TSC_PAGE, vcpu)) {
> +			r = kvm_hv_setup_tsc_page(vcpu);
> +			if (unlikely(r))
> +				goto out;
> +		}
>   	}
>   
>   	/*
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 6877b4d7e..93c9e25 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -145,6 +145,7 @@ static inline bool is_error_page(struct page *page)
>   #define KVM_REQ_HV_RESET          29
>   #define KVM_REQ_HV_EXIT           30
>   #define KVM_REQ_HV_STIMER         31
> +#define KVM_REQ_HV_TSC_PAGE       32
>   
>   #define KVM_REQ_MAX               64
>   
ping

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH v1] kvm/x86: Hyper-V tsc page setup
  2016-01-19  7:48   ` [Qemu-devel] " Denis V. Lunev
@ 2016-01-20 14:05     ` Paolo Bonzini
  -1 siblings, 0 replies; 42+ messages in thread
From: Paolo Bonzini @ 2016-01-20 14:05 UTC (permalink / raw)
  To: Denis V. Lunev, Andrey Smetanin, kvm
  Cc: Gleb Natapov, Roman Kagan, qemu-devel



On 19/01/2016 08:48, Denis V. Lunev wrote:
>>
>> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
>> index 6877b4d7e..93c9e25 100644
>> --- a/include/linux/kvm_host.h
>> +++ b/include/linux/kvm_host.h
>> @@ -145,6 +145,7 @@ static inline bool is_error_page(struct page *page)
>>   #define KVM_REQ_HV_RESET          29
>>   #define KVM_REQ_HV_EXIT           30
>>   #define KVM_REQ_HV_STIMER         31
>> +#define KVM_REQ_HV_TSC_PAGE       32
>>     #define KVM_REQ_MAX               64
>>   
> ping

Applied with this change:

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index d7716c6e2752..047c275717d3 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -842,7 +842,7 @@ int kvm_hv_setup_tsc_page(struct kvm_vcpu *vcpu)
 		   tsc_khz, tsc, tsc_scale, tsc_offset);
 
 	tsc_ref.tsc_sequence++;
-	if (tsc_ref.tsc_sequence == 0)
+	if (tsc_ref.tsc_sequence == 0xFFFFFFFF tsc_ref.tsc_sequence == 0)
 		tsc_ref.tsc_sequence = 1;
 
 	tsc_ref.tsc_scale = tsc_scale;

and renumbering KVM_REQ_HV_TSC_PAGE from 32 to 31.

Paolo

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH v1] kvm/x86: Hyper-V tsc page setup
@ 2016-01-20 14:05     ` Paolo Bonzini
  0 siblings, 0 replies; 42+ messages in thread
From: Paolo Bonzini @ 2016-01-20 14:05 UTC (permalink / raw)
  To: Denis V. Lunev, Andrey Smetanin, kvm
  Cc: Gleb Natapov, Roman Kagan, qemu-devel



On 19/01/2016 08:48, Denis V. Lunev wrote:
>>
>> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
>> index 6877b4d7e..93c9e25 100644
>> --- a/include/linux/kvm_host.h
>> +++ b/include/linux/kvm_host.h
>> @@ -145,6 +145,7 @@ static inline bool is_error_page(struct page *page)
>>   #define KVM_REQ_HV_RESET          29
>>   #define KVM_REQ_HV_EXIT           30
>>   #define KVM_REQ_HV_STIMER         31
>> +#define KVM_REQ_HV_TSC_PAGE       32
>>     #define KVM_REQ_MAX               64
>>   
> ping

Applied with this change:

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index d7716c6e2752..047c275717d3 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -842,7 +842,7 @@ int kvm_hv_setup_tsc_page(struct kvm_vcpu *vcpu)
 		   tsc_khz, tsc, tsc_scale, tsc_offset);
 
 	tsc_ref.tsc_sequence++;
-	if (tsc_ref.tsc_sequence == 0)
+	if (tsc_ref.tsc_sequence == 0xFFFFFFFF tsc_ref.tsc_sequence == 0)
 		tsc_ref.tsc_sequence = 1;
 
 	tsc_ref.tsc_scale = tsc_scale;

and renumbering KVM_REQ_HV_TSC_PAGE from 32 to 31.

Paolo

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* Re: [PATCH v1] kvm/x86: Hyper-V tsc page setup
  2016-01-20 14:05     ` [Qemu-devel] " Paolo Bonzini
@ 2016-01-20 14:41       ` Andrey Smetanin
  -1 siblings, 0 replies; 42+ messages in thread
From: Andrey Smetanin @ 2016-01-20 14:41 UTC (permalink / raw)
  To: Paolo Bonzini, Denis V. Lunev, kvm; +Cc: Gleb Natapov, Roman Kagan, qemu-devel



On 01/20/2016 05:05 PM, Paolo Bonzini wrote:
>
>
> On 19/01/2016 08:48, Denis V. Lunev wrote:
>>>
>>> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
>>> index 6877b4d7e..93c9e25 100644
>>> --- a/include/linux/kvm_host.h
>>> +++ b/include/linux/kvm_host.h
>>> @@ -145,6 +145,7 @@ static inline bool is_error_page(struct page *page)
>>>    #define KVM_REQ_HV_RESET          29
>>>    #define KVM_REQ_HV_EXIT           30
>>>    #define KVM_REQ_HV_STIMER         31
>>> +#define KVM_REQ_HV_TSC_PAGE       32
>>>      #define KVM_REQ_MAX               64
>>>
>> ping
>
> Applied with this change:
>
> diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
> index d7716c6e2752..047c275717d3 100644
> --- a/arch/x86/kvm/hyperv.c
> +++ b/arch/x86/kvm/hyperv.c
> @@ -842,7 +842,7 @@ int kvm_hv_setup_tsc_page(struct kvm_vcpu *vcpu)
>   		   tsc_khz, tsc, tsc_scale, tsc_offset);
>
>   	tsc_ref.tsc_sequence++;
> -	if (tsc_ref.tsc_sequence == 0)
> +	if (tsc_ref.tsc_sequence == 0xFFFFFFFF tsc_ref.tsc_sequence == 0)
"(tsc_ref.tsc_sequence == 0xFFFFFFFF || tsc_ref.tsc_sequence == 0)" ?
>   		tsc_ref.tsc_sequence = 1;
>
>   	tsc_ref.tsc_scale = tsc_scale;
>
> and renumbering KVM_REQ_HV_TSC_PAGE from 32 to 31.
>
> Paolo
>

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH v1] kvm/x86: Hyper-V tsc page setup
@ 2016-01-20 14:41       ` Andrey Smetanin
  0 siblings, 0 replies; 42+ messages in thread
From: Andrey Smetanin @ 2016-01-20 14:41 UTC (permalink / raw)
  To: Paolo Bonzini, Denis V. Lunev, kvm; +Cc: Gleb Natapov, Roman Kagan, qemu-devel



On 01/20/2016 05:05 PM, Paolo Bonzini wrote:
>
>
> On 19/01/2016 08:48, Denis V. Lunev wrote:
>>>
>>> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
>>> index 6877b4d7e..93c9e25 100644
>>> --- a/include/linux/kvm_host.h
>>> +++ b/include/linux/kvm_host.h
>>> @@ -145,6 +145,7 @@ static inline bool is_error_page(struct page *page)
>>>    #define KVM_REQ_HV_RESET          29
>>>    #define KVM_REQ_HV_EXIT           30
>>>    #define KVM_REQ_HV_STIMER         31
>>> +#define KVM_REQ_HV_TSC_PAGE       32
>>>      #define KVM_REQ_MAX               64
>>>
>> ping
>
> Applied with this change:
>
> diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
> index d7716c6e2752..047c275717d3 100644
> --- a/arch/x86/kvm/hyperv.c
> +++ b/arch/x86/kvm/hyperv.c
> @@ -842,7 +842,7 @@ int kvm_hv_setup_tsc_page(struct kvm_vcpu *vcpu)
>   		   tsc_khz, tsc, tsc_scale, tsc_offset);
>
>   	tsc_ref.tsc_sequence++;
> -	if (tsc_ref.tsc_sequence == 0)
> +	if (tsc_ref.tsc_sequence == 0xFFFFFFFF tsc_ref.tsc_sequence == 0)
"(tsc_ref.tsc_sequence == 0xFFFFFFFF || tsc_ref.tsc_sequence == 0)" ?
>   		tsc_ref.tsc_sequence = 1;
>
>   	tsc_ref.tsc_scale = tsc_scale;
>
> and renumbering KVM_REQ_HV_TSC_PAGE from 32 to 31.
>
> Paolo
>

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH v1] kvm/x86: Hyper-V tsc page setup
  2016-01-20 14:05     ` [Qemu-devel] " Paolo Bonzini
@ 2016-01-20 14:44       ` Denis V. Lunev
  -1 siblings, 0 replies; 42+ messages in thread
From: Denis V. Lunev @ 2016-01-20 14:44 UTC (permalink / raw)
  To: Paolo Bonzini, Andrey Smetanin, kvm; +Cc: Gleb Natapov, Roman Kagan, qemu-devel

On 01/20/2016 05:05 PM, Paolo Bonzini wrote:
>
> On 19/01/2016 08:48, Denis V. Lunev wrote:
>>> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
>>> index 6877b4d7e..93c9e25 100644
>>> --- a/include/linux/kvm_host.h
>>> +++ b/include/linux/kvm_host.h
>>> @@ -145,6 +145,7 @@ static inline bool is_error_page(struct page *page)
>>>    #define KVM_REQ_HV_RESET          29
>>>    #define KVM_REQ_HV_EXIT           30
>>>    #define KVM_REQ_HV_STIMER         31
>>> +#define KVM_REQ_HV_TSC_PAGE       32
>>>      #define KVM_REQ_MAX               64
>>>    
>> ping
> Applied with this change:
>
> diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
> index d7716c6e2752..047c275717d3 100644
> --- a/arch/x86/kvm/hyperv.c
> +++ b/arch/x86/kvm/hyperv.c
> @@ -842,7 +842,7 @@ int kvm_hv_setup_tsc_page(struct kvm_vcpu *vcpu)
>   		   tsc_khz, tsc, tsc_scale, tsc_offset);
>   
>   	tsc_ref.tsc_sequence++;
> -	if (tsc_ref.tsc_sequence == 0)
> +	if (tsc_ref.tsc_sequence == 0xFFFFFFFF tsc_ref.tsc_sequence == 0)
>   		tsc_ref.tsc_sequence = 1;
>   
>   	tsc_ref.tsc_scale = tsc_scale;
>
> and renumbering KVM_REQ_HV_TSC_PAGE from 32 to 31.
>
> Paolo
the latter does not seem to be correct to me.
MS spec has bug, see this thread:
    https://lkml.org/lkml/2015/11/2/655
and the change was Acked-by: K.Y.

Den

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH v1] kvm/x86: Hyper-V tsc page setup
@ 2016-01-20 14:44       ` Denis V. Lunev
  0 siblings, 0 replies; 42+ messages in thread
From: Denis V. Lunev @ 2016-01-20 14:44 UTC (permalink / raw)
  To: Paolo Bonzini, Andrey Smetanin, kvm; +Cc: Gleb Natapov, Roman Kagan, qemu-devel

On 01/20/2016 05:05 PM, Paolo Bonzini wrote:
>
> On 19/01/2016 08:48, Denis V. Lunev wrote:
>>> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
>>> index 6877b4d7e..93c9e25 100644
>>> --- a/include/linux/kvm_host.h
>>> +++ b/include/linux/kvm_host.h
>>> @@ -145,6 +145,7 @@ static inline bool is_error_page(struct page *page)
>>>    #define KVM_REQ_HV_RESET          29
>>>    #define KVM_REQ_HV_EXIT           30
>>>    #define KVM_REQ_HV_STIMER         31
>>> +#define KVM_REQ_HV_TSC_PAGE       32
>>>      #define KVM_REQ_MAX               64
>>>    
>> ping
> Applied with this change:
>
> diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
> index d7716c6e2752..047c275717d3 100644
> --- a/arch/x86/kvm/hyperv.c
> +++ b/arch/x86/kvm/hyperv.c
> @@ -842,7 +842,7 @@ int kvm_hv_setup_tsc_page(struct kvm_vcpu *vcpu)
>   		   tsc_khz, tsc, tsc_scale, tsc_offset);
>   
>   	tsc_ref.tsc_sequence++;
> -	if (tsc_ref.tsc_sequence == 0)
> +	if (tsc_ref.tsc_sequence == 0xFFFFFFFF tsc_ref.tsc_sequence == 0)
>   		tsc_ref.tsc_sequence = 1;
>   
>   	tsc_ref.tsc_scale = tsc_scale;
>
> and renumbering KVM_REQ_HV_TSC_PAGE from 32 to 31.
>
> Paolo
the latter does not seem to be correct to me.
MS spec has bug, see this thread:
    https://lkml.org/lkml/2015/11/2/655
and the change was Acked-by: K.Y.

Den

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH v1] kvm/x86: Hyper-V tsc page setup
  2016-01-20 14:44       ` [Qemu-devel] " Denis V. Lunev
@ 2016-01-20 14:52         ` Roman Kagan
  -1 siblings, 0 replies; 42+ messages in thread
From: Roman Kagan @ 2016-01-20 14:52 UTC (permalink / raw)
  To: Denis V. Lunev
  Cc: Paolo Bonzini, Andrey Smetanin, kvm, Gleb Natapov, qemu-devel

On Wed, Jan 20, 2016 at 05:44:49PM +0300, Denis V. Lunev wrote:
> On 01/20/2016 05:05 PM, Paolo Bonzini wrote:
> >
> >On 19/01/2016 08:48, Denis V. Lunev wrote:
> >>>diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> >>>index 6877b4d7e..93c9e25 100644
> >>>--- a/include/linux/kvm_host.h
> >>>+++ b/include/linux/kvm_host.h
> >>>@@ -145,6 +145,7 @@ static inline bool is_error_page(struct page *page)
> >>>   #define KVM_REQ_HV_RESET          29
> >>>   #define KVM_REQ_HV_EXIT           30
> >>>   #define KVM_REQ_HV_STIMER         31
> >>>+#define KVM_REQ_HV_TSC_PAGE       32
> >>>     #define KVM_REQ_MAX               64
> >>ping
> >Applied with this change:
> >
> >diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
> >index d7716c6e2752..047c275717d3 100644
> >--- a/arch/x86/kvm/hyperv.c
> >+++ b/arch/x86/kvm/hyperv.c
> >@@ -842,7 +842,7 @@ int kvm_hv_setup_tsc_page(struct kvm_vcpu *vcpu)
> >  		   tsc_khz, tsc, tsc_scale, tsc_offset);
> >  	tsc_ref.tsc_sequence++;
> >-	if (tsc_ref.tsc_sequence == 0)
> >+	if (tsc_ref.tsc_sequence == 0xFFFFFFFF tsc_ref.tsc_sequence == 0)
> >  		tsc_ref.tsc_sequence = 1;
> >  	tsc_ref.tsc_scale = tsc_scale;
> >
> >and renumbering KVM_REQ_HV_TSC_PAGE from 32 to 31.
> >
> >Paolo
> the latter does not seem to be correct to me.
> MS spec has bug, see this thread:
>    https://lkml.org/lkml/2015/11/2/655
> and the change was Acked-by: K.Y.

What happens in the patched code is this: when the sequence counter
wraps around and hits an invalid value, skip to the next valid one.

I think Paolo wanted to be compatible not only with the de facto
implementation in Windows Hyper-V guests, but also with the Linux driver
which followed the buggy spec and thought 0xFFFFFFFF to be invalid.

So the change is fine (as long as there's || rather than a space there
;)

Thanks,
Roman.

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH v1] kvm/x86: Hyper-V tsc page setup
@ 2016-01-20 14:52         ` Roman Kagan
  0 siblings, 0 replies; 42+ messages in thread
From: Roman Kagan @ 2016-01-20 14:52 UTC (permalink / raw)
  To: Denis V. Lunev
  Cc: Andrey Smetanin, Paolo Bonzini, Gleb Natapov, qemu-devel, kvm

On Wed, Jan 20, 2016 at 05:44:49PM +0300, Denis V. Lunev wrote:
> On 01/20/2016 05:05 PM, Paolo Bonzini wrote:
> >
> >On 19/01/2016 08:48, Denis V. Lunev wrote:
> >>>diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> >>>index 6877b4d7e..93c9e25 100644
> >>>--- a/include/linux/kvm_host.h
> >>>+++ b/include/linux/kvm_host.h
> >>>@@ -145,6 +145,7 @@ static inline bool is_error_page(struct page *page)
> >>>   #define KVM_REQ_HV_RESET          29
> >>>   #define KVM_REQ_HV_EXIT           30
> >>>   #define KVM_REQ_HV_STIMER         31
> >>>+#define KVM_REQ_HV_TSC_PAGE       32
> >>>     #define KVM_REQ_MAX               64
> >>ping
> >Applied with this change:
> >
> >diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
> >index d7716c6e2752..047c275717d3 100644
> >--- a/arch/x86/kvm/hyperv.c
> >+++ b/arch/x86/kvm/hyperv.c
> >@@ -842,7 +842,7 @@ int kvm_hv_setup_tsc_page(struct kvm_vcpu *vcpu)
> >  		   tsc_khz, tsc, tsc_scale, tsc_offset);
> >  	tsc_ref.tsc_sequence++;
> >-	if (tsc_ref.tsc_sequence == 0)
> >+	if (tsc_ref.tsc_sequence == 0xFFFFFFFF tsc_ref.tsc_sequence == 0)
> >  		tsc_ref.tsc_sequence = 1;
> >  	tsc_ref.tsc_scale = tsc_scale;
> >
> >and renumbering KVM_REQ_HV_TSC_PAGE from 32 to 31.
> >
> >Paolo
> the latter does not seem to be correct to me.
> MS spec has bug, see this thread:
>    https://lkml.org/lkml/2015/11/2/655
> and the change was Acked-by: K.Y.

What happens in the patched code is this: when the sequence counter
wraps around and hits an invalid value, skip to the next valid one.

I think Paolo wanted to be compatible not only with the de facto
implementation in Windows Hyper-V guests, but also with the Linux driver
which followed the buggy spec and thought 0xFFFFFFFF to be invalid.

So the change is fine (as long as there's || rather than a space there
;)

Thanks,
Roman.

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH v1] kvm/x86: Hyper-V tsc page setup
  2016-01-20 14:52         ` [Qemu-devel] " Roman Kagan
@ 2016-01-20 14:54           ` Denis V. Lunev
  -1 siblings, 0 replies; 42+ messages in thread
From: Denis V. Lunev @ 2016-01-20 14:54 UTC (permalink / raw)
  To: Roman Kagan, Paolo Bonzini, Andrey Smetanin, kvm, Gleb Natapov,
	qemu-devel

On 01/20/2016 05:52 PM, Roman Kagan wrote:
> On Wed, Jan 20, 2016 at 05:44:49PM +0300, Denis V. Lunev wrote:
>> On 01/20/2016 05:05 PM, Paolo Bonzini wrote:
>>> On 19/01/2016 08:48, Denis V. Lunev wrote:
>>>>> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
>>>>> index 6877b4d7e..93c9e25 100644
>>>>> --- a/include/linux/kvm_host.h
>>>>> +++ b/include/linux/kvm_host.h
>>>>> @@ -145,6 +145,7 @@ static inline bool is_error_page(struct page *page)
>>>>>    #define KVM_REQ_HV_RESET          29
>>>>>    #define KVM_REQ_HV_EXIT           30
>>>>>    #define KVM_REQ_HV_STIMER         31
>>>>> +#define KVM_REQ_HV_TSC_PAGE       32
>>>>>      #define KVM_REQ_MAX               64
>>>> ping
>>> Applied with this change:
>>>
>>> diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
>>> index d7716c6e2752..047c275717d3 100644
>>> --- a/arch/x86/kvm/hyperv.c
>>> +++ b/arch/x86/kvm/hyperv.c
>>> @@ -842,7 +842,7 @@ int kvm_hv_setup_tsc_page(struct kvm_vcpu *vcpu)
>>>   		   tsc_khz, tsc, tsc_scale, tsc_offset);
>>>   	tsc_ref.tsc_sequence++;
>>> -	if (tsc_ref.tsc_sequence == 0)
>>> +	if (tsc_ref.tsc_sequence == 0xFFFFFFFF tsc_ref.tsc_sequence == 0)
>>>   		tsc_ref.tsc_sequence = 1;
>>>   	tsc_ref.tsc_scale = tsc_scale;
>>>
>>> and renumbering KVM_REQ_HV_TSC_PAGE from 32 to 31.
>>>
>>> Paolo
>> the latter does not seem to be correct to me.
>> MS spec has bug, see this thread:
>>     https://lkml.org/lkml/2015/11/2/655
>> and the change was Acked-by: K.Y.
> What happens in the patched code is this: when the sequence counter
> wraps around and hits an invalid value, skip to the next valid one.
>
> I think Paolo wanted to be compatible not only with the de facto
> implementation in Windows Hyper-V guests, but also with the Linux driver
> which followed the buggy spec and thought 0xFFFFFFFF to be invalid.
>
> So the change is fine (as long as there's || rather than a space there
> ;)

this seems reasonable, you are right :)

Thank you for the explanation.

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH v1] kvm/x86: Hyper-V tsc page setup
@ 2016-01-20 14:54           ` Denis V. Lunev
  0 siblings, 0 replies; 42+ messages in thread
From: Denis V. Lunev @ 2016-01-20 14:54 UTC (permalink / raw)
  To: Roman Kagan, Paolo Bonzini, Andrey Smetanin, kvm, Gleb Natapov,
	qemu-devel

On 01/20/2016 05:52 PM, Roman Kagan wrote:
> On Wed, Jan 20, 2016 at 05:44:49PM +0300, Denis V. Lunev wrote:
>> On 01/20/2016 05:05 PM, Paolo Bonzini wrote:
>>> On 19/01/2016 08:48, Denis V. Lunev wrote:
>>>>> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
>>>>> index 6877b4d7e..93c9e25 100644
>>>>> --- a/include/linux/kvm_host.h
>>>>> +++ b/include/linux/kvm_host.h
>>>>> @@ -145,6 +145,7 @@ static inline bool is_error_page(struct page *page)
>>>>>    #define KVM_REQ_HV_RESET          29
>>>>>    #define KVM_REQ_HV_EXIT           30
>>>>>    #define KVM_REQ_HV_STIMER         31
>>>>> +#define KVM_REQ_HV_TSC_PAGE       32
>>>>>      #define KVM_REQ_MAX               64
>>>> ping
>>> Applied with this change:
>>>
>>> diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
>>> index d7716c6e2752..047c275717d3 100644
>>> --- a/arch/x86/kvm/hyperv.c
>>> +++ b/arch/x86/kvm/hyperv.c
>>> @@ -842,7 +842,7 @@ int kvm_hv_setup_tsc_page(struct kvm_vcpu *vcpu)
>>>   		   tsc_khz, tsc, tsc_scale, tsc_offset);
>>>   	tsc_ref.tsc_sequence++;
>>> -	if (tsc_ref.tsc_sequence == 0)
>>> +	if (tsc_ref.tsc_sequence == 0xFFFFFFFF tsc_ref.tsc_sequence == 0)
>>>   		tsc_ref.tsc_sequence = 1;
>>>   	tsc_ref.tsc_scale = tsc_scale;
>>>
>>> and renumbering KVM_REQ_HV_TSC_PAGE from 32 to 31.
>>>
>>> Paolo
>> the latter does not seem to be correct to me.
>> MS spec has bug, see this thread:
>>     https://lkml.org/lkml/2015/11/2/655
>> and the change was Acked-by: K.Y.
> What happens in the patched code is this: when the sequence counter
> wraps around and hits an invalid value, skip to the next valid one.
>
> I think Paolo wanted to be compatible not only with the de facto
> implementation in Windows Hyper-V guests, but also with the Linux driver
> which followed the buggy spec and thought 0xFFFFFFFF to be invalid.
>
> So the change is fine (as long as there's || rather than a space there
> ;)

this seems reasonable, you are right :)

Thank you for the explanation.

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH v1] kvm/x86: Hyper-V tsc page setup
  2016-01-20 14:52         ` [Qemu-devel] " Roman Kagan
@ 2016-01-20 21:10           ` Paolo Bonzini
  -1 siblings, 0 replies; 42+ messages in thread
From: Paolo Bonzini @ 2016-01-20 21:10 UTC (permalink / raw)
  To: Roman Kagan, Denis V. Lunev, Andrey Smetanin, kvm, Gleb Natapov,
	qemu-devel



On 20/01/2016 15:52, Roman Kagan wrote:
> I think Paolo wanted to be compatible not only with the de facto
> implementation in Windows Hyper-V guests, but also with the Linux driver
> which followed the buggy spec and thought 0xFFFFFFFF to be invalid.

0xFFFFFFFF is not invalid; it means it is valid but you have to use the
MSR.  With 0, you cannot use the MSR either and Windows will use the
ACPI PM timer or similar.  You can see the difference from the result of
QueryPerformanceFrequency, IIRC.

Paolo

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH v1] kvm/x86: Hyper-V tsc page setup
@ 2016-01-20 21:10           ` Paolo Bonzini
  0 siblings, 0 replies; 42+ messages in thread
From: Paolo Bonzini @ 2016-01-20 21:10 UTC (permalink / raw)
  To: Roman Kagan, Denis V. Lunev, Andrey Smetanin, kvm, Gleb Natapov,
	qemu-devel



On 20/01/2016 15:52, Roman Kagan wrote:
> I think Paolo wanted to be compatible not only with the de facto
> implementation in Windows Hyper-V guests, but also with the Linux driver
> which followed the buggy spec and thought 0xFFFFFFFF to be invalid.

0xFFFFFFFF is not invalid; it means it is valid but you have to use the
MSR.  With 0, you cannot use the MSR either and Windows will use the
ACPI PM timer or similar.  You can see the difference from the result of
QueryPerformanceFrequency, IIRC.

Paolo

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH v1] kvm/x86: Hyper-V tsc page setup
  2015-12-24  9:33 ` [Qemu-devel] " Andrey Smetanin
@ 2016-01-22 10:08   ` Paolo Bonzini
  -1 siblings, 0 replies; 42+ messages in thread
From: Paolo Bonzini @ 2016-01-22 10:08 UTC (permalink / raw)
  To: Andrey Smetanin, kvm
  Cc: Gleb Natapov, Denis V. Lunev, Roman Kagan, qemu-devel



On 24/12/2015 10:33, Andrey Smetanin wrote:
> Lately tsc page was implemented but filled with empty
> values. This patch setup tsc page scale and offset based
> on vcpu tsc, tsc_khz and  HV_X64_MSR_TIME_REF_COUNT value.
> 
> The valid tsc page drops HV_X64_MSR_TIME_REF_COUNT msr
> reads count to zero which potentially improves performance.
> 
> The patch applies on top of
> 'kvm: Make vcpu->requests as 64 bit bitmap'
> previously sent.
> 
> Signed-off-by: Andrey Smetanin <asmetanin@virtuozzo.com>
> CC: Paolo Bonzini <pbonzini@redhat.com>
> CC: Gleb Natapov <gleb@kernel.org>
> CC: Roman Kagan <rkagan@virtuozzo.com>
> CC: Denis V. Lunev <den@openvz.org>
> CC: qemu-devel@nongnu.org

Actually there are some more issues:

- unless KVM can use a master clock, it is incorrect to set up the TSC
page this way; the sequence needs to be 0xFFFFFFFF in that case

- writing the TSC page must be done while all VCPUs are stopped, because
the TSC page doesn't provide the possibility for the guest to retry in
the middle of an update (like seqcount in Linux doess)

In the end, the TSC page is actually pretty similar to the kvmclock
master clock and it makes sense to build it on the master clock too.
I'll post a patch next week.

Paolo

> ---
>  arch/x86/kvm/hyperv.c    | 117 +++++++++++++++++++++++++++++++++++++++++------
>  arch/x86/kvm/hyperv.h    |   2 +
>  arch/x86/kvm/x86.c       |  12 +++++
>  include/linux/kvm_host.h |   1 +
>  4 files changed, 117 insertions(+), 15 deletions(-)
> 
> diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
> index d50675a..504fdc7 100644
> --- a/arch/x86/kvm/hyperv.c
> +++ b/arch/x86/kvm/hyperv.c
> @@ -753,6 +753,105 @@ static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu,
>  	return 0;
>  }
>  
> +static u64 calc_tsc_page_scale(u32 tsc_khz)
> +{
> +	/*
> +	 * reftime (in 100ns) = tsc * tsc_scale / 2^64 + tsc_offset
> +	 * so reftime_delta = (tsc_delta * tsc_scale) / 2^64
> +	 * so tsc_scale = (2^64 * reftime_delta)/tsc_delta
> +	 * so tsc_scale = (2^64 * 10 * 10^6) / tsc_hz = (2^64 * 10000) / tsc_khz
> +	 * so tsc_scale = (2^63 * 2 * 10000) / tsc_khz
> +	 */
> +	return mul_u64_u32_div(1ULL << 63, 2 * 10000, tsc_khz);
> +}
> +
> +static int write_tsc_page(struct kvm *kvm, u64 gfn,
> +			  PHV_REFERENCE_TSC_PAGE tsc_ref)
> +{
> +	if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
> +			    tsc_ref, sizeof(*tsc_ref)))
> +		return 1;
> +	mark_page_dirty(kvm, gfn);
> +	return 0;
> +}
> +
> +static int read_tsc_page(struct kvm *kvm, u64 gfn,
> +			 PHV_REFERENCE_TSC_PAGE tsc_ref)
> +{
> +	if (kvm_read_guest(kvm, gfn_to_gpa(gfn),
> +			   tsc_ref, sizeof(*tsc_ref)))
> +		return 1;
> +	return 0;
> +}
> +
> +static u64 calc_tsc_page_time(struct kvm_vcpu *vcpu,
> +			      PHV_REFERENCE_TSC_PAGE tsc_ref)
> +{
> +
> +	u64 tsc = kvm_read_l1_tsc(vcpu, rdtsc());
> +
> +	return mul_u64_u64_shr(tsc, tsc_ref->tsc_scale, 64)
> +		+ tsc_ref->tsc_offset;
> +}
> +
> +static int setup_blank_tsc_page(struct kvm_vcpu *vcpu, u64 gfn)
> +{
> +	HV_REFERENCE_TSC_PAGE tsc_ref;
> +
> +	memset(&tsc_ref, 0, sizeof(tsc_ref));
> +	return write_tsc_page(vcpu->kvm, gfn, &tsc_ref);
> +}
> +
> +int kvm_hv_setup_tsc_page(struct kvm_vcpu *vcpu)
> +{
> +	struct kvm *kvm = vcpu->kvm;
> +	struct kvm_hv *hv = &kvm->arch.hyperv;
> +	HV_REFERENCE_TSC_PAGE tsc_ref;
> +	u32 tsc_khz;
> +	int r;
> +	u64 gfn, ref_time, tsc_scale, tsc_offset, tsc;
> +
> +	if (WARN_ON_ONCE(!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)))
> +		return -EINVAL;
> +
> +	gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
> +	vcpu_debug(vcpu, "tsc page gfn 0x%llx\n", gfn);
> +
> +	tsc_khz = vcpu->arch.virtual_tsc_khz;
> +	if (!tsc_khz) {
> +		vcpu_unimpl(vcpu, "no tsc khz\n");
> +		return setup_blank_tsc_page(vcpu, gfn);
> +	}
> +
> +	r = read_tsc_page(kvm, gfn, &tsc_ref);
> +	if (r) {
> +		vcpu_err(vcpu, "can't access tsc page gfn 0x%llx\n", gfn);
> +		return r;
> +	}
> +
> +	tsc_scale = calc_tsc_page_scale(tsc_khz);
> +	ref_time = get_time_ref_counter(kvm);
> +	tsc = kvm_read_l1_tsc(vcpu, rdtsc());
> +
> +	/* tsc_offset = reftime - tsc * tsc_scale / 2^64 */
> +	tsc_offset = ref_time - mul_u64_u64_shr(tsc, tsc_scale, 64);
> +	vcpu_debug(vcpu, "tsc khz %u tsc %llu scale %llu offset %llu\n",
> +		   tsc_khz, tsc, tsc_scale, tsc_offset);
> +
> +	tsc_ref.tsc_sequence++;
> +	if (tsc_ref.tsc_sequence == 0)
> +		tsc_ref.tsc_sequence = 1;
> +
> +	tsc_ref.tsc_scale = tsc_scale;
> +	tsc_ref.tsc_offset = tsc_offset;
> +
> +	vcpu_debug(vcpu, "tsc page calibration time %llu vs. reftime %llu\n",
> +		   calc_tsc_page_time(vcpu, &tsc_ref),
> +		   get_time_ref_counter(kvm));
> +
> +	return write_tsc_page(kvm, gfn, &tsc_ref);
> +}
> +
>  static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
>  			     bool host)
>  {
> @@ -790,23 +889,11 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
>  		mark_page_dirty(kvm, gfn);
>  		break;
>  	}
> -	case HV_X64_MSR_REFERENCE_TSC: {
> -		u64 gfn;
> -		HV_REFERENCE_TSC_PAGE tsc_ref;
> -
> -		memset(&tsc_ref, 0, sizeof(tsc_ref));
> +	case HV_X64_MSR_REFERENCE_TSC:
>  		hv->hv_tsc_page = data;
> -		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE))
> -			break;
> -		gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
> -		if (kvm_write_guest(
> -				kvm,
> -				gfn << HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT,
> -				&tsc_ref, sizeof(tsc_ref)))
> -			return 1;
> -		mark_page_dirty(kvm, gfn);
> +		if (hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)
> +			kvm_make_request(KVM_REQ_HV_TSC_PAGE, vcpu);
>  		break;
> -	}
>  	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
>  		return kvm_hv_msr_set_crash_data(vcpu,
>  						 msr - HV_X64_MSR_CRASH_P0,
> diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
> index 60eccd4..e7cc446 100644
> --- a/arch/x86/kvm/hyperv.h
> +++ b/arch/x86/kvm/hyperv.h
> @@ -84,4 +84,6 @@ static inline bool kvm_hv_has_stimer_pending(struct kvm_vcpu *vcpu)
>  
>  void kvm_hv_process_stimers(struct kvm_vcpu *vcpu);
>  
> +int kvm_hv_setup_tsc_page(struct kvm_vcpu *vcpu);
> +
>  #endif
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index aedb1a0..1d821f6 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -6500,6 +6500,18 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>  		 */
>  		if (kvm_check_request(KVM_REQ_HV_STIMER, vcpu))
>  			kvm_hv_process_stimers(vcpu);
> +
> +		/*
> +		 * KVM_REQ_HV_TSC_PAGE setup should be after
> +		 * KVM_REQ_CLOCK_UPDATE processing because
> +		 * Hyper-V tsc page content depends
> +		 * on kvm->arch.kvmclock_offset value.
> +		 */
> +		if (kvm_check_request(KVM_REQ_HV_TSC_PAGE, vcpu)) {
> +			r = kvm_hv_setup_tsc_page(vcpu);
> +			if (unlikely(r))
> +				goto out;
> +		}
>  	}
>  
>  	/*
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 6877b4d7e..93c9e25 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -145,6 +145,7 @@ static inline bool is_error_page(struct page *page)
>  #define KVM_REQ_HV_RESET          29
>  #define KVM_REQ_HV_EXIT           30
>  #define KVM_REQ_HV_STIMER         31
> +#define KVM_REQ_HV_TSC_PAGE       32
>  
>  #define KVM_REQ_MAX               64
>  
> 

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH v1] kvm/x86: Hyper-V tsc page setup
@ 2016-01-22 10:08   ` Paolo Bonzini
  0 siblings, 0 replies; 42+ messages in thread
From: Paolo Bonzini @ 2016-01-22 10:08 UTC (permalink / raw)
  To: Andrey Smetanin, kvm
  Cc: Gleb Natapov, Denis V. Lunev, Roman Kagan, qemu-devel



On 24/12/2015 10:33, Andrey Smetanin wrote:
> Lately tsc page was implemented but filled with empty
> values. This patch setup tsc page scale and offset based
> on vcpu tsc, tsc_khz and  HV_X64_MSR_TIME_REF_COUNT value.
> 
> The valid tsc page drops HV_X64_MSR_TIME_REF_COUNT msr
> reads count to zero which potentially improves performance.
> 
> The patch applies on top of
> 'kvm: Make vcpu->requests as 64 bit bitmap'
> previously sent.
> 
> Signed-off-by: Andrey Smetanin <asmetanin@virtuozzo.com>
> CC: Paolo Bonzini <pbonzini@redhat.com>
> CC: Gleb Natapov <gleb@kernel.org>
> CC: Roman Kagan <rkagan@virtuozzo.com>
> CC: Denis V. Lunev <den@openvz.org>
> CC: qemu-devel@nongnu.org

Actually there are some more issues:

- unless KVM can use a master clock, it is incorrect to set up the TSC
page this way; the sequence needs to be 0xFFFFFFFF in that case

- writing the TSC page must be done while all VCPUs are stopped, because
the TSC page doesn't provide the possibility for the guest to retry in
the middle of an update (like seqcount in Linux doess)

In the end, the TSC page is actually pretty similar to the kvmclock
master clock and it makes sense to build it on the master clock too.
I'll post a patch next week.

Paolo

> ---
>  arch/x86/kvm/hyperv.c    | 117 +++++++++++++++++++++++++++++++++++++++++------
>  arch/x86/kvm/hyperv.h    |   2 +
>  arch/x86/kvm/x86.c       |  12 +++++
>  include/linux/kvm_host.h |   1 +
>  4 files changed, 117 insertions(+), 15 deletions(-)
> 
> diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
> index d50675a..504fdc7 100644
> --- a/arch/x86/kvm/hyperv.c
> +++ b/arch/x86/kvm/hyperv.c
> @@ -753,6 +753,105 @@ static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu,
>  	return 0;
>  }
>  
> +static u64 calc_tsc_page_scale(u32 tsc_khz)
> +{
> +	/*
> +	 * reftime (in 100ns) = tsc * tsc_scale / 2^64 + tsc_offset
> +	 * so reftime_delta = (tsc_delta * tsc_scale) / 2^64
> +	 * so tsc_scale = (2^64 * reftime_delta)/tsc_delta
> +	 * so tsc_scale = (2^64 * 10 * 10^6) / tsc_hz = (2^64 * 10000) / tsc_khz
> +	 * so tsc_scale = (2^63 * 2 * 10000) / tsc_khz
> +	 */
> +	return mul_u64_u32_div(1ULL << 63, 2 * 10000, tsc_khz);
> +}
> +
> +static int write_tsc_page(struct kvm *kvm, u64 gfn,
> +			  PHV_REFERENCE_TSC_PAGE tsc_ref)
> +{
> +	if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
> +			    tsc_ref, sizeof(*tsc_ref)))
> +		return 1;
> +	mark_page_dirty(kvm, gfn);
> +	return 0;
> +}
> +
> +static int read_tsc_page(struct kvm *kvm, u64 gfn,
> +			 PHV_REFERENCE_TSC_PAGE tsc_ref)
> +{
> +	if (kvm_read_guest(kvm, gfn_to_gpa(gfn),
> +			   tsc_ref, sizeof(*tsc_ref)))
> +		return 1;
> +	return 0;
> +}
> +
> +static u64 calc_tsc_page_time(struct kvm_vcpu *vcpu,
> +			      PHV_REFERENCE_TSC_PAGE tsc_ref)
> +{
> +
> +	u64 tsc = kvm_read_l1_tsc(vcpu, rdtsc());
> +
> +	return mul_u64_u64_shr(tsc, tsc_ref->tsc_scale, 64)
> +		+ tsc_ref->tsc_offset;
> +}
> +
> +static int setup_blank_tsc_page(struct kvm_vcpu *vcpu, u64 gfn)
> +{
> +	HV_REFERENCE_TSC_PAGE tsc_ref;
> +
> +	memset(&tsc_ref, 0, sizeof(tsc_ref));
> +	return write_tsc_page(vcpu->kvm, gfn, &tsc_ref);
> +}
> +
> +int kvm_hv_setup_tsc_page(struct kvm_vcpu *vcpu)
> +{
> +	struct kvm *kvm = vcpu->kvm;
> +	struct kvm_hv *hv = &kvm->arch.hyperv;
> +	HV_REFERENCE_TSC_PAGE tsc_ref;
> +	u32 tsc_khz;
> +	int r;
> +	u64 gfn, ref_time, tsc_scale, tsc_offset, tsc;
> +
> +	if (WARN_ON_ONCE(!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)))
> +		return -EINVAL;
> +
> +	gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
> +	vcpu_debug(vcpu, "tsc page gfn 0x%llx\n", gfn);
> +
> +	tsc_khz = vcpu->arch.virtual_tsc_khz;
> +	if (!tsc_khz) {
> +		vcpu_unimpl(vcpu, "no tsc khz\n");
> +		return setup_blank_tsc_page(vcpu, gfn);
> +	}
> +
> +	r = read_tsc_page(kvm, gfn, &tsc_ref);
> +	if (r) {
> +		vcpu_err(vcpu, "can't access tsc page gfn 0x%llx\n", gfn);
> +		return r;
> +	}
> +
> +	tsc_scale = calc_tsc_page_scale(tsc_khz);
> +	ref_time = get_time_ref_counter(kvm);
> +	tsc = kvm_read_l1_tsc(vcpu, rdtsc());
> +
> +	/* tsc_offset = reftime - tsc * tsc_scale / 2^64 */
> +	tsc_offset = ref_time - mul_u64_u64_shr(tsc, tsc_scale, 64);
> +	vcpu_debug(vcpu, "tsc khz %u tsc %llu scale %llu offset %llu\n",
> +		   tsc_khz, tsc, tsc_scale, tsc_offset);
> +
> +	tsc_ref.tsc_sequence++;
> +	if (tsc_ref.tsc_sequence == 0)
> +		tsc_ref.tsc_sequence = 1;
> +
> +	tsc_ref.tsc_scale = tsc_scale;
> +	tsc_ref.tsc_offset = tsc_offset;
> +
> +	vcpu_debug(vcpu, "tsc page calibration time %llu vs. reftime %llu\n",
> +		   calc_tsc_page_time(vcpu, &tsc_ref),
> +		   get_time_ref_counter(kvm));
> +
> +	return write_tsc_page(kvm, gfn, &tsc_ref);
> +}
> +
>  static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
>  			     bool host)
>  {
> @@ -790,23 +889,11 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
>  		mark_page_dirty(kvm, gfn);
>  		break;
>  	}
> -	case HV_X64_MSR_REFERENCE_TSC: {
> -		u64 gfn;
> -		HV_REFERENCE_TSC_PAGE tsc_ref;
> -
> -		memset(&tsc_ref, 0, sizeof(tsc_ref));
> +	case HV_X64_MSR_REFERENCE_TSC:
>  		hv->hv_tsc_page = data;
> -		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE))
> -			break;
> -		gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
> -		if (kvm_write_guest(
> -				kvm,
> -				gfn << HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT,
> -				&tsc_ref, sizeof(tsc_ref)))
> -			return 1;
> -		mark_page_dirty(kvm, gfn);
> +		if (hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)
> +			kvm_make_request(KVM_REQ_HV_TSC_PAGE, vcpu);
>  		break;
> -	}
>  	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
>  		return kvm_hv_msr_set_crash_data(vcpu,
>  						 msr - HV_X64_MSR_CRASH_P0,
> diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
> index 60eccd4..e7cc446 100644
> --- a/arch/x86/kvm/hyperv.h
> +++ b/arch/x86/kvm/hyperv.h
> @@ -84,4 +84,6 @@ static inline bool kvm_hv_has_stimer_pending(struct kvm_vcpu *vcpu)
>  
>  void kvm_hv_process_stimers(struct kvm_vcpu *vcpu);
>  
> +int kvm_hv_setup_tsc_page(struct kvm_vcpu *vcpu);
> +
>  #endif
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index aedb1a0..1d821f6 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -6500,6 +6500,18 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>  		 */
>  		if (kvm_check_request(KVM_REQ_HV_STIMER, vcpu))
>  			kvm_hv_process_stimers(vcpu);
> +
> +		/*
> +		 * KVM_REQ_HV_TSC_PAGE setup should be after
> +		 * KVM_REQ_CLOCK_UPDATE processing because
> +		 * Hyper-V tsc page content depends
> +		 * on kvm->arch.kvmclock_offset value.
> +		 */
> +		if (kvm_check_request(KVM_REQ_HV_TSC_PAGE, vcpu)) {
> +			r = kvm_hv_setup_tsc_page(vcpu);
> +			if (unlikely(r))
> +				goto out;
> +		}
>  	}
>  
>  	/*
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 6877b4d7e..93c9e25 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -145,6 +145,7 @@ static inline bool is_error_page(struct page *page)
>  #define KVM_REQ_HV_RESET          29
>  #define KVM_REQ_HV_EXIT           30
>  #define KVM_REQ_HV_STIMER         31
> +#define KVM_REQ_HV_TSC_PAGE       32
>  
>  #define KVM_REQ_MAX               64
>  
> 

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH v1] kvm/x86: Hyper-V tsc page setup
  2016-01-22 10:08   ` [Qemu-devel] " Paolo Bonzini
@ 2016-01-22 10:15     ` Andrey Smetanin
  -1 siblings, 0 replies; 42+ messages in thread
From: Andrey Smetanin @ 2016-01-22 10:15 UTC (permalink / raw)
  To: Paolo Bonzini, kvm; +Cc: Gleb Natapov, Roman Kagan, Denis V. Lunev, qemu-devel



On 01/22/2016 01:08 PM, Paolo Bonzini wrote:
>
>
> On 24/12/2015 10:33, Andrey Smetanin wrote:
>> Lately tsc page was implemented but filled with empty
>> values. This patch setup tsc page scale and offset based
>> on vcpu tsc, tsc_khz and  HV_X64_MSR_TIME_REF_COUNT value.
>>
>> The valid tsc page drops HV_X64_MSR_TIME_REF_COUNT msr
>> reads count to zero which potentially improves performance.
>>
>> The patch applies on top of
>> 'kvm: Make vcpu->requests as 64 bit bitmap'
>> previously sent.
>>
>> Signed-off-by: Andrey Smetanin <asmetanin@virtuozzo.com>
>> CC: Paolo Bonzini <pbonzini@redhat.com>
>> CC: Gleb Natapov <gleb@kernel.org>
>> CC: Roman Kagan <rkagan@virtuozzo.com>
>> CC: Denis V. Lunev <den@openvz.org>
>> CC: qemu-devel@nongnu.org
>
> Actually there are some more issues:
>
> - unless KVM can use a master clock, it is incorrect to set up the TSC
> page this way; the sequence needs to be 0xFFFFFFFF in that case
0xFFFFFFFF is not an invalid value for tsc page,
see https://lkml.org/lkml/2015/11/2/655
>
> - writing the TSC page must be done while all VCPUs are stopped, because
> the TSC page doesn't provide the possibility for the guest to retry in
> the middle of an update (like seqcount in Linux doess)
I think Windows guest gives tsc page address at boot time and protects 
against other vcpu's tsc page access.
>
> In the end, the TSC page is actually pretty similar to the kvmclock
> master clock and it makes sense to build it on the master clock too.
> I'll post a patch next week.
>
> Paolo
>
>> ---
>>   arch/x86/kvm/hyperv.c    | 117 +++++++++++++++++++++++++++++++++++++++++------
>>   arch/x86/kvm/hyperv.h    |   2 +
>>   arch/x86/kvm/x86.c       |  12 +++++
>>   include/linux/kvm_host.h |   1 +
>>   4 files changed, 117 insertions(+), 15 deletions(-)
>>
>> diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
>> index d50675a..504fdc7 100644
>> --- a/arch/x86/kvm/hyperv.c
>> +++ b/arch/x86/kvm/hyperv.c
>> @@ -753,6 +753,105 @@ static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu,
>>   	return 0;
>>   }
>>
>> +static u64 calc_tsc_page_scale(u32 tsc_khz)
>> +{
>> +	/*
>> +	 * reftime (in 100ns) = tsc * tsc_scale / 2^64 + tsc_offset
>> +	 * so reftime_delta = (tsc_delta * tsc_scale) / 2^64
>> +	 * so tsc_scale = (2^64 * reftime_delta)/tsc_delta
>> +	 * so tsc_scale = (2^64 * 10 * 10^6) / tsc_hz = (2^64 * 10000) / tsc_khz
>> +	 * so tsc_scale = (2^63 * 2 * 10000) / tsc_khz
>> +	 */
>> +	return mul_u64_u32_div(1ULL << 63, 2 * 10000, tsc_khz);
>> +}
>> +
>> +static int write_tsc_page(struct kvm *kvm, u64 gfn,
>> +			  PHV_REFERENCE_TSC_PAGE tsc_ref)
>> +{
>> +	if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
>> +			    tsc_ref, sizeof(*tsc_ref)))
>> +		return 1;
>> +	mark_page_dirty(kvm, gfn);
>> +	return 0;
>> +}
>> +
>> +static int read_tsc_page(struct kvm *kvm, u64 gfn,
>> +			 PHV_REFERENCE_TSC_PAGE tsc_ref)
>> +{
>> +	if (kvm_read_guest(kvm, gfn_to_gpa(gfn),
>> +			   tsc_ref, sizeof(*tsc_ref)))
>> +		return 1;
>> +	return 0;
>> +}
>> +
>> +static u64 calc_tsc_page_time(struct kvm_vcpu *vcpu,
>> +			      PHV_REFERENCE_TSC_PAGE tsc_ref)
>> +{
>> +
>> +	u64 tsc = kvm_read_l1_tsc(vcpu, rdtsc());
>> +
>> +	return mul_u64_u64_shr(tsc, tsc_ref->tsc_scale, 64)
>> +		+ tsc_ref->tsc_offset;
>> +}
>> +
>> +static int setup_blank_tsc_page(struct kvm_vcpu *vcpu, u64 gfn)
>> +{
>> +	HV_REFERENCE_TSC_PAGE tsc_ref;
>> +
>> +	memset(&tsc_ref, 0, sizeof(tsc_ref));
>> +	return write_tsc_page(vcpu->kvm, gfn, &tsc_ref);
>> +}
>> +
>> +int kvm_hv_setup_tsc_page(struct kvm_vcpu *vcpu)
>> +{
>> +	struct kvm *kvm = vcpu->kvm;
>> +	struct kvm_hv *hv = &kvm->arch.hyperv;
>> +	HV_REFERENCE_TSC_PAGE tsc_ref;
>> +	u32 tsc_khz;
>> +	int r;
>> +	u64 gfn, ref_time, tsc_scale, tsc_offset, tsc;
>> +
>> +	if (WARN_ON_ONCE(!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)))
>> +		return -EINVAL;
>> +
>> +	gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
>> +	vcpu_debug(vcpu, "tsc page gfn 0x%llx\n", gfn);
>> +
>> +	tsc_khz = vcpu->arch.virtual_tsc_khz;
>> +	if (!tsc_khz) {
>> +		vcpu_unimpl(vcpu, "no tsc khz\n");
>> +		return setup_blank_tsc_page(vcpu, gfn);
>> +	}
>> +
>> +	r = read_tsc_page(kvm, gfn, &tsc_ref);
>> +	if (r) {
>> +		vcpu_err(vcpu, "can't access tsc page gfn 0x%llx\n", gfn);
>> +		return r;
>> +	}
>> +
>> +	tsc_scale = calc_tsc_page_scale(tsc_khz);
>> +	ref_time = get_time_ref_counter(kvm);
>> +	tsc = kvm_read_l1_tsc(vcpu, rdtsc());
>> +
>> +	/* tsc_offset = reftime - tsc * tsc_scale / 2^64 */
>> +	tsc_offset = ref_time - mul_u64_u64_shr(tsc, tsc_scale, 64);
>> +	vcpu_debug(vcpu, "tsc khz %u tsc %llu scale %llu offset %llu\n",
>> +		   tsc_khz, tsc, tsc_scale, tsc_offset);
>> +
>> +	tsc_ref.tsc_sequence++;
>> +	if (tsc_ref.tsc_sequence == 0)
>> +		tsc_ref.tsc_sequence = 1;
>> +
>> +	tsc_ref.tsc_scale = tsc_scale;
>> +	tsc_ref.tsc_offset = tsc_offset;
>> +
>> +	vcpu_debug(vcpu, "tsc page calibration time %llu vs. reftime %llu\n",
>> +		   calc_tsc_page_time(vcpu, &tsc_ref),
>> +		   get_time_ref_counter(kvm));
>> +
>> +	return write_tsc_page(kvm, gfn, &tsc_ref);
>> +}
>> +
>>   static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
>>   			     bool host)
>>   {
>> @@ -790,23 +889,11 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
>>   		mark_page_dirty(kvm, gfn);
>>   		break;
>>   	}
>> -	case HV_X64_MSR_REFERENCE_TSC: {
>> -		u64 gfn;
>> -		HV_REFERENCE_TSC_PAGE tsc_ref;
>> -
>> -		memset(&tsc_ref, 0, sizeof(tsc_ref));
>> +	case HV_X64_MSR_REFERENCE_TSC:
>>   		hv->hv_tsc_page = data;
>> -		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE))
>> -			break;
>> -		gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
>> -		if (kvm_write_guest(
>> -				kvm,
>> -				gfn << HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT,
>> -				&tsc_ref, sizeof(tsc_ref)))
>> -			return 1;
>> -		mark_page_dirty(kvm, gfn);
>> +		if (hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)
>> +			kvm_make_request(KVM_REQ_HV_TSC_PAGE, vcpu);
>>   		break;
>> -	}
>>   	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
>>   		return kvm_hv_msr_set_crash_data(vcpu,
>>   						 msr - HV_X64_MSR_CRASH_P0,
>> diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
>> index 60eccd4..e7cc446 100644
>> --- a/arch/x86/kvm/hyperv.h
>> +++ b/arch/x86/kvm/hyperv.h
>> @@ -84,4 +84,6 @@ static inline bool kvm_hv_has_stimer_pending(struct kvm_vcpu *vcpu)
>>
>>   void kvm_hv_process_stimers(struct kvm_vcpu *vcpu);
>>
>> +int kvm_hv_setup_tsc_page(struct kvm_vcpu *vcpu);
>> +
>>   #endif
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index aedb1a0..1d821f6 100644
>> --- a/arch/x86/kvm/x86.c
>> +++ b/arch/x86/kvm/x86.c
>> @@ -6500,6 +6500,18 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>>   		 */
>>   		if (kvm_check_request(KVM_REQ_HV_STIMER, vcpu))
>>   			kvm_hv_process_stimers(vcpu);
>> +
>> +		/*
>> +		 * KVM_REQ_HV_TSC_PAGE setup should be after
>> +		 * KVM_REQ_CLOCK_UPDATE processing because
>> +		 * Hyper-V tsc page content depends
>> +		 * on kvm->arch.kvmclock_offset value.
>> +		 */
>> +		if (kvm_check_request(KVM_REQ_HV_TSC_PAGE, vcpu)) {
>> +			r = kvm_hv_setup_tsc_page(vcpu);
>> +			if (unlikely(r))
>> +				goto out;
>> +		}
>>   	}
>>
>>   	/*
>> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
>> index 6877b4d7e..93c9e25 100644
>> --- a/include/linux/kvm_host.h
>> +++ b/include/linux/kvm_host.h
>> @@ -145,6 +145,7 @@ static inline bool is_error_page(struct page *page)
>>   #define KVM_REQ_HV_RESET          29
>>   #define KVM_REQ_HV_EXIT           30
>>   #define KVM_REQ_HV_STIMER         31
>> +#define KVM_REQ_HV_TSC_PAGE       32
>>
>>   #define KVM_REQ_MAX               64
>>
>>

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH v1] kvm/x86: Hyper-V tsc page setup
@ 2016-01-22 10:15     ` Andrey Smetanin
  0 siblings, 0 replies; 42+ messages in thread
From: Andrey Smetanin @ 2016-01-22 10:15 UTC (permalink / raw)
  To: Paolo Bonzini, kvm; +Cc: Gleb Natapov, Denis V. Lunev, Roman Kagan, qemu-devel



On 01/22/2016 01:08 PM, Paolo Bonzini wrote:
>
>
> On 24/12/2015 10:33, Andrey Smetanin wrote:
>> Lately tsc page was implemented but filled with empty
>> values. This patch setup tsc page scale and offset based
>> on vcpu tsc, tsc_khz and  HV_X64_MSR_TIME_REF_COUNT value.
>>
>> The valid tsc page drops HV_X64_MSR_TIME_REF_COUNT msr
>> reads count to zero which potentially improves performance.
>>
>> The patch applies on top of
>> 'kvm: Make vcpu->requests as 64 bit bitmap'
>> previously sent.
>>
>> Signed-off-by: Andrey Smetanin <asmetanin@virtuozzo.com>
>> CC: Paolo Bonzini <pbonzini@redhat.com>
>> CC: Gleb Natapov <gleb@kernel.org>
>> CC: Roman Kagan <rkagan@virtuozzo.com>
>> CC: Denis V. Lunev <den@openvz.org>
>> CC: qemu-devel@nongnu.org
>
> Actually there are some more issues:
>
> - unless KVM can use a master clock, it is incorrect to set up the TSC
> page this way; the sequence needs to be 0xFFFFFFFF in that case
0xFFFFFFFF is not an invalid value for tsc page,
see https://lkml.org/lkml/2015/11/2/655
>
> - writing the TSC page must be done while all VCPUs are stopped, because
> the TSC page doesn't provide the possibility for the guest to retry in
> the middle of an update (like seqcount in Linux doess)
I think Windows guest gives tsc page address at boot time and protects 
against other vcpu's tsc page access.
>
> In the end, the TSC page is actually pretty similar to the kvmclock
> master clock and it makes sense to build it on the master clock too.
> I'll post a patch next week.
>
> Paolo
>
>> ---
>>   arch/x86/kvm/hyperv.c    | 117 +++++++++++++++++++++++++++++++++++++++++------
>>   arch/x86/kvm/hyperv.h    |   2 +
>>   arch/x86/kvm/x86.c       |  12 +++++
>>   include/linux/kvm_host.h |   1 +
>>   4 files changed, 117 insertions(+), 15 deletions(-)
>>
>> diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
>> index d50675a..504fdc7 100644
>> --- a/arch/x86/kvm/hyperv.c
>> +++ b/arch/x86/kvm/hyperv.c
>> @@ -753,6 +753,105 @@ static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu,
>>   	return 0;
>>   }
>>
>> +static u64 calc_tsc_page_scale(u32 tsc_khz)
>> +{
>> +	/*
>> +	 * reftime (in 100ns) = tsc * tsc_scale / 2^64 + tsc_offset
>> +	 * so reftime_delta = (tsc_delta * tsc_scale) / 2^64
>> +	 * so tsc_scale = (2^64 * reftime_delta)/tsc_delta
>> +	 * so tsc_scale = (2^64 * 10 * 10^6) / tsc_hz = (2^64 * 10000) / tsc_khz
>> +	 * so tsc_scale = (2^63 * 2 * 10000) / tsc_khz
>> +	 */
>> +	return mul_u64_u32_div(1ULL << 63, 2 * 10000, tsc_khz);
>> +}
>> +
>> +static int write_tsc_page(struct kvm *kvm, u64 gfn,
>> +			  PHV_REFERENCE_TSC_PAGE tsc_ref)
>> +{
>> +	if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
>> +			    tsc_ref, sizeof(*tsc_ref)))
>> +		return 1;
>> +	mark_page_dirty(kvm, gfn);
>> +	return 0;
>> +}
>> +
>> +static int read_tsc_page(struct kvm *kvm, u64 gfn,
>> +			 PHV_REFERENCE_TSC_PAGE tsc_ref)
>> +{
>> +	if (kvm_read_guest(kvm, gfn_to_gpa(gfn),
>> +			   tsc_ref, sizeof(*tsc_ref)))
>> +		return 1;
>> +	return 0;
>> +}
>> +
>> +static u64 calc_tsc_page_time(struct kvm_vcpu *vcpu,
>> +			      PHV_REFERENCE_TSC_PAGE tsc_ref)
>> +{
>> +
>> +	u64 tsc = kvm_read_l1_tsc(vcpu, rdtsc());
>> +
>> +	return mul_u64_u64_shr(tsc, tsc_ref->tsc_scale, 64)
>> +		+ tsc_ref->tsc_offset;
>> +}
>> +
>> +static int setup_blank_tsc_page(struct kvm_vcpu *vcpu, u64 gfn)
>> +{
>> +	HV_REFERENCE_TSC_PAGE tsc_ref;
>> +
>> +	memset(&tsc_ref, 0, sizeof(tsc_ref));
>> +	return write_tsc_page(vcpu->kvm, gfn, &tsc_ref);
>> +}
>> +
>> +int kvm_hv_setup_tsc_page(struct kvm_vcpu *vcpu)
>> +{
>> +	struct kvm *kvm = vcpu->kvm;
>> +	struct kvm_hv *hv = &kvm->arch.hyperv;
>> +	HV_REFERENCE_TSC_PAGE tsc_ref;
>> +	u32 tsc_khz;
>> +	int r;
>> +	u64 gfn, ref_time, tsc_scale, tsc_offset, tsc;
>> +
>> +	if (WARN_ON_ONCE(!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)))
>> +		return -EINVAL;
>> +
>> +	gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
>> +	vcpu_debug(vcpu, "tsc page gfn 0x%llx\n", gfn);
>> +
>> +	tsc_khz = vcpu->arch.virtual_tsc_khz;
>> +	if (!tsc_khz) {
>> +		vcpu_unimpl(vcpu, "no tsc khz\n");
>> +		return setup_blank_tsc_page(vcpu, gfn);
>> +	}
>> +
>> +	r = read_tsc_page(kvm, gfn, &tsc_ref);
>> +	if (r) {
>> +		vcpu_err(vcpu, "can't access tsc page gfn 0x%llx\n", gfn);
>> +		return r;
>> +	}
>> +
>> +	tsc_scale = calc_tsc_page_scale(tsc_khz);
>> +	ref_time = get_time_ref_counter(kvm);
>> +	tsc = kvm_read_l1_tsc(vcpu, rdtsc());
>> +
>> +	/* tsc_offset = reftime - tsc * tsc_scale / 2^64 */
>> +	tsc_offset = ref_time - mul_u64_u64_shr(tsc, tsc_scale, 64);
>> +	vcpu_debug(vcpu, "tsc khz %u tsc %llu scale %llu offset %llu\n",
>> +		   tsc_khz, tsc, tsc_scale, tsc_offset);
>> +
>> +	tsc_ref.tsc_sequence++;
>> +	if (tsc_ref.tsc_sequence == 0)
>> +		tsc_ref.tsc_sequence = 1;
>> +
>> +	tsc_ref.tsc_scale = tsc_scale;
>> +	tsc_ref.tsc_offset = tsc_offset;
>> +
>> +	vcpu_debug(vcpu, "tsc page calibration time %llu vs. reftime %llu\n",
>> +		   calc_tsc_page_time(vcpu, &tsc_ref),
>> +		   get_time_ref_counter(kvm));
>> +
>> +	return write_tsc_page(kvm, gfn, &tsc_ref);
>> +}
>> +
>>   static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
>>   			     bool host)
>>   {
>> @@ -790,23 +889,11 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
>>   		mark_page_dirty(kvm, gfn);
>>   		break;
>>   	}
>> -	case HV_X64_MSR_REFERENCE_TSC: {
>> -		u64 gfn;
>> -		HV_REFERENCE_TSC_PAGE tsc_ref;
>> -
>> -		memset(&tsc_ref, 0, sizeof(tsc_ref));
>> +	case HV_X64_MSR_REFERENCE_TSC:
>>   		hv->hv_tsc_page = data;
>> -		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE))
>> -			break;
>> -		gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
>> -		if (kvm_write_guest(
>> -				kvm,
>> -				gfn << HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT,
>> -				&tsc_ref, sizeof(tsc_ref)))
>> -			return 1;
>> -		mark_page_dirty(kvm, gfn);
>> +		if (hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)
>> +			kvm_make_request(KVM_REQ_HV_TSC_PAGE, vcpu);
>>   		break;
>> -	}
>>   	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
>>   		return kvm_hv_msr_set_crash_data(vcpu,
>>   						 msr - HV_X64_MSR_CRASH_P0,
>> diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
>> index 60eccd4..e7cc446 100644
>> --- a/arch/x86/kvm/hyperv.h
>> +++ b/arch/x86/kvm/hyperv.h
>> @@ -84,4 +84,6 @@ static inline bool kvm_hv_has_stimer_pending(struct kvm_vcpu *vcpu)
>>
>>   void kvm_hv_process_stimers(struct kvm_vcpu *vcpu);
>>
>> +int kvm_hv_setup_tsc_page(struct kvm_vcpu *vcpu);
>> +
>>   #endif
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index aedb1a0..1d821f6 100644
>> --- a/arch/x86/kvm/x86.c
>> +++ b/arch/x86/kvm/x86.c
>> @@ -6500,6 +6500,18 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>>   		 */
>>   		if (kvm_check_request(KVM_REQ_HV_STIMER, vcpu))
>>   			kvm_hv_process_stimers(vcpu);
>> +
>> +		/*
>> +		 * KVM_REQ_HV_TSC_PAGE setup should be after
>> +		 * KVM_REQ_CLOCK_UPDATE processing because
>> +		 * Hyper-V tsc page content depends
>> +		 * on kvm->arch.kvmclock_offset value.
>> +		 */
>> +		if (kvm_check_request(KVM_REQ_HV_TSC_PAGE, vcpu)) {
>> +			r = kvm_hv_setup_tsc_page(vcpu);
>> +			if (unlikely(r))
>> +				goto out;
>> +		}
>>   	}
>>
>>   	/*
>> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
>> index 6877b4d7e..93c9e25 100644
>> --- a/include/linux/kvm_host.h
>> +++ b/include/linux/kvm_host.h
>> @@ -145,6 +145,7 @@ static inline bool is_error_page(struct page *page)
>>   #define KVM_REQ_HV_RESET          29
>>   #define KVM_REQ_HV_EXIT           30
>>   #define KVM_REQ_HV_STIMER         31
>> +#define KVM_REQ_HV_TSC_PAGE       32
>>
>>   #define KVM_REQ_MAX               64
>>
>>

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH v1] kvm/x86: Hyper-V tsc page setup
  2016-01-22 10:15     ` [Qemu-devel] " Andrey Smetanin
@ 2016-01-22 11:02       ` Paolo Bonzini
  -1 siblings, 0 replies; 42+ messages in thread
From: Paolo Bonzini @ 2016-01-22 11:02 UTC (permalink / raw)
  To: asmetanin, kvm; +Cc: Gleb Natapov, Denis V. Lunev, Roman Kagan, qemu-devel



On 22/01/2016 11:15, Andrey Smetanin wrote:
>>
>> - unless KVM can use a master clock, it is incorrect to set up the TSC
>> page this way; the sequence needs to be 0xFFFFFFFF in that case
> 0xFFFFFFFF is not an invalid value for tsc page,
> see https://lkml.org/lkml/2015/11/2/655

oh, I see now.

>> - writing the TSC page must be done while all VCPUs are stopped, because
>> the TSC page doesn't provide the possibility for the guest to retry in
>> the middle of an update (like seqcount in Linux doess)
> I think Windows guest gives tsc page address at boot time and protects
> against other vcpu's tsc page access.

Sometimes the TSC is detected to be unstable and Linux switches to
another clocksource.  At least in that case you can get a write to the
TSC page while the guest is running.

In that case it would be enough to write a zero to tsc_sequence, which
_can_ be done atomically while the guest is running.  However, KVM
already has a mechanism to stop all VCPUs (KVM_REQ_MASTERCLOCK_UPDATE)
so we might as well use it.

Paolo

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH v1] kvm/x86: Hyper-V tsc page setup
@ 2016-01-22 11:02       ` Paolo Bonzini
  0 siblings, 0 replies; 42+ messages in thread
From: Paolo Bonzini @ 2016-01-22 11:02 UTC (permalink / raw)
  To: asmetanin, kvm; +Cc: Gleb Natapov, Denis V. Lunev, Roman Kagan, qemu-devel



On 22/01/2016 11:15, Andrey Smetanin wrote:
>>
>> - unless KVM can use a master clock, it is incorrect to set up the TSC
>> page this way; the sequence needs to be 0xFFFFFFFF in that case
> 0xFFFFFFFF is not an invalid value for tsc page,
> see https://lkml.org/lkml/2015/11/2/655

oh, I see now.

>> - writing the TSC page must be done while all VCPUs are stopped, because
>> the TSC page doesn't provide the possibility for the guest to retry in
>> the middle of an update (like seqcount in Linux doess)
> I think Windows guest gives tsc page address at boot time and protects
> against other vcpu's tsc page access.

Sometimes the TSC is detected to be unstable and Linux switches to
another clocksource.  At least in that case you can get a write to the
TSC page while the guest is running.

In that case it would be enough to write a zero to tsc_sequence, which
_can_ be done atomically while the guest is running.  However, KVM
already has a mechanism to stop all VCPUs (KVM_REQ_MASTERCLOCK_UPDATE)
so we might as well use it.

Paolo

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH v1] kvm/x86: Hyper-V tsc page setup
  2016-01-22 11:02       ` [Qemu-devel] " Paolo Bonzini
@ 2016-01-22 11:11         ` Andrey Smetanin
  -1 siblings, 0 replies; 42+ messages in thread
From: Andrey Smetanin @ 2016-01-22 11:11 UTC (permalink / raw)
  To: Paolo Bonzini, kvm; +Cc: Gleb Natapov, Roman Kagan, Denis V. Lunev, qemu-devel



On 01/22/2016 02:02 PM, Paolo Bonzini wrote:
>
>
> On 22/01/2016 11:15, Andrey Smetanin wrote:
>>>
>>> - unless KVM can use a master clock, it is incorrect to set up the TSC
>>> page this way; the sequence needs to be 0xFFFFFFFF in that case
>> 0xFFFFFFFF is not an invalid value for tsc page,
>> see https://lkml.org/lkml/2015/11/2/655
>
> oh, I see now.
>
>>> - writing the TSC page must be done while all VCPUs are stopped, because
>>> the TSC page doesn't provide the possibility for the guest to retry in
>>> the middle of an update (like seqcount in Linux doess)
>> I think Windows guest gives tsc page address at boot time and protects
>> against other vcpu's tsc page access.
>
> Sometimes the TSC is detected to be unstable and Linux switches to
> another clocksource.  At least in that case you can get a write to the
> TSC page while the guest is running.

I can't understand how write is possible.
Linux Hyper-V driver hv_vmbus.ko does the following inside hv_init() 
drivers/hv/hv.c(line 256):

                 wrmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64);
                 clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100);

So page is setup only once before registration clock source.
>
> In that case it would be enough to write a zero to tsc_sequence, which
> _can_ be done atomically while the guest is running.  However, KVM
> already has a mechanism to stop all VCPUs (KVM_REQ_MASTERCLOCK_UPDATE)
> so we might as well use it.
>
> Paolo
>

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH v1] kvm/x86: Hyper-V tsc page setup
@ 2016-01-22 11:11         ` Andrey Smetanin
  0 siblings, 0 replies; 42+ messages in thread
From: Andrey Smetanin @ 2016-01-22 11:11 UTC (permalink / raw)
  To: Paolo Bonzini, kvm; +Cc: Gleb Natapov, Denis V. Lunev, Roman Kagan, qemu-devel



On 01/22/2016 02:02 PM, Paolo Bonzini wrote:
>
>
> On 22/01/2016 11:15, Andrey Smetanin wrote:
>>>
>>> - unless KVM can use a master clock, it is incorrect to set up the TSC
>>> page this way; the sequence needs to be 0xFFFFFFFF in that case
>> 0xFFFFFFFF is not an invalid value for tsc page,
>> see https://lkml.org/lkml/2015/11/2/655
>
> oh, I see now.
>
>>> - writing the TSC page must be done while all VCPUs are stopped, because
>>> the TSC page doesn't provide the possibility for the guest to retry in
>>> the middle of an update (like seqcount in Linux doess)
>> I think Windows guest gives tsc page address at boot time and protects
>> against other vcpu's tsc page access.
>
> Sometimes the TSC is detected to be unstable and Linux switches to
> another clocksource.  At least in that case you can get a write to the
> TSC page while the guest is running.

I can't understand how write is possible.
Linux Hyper-V driver hv_vmbus.ko does the following inside hv_init() 
drivers/hv/hv.c(line 256):

                 wrmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64);
                 clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100);

So page is setup only once before registration clock source.
>
> In that case it would be enough to write a zero to tsc_sequence, which
> _can_ be done atomically while the guest is running.  However, KVM
> already has a mechanism to stop all VCPUs (KVM_REQ_MASTERCLOCK_UPDATE)
> so we might as well use it.
>
> Paolo
>

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH v1] kvm/x86: Hyper-V tsc page setup
  2016-01-22 11:02       ` [Qemu-devel] " Paolo Bonzini
@ 2016-01-22 11:31         ` Andrey Smetanin
  -1 siblings, 0 replies; 42+ messages in thread
From: Andrey Smetanin @ 2016-01-22 11:31 UTC (permalink / raw)
  To: Paolo Bonzini, kvm; +Cc: Gleb Natapov, Roman Kagan, Denis V. Lunev, qemu-devel



On 01/22/2016 02:02 PM, Paolo Bonzini wrote:
>
>
> On 22/01/2016 11:15, Andrey Smetanin wrote:
>>>
>>> - unless KVM can use a master clock, it is incorrect to set up the TSC
>>> page this way; the sequence needs to be 0xFFFFFFFF in that case
>> 0xFFFFFFFF is not an invalid value for tsc page,
>> see https://lkml.org/lkml/2015/11/2/655
>
> oh, I see now.
>
>>> - writing the TSC page must be done while all VCPUs are stopped, because
>>> the TSC page doesn't provide the possibility for the guest to retry in
>>> the middle of an update (like seqcount in Linux doess)
>> I think Windows guest gives tsc page address at boot time and protects
>> against other vcpu's tsc page access.
>
> Sometimes the TSC is detected to be unstable and Linux switches to
> another clocksource.  At least in that case you can get a write to the
> TSC page while the guest is running.
Sorry, now I got it, you mean host TSC is unstable and we should mark
guest tsc page invalid. Now I understand please ignore my prev. message.
>
> In that case it would be enough to write a zero to tsc_sequence, which
> _can_ be done atomically while the guest is running.  However, KVM
> already has a mechanism to stop all VCPUs (KVM_REQ_MASTERCLOCK_UPDATE)
> so we might as well use it.
>
> Paolo
>

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH v1] kvm/x86: Hyper-V tsc page setup
@ 2016-01-22 11:31         ` Andrey Smetanin
  0 siblings, 0 replies; 42+ messages in thread
From: Andrey Smetanin @ 2016-01-22 11:31 UTC (permalink / raw)
  To: Paolo Bonzini, kvm; +Cc: Gleb Natapov, Denis V. Lunev, Roman Kagan, qemu-devel



On 01/22/2016 02:02 PM, Paolo Bonzini wrote:
>
>
> On 22/01/2016 11:15, Andrey Smetanin wrote:
>>>
>>> - unless KVM can use a master clock, it is incorrect to set up the TSC
>>> page this way; the sequence needs to be 0xFFFFFFFF in that case
>> 0xFFFFFFFF is not an invalid value for tsc page,
>> see https://lkml.org/lkml/2015/11/2/655
>
> oh, I see now.
>
>>> - writing the TSC page must be done while all VCPUs are stopped, because
>>> the TSC page doesn't provide the possibility for the guest to retry in
>>> the middle of an update (like seqcount in Linux doess)
>> I think Windows guest gives tsc page address at boot time and protects
>> against other vcpu's tsc page access.
>
> Sometimes the TSC is detected to be unstable and Linux switches to
> another clocksource.  At least in that case you can get a write to the
> TSC page while the guest is running.
Sorry, now I got it, you mean host TSC is unstable and we should mark
guest tsc page invalid. Now I understand please ignore my prev. message.
>
> In that case it would be enough to write a zero to tsc_sequence, which
> _can_ be done atomically while the guest is running.  However, KVM
> already has a mechanism to stop all VCPUs (KVM_REQ_MASTERCLOCK_UPDATE)
> so we might as well use it.
>
> Paolo
>

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH v1] kvm/x86: Hyper-V tsc page setup
  2016-01-22 11:31         ` [Qemu-devel] " Andrey Smetanin
@ 2016-01-22 11:53           ` Paolo Bonzini
  -1 siblings, 0 replies; 42+ messages in thread
From: Paolo Bonzini @ 2016-01-22 11:53 UTC (permalink / raw)
  To: asmetanin, kvm; +Cc: Gleb Natapov, Roman Kagan, Denis V. Lunev, qemu-devel



On 22/01/2016 12:31, Andrey Smetanin wrote:
>>
>> Sometimes the TSC is detected to be unstable and Linux switches to
>> another clocksource.  At least in that case you can get a write to the
>> TSC page while the guest is running.
> Sorry, now I got it, you mean host TSC is unstable and we should mark
> guest tsc page invalid. Now I understand please ignore my prev. message.

No problem.  Anyhow yes, this is what I meant: a host write to the TSC
page, not a guest write to the TSC page MSR.

Usually it happens only at migration time to update the sequence---which
I believe your patch wasn't doing either.  But if we tie TSC page
updates to kvm_gen_update_masterclock, we get that for free when the
migration destination calls the KVM_SET_CLOCK ioctl.

Paolo

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH v1] kvm/x86: Hyper-V tsc page setup
@ 2016-01-22 11:53           ` Paolo Bonzini
  0 siblings, 0 replies; 42+ messages in thread
From: Paolo Bonzini @ 2016-01-22 11:53 UTC (permalink / raw)
  To: asmetanin, kvm; +Cc: Gleb Natapov, Denis V. Lunev, Roman Kagan, qemu-devel



On 22/01/2016 12:31, Andrey Smetanin wrote:
>>
>> Sometimes the TSC is detected to be unstable and Linux switches to
>> another clocksource.  At least in that case you can get a write to the
>> TSC page while the guest is running.
> Sorry, now I got it, you mean host TSC is unstable and we should mark
> guest tsc page invalid. Now I understand please ignore my prev. message.

No problem.  Anyhow yes, this is what I meant: a host write to the TSC
page, not a guest write to the TSC page MSR.

Usually it happens only at migration time to update the sequence---which
I believe your patch wasn't doing either.  But if we tie TSC page
updates to kvm_gen_update_masterclock, we get that for free when the
migration destination calls the KVM_SET_CLOCK ioctl.

Paolo

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH v1] kvm/x86: Hyper-V tsc page setup
  2016-01-22 11:53           ` [Qemu-devel] " Paolo Bonzini
@ 2016-01-22 11:59             ` Andrey Smetanin
  -1 siblings, 0 replies; 42+ messages in thread
From: Andrey Smetanin @ 2016-01-22 11:59 UTC (permalink / raw)
  To: Paolo Bonzini, kvm; +Cc: Gleb Natapov, Roman Kagan, Denis V. Lunev, qemu-devel



On 01/22/2016 02:53 PM, Paolo Bonzini wrote:
>
>
> On 22/01/2016 12:31, Andrey Smetanin wrote:
>>>
>>> Sometimes the TSC is detected to be unstable and Linux switches to
>>> another clocksource.  At least in that case you can get a write to the
>>> TSC page while the guest is running.
>> Sorry, now I got it, you mean host TSC is unstable and we should mark
>> guest tsc page invalid. Now I understand please ignore my prev. message.
>
> No problem.  Anyhow yes, this is what I meant: a host write to the TSC
> page, not a guest write to the TSC page MSR.
>
> Usually it happens only at migration time to update the sequence---which
> I believe your patch wasn't doing either.
QEMU saves address of page inside ->msr_hv_tsc, so at restore
QEMU sets corresponding MSR and KVM setup's tsc page again.
So migration should able to work.
  But if we tie TSC page
> updates to kvm_gen_update_masterclock, we get that for free when the
> migration destination calls the KVM_SET_CLOCK ioctl.
>
> Paolo
>

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH v1] kvm/x86: Hyper-V tsc page setup
@ 2016-01-22 11:59             ` Andrey Smetanin
  0 siblings, 0 replies; 42+ messages in thread
From: Andrey Smetanin @ 2016-01-22 11:59 UTC (permalink / raw)
  To: Paolo Bonzini, kvm; +Cc: Gleb Natapov, Denis V. Lunev, Roman Kagan, qemu-devel



On 01/22/2016 02:53 PM, Paolo Bonzini wrote:
>
>
> On 22/01/2016 12:31, Andrey Smetanin wrote:
>>>
>>> Sometimes the TSC is detected to be unstable and Linux switches to
>>> another clocksource.  At least in that case you can get a write to the
>>> TSC page while the guest is running.
>> Sorry, now I got it, you mean host TSC is unstable and we should mark
>> guest tsc page invalid. Now I understand please ignore my prev. message.
>
> No problem.  Anyhow yes, this is what I meant: a host write to the TSC
> page, not a guest write to the TSC page MSR.
>
> Usually it happens only at migration time to update the sequence---which
> I believe your patch wasn't doing either.
QEMU saves address of page inside ->msr_hv_tsc, so at restore
QEMU sets corresponding MSR and KVM setup's tsc page again.
So migration should able to work.
  But if we tie TSC page
> updates to kvm_gen_update_masterclock, we get that for free when the
> migration destination calls the KVM_SET_CLOCK ioctl.
>
> Paolo
>

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH v1] kvm/x86: Hyper-V tsc page setup
  2016-01-22 10:08   ` [Qemu-devel] " Paolo Bonzini
@ 2016-01-22 13:13     ` Andrey Smetanin
  -1 siblings, 0 replies; 42+ messages in thread
From: Andrey Smetanin @ 2016-01-22 13:13 UTC (permalink / raw)
  To: Paolo Bonzini, kvm; +Cc: Gleb Natapov, Roman Kagan, Denis V. Lunev, qemu-devel



On 01/22/2016 01:08 PM, Paolo Bonzini wrote:
>
>
> On 24/12/2015 10:33, Andrey Smetanin wrote:
>> Lately tsc page was implemented but filled with empty
>> values. This patch setup tsc page scale and offset based
>> on vcpu tsc, tsc_khz and  HV_X64_MSR_TIME_REF_COUNT value.
>>
>> The valid tsc page drops HV_X64_MSR_TIME_REF_COUNT msr
>> reads count to zero which potentially improves performance.
>>
>> The patch applies on top of
>> 'kvm: Make vcpu->requests as 64 bit bitmap'
>> previously sent.
>>
>> Signed-off-by: Andrey Smetanin <asmetanin@virtuozzo.com>
>> CC: Paolo Bonzini <pbonzini@redhat.com>
>> CC: Gleb Natapov <gleb@kernel.org>
>> CC: Roman Kagan <rkagan@virtuozzo.com>
>> CC: Denis V. Lunev <den@openvz.org>
>> CC: qemu-devel@nongnu.org
>
> Actually there are some more issues:
>
> - unless KVM can use a master clock, it is incorrect to set up the TSC
> page this way; the sequence needs to be 0xFFFFFFFF in that case
>
> - writing the TSC page must be done while all VCPUs are stopped, because
> the TSC page doesn't provide the possibility for the guest to retry in
> the middle of an update (like seqcount in Linux doess)
>
> In the end, the TSC page is actually pretty similar to the kvmclock
> master clock and it makes sense to build it on the master clock too.
> I'll post a patch next week.

We(@virtuozzo.com) will be very thankful to you for the patch which 
integrates Hyper-V tsc page with kvm clock. We even may do work by 
yourself, but our priority now is Hyper-V VMBus initialization
(which is not in dependency on Hyper-V tsc page). What is really helpful 
for us now - patches '[PATCH v2 0/5] KVM: Hyper-V VMBus hypercalls'.
>
> Paolo
>
>> ---
>>   arch/x86/kvm/hyperv.c    | 117 +++++++++++++++++++++++++++++++++++++++++------
>>   arch/x86/kvm/hyperv.h    |   2 +
>>   arch/x86/kvm/x86.c       |  12 +++++
>>   include/linux/kvm_host.h |   1 +
>>   4 files changed, 117 insertions(+), 15 deletions(-)
>>
>> diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
>> index d50675a..504fdc7 100644
>> --- a/arch/x86/kvm/hyperv.c
>> +++ b/arch/x86/kvm/hyperv.c
>> @@ -753,6 +753,105 @@ static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu,
>>   	return 0;
>>   }
>>
>> +static u64 calc_tsc_page_scale(u32 tsc_khz)
>> +{
>> +	/*
>> +	 * reftime (in 100ns) = tsc * tsc_scale / 2^64 + tsc_offset
>> +	 * so reftime_delta = (tsc_delta * tsc_scale) / 2^64
>> +	 * so tsc_scale = (2^64 * reftime_delta)/tsc_delta
>> +	 * so tsc_scale = (2^64 * 10 * 10^6) / tsc_hz = (2^64 * 10000) / tsc_khz
>> +	 * so tsc_scale = (2^63 * 2 * 10000) / tsc_khz
>> +	 */
>> +	return mul_u64_u32_div(1ULL << 63, 2 * 10000, tsc_khz);
>> +}
>> +
>> +static int write_tsc_page(struct kvm *kvm, u64 gfn,
>> +			  PHV_REFERENCE_TSC_PAGE tsc_ref)
>> +{
>> +	if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
>> +			    tsc_ref, sizeof(*tsc_ref)))
>> +		return 1;
>> +	mark_page_dirty(kvm, gfn);
>> +	return 0;
>> +}
>> +
>> +static int read_tsc_page(struct kvm *kvm, u64 gfn,
>> +			 PHV_REFERENCE_TSC_PAGE tsc_ref)
>> +{
>> +	if (kvm_read_guest(kvm, gfn_to_gpa(gfn),
>> +			   tsc_ref, sizeof(*tsc_ref)))
>> +		return 1;
>> +	return 0;
>> +}
>> +
>> +static u64 calc_tsc_page_time(struct kvm_vcpu *vcpu,
>> +			      PHV_REFERENCE_TSC_PAGE tsc_ref)
>> +{
>> +
>> +	u64 tsc = kvm_read_l1_tsc(vcpu, rdtsc());
>> +
>> +	return mul_u64_u64_shr(tsc, tsc_ref->tsc_scale, 64)
>> +		+ tsc_ref->tsc_offset;
>> +}
>> +
>> +static int setup_blank_tsc_page(struct kvm_vcpu *vcpu, u64 gfn)
>> +{
>> +	HV_REFERENCE_TSC_PAGE tsc_ref;
>> +
>> +	memset(&tsc_ref, 0, sizeof(tsc_ref));
>> +	return write_tsc_page(vcpu->kvm, gfn, &tsc_ref);
>> +}
>> +
>> +int kvm_hv_setup_tsc_page(struct kvm_vcpu *vcpu)
>> +{
>> +	struct kvm *kvm = vcpu->kvm;
>> +	struct kvm_hv *hv = &kvm->arch.hyperv;
>> +	HV_REFERENCE_TSC_PAGE tsc_ref;
>> +	u32 tsc_khz;
>> +	int r;
>> +	u64 gfn, ref_time, tsc_scale, tsc_offset, tsc;
>> +
>> +	if (WARN_ON_ONCE(!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)))
>> +		return -EINVAL;
>> +
>> +	gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
>> +	vcpu_debug(vcpu, "tsc page gfn 0x%llx\n", gfn);
>> +
>> +	tsc_khz = vcpu->arch.virtual_tsc_khz;
>> +	if (!tsc_khz) {
>> +		vcpu_unimpl(vcpu, "no tsc khz\n");
>> +		return setup_blank_tsc_page(vcpu, gfn);
>> +	}
>> +
>> +	r = read_tsc_page(kvm, gfn, &tsc_ref);
>> +	if (r) {
>> +		vcpu_err(vcpu, "can't access tsc page gfn 0x%llx\n", gfn);
>> +		return r;
>> +	}
>> +
>> +	tsc_scale = calc_tsc_page_scale(tsc_khz);
>> +	ref_time = get_time_ref_counter(kvm);
>> +	tsc = kvm_read_l1_tsc(vcpu, rdtsc());
>> +
>> +	/* tsc_offset = reftime - tsc * tsc_scale / 2^64 */
>> +	tsc_offset = ref_time - mul_u64_u64_shr(tsc, tsc_scale, 64);
>> +	vcpu_debug(vcpu, "tsc khz %u tsc %llu scale %llu offset %llu\n",
>> +		   tsc_khz, tsc, tsc_scale, tsc_offset);
>> +
>> +	tsc_ref.tsc_sequence++;
>> +	if (tsc_ref.tsc_sequence == 0)
>> +		tsc_ref.tsc_sequence = 1;
>> +
>> +	tsc_ref.tsc_scale = tsc_scale;
>> +	tsc_ref.tsc_offset = tsc_offset;
>> +
>> +	vcpu_debug(vcpu, "tsc page calibration time %llu vs. reftime %llu\n",
>> +		   calc_tsc_page_time(vcpu, &tsc_ref),
>> +		   get_time_ref_counter(kvm));
>> +
>> +	return write_tsc_page(kvm, gfn, &tsc_ref);
>> +}
>> +
>>   static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
>>   			     bool host)
>>   {
>> @@ -790,23 +889,11 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
>>   		mark_page_dirty(kvm, gfn);
>>   		break;
>>   	}
>> -	case HV_X64_MSR_REFERENCE_TSC: {
>> -		u64 gfn;
>> -		HV_REFERENCE_TSC_PAGE tsc_ref;
>> -
>> -		memset(&tsc_ref, 0, sizeof(tsc_ref));
>> +	case HV_X64_MSR_REFERENCE_TSC:
>>   		hv->hv_tsc_page = data;
>> -		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE))
>> -			break;
>> -		gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
>> -		if (kvm_write_guest(
>> -				kvm,
>> -				gfn << HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT,
>> -				&tsc_ref, sizeof(tsc_ref)))
>> -			return 1;
>> -		mark_page_dirty(kvm, gfn);
>> +		if (hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)
>> +			kvm_make_request(KVM_REQ_HV_TSC_PAGE, vcpu);
>>   		break;
>> -	}
>>   	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
>>   		return kvm_hv_msr_set_crash_data(vcpu,
>>   						 msr - HV_X64_MSR_CRASH_P0,
>> diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
>> index 60eccd4..e7cc446 100644
>> --- a/arch/x86/kvm/hyperv.h
>> +++ b/arch/x86/kvm/hyperv.h
>> @@ -84,4 +84,6 @@ static inline bool kvm_hv_has_stimer_pending(struct kvm_vcpu *vcpu)
>>
>>   void kvm_hv_process_stimers(struct kvm_vcpu *vcpu);
>>
>> +int kvm_hv_setup_tsc_page(struct kvm_vcpu *vcpu);
>> +
>>   #endif
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index aedb1a0..1d821f6 100644
>> --- a/arch/x86/kvm/x86.c
>> +++ b/arch/x86/kvm/x86.c
>> @@ -6500,6 +6500,18 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>>   		 */
>>   		if (kvm_check_request(KVM_REQ_HV_STIMER, vcpu))
>>   			kvm_hv_process_stimers(vcpu);
>> +
>> +		/*
>> +		 * KVM_REQ_HV_TSC_PAGE setup should be after
>> +		 * KVM_REQ_CLOCK_UPDATE processing because
>> +		 * Hyper-V tsc page content depends
>> +		 * on kvm->arch.kvmclock_offset value.
>> +		 */
>> +		if (kvm_check_request(KVM_REQ_HV_TSC_PAGE, vcpu)) {
>> +			r = kvm_hv_setup_tsc_page(vcpu);
>> +			if (unlikely(r))
>> +				goto out;
>> +		}
>>   	}
>>
>>   	/*
>> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
>> index 6877b4d7e..93c9e25 100644
>> --- a/include/linux/kvm_host.h
>> +++ b/include/linux/kvm_host.h
>> @@ -145,6 +145,7 @@ static inline bool is_error_page(struct page *page)
>>   #define KVM_REQ_HV_RESET          29
>>   #define KVM_REQ_HV_EXIT           30
>>   #define KVM_REQ_HV_STIMER         31
>> +#define KVM_REQ_HV_TSC_PAGE       32
>>
>>   #define KVM_REQ_MAX               64
>>
>>

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH v1] kvm/x86: Hyper-V tsc page setup
@ 2016-01-22 13:13     ` Andrey Smetanin
  0 siblings, 0 replies; 42+ messages in thread
From: Andrey Smetanin @ 2016-01-22 13:13 UTC (permalink / raw)
  To: Paolo Bonzini, kvm; +Cc: Gleb Natapov, Denis V. Lunev, Roman Kagan, qemu-devel



On 01/22/2016 01:08 PM, Paolo Bonzini wrote:
>
>
> On 24/12/2015 10:33, Andrey Smetanin wrote:
>> Lately tsc page was implemented but filled with empty
>> values. This patch setup tsc page scale and offset based
>> on vcpu tsc, tsc_khz and  HV_X64_MSR_TIME_REF_COUNT value.
>>
>> The valid tsc page drops HV_X64_MSR_TIME_REF_COUNT msr
>> reads count to zero which potentially improves performance.
>>
>> The patch applies on top of
>> 'kvm: Make vcpu->requests as 64 bit bitmap'
>> previously sent.
>>
>> Signed-off-by: Andrey Smetanin <asmetanin@virtuozzo.com>
>> CC: Paolo Bonzini <pbonzini@redhat.com>
>> CC: Gleb Natapov <gleb@kernel.org>
>> CC: Roman Kagan <rkagan@virtuozzo.com>
>> CC: Denis V. Lunev <den@openvz.org>
>> CC: qemu-devel@nongnu.org
>
> Actually there are some more issues:
>
> - unless KVM can use a master clock, it is incorrect to set up the TSC
> page this way; the sequence needs to be 0xFFFFFFFF in that case
>
> - writing the TSC page must be done while all VCPUs are stopped, because
> the TSC page doesn't provide the possibility for the guest to retry in
> the middle of an update (like seqcount in Linux doess)
>
> In the end, the TSC page is actually pretty similar to the kvmclock
> master clock and it makes sense to build it on the master clock too.
> I'll post a patch next week.

We(@virtuozzo.com) will be very thankful to you for the patch which 
integrates Hyper-V tsc page with kvm clock. We even may do work by 
yourself, but our priority now is Hyper-V VMBus initialization
(which is not in dependency on Hyper-V tsc page). What is really helpful 
for us now - patches '[PATCH v2 0/5] KVM: Hyper-V VMBus hypercalls'.
>
> Paolo
>
>> ---
>>   arch/x86/kvm/hyperv.c    | 117 +++++++++++++++++++++++++++++++++++++++++------
>>   arch/x86/kvm/hyperv.h    |   2 +
>>   arch/x86/kvm/x86.c       |  12 +++++
>>   include/linux/kvm_host.h |   1 +
>>   4 files changed, 117 insertions(+), 15 deletions(-)
>>
>> diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
>> index d50675a..504fdc7 100644
>> --- a/arch/x86/kvm/hyperv.c
>> +++ b/arch/x86/kvm/hyperv.c
>> @@ -753,6 +753,105 @@ static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu,
>>   	return 0;
>>   }
>>
>> +static u64 calc_tsc_page_scale(u32 tsc_khz)
>> +{
>> +	/*
>> +	 * reftime (in 100ns) = tsc * tsc_scale / 2^64 + tsc_offset
>> +	 * so reftime_delta = (tsc_delta * tsc_scale) / 2^64
>> +	 * so tsc_scale = (2^64 * reftime_delta)/tsc_delta
>> +	 * so tsc_scale = (2^64 * 10 * 10^6) / tsc_hz = (2^64 * 10000) / tsc_khz
>> +	 * so tsc_scale = (2^63 * 2 * 10000) / tsc_khz
>> +	 */
>> +	return mul_u64_u32_div(1ULL << 63, 2 * 10000, tsc_khz);
>> +}
>> +
>> +static int write_tsc_page(struct kvm *kvm, u64 gfn,
>> +			  PHV_REFERENCE_TSC_PAGE tsc_ref)
>> +{
>> +	if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
>> +			    tsc_ref, sizeof(*tsc_ref)))
>> +		return 1;
>> +	mark_page_dirty(kvm, gfn);
>> +	return 0;
>> +}
>> +
>> +static int read_tsc_page(struct kvm *kvm, u64 gfn,
>> +			 PHV_REFERENCE_TSC_PAGE tsc_ref)
>> +{
>> +	if (kvm_read_guest(kvm, gfn_to_gpa(gfn),
>> +			   tsc_ref, sizeof(*tsc_ref)))
>> +		return 1;
>> +	return 0;
>> +}
>> +
>> +static u64 calc_tsc_page_time(struct kvm_vcpu *vcpu,
>> +			      PHV_REFERENCE_TSC_PAGE tsc_ref)
>> +{
>> +
>> +	u64 tsc = kvm_read_l1_tsc(vcpu, rdtsc());
>> +
>> +	return mul_u64_u64_shr(tsc, tsc_ref->tsc_scale, 64)
>> +		+ tsc_ref->tsc_offset;
>> +}
>> +
>> +static int setup_blank_tsc_page(struct kvm_vcpu *vcpu, u64 gfn)
>> +{
>> +	HV_REFERENCE_TSC_PAGE tsc_ref;
>> +
>> +	memset(&tsc_ref, 0, sizeof(tsc_ref));
>> +	return write_tsc_page(vcpu->kvm, gfn, &tsc_ref);
>> +}
>> +
>> +int kvm_hv_setup_tsc_page(struct kvm_vcpu *vcpu)
>> +{
>> +	struct kvm *kvm = vcpu->kvm;
>> +	struct kvm_hv *hv = &kvm->arch.hyperv;
>> +	HV_REFERENCE_TSC_PAGE tsc_ref;
>> +	u32 tsc_khz;
>> +	int r;
>> +	u64 gfn, ref_time, tsc_scale, tsc_offset, tsc;
>> +
>> +	if (WARN_ON_ONCE(!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)))
>> +		return -EINVAL;
>> +
>> +	gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
>> +	vcpu_debug(vcpu, "tsc page gfn 0x%llx\n", gfn);
>> +
>> +	tsc_khz = vcpu->arch.virtual_tsc_khz;
>> +	if (!tsc_khz) {
>> +		vcpu_unimpl(vcpu, "no tsc khz\n");
>> +		return setup_blank_tsc_page(vcpu, gfn);
>> +	}
>> +
>> +	r = read_tsc_page(kvm, gfn, &tsc_ref);
>> +	if (r) {
>> +		vcpu_err(vcpu, "can't access tsc page gfn 0x%llx\n", gfn);
>> +		return r;
>> +	}
>> +
>> +	tsc_scale = calc_tsc_page_scale(tsc_khz);
>> +	ref_time = get_time_ref_counter(kvm);
>> +	tsc = kvm_read_l1_tsc(vcpu, rdtsc());
>> +
>> +	/* tsc_offset = reftime - tsc * tsc_scale / 2^64 */
>> +	tsc_offset = ref_time - mul_u64_u64_shr(tsc, tsc_scale, 64);
>> +	vcpu_debug(vcpu, "tsc khz %u tsc %llu scale %llu offset %llu\n",
>> +		   tsc_khz, tsc, tsc_scale, tsc_offset);
>> +
>> +	tsc_ref.tsc_sequence++;
>> +	if (tsc_ref.tsc_sequence == 0)
>> +		tsc_ref.tsc_sequence = 1;
>> +
>> +	tsc_ref.tsc_scale = tsc_scale;
>> +	tsc_ref.tsc_offset = tsc_offset;
>> +
>> +	vcpu_debug(vcpu, "tsc page calibration time %llu vs. reftime %llu\n",
>> +		   calc_tsc_page_time(vcpu, &tsc_ref),
>> +		   get_time_ref_counter(kvm));
>> +
>> +	return write_tsc_page(kvm, gfn, &tsc_ref);
>> +}
>> +
>>   static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
>>   			     bool host)
>>   {
>> @@ -790,23 +889,11 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
>>   		mark_page_dirty(kvm, gfn);
>>   		break;
>>   	}
>> -	case HV_X64_MSR_REFERENCE_TSC: {
>> -		u64 gfn;
>> -		HV_REFERENCE_TSC_PAGE tsc_ref;
>> -
>> -		memset(&tsc_ref, 0, sizeof(tsc_ref));
>> +	case HV_X64_MSR_REFERENCE_TSC:
>>   		hv->hv_tsc_page = data;
>> -		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE))
>> -			break;
>> -		gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
>> -		if (kvm_write_guest(
>> -				kvm,
>> -				gfn << HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT,
>> -				&tsc_ref, sizeof(tsc_ref)))
>> -			return 1;
>> -		mark_page_dirty(kvm, gfn);
>> +		if (hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)
>> +			kvm_make_request(KVM_REQ_HV_TSC_PAGE, vcpu);
>>   		break;
>> -	}
>>   	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
>>   		return kvm_hv_msr_set_crash_data(vcpu,
>>   						 msr - HV_X64_MSR_CRASH_P0,
>> diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
>> index 60eccd4..e7cc446 100644
>> --- a/arch/x86/kvm/hyperv.h
>> +++ b/arch/x86/kvm/hyperv.h
>> @@ -84,4 +84,6 @@ static inline bool kvm_hv_has_stimer_pending(struct kvm_vcpu *vcpu)
>>
>>   void kvm_hv_process_stimers(struct kvm_vcpu *vcpu);
>>
>> +int kvm_hv_setup_tsc_page(struct kvm_vcpu *vcpu);
>> +
>>   #endif
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index aedb1a0..1d821f6 100644
>> --- a/arch/x86/kvm/x86.c
>> +++ b/arch/x86/kvm/x86.c
>> @@ -6500,6 +6500,18 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>>   		 */
>>   		if (kvm_check_request(KVM_REQ_HV_STIMER, vcpu))
>>   			kvm_hv_process_stimers(vcpu);
>> +
>> +		/*
>> +		 * KVM_REQ_HV_TSC_PAGE setup should be after
>> +		 * KVM_REQ_CLOCK_UPDATE processing because
>> +		 * Hyper-V tsc page content depends
>> +		 * on kvm->arch.kvmclock_offset value.
>> +		 */
>> +		if (kvm_check_request(KVM_REQ_HV_TSC_PAGE, vcpu)) {
>> +			r = kvm_hv_setup_tsc_page(vcpu);
>> +			if (unlikely(r))
>> +				goto out;
>> +		}
>>   	}
>>
>>   	/*
>> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
>> index 6877b4d7e..93c9e25 100644
>> --- a/include/linux/kvm_host.h
>> +++ b/include/linux/kvm_host.h
>> @@ -145,6 +145,7 @@ static inline bool is_error_page(struct page *page)
>>   #define KVM_REQ_HV_RESET          29
>>   #define KVM_REQ_HV_EXIT           30
>>   #define KVM_REQ_HV_STIMER         31
>> +#define KVM_REQ_HV_TSC_PAGE       32
>>
>>   #define KVM_REQ_MAX               64
>>
>>

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH v1] kvm/x86: Hyper-V tsc page setup
  2016-01-22 13:13     ` [Qemu-devel] " Andrey Smetanin
@ 2016-01-22 13:21       ` Paolo Bonzini
  -1 siblings, 0 replies; 42+ messages in thread
From: Paolo Bonzini @ 2016-01-22 13:21 UTC (permalink / raw)
  To: asmetanin, kvm; +Cc: Gleb Natapov, Roman Kagan, Denis V. Lunev, qemu-devel



On 22/01/2016 14:13, Andrey Smetanin wrote:
>>
> 
> We(@virtuozzo.com) will be very thankful to you for the patch which
> integrates Hyper-V tsc page with kvm clock. We even may do work by
> yourself, but our priority now is Hyper-V VMBus initialization
> (which is not in dependency on Hyper-V tsc page). What is really helpful
> for us now - patches '[PATCH v2 0/5] KVM: Hyper-V VMBus hypercalls'.

Yes, I want to merge that very soon as well.

Paolo

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH v1] kvm/x86: Hyper-V tsc page setup
@ 2016-01-22 13:21       ` Paolo Bonzini
  0 siblings, 0 replies; 42+ messages in thread
From: Paolo Bonzini @ 2016-01-22 13:21 UTC (permalink / raw)
  To: asmetanin, kvm; +Cc: Gleb Natapov, Denis V. Lunev, Roman Kagan, qemu-devel



On 22/01/2016 14:13, Andrey Smetanin wrote:
>>
> 
> We(@virtuozzo.com) will be very thankful to you for the patch which
> integrates Hyper-V tsc page with kvm clock. We even may do work by
> yourself, but our priority now is Hyper-V VMBus initialization
> (which is not in dependency on Hyper-V tsc page). What is really helpful
> for us now - patches '[PATCH v2 0/5] KVM: Hyper-V VMBus hypercalls'.

Yes, I want to merge that very soon as well.

Paolo

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH v1] kvm/x86: Hyper-V tsc page setup
  2016-01-22 13:21       ` [Qemu-devel] " Paolo Bonzini
@ 2016-01-22 13:34         ` Andrey Smetanin
  -1 siblings, 0 replies; 42+ messages in thread
From: Andrey Smetanin @ 2016-01-22 13:34 UTC (permalink / raw)
  To: Paolo Bonzini, kvm; +Cc: Gleb Natapov, Roman Kagan, Denis V. Lunev, qemu-devel



On 01/22/2016 04:21 PM, Paolo Bonzini wrote:
>
>
> On 22/01/2016 14:13, Andrey Smetanin wrote:
>>>
>>
>> We(@virtuozzo.com) will be very thankful to you for the patch which
>> integrates Hyper-V tsc page with kvm clock. We even may do work by
>> yourself, but our priority now is Hyper-V VMBus initialization
>> (which is not in dependency on Hyper-V tsc page). What is really helpful
>> for us now - patches '[PATCH v2 0/5] KVM: Hyper-V VMBus hypercalls'.
>
> Yes, I want to merge that very soon as well.
Great, thanks!
>
> Paolo
>

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [Qemu-devel] [PATCH v1] kvm/x86: Hyper-V tsc page setup
@ 2016-01-22 13:34         ` Andrey Smetanin
  0 siblings, 0 replies; 42+ messages in thread
From: Andrey Smetanin @ 2016-01-22 13:34 UTC (permalink / raw)
  To: Paolo Bonzini, kvm; +Cc: Gleb Natapov, Denis V. Lunev, Roman Kagan, qemu-devel



On 01/22/2016 04:21 PM, Paolo Bonzini wrote:
>
>
> On 22/01/2016 14:13, Andrey Smetanin wrote:
>>>
>>
>> We(@virtuozzo.com) will be very thankful to you for the patch which
>> integrates Hyper-V tsc page with kvm clock. We even may do work by
>> yourself, but our priority now is Hyper-V VMBus initialization
>> (which is not in dependency on Hyper-V tsc page). What is really helpful
>> for us now - patches '[PATCH v2 0/5] KVM: Hyper-V VMBus hypercalls'.
>
> Yes, I want to merge that very soon as well.
Great, thanks!
>
> Paolo
>

^ permalink raw reply	[flat|nested] 42+ messages in thread

end of thread, other threads:[~2016-01-22 13:34 UTC | newest]

Thread overview: 42+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-12-24  9:33 [PATCH v1] kvm/x86: Hyper-V tsc page setup Andrey Smetanin
2015-12-24  9:33 ` [Qemu-devel] " Andrey Smetanin
2016-01-05 21:48 ` Peter Hornyack
2016-01-05 21:48   ` [Qemu-devel] " Peter Hornyack
2016-01-06  9:22   ` Andrey Smetanin
2016-01-06  9:22     ` [Qemu-devel] " Andrey Smetanin
2016-01-12  7:43 ` Andrey Smetanin
2016-01-12  7:43   ` [Qemu-devel] " Andrey Smetanin
2016-01-19  7:48 ` Denis V. Lunev
2016-01-19  7:48   ` [Qemu-devel] " Denis V. Lunev
2016-01-20 14:05   ` Paolo Bonzini
2016-01-20 14:05     ` [Qemu-devel] " Paolo Bonzini
2016-01-20 14:41     ` Andrey Smetanin
2016-01-20 14:41       ` [Qemu-devel] " Andrey Smetanin
2016-01-20 14:44     ` Denis V. Lunev
2016-01-20 14:44       ` [Qemu-devel] " Denis V. Lunev
2016-01-20 14:52       ` Roman Kagan
2016-01-20 14:52         ` [Qemu-devel] " Roman Kagan
2016-01-20 14:54         ` Denis V. Lunev
2016-01-20 14:54           ` [Qemu-devel] " Denis V. Lunev
2016-01-20 21:10         ` Paolo Bonzini
2016-01-20 21:10           ` [Qemu-devel] " Paolo Bonzini
2016-01-22 10:08 ` Paolo Bonzini
2016-01-22 10:08   ` [Qemu-devel] " Paolo Bonzini
2016-01-22 10:15   ` Andrey Smetanin
2016-01-22 10:15     ` [Qemu-devel] " Andrey Smetanin
2016-01-22 11:02     ` Paolo Bonzini
2016-01-22 11:02       ` [Qemu-devel] " Paolo Bonzini
2016-01-22 11:11       ` Andrey Smetanin
2016-01-22 11:11         ` [Qemu-devel] " Andrey Smetanin
2016-01-22 11:31       ` Andrey Smetanin
2016-01-22 11:31         ` [Qemu-devel] " Andrey Smetanin
2016-01-22 11:53         ` Paolo Bonzini
2016-01-22 11:53           ` [Qemu-devel] " Paolo Bonzini
2016-01-22 11:59           ` Andrey Smetanin
2016-01-22 11:59             ` [Qemu-devel] " Andrey Smetanin
2016-01-22 13:13   ` Andrey Smetanin
2016-01-22 13:13     ` [Qemu-devel] " Andrey Smetanin
2016-01-22 13:21     ` Paolo Bonzini
2016-01-22 13:21       ` [Qemu-devel] " Paolo Bonzini
2016-01-22 13:34       ` Andrey Smetanin
2016-01-22 13:34         ` [Qemu-devel] " Andrey Smetanin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.