linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] KVM: X86: Emulate APERF/MPERF to report actual VCPU frequency
@ 2020-06-23  6:35 Like Xu
  2020-06-23 18:29 ` Sean Christopherson
  0 siblings, 1 reply; 6+ messages in thread
From: Like Xu @ 2020-06-23  6:35 UTC (permalink / raw)
  To: Paolo Bonzini, kvm
  Cc: Sean Christopherson, Vitaly Kuznetsov, Wanpeng Li, Jim Mattson,
	Joerg Roedel, wei.huang2, Peter Zijlstra, Thomas Gleixner,
	linux-kernel, Like Xu, Li RongQing, Chai Wen, Jia Lina

The aperf/mperf are used to report current CPU frequency after 7d5905dc14a
"x86 / CPU: Always show current CPU frequency in /proc/cpuinfo". But guest
kernel always reports a fixed VCPU frequency in the /proc/cpuinfo, which
may confuse users especially when turbo is enabled on the host.

Emulate the guest APERF/MPERF capability based on their values on the host.

Co-developed-by: Li RongQing <lirongqing@baidu.com>
Signed-off-by: Li RongQing <lirongqing@baidu.com>
Reviewed-by: Chai Wen <chaiwen@baidu.com>
Reviewed-by: Jia Lina <jialina01@baidu.com>
Signed-off-by: Like Xu <like.xu@linux.intel.com>
---
 arch/x86/include/asm/kvm_host.h | 12 ++++++
 arch/x86/kvm/cpuid.c            |  8 +++-
 arch/x86/kvm/x86.c              | 76 ++++++++++++++++++++++++++++++++-
 3 files changed, 94 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f852ee350beb..c48b9a0a086e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -539,6 +539,16 @@ struct kvm_vcpu_hv_stimer {
 	bool msg_pending;
 };
 
+/* vCPU thermal and power context */
+struct kvm_vcpu_hwp {
+	/* Hardware Coordination Feedback Capability (Presence of APERF/MPERF) */
+	bool hw_coord_fb_cap;
+	/* MPERF increases with a fixed frequency */
+	u64 mperf;
+	/* APERF increases with the current/actual frequency */
+	u64 aperf;
+};
+
 /* Hyper-V synthetic interrupt controller (SynIC)*/
 struct kvm_vcpu_hv_synic {
 	u64 version;
@@ -829,6 +839,8 @@ struct kvm_vcpu_arch {
 
 	/* AMD MSRC001_0015 Hardware Configuration */
 	u64 msr_hwcr;
+
+	struct kvm_vcpu_hwp hwp;
 };
 
 struct kvm_lpage_info {
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 8a294f9747aa..7057809e7cfd 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -78,6 +78,11 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
 			apic->lapic_timer.timer_mode_mask = 1 << 17;
 	}
 
+	best = kvm_find_cpuid_entry(vcpu, 0x6, 0);
+	if (best && best->function == 0x6 &&
+	    boot_cpu_has(X86_FEATURE_APERFMPERF) && (best->ecx & 0x1))
+		vcpu->arch.hwp.hw_coord_fb_cap = true;
+
 	best = kvm_find_cpuid_entry(vcpu, 7, 0);
 	if (best && boot_cpu_has(X86_FEATURE_PKU) && best->function == 0x7)
 		cpuid_entry_change(best, X86_FEATURE_OSPKE,
@@ -561,7 +566,8 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
 	case 6: /* Thermal management */
 		entry->eax = 0x4; /* allow ARAT */
 		entry->ebx = 0;
-		entry->ecx = 0;
+		/* allow aperf/mperf to report the true VCPU frequency. */
+		entry->ecx = boot_cpu_has(X86_FEATURE_APERFMPERF) ? 0x1 : 0;
 		entry->edx = 0;
 		break;
 	/* function 7 has additional index. */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 00c88c2f34e4..d220d9cc904a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3056,6 +3056,16 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			return 1;
 		vcpu->arch.msr_misc_features_enables = data;
 		break;
+	case MSR_IA32_MPERF:
+		if (!msr_info->host_initiated && !vcpu->arch.hwp.hw_coord_fb_cap)
+			return 1;
+		vcpu->arch.hwp.mperf = 0;
+		return 0;
+	case MSR_IA32_APERF:
+		if (!msr_info->host_initiated && !vcpu->arch.hwp.hw_coord_fb_cap)
+			return 1;
+		vcpu->arch.hwp.aperf = 0;
+		return 0;
 	default:
 		if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
 			return xen_hvm_config(vcpu, data);
@@ -3323,6 +3333,16 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case MSR_K7_HWCR:
 		msr_info->data = vcpu->arch.msr_hwcr;
 		break;
+	case MSR_IA32_MPERF:
+		if (!msr_info->host_initiated && !vcpu->arch.hwp.hw_coord_fb_cap)
+			return 1;
+		msr_info->data = vcpu->arch.hwp.mperf;
+		break;
+	case MSR_IA32_APERF:
+		if (!msr_info->host_initiated && !vcpu->arch.hwp.hw_coord_fb_cap)
+			return 1;
+		msr_info->data = vcpu->arch.hwp.aperf;
+		break;
 	default:
 		if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
 			return kvm_pmu_get_msr(vcpu, msr_info);
@@ -8300,6 +8320,50 @@ void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(__kvm_request_immediate_exit);
 
+static inline void get_host_amperf(u64 *mperf, u64 *aperf)
+{
+	rdmsrl(MSR_IA32_MPERF, *mperf);
+	rdmsrl(MSR_IA32_APERF, *aperf);
+}
+
+static inline u64 get_amperf_delta(u64 enter, u64 exit)
+{
+	return exit - enter; /* u64 subtraction is modulo 2^64, so a single counter wrap still yields the exact tick count */
+}
+
+static inline void vcpu_update_amperf(struct kvm_vcpu *vcpu, u64 adelta, u64 mdelta)
+{
+	u64 aperf_left, mperf_left, delta, tmp;
+
+	aperf_left = ULONG_MAX - vcpu->arch.hwp.aperf;
+	mperf_left = ULONG_MAX - vcpu->arch.hwp.mperf;
+
+	/* fast path when neither MSR overflows */
+	if (adelta <= aperf_left && mdelta <= mperf_left) {
+		vcpu->arch.hwp.aperf += adelta;
+		vcpu->arch.hwp.mperf += mdelta;
+		return;
+	}
+
+	/* when either MSR overflows, both MSRs are reset to zero and continue to increment. */
+	delta = min(adelta, mdelta);
+	if (delta > aperf_left || delta > mperf_left) {
+		tmp = max(vcpu->arch.hwp.aperf, vcpu->arch.hwp.mperf);
+		tmp = delta - (ULONG_MAX - tmp) - 1;
+		vcpu->arch.hwp.aperf = tmp + adelta - delta;
+		vcpu->arch.hwp.mperf = tmp + mdelta - delta;
+		return;
+	}
+
+	if (mdelta > adelta && mdelta > aperf_left) {
+		vcpu->arch.hwp.mperf = mdelta - mperf_left - 1;
+		vcpu->arch.hwp.aperf = 0;
+	} else {
+		vcpu->arch.hwp.mperf = 0;
+		vcpu->arch.hwp.aperf = adelta - aperf_left - 1;
+	}
+}
+
 /*
  * Returns 1 to let vcpu_run() continue the guest execution loop without
  * exiting to the userspace.  Otherwise, the value will be returned to the
@@ -8312,7 +8376,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		dm_request_for_irq_injection(vcpu) &&
 		kvm_cpu_accept_dm_intr(vcpu);
 	fastpath_t exit_fastpath;
-
+	u64 enter_mperf = 0, enter_aperf = 0, exit_mperf = 0, exit_aperf = 0;
 	bool req_immediate_exit = false;
 
 	if (kvm_request_pending(vcpu)) {
@@ -8516,8 +8580,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
 	}
 
+	if (unlikely(vcpu->arch.hwp.hw_coord_fb_cap))
+		get_host_amperf(&enter_mperf, &enter_aperf);
+
 	exit_fastpath = kvm_x86_ops.run(vcpu);
 
+	if (unlikely(vcpu->arch.hwp.hw_coord_fb_cap)) {
+		get_host_amperf(&exit_mperf, &exit_aperf);
+		vcpu_update_amperf(vcpu, get_amperf_delta(enter_aperf, exit_aperf),
+			get_amperf_delta(enter_mperf, exit_mperf));
+	}
+
 	/*
 	 * Do this here before restoring debug registers on the host.  And
 	 * since we do this before handling the vmexit, a DR access vmexit
@@ -9482,6 +9555,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
 
 	vcpu->arch.pending_external_vector = -1;
 	vcpu->arch.preempted_in_kernel = false;
+	vcpu->arch.hwp.hw_coord_fb_cap = false;
 
 	kvm_hv_vcpu_init(vcpu);
 
-- 
2.21.3


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] KVM: X86: Emulate APERF/MPERF to report actual VCPU frequency
  2020-06-23  6:35 [PATCH] KVM: X86: Emulate APERF/MPERF to report actual VCPU frequency Like Xu
@ 2020-06-23 18:29 ` Sean Christopherson
  2020-06-23 18:39   ` Jim Mattson
  0 siblings, 1 reply; 6+ messages in thread
From: Sean Christopherson @ 2020-06-23 18:29 UTC (permalink / raw)
  To: Like Xu
  Cc: Paolo Bonzini, kvm, Vitaly Kuznetsov, Wanpeng Li, Jim Mattson,
	Joerg Roedel, wei.huang2, Peter Zijlstra, Thomas Gleixner,
	linux-kernel, Li RongQing, Chai Wen, Jia Lina

On Tue, Jun 23, 2020 at 02:35:30PM +0800, Like Xu wrote:
> The aperf/mperf are used to report current CPU frequency after 7d5905dc14a
> "x86 / CPU: Always show current CPU frequency in /proc/cpuinfo". But guest
> kernel always reports a fixed VCPU frequency in the /proc/cpuinfo, which
> may confuse users especially when turbo is enabled on the host.
> 
> Emulate guest APERF/MPERF capability based their values on the host.
> 
> Co-developed-by: Li RongQing <lirongqing@baidu.com>
> Signed-off-by: Li RongQing <lirongqing@baidu.com>
> Reviewed-by: Chai Wen <chaiwen@baidu.com>
> Reviewed-by: Jia Lina <jialina01@baidu.com>
> Signed-off-by: Like Xu <like.xu@linux.intel.com>
> ---

...

> @@ -8312,7 +8376,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>  		dm_request_for_irq_injection(vcpu) &&
>  		kvm_cpu_accept_dm_intr(vcpu);
>  	fastpath_t exit_fastpath;
> -
> +	u64 enter_mperf = 0, enter_aperf = 0, exit_mperf = 0, exit_aperf = 0;
>  	bool req_immediate_exit = false;
>  
>  	if (kvm_request_pending(vcpu)) {
> @@ -8516,8 +8580,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>  		vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
>  	}
>  
> +	if (unlikely(vcpu->arch.hwp.hw_coord_fb_cap))
> +		get_host_amperf(&enter_mperf, &enter_aperf);
> +
>  	exit_fastpath = kvm_x86_ops.run(vcpu);
>  
> +	if (unlikely(vcpu->arch.hwp.hw_coord_fb_cap)) {
> +		get_host_amperf(&exit_mperf, &exit_aperf);
> +		vcpu_update_amperf(vcpu, get_amperf_delta(enter_aperf, exit_aperf),
> +			get_amperf_delta(enter_mperf, exit_mperf));
> +	}
> +

Is there an alternative approach that doesn't require 4 RDMSRs on every VMX
round trip?  That's literally more expensive than VM-Enter + VM-Exit
combined.

E.g. what about adding KVM_X86_DISABLE_EXITS_APERF_MPERF and exposing the
MSRs for read when that capability is enabled?

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] KVM: X86: Emulate APERF/MPERF to report actual VCPU frequency
  2020-06-23 18:29 ` Sean Christopherson
@ 2020-06-23 18:39   ` Jim Mattson
  2020-06-23 19:05     ` Sean Christopherson
  0 siblings, 1 reply; 6+ messages in thread
From: Jim Mattson @ 2020-06-23 18:39 UTC (permalink / raw)
  To: Sean Christopherson
  Cc: Like Xu, Paolo Bonzini, kvm list, Vitaly Kuznetsov, Wanpeng Li,
	Joerg Roedel, wei.huang2, Peter Zijlstra, Thomas Gleixner, LKML,
	Li RongQing, Chai Wen, Jia Lina

On Tue, Jun 23, 2020 at 11:29 AM Sean Christopherson
<sean.j.christopherson@intel.com> wrote:
>
> On Tue, Jun 23, 2020 at 02:35:30PM +0800, Like Xu wrote:
> > The aperf/mperf are used to report current CPU frequency after 7d5905dc14a
> > "x86 / CPU: Always show current CPU frequency in /proc/cpuinfo". But guest
> > kernel always reports a fixed VCPU frequency in the /proc/cpuinfo, which
> > may confuse users especially when turbo is enabled on the host.
> >
> > Emulate guest APERF/MPERF capability based their values on the host.
> >
> > Co-developed-by: Li RongQing <lirongqing@baidu.com>
> > Signed-off-by: Li RongQing <lirongqing@baidu.com>
> > Reviewed-by: Chai Wen <chaiwen@baidu.com>
> > Reviewed-by: Jia Lina <jialina01@baidu.com>
> > Signed-off-by: Like Xu <like.xu@linux.intel.com>
> > ---
>
> ...
>
> > @@ -8312,7 +8376,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
> >               dm_request_for_irq_injection(vcpu) &&
> >               kvm_cpu_accept_dm_intr(vcpu);
> >       fastpath_t exit_fastpath;
> > -
> > +     u64 enter_mperf = 0, enter_aperf = 0, exit_mperf = 0, exit_aperf = 0;
> >       bool req_immediate_exit = false;
> >
> >       if (kvm_request_pending(vcpu)) {
> > @@ -8516,8 +8580,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
> >               vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
> >       }
> >
> > +     if (unlikely(vcpu->arch.hwp.hw_coord_fb_cap))
> > +             get_host_amperf(&enter_mperf, &enter_aperf);
> > +
> >       exit_fastpath = kvm_x86_ops.run(vcpu);
> >
> > +     if (unlikely(vcpu->arch.hwp.hw_coord_fb_cap)) {
> > +             get_host_amperf(&exit_mperf, &exit_aperf);
> > +             vcpu_update_amperf(vcpu, get_amperf_delta(enter_aperf, exit_aperf),
> > +                     get_amperf_delta(enter_mperf, exit_mperf));
> > +     }
> > +
>
> Is there an alternative approach that doesn't require 4 RDMSRs on every VMX
> round trip?  That's literally more expensive than VM-Enter + VM-Exit
> combined.
>
> E.g. what about adding KVM_X86_DISABLE_EXITS_APERF_MPERF and exposing the
> MSRs for read when that capability is enabled?

When would you load the hardware MSRs with the guest/host values?

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] KVM: X86: Emulate APERF/MPERF to report actual VCPU frequency
  2020-06-23 18:39   ` Jim Mattson
@ 2020-06-23 19:05     ` Sean Christopherson
  2020-06-23 20:34       ` Jim Mattson
  0 siblings, 1 reply; 6+ messages in thread
From: Sean Christopherson @ 2020-06-23 19:05 UTC (permalink / raw)
  To: Jim Mattson
  Cc: Like Xu, Paolo Bonzini, kvm list, Vitaly Kuznetsov, Wanpeng Li,
	Joerg Roedel, wei.huang2, Peter Zijlstra, Thomas Gleixner, LKML,
	Li RongQing, Chai Wen, Jia Lina

On Tue, Jun 23, 2020 at 11:39:16AM -0700, Jim Mattson wrote:
> On Tue, Jun 23, 2020 at 11:29 AM Sean Christopherson
> <sean.j.christopherson@intel.com> wrote:
> >
> > On Tue, Jun 23, 2020 at 02:35:30PM +0800, Like Xu wrote:
> > > The aperf/mperf are used to report current CPU frequency after 7d5905dc14a
> > > "x86 / CPU: Always show current CPU frequency in /proc/cpuinfo". But guest
> > > kernel always reports a fixed VCPU frequency in the /proc/cpuinfo, which
> > > may confuse users especially when turbo is enabled on the host.
> > >
> > > Emulate guest APERF/MPERF capability based their values on the host.
> > >
> > > Co-developed-by: Li RongQing <lirongqing@baidu.com>
> > > Signed-off-by: Li RongQing <lirongqing@baidu.com>
> > > Reviewed-by: Chai Wen <chaiwen@baidu.com>
> > > Reviewed-by: Jia Lina <jialina01@baidu.com>
> > > Signed-off-by: Like Xu <like.xu@linux.intel.com>
> > > ---
> >
> > ...
> >
> > > @@ -8312,7 +8376,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
> > >               dm_request_for_irq_injection(vcpu) &&
> > >               kvm_cpu_accept_dm_intr(vcpu);
> > >       fastpath_t exit_fastpath;
> > > -
> > > +     u64 enter_mperf = 0, enter_aperf = 0, exit_mperf = 0, exit_aperf = 0;
> > >       bool req_immediate_exit = false;
> > >
> > >       if (kvm_request_pending(vcpu)) {
> > > @@ -8516,8 +8580,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
> > >               vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
> > >       }
> > >
> > > +     if (unlikely(vcpu->arch.hwp.hw_coord_fb_cap))
> > > +             get_host_amperf(&enter_mperf, &enter_aperf);
> > > +
> > >       exit_fastpath = kvm_x86_ops.run(vcpu);
> > >
> > > +     if (unlikely(vcpu->arch.hwp.hw_coord_fb_cap)) {
> > > +             get_host_amperf(&exit_mperf, &exit_aperf);
> > > +             vcpu_update_amperf(vcpu, get_amperf_delta(enter_aperf, exit_aperf),
> > > +                     get_amperf_delta(enter_mperf, exit_mperf));
> > > +     }
> > > +
> >
> > Is there an alternative approach that doesn't require 4 RDMSRs on every VMX
> > round trip?  That's literally more expensive than VM-Enter + VM-Exit
> > combined.
> >
> > E.g. what about adding KVM_X86_DISABLE_EXITS_APERF_MPERF and exposing the
> > MSRs for read when that capability is enabled?
> 
> When would you load the hardware MSRs with the guest/host values?

Ugh, I was thinking the MSRs were read-only.

Doesn't this also interact with TSC scaling?

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] KVM: X86: Emulate APERF/MPERF to report actual VCPU frequency
  2020-06-23 19:05     ` Sean Christopherson
@ 2020-06-23 20:34       ` Jim Mattson
  2021-12-22  6:56         ` Like Xu
  0 siblings, 1 reply; 6+ messages in thread
From: Jim Mattson @ 2020-06-23 20:34 UTC (permalink / raw)
  To: Sean Christopherson
  Cc: Like Xu, Paolo Bonzini, kvm list, Vitaly Kuznetsov, Wanpeng Li,
	Joerg Roedel, wei.huang2, Peter Zijlstra, Thomas Gleixner, LKML,
	Li RongQing, Chai Wen, Jia Lina

On Tue, Jun 23, 2020 at 12:05 PM Sean Christopherson
<sean.j.christopherson@intel.com> wrote:
>
> On Tue, Jun 23, 2020 at 11:39:16AM -0700, Jim Mattson wrote:
> > On Tue, Jun 23, 2020 at 11:29 AM Sean Christopherson
> > <sean.j.christopherson@intel.com> wrote:
> > >
> > > On Tue, Jun 23, 2020 at 02:35:30PM +0800, Like Xu wrote:
> > > > The aperf/mperf are used to report current CPU frequency after 7d5905dc14a
> > > > "x86 / CPU: Always show current CPU frequency in /proc/cpuinfo". But guest
> > > > kernel always reports a fixed VCPU frequency in the /proc/cpuinfo, which
> > > > may confuse users especially when turbo is enabled on the host.
> > > >
> > > > Emulate guest APERF/MPERF capability based their values on the host.
> > > >
> > > > Co-developed-by: Li RongQing <lirongqing@baidu.com>
> > > > Signed-off-by: Li RongQing <lirongqing@baidu.com>
> > > > Reviewed-by: Chai Wen <chaiwen@baidu.com>
> > > > Reviewed-by: Jia Lina <jialina01@baidu.com>
> > > > Signed-off-by: Like Xu <like.xu@linux.intel.com>
> > > > ---
> > >
> > > ...
> > >
> > > > @@ -8312,7 +8376,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
> > > >               dm_request_for_irq_injection(vcpu) &&
> > > >               kvm_cpu_accept_dm_intr(vcpu);
> > > >       fastpath_t exit_fastpath;
> > > > -
> > > > +     u64 enter_mperf = 0, enter_aperf = 0, exit_mperf = 0, exit_aperf = 0;
> > > >       bool req_immediate_exit = false;
> > > >
> > > >       if (kvm_request_pending(vcpu)) {
> > > > @@ -8516,8 +8580,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
> > > >               vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
> > > >       }
> > > >
> > > > +     if (unlikely(vcpu->arch.hwp.hw_coord_fb_cap))
> > > > +             get_host_amperf(&enter_mperf, &enter_aperf);
> > > > +
> > > >       exit_fastpath = kvm_x86_ops.run(vcpu);
> > > >
> > > > +     if (unlikely(vcpu->arch.hwp.hw_coord_fb_cap)) {
> > > > +             get_host_amperf(&exit_mperf, &exit_aperf);
> > > > +             vcpu_update_amperf(vcpu, get_amperf_delta(enter_aperf, exit_aperf),
> > > > +                     get_amperf_delta(enter_mperf, exit_mperf));
> > > > +     }
> > > > +
> > >
> > > Is there an alternative approach that doesn't require 4 RDMSRs on every VMX
> > > round trip?  That's literally more expensive than VM-Enter + VM-Exit
> > > combined.
> > >
> > > E.g. what about adding KVM_X86_DISABLE_EXITS_APERF_MPERF and exposing the
> > > MSRs for read when that capability is enabled?
> >
> > When would you load the hardware MSRs with the guest/host values?
>
> Ugh, I was thinking the MSRs were read-only.

Even if they were read-only, they should power on to zero, and they
will most likely not be zero when a guest powers on.

> Doesn't this also interact with TSC scaling?

Yes, it should!

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] KVM: X86: Emulate APERF/MPERF to report actual VCPU frequency
  2020-06-23 20:34       ` Jim Mattson
@ 2021-12-22  6:56         ` Like Xu
  0 siblings, 0 replies; 6+ messages in thread
From: Like Xu @ 2021-12-22  6:56 UTC (permalink / raw)
  To: Jim Mattson, Sean Christopherson
  Cc: Paolo Bonzini, kvm list, Vitaly Kuznetsov, Wanpeng Li,
	Joerg Roedel, wei.huang2, Peter Zijlstra, Thomas Gleixner, LKML,
	Li RongQing, Like Xu

On 24/6/2020 4:34 am, Jim Mattson wrote:
> On Tue, Jun 23, 2020 at 12:05 PM Sean Christopherson
> <sean.j.christopherson@intel.com> wrote:
>>
>> On Tue, Jun 23, 2020 at 11:39:16AM -0700, Jim Mattson wrote:
>>> On Tue, Jun 23, 2020 at 11:29 AM Sean Christopherson
>>> <sean.j.christopherson@intel.com> wrote:
>>>>
>>>> On Tue, Jun 23, 2020 at 02:35:30PM +0800, Like Xu wrote:
>>>>> The aperf/mperf are used to report current CPU frequency after 7d5905dc14a
>>>>> "x86 / CPU: Always show current CPU frequency in /proc/cpuinfo". But guest
>>>>> kernel always reports a fixed VCPU frequency in the /proc/cpuinfo, which
>>>>> may confuse users especially when turbo is enabled on the host.
>>>>>
>>>>> Emulate guest APERF/MPERF capability based their values on the host.
>>>>>
>>>>> Co-developed-by: Li RongQing <lirongqing@baidu.com>
>>>>> Signed-off-by: Li RongQing <lirongqing@baidu.com>
>>>>> Reviewed-by: Chai Wen <chaiwen@baidu.com>
>>>>> Reviewed-by: Jia Lina <jialina01@baidu.com>
>>>>> Signed-off-by: Like Xu <like.xu@linux.intel.com>
>>>>> ---
>>>>
>>>> ...
>>>>
>>>>> @@ -8312,7 +8376,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>>>>>                dm_request_for_irq_injection(vcpu) &&
>>>>>                kvm_cpu_accept_dm_intr(vcpu);
>>>>>        fastpath_t exit_fastpath;
>>>>> -
>>>>> +     u64 enter_mperf = 0, enter_aperf = 0, exit_mperf = 0, exit_aperf = 0;
>>>>>        bool req_immediate_exit = false;
>>>>>
>>>>>        if (kvm_request_pending(vcpu)) {
>>>>> @@ -8516,8 +8580,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>>>>>                vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
>>>>>        }
>>>>>
>>>>> +     if (unlikely(vcpu->arch.hwp.hw_coord_fb_cap))
>>>>> +             get_host_amperf(&enter_mperf, &enter_aperf);
>>>>> +
>>>>>        exit_fastpath = kvm_x86_ops.run(vcpu);
>>>>>
>>>>> +     if (unlikely(vcpu->arch.hwp.hw_coord_fb_cap)) {
>>>>> +             get_host_amperf(&exit_mperf, &exit_aperf);
>>>>> +             vcpu_update_amperf(vcpu, get_amperf_delta(enter_aperf, exit_aperf),
>>>>> +                     get_amperf_delta(enter_mperf, exit_mperf));
>>>>> +     }
>>>>> +
>>>>
>>>> Is there an alternative approach that doesn't require 4 RDMSRs on every VMX
>>>> round trip?  That's literally more expensive than VM-Enter + VM-Exit
>>>> combined.

It looks like we have quite a few users who are expecting this feature in 
different scenarios.

I will add a fast path for RO usage and a slow path if the guest tries to change 
the AMPERF values.

>>>>
>>>> E.g. what about adding KVM_X86_DISABLE_EXITS_APERF_MPERF and exposing the
>>>> MSRs for read when that capability is enabled?
>>>
>>> When would you load the hardware MSRs with the guest/host values?
>>
>> Ugh, I was thinking the MSRs were read-only.
> 
> EVen if they were read-only, they should power on to zero, and they
> will most likely not be zero when a guest powers on.

Can we assume that "not zero when the guest is on" will not harm any guests ?

> 
>> Doesn't this also interact with TSC scaling?
> 
> Yes, it should!

We have too much of a historical burden on TSC emulations.

For practical reasons, what if we only expose the AMPERF cap
if the host/guest has both CONSTANT_TSC and NONSTOP_TSC ?

One more design concern, I wonder if it is *safe* for the guest to
read amperf on pCPU[x] the first time and on pCPU[y] the next time.

Any input ?

Thanks,
Like Xu



^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2021-12-22  6:57 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-06-23  6:35 [PATCH] KVM: X86: Emulate APERF/MPERF to report actual VCPU frequency Like Xu
2020-06-23 18:29 ` Sean Christopherson
2020-06-23 18:39   ` Jim Mattson
2020-06-23 19:05     ` Sean Christopherson
2020-06-23 20:34       ` Jim Mattson
2021-12-22  6:56         ` Like Xu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).