linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] [RFC] kvm: x86: emulate APERF/MPERF registers
@ 2020-04-24  5:08 Li RongQing
  2020-04-24 10:01 ` Peter Zijlstra
  0 siblings, 1 reply; 10+ messages in thread
From: Li RongQing @ 2020-04-24  5:08 UTC (permalink / raw)
  To: linux-kernel, kvm, x86, hpa, bp, mingo, tglx, joro, jmattson,
	wanpengli, vkuznets, sean.j.christopherson, pbonzini

The guest kernel reports a fixed cpu frequency in /proc/cpuinfo,
which is confusing to users when turbo is enabled; aperf/mperf
can be used to show the current cpu frequency after commit 7d5905dc14a
("x86 / CPU: Always show current CPU frequency in /proc/cpuinfo"),
so we should emulate aperf/mperf to achieve that.

The aperf/mperf deltas accumulated while in guest mode are
used as the emulated values.

Signed-off-by: Li RongQing <lirongqing@baidu.com>
---
 arch/x86/include/asm/kvm_host.h |  5 +++++
 arch/x86/kvm/cpuid.c            |  5 ++++-
 arch/x86/kvm/vmx/vmx.c          | 20 ++++++++++++++++++++
 3 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 42a2d0d3984a..526bd13a3d3d 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -820,6 +820,11 @@ struct kvm_vcpu_arch {
 
 	/* AMD MSRC001_0015 Hardware Configuration */
 	u64 msr_hwcr;
+
+	u64 host_mperf;
+	u64 host_aperf;
+	u64 v_mperf;
+	u64 v_aperf;
 };
 
 struct kvm_lpage_info {
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 901cd1fdecd9..00e4993cb338 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -558,7 +558,10 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
 	case 6: /* Thermal management */
 		entry->eax = 0x4; /* allow ARAT */
 		entry->ebx = 0;
-		entry->ecx = 0;
+		if (boot_cpu_has(X86_FEATURE_APERFMPERF))
+			entry->ecx = 0x1;
+		else
+			entry->ecx = 0x0;
 		entry->edx = 0;
 		break;
 	/* function 7 has additional index. */
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 91749f1254e8..f20216fc0b57 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1064,6 +1064,11 @@ static inline void pt_save_msr(struct pt_ctx *ctx, u32 addr_range)
 
 static void pt_guest_enter(struct vcpu_vmx *vmx)
 {
+	struct kvm_vcpu *vcpu = &vmx->vcpu;
+
+	rdmsrl(MSR_IA32_MPERF, vcpu->arch.host_mperf);
+	rdmsrl(MSR_IA32_APERF, vcpu->arch.host_aperf);
+
 	if (vmx_pt_mode_is_system())
 		return;
 
@@ -1081,6 +1086,15 @@ static void pt_guest_enter(struct vcpu_vmx *vmx)
 
 static void pt_guest_exit(struct vcpu_vmx *vmx)
 {
+	struct kvm_vcpu *vcpu = &vmx->vcpu;
+	u64 perf;
+
+	rdmsrl(MSR_IA32_MPERF, perf);
+	vcpu->arch.v_mperf += perf - vcpu->arch.host_mperf;
+
+	rdmsrl(MSR_IA32_APERF, perf);
+	vcpu->arch.v_aperf += perf - vcpu->arch.host_aperf;
+
 	if (vmx_pt_mode_is_system())
 		return;
 
@@ -1914,6 +1928,12 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
 			return 1;
 		goto find_shared_msr;
+	case MSR_IA32_MPERF:
+		msr_info->data = vcpu->arch.v_mperf;
+		break;
+	case MSR_IA32_APERF:
+		msr_info->data = vcpu->arch.v_aperf;
+		break;
 	default:
 	find_shared_msr:
 		msr = find_msr_entry(vmx, msr_info->index);
-- 
2.16.2


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH] [RFC] kvm: x86: emulate APERF/MPERF registers
  2020-04-24  5:08 [PATCH] [RFC] kvm: x86: emulate APERF/MPERF registers Li RongQing
@ 2020-04-24 10:01 ` Peter Zijlstra
  2020-04-24 14:46   ` Sean Christopherson
  2020-04-26  8:30   ` Li,Rongqing
  0 siblings, 2 replies; 10+ messages in thread
From: Peter Zijlstra @ 2020-04-24 10:01 UTC (permalink / raw)
  To: Li RongQing
  Cc: linux-kernel, kvm, x86, hpa, bp, mingo, tglx, joro, jmattson,
	wanpengli, vkuznets, sean.j.christopherson, pbonzini

On Fri, Apr 24, 2020 at 01:08:55PM +0800, Li RongQing wrote:
> Guest kernel reports a fixed cpu frequency in /proc/cpuinfo,
> this is confused to user when turbo is enable, and aperf/mperf
> can be used to show current cpu frequency after 7d5905dc14a
> "(x86 / CPU: Always show current CPU frequency in /proc/cpuinfo)"
> so we should emulate aperf mperf to achieve it
> 
> the period of aperf/mperf in guest mode are accumulated
> as emulated value
> 
> Signed-off-by: Li RongQing <lirongqing@baidu.com>
> ---
>  arch/x86/include/asm/kvm_host.h |  5 +++++
>  arch/x86/kvm/cpuid.c            |  5 ++++-
>  arch/x86/kvm/vmx/vmx.c          | 20 ++++++++++++++++++++
>  3 files changed, 29 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 42a2d0d3984a..526bd13a3d3d 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -820,6 +820,11 @@ struct kvm_vcpu_arch {
>  
>  	/* AMD MSRC001_0015 Hardware Configuration */
>  	u64 msr_hwcr;
> +
> +	u64 host_mperf;
> +	u64 host_aperf;
> +	u64 v_mperf;
> +	u64 v_aperf;
>  };
>  
>  struct kvm_lpage_info {
> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> index 901cd1fdecd9..00e4993cb338 100644
> --- a/arch/x86/kvm/cpuid.c
> +++ b/arch/x86/kvm/cpuid.c
> @@ -558,7 +558,10 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
>  	case 6: /* Thermal management */
>  		entry->eax = 0x4; /* allow ARAT */
>  		entry->ebx = 0;
> -		entry->ecx = 0;
> +		if (boot_cpu_has(X86_FEATURE_APERFMPERF))
> +			entry->ecx = 0x1;
> +		else
> +			entry->ecx = 0x0;
>  		entry->edx = 0;
>  		break;
>  	/* function 7 has additional index. */

AFAICT this is generic x86 code, that is, this will tell an AMD SVM
guest it has APERFMPERF on.

> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 91749f1254e8..f20216fc0b57 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -1064,6 +1064,11 @@ static inline void pt_save_msr(struct pt_ctx *ctx, u32 addr_range)
>  
>  static void pt_guest_enter(struct vcpu_vmx *vmx)
>  {
> +	struct kvm_vcpu *vcpu = &vmx->vcpu;
> +
> +	rdmsrl(MSR_IA32_MPERF, vcpu->arch.host_mperf);
> +	rdmsrl(MSR_IA32_APERF, vcpu->arch.host_aperf);
> +
>  	if (vmx_pt_mode_is_system())
>  		return;
>  
> @@ -1081,6 +1086,15 @@ static void pt_guest_enter(struct vcpu_vmx *vmx)
>  
>  static void pt_guest_exit(struct vcpu_vmx *vmx)
>  {
> +	struct kvm_vcpu *vcpu = &vmx->vcpu;
> +	u64 perf;
> +
> +	rdmsrl(MSR_IA32_MPERF, perf);
> +	vcpu->arch.v_mperf += perf - vcpu->arch.host_mperf;
> +
> +	rdmsrl(MSR_IA32_APERF, perf);
> +	vcpu->arch.v_aperf += perf - vcpu->arch.host_aperf;
> +
>  	if (vmx_pt_mode_is_system())
>  		return;
>  
> @@ -1914,6 +1928,12 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
>  			return 1;
>  		goto find_shared_msr;
> +	case MSR_IA32_MPERF:
> +		msr_info->data = vcpu->arch.v_mperf;
> +		break;
> +	case MSR_IA32_APERF:
> +		msr_info->data = vcpu->arch.v_aperf;
> +		break;
>  	default:
>  	find_shared_msr:
>  		msr = find_msr_entry(vmx, msr_info->index);

But then here you only emulate it for VMX, which then results in SVM
guests going wobbly.

Also, on Intel, the moment you advertise APERFMPERF, we'll try and read
MSR_PLATFORM_INFO / MSR_TURBO_RATIO_LIMIT*, I don't suppose you're
passing those through as well?



^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] [RFC] kvm: x86: emulate APERF/MPERF registers
  2020-04-24 10:01 ` Peter Zijlstra
@ 2020-04-24 14:46   ` Sean Christopherson
  2020-04-24 16:25     ` Jim Mattson
  2020-04-26  3:22     ` 答复: " Li,Rongqing
  2020-04-26  8:30   ` Li,Rongqing
  1 sibling, 2 replies; 10+ messages in thread
From: Sean Christopherson @ 2020-04-24 14:46 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Li RongQing, linux-kernel, kvm, x86, hpa, bp, mingo, tglx, joro,
	jmattson, wanpengli, vkuznets, pbonzini

On Fri, Apr 24, 2020 at 12:01:43PM +0200, Peter Zijlstra wrote:
> On Fri, Apr 24, 2020 at 01:08:55PM +0800, Li RongQing wrote:
> > diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> > index 901cd1fdecd9..00e4993cb338 100644
> > --- a/arch/x86/kvm/cpuid.c
> > +++ b/arch/x86/kvm/cpuid.c
> > @@ -558,7 +558,10 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
> >  	case 6: /* Thermal management */
> >  		entry->eax = 0x4; /* allow ARAT */
> >  		entry->ebx = 0;
> > -		entry->ecx = 0;
> > +		if (boot_cpu_has(X86_FEATURE_APERFMPERF))
> > +			entry->ecx = 0x1;
> > +		else
> > +			entry->ecx = 0x0;
> >  		entry->edx = 0;
> >  		break;
> >  	/* function 7 has additional index. */
> 
> AFAICT this is generic x86 code, that is, this will tell an AMD SVM
> guest it has APERFMPERF on.
> 
> > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> > index 91749f1254e8..f20216fc0b57 100644
> > --- a/arch/x86/kvm/vmx/vmx.c
> > +++ b/arch/x86/kvm/vmx/vmx.c
> > @@ -1064,6 +1064,11 @@ static inline void pt_save_msr(struct pt_ctx *ctx, u32 addr_range)
> >  
> >  static void pt_guest_enter(struct vcpu_vmx *vmx)
> >  {
> > +	struct kvm_vcpu *vcpu = &vmx->vcpu;
> > +
> > +	rdmsrl(MSR_IA32_MPERF, vcpu->arch.host_mperf);
> > +	rdmsrl(MSR_IA32_APERF, vcpu->arch.host_aperf);

Why are these buried in Processor Trace code?  Is APERFMERF in any way
dependent on PT?

> > +
> >  	if (vmx_pt_mode_is_system())
> >  		return;
> >  
> > @@ -1081,6 +1086,15 @@ static void pt_guest_enter(struct vcpu_vmx *vmx)
> >  
> >  static void pt_guest_exit(struct vcpu_vmx *vmx)
> >  {
> > +	struct kvm_vcpu *vcpu = &vmx->vcpu;
> > +	u64 perf;
> > +
> > +	rdmsrl(MSR_IA32_MPERF, perf);
> > +	vcpu->arch.v_mperf += perf - vcpu->arch.host_mperf;
> > +
> > +	rdmsrl(MSR_IA32_APERF, perf);
> > +	vcpu->arch.v_aperf += perf - vcpu->arch.host_aperf;

This requires four RDMSRs per VMX transition.  Doing that unconditionally
will drastically impact performance.  Not to mention that reading the MSRs
without checking for host support will generate #GPs and WARNs on hardware
without APERFMPERF.

Assuming we're going forward with this, at an absolute minimum the RDMSRs
need to be wrapped with checks on host _and_ guest support for the emulated
behavior.  Given the significant overhead, this might even be something
that should require an extra opt-in from userspace to enable.

> > +
> >  	if (vmx_pt_mode_is_system())
> >  		return;
> >  
> > @@ -1914,6 +1928,12 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> >  		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
> >  			return 1;
> >  		goto find_shared_msr;
> > +	case MSR_IA32_MPERF:
> > +		msr_info->data = vcpu->arch.v_mperf;
> > +		break;
> > +	case MSR_IA32_APERF:
> > +		msr_info->data = vcpu->arch.v_aperf;
> > +		break;
> >  	default:
> >  	find_shared_msr:
> >  		msr = find_msr_entry(vmx, msr_info->index);
> 
> But then here you only emulate it for VMX, which then results in SVM
> guests going wobbly.

Ya.

> Also, on Intel, the moment you advertise APERFMPERF, we'll try and read
> MSR_PLATFORM_INFO / MSR_TURBO_RATIO_LIMIT*, I don't suppose you're
> passing those through as well?

AFAICT, the proposed patch isn't fully advertising APERFMPERF, it's
advertising Turbo Boost / Dynamic Acceleration to the guest when APERFMPERF
can be used by the host to emulate IDA.  The transliteration of the
above code to be VMX-only is:

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 766303b31949..7e459b66b06e 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7191,6 +7191,9 @@ static __init void vmx_set_cpu_caps(void)
        if (nested)
                kvm_cpu_cap_set(X86_FEATURE_VMX);

+       if (boot_cpu_has(X86_FEATURE_APERFMPERF))
+               kvm_cpu_cap_set(X86_FEATURE_IDA);
+
        /* CPUID 0x7 */
        if (kvm_mpx_supported())
                kvm_cpu_cap_check_and_set(X86_FEATURE_MPX);

I have no clue as to whether that's sane/correct.

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH] [RFC] kvm: x86: emulate APERF/MPERF registers
  2020-04-24 14:46   ` Sean Christopherson
@ 2020-04-24 16:25     ` Jim Mattson
  2020-04-24 16:30       ` Paolo Bonzini
  2020-04-26  3:22     ` 答复: " Li,Rongqing
  1 sibling, 1 reply; 10+ messages in thread
From: Jim Mattson @ 2020-04-24 16:25 UTC (permalink / raw)
  To: Sean Christopherson
  Cc: Peter Zijlstra, Li RongQing, LKML, kvm list,
	the arch/x86 maintainers, H . Peter Anvin, Borislav Petkov,
	Ingo Molnar, Thomas Gleixner, Joerg Roedel, Wanpeng Li,
	Vitaly Kuznetsov, Paolo Bonzini

On Fri, Apr 24, 2020 at 7:46 AM Sean Christopherson
<sean.j.christopherson@intel.com> wrote:
>
> On Fri, Apr 24, 2020 at 12:01:43PM +0200, Peter Zijlstra wrote:
> > On Fri, Apr 24, 2020 at 01:08:55PM +0800, Li RongQing wrote:

> This requires four RDMSRs per VMX transition.  Doing that unconditionally
> will drastically impact performance.  Not to mention that reading the MSRs
> without checking for host support will generate #GPs and WARNs on hardware
> without APERFMPERF.
>
> Assuming we're going forward with this, at an absolute minimum the RDMSRs
> need to be wrapped with checks on host _and_ guest support for the emulated
> behavior.  Given the significant overhead, this might even be something
> that should require an extra opt-in from userspace to enable.

I would like to see performance data before enabling this unconditionally.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] [RFC] kvm: x86: emulate APERF/MPERF registers
  2020-04-24 16:25     ` Jim Mattson
@ 2020-04-24 16:30       ` Paolo Bonzini
  2020-04-26  3:24         ` 答复: " Li,Rongqing
  0 siblings, 1 reply; 10+ messages in thread
From: Paolo Bonzini @ 2020-04-24 16:30 UTC (permalink / raw)
  To: Jim Mattson, Sean Christopherson
  Cc: Peter Zijlstra, Li RongQing, LKML, kvm list,
	the arch/x86 maintainers, H . Peter Anvin, Borislav Petkov,
	Ingo Molnar, Thomas Gleixner, Joerg Roedel, Wanpeng Li,
	Vitaly Kuznetsov

On 24/04/20 18:25, Jim Mattson wrote:
>> Assuming we're going forward with this, at an absolute minimum the RDMSRs
>> need to be wrapped with checks on host _and_ guest support for the emulated
>> behavior.  Given the significant overhead, this might even be something
>> that should require an extra opt-in from userspace to enable.
> 
> I would like to see performance data before enabling this unconditionally.

I wouldn't want this to be enabled unconditionally anyway, because you
need to take into account live migration to and from processors that do
not have APERF/MPERF support.

Paolo


^ permalink raw reply	[flat|nested] 10+ messages in thread

* 答复: [PATCH] [RFC] kvm: x86: emulate APERF/MPERF registers
  2020-04-24 14:46   ` Sean Christopherson
  2020-04-24 16:25     ` Jim Mattson
@ 2020-04-26  3:22     ` Li,Rongqing
  1 sibling, 0 replies; 10+ messages in thread
From: Li,Rongqing @ 2020-04-26  3:22 UTC (permalink / raw)
  To: Sean Christopherson, Peter Zijlstra
  Cc: linux-kernel, kvm, x86, hpa, bp, mingo, tglx, joro, jmattson,
	wanpengli, vkuznets, pbonzini

> -----邮件原件-----
> 发件人: Sean Christopherson [mailto:sean.j.christopherson@intel.com]
> 发送时间: 2020年4月24日 22:46
> 收件人: Peter Zijlstra <peterz@infradead.org>
> 抄送: Li,Rongqing <lirongqing@baidu.com>; linux-kernel@vger.kernel.org;
> kvm@vger.kernel.org; x86@kernel.org; hpa@zytor.com; bp@alien8.de;
> mingo@redhat.com; tglx@linutronix.de; joro@8bytes.org;
> jmattson@google.com; wanpengli@tencent.com; vkuznets@redhat.com;
> pbonzini@redhat.com
> 主题: Re: [PATCH] [RFC] kvm: x86: emulate APERF/MPERF registers
> 
> On Fri, Apr 24, 2020 at 12:01:43PM +0200, Peter Zijlstra wrote:
> > On Fri, Apr 24, 2020 at 01:08:55PM +0800, Li RongQing wrote:
> > > diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index
> > > 901cd1fdecd9..00e4993cb338 100644
> > > --- a/arch/x86/kvm/cpuid.c
> > > +++ b/arch/x86/kvm/cpuid.c
> > > @@ -558,7 +558,10 @@ static inline int __do_cpuid_func(struct
> kvm_cpuid_array *array, u32 function)
> > >  	case 6: /* Thermal management */
> > >  		entry->eax = 0x4; /* allow ARAT */
> > >  		entry->ebx = 0;
> > > -		entry->ecx = 0;
> > > +		if (boot_cpu_has(X86_FEATURE_APERFMPERF))
> > > +			entry->ecx = 0x1;
> > > +		else
> > > +			entry->ecx = 0x0;
> > >  		entry->edx = 0;
> > >  		break;
> > >  	/* function 7 has additional index. */
> >
> > AFAICT this is generic x86 code, that is, this will tell an AMD SVM
> > guest it has APERFMPERF on.
> >
> > > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index
> > > 91749f1254e8..f20216fc0b57 100644
> > > --- a/arch/x86/kvm/vmx/vmx.c
> > > +++ b/arch/x86/kvm/vmx/vmx.c
> > > @@ -1064,6 +1064,11 @@ static inline void pt_save_msr(struct pt_ctx
> > > *ctx, u32 addr_range)
> > >
> > >  static void pt_guest_enter(struct vcpu_vmx *vmx)  {
> > > +	struct kvm_vcpu *vcpu = &vmx->vcpu;
> > > +
> > > +	rdmsrl(MSR_IA32_MPERF, vcpu->arch.host_mperf);
> > > +	rdmsrl(MSR_IA32_APERF, vcpu->arch.host_aperf);
> 
> Why are these buried in Processor Trace code?  Is APERFMERF in any way
> dependent on PT?
> 

No;
I will move it out of the PT code.

> > > +
> > >  	if (vmx_pt_mode_is_system())
> > >  		return;
> > >
> > > @@ -1081,6 +1086,15 @@ static void pt_guest_enter(struct vcpu_vmx
> > > *vmx)
> > >
> > >  static void pt_guest_exit(struct vcpu_vmx *vmx)  {
> > > +	struct kvm_vcpu *vcpu = &vmx->vcpu;
> > > +	u64 perf;
> > > +
> > > +	rdmsrl(MSR_IA32_MPERF, perf);
> > > +	vcpu->arch.v_mperf += perf - vcpu->arch.host_mperf;
> > > +
> > > +	rdmsrl(MSR_IA32_APERF, perf);
> > > +	vcpu->arch.v_aperf += perf - vcpu->arch.host_aperf;
> 
> This requires four RDMSRs per VMX transition.  Doing that unconditionally will
> drastically impact performance.  Not to mention that reading the MSRs
> without checking for host support will generate #GPs and WARNs on hardware
> without APERFMPERF.
> 
> Assuming we're going forward with this, at an absolute minimum the RDMSRs
> need to be wrapped with checks on host _and_ guest support for the emulated
> behavior.  Given the significant overhead, this might even be something that
> should require an extra opt-in from userspace to enable.
> 

Will do as you suggest.

> > > +
> > >  	if (vmx_pt_mode_is_system())
> > >  		return;
> > >
> > > @@ -1914,6 +1928,12 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu,
> struct msr_data *msr_info)
> > >  		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
> > >  			return 1;
> > >  		goto find_shared_msr;
> > > +	case MSR_IA32_MPERF:
> > > +		msr_info->data = vcpu->arch.v_mperf;
> > > +		break;
> > > +	case MSR_IA32_APERF:
> > > +		msr_info->data = vcpu->arch.v_aperf;
> > > +		break;
> > >  	default:
> > >  	find_shared_msr:
> > >  		msr = find_msr_entry(vmx, msr_info->index);
> >
> > But then here you only emulate it for VMX, which then results in SVM
> > guests going wobbly.
> 
> Ya.

I will make this code work for both AMD and Intel CPUs.

> 
> > Also, on Intel, the moment you advertise APERFMPERF, we'll try and
> > read MSR_PLATFORM_INFO / MSR_TURBO_RATIO_LIMIT*, I don't suppose
> > you're passing those through as well?
> 
> AFAICT, the proposed patch isn't fully advertising APERFMPERF, it's advertising
> Turbo Boost / Dynamic Acceleration to the guest when APERFMPERF can be
> used by the host to emulate IDA.  The transliteration of the above code to be
> VMX-only is:

Do you mean that when we forward APERFMPERF to the guest, the guest has a similar IDA capability,
and we do not need to consider MSR_PLATFORM_INFO / MSR_TURBO_RATIO_LIMIT*?

-Li
> 
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index
> 766303b31949..7e459b66b06e 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -7191,6 +7191,9 @@ static __init void vmx_set_cpu_caps(void)
>         if (nested)
>                 kvm_cpu_cap_set(X86_FEATURE_VMX);
> 
> +       if (boot_cpu_has(X86_FEATURE_APERFMPERF))
> +               kvm_cpu_cap_set(X86_FEATURE_IDA);
> +
>         /* CPUID 0x7 */
>         if (kvm_mpx_supported())
>                 kvm_cpu_cap_check_and_set(X86_FEATURE_MPX);
> 
> I have no clue as to whether that's sane/correct.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* 答复: [PATCH] [RFC] kvm: x86: emulate APERF/MPERF registers
  2020-04-24 16:30       ` Paolo Bonzini
@ 2020-04-26  3:24         ` Li,Rongqing
  2020-04-27 17:30           ` Jim Mattson
  0 siblings, 1 reply; 10+ messages in thread
From: Li,Rongqing @ 2020-04-26  3:24 UTC (permalink / raw)
  To: Paolo Bonzini, Jim Mattson, Sean Christopherson
  Cc: Peter Zijlstra, LKML, kvm list, the arch/x86 maintainers,
	H . Peter Anvin, Borislav Petkov, Ingo Molnar, Thomas Gleixner,
	Joerg Roedel, Wanpeng Li, Vitaly Kuznetsov



> -----邮件原件-----
> 发件人: Paolo Bonzini [mailto:pbonzini@redhat.com]
> 发送时间: 2020年4月25日 0:30
> 收件人: Jim Mattson <jmattson@google.com>; Sean Christopherson
> <sean.j.christopherson@intel.com>
> 抄送: Peter Zijlstra <peterz@infradead.org>; Li,Rongqing
> <lirongqing@baidu.com>; LKML <linux-kernel@vger.kernel.org>; kvm list
> <kvm@vger.kernel.org>; the arch/x86 maintainers <x86@kernel.org>; H .
> Peter Anvin <hpa@zytor.com>; Borislav Petkov <bp@alien8.de>; Ingo Molnar
> <mingo@redhat.com>; Thomas Gleixner <tglx@linutronix.de>; Joerg Roedel
> <joro@8bytes.org>; Wanpeng Li <wanpengli@tencent.com>; Vitaly Kuznetsov
> <vkuznets@redhat.com>
> 主题: Re: [PATCH] [RFC] kvm: x86: emulate APERF/MPERF registers
> 
> On 24/04/20 18:25, Jim Mattson wrote:
> >> Assuming we're going forward with this, at an absolute minimum the
> >> RDMSRs need to be wrapped with checks on host _and_ guest support for
> >> the emulated behavior.  Given the significant overhead, this might
> >> even be something that should require an extra opt-in from userspace to
> enable.
> >
> > I would like to see performance data before enabling this unconditionally.
> 
> I wouldn't want this to be enabled unconditionally anyway, because you need to
> take into account live migration to and from processors that do not have
> APERF/MPERF support.
> 
> Paolo

I will add a kvm parameter to control whether MPERF/APERF emulation is enabled, and make the default value false.

Thanks

-Li


^ permalink raw reply	[flat|nested] 10+ messages in thread

* 答复: [PATCH] [RFC] kvm: x86: emulate APERF/MPERF registers
  2020-04-24 10:01 ` Peter Zijlstra
  2020-04-24 14:46   ` Sean Christopherson
@ 2020-04-26  8:30   ` Li,Rongqing
  1 sibling, 0 replies; 10+ messages in thread
From: Li,Rongqing @ 2020-04-26  8:30 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: linux-kernel, kvm, x86, hpa, bp, mingo, tglx, joro, jmattson,
	wanpengli, vkuznets, sean.j.christopherson, pbonzini

> 
> But then here you only emulate it for VMX, which then results in SVM guests
> going wobbly.
> 
> Also, on Intel, the moment you advertise APERFMPERF, we'll try and read
> MSR_PLATFORM_INFO / MSR_TURBO_RATIO_LIMIT*, I don't suppose you're
> passing those through as well?
> 

init_freq_invariance(void) tries to read MSR_PLATFORM_INFO / MSR_TURBO_RATIO_LIMIT*;
should we add a check of the turbo status in init_freq_invariance to avoid those reads?

It is unnecessary to call intel_set_max_freq_ratio if turbo is disabled:

diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index fe3ab9632f3b..54fb88323293 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -2009,6 +2009,9 @@ static void init_freq_invariance(void)
        if (smp_processor_id() != 0 || !boot_cpu_has(X86_FEATURE_APERFMPERF))
                return;
 
+       if (turbo_disabled())
+               return;
+
        if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
                ret = intel_set_max_freq_ratio();

-Li

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH] [RFC] kvm: x86: emulate APERF/MPERF registers
  2020-04-26  3:24         ` 答复: " Li,Rongqing
@ 2020-04-27 17:30           ` Jim Mattson
  2020-04-27 19:10             ` Paolo Bonzini
  0 siblings, 1 reply; 10+ messages in thread
From: Jim Mattson @ 2020-04-27 17:30 UTC (permalink / raw)
  To: Li,Rongqing
  Cc: Paolo Bonzini, Sean Christopherson, Peter Zijlstra, LKML,
	kvm list, the arch/x86 maintainers, H . Peter Anvin,
	Borislav Petkov, Ingo Molnar, Thomas Gleixner, Joerg Roedel,
	Wanpeng Li, Vitaly Kuznetsov

On Sat, Apr 25, 2020 at 8:24 PM Li,Rongqing <lirongqing@baidu.com> wrote:
>
>
>
> > -----邮件原件-----
> > 发件人: Paolo Bonzini [mailto:pbonzini@redhat.com]
> > 发送时间: 2020年4月25日 0:30
> > 收件人: Jim Mattson <jmattson@google.com>; Sean Christopherson
> > <sean.j.christopherson@intel.com>
> > 抄送: Peter Zijlstra <peterz@infradead.org>; Li,Rongqing
> > <lirongqing@baidu.com>; LKML <linux-kernel@vger.kernel.org>; kvm list
> > <kvm@vger.kernel.org>; the arch/x86 maintainers <x86@kernel.org>; H .
> > Peter Anvin <hpa@zytor.com>; Borislav Petkov <bp@alien8.de>; Ingo Molnar
> > <mingo@redhat.com>; Thomas Gleixner <tglx@linutronix.de>; Joerg Roedel
> > <joro@8bytes.org>; Wanpeng Li <wanpengli@tencent.com>; Vitaly Kuznetsov
> > <vkuznets@redhat.com>
> > 主题: Re: [PATCH] [RFC] kvm: x86: emulate APERF/MPERF registers
> >
> > On 24/04/20 18:25, Jim Mattson wrote:
> > >> Assuming we're going forward with this, at an absolute minimum the
> > >> RDMSRs need to be wrapped with checks on host _and_ guest support for
> > >> the emulated behavior.  Given the significant overhead, this might
> > >> even be something that should require an extra opt-in from userspace to
> > enable.
> > >
> > > I would like to see performance data before enabling this unconditionally.
> >
> > I wouldn't want this to be enabled unconditionally anyway, because you need to
> > take into account live migration to and from processors that do not have
> > APERF/MPERF support.
> >
> > Paolo
>
> I will add a kvm parameter to consider whether enable MPERF/APERF emulations, and make default value to false

Wouldn't it be better to add a per-VM capability to enable this feature?

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] [RFC] kvm: x86: emulate APERF/MPERF registers
  2020-04-27 17:30           ` Jim Mattson
@ 2020-04-27 19:10             ` Paolo Bonzini
  0 siblings, 0 replies; 10+ messages in thread
From: Paolo Bonzini @ 2020-04-27 19:10 UTC (permalink / raw)
  To: Jim Mattson, Li,Rongqing
  Cc: Sean Christopherson, Peter Zijlstra, LKML, kvm list,
	the arch/x86 maintainers, H . Peter Anvin, Borislav Petkov,
	Ingo Molnar, Thomas Gleixner, Joerg Roedel, Wanpeng Li,
	Vitaly Kuznetsov

On 27/04/20 19:30, Jim Mattson wrote:
>>>> I would like to see performance data before enabling this
>>>> unconditionally.
>>> I wouldn't want this to be enabled unconditionally anyway,
>>> because you need to take into account live migration to and from
>>> processors that do not have APERF/MPERF support.
>>> 
>>> Paolo
>> I will add a kvm parameter to consider whether enable MPERF/APERF
>> emulations, and make default value to false
>
> Wouldn't it be better to add a per-VM capability to enable this
> feature?

Yes, it would be better to use KVM_ENABLE_CAP indeed.

Paolo


^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2020-04-27 19:10 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-04-24  5:08 [PATCH] [RFC] kvm: x86: emulate APERF/MPERF registers Li RongQing
2020-04-24 10:01 ` Peter Zijlstra
2020-04-24 14:46   ` Sean Christopherson
2020-04-24 16:25     ` Jim Mattson
2020-04-24 16:30       ` Paolo Bonzini
2020-04-26  3:24         ` 答复: " Li,Rongqing
2020-04-27 17:30           ` Jim Mattson
2020-04-27 19:10             ` Paolo Bonzini
2020-04-26  3:22     ` 答复: " Li,Rongqing
2020-04-26  8:30   ` Li,Rongqing

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).