[v3,6/9] KVM: vmx/pmu: Add MSR_ARCH_LBR_CTL emulation for Arch LBR
diff mbox series

Message ID 20210303135756.1546253-7-like.xu@linux.intel.com
State New, archived
Headers show
Series
  • KVM: x86/pmu: Guest Architectural LBR Enabling
Related show

Commit Message

Like Xu March 3, 2021, 1:57 p.m. UTC
Arch LBRs are enabled by setting MSR_ARCH_LBR_CTL.LBREn to 1. A new guest
state field named "Guest IA32_LBR_CTL" is added to enhance guest LBR usage.
When guest Arch LBR is enabled, a guest LBR event will be created like the
model-specific LBR does.

On processors that support Arch LBR, MSR_IA32_DEBUGCTLMSR[bit 0] has no
meaning. It can be written to 0 or 1, but reads will always return 0. On
the vmx_vcpu_reset(), the IA32_LBR_CTL will be cleared to 0.

Signed-off-by: Like Xu <like.xu@linux.intel.com>
---
 arch/x86/include/asm/vmx.h   |  2 ++
 arch/x86/kvm/vmx/pmu_intel.c | 27 ++++++++++++++++++++++-----
 arch/x86/kvm/vmx/vmx.c       |  9 +++++++++
 3 files changed, 33 insertions(+), 5 deletions(-)

Comments

Sean Christopherson March 3, 2021, 5:19 p.m. UTC | #1
On Wed, Mar 03, 2021, Like Xu wrote:
> diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
> index 25d620685ae7..d14a14eb712d 100644
> --- a/arch/x86/kvm/vmx/pmu_intel.c
> +++ b/arch/x86/kvm/vmx/pmu_intel.c
> @@ -19,6 +19,7 @@
>  #include "pmu.h"
>  
>  #define MSR_PMC_FULL_WIDTH_BIT      (MSR_IA32_PMC0 - MSR_IA32_PERFCTR0)
> +#define KVM_ARCH_LBR_CTL_MASK			0x7f000f

It would nice to build this up with the individual bits instead of tossing in a
magic number. 

>  static struct kvm_event_hw_type_mapping intel_arch_events[] = {
>  	/* Index must match CPUID 0x0A.EBX bit vector */
> @@ -221,6 +222,7 @@ static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
>  		ret = pmu->version > 1;
>  		break;
>  	case MSR_ARCH_LBR_DEPTH:
> +	case MSR_ARCH_LBR_CTL:
>  		ret = guest_cpuid_has(vcpu, X86_FEATURE_ARCH_LBR);
>  		break;
>  	default:
> @@ -390,6 +392,9 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  	case MSR_ARCH_LBR_DEPTH:
>  		msr_info->data = lbr_desc->records.nr;
>  		return 0;
> +	case MSR_ARCH_LBR_CTL:
> +		msr_info->data = vmcs_read64(GUEST_IA32_LBR_CTL);
> +		return 0;
>  	default:
>  		if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
>  		    (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
> @@ -457,6 +462,15 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  		lbr_desc->records.nr = data;
>  		lbr_desc->arch_lbr_reset = true;
>  		return 0;
> +	case MSR_ARCH_LBR_CTL:
> +		if (!(data & ~KVM_ARCH_LBR_CTL_MASK)) {

Maybe invert this to reduce indentation?

		if (data & ...)
			break; (or "return 1;")


> +			vmcs_write64(GUEST_IA32_LBR_CTL, data);
> +			if (intel_pmu_lbr_is_enabled(vcpu) && !lbr_desc->event &&
> +				(data & ARCH_LBR_CTL_LBREN))

Alignment.

> +				intel_pmu_create_guest_lbr_event(vcpu);
> +			return 0;
> +		}
> +		break;
>  	default:
>  		if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
>  		    (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
> @@ -635,12 +649,15 @@ static void intel_pmu_reset(struct kvm_vcpu *vcpu)
>   */
>  static void intel_pmu_legacy_freezing_lbrs_on_pmi(struct kvm_vcpu *vcpu)
>  {
> -	u64 data = vmcs_read64(GUEST_IA32_DEBUGCTL);
> +	u32 lbr_ctl_field = GUEST_IA32_DEBUGCTL;
>  
> -	if (data & DEBUGCTLMSR_FREEZE_LBRS_ON_PMI) {
> -		data &= ~DEBUGCTLMSR_LBR;
> -		vmcs_write64(GUEST_IA32_DEBUGCTL, data);
> -	}
> +	if (!(vmcs_read64(GUEST_IA32_DEBUGCTL) & DEBUGCTLMSR_FREEZE_LBRS_ON_PMI))
> +		return;
> +
> +	if (guest_cpuid_has(vcpu, X86_FEATURE_ARCH_LBR))
> +		lbr_ctl_field = GUEST_IA32_LBR_CTL;
> +
> +	vmcs_write64(lbr_ctl_field, vmcs_read64(lbr_ctl_field) & ~BIT(0));

Use ARCH_LBR_CTL_LBREN?

>  }
>  
>  static void intel_pmu_deliver_pmi(struct kvm_vcpu *vcpu)
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 6d7e760fdfa0..a0660b9934c6 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -2036,6 +2036,13 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  						VM_EXIT_SAVE_DEBUG_CONTROLS)
>  			get_vmcs12(vcpu)->guest_ia32_debugctl = data;
>  
> +		/*
> +		 * For Arch LBR, IA32_DEBUGCTL[bit 0] has no meaning.
> +		 * It can be written to 0 or 1, but reads will always return 0.
> +		 */
> +		if (guest_cpuid_has(vcpu, X86_FEATURE_ARCH_LBR))
> +			data &= ~DEBUGCTLMSR_LBR;
> +
>  		vmcs_write64(GUEST_IA32_DEBUGCTL, data);
>  		if (intel_pmu_lbr_is_enabled(vcpu) && !to_vmx(vcpu)->lbr_desc.event &&
>  		    (data & DEBUGCTLMSR_LBR))
> @@ -4463,6 +4470,8 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
>  		vmcs_writel(GUEST_SYSENTER_ESP, 0);
>  		vmcs_writel(GUEST_SYSENTER_EIP, 0);
>  		vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
> +		if (cpu_has_vmx_arch_lbr())
> +			vmcs_write64(GUEST_IA32_LBR_CTL, 0);

Not that any guest is likely to care, but is the MSR cleared on INIT?  The SDM
has specific language for warm reset, but I can't find anything for INIT.

  On a warm reset, all LBR MSRs, including IA32_LBR_DEPTH, have their values
  preserved. However, IA32_LBR_CTL.LBREn is cleared to 0, disabling LBRs. If a
  warm reset is triggered while the processor is in C6, also known as warm init,
  all LBR MSRs will be reset to their initial values.

>  	}
>  
>  	kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
> -- 
> 2.29.2
>
Xu, Like March 4, 2021, 2:58 a.m. UTC | #2
On 2021/3/4 1:19, Sean Christopherson wrote:
> On Wed, Mar 03, 2021, Like Xu wrote:
>> diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
>> index 25d620685ae7..d14a14eb712d 100644
>> --- a/arch/x86/kvm/vmx/pmu_intel.c
>> +++ b/arch/x86/kvm/vmx/pmu_intel.c
>> @@ -19,6 +19,7 @@
>>   #include "pmu.h"
>>   
>>   #define MSR_PMC_FULL_WIDTH_BIT      (MSR_IA32_PMC0 - MSR_IA32_PERFCTR0)
>> +#define KVM_ARCH_LBR_CTL_MASK			0x7f000f
> It would nice to build this up with the individual bits instead of tossing in a
> magic number.

I will move the mask ARCH_LBR_CTL_MASK from lbr.c to msr-index.h and thus,
#define KVM_ARCH_LBR_CTL_MASK  (ARCH_LBR_CTL_MASK | ARCH_LBR_CTL_LBREN)

I assume the move operation would be a separate patch for host side.

>
>>   static struct kvm_event_hw_type_mapping intel_arch_events[] = {
>>   	/* Index must match CPUID 0x0A.EBX bit vector */
>> @@ -221,6 +222,7 @@ static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
>>   		ret = pmu->version > 1;
>>   		break;
>>   	case MSR_ARCH_LBR_DEPTH:
>> +	case MSR_ARCH_LBR_CTL:
>>   		ret = guest_cpuid_has(vcpu, X86_FEATURE_ARCH_LBR);
>>   		break;
>>   	default:
>> @@ -390,6 +392,9 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>>   	case MSR_ARCH_LBR_DEPTH:
>>   		msr_info->data = lbr_desc->records.nr;
>>   		return 0;
>> +	case MSR_ARCH_LBR_CTL:
>> +		msr_info->data = vmcs_read64(GUEST_IA32_LBR_CTL);
>> +		return 0;
>>   	default:
>>   		if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
>>   		    (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
>> @@ -457,6 +462,15 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>>   		lbr_desc->records.nr = data;
>>   		lbr_desc->arch_lbr_reset = true;
>>   		return 0;
>> +	case MSR_ARCH_LBR_CTL:
>> +		if (!(data & ~KVM_ARCH_LBR_CTL_MASK)) {
> Maybe invert this to reduce indentation?
>
> 		if (data & ...)
> 			break; (or "return 1;")

Fine to me.

>
>
>> +			vmcs_write64(GUEST_IA32_LBR_CTL, data);
>> +			if (intel_pmu_lbr_is_enabled(vcpu) && !lbr_desc->event &&
>> +				(data & ARCH_LBR_CTL_LBREN))
> Alignment.

I'll fix it.

>
>> +				intel_pmu_create_guest_lbr_event(vcpu);
>> +			return 0;
>> +		}
>> +		break;
>>   	default:
>>   		if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
>>   		    (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
>> @@ -635,12 +649,15 @@ static void intel_pmu_reset(struct kvm_vcpu *vcpu)
>>    */
>>   static void intel_pmu_legacy_freezing_lbrs_on_pmi(struct kvm_vcpu *vcpu)
>>   {
>> -	u64 data = vmcs_read64(GUEST_IA32_DEBUGCTL);
>> +	u32 lbr_ctl_field = GUEST_IA32_DEBUGCTL;
>>   
>> -	if (data & DEBUGCTLMSR_FREEZE_LBRS_ON_PMI) {
>> -		data &= ~DEBUGCTLMSR_LBR;
>> -		vmcs_write64(GUEST_IA32_DEBUGCTL, data);
>> -	}
>> +	if (!(vmcs_read64(GUEST_IA32_DEBUGCTL) & DEBUGCTLMSR_FREEZE_LBRS_ON_PMI))
>> +		return;
>> +
>> +	if (guest_cpuid_has(vcpu, X86_FEATURE_ARCH_LBR))
>> +		lbr_ctl_field = GUEST_IA32_LBR_CTL;
>> +
>> +	vmcs_write64(lbr_ctl_field, vmcs_read64(lbr_ctl_field) & ~BIT(0));
> Use ARCH_LBR_CTL_LBREN?

I'm trying to unify the usage of EN bit for both Arch LBR and legacy LBR:

#define ARCH_LBR_CTL_LBREN        BIT(0)
#define DEBUGCTLMSR_LBR            (1UL <<  0)

Any suggestion ?

>
>>   }
>>   
>>   static void intel_pmu_deliver_pmi(struct kvm_vcpu *vcpu)
>> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
>> index 6d7e760fdfa0..a0660b9934c6 100644
>> --- a/arch/x86/kvm/vmx/vmx.c
>> +++ b/arch/x86/kvm/vmx/vmx.c
>> @@ -2036,6 +2036,13 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>>   						VM_EXIT_SAVE_DEBUG_CONTROLS)
>>   			get_vmcs12(vcpu)->guest_ia32_debugctl = data;
>>   
>> +		/*
>> +		 * For Arch LBR, IA32_DEBUGCTL[bit 0] has no meaning.
>> +		 * It can be written to 0 or 1, but reads will always return 0.
>> +		 */
>> +		if (guest_cpuid_has(vcpu, X86_FEATURE_ARCH_LBR))
>> +			data &= ~DEBUGCTLMSR_LBR;
>> +
>>   		vmcs_write64(GUEST_IA32_DEBUGCTL, data);
>>   		if (intel_pmu_lbr_is_enabled(vcpu) && !to_vmx(vcpu)->lbr_desc.event &&
>>   		    (data & DEBUGCTLMSR_LBR))
>> @@ -4463,6 +4470,8 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
>>   		vmcs_writel(GUEST_SYSENTER_ESP, 0);
>>   		vmcs_writel(GUEST_SYSENTER_EIP, 0);
>>   		vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
>> +		if (cpu_has_vmx_arch_lbr())
>> +			vmcs_write64(GUEST_IA32_LBR_CTL, 0);
> Not that any guest is likely to care, but is the MSR cleared on INIT?  The SDM
> has specific language for warm reset, but I can't find anything for INIT.
>
>    On a warm reset, all LBR MSRs, including IA32_LBR_DEPTH, have their values
>    preserved. However, IA32_LBR_CTL.LBREn is cleared to 0, disabling LBRs. If a
>    warm reset is triggered while the processor is in C6, also known as warm init,
>    all LBR MSRs will be reset to their initial values.

I was told that the reset behavior of GUEST_IA32_LBR_CTL
would be the same as the GUEST_IA32_DEBUGCTL (true for INIT as well).

It looks we have not strictly distinguished the guest's power concept C*.
Do we have two trap paths for "warm reset" and "warm init" ?

>
>>   	}
>>   
>>   	kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
>> -- 
>> 2.29.2
>>
Sean Christopherson March 4, 2021, 4:25 p.m. UTC | #3
On Thu, Mar 04, 2021, Xu, Like wrote:
> On 2021/3/4 1:19, Sean Christopherson wrote:
> > > @@ -4463,6 +4470,8 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
> > >   		vmcs_writel(GUEST_SYSENTER_ESP, 0);
> > >   		vmcs_writel(GUEST_SYSENTER_EIP, 0);
> > >   		vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
> > > +		if (cpu_has_vmx_arch_lbr())
> > > +			vmcs_write64(GUEST_IA32_LBR_CTL, 0);
> > Not that any guest is likely to care, but is the MSR cleared on INIT?  The SDM
> > has specific language for warm reset, but I can't find anything for INIT.
> > 
> >    On a warm reset, all LBR MSRs, including IA32_LBR_DEPTH, have their values
> >    preserved. However, IA32_LBR_CTL.LBREn is cleared to 0, disabling LBRs. If a
> >    warm reset is triggered while the processor is in C6, also known as warm init,
> >    all LBR MSRs will be reset to their initial values.
> 
> I was told that the reset behavior of GUEST_IA32_LBR_CTL
> would be the same as the GUEST_IA32_DEBUGCTL (true for INIT as well).

Yes, and DEBUGCTL is preserved on INIT.

	if (!init_event) {
		vmcs_write32(GUEST_SYSENTER_CS, 0);
		vmcs_writel(GUEST_SYSENTER_ESP, 0);
		vmcs_writel(GUEST_SYSENTER_EIP, 0);
		vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
	}

Table 22-10 in the SDM:

	All Other MSRs | Pwr up or Reset: | INIT:
			 Undefined          Unchanged


If IA32_LBR_DEPTH is weirdly exempt, it needs to be documented.  I doubt that's
the case though.

> It looks we have not strictly distinguished the guest's power concept C*.
> Do we have two trap paths for "warm reset" and "warm init" ?

No.  Despite the name .vcpu_reset, KVM doesn't even have a RESET path, userspace
is responsible for modelling RESET.

Patch
diff mbox series

diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 358707f60d99..8ec7bc24b37a 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -245,6 +245,8 @@  enum vmcs_field {
 	GUEST_BNDCFGS_HIGH              = 0x00002813,
 	GUEST_IA32_RTIT_CTL		= 0x00002814,
 	GUEST_IA32_RTIT_CTL_HIGH	= 0x00002815,
+	GUEST_IA32_LBR_CTL		= 0x00002816,
+	GUEST_IA32_LBR_CTL_HIGH	= 0x00002817,
 	HOST_IA32_PAT			= 0x00002c00,
 	HOST_IA32_PAT_HIGH		= 0x00002c01,
 	HOST_IA32_EFER			= 0x00002c02,
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 25d620685ae7..d14a14eb712d 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -19,6 +19,7 @@ 
 #include "pmu.h"
 
 #define MSR_PMC_FULL_WIDTH_BIT      (MSR_IA32_PMC0 - MSR_IA32_PERFCTR0)
+#define KVM_ARCH_LBR_CTL_MASK			0x7f000f
 
 static struct kvm_event_hw_type_mapping intel_arch_events[] = {
 	/* Index must match CPUID 0x0A.EBX bit vector */
@@ -221,6 +222,7 @@  static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
 		ret = pmu->version > 1;
 		break;
 	case MSR_ARCH_LBR_DEPTH:
+	case MSR_ARCH_LBR_CTL:
 		ret = guest_cpuid_has(vcpu, X86_FEATURE_ARCH_LBR);
 		break;
 	default:
@@ -390,6 +392,9 @@  static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case MSR_ARCH_LBR_DEPTH:
 		msr_info->data = lbr_desc->records.nr;
 		return 0;
+	case MSR_ARCH_LBR_CTL:
+		msr_info->data = vmcs_read64(GUEST_IA32_LBR_CTL);
+		return 0;
 	default:
 		if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
 		    (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
@@ -457,6 +462,15 @@  static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		lbr_desc->records.nr = data;
 		lbr_desc->arch_lbr_reset = true;
 		return 0;
+	case MSR_ARCH_LBR_CTL:
+		if (!(data & ~KVM_ARCH_LBR_CTL_MASK)) {
+			vmcs_write64(GUEST_IA32_LBR_CTL, data);
+			if (intel_pmu_lbr_is_enabled(vcpu) && !lbr_desc->event &&
+				(data & ARCH_LBR_CTL_LBREN))
+				intel_pmu_create_guest_lbr_event(vcpu);
+			return 0;
+		}
+		break;
 	default:
 		if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
 		    (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
@@ -635,12 +649,15 @@  static void intel_pmu_reset(struct kvm_vcpu *vcpu)
  */
 static void intel_pmu_legacy_freezing_lbrs_on_pmi(struct kvm_vcpu *vcpu)
 {
-	u64 data = vmcs_read64(GUEST_IA32_DEBUGCTL);
+	u32 lbr_ctl_field = GUEST_IA32_DEBUGCTL;
 
-	if (data & DEBUGCTLMSR_FREEZE_LBRS_ON_PMI) {
-		data &= ~DEBUGCTLMSR_LBR;
-		vmcs_write64(GUEST_IA32_DEBUGCTL, data);
-	}
+	if (!(vmcs_read64(GUEST_IA32_DEBUGCTL) & DEBUGCTLMSR_FREEZE_LBRS_ON_PMI))
+		return;
+
+	if (guest_cpuid_has(vcpu, X86_FEATURE_ARCH_LBR))
+		lbr_ctl_field = GUEST_IA32_LBR_CTL;
+
+	vmcs_write64(lbr_ctl_field, vmcs_read64(lbr_ctl_field) & ~BIT(0));
 }
 
 static void intel_pmu_deliver_pmi(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 6d7e760fdfa0..a0660b9934c6 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -2036,6 +2036,13 @@  static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 						VM_EXIT_SAVE_DEBUG_CONTROLS)
 			get_vmcs12(vcpu)->guest_ia32_debugctl = data;
 
+		/*
+		 * For Arch LBR, IA32_DEBUGCTL[bit 0] has no meaning.
+		 * It can be written to 0 or 1, but reads will always return 0.
+		 */
+		if (guest_cpuid_has(vcpu, X86_FEATURE_ARCH_LBR))
+			data &= ~DEBUGCTLMSR_LBR;
+
 		vmcs_write64(GUEST_IA32_DEBUGCTL, data);
 		if (intel_pmu_lbr_is_enabled(vcpu) && !to_vmx(vcpu)->lbr_desc.event &&
 		    (data & DEBUGCTLMSR_LBR))
@@ -4463,6 +4470,8 @@  static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 		vmcs_writel(GUEST_SYSENTER_ESP, 0);
 		vmcs_writel(GUEST_SYSENTER_EIP, 0);
 		vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
+		if (cpu_has_vmx_arch_lbr())
+			vmcs_write64(GUEST_IA32_LBR_CTL, 0);
 	}
 
 	kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);