linux-kernel.vger.kernel.org archive mirror
* [PATCH v2 0/3] KVM: VMX: Some refactor of VMX vmcs and msr bitmap
From: Xiaoyao Li @ 2019-10-18  9:37 UTC (permalink / raw)
  To: Paolo Bonzini, Radim Krčmář,
	Sean Christopherson, Vitaly Kuznetsov, Jim Mattson, Joerg Roedel
  Cc: Xiaoyao Li, kvm, linux-kernel

Remove the vcpu creation refactor and FPU allocation cleanup from v1, since
I need more time to investigate Sean's suggestion.

This series adds one patch that moves the vmcs resetting out of
vmx_vcpu_reset(), based on Krish's suggestion.

Xiaoyao Li (3):
  KVM: VMX: Move vmcs related resetting out of vmx_vcpu_reset()
  KVM: VMX: Rename {vmx,nested_vmx}_vcpu_setup() and minor cleanup
  KVM: VMX: Some minor refactor of MSR bitmap

 arch/x86/kvm/vmx/nested.c |   2 +-
 arch/x86/kvm/vmx/nested.h |   2 +-
 arch/x86/kvm/vmx/vmx.c    | 173 ++++++++++++++++++++------------------
 3 files changed, 91 insertions(+), 86 deletions(-)

-- 
2.19.1



* [PATCH v2 1/3] KVM: VMX: Move vmcs related resetting out of vmx_vcpu_reset()
From: Xiaoyao Li @ 2019-10-18  9:37 UTC (permalink / raw)
  To: Paolo Bonzini, Radim Krčmář,
	Sean Christopherson, Vitaly Kuznetsov, Jim Mattson, Joerg Roedel
  Cc: Xiaoyao Li, kvm, linux-kernel

Move vmcs related code from vmx_vcpu_reset() into a new function,
vmx_vmcs_reset(), so that it's clearer which data is related to the
vmcs and is held in the vmcs.

Suggested-by: Krish Sadhukhan <krish.sadhukhan@oracle.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
---
 arch/x86/kvm/vmx/vmx.c | 65 ++++++++++++++++++++++++------------------
 1 file changed, 37 insertions(+), 28 deletions(-)

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index e660e28e9ae0..ef567df344bf 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -4271,33 +4271,11 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
 	}
 }
 
-static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
+static void vmx_vmcs_reset(struct kvm_vcpu *vcpu, bool init_event)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	struct msr_data apic_base_msr;
 	u64 cr0;
 
-	vmx->rmode.vm86_active = 0;
-	vmx->spec_ctrl = 0;
-
-	vmx->msr_ia32_umwait_control = 0;
-
-	vcpu->arch.microcode_version = 0x100000000ULL;
-	vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
-	vmx->hv_deadline_tsc = -1;
-	kvm_set_cr8(vcpu, 0);
-
-	if (!init_event) {
-		apic_base_msr.data = APIC_DEFAULT_PHYS_BASE |
-				     MSR_IA32_APICBASE_ENABLE;
-		if (kvm_vcpu_is_reset_bsp(vcpu))
-			apic_base_msr.data |= MSR_IA32_APICBASE_BSP;
-		apic_base_msr.host_initiated = true;
-		kvm_set_apic_base(vcpu, &apic_base_msr);
-	}
-
-	vmx_segment_cache_clear(vmx);
-
 	seg_setup(VCPU_SREG_CS);
 	vmcs_write16(GUEST_CS_SELECTOR, 0xf000);
 	vmcs_writel(GUEST_CS_BASE, 0xffff0000ul);
@@ -4340,8 +4318,6 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 	if (kvm_mpx_supported())
 		vmcs_write64(GUEST_BNDCFGS, 0);
 
-	setup_msrs(vmx);
-
 	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);  /* 22.2.1 */
 
 	if (cpu_has_vmx_tpr_shadow() && !init_event) {
@@ -4357,19 +4333,52 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 	if (vmx->vpid != 0)
 		vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
 
+	vpid_sync_context(vmx->vpid);
+
 	cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
-	vmx->vcpu.arch.cr0 = cr0;
+	vcpu->arch.cr0 = cr0;
 	vmx_set_cr0(vcpu, cr0); /* enter rmode */
 	vmx_set_cr4(vcpu, 0);
-	vmx_set_efer(vcpu, 0);
 
 	update_exception_bitmap(vcpu);
 
-	vpid_sync_context(vmx->vpid);
 	if (init_event)
 		vmx_clear_hlt(vcpu);
 }
 
+static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	struct msr_data apic_base_msr;
+
+	vmx->rmode.vm86_active = 0;
+	vmx->spec_ctrl = 0;
+
+	vmx->msr_ia32_umwait_control = 0;
+
+	vcpu->arch.microcode_version = 0x100000000ULL;
+	kvm_rdx_write(vcpu, get_rdx_init_val());
+	vmx->hv_deadline_tsc = -1;
+	kvm_set_cr8(vcpu, 0);
+
+	if (!init_event) {
+		apic_base_msr.data = APIC_DEFAULT_PHYS_BASE |
+				     MSR_IA32_APICBASE_ENABLE;
+		if (kvm_vcpu_is_reset_bsp(vcpu))
+			apic_base_msr.data |= MSR_IA32_APICBASE_BSP;
+		apic_base_msr.host_initiated = true;
+		kvm_set_apic_base(vcpu, &apic_base_msr);
+	}
+
+	vmx_segment_cache_clear(vmx);
+
+	setup_msrs(vmx);
+
+	vmx_set_efer(vcpu, 0);
+
+	vmx_vmcs_reset(vcpu, init_event);
+}
+
 static void enable_irq_window(struct kvm_vcpu *vcpu)
 {
 	exec_controls_setbit(to_vmx(vcpu), CPU_BASED_VIRTUAL_INTR_PENDING);
-- 
2.19.1



* [PATCH v2 2/3] KVM: VMX: Rename {vmx,nested_vmx}_vcpu_setup() and minor cleanup
From: Xiaoyao Li @ 2019-10-18  9:37 UTC (permalink / raw)
  To: Paolo Bonzini, Radim Krčmář,
	Sean Christopherson, Vitaly Kuznetsov, Jim Mattson, Joerg Roedel
  Cc: Xiaoyao Li, kvm, linux-kernel

Rename {vmx,nested_vmx}_vcpu_setup() to {vmx,nested_vmx}_vmcs_setup(),
to match what they really do.

Also, move the vmcs unrelated code into vmx_create_vcpu().

The initialization of vmx->hv_deadline_tsc can be removed here, because
it is done in vmx_vcpu_reset(), which is reached via the flow:

kvm_arch_vcpu_setup()
  -> kvm_vcpu_reset()
       -> vmx_vcpu_reset()

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
---
Changes in v2:
  - move out the vmcs unrelated codes
---
 arch/x86/kvm/vmx/nested.c |  2 +-
 arch/x86/kvm/vmx/nested.h |  2 +-
 arch/x86/kvm/vmx/vmx.c    | 45 +++++++++++++++++----------------------
 3 files changed, 22 insertions(+), 27 deletions(-)

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 5e231da00310..7935422d311f 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -5768,7 +5768,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
 	return ret;
 }
 
-void nested_vmx_vcpu_setup(void)
+void nested_vmx_vmcs_setup(void)
 {
 	if (enable_shadow_vmcs) {
 		vmcs_write64(VMREAD_BITMAP, __pa(vmx_vmread_bitmap));
diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h
index 187d39bf0bf1..2be1ba7482c9 100644
--- a/arch/x86/kvm/vmx/nested.h
+++ b/arch/x86/kvm/vmx/nested.h
@@ -11,7 +11,7 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps,
 				bool apicv);
 void nested_vmx_hardware_unsetup(void);
 __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *));
-void nested_vmx_vcpu_setup(void);
+void nested_vmx_vmcs_setup(void);
 void nested_vmx_free_vcpu(struct kvm_vcpu *vcpu);
 int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry);
 bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index ef567df344bf..b083316a598d 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -4161,15 +4161,10 @@ static void ept_set_mmio_spte_mask(void)
 
 #define VMX_XSS_EXIT_BITMAP 0
 
-/*
- * Sets up the vmcs for emulated real mode.
- */
-static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
+static void vmx_vmcs_setup(struct vcpu_vmx *vmx)
 {
-	int i;
-
 	if (nested)
-		nested_vmx_vcpu_setup();
+		nested_vmx_vmcs_setup();
 
 	if (cpu_has_vmx_msr_bitmap())
 		vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap));
@@ -4178,7 +4173,6 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
 
 	/* Control */
 	pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx));
-	vmx->hv_deadline_tsc = -1;
 
 	exec_controls_set(vmx, vmx_exec_control(vmx));
 
@@ -4227,21 +4221,6 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
 	if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT)
 		vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
 
-	for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) {
-		u32 index = vmx_msr_index[i];
-		u32 data_low, data_high;
-		int j = vmx->nmsrs;
-
-		if (rdmsr_safe(index, &data_low, &data_high) < 0)
-			continue;
-		if (wrmsr_safe(index, data_low, data_high) < 0)
-			continue;
-		vmx->guest_msrs[j].index = i;
-		vmx->guest_msrs[j].data = 0;
-		vmx->guest_msrs[j].mask = -1ull;
-		++vmx->nmsrs;
-	}
-
 	vm_exit_controls_set(vmx, vmx_vmexit_ctrl());
 
 	/* 22.2.1, 20.8.1 */
@@ -6710,7 +6689,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 	int err;
 	struct vcpu_vmx *vmx;
 	unsigned long *msr_bitmap;
-	int cpu;
+	int i, cpu;
 
 	BUILD_BUG_ON_MSG(offsetof(struct vcpu_vmx, vcpu) != 0,
 		"struct kvm_vcpu must be at offset 0 for arch usercopy region");
@@ -6786,9 +6765,25 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 	cpu = get_cpu();
 	vmx_vcpu_load(&vmx->vcpu, cpu);
 	vmx->vcpu.cpu = cpu;
-	vmx_vcpu_setup(vmx);
+	vmx_vmcs_setup(vmx);
 	vmx_vcpu_put(&vmx->vcpu);
 	put_cpu();
+
+	for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) {
+		u32 index = vmx_msr_index[i];
+		u32 data_low, data_high;
+		int j = vmx->nmsrs;
+
+		if (rdmsr_safe(index, &data_low, &data_high) < 0)
+			continue;
+		if (wrmsr_safe(index, data_low, data_high) < 0)
+			continue;
+		vmx->guest_msrs[j].index = i;
+		vmx->guest_msrs[j].data = 0;
+		vmx->guest_msrs[j].mask = -1ull;
+		++vmx->nmsrs;
+	}
+
 	if (cpu_need_virtualize_apic_accesses(&vmx->vcpu)) {
 		err = alloc_apic_access_page(kvm);
 		if (err)
-- 
2.19.1



* [PATCH v2 3/3] KVM: VMX: Some minor refactor of MSR bitmap
From: Xiaoyao Li @ 2019-10-18  9:37 UTC (permalink / raw)
  To: Paolo Bonzini, Radim Krčmář,
	Sean Christopherson, Vitaly Kuznetsov, Jim Mattson, Joerg Roedel
  Cc: Xiaoyao Li, kvm, linux-kernel

Move the MSR bitmap capability check out of vmx_disable_intercept_for_msr()
and vmx_enable_intercept_for_msr() and into vmx_set_intercept_for_msr(),
so that the check is done early, before we actually touch the bitmap.

Also, move the common MSR pass-through setup to where the msr bitmap is
actually set up and used.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
---
Changes in v2:
  - Remove the check of cpu_has_vmx_msr_bitmap() from
    vmx_{disable,enable}_intercept_for_msr (Krish)
---
 arch/x86/kvm/vmx/vmx.c | 65 +++++++++++++++++++++---------------------
 1 file changed, 33 insertions(+), 32 deletions(-)

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index b083316a598d..017689d0144e 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -343,8 +343,8 @@ module_param_cb(vmentry_l1d_flush, &vmentry_l1d_flush_ops, NULL, 0644);
 
 static bool guest_state_valid(struct kvm_vcpu *vcpu);
 static u32 vmx_segment_access_rights(struct kvm_segment *var);
-static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
-							  u32 msr, int type);
+static __always_inline void vmx_set_intercept_for_msr(unsigned long *msr_bitmap,
+		u32 msr, int type, bool value);
 
 void vmx_vmexit(void);
 
@@ -2000,9 +2000,9 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		 * in the merging. We update the vmcs01 here for L1 as well
 		 * since it will end up touching the MSR anyway now.
 		 */
-		vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap,
-					      MSR_IA32_SPEC_CTRL,
-					      MSR_TYPE_RW);
+		vmx_set_intercept_for_msr(vmx->vmcs01.msr_bitmap,
+					  MSR_IA32_SPEC_CTRL,
+					  MSR_TYPE_RW, false);
 		break;
 	case MSR_IA32_PRED_CMD:
 		if (!msr_info->host_initiated &&
@@ -2028,8 +2028,9 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		 * vmcs02.msr_bitmap here since it gets completely overwritten
 		 * in the merging.
 		 */
-		vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD,
-					      MSR_TYPE_W);
+		vmx_set_intercept_for_msr(vmx->vmcs01.msr_bitmap,
+					  MSR_IA32_PRED_CMD,
+					  MSR_TYPE_W, false);
 		break;
 	case MSR_IA32_CR_PAT:
 		if (!kvm_pat_valid(data))
@@ -3599,9 +3600,6 @@ static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bit
 {
 	int f = sizeof(unsigned long);
 
-	if (!cpu_has_vmx_msr_bitmap())
-		return;
-
 	if (static_branch_unlikely(&enable_evmcs))
 		evmcs_touch_msr_bitmap();
 
@@ -3637,9 +3635,6 @@ static __always_inline void vmx_enable_intercept_for_msr(unsigned long *msr_bitm
 {
 	int f = sizeof(unsigned long);
 
-	if (!cpu_has_vmx_msr_bitmap())
-		return;
-
 	if (static_branch_unlikely(&enable_evmcs))
 		evmcs_touch_msr_bitmap();
 
@@ -3673,6 +3668,9 @@ static __always_inline void vmx_enable_intercept_for_msr(unsigned long *msr_bitm
 static __always_inline void vmx_set_intercept_for_msr(unsigned long *msr_bitmap,
 			     			      u32 msr, int type, bool value)
 {
+	if (!cpu_has_vmx_msr_bitmap())
+		return;
+
 	if (value)
 		vmx_enable_intercept_for_msr(msr_bitmap, msr, type);
 	else
@@ -4163,11 +4161,30 @@ static void ept_set_mmio_spte_mask(void)
 
 static void vmx_vmcs_setup(struct vcpu_vmx *vmx)
 {
+	unsigned long *msr_bitmap;
+
 	if (nested)
 		nested_vmx_vmcs_setup();
 
-	if (cpu_has_vmx_msr_bitmap())
+	if (cpu_has_vmx_msr_bitmap()) {
+		msr_bitmap = vmx->vmcs01.msr_bitmap;
+		vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_TSC, MSR_TYPE_R);
+		vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE, MSR_TYPE_RW);
+		vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE, MSR_TYPE_RW);
+		vmx_disable_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
+		vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW);
+		vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
+		vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);
+		if (kvm_cstate_in_guest(vmx->vcpu.kvm)) {
+			vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C1_RES, MSR_TYPE_R);
+			vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C3_RESIDENCY, MSR_TYPE_R);
+			vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C6_RESIDENCY, MSR_TYPE_R);
+			vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C7_RESIDENCY, MSR_TYPE_R);
+		}
+
 		vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap));
+	}
+	vmx->msr_bitmap_mode = 0;
 
 	vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */
 
@@ -6074,7 +6091,8 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
 	}
 	secondary_exec_controls_set(vmx, sec_exec_control);
 
-	vmx_update_msr_bitmap(vcpu);
+	if (cpu_has_vmx_msr_bitmap())
+		vmx_update_msr_bitmap(vcpu);
 }
 
 static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa)
@@ -6688,7 +6706,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 {
 	int err;
 	struct vcpu_vmx *vmx;
-	unsigned long *msr_bitmap;
 	int i, cpu;
 
 	BUILD_BUG_ON_MSG(offsetof(struct vcpu_vmx, vcpu) != 0,
@@ -6745,22 +6762,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 	if (err < 0)
 		goto free_msrs;
 
-	msr_bitmap = vmx->vmcs01.msr_bitmap;
-	vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_TSC, MSR_TYPE_R);
-	vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE, MSR_TYPE_RW);
-	vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE, MSR_TYPE_RW);
-	vmx_disable_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
-	vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW);
-	vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
-	vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);
-	if (kvm_cstate_in_guest(kvm)) {
-		vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C1_RES, MSR_TYPE_R);
-		vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C3_RESIDENCY, MSR_TYPE_R);
-		vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C6_RESIDENCY, MSR_TYPE_R);
-		vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C7_RESIDENCY, MSR_TYPE_R);
-	}
-	vmx->msr_bitmap_mode = 0;
-
 	vmx->loaded_vmcs = &vmx->vmcs01;
 	cpu = get_cpu();
 	vmx_vcpu_load(&vmx->vcpu, cpu);
-- 
2.19.1



* Re: [PATCH v2 1/3] KVM: VMX: Move vmcs related resetting out of vmx_vcpu_reset()
From: Sean Christopherson @ 2019-10-18 16:57 UTC (permalink / raw)
  To: Xiaoyao Li
  Cc: Paolo Bonzini, Radim Krčmář,
	Vitaly Kuznetsov, Jim Mattson, Joerg Roedel, kvm, linux-kernel

On Fri, Oct 18, 2019 at 05:37:21PM +0800, Xiaoyao Li wrote:
> Move vmcs related code from vmx_vcpu_reset() into a new function,
> vmx_vmcs_reset(), so that it's clearer which data is related to the
> vmcs and is held in the vmcs.
> 
> Suggested-by: Krish Sadhukhan <krish.sadhukhan@oracle.com>
> Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
> ---
>  arch/x86/kvm/vmx/vmx.c | 65 ++++++++++++++++++++++++------------------
>  1 file changed, 37 insertions(+), 28 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index e660e28e9ae0..ef567df344bf 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -4271,33 +4271,11 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
>  	}
>  }
>  
> -static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
> +static void vmx_vmcs_reset(struct kvm_vcpu *vcpu, bool init_event)

I'd strongly prefer to keep the existing code.  For me, "vmcs_reset" means
zeroing out the VMCS, i.e. reset the VMCS to a virgin state.  "vcpu_reset"
means exactly that, stuff vCPU state to emulate RESET/INIT.

And the split is arbitrary and funky, e.g. EFER is integrated into the
VMCS on all recent CPUs, but here it's handled in vcpu_reset.  
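
E.g. on CPUs that support the VM_ENTRY_LOAD_IA32_EFER entry control,
"resetting EFER" amounts to little more than VMCS writes, roughly along
these lines (an illustrative sketch, not the actual KVM code):

	/* Sketch only; assumes VM_ENTRY_LOAD_IA32_EFER is supported. */
	vm_entry_controls_setbit(vmx, VM_ENTRY_LOAD_IA32_EFER);
	vmcs_write64(GUEST_IA32_EFER, 0);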

>  {
>  	struct vcpu_vmx *vmx = to_vmx(vcpu);
> -	struct msr_data apic_base_msr;
>  	u64 cr0;
>  
> -	vmx->rmode.vm86_active = 0;
> -	vmx->spec_ctrl = 0;
> -
> -	vmx->msr_ia32_umwait_control = 0;
> -
> -	vcpu->arch.microcode_version = 0x100000000ULL;
> -	vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
> -	vmx->hv_deadline_tsc = -1;
> -	kvm_set_cr8(vcpu, 0);
> -
> -	if (!init_event) {
> -		apic_base_msr.data = APIC_DEFAULT_PHYS_BASE |
> -				     MSR_IA32_APICBASE_ENABLE;
> -		if (kvm_vcpu_is_reset_bsp(vcpu))
> -			apic_base_msr.data |= MSR_IA32_APICBASE_BSP;
> -		apic_base_msr.host_initiated = true;
> -		kvm_set_apic_base(vcpu, &apic_base_msr);
> -	}
> -
> -	vmx_segment_cache_clear(vmx);
> -
>  	seg_setup(VCPU_SREG_CS);
>  	vmcs_write16(GUEST_CS_SELECTOR, 0xf000);
>  	vmcs_writel(GUEST_CS_BASE, 0xffff0000ul);
> @@ -4340,8 +4318,6 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
>  	if (kvm_mpx_supported())
>  		vmcs_write64(GUEST_BNDCFGS, 0);
>  
> -	setup_msrs(vmx);
> -
>  	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);  /* 22.2.1 */
>  
>  	if (cpu_has_vmx_tpr_shadow() && !init_event) {
> @@ -4357,19 +4333,52 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
>  	if (vmx->vpid != 0)
>  		vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
>  
> +	vpid_sync_context(vmx->vpid);
> +
>  	cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
> -	vmx->vcpu.arch.cr0 = cr0;
> +	vcpu->arch.cr0 = cr0;
>  	vmx_set_cr0(vcpu, cr0); /* enter rmode */
>  	vmx_set_cr4(vcpu, 0);
> -	vmx_set_efer(vcpu, 0);
>  
>  	update_exception_bitmap(vcpu);
>  
> -	vpid_sync_context(vmx->vpid);
>  	if (init_event)
>  		vmx_clear_hlt(vcpu);
>  }
>  
> +static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
> +{
> +	struct vcpu_vmx *vmx = to_vmx(vcpu);
> +	struct msr_data apic_base_msr;
> +
> +	vmx->rmode.vm86_active = 0;
> +	vmx->spec_ctrl = 0;
> +
> +	vmx->msr_ia32_umwait_control = 0;
> +
> +	vcpu->arch.microcode_version = 0x100000000ULL;
> +	kvm_rdx_write(vcpu, get_rdx_init_val());
> +	vmx->hv_deadline_tsc = -1;
> +	kvm_set_cr8(vcpu, 0);
> +
> +	if (!init_event) {
> +		apic_base_msr.data = APIC_DEFAULT_PHYS_BASE |
> +				     MSR_IA32_APICBASE_ENABLE;
> +		if (kvm_vcpu_is_reset_bsp(vcpu))
> +			apic_base_msr.data |= MSR_IA32_APICBASE_BSP;
> +		apic_base_msr.host_initiated = true;
> +		kvm_set_apic_base(vcpu, &apic_base_msr);
> +	}
> +
> +	vmx_segment_cache_clear(vmx);
> +
> +	setup_msrs(vmx);
> +
> +	vmx_set_efer(vcpu, 0);

Setting EFER before CR0/CR4 is a functional change, and likely wrong, e.g.
vmx_set_cr0() queries EFER_LME to trigger exit_lmode() if INIT/RESET is
received while the vCPU is in long mode.
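
Roughly, the dependency looks like this in vmx_set_cr0() (paraphrased
sketch, not the exact code):

	/*
	 * Only does the right thing on INIT/RESET if EFER still holds
	 * the pre-reset value, i.e. EFER must not be zeroed before CR0
	 * is stuffed.
	 */
	if (vcpu->arch.efer & EFER_LME) {
		if (is_paging(vcpu) && !(cr0 & X86_CR0_PG))
			exit_lmode(vcpu);
	}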

> +	vmx_vmcs_reset(vcpu, init_event);
> +}
> +
>  static void enable_irq_window(struct kvm_vcpu *vcpu)
>  {
>  	exec_controls_setbit(to_vmx(vcpu), CPU_BASED_VIRTUAL_INTR_PENDING);
> -- 
> 2.19.1
> 


* Re: [PATCH v2 2/3] KVM: VMX: Rename {vmx,nested_vmx}_vcpu_setup() and minor cleanup
From: Sean Christopherson @ 2019-10-18 17:09 UTC (permalink / raw)
  To: Xiaoyao Li
  Cc: Paolo Bonzini, Radim Krčmář,
	Vitaly Kuznetsov, Jim Mattson, Joerg Roedel, kvm, linux-kernel

On Fri, Oct 18, 2019 at 05:37:22PM +0800, Xiaoyao Li wrote:
> Rename {vmx,nested_vmx}_vcpu_setup() to {vmx,nested_vmx}_vmcs_setup(),
> to match what they really do.
> 
> Also, move the vmcs unrelated code into vmx_create_vcpu().

Do this in a separate patch, just in case there are dependencies we're
missing.

> The initialization of vmx->hv_deadline_tsc can be removed here, because
> it is done in vmx_vcpu_reset(), which is reached via the flow:
> 
> kvm_arch_vcpu_setup()
>   -> kvm_vcpu_reset()
>        -> vmx_vcpu_reset()

Definitely needs to be in a separate patch.

> 
> Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
> ---
> Changes in v2:
>   - move out the vmcs unrelated codes
> ---
>  arch/x86/kvm/vmx/nested.c |  2 +-
>  arch/x86/kvm/vmx/nested.h |  2 +-
>  arch/x86/kvm/vmx/vmx.c    | 45 +++++++++++++++++----------------------
>  3 files changed, 22 insertions(+), 27 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
> index 5e231da00310..7935422d311f 100644
> --- a/arch/x86/kvm/vmx/nested.c
> +++ b/arch/x86/kvm/vmx/nested.c
> @@ -5768,7 +5768,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
>  	return ret;
>  }
>  
> -void nested_vmx_vcpu_setup(void)
> +void nested_vmx_vmcs_setup(void)

"vmcs_setup" sounds like we're allocating and loading a VMCS.  Maybe
{nested_,}vmx_set_initial_vmcs_state() a la vmx_set_constant_host_state()?
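
I.e. something like (prototypes only, names purely illustrative):

	void vmx_set_initial_vmcs_state(struct vcpu_vmx *vmx);
	void nested_vmx_set_initial_vmcs_state(void);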

>  {
>  	if (enable_shadow_vmcs) {
>  		vmcs_write64(VMREAD_BITMAP, __pa(vmx_vmread_bitmap));
> diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h
> index 187d39bf0bf1..2be1ba7482c9 100644
> --- a/arch/x86/kvm/vmx/nested.h
> +++ b/arch/x86/kvm/vmx/nested.h
> @@ -11,7 +11,7 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps,
>  				bool apicv);
>  void nested_vmx_hardware_unsetup(void);
>  __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *));
> -void nested_vmx_vcpu_setup(void);
> +void nested_vmx_vmcs_setup(void);
>  void nested_vmx_free_vcpu(struct kvm_vcpu *vcpu);
>  int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry);
>  bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason);
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index ef567df344bf..b083316a598d 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -4161,15 +4161,10 @@ static void ept_set_mmio_spte_mask(void)
>  
>  #define VMX_XSS_EXIT_BITMAP 0
>  
> -/*
> - * Sets up the vmcs for emulated real mode.
> - */
> -static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
> +static void vmx_vmcs_setup(struct vcpu_vmx *vmx)
>  {
> -	int i;
> -
>  	if (nested)
> -		nested_vmx_vcpu_setup();
> +		nested_vmx_vmcs_setup();
>  
>  	if (cpu_has_vmx_msr_bitmap())
>  		vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap));
> @@ -4178,7 +4173,6 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
>  
>  	/* Control */
>  	pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx));
> -	vmx->hv_deadline_tsc = -1;
>  
>  	exec_controls_set(vmx, vmx_exec_control(vmx));
>  
> @@ -4227,21 +4221,6 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
>  	if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT)
>  		vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
>  
> -	for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) {
> -		u32 index = vmx_msr_index[i];
> -		u32 data_low, data_high;
> -		int j = vmx->nmsrs;
> -
> -		if (rdmsr_safe(index, &data_low, &data_high) < 0)
> -			continue;
> -		if (wrmsr_safe(index, data_low, data_high) < 0)
> -			continue;
> -		vmx->guest_msrs[j].index = i;
> -		vmx->guest_msrs[j].data = 0;
> -		vmx->guest_msrs[j].mask = -1ull;
> -		++vmx->nmsrs;
> -	}
> -
>  	vm_exit_controls_set(vmx, vmx_vmexit_ctrl());
>  
>  	/* 22.2.1, 20.8.1 */
> @@ -6710,7 +6689,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
>  	int err;
>  	struct vcpu_vmx *vmx;
>  	unsigned long *msr_bitmap;
> -	int cpu;
> +	int i, cpu;
>  
>  	BUILD_BUG_ON_MSG(offsetof(struct vcpu_vmx, vcpu) != 0,
>  		"struct kvm_vcpu must be at offset 0 for arch usercopy region");
> @@ -6786,9 +6765,25 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
>  	cpu = get_cpu();
>  	vmx_vcpu_load(&vmx->vcpu, cpu);
>  	vmx->vcpu.cpu = cpu;
> -	vmx_vcpu_setup(vmx);
> +	vmx_vmcs_setup(vmx);
>  	vmx_vcpu_put(&vmx->vcpu);
>  	put_cpu();
> +
> +	for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) {
> +		u32 index = vmx_msr_index[i];
> +		u32 data_low, data_high;
> +		int j = vmx->nmsrs;
> +
> +		if (rdmsr_safe(index, &data_low, &data_high) < 0)
> +			continue;
> +		if (wrmsr_safe(index, data_low, data_high) < 0)
> +			continue;
> +		vmx->guest_msrs[j].index = i;
> +		vmx->guest_msrs[j].data = 0;
> +		vmx->guest_msrs[j].mask = -1ull;
> +		++vmx->nmsrs;
> +	}

I'd put this immediately after guest_msrs is allocated.  Yeah, we'll waste
a few cycles if allocating vmcs01 fails, but that should be a very rare
event.
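
I.e. something like this (sketch; the error label is whatever the
existing unwind path uses):

	vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
	if (!vmx->guest_msrs)
		goto free_pml;

	for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) {
		/* same rdmsr_safe()/wrmsr_safe() filtering as in the patch */
		...
	}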

> +
>  	if (cpu_need_virtualize_apic_accesses(&vmx->vcpu)) {
>  		err = alloc_apic_access_page(kvm);
>  		if (err)
> -- 
> 2.19.1
> 


* Re: [PATCH v2 3/3] KVM: VMX: Some minor refactor of MSR bitmap
From: Sean Christopherson @ 2019-10-18 17:27 UTC (permalink / raw)
  To: Xiaoyao Li
  Cc: Paolo Bonzini, Radim Krčmář,
	Vitaly Kuznetsov, Jim Mattson, Joerg Roedel, kvm, linux-kernel

On Fri, Oct 18, 2019 at 05:37:23PM +0800, Xiaoyao Li wrote:
> Move the MSR bitmap capability check out of vmx_disable_intercept_for_msr()
> and vmx_enable_intercept_for_msr() and into vmx_set_intercept_for_msr(),
> so that the check is done early, before we actually touch the bitmap.
> 
> Also, move the common MSR pass-through setup to where the msr bitmap is
> actually set up and used.
> 
> Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
> ---
> Changes in v2:
>   - Remove the check of cpu_has_vmx_msr_bitmap() from
>     vmx_{disable,enable}_intercept_for_msr (Krish)
> ---
>  arch/x86/kvm/vmx/vmx.c | 65 +++++++++++++++++++++---------------------
>  1 file changed, 33 insertions(+), 32 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index b083316a598d..017689d0144e 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -343,8 +343,8 @@ module_param_cb(vmentry_l1d_flush, &vmentry_l1d_flush_ops, NULL, 0644);
>  
>  static bool guest_state_valid(struct kvm_vcpu *vcpu);
>  static u32 vmx_segment_access_rights(struct kvm_segment *var);
> -static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
> -							  u32 msr, int type);
> +static __always_inline void vmx_set_intercept_for_msr(unsigned long *msr_bitmap,
> +		u32 msr, int type, bool value);
>  
>  void vmx_vmexit(void);
>  
> @@ -2000,9 +2000,9 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  		 * in the merging. We update the vmcs01 here for L1 as well
>  		 * since it will end up touching the MSR anyway now.
>  		 */
> -		vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap,
> -					      MSR_IA32_SPEC_CTRL,
> -					      MSR_TYPE_RW);
> +		vmx_set_intercept_for_msr(vmx->vmcs01.msr_bitmap,
> +					  MSR_IA32_SPEC_CTRL,
> +					  MSR_TYPE_RW, false);

IMO this is a net negative.  The explicit "disable" is significantly more
intuitive than "set" with a %false param, e.g. at a quick glance it would
be easy to think this code is "setting", i.e. "enabling" interception.
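
Compare, at a glance:

	/* unambiguous */
	vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SPEC_CTRL,
				      MSR_TYPE_RW);

	/* "set ... false": easy to misread as enabling interception */
	vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_SPEC_CTRL,
				  MSR_TYPE_RW, false);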

>  		break;
>  	case MSR_IA32_PRED_CMD:
>  		if (!msr_info->host_initiated &&
> @@ -2028,8 +2028,9 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  		 * vmcs02.msr_bitmap here since it gets completely overwritten
>  		 * in the merging.
>  		 */
> -		vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD,
> -					      MSR_TYPE_W);
> +		vmx_set_intercept_for_msr(vmx->vmcs01.msr_bitmap,
> +					  MSR_IA32_PRED_CMD,
> +					  MSR_TYPE_W, false);
>  		break;
>  	case MSR_IA32_CR_PAT:
>  		if (!kvm_pat_valid(data))
> @@ -3599,9 +3600,6 @@ static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bit
>  {
>  	int f = sizeof(unsigned long);
>  
> -	if (!cpu_has_vmx_msr_bitmap())
> -		return;

As above, I'd rather keep these here.  Functionally it changes nothing on
CPUs with an MSR bitmap.  For old CPUs, it saves all of two uops in paths
that aren't performance critical.

> -
>  	if (static_branch_unlikely(&enable_evmcs))
>  		evmcs_touch_msr_bitmap();
>  
> @@ -3637,9 +3635,6 @@ static __always_inline void vmx_enable_intercept_for_msr(unsigned long *msr_bitm
>  {
>  	int f = sizeof(unsigned long);
>  
> -	if (!cpu_has_vmx_msr_bitmap())
> -		return;
> -
>  	if (static_branch_unlikely(&enable_evmcs))
>  		evmcs_touch_msr_bitmap();
>  
> @@ -3673,6 +3668,9 @@ static __always_inline void vmx_enable_intercept_for_msr(unsigned long *msr_bitm
>  static __always_inline void vmx_set_intercept_for_msr(unsigned long *msr_bitmap,
>  			     			      u32 msr, int type, bool value)
>  {
> +	if (!cpu_has_vmx_msr_bitmap())
> +		return;
> +
>  	if (value)
>  		vmx_enable_intercept_for_msr(msr_bitmap, msr, type);
>  	else
> @@ -4163,11 +4161,30 @@ static void ept_set_mmio_spte_mask(void)
>  
>  static void vmx_vmcs_setup(struct vcpu_vmx *vmx)
>  {
> +	unsigned long *msr_bitmap;
> +
>  	if (nested)
>  		nested_vmx_vmcs_setup();
>  
> -	if (cpu_has_vmx_msr_bitmap())
> +	if (cpu_has_vmx_msr_bitmap()) {
> +		msr_bitmap = vmx->vmcs01.msr_bitmap;
> +		vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_TSC, MSR_TYPE_R);
> +		vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE, MSR_TYPE_RW);
> +		vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE, MSR_TYPE_RW);
> +		vmx_disable_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
> +		vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW);
> +		vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
> +		vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);
> +		if (kvm_cstate_in_guest(vmx->vcpu.kvm)) {
> +			vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C1_RES, MSR_TYPE_R);
> +			vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C3_RESIDENCY, MSR_TYPE_R);
> +			vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C6_RESIDENCY, MSR_TYPE_R);
> +			vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C7_RESIDENCY, MSR_TYPE_R);
> +		}
> +
>  		vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap));
> +	}
> +	vmx->msr_bitmap_mode = 0;

Zeroing msr_bitmap_mode can be skipped as well.

>  	vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */
>  
> @@ -6074,7 +6091,8 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
>  	}
>  	secondary_exec_controls_set(vmx, sec_exec_control);
>  
> -	vmx_update_msr_bitmap(vcpu);
> +	if (cpu_has_vmx_msr_bitmap())
> +		vmx_update_msr_bitmap(vcpu);
>  }
>  
>  static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa)
> @@ -6688,7 +6706,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
>  {
>  	int err;
>  	struct vcpu_vmx *vmx;
> -	unsigned long *msr_bitmap;
>  	int i, cpu;
>  
>  	BUILD_BUG_ON_MSG(offsetof(struct vcpu_vmx, vcpu) != 0,
> @@ -6745,22 +6762,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
>  	if (err < 0)
>  		goto free_msrs;
>  
> -	msr_bitmap = vmx->vmcs01.msr_bitmap;
> -	vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_TSC, MSR_TYPE_R);
> -	vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE, MSR_TYPE_RW);
> -	vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE, MSR_TYPE_RW);
> -	vmx_disable_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
> -	vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW);
> -	vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
> -	vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);
> -	if (kvm_cstate_in_guest(kvm)) {
> -		vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C1_RES, MSR_TYPE_R);
> -		vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C3_RESIDENCY, MSR_TYPE_R);
> -		vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C6_RESIDENCY, MSR_TYPE_R);
> -		vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C7_RESIDENCY, MSR_TYPE_R);
> -	}
> -	vmx->msr_bitmap_mode = 0;

Keep this code here to be consistent with the previous change that moved
the guest_msrs initialization *out* of the VMCS specific function.  Both
are collateral pages that are not directly part of the VMCS.

I'd be tempted to use a goto to skip the code, the line length is bad
enough as it is, e.g.:

	if (!cpu_has_vmx_msr_bitmap())
		goto skip_msr_bitmap;

	vmx->msr_bitmap_mode = 0;
skip_msr_bitmap:

> -
>  	vmx->loaded_vmcs = &vmx->vmcs01;
>  	cpu = get_cpu();
>  	vmx_vcpu_load(&vmx->vcpu, cpu);
> -- 
> 2.19.1
> 


* Re: [PATCH v2 1/3] KVM: VMX: Move vmcs related resetting out of vmx_vcpu_reset()
From: Xiaoyao Li @ 2019-10-18 18:34 UTC (permalink / raw)
  To: Sean Christopherson
  Cc: Paolo Bonzini, Radim Krčmář,
	Vitaly Kuznetsov, Jim Mattson, Joerg Roedel, kvm, linux-kernel

On 10/19/2019 12:57 AM, Sean Christopherson wrote:
> On Fri, Oct 18, 2019 at 05:37:21PM +0800, Xiaoyao Li wrote:
>> Move vmcs related code from vmx_vcpu_reset() into a new function,
>> vmx_vmcs_reset(), so that it's clearer which data is related to the
>> vmcs and is held in the vmcs.
>>
>> Suggested-by: Krish Sadhukhan <krish.sadhukhan@oracle.com>
>> Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
>> ---
>>   arch/x86/kvm/vmx/vmx.c | 65 ++++++++++++++++++++++++------------------
>>   1 file changed, 37 insertions(+), 28 deletions(-)
>>
>> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
>> index e660e28e9ae0..ef567df344bf 100644
>> --- a/arch/x86/kvm/vmx/vmx.c
>> +++ b/arch/x86/kvm/vmx/vmx.c
>> @@ -4271,33 +4271,11 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
>>   	}
>>   }
>>   
>> -static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
>> +static void vmx_vmcs_reset(struct kvm_vcpu *vcpu, bool init_event)
> 
> I'd strongly prefer to keep the existing code.  For me, "vmcs_reset" means
> zeroing out the VMCS, i.e. reset the VMCS to a virgin state.  "vcpu_reset"
> means exactly that, stuff vCPU state to emulate RESET/INIT.
> 
> And the split is arbitrary and funky, e.g. EFER is integrated into the
> VMCS on all recent CPUs, but here it's handled in vcpu_reset.
>

I left EFER in vcpu_reset() because vmx_set_efer() doesn't directly
lead to a vmcs_write().

OK. I'll drop this patch.


* Re: [PATCH v2 2/3] KVM: VMX: Rename {vmx,nested_vmx}_vcpu_setup() and minor cleanup
From: Xiaoyao Li @ 2019-10-18 18:38 UTC (permalink / raw)
  To: Sean Christopherson
  Cc: Paolo Bonzini, Radim Krčmář,
	Vitaly Kuznetsov, Jim Mattson, Joerg Roedel, kvm, linux-kernel

On 10/19/2019 1:09 AM, Sean Christopherson wrote:
> On Fri, Oct 18, 2019 at 05:37:22PM +0800, Xiaoyao Li wrote:
>> Rename {vmx,nested_vmx}_vcpu_setup() to {vmx,nested_vmx}_vmcs_setup(),
>> to match what they really do.
>>
>> Also, move the vmcs unrelated code into vmx_create_vcpu().
> 
> Do this in a separate patch, just in case there are dependencies we're
> missing.
> 
>> The initialization of vmx->hv_deadline_tsc can be removed here, because
>> it is done in vmx_vcpu_reset(), which is reached via the flow:
>>
>> kvm_arch_vcpu_setup()
>>    -> kvm_vcpu_reset()
>>         -> vmx_vcpu_reset()
> 
> Definitely needs to be in a separate patch.
> 

OK, I'll split it into 3 patches.

>>
>> Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
>> ---
>> Changes in v2:
>>    - move out the vmcs unrelated codes
>> ---
>>   arch/x86/kvm/vmx/nested.c |  2 +-
>>   arch/x86/kvm/vmx/nested.h |  2 +-
>>   arch/x86/kvm/vmx/vmx.c    | 45 +++++++++++++++++----------------------
>>   3 files changed, 22 insertions(+), 27 deletions(-)
>>
>> diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
>> index 5e231da00310..7935422d311f 100644
>> --- a/arch/x86/kvm/vmx/nested.c
>> +++ b/arch/x86/kvm/vmx/nested.c
>> @@ -5768,7 +5768,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
>>   	return ret;
>>   }
>>   
>> -void nested_vmx_vcpu_setup(void)
>> +void nested_vmx_vmcs_setup(void)
> 
> "vmcs_setup" sounds like we're allocating and loading a VMCS.  Maybe
> {nested_,}vmx_set_initial_vmcs_state() a la vmx_set_constant_host_state()?
> 
>>   {
>>   	if (enable_shadow_vmcs) {
>>   		vmcs_write64(VMREAD_BITMAP, __pa(vmx_vmread_bitmap));
>> diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h
>> index 187d39bf0bf1..2be1ba7482c9 100644
>> --- a/arch/x86/kvm/vmx/nested.h
>> +++ b/arch/x86/kvm/vmx/nested.h
>> @@ -11,7 +11,7 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps,
>>   				bool apicv);
>>   void nested_vmx_hardware_unsetup(void);
>>   __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *));
>> -void nested_vmx_vcpu_setup(void);
>> +void nested_vmx_vmcs_setup(void);
>>   void nested_vmx_free_vcpu(struct kvm_vcpu *vcpu);
>>   int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry);
>>   bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason);
>> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
>> index ef567df344bf..b083316a598d 100644
>> --- a/arch/x86/kvm/vmx/vmx.c
>> +++ b/arch/x86/kvm/vmx/vmx.c
>> @@ -4161,15 +4161,10 @@ static void ept_set_mmio_spte_mask(void)
>>   
>>   #define VMX_XSS_EXIT_BITMAP 0
>>   
>> -/*
>> - * Sets up the vmcs for emulated real mode.
>> - */
>> -static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
>> +static void vmx_vmcs_setup(struct vcpu_vmx *vmx)
>>   {
>> -	int i;
>> -
>>   	if (nested)
>> -		nested_vmx_vcpu_setup();
>> +		nested_vmx_vmcs_setup();
>>   
>>   	if (cpu_has_vmx_msr_bitmap())
>>   		vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap));
>> @@ -4178,7 +4173,6 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
>>   
>>   	/* Control */
>>   	pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx));
>> -	vmx->hv_deadline_tsc = -1;
>>   
>>   	exec_controls_set(vmx, vmx_exec_control(vmx));
>>   
>> @@ -4227,21 +4221,6 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
>>   	if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT)
>>   		vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
>>   
>> -	for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) {
>> -		u32 index = vmx_msr_index[i];
>> -		u32 data_low, data_high;
>> -		int j = vmx->nmsrs;
>> -
>> -		if (rdmsr_safe(index, &data_low, &data_high) < 0)
>> -			continue;
>> -		if (wrmsr_safe(index, data_low, data_high) < 0)
>> -			continue;
>> -		vmx->guest_msrs[j].index = i;
>> -		vmx->guest_msrs[j].data = 0;
>> -		vmx->guest_msrs[j].mask = -1ull;
>> -		++vmx->nmsrs;
>> -	}
>> -
>>   	vm_exit_controls_set(vmx, vmx_vmexit_ctrl());
>>   
>>   	/* 22.2.1, 20.8.1 */
>> @@ -6710,7 +6689,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
>>   	int err;
>>   	struct vcpu_vmx *vmx;
>>   	unsigned long *msr_bitmap;
>> -	int cpu;
>> +	int i, cpu;
>>   
>>   	BUILD_BUG_ON_MSG(offsetof(struct vcpu_vmx, vcpu) != 0,
>>   		"struct kvm_vcpu must be at offset 0 for arch usercopy region");
>> @@ -6786,9 +6765,25 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
>>   	cpu = get_cpu();
>>   	vmx_vcpu_load(&vmx->vcpu, cpu);
>>   	vmx->vcpu.cpu = cpu;
>> -	vmx_vcpu_setup(vmx);
>> +	vmx_vmcs_setup(vmx);
>>   	vmx_vcpu_put(&vmx->vcpu);
>>   	put_cpu();
>> +
>> +	for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) {
>> +		u32 index = vmx_msr_index[i];
>> +		u32 data_low, data_high;
>> +		int j = vmx->nmsrs;
>> +
>> +		if (rdmsr_safe(index, &data_low, &data_high) < 0)
>> +			continue;
>> +		if (wrmsr_safe(index, data_low, data_high) < 0)
>> +			continue;
>> +		vmx->guest_msrs[j].index = i;
>> +		vmx->guest_msrs[j].data = 0;
>> +		vmx->guest_msrs[j].mask = -1ull;
>> +		++vmx->nmsrs;
>> +	}
> 
> I'd put this immediately after guest_msrs is allocated.  Yeah, we'll waste
> a few cycles if allocating vmcs01 fails, but that should be a very rare
> event.
> 

OK.

>> +
>>   	if (cpu_need_virtualize_apic_accesses(&vmx->vcpu)) {
>>   		err = alloc_apic_access_page(kvm);
>>   		if (err)
>> -- 
>> 2.19.1
>>


* Re: [PATCH v2 3/3] KVM: VMX: Some minor refactor of MSR bitmap
From: Xiaoyao Li @ 2019-10-18 18:39 UTC (permalink / raw)
  To: Sean Christopherson
  Cc: Paolo Bonzini, Radim Krčmář,
	Vitaly Kuznetsov, Jim Mattson, Joerg Roedel, kvm, linux-kernel

On 10/19/2019 1:27 AM, Sean Christopherson wrote:
> On Fri, Oct 18, 2019 at 05:37:23PM +0800, Xiaoyao Li wrote:
>> Move the MSR bitmap capability check out of vmx_disable_intercept_for_msr()
>> and vmx_enable_intercept_for_msr() and into vmx_set_intercept_for_msr(),
>> so that the check is done early, before we actually touch the bitmap.
>>
>> Also, move the common MSR pass-through setup to where the msr bitmap is
>> actually set up and used.
>>
>> Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
>> ---
>> Changes in v2:
>>    - Remove the check of cpu_has_vmx_msr_bitmap() from
>>      vmx_{disable,enable}_intercept_for_msr (Krish)
>> ---
>>   arch/x86/kvm/vmx/vmx.c | 65 +++++++++++++++++++++---------------------
>>   1 file changed, 33 insertions(+), 32 deletions(-)
>>
>> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
>> index b083316a598d..017689d0144e 100644
>> --- a/arch/x86/kvm/vmx/vmx.c
>> +++ b/arch/x86/kvm/vmx/vmx.c
>> @@ -343,8 +343,8 @@ module_param_cb(vmentry_l1d_flush, &vmentry_l1d_flush_ops, NULL, 0644);
>>   
>>   static bool guest_state_valid(struct kvm_vcpu *vcpu);
>>   static u32 vmx_segment_access_rights(struct kvm_segment *var);
>> -static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
>> -							  u32 msr, int type);
>> +static __always_inline void vmx_set_intercept_for_msr(unsigned long *msr_bitmap,
>> +		u32 msr, int type, bool value);
>>   
>>   void vmx_vmexit(void);
>>   
>> @@ -2000,9 +2000,9 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>>   		 * in the merging. We update the vmcs01 here for L1 as well
>>   		 * since it will end up touching the MSR anyway now.
>>   		 */
>> -		vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap,
>> -					      MSR_IA32_SPEC_CTRL,
>> -					      MSR_TYPE_RW);
>> +		vmx_set_intercept_for_msr(vmx->vmcs01.msr_bitmap,
>> +					  MSR_IA32_SPEC_CTRL,
>> +					  MSR_TYPE_RW, false);
> 
> IMO this is a net negative.  The explicit "disable" is significantly more
> intuitive than "set" with a %false param, e.g. at a quick glance it would
> be easy to think this code is "setting", i.e. "enabling" interception.
> 
>>   		break;
>>   	case MSR_IA32_PRED_CMD:
>>   		if (!msr_info->host_initiated &&
>> @@ -2028,8 +2028,9 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>>   		 * vmcs02.msr_bitmap here since it gets completely overwritten
>>   		 * in the merging.
>>   		 */
>> -		vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD,
>> -					      MSR_TYPE_W);
>> +		vmx_set_intercept_for_msr(vmx->vmcs01.msr_bitmap,
>> +					  MSR_IA32_PRED_CMD,
>> +					  MSR_TYPE_W, false);
>>   		break;
>>   	case MSR_IA32_CR_PAT:
>>   		if (!kvm_pat_valid(data))
>> @@ -3599,9 +3600,6 @@ static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bit
>>   {
>>   	int f = sizeof(unsigned long);
>>   
>> -	if (!cpu_has_vmx_msr_bitmap())
>> -		return;
> 
> As above, I'd rather keep these here.  Functionally it changes nothing on
> CPUs with an MSR bitmap.  For old CPUs, it saves all of two uops in paths
> that aren't performance critical.
> 
>> -
>>   	if (static_branch_unlikely(&enable_evmcs))
>>   		evmcs_touch_msr_bitmap();
>>   
>> @@ -3637,9 +3635,6 @@ static __always_inline void vmx_enable_intercept_for_msr(unsigned long *msr_bitm
>>   {
>>   	int f = sizeof(unsigned long);
>>   
>> -	if (!cpu_has_vmx_msr_bitmap())
>> -		return;
>> -
>>   	if (static_branch_unlikely(&enable_evmcs))
>>   		evmcs_touch_msr_bitmap();
>>   
>> @@ -3673,6 +3668,9 @@ static __always_inline void vmx_enable_intercept_for_msr(unsigned long *msr_bitm
>>   static __always_inline void vmx_set_intercept_for_msr(unsigned long *msr_bitmap,
>>   			     			      u32 msr, int type, bool value)
>>   {
>> +	if (!cpu_has_vmx_msr_bitmap())
>> +		return;
>> +
>>   	if (value)
>>   		vmx_enable_intercept_for_msr(msr_bitmap, msr, type);
>>   	else
>> @@ -4163,11 +4161,30 @@ static void ept_set_mmio_spte_mask(void)
>>   
>>   static void vmx_vmcs_setup(struct vcpu_vmx *vmx)
>>   {
>> +	unsigned long *msr_bitmap;
>> +
>>   	if (nested)
>>   		nested_vmx_vmcs_setup();
>>   
>> -	if (cpu_has_vmx_msr_bitmap())
>> +	if (cpu_has_vmx_msr_bitmap()) {
>> +		msr_bitmap = vmx->vmcs01.msr_bitmap;
>> +		vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_TSC, MSR_TYPE_R);
>> +		vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE, MSR_TYPE_RW);
>> +		vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE, MSR_TYPE_RW);
>> +		vmx_disable_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
>> +		vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW);
>> +		vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
>> +		vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);
>> +		if (kvm_cstate_in_guest(vmx->vcpu.kvm)) {
>> +			vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C1_RES, MSR_TYPE_R);
>> +			vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C3_RESIDENCY, MSR_TYPE_R);
>> +			vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C6_RESIDENCY, MSR_TYPE_R);
>> +			vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C7_RESIDENCY, MSR_TYPE_R);
>> +		}
>> +
>>   		vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap));
>> +	}
>> +	vmx->msr_bitmap_mode = 0;
> 
> Zeroing msr_bitmap_mode can be skipped as well.
> 
>>   	vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */
>>   
>> @@ -6074,7 +6091,8 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
>>   	}
>>   	secondary_exec_controls_set(vmx, sec_exec_control);
>>   
>> -	vmx_update_msr_bitmap(vcpu);
>> +	if (cpu_has_vmx_msr_bitmap())
>> +		vmx_update_msr_bitmap(vcpu);
>>   }
>>   
>>   static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa)
>> @@ -6688,7 +6706,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
>>   {
>>   	int err;
>>   	struct vcpu_vmx *vmx;
>> -	unsigned long *msr_bitmap;
>>   	int i, cpu;
>>   
>>   	BUILD_BUG_ON_MSG(offsetof(struct vcpu_vmx, vcpu) != 0,
>> @@ -6745,22 +6762,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
>>   	if (err < 0)
>>   		goto free_msrs;
>>   
>> -	msr_bitmap = vmx->vmcs01.msr_bitmap;
>> -	vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_TSC, MSR_TYPE_R);
>> -	vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE, MSR_TYPE_RW);
>> -	vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE, MSR_TYPE_RW);
>> -	vmx_disable_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
>> -	vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW);
>> -	vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
>> -	vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);
>> -	if (kvm_cstate_in_guest(kvm)) {
>> -		vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C1_RES, MSR_TYPE_R);
>> -		vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C3_RESIDENCY, MSR_TYPE_R);
>> -		vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C6_RESIDENCY, MSR_TYPE_R);
>> -		vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C7_RESIDENCY, MSR_TYPE_R);
>> -	}
>> -	vmx->msr_bitmap_mode = 0;
> 
> Keep this code here to be consistent with the previous change that moved
> the guest_msrs initialization *out* of the VMCS specific function.  Both
> are collateral pages that are not directly part of the VMCS.
>

OK. Then this patch is unnecessary too.

> I'd be tempted to use a goto to skip the code, the line length is bad
> enough as it is, e.g.:
> 
> 	if (!cpu_has_vmx_msr_bitmap())
> 		goto skip_msr_bitmap;
> 
> 	vmx->msr_bitmap_mode = 0;
> skip_msr_bitmap:
> 
>> -
>>   	vmx->loaded_vmcs = &vmx->vmcs01;
>>   	cpu = get_cpu();
>>   	vmx_vcpu_load(&vmx->vcpu, cpu);
>> -- 
>> 2.19.1
>>


* Re: [PATCH v2 3/3] KVM: VMX: Some minor refactor of MSR bitmap
From: Sean Christopherson @ 2019-10-18 19:52 UTC (permalink / raw)
  To: Xiaoyao Li
  Cc: Paolo Bonzini, Radim Krčmář,
	Vitaly Kuznetsov, Jim Mattson, Joerg Roedel, kvm, linux-kernel

On Sat, Oct 19, 2019 at 02:39:57AM +0800, Xiaoyao Li wrote:
> On 10/19/2019 1:27 AM, Sean Christopherson wrote:
> >>@@ -6745,22 +6762,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
> >>  	if (err < 0)
> >>  		goto free_msrs;
> >>-	msr_bitmap = vmx->vmcs01.msr_bitmap;
> >>-	vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_TSC, MSR_TYPE_R);
> >>-	vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE, MSR_TYPE_RW);
> >>-	vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE, MSR_TYPE_RW);
> >>-	vmx_disable_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
> >>-	vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW);
> >>-	vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
> >>-	vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);
> >>-	if (kvm_cstate_in_guest(kvm)) {
> >>-		vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C1_RES, MSR_TYPE_R);
> >>-		vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C3_RESIDENCY, MSR_TYPE_R);
> >>-		vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C6_RESIDENCY, MSR_TYPE_R);
> >>-		vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C7_RESIDENCY, MSR_TYPE_R);
> >>-	}
> >>-	vmx->msr_bitmap_mode = 0;
> >
> >Keep this code here to be consistent with the previous change that moved
> >the guest_msrs initialization *out* of the VMCS specific function.  Both
> >are collateral pages that are not directly part of the VMCS.
> >
> 
> OK. Then this patch is unnecessary too.

I'd keep the change to skip this code if the CPU doesn't have msr bitmaps.
Performance-wise it may be largely irrelevant, but it's a good change from
a readability perspective.

> 
> >I'd be tempted to use a goto to skip the code, the line length is bad
> >enough as it is, e.g.:
> >
> >	if (!cpu_has_vmx_msr_bitmap())
> >		goto skip_msr_bitmap;
> >
> >	vmx->msr_bitmap_mode = 0;
> >skip_msr_bitmap:
> >
> >>-
> >>  	vmx->loaded_vmcs = &vmx->vmcs01;
> >>  	cpu = get_cpu();
> >>  	vmx_vcpu_load(&vmx->vcpu, cpu);
> >>-- 
> >>2.19.1
> >>


* Re: [PATCH v2 3/3] KVM: VMX: Some minor refactor of MSR bitmap
From: Xiaoyao Li @ 2019-10-19  1:28 UTC (permalink / raw)
  To: Sean Christopherson
  Cc: Paolo Bonzini, Radim Krčmář,
	Vitaly Kuznetsov, Jim Mattson, Joerg Roedel, kvm, linux-kernel

On 10/19/2019 1:27 AM, Sean Christopherson wrote:
> On Fri, Oct 18, 2019 at 05:37:23PM +0800, Xiaoyao Li wrote:
>> Move the MSR bitmap capability check out of vmx_disable_intercept_for_msr()
>> and vmx_enable_intercept_for_msr() and into vmx_set_intercept_for_msr(),
>> so that the check is done early, before we actually touch the bitmap.
>>
>> Also, move the common MSR pass-through setup to where the msr bitmap is
>> actually set up and used.
>>
>> Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
>> ---
>> Changes in v2:
>>    - Remove the check of cpu_has_vmx_msr_bitmap() from
>>      vmx_{disable,enable}_intercept_for_msr (Krish)
>> ---
>>   arch/x86/kvm/vmx/vmx.c | 65 +++++++++++++++++++++---------------------
>>   1 file changed, 33 insertions(+), 32 deletions(-)
>>
>> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
>> index b083316a598d..017689d0144e 100644
>> --- a/arch/x86/kvm/vmx/vmx.c
>> +++ b/arch/x86/kvm/vmx/vmx.c
>> @@ -343,8 +343,8 @@ module_param_cb(vmentry_l1d_flush, &vmentry_l1d_flush_ops, NULL, 0644);
>>   
>>   static bool guest_state_valid(struct kvm_vcpu *vcpu);
>>   static u32 vmx_segment_access_rights(struct kvm_segment *var);
>> -static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
>> -							  u32 msr, int type);
>> +static __always_inline void vmx_set_intercept_for_msr(unsigned long *msr_bitmap,
>> +		u32 msr, int type, bool value);
>>   
>>   void vmx_vmexit(void);
>>   
>> @@ -2000,9 +2000,9 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>>   		 * in the merging. We update the vmcs01 here for L1 as well
>>   		 * since it will end up touching the MSR anyway now.
>>   		 */
>> -		vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap,
>> -					      MSR_IA32_SPEC_CTRL,
>> -					      MSR_TYPE_RW);
>> +		vmx_set_intercept_for_msr(vmx->vmcs01.msr_bitmap,
>> +					  MSR_IA32_SPEC_CTRL,
>> +					  MSR_TYPE_RW, false);
> 
> IMO this is a net negative.  The explicit "disable" is significantly more
> intuitive than "set" with a %false param, e.g. at a quick glance it would
> be easy to think this code is "setting", i.e. "enabling" interception.
>

How about renaming it to vmx_switch_intercept_for_msr()?
Or just add the cpu_has_vmx_msr_bitmap() check at the call site, since
the check has been removed from vmx_disable_intercept_for_msr()?
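
I.e. the call site in vmx_set_msr() would then look something like
(sketch):

	if (cpu_has_vmx_msr_bitmap())
		vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap,
					      MSR_IA32_SPEC_CTRL,
					      MSR_TYPE_RW);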

>>   		break;
>>   	case MSR_IA32_PRED_CMD:
>>   		if (!msr_info->host_initiated &&
>> @@ -2028,8 +2028,9 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>>   		 * vmcs02.msr_bitmap here since it gets completely overwritten
>>   		 * in the merging.
>>   		 */
>> -		vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD,
>> -					      MSR_TYPE_W);
>> +		vmx_set_intercept_for_msr(vmx->vmcs01.msr_bitmap,
>> +					  MSR_IA32_PRED_CMD,
>> +					  MSR_TYPE_W, false);
>>   		break;
>>   	case MSR_IA32_CR_PAT:
>>   		if (!kvm_pat_valid(data))
>> @@ -3599,9 +3600,6 @@ static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bit
>>   {
>>   	int f = sizeof(unsigned long);
>>   
>> -	if (!cpu_has_vmx_msr_bitmap())
>> -		return;
> 
> As above, I'd rather keep these here.  Functionally it changes nothing on
> CPUs with an MSR bitmap.  For old CPUs, it saves all of two uops in paths
> that aren't performance critical.
> 
>> -
>>   	if (static_branch_unlikely(&enable_evmcs))
>>   		evmcs_touch_msr_bitmap();
>>   
>> @@ -3637,9 +3635,6 @@ static __always_inline void vmx_enable_intercept_for_msr(unsigned long *msr_bitm
>>   {
>>   	int f = sizeof(unsigned long);
>>   
>> -	if (!cpu_has_vmx_msr_bitmap())
>> -		return;
>> -
>>   	if (static_branch_unlikely(&enable_evmcs))
>>   		evmcs_touch_msr_bitmap();
>>   
>> @@ -3673,6 +3668,9 @@ static __always_inline void vmx_enable_intercept_for_msr(unsigned long *msr_bitm
>>   static __always_inline void vmx_set_intercept_for_msr(unsigned long *msr_bitmap,
>>   			     			      u32 msr, int type, bool value)
>>   {
>> +	if (!cpu_has_vmx_msr_bitmap())
>> +		return;
>> +
>>   	if (value)
>>   		vmx_enable_intercept_for_msr(msr_bitmap, msr, type);
>>   	else
>> @@ -4163,11 +4161,30 @@ static void ept_set_mmio_spte_mask(void)
>>   
>>   static void vmx_vmcs_setup(struct vcpu_vmx *vmx)
>>   {
>> +	unsigned long *msr_bitmap;
>> +
>>   	if (nested)
>>   		nested_vmx_vmcs_setup();
>>   
>> -	if (cpu_has_vmx_msr_bitmap())
>> +	if (cpu_has_vmx_msr_bitmap()) {
>> +		msr_bitmap = vmx->vmcs01.msr_bitmap;
>> +		vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_TSC, MSR_TYPE_R);
>> +		vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE, MSR_TYPE_RW);
>> +		vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE, MSR_TYPE_RW);
>> +		vmx_disable_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
>> +		vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW);
>> +		vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
>> +		vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);
>> +		if (kvm_cstate_in_guest(vmx->vcpu.kvm)) {
>> +			vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C1_RES, MSR_TYPE_R);
>> +			vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C3_RESIDENCY, MSR_TYPE_R);
>> +			vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C6_RESIDENCY, MSR_TYPE_R);
>> +			vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C7_RESIDENCY, MSR_TYPE_R);
>> +		}
>> +
>>   		vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap));
>> +	}
>> +	vmx->msr_bitmap_mode = 0;
> 
> Zeroing msr_bitmap_mode can be skipped as well.
> 
>>   	vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */
>>   
>> @@ -6074,7 +6091,8 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
>>   	}
>>   	secondary_exec_controls_set(vmx, sec_exec_control);
>>   
>> -	vmx_update_msr_bitmap(vcpu);
>> +	if (cpu_has_vmx_msr_bitmap())
>> +		vmx_update_msr_bitmap(vcpu);
>>   }
>>   
>>   static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa)
>> @@ -6688,7 +6706,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
>>   {
>>   	int err;
>>   	struct vcpu_vmx *vmx;
>> -	unsigned long *msr_bitmap;
>>   	int i, cpu;
>>   
>>   	BUILD_BUG_ON_MSG(offsetof(struct vcpu_vmx, vcpu) != 0,
>> @@ -6745,22 +6762,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
>>   	if (err < 0)
>>   		goto free_msrs;
>>   
>> -	msr_bitmap = vmx->vmcs01.msr_bitmap;
>> -	vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_TSC, MSR_TYPE_R);
>> -	vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE, MSR_TYPE_RW);
>> -	vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE, MSR_TYPE_RW);
>> -	vmx_disable_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
>> -	vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW);
>> -	vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
>> -	vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);
>> -	if (kvm_cstate_in_guest(kvm)) {
>> -		vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C1_RES, MSR_TYPE_R);
>> -		vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C3_RESIDENCY, MSR_TYPE_R);
>> -		vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C6_RESIDENCY, MSR_TYPE_R);
>> -		vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C7_RESIDENCY, MSR_TYPE_R);
>> -	}
>> -	vmx->msr_bitmap_mode = 0;
> 
> Keep this code here to be consistent with the previous change that moved
> the guest_msrs initialization *out* of the VMCS specific function.  Both
> are collateral pages that are not directly part of the VMCS.
> 
> I'd be tempted to use a goto to skip the code, the line length is bad
> enough as it is, e.g.:
> 
> 	if (!cpu_has_vmx_msr_bitmap())
> 		goto skip_msr_bitmap;
> 
> 	vmx->msr_bitmap_mode = 0;
> skip_msr_bitmap:
> 
>> -
>>   	vmx->loaded_vmcs = &vmx->vmcs01;
>>   	cpu = get_cpu();
>>   	vmx_vcpu_load(&vmx->vcpu, cpu);
>> -- 
>> 2.19.1
>>

