* [PATCH v5 1/3] KVM: vmx: Enable VMFUNCs
2017-07-28 19:52 [PATCH v5 0/3] Expose VMFUNC to the nested hypervisor Bandan Das
@ 2017-07-28 19:52 ` Bandan Das
2017-07-28 19:52 ` [PATCH v5 2/3] KVM: nVMX: Enable VMFUNC for the L1 hypervisor Bandan Das
2017-07-28 19:52 ` [PATCH v5 3/3] KVM: nVMX: Emulate EPTP switching " Bandan Das
2 siblings, 0 replies; 10+ messages in thread
From: Bandan Das @ 2017-07-28 19:52 UTC (permalink / raw)
To: kvm; +Cc: pbonzini, david, rkrcmar, jmattson, linux-kernel
Enable VMFUNC in the secondary execution controls. This simplifies the
changes necessary to expose it to nested hypervisors. VMFUNCs still
cause #UD when invoked.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Bandan Das <bsd@redhat.com>
---
arch/x86/include/asm/vmx.h | 3 +++
arch/x86/kvm/vmx.c | 22 +++++++++++++++++++++-
2 files changed, 24 insertions(+), 1 deletion(-)
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 35cd06f..da5375e 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -72,6 +72,7 @@
#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400
#define SECONDARY_EXEC_RDRAND 0x00000800
#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000
+#define SECONDARY_EXEC_ENABLE_VMFUNC 0x00002000
#define SECONDARY_EXEC_SHADOW_VMCS 0x00004000
#define SECONDARY_EXEC_RDSEED 0x00010000
#define SECONDARY_EXEC_ENABLE_PML 0x00020000
@@ -187,6 +188,8 @@ enum vmcs_field {
APIC_ACCESS_ADDR_HIGH = 0x00002015,
POSTED_INTR_DESC_ADDR = 0x00002016,
POSTED_INTR_DESC_ADDR_HIGH = 0x00002017,
+ VM_FUNCTION_CONTROL = 0x00002018,
+ VM_FUNCTION_CONTROL_HIGH = 0x00002019,
EPT_POINTER = 0x0000201a,
EPT_POINTER_HIGH = 0x0000201b,
EOI_EXIT_BITMAP0 = 0x0000201c,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ca5d2b9..a483b49 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1314,6 +1314,12 @@ static inline bool cpu_has_vmx_tsc_scaling(void)
SECONDARY_EXEC_TSC_SCALING;
}
+static inline bool cpu_has_vmx_vmfunc(void)
+{
+ return vmcs_config.cpu_based_2nd_exec_ctrl &
+ SECONDARY_EXEC_ENABLE_VMFUNC;
+}
+
static inline bool report_flexpriority(void)
{
return flexpriority_enabled;
@@ -3575,7 +3581,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
SECONDARY_EXEC_SHADOW_VMCS |
SECONDARY_EXEC_XSAVES |
SECONDARY_EXEC_ENABLE_PML |
- SECONDARY_EXEC_TSC_SCALING;
+ SECONDARY_EXEC_TSC_SCALING |
+ SECONDARY_EXEC_ENABLE_VMFUNC;
if (adjust_vmx_controls(min2, opt2,
MSR_IA32_VMX_PROCBASED_CTLS2,
&_cpu_based_2nd_exec_control) < 0)
@@ -5233,6 +5240,9 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
vmcs_writel(HOST_GS_BASE, 0); /* 22.2.4 */
#endif
+ if (cpu_has_vmx_vmfunc())
+ vmcs_write64(VM_FUNCTION_CONTROL, 0);
+
vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0);
vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host));
@@ -7740,6 +7750,12 @@ static int handle_preemption_timer(struct kvm_vcpu *vcpu)
return 1;
}
+static int handle_vmfunc(struct kvm_vcpu *vcpu)
+{
+ kvm_queue_exception(vcpu, UD_VECTOR);
+ return 1;
+}
+
/*
* The exit handlers return 1 if the exit was handled fully and guest execution
* may resume. Otherwise they set the kvm_run parameter to indicate what needs
@@ -7790,6 +7806,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
[EXIT_REASON_XSAVES] = handle_xsaves,
[EXIT_REASON_XRSTORS] = handle_xrstors,
[EXIT_REASON_PML_FULL] = handle_pml_full,
+ [EXIT_REASON_VMFUNC] = handle_vmfunc,
[EXIT_REASON_PREEMPTION_TIMER] = handle_preemption_timer,
};
@@ -8111,6 +8128,9 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
case EXIT_REASON_PML_FULL:
/* We emulate PML support to L1. */
return false;
+ case EXIT_REASON_VMFUNC:
+ /* VM functions are emulated through L2->L0 vmexits. */
+ return false;
default:
return true;
}
--
2.9.4
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH v5 2/3] KVM: nVMX: Enable VMFUNC for the L1 hypervisor
2017-07-28 19:52 [PATCH v5 0/3] Expose VMFUNC to the nested hypervisor Bandan Das
2017-07-28 19:52 ` [PATCH v5 1/3] KVM: vmx: Enable VMFUNCs Bandan Das
@ 2017-07-28 19:52 ` Bandan Das
2017-07-28 19:52 ` [PATCH v5 3/3] KVM: nVMX: Emulate EPTP switching " Bandan Das
2 siblings, 0 replies; 10+ messages in thread
From: Bandan Das @ 2017-07-28 19:52 UTC (permalink / raw)
To: kvm; +Cc: pbonzini, david, rkrcmar, jmattson, linux-kernel
Expose VMFUNC in MSRs and VMCS fields. No actual VMFUNCs are enabled.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Bandan Das <bsd@redhat.com>
---
arch/x86/kvm/vmx.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 51 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index a483b49..fe8f5fc 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -240,6 +240,7 @@ struct __packed vmcs12 {
u64 virtual_apic_page_addr;
u64 apic_access_addr;
u64 posted_intr_desc_addr;
+ u64 vm_function_control;
u64 ept_pointer;
u64 eoi_exit_bitmap0;
u64 eoi_exit_bitmap1;
@@ -481,6 +482,7 @@ struct nested_vmx {
u64 nested_vmx_cr4_fixed0;
u64 nested_vmx_cr4_fixed1;
u64 nested_vmx_vmcs_enum;
+ u64 nested_vmx_vmfunc_controls;
};
#define POSTED_INTR_ON 0
@@ -763,6 +765,7 @@ static const unsigned short vmcs_field_to_offset_table[] = {
FIELD64(VIRTUAL_APIC_PAGE_ADDR, virtual_apic_page_addr),
FIELD64(APIC_ACCESS_ADDR, apic_access_addr),
FIELD64(POSTED_INTR_DESC_ADDR, posted_intr_desc_addr),
+ FIELD64(VM_FUNCTION_CONTROL, vm_function_control),
FIELD64(EPT_POINTER, ept_pointer),
FIELD64(EOI_EXIT_BITMAP0, eoi_exit_bitmap0),
FIELD64(EOI_EXIT_BITMAP1, eoi_exit_bitmap1),
@@ -1394,6 +1397,11 @@ static inline bool nested_cpu_has_posted_intr(struct vmcs12 *vmcs12)
return vmcs12->pin_based_vm_exec_control & PIN_BASED_POSTED_INTR;
}
+static inline bool nested_cpu_has_vmfunc(struct vmcs12 *vmcs12)
+{
+ return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_VMFUNC);
+}
+
static inline bool is_nmi(u32 intr_info)
{
return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
@@ -2780,6 +2788,12 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
} else
vmx->nested.nested_vmx_ept_caps = 0;
+ if (cpu_has_vmx_vmfunc()) {
+ vmx->nested.nested_vmx_secondary_ctls_high |=
+ SECONDARY_EXEC_ENABLE_VMFUNC;
+ vmx->nested.nested_vmx_vmfunc_controls = 0;
+ }
+
/*
* Old versions of KVM use the single-context version without
* checking for support, so declare that it is supported even
@@ -3149,6 +3163,9 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
*pdata = vmx->nested.nested_vmx_ept_caps |
((u64)vmx->nested.nested_vmx_vpid_caps << 32);
break;
+ case MSR_IA32_VMX_VMFUNC:
+ *pdata = vmx->nested.nested_vmx_vmfunc_controls;
+ break;
default:
return 1;
}
@@ -7752,7 +7769,29 @@ static int handle_preemption_timer(struct kvm_vcpu *vcpu)
static int handle_vmfunc(struct kvm_vcpu *vcpu)
{
- kvm_queue_exception(vcpu, UD_VECTOR);
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ struct vmcs12 *vmcs12;
+ u32 function = vcpu->arch.regs[VCPU_REGS_RAX];
+
+ /*
+ * VMFUNC is only supported for nested guests, but we always enable the
+ * secondary control for simplicity; for non-nested mode, fake that we
+ * didn't by injecting #UD.
+ */
+ if (!is_guest_mode(vcpu)) {
+ kvm_queue_exception(vcpu, UD_VECTOR);
+ return 1;
+ }
+
+ vmcs12 = get_vmcs12(vcpu);
+ if ((vmcs12->vm_function_control & (1 << function)) == 0)
+ goto fail;
+ WARN_ONCE(1, "VMCS12 VM function control should have been zero");
+
+fail:
+ nested_vmx_vmexit(vcpu, vmx->exit_reason,
+ vmcs_read32(VM_EXIT_INTR_INFO),
+ vmcs_readl(EXIT_QUALIFICATION));
return 1;
}
@@ -10053,7 +10092,8 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
SECONDARY_EXEC_RDTSCP |
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
- SECONDARY_EXEC_APIC_REGISTER_VIRT);
+ SECONDARY_EXEC_APIC_REGISTER_VIRT |
+ SECONDARY_EXEC_ENABLE_VMFUNC);
if (nested_cpu_has(vmcs12,
CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) {
vmcs12_exec_ctrl = vmcs12->secondary_vm_exec_control &
@@ -10061,6 +10101,10 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
exec_control |= vmcs12_exec_ctrl;
}
+ /* All VMFUNCs are currently emulated through L0 vmexits. */
+ if (exec_control & SECONDARY_EXEC_ENABLE_VMFUNC)
+ vmcs_write64(VM_FUNCTION_CONTROL, 0);
+
if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) {
vmcs_write64(EOI_EXIT_BITMAP0,
vmcs12->eoi_exit_bitmap0);
@@ -10310,6 +10354,11 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
vmx->nested.nested_vmx_entry_ctls_high))
return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+ if (nested_cpu_has_vmfunc(vmcs12) &&
+ (vmcs12->vm_function_control &
+ ~vmx->nested.nested_vmx_vmfunc_controls))
+ return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
if (vmcs12->cr3_target_count > nested_cpu_vmx_misc_cr3_count(vcpu))
return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
--
2.9.4
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH v5 3/3] KVM: nVMX: Emulate EPTP switching for the L1 hypervisor
2017-07-28 19:52 [PATCH v5 0/3] Expose VMFUNC to the nested hypervisor Bandan Das
2017-07-28 19:52 ` [PATCH v5 1/3] KVM: vmx: Enable VMFUNCs Bandan Das
2017-07-28 19:52 ` [PATCH v5 2/3] KVM: nVMX: Enable VMFUNC for the L1 hypervisor Bandan Das
@ 2017-07-28 19:52 ` Bandan Das
2017-07-31 11:59 ` David Hildenbrand
2017-08-01 15:17 ` Radim Krčmář
2 siblings, 2 replies; 10+ messages in thread
From: Bandan Das @ 2017-07-28 19:52 UTC (permalink / raw)
To: kvm; +Cc: pbonzini, david, rkrcmar, jmattson, linux-kernel
When L2 uses vmfunc, L0 utilizes the associated vmexit to
emulate a switching of the ept pointer by reloading the
guest MMU.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Bandan Das <bsd@redhat.com>
---
arch/x86/include/asm/vmx.h | 6 +++
arch/x86/kvm/vmx.c | 124 ++++++++++++++++++++++++++++++++++++++++++---
2 files changed, 124 insertions(+), 6 deletions(-)
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index da5375e..5f63a2e 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -115,6 +115,10 @@
#define VMX_MISC_SAVE_EFER_LMA 0x00000020
#define VMX_MISC_ACTIVITY_HLT 0x00000040
+/* VMFUNC functions */
+#define VMX_VMFUNC_EPTP_SWITCHING 0x00000001
+#define VMFUNC_EPTP_ENTRIES 512
+
static inline u32 vmx_basic_vmcs_revision_id(u64 vmx_basic)
{
return vmx_basic & GENMASK_ULL(30, 0);
@@ -200,6 +204,8 @@ enum vmcs_field {
EOI_EXIT_BITMAP2_HIGH = 0x00002021,
EOI_EXIT_BITMAP3 = 0x00002022,
EOI_EXIT_BITMAP3_HIGH = 0x00002023,
+ EPTP_LIST_ADDRESS = 0x00002024,
+ EPTP_LIST_ADDRESS_HIGH = 0x00002025,
VMREAD_BITMAP = 0x00002026,
VMWRITE_BITMAP = 0x00002028,
XSS_EXIT_BITMAP = 0x0000202C,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index fe8f5fc..f1ab783 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -246,6 +246,7 @@ struct __packed vmcs12 {
u64 eoi_exit_bitmap1;
u64 eoi_exit_bitmap2;
u64 eoi_exit_bitmap3;
+ u64 eptp_list_address;
u64 xss_exit_bitmap;
u64 guest_physical_address;
u64 vmcs_link_pointer;
@@ -771,6 +772,7 @@ static const unsigned short vmcs_field_to_offset_table[] = {
FIELD64(EOI_EXIT_BITMAP1, eoi_exit_bitmap1),
FIELD64(EOI_EXIT_BITMAP2, eoi_exit_bitmap2),
FIELD64(EOI_EXIT_BITMAP3, eoi_exit_bitmap3),
+ FIELD64(EPTP_LIST_ADDRESS, eptp_list_address),
FIELD64(XSS_EXIT_BITMAP, xss_exit_bitmap),
FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address),
FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer),
@@ -1402,6 +1404,13 @@ static inline bool nested_cpu_has_vmfunc(struct vmcs12 *vmcs12)
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_VMFUNC);
}
+static inline bool nested_cpu_has_eptp_switching(struct vmcs12 *vmcs12)
+{
+ return nested_cpu_has_vmfunc(vmcs12) &&
+ (vmcs12->vm_function_control &
+ VMX_VMFUNC_EPTP_SWITCHING);
+}
+
static inline bool is_nmi(u32 intr_info)
{
return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
@@ -2791,7 +2800,12 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
if (cpu_has_vmx_vmfunc()) {
vmx->nested.nested_vmx_secondary_ctls_high |=
SECONDARY_EXEC_ENABLE_VMFUNC;
- vmx->nested.nested_vmx_vmfunc_controls = 0;
+ /*
+ * Advertise EPTP switching unconditionally
+ * since we emulate it
+ */
+ vmx->nested.nested_vmx_vmfunc_controls =
+ VMX_VMFUNC_EPTP_SWITCHING;
}
/*
@@ -7767,6 +7781,85 @@ static int handle_preemption_timer(struct kvm_vcpu *vcpu)
return 1;
}
+static bool check_ept_address_valid(struct kvm_vcpu *vcpu, u64 address)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ u64 mask = VMX_EPT_RWX_MASK;
+ int maxphyaddr = cpuid_maxphyaddr(vcpu);
+ struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
+
+ /* Check for execute_only validity */
+ if ((address & mask) == VMX_EPT_EXECUTABLE_MASK) {
+ if (!(vmx->nested.nested_vmx_ept_caps &
+ VMX_EPT_EXECUTE_ONLY_BIT))
+ return false;
+ }
+
+ /* Bits 5:3 must be 3 */
+ if (((address >> VMX_EPT_GAW_EPTP_SHIFT) & 0x7) != VMX_EPT_DEFAULT_GAW)
+ return false;
+
+ /* Reserved bits should not be set */
+ if (address >> maxphyaddr || ((address >> 7) & 0x1f))
+ return false;
+
+ /* AD, if set, should be supported */
+ if ((address & VMX_EPT_AD_ENABLE_BIT)) {
+ if (!enable_ept_ad_bits)
+ return false;
+ mmu->ept_ad = true;
+ } else
+ mmu->ept_ad = false;
+
+ return true;
+}
+
+static int nested_vmx_eptp_switching(struct kvm_vcpu *vcpu,
+ struct vmcs12 *vmcs12)
+{
+ u32 index = vcpu->arch.regs[VCPU_REGS_RCX];
+ u64 *l1_eptp_list, address;
+ struct page *page;
+
+ if (!nested_cpu_has_eptp_switching(vmcs12) ||
+ !nested_cpu_has_ept(vmcs12))
+ return 1;
+
+ if (index >= VMFUNC_EPTP_ENTRIES)
+ return 1;
+
+ page = nested_get_page(vcpu, vmcs12->eptp_list_address);
+ if (!page)
+ return 1;
+
+ l1_eptp_list = kmap(page);
+ address = l1_eptp_list[index];
+
+ /*
+ * If the (L2) guest does a vmfunc to the currently
+ * active ept pointer, we don't have to do anything else
+ */
+ if (vmcs12->ept_pointer != address) {
+ if (!check_ept_address_valid(vcpu, address)) {
+ kunmap(page);
+ nested_release_page_clean(page);
+ return 1;
+ }
+ kvm_mmu_unload(vcpu);
+ vmcs12->ept_pointer = address;
+ /*
+ * TODO: Check what's the correct approach in case
+ * mmu reload fails. Currently, we just let the next
+ * reload potentially fail
+ */
+ kvm_mmu_reload(vcpu);
+ }
+
+ kunmap(page);
+ nested_release_page_clean(page);
+ return 0;
+}
+
static int handle_vmfunc(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -7786,7 +7879,16 @@ static int handle_vmfunc(struct kvm_vcpu *vcpu)
vmcs12 = get_vmcs12(vcpu);
if ((vmcs12->vm_function_control & (1 << function)) == 0)
goto fail;
- WARN_ONCE(1, "VMCS12 VM function control should have been zero");
+
+ switch (function) {
+ case 0:
+ if (nested_vmx_eptp_switching(vcpu, vmcs12))
+ goto fail;
+ break;
+ default:
+ goto fail;
+ }
+ return kvm_skip_emulated_instruction(vcpu);
fail:
nested_vmx_vmexit(vcpu, vmx->exit_reason,
@@ -10354,10 +10456,20 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
vmx->nested.nested_vmx_entry_ctls_high))
return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
- if (nested_cpu_has_vmfunc(vmcs12) &&
- (vmcs12->vm_function_control &
- ~vmx->nested.nested_vmx_vmfunc_controls))
- return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+ if (nested_cpu_has_vmfunc(vmcs12)) {
+ if (vmcs12->vm_function_control &
+ ~vmx->nested.nested_vmx_vmfunc_controls)
+ return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
+ if (nested_cpu_has_eptp_switching(vmcs12)) {
+ if (!nested_cpu_has_ept(vmcs12) ||
+ (vmcs12->eptp_list_address >>
+ cpuid_maxphyaddr(vcpu)) ||
+ !IS_ALIGNED(vmcs12->eptp_list_address, 4096))
+ return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+ }
+ }
+
if (vmcs12->cr3_target_count > nested_cpu_vmx_misc_cr3_count(vcpu))
return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
--
2.9.4
^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH v5 3/3] KVM: nVMX: Emulate EPTP switching for the L1 hypervisor
2017-07-28 19:52 ` [PATCH v5 3/3] KVM: nVMX: Emulate EPTP switching " Bandan Das
@ 2017-07-31 11:59 ` David Hildenbrand
2017-07-31 19:32 ` Bandan Das
2017-08-01 15:17 ` Radim Krčmář
1 sibling, 1 reply; 10+ messages in thread
From: David Hildenbrand @ 2017-07-31 11:59 UTC (permalink / raw)
To: Bandan Das, kvm; +Cc: pbonzini, rkrcmar, jmattson, linux-kernel
> +static inline bool nested_cpu_has_eptp_switching(struct vmcs12 *vmcs12)
> +{
> + return nested_cpu_has_vmfunc(vmcs12) &&
> + (vmcs12->vm_function_control &
> + VMX_VMFUNC_EPTP_SWITCHING);
> +}
> +
> static inline bool is_nmi(u32 intr_info)
> {
> return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
> @@ -2791,7 +2800,12 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
> if (cpu_has_vmx_vmfunc()) {
> vmx->nested.nested_vmx_secondary_ctls_high |=
> SECONDARY_EXEC_ENABLE_VMFUNC;
> - vmx->nested.nested_vmx_vmfunc_controls = 0;
> + /*
> + * Advertise EPTP switching unconditionally
> + * since we emulate it
> + */
> + vmx->nested.nested_vmx_vmfunc_controls =
> + VMX_VMFUNC_EPTP_SWITCHING;
Should this only be advertised, if enable_ept is set (if the guest also
sees/can use SECONDARY_EXEC_ENABLE_EPT)?
> }
>
> /*
> @@ -7767,6 +7781,85 @@ static int handle_preemption_timer(struct kvm_vcpu *vcpu)
> return 1;
> }
>
> +static bool check_ept_address_valid(struct kvm_vcpu *vcpu, u64 address)
check_..._valid -> valid_ept_address() ?
> +{
> + struct vcpu_vmx *vmx = to_vmx(vcpu);
> + u64 mask = VMX_EPT_RWX_MASK;
> + int maxphyaddr = cpuid_maxphyaddr(vcpu);
> + struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
> +
> + /* Check for execute_only validity */
> + if ((address & mask) == VMX_EPT_EXECUTABLE_MASK) {
> + if (!(vmx->nested.nested_vmx_ept_caps &
> + VMX_EPT_EXECUTE_ONLY_BIT))
> + return false;
> + }
> +
> + /* Bits 5:3 must be 3 */
> + if (((address >> VMX_EPT_GAW_EPTP_SHIFT) & 0x7) != VMX_EPT_DEFAULT_GAW)
> + return false;
> +
> + /* Reserved bits should not be set */
> + if (address >> maxphyaddr || ((address >> 7) & 0x1f))
> + return false;
> +
> + /* AD, if set, should be supported */
> + if ((address & VMX_EPT_AD_ENABLE_BIT)) {
> + if (!enable_ept_ad_bits)
> + return false;
> + mmu->ept_ad = true;
> + } else
> + mmu->ept_ad = false;
I wouldn't expect a "check" function to modify the mmu. Can you move
modifying the mmu outside of this function (leaving the
enable_ept_ad_bits check in place)? (and maybe even set mmu->ept_ad
_after_ the kvm_mmu_unload(vcpu)?, just when setting vmcs12->ept_pointer?)
> +
> + return true;
> +}
> +
> +static int nested_vmx_eptp_switching(struct kvm_vcpu *vcpu,
> + struct vmcs12 *vmcs12)
> +{
> + u32 index = vcpu->arch.regs[VCPU_REGS_RCX];
> + u64 *l1_eptp_list, address;
> + struct page *page;
> +
> + if (!nested_cpu_has_eptp_switching(vmcs12) ||
> + !nested_cpu_has_ept(vmcs12))
> + return 1;
> +
> + if (index >= VMFUNC_EPTP_ENTRIES)
> + return 1;
> +
> + page = nested_get_page(vcpu, vmcs12->eptp_list_address);
> + if (!page)
> + return 1;
> +
> + l1_eptp_list = kmap(page);
> + address = l1_eptp_list[index];
> +
> + /*
> + * If the (L2) guest does a vmfunc to the currently
> + * active ept pointer, we don't have to do anything else
> + */
> + if (vmcs12->ept_pointer != address) {
> + if (!check_ept_address_valid(vcpu, address)) {
> + kunmap(page);
> + nested_release_page_clean(page);
> + return 1;
> + }
> + kvm_mmu_unload(vcpu);
> + vmcs12->ept_pointer = address;
> + /*
> + * TODO: Check what's the correct approach in case
> + * mmu reload fails. Currently, we just let the next
> + * reload potentially fail
> + */
> + kvm_mmu_reload(vcpu);
So, what actually happens if this generates a triple fault? I guess we
will kill the (nested) hypervisor?
> + }
> +
> + kunmap(page);
> + nested_release_page_clean(page);
> + return 0;
> +}
> +
> static int handle_vmfunc(struct kvm_vcpu *vcpu)
> {
> struct vcpu_vmx *vmx = to_vmx(vcpu);
> @@ -7786,7 +7879,16 @@ static int handle_vmfunc(struct kvm_vcpu *vcpu)
> vmcs12 = get_vmcs12(vcpu);
> if ((vmcs12->vm_function_control & (1 << function)) == 0)
> goto fail;
> - WARN_ONCE(1, "VMCS12 VM function control should have been zero");
> +
> + switch (function) {
> + case 0:
> + if (nested_vmx_eptp_switching(vcpu, vmcs12))
> + goto fail;
> + break;
> + default:
> + goto fail;
> + }
> + return kvm_skip_emulated_instruction(vcpu);
>
> fail:
> nested_vmx_vmexit(vcpu, vmx->exit_reason,
> @@ -10354,10 +10456,20 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
> vmx->nested.nested_vmx_entry_ctls_high))
> return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
>
> - if (nested_cpu_has_vmfunc(vmcs12) &&
> - (vmcs12->vm_function_control &
> - ~vmx->nested.nested_vmx_vmfunc_controls))
> - return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
> + if (nested_cpu_has_vmfunc(vmcs12)) {
> + if (vmcs12->vm_function_control &
> + ~vmx->nested.nested_vmx_vmfunc_controls)
> + return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
> +
> + if (nested_cpu_has_eptp_switching(vmcs12)) {
> + if (!nested_cpu_has_ept(vmcs12) ||
> + (vmcs12->eptp_list_address >>
> + cpuid_maxphyaddr(vcpu)) ||
> + !IS_ALIGNED(vmcs12->eptp_list_address, 4096))
> + return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
> + }
> + }
> +
>
> if (vmcs12->cr3_target_count > nested_cpu_vmx_misc_cr3_count(vcpu))
> return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
>
--
Thanks,
David
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v5 3/3] KVM: nVMX: Emulate EPTP switching for the L1 hypervisor
2017-07-31 11:59 ` David Hildenbrand
@ 2017-07-31 19:32 ` Bandan Das
2017-08-01 11:40 ` David Hildenbrand
0 siblings, 1 reply; 10+ messages in thread
From: Bandan Das @ 2017-07-31 19:32 UTC (permalink / raw)
To: David Hildenbrand; +Cc: kvm, pbonzini, rkrcmar, jmattson, linux-kernel
Hi David,
David Hildenbrand <david@redhat.com> writes:
>> +static inline bool nested_cpu_has_eptp_switching(struct vmcs12 *vmcs12)
>> +{
>> + return nested_cpu_has_vmfunc(vmcs12) &&
>> + (vmcs12->vm_function_control &
>> + VMX_VMFUNC_EPTP_SWITCHING);
>> +}
>> +
>> static inline bool is_nmi(u32 intr_info)
>> {
>> return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
>> @@ -2791,7 +2800,12 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
>> if (cpu_has_vmx_vmfunc()) {
>> vmx->nested.nested_vmx_secondary_ctls_high |=
>> SECONDARY_EXEC_ENABLE_VMFUNC;
>> - vmx->nested.nested_vmx_vmfunc_controls = 0;
>> + /*
>> + * Advertise EPTP switching unconditionally
>> + * since we emulate it
>> + */
>> + vmx->nested.nested_vmx_vmfunc_controls =
>> + VMX_VMFUNC_EPTP_SWITCHING;
>
> Should this only be advertised, if enable_ept is set (if the guest also
> sees/can use SECONDARY_EXEC_ENABLE_EPT)?
This represents the function control MSR, which on the hardware is
a RO value. The checks for enable_ept and such are somewhere else.
>> }
>>
>> /*
>> @@ -7767,6 +7781,85 @@ static int handle_preemption_timer(struct kvm_vcpu *vcpu)
>> return 1;
>> }
>>
>> +static bool check_ept_address_valid(struct kvm_vcpu *vcpu, u64 address)
>
> check_..._valid -> valid_ept_address() ?
I think either of the names is fine and I would prefer not
to respin unless you feel really strongly about it :)
>
>> +{
>> + struct vcpu_vmx *vmx = to_vmx(vcpu);
>> + u64 mask = VMX_EPT_RWX_MASK;
>> + int maxphyaddr = cpuid_maxphyaddr(vcpu);
>> + struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
>> +
>> + /* Check for execute_only validity */
>> + if ((address & mask) == VMX_EPT_EXECUTABLE_MASK) {
>> + if (!(vmx->nested.nested_vmx_ept_caps &
>> + VMX_EPT_EXECUTE_ONLY_BIT))
>> + return false;
>> + }
>> +
>> + /* Bits 5:3 must be 3 */
>> + if (((address >> VMX_EPT_GAW_EPTP_SHIFT) & 0x7) != VMX_EPT_DEFAULT_GAW)
>> + return false;
>> +
>> + /* Reserved bits should not be set */
>> + if (address >> maxphyaddr || ((address >> 7) & 0x1f))
>> + return false;
>> +
>> + /* AD, if set, should be supported */
>> + if ((address & VMX_EPT_AD_ENABLE_BIT)) {
>> + if (!enable_ept_ad_bits)
>> + return false;
>> + mmu->ept_ad = true;
>> + } else
>> + mmu->ept_ad = false;
>
> I wouldn't expect a "check" function to modify the mmu. Can you move
> modifying the mmu outside of this function (leaving the
> enable_ept_ad_bits check in place)? (and maybe even set mmu->ept_ad
> _after_ the kvm_mmu_unload(vcpu)?, just when setting vmcs12->ept_pointer?)
>
Well, the correct thing to do is have a wrapper around it in mmu.c
without directly calling here and also call this function before
nested_mmu is initialized. I am working on a separate patch for this btw.
It seems to me setting mmu->ept_ad after kvm_mmu_unload is unnecessary
since it's already being set only if everything else succeeds.
kvm_mmu_unload() isn't affected by the setting of this flag if I understand
correctly.
>> +
>> + return true;
>> +}
>> +
>> +static int nested_vmx_eptp_switching(struct kvm_vcpu *vcpu,
>> + struct vmcs12 *vmcs12)
>> +{
>> + u32 index = vcpu->arch.regs[VCPU_REGS_RCX];
>> + u64 *l1_eptp_list, address;
>> + struct page *page;
>> +
>> + if (!nested_cpu_has_eptp_switching(vmcs12) ||
>> + !nested_cpu_has_ept(vmcs12))
>> + return 1;
>> +
>> + if (index >= VMFUNC_EPTP_ENTRIES)
>> + return 1;
>> +
>> + page = nested_get_page(vcpu, vmcs12->eptp_list_address);
>> + if (!page)
>> + return 1;
>> +
>> + l1_eptp_list = kmap(page);
>> + address = l1_eptp_list[index];
>> +
>> + /*
>> + * If the (L2) guest does a vmfunc to the currently
>> + * active ept pointer, we don't have to do anything else
>> + */
>> + if (vmcs12->ept_pointer != address) {
>> + if (!check_ept_address_valid(vcpu, address)) {
>> + kunmap(page);
>> + nested_release_page_clean(page);
>> + return 1;
>> + }
>> + kvm_mmu_unload(vcpu);
>> + vmcs12->ept_pointer = address;
>> + /*
>> + * TODO: Check what's the correct approach in case
>> + * mmu reload fails. Currently, we just let the next
>> + * reload potentially fail
>> + */
>> + kvm_mmu_reload(vcpu);
>
> So, what actually happens if this generates a triple fault? I guess we
> will kill the (nested) hypervisor?
Yes. Not sure what the right thing to do is though...
Bandan
>> + }
>> +
>> + kunmap(page);
>> + nested_release_page_clean(page);
>> + return 0;
>> +}
>> +
>> static int handle_vmfunc(struct kvm_vcpu *vcpu)
>> {
>> struct vcpu_vmx *vmx = to_vmx(vcpu);
>> @@ -7786,7 +7879,16 @@ static int handle_vmfunc(struct kvm_vcpu *vcpu)
>> vmcs12 = get_vmcs12(vcpu);
>> if ((vmcs12->vm_function_control & (1 << function)) == 0)
>> goto fail;
>> - WARN_ONCE(1, "VMCS12 VM function control should have been zero");
>> +
>> + switch (function) {
>> + case 0:
>> + if (nested_vmx_eptp_switching(vcpu, vmcs12))
>> + goto fail;
>> + break;
>> + default:
>> + goto fail;
>> + }
>> + return kvm_skip_emulated_instruction(vcpu);
>>
>> fail:
>> nested_vmx_vmexit(vcpu, vmx->exit_reason,
>> @@ -10354,10 +10456,20 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
>> vmx->nested.nested_vmx_entry_ctls_high))
>> return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
>>
>> - if (nested_cpu_has_vmfunc(vmcs12) &&
>> - (vmcs12->vm_function_control &
>> - ~vmx->nested.nested_vmx_vmfunc_controls))
>> - return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
>> + if (nested_cpu_has_vmfunc(vmcs12)) {
>> + if (vmcs12->vm_function_control &
>> + ~vmx->nested.nested_vmx_vmfunc_controls)
>> + return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
>> +
>> + if (nested_cpu_has_eptp_switching(vmcs12)) {
>> + if (!nested_cpu_has_ept(vmcs12) ||
>> + (vmcs12->eptp_list_address >>
>> + cpuid_maxphyaddr(vcpu)) ||
>> + !IS_ALIGNED(vmcs12->eptp_list_address, 4096))
>> + return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
>> + }
>> + }
>> +
>>
>> if (vmcs12->cr3_target_count > nested_cpu_vmx_misc_cr3_count(vcpu))
>> return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
>>
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v5 3/3] KVM: nVMX: Emulate EPTP switching for the L1 hypervisor
2017-07-31 19:32 ` Bandan Das
@ 2017-08-01 11:40 ` David Hildenbrand
2017-08-01 14:55 ` Radim Krčmář
0 siblings, 1 reply; 10+ messages in thread
From: David Hildenbrand @ 2017-08-01 11:40 UTC (permalink / raw)
To: Bandan Das; +Cc: kvm, pbonzini, rkrcmar, jmattson, linux-kernel
On 31.07.2017 21:32, Bandan Das wrote:
> Hi David,
>
> David Hildenbrand <david@redhat.com> writes:
>
>>> +static inline bool nested_cpu_has_eptp_switching(struct vmcs12 *vmcs12)
>>> +{
>>> + return nested_cpu_has_vmfunc(vmcs12) &&
>>> + (vmcs12->vm_function_control &
>>> + VMX_VMFUNC_EPTP_SWITCHING);
>>> +}
>>> +
>>> static inline bool is_nmi(u32 intr_info)
>>> {
>>> return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
>>> @@ -2791,7 +2800,12 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
>>> if (cpu_has_vmx_vmfunc()) {
>>> vmx->nested.nested_vmx_secondary_ctls_high |=
>>> SECONDARY_EXEC_ENABLE_VMFUNC;
>>> - vmx->nested.nested_vmx_vmfunc_controls = 0;
>>> + /*
>>> + * Advertise EPTP switching unconditionally
>>> + * since we emulate it
>>> + */
>>> + vmx->nested.nested_vmx_vmfunc_controls =
>>> + VMX_VMFUNC_EPTP_SWITCHING;
>>
>> Should this only be advertised, if enable_ept is set (if the guest also
>> sees/can use SECONDARY_EXEC_ENABLE_EPT)?
>
> This represents the function control MSR, which on the hardware is
> a RO value. The checks for enable_ept and such are somewhere else.
This is the
if (!nested_cpu_has_eptp_switching(vmcs12) ||
!nested_cpu_has_ept(vmcs12))
return 1;
check then, I assume. Makes sense.
>
>>> }
>>>
>>> /*
>>> @@ -7767,6 +7781,85 @@ static int handle_preemption_timer(struct kvm_vcpu *vcpu)
>>> return 1;
>>> }
>>>
>>> +static bool check_ept_address_valid(struct kvm_vcpu *vcpu, u64 address)
>>
>> check_..._valid -> valid_ept_address() ?
>
> I think either of the names is fine and I would prefer not
> to respin unless you feel really strongly about it :)
Sure, if you have to respin, you can fix this up.
>
>>
>>> +{
>>> + struct vcpu_vmx *vmx = to_vmx(vcpu);
>>> + u64 mask = VMX_EPT_RWX_MASK;
>>> + int maxphyaddr = cpuid_maxphyaddr(vcpu);
>>> + struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
>>> +
>>> + /* Check for execute_only validity */
>>> + if ((address & mask) == VMX_EPT_EXECUTABLE_MASK) {
>>> + if (!(vmx->nested.nested_vmx_ept_caps &
>>> + VMX_EPT_EXECUTE_ONLY_BIT))
>>> + return false;
>>> + }
>>> +
>>> + /* Bits 5:3 must be 3 */
>>> + if (((address >> VMX_EPT_GAW_EPTP_SHIFT) & 0x7) != VMX_EPT_DEFAULT_GAW)
>>> + return false;
>>> +
>>> + /* Reserved bits should not be set */
>>> + if (address >> maxphyaddr || ((address >> 7) & 0x1f))
>>> + return false;
>>> +
>>> + /* AD, if set, should be supported */
>>> + if ((address & VMX_EPT_AD_ENABLE_BIT)) {
>>> + if (!enable_ept_ad_bits)
>>> + return false;
>>> + mmu->ept_ad = true;
>>> + } else
>>> + mmu->ept_ad = false;
>>
>> I wouldn't expect a "check" function to modify the mmu. Can you move
>> modifying the mmu outside of this function (leaving the
>> enable_ept_ad_bits check in place)? (and maybe even set mmu->ept_ad
>> _after_ the kvm_mmu_unload(vcpu)?, just when setting vmcs12->ept_pointer?)
>>
>
> Well, the correct thing to do is have a wrapper around it in mmu.c
> without directly calling here and also call this function before
> nested_mmu is initialized. I am working on a separate patch for this btw.
Sounds good. Also thought that encapsulating this might be a good option.
> It seems to me setting mmu->ept_ad after kvm_mmu_unload is unnecessary
> since it's already being set only if everything else succeeds.
> kvm_mmu_unload() isn't affected by the setting of this flag if I understand
> correctly.
It looks at least cleaner to set everything up after the unload has
happened (and could avoid future bugs, if that unload would ever rely
on that setting!). + we can reuse that function more easily (e.g. vm entry).
But that's just my personal opinion. Feel free to ignore.
>
>>> +
>>> + return true;
>>> +}
>>> +
>>> +static int nested_vmx_eptp_switching(struct kvm_vcpu *vcpu,
>>> + struct vmcs12 *vmcs12)
>>> +{
>>> + u32 index = vcpu->arch.regs[VCPU_REGS_RCX];
>>> + u64 *l1_eptp_list, address;
>>> + struct page *page;
>>> +
>>> + if (!nested_cpu_has_eptp_switching(vmcs12) ||
>>> + !nested_cpu_has_ept(vmcs12))
>>> + return 1;
>>> +
>>> + if (index >= VMFUNC_EPTP_ENTRIES)
>>> + return 1;
>>> +
>>> + page = nested_get_page(vcpu, vmcs12->eptp_list_address);
>>> + if (!page)
>>> + return 1;
>>> +
>>> + l1_eptp_list = kmap(page);
>>> + address = l1_eptp_list[index];
>>> +
>>> + /*
>>> + * If the (L2) guest does a vmfunc to the currently
>>> + * active ept pointer, we don't have to do anything else
>>> + */
>>> + if (vmcs12->ept_pointer != address) {
>>> + if (!check_ept_address_valid(vcpu, address)) {
>>> + kunmap(page);
>>> + nested_release_page_clean(page);
>>> + return 1;
>>> + }
>>> + kvm_mmu_unload(vcpu);
>>> + vmcs12->ept_pointer = address;
>>> + /*
>>> + * TODO: Check what's the correct approach in case
>>> + * mmu reload fails. Currently, we just let the next
>>> + * reload potentially fail
>>> + */
>>> + kvm_mmu_reload(vcpu);
>>
> >> So, what actually happens if this generates a triple fault? I guess we
>> will kill the (nested) hypervisor?
>
> Yes. Not sure what the right thing to do is though...
Wonder what happens on real hardware.
--
Thanks,
David
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v5 3/3] KVM: nVMX: Emulate EPTP switching for the L1 hypervisor
2017-08-01 11:40 ` David Hildenbrand
@ 2017-08-01 14:55 ` Radim Krčmář
0 siblings, 0 replies; 10+ messages in thread
From: Radim Krčmář @ 2017-08-01 14:55 UTC (permalink / raw)
To: David Hildenbrand; +Cc: Bandan Das, kvm, pbonzini, jmattson, linux-kernel
2017-08-01 13:40+0200, David Hildenbrand:
> On 31.07.2017 21:32, Bandan Das wrote:
> > David Hildenbrand <david@redhat.com> writes:
> >>> + /* AD, if set, should be supported */
> >>> + if ((address & VMX_EPT_AD_ENABLE_BIT)) {
> >>> + if (!enable_ept_ad_bits)
> >>> + return false;
> >>> + mmu->ept_ad = true;
> >>> + } else
> >>> + mmu->ept_ad = false;
This block should also set the mmu->base_role.ad_disabled.
(The idea being that things should be done as if the EPTP was set during
a VM entry. The only notable difference is that we do not reload
PDPTRS.)
> >> I wouldn't expect a "check" function to modify the mmu. Can you move
> >> modifying the mmu outside of this function (leaving the
> >> enable_ept_ad_bits check in place)? (and maybe even set mmu->ept_ad
> >> _after_ the kvm_mmu_unload(vcpu)?, just when setting vmcs12->ept_pointer?)
> >>
> >
> > Well, the correct thing to do is have a wrapper around it in mmu.c
> > without directly calling here and also call this function before
> > nested_mmu is initialized. I am working on a separate patch for this btw.
>
> Sounds good. Also thought that encapsulating this might be a good option.
Seconded. :)
> >>> + if (vmcs12->ept_pointer != address) {
> >>> + if (!check_ept_address_valid(vcpu, address)) {
> >>> + kunmap(page);
> >>> + nested_release_page_clean(page);
> >>> + return 1;
> >>> + }
> >>> + kvm_mmu_unload(vcpu);
> >>> + vmcs12->ept_pointer = address;
> >>> + /*
> >>> + * TODO: Check what's the correct approach in case
> >>> + * mmu reload fails. Currently, we just let the next
> >>> + * reload potentially fail
> >>> + */
> >>> + kvm_mmu_reload(vcpu);
> >>
> >> So, what actually happens if this generates a tripple fault? I guess we
> >> will kill the (nested) hypervisor?
> >
> > Yes. Not sure what's the right thing to do is though...
Right, we can even drop kvm_mmu_reload() here for now and let the one in
vcpu_enter_guest() take care of the thing.
> Wonder what happens on real hardware.
This operation cannot fail on real hardware. All addresses within the
physical address limit return something when read. On Intel, this is a
value with all bits set (-1) and will cause an EPT misconfiguration VM
exit on the next page walk (instruction decoding).
Thanks.
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v5 3/3] KVM: nVMX: Emulate EPTP switching for the L1 hypervisor
2017-07-28 19:52 ` [PATCH v5 3/3] KVM: nVMX: Emulate EPTP switching " Bandan Das
2017-07-31 11:59 ` David Hildenbrand
@ 2017-08-01 15:17 ` Radim Krčmář
2017-08-01 18:30 ` Bandan Das
1 sibling, 1 reply; 10+ messages in thread
From: Radim Krčmář @ 2017-08-01 15:17 UTC (permalink / raw)
To: Bandan Das; +Cc: kvm, pbonzini, david, jmattson, linux-kernel
2017-07-28 15:52-0400, Bandan Das:
> When L2 uses vmfunc, L0 utilizes the associated vmexit to
> emulate a switching of the ept pointer by reloading the
> guest MMU.
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> Signed-off-by: Bandan Das <bsd@redhat.com>
> ---
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> @@ -7767,6 +7781,85 @@ static int handle_preemption_timer(struct kvm_vcpu *vcpu)
> return 1;
> }
>
> +static bool check_ept_address_valid(struct kvm_vcpu *vcpu, u64 address)
> +{
> + struct vcpu_vmx *vmx = to_vmx(vcpu);
> + u64 mask = VMX_EPT_RWX_MASK;
> + int maxphyaddr = cpuid_maxphyaddr(vcpu);
> + struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
> +
> + /* Check for execute_only validity */
> + if ((address & mask) == VMX_EPT_EXECUTABLE_MASK) {
> + if (!(vmx->nested.nested_vmx_ept_caps &
> + VMX_EPT_EXECUTE_ONLY_BIT))
> + return false;
> + }
This check looks wrong ... bits 0:2 define the memory type:
0 = Uncacheable (UC)
6 = Write-back (WB)
Whether those are supported is reported in MSR IA32_VMX_EPT_VPID_CAP, so I think it should
return false when
(address & 0x7) == 0 && !(vmx->nested.nested_vmx_ept_caps & VMX_EPTP_UC_BIT))
the same for 6 and VMX_EPTP_WB_BIT and unconditionally for the remaining
types.
Btw. when is TLB flushed after EPTP switching?
> @@ -10354,10 +10456,20 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
> vmx->nested.nested_vmx_entry_ctls_high))
> return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
>
> - if (nested_cpu_has_vmfunc(vmcs12) &&
> - (vmcs12->vm_function_control &
> - ~vmx->nested.nested_vmx_vmfunc_controls))
> - return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
> + if (nested_cpu_has_vmfunc(vmcs12)) {
> + if (vmcs12->vm_function_control &
> + ~vmx->nested.nested_vmx_vmfunc_controls)
> + return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
> +
> + if (nested_cpu_has_eptp_switching(vmcs12)) {
> + if (!nested_cpu_has_ept(vmcs12) ||
> + (vmcs12->eptp_list_address >>
> + cpuid_maxphyaddr(vcpu)) ||
> + !IS_ALIGNED(vmcs12->eptp_list_address, 4096))
page_address_valid() would make this check a bit nicer,
thanks.
> + return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v5 3/3] KVM: nVMX: Emulate EPTP switching for the L1 hypervisor
2017-08-01 15:17 ` Radim Krčmář
@ 2017-08-01 18:30 ` Bandan Das
0 siblings, 0 replies; 10+ messages in thread
From: Bandan Das @ 2017-08-01 18:30 UTC (permalink / raw)
To: Radim Krčmář; +Cc: kvm, pbonzini, david, jmattson, linux-kernel
Radim Krčmář <rkrcmar@redhat.com> writes:
> 2017-07-28 15:52-0400, Bandan Das:
>> When L2 uses vmfunc, L0 utilizes the associated vmexit to
>> emulate a switching of the ept pointer by reloading the
>> guest MMU.
>>
>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
>> Signed-off-by: Bandan Das <bsd@redhat.com>
>> ---
>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>> @@ -7767,6 +7781,85 @@ static int handle_preemption_timer(struct kvm_vcpu *vcpu)
>> return 1;
>> }
>>
>> +static bool check_ept_address_valid(struct kvm_vcpu *vcpu, u64 address)
>> +{
>> + struct vcpu_vmx *vmx = to_vmx(vcpu);
>> + u64 mask = VMX_EPT_RWX_MASK;
>> + int maxphyaddr = cpuid_maxphyaddr(vcpu);
>> + struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
>> +
>> + /* Check for execute_only validity */
>> + if ((address & mask) == VMX_EPT_EXECUTABLE_MASK) {
>> + if (!(vmx->nested.nested_vmx_ept_caps &
>> + VMX_EPT_EXECUTE_ONLY_BIT))
>> + return false;
>> + }
>
> This check looks wrong ... bits 0:2 define the memory type:
>
> 0 = Uncacheable (UC)
> 6 = Write-back (WB)
Oops, sorry, I badly messed this up! I will incorporate these
changes and the suggestions by David to a new version.
> Whether those are supported is reported in MSR IA32_VMX_EPT_VPID_CAP, so I think it should
> return false when
>
> (address & 0x7) == 0 && !(vmx->nested.nested_vmx_ept_caps & VMX_EPTP_UC_BIT))
>
> the same for 6 and VMX_EPTP_WB_BIT and unconditionally for the remaining
> types.
>
> Btw. when is TLB flushed after EPTP switching?
From what I understand, mmu_sync_roots() calls kvm_mmu_flush_or_zap()
that sets KVM_REQ_TLB_FLUSH.
Bandan
>> @@ -10354,10 +10456,20 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
>> vmx->nested.nested_vmx_entry_ctls_high))
>> return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
>>
>> - if (nested_cpu_has_vmfunc(vmcs12) &&
>> - (vmcs12->vm_function_control &
>> - ~vmx->nested.nested_vmx_vmfunc_controls))
>> - return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
>> + if (nested_cpu_has_vmfunc(vmcs12)) {
>> + if (vmcs12->vm_function_control &
>> + ~vmx->nested.nested_vmx_vmfunc_controls)
>> + return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
>> +
>> + if (nested_cpu_has_eptp_switching(vmcs12)) {
>> + if (!nested_cpu_has_ept(vmcs12) ||
>> + (vmcs12->eptp_list_address >>
>> + cpuid_maxphyaddr(vcpu)) ||
>> + !IS_ALIGNED(vmcs12->eptp_list_address, 4096))
>
> page_address_valid() would make this check a bit nicer,
>
> thanks.
>
>> + return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
^ permalink raw reply [flat|nested] 10+ messages in thread