All of lore.kernel.org
 help / color / mirror / Atom feed
From: Bandan Das <bsd@redhat.com>
To: kvm@vger.kernel.org
Cc: pbonzini@redhat.com, linux-kernel@vger.kernel.org
Subject: [PATCH 1/2] KVM: nVMX: Implement EPTP switching for the L1 hypervisor
Date: Thu, 29 Jun 2017 19:29:55 -0400	[thread overview]
Message-ID: <20170629232956.12300-2-bsd@redhat.com> (raw)
In-Reply-To: <20170629232956.12300-1-bsd@redhat.com>

This is a mix of emulation/passthrough to implement EPTP
switching for the nested hypervisor.

If the shadow EPT structures are absent, a vmexit occurs with reason 59.
L0 can then create shadow structures based on the entry the guest
called with, to obtain a new root_hpa that can be written to the
shadow list, and subsequently reload the mmu to resume L2.
On the next vmfunc(0, index), however, the processor will load the
entry without an exit.

Signed-off-by: Bandan Das <bsd@redhat.com>
---
 arch/x86/include/asm/vmx.h |   5 +++
 arch/x86/kvm/vmx.c         | 104 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 109 insertions(+)

diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 35cd06f..e06783e 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -72,6 +72,7 @@
 #define SECONDARY_EXEC_PAUSE_LOOP_EXITING	0x00000400
 #define SECONDARY_EXEC_RDRAND			0x00000800
 #define SECONDARY_EXEC_ENABLE_INVPCID		0x00001000
+#define SECONDARY_EXEC_ENABLE_VMFUNC            0x00002000
 #define SECONDARY_EXEC_SHADOW_VMCS              0x00004000
 #define SECONDARY_EXEC_RDSEED			0x00010000
 #define SECONDARY_EXEC_ENABLE_PML               0x00020000
@@ -114,6 +115,10 @@
 #define VMX_MISC_SAVE_EFER_LMA			0x00000020
 #define VMX_MISC_ACTIVITY_HLT			0x00000040
 
+/* VMFUNC functions */
+#define VMX_VMFUNC_EPTP_SWITCHING               0x00000001
+#define VMFUNC_EPTP_ENTRIES  512
+
 static inline u32 vmx_basic_vmcs_revision_id(u64 vmx_basic)
 {
 	return vmx_basic & GENMASK_ULL(30, 0);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ca5d2b9..75049c0 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -240,11 +240,13 @@ struct __packed vmcs12 {
 	u64 virtual_apic_page_addr;
 	u64 apic_access_addr;
 	u64 posted_intr_desc_addr;
+	u64 vm_function_control;
 	u64 ept_pointer;
 	u64 eoi_exit_bitmap0;
 	u64 eoi_exit_bitmap1;
 	u64 eoi_exit_bitmap2;
 	u64 eoi_exit_bitmap3;
+	u64 eptp_list_address;
 	u64 xss_exit_bitmap;
 	u64 guest_physical_address;
 	u64 vmcs_link_pointer;
@@ -441,6 +443,7 @@ struct nested_vmx {
 	struct page *apic_access_page;
 	struct page *virtual_apic_page;
 	struct page *pi_desc_page;
+	struct page *shadow_eptp_list;
 	struct pi_desc *pi_desc;
 	bool pi_pending;
 	u16 posted_intr_nv;
@@ -481,6 +484,7 @@ struct nested_vmx {
 	u64 nested_vmx_cr4_fixed0;
 	u64 nested_vmx_cr4_fixed1;
 	u64 nested_vmx_vmcs_enum;
+	u64 nested_vmx_vmfunc_controls;
 };
 
 #define POSTED_INTR_ON  0
@@ -1314,6 +1318,22 @@ static inline bool cpu_has_vmx_tsc_scaling(void)
 		SECONDARY_EXEC_TSC_SCALING;
 }
 
+static inline bool cpu_has_vmx_vmfunc(void)
+{
+	return vmcs_config.cpu_based_exec_ctrl &
+		SECONDARY_EXEC_ENABLE_VMFUNC;
+}
+
+static inline u64 vmx_vmfunc_controls(void)
+{
+	u64 controls = 0;
+
+	if (cpu_has_vmx_vmfunc())
+		rdmsrl(MSR_IA32_VMX_VMFUNC, controls);
+
+	return controls;
+}
+
 static inline bool report_flexpriority(void)
 {
 	return flexpriority_enabled;
@@ -1388,6 +1408,18 @@ static inline bool nested_cpu_has_posted_intr(struct vmcs12 *vmcs12)
 	return vmcs12->pin_based_vm_exec_control & PIN_BASED_POSTED_INTR;
 }
 
+static inline bool nested_cpu_has_vmfunc(struct vmcs12 *vmcs12)
+{
+	return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_VMFUNC);
+}
+
+static inline bool nested_cpu_has_eptp_switching(struct vmcs12 *vmcs12)
+{
+	return nested_cpu_has_vmfunc(vmcs12) &&
+		(vmcs12->vm_function_control &
+		 VMX_VMFUNC_EPTP_SWITCHING);
+}
+
 static inline bool is_nmi(u32 intr_info)
 {
 	return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
@@ -3143,6 +3175,9 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
 		*pdata = vmx->nested.nested_vmx_ept_caps |
 			((u64)vmx->nested.nested_vmx_vpid_caps << 32);
 		break;
+	case MSR_IA32_VMX_VMFUNC:
+		*pdata = vmx->nested.nested_vmx_vmfunc_controls;
+		break;
 	default:
 		return 1;
 	}
@@ -6959,6 +6994,14 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu)
 		vmx->vmcs01.shadow_vmcs = shadow_vmcs;
 	}
 
+	if (vmx_vmfunc_controls() & 1) {
+		struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+
+		if (!page)
+			goto out_shadow_vmcs;
+		vmx->nested.shadow_eptp_list = page;
+	}
+
 	INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool));
 	vmx->nested.vmcs02_num = 0;
 
@@ -7128,6 +7171,11 @@ static void free_nested(struct vcpu_vmx *vmx)
 		vmx->vmcs01.shadow_vmcs = NULL;
 	}
 	kfree(vmx->nested.cached_vmcs12);
+
+	if (vmx->nested.shadow_eptp_list) {
+		__free_page(vmx->nested.shadow_eptp_list);
+		vmx->nested.shadow_eptp_list = NULL;
+	}
 	/* Unpin physical memory we referred to in current vmcs02 */
 	if (vmx->nested.apic_access_page) {
 		nested_release_page(vmx->nested.apic_access_page);
@@ -7740,6 +7788,61 @@ static int handle_preemption_timer(struct kvm_vcpu *vcpu)
 	return 1;
 }
 
+static int handle_vmfunc(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	struct vmcs12 *vmcs12;
+	struct page *page = NULL,
+		*shadow_page = vmx->nested.shadow_eptp_list;
+	u64 *l1_eptp_list, *shadow_eptp_list;
+	u32 index = vcpu->arch.regs[VCPU_REGS_RCX];
+	u32 function = vcpu->arch.regs[VCPU_REGS_RAX];
+
+	if (is_guest_mode(vcpu)) {
+		vmcs12 = get_vmcs12(vcpu);
+		if (!nested_cpu_has_ept(vmcs12) ||
+		    !nested_cpu_has_eptp_switching(vmcs12))
+			goto fail;
+
+		/*
+		 * Only function 0 is valid, everything up to 63 injects VMFUNC
+		 * exit reason to L1, and #UD thereafter
+		 */
+		if (function || !vmcs12->eptp_list_address ||
+		    index >= VMFUNC_EPTP_ENTRIES)
+			goto fail;
+
+		page = nested_get_page(vcpu, vmcs12->eptp_list_address);
+		if (!page)
+			goto fail;
+
+		l1_eptp_list = kmap(page);
+		if (!l1_eptp_list[index])
+			goto fail;
+
+		kvm_mmu_unload(vcpu);
+		/*
+		 * TODO: Verify that guest ept satisfies vmentry prereqs
+		 */
+		vmcs12->ept_pointer = l1_eptp_list[index];
+		shadow_eptp_list = phys_to_virt(page_to_phys(shadow_page));
+		kvm_mmu_reload(vcpu);
+		shadow_eptp_list[index] =
+			construct_eptp(vcpu->arch.mmu.root_hpa);
+		kunmap(page);
+
+		return kvm_skip_emulated_instruction(vcpu);
+	}
+
+fail:
+	if (page)
+		kunmap(page);
+	nested_vmx_vmexit(vcpu, vmx->exit_reason,
+			  vmcs_read32(VM_EXIT_INTR_INFO),
+			  vmcs_readl(EXIT_QUALIFICATION));
+	return 1;
+}
+
 /*
  * The exit handlers return 1 if the exit was handled fully and guest execution
  * may resume.  Otherwise they set the kvm_run parameter to indicate what needs
@@ -7790,6 +7893,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
 	[EXIT_REASON_XSAVES]                  = handle_xsaves,
 	[EXIT_REASON_XRSTORS]                 = handle_xrstors,
 	[EXIT_REASON_PML_FULL]		      = handle_pml_full,
+	[EXIT_REASON_VMFUNC]                  = handle_vmfunc,
 	[EXIT_REASON_PREEMPTION_TIMER]	      = handle_preemption_timer,
 };
 
-- 
2.9.4

  reply	other threads:[~2017-06-29 23:30 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-06-29 23:29 [PATCH 0/2] Expose VMFUNC to the nested hypervisor Bandan Das
2017-06-29 23:29 ` Bandan Das [this message]
2017-06-30  7:29   ` [PATCH 1/2] KVM: nVMX: Implement EPTP switching for the L1 hypervisor Paolo Bonzini
2017-06-30 17:20     ` Bandan Das
2017-07-01  5:34       ` Paolo Bonzini
2017-06-29 23:29 ` [PATCH 2/2] KVM: nVMX: Advertise VMFUNC to " Bandan Das
2017-06-30 17:06 ` [PATCH 0/2] Expose VMFUNC to the nested hypervisor Jim Mattson
2017-06-30 17:58   ` Bandan Das
2017-06-30 19:06   ` Paolo Bonzini

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170629232956.12300-2-bsd@redhat.com \
    --to=bsd@redhat.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=pbonzini@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.