From: Nitin A Kamble <nitin.a.kamble@intel.com>
To: avi@redhat.com
Cc: kvm@vger.kernel.org
Subject: [patch] VMX Unrestricted mode support
Date: Thu, 28 May 2009 16:11:32 -0700
Message-ID: <1243552292.25456.23.camel@mukti.sc.intel.com>

Avi,

A new VMX feature, "Unrestricted Guest", has been added to the VMX
specification. You can find details of the feature in the latest Intel
processor manuals here:

 http://www.intel.com/products/processor/manuals

    It allows KVM guests to run real-mode and unpaged protected-mode
code natively in VMX mode when EPT is turned on. With an unrestricted
guest there is no need to emulate guest real-mode code in the vm86
container or in the emulator, and guest big real mode code also runs
natively.
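
  As background, the capability is advertised via the VMX capability
MSRs. Below is a minimal standalone sketch of probing it directly; it
assumes the kernel's rdmsrl() helper from <asm/msr.h>, and the helper
name is made up for illustration. The patch itself detects the feature
through setup_vmcs_config() and cpu_has_vmx_unrestricted_guest().

#include <linux/types.h>
#include <asm/msr.h>

/* Sketch only, not part of the patch. */
#define MSR_IA32_VMX_PROCBASED_CTLS2		0x0000048b
#define SECONDARY_EXEC_UNRESTRICTED_GUEST	0x00000080

static inline int cpu_may_support_unrestricted_guest(void)
{
	u64 vmx_msr;

	/*
	 * Assumes the CPU supports secondary execution controls at all;
	 * otherwise this MSR does not exist and the read would fault.
	 */
	rdmsrl(MSR_IA32_VMX_PROCBASED_CTLS2, vmx_msr);

	/* The high 32 bits list the controls that may be set to 1. */
	return (vmx_msr >> 32) & SECONDARY_EXEC_UNRESTRICTED_GUEST;
}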

  The attached patch enhances KVM to use the unrestricted guest feature
when it is available on the processor. It also adds a new kernel/module
parameter, unrestricted_guest, to disable the feature at boot time.
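
  For example, with this patch the feature could be turned off at module
load time with something like "modprobe kvm-intel unrestricted_guest=0",
and because the parameter is registered with S_IRUGO the effective value
should be readable from
/sys/module/kvm_intel/parameters/unrestricted_guest (module and sysfs
names assumed here).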

Signed-off-by: Nitin A Kamble <nitin.a.kamble@intel.com>

Thanks & Regards,
Nitin


diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index d2b082d..7832599 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -40,9 +40,13 @@
 #define KVM_GUEST_CR0_MASK				   \
 	(X86_CR0_PG | X86_CR0_PE | X86_CR0_WP | X86_CR0_NE \
 	 | X86_CR0_NW | X86_CR0_CD)
+#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST				\
+	(X86_CR0_WP | X86_CR0_NE | X86_CR0_TS | X86_CR0_MP)
+#define KVM_VM_CR0_ALWAYS_ON_RESTRICTED_GUEST				\
+	(KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
 #define KVM_VM_CR0_ALWAYS_ON						\
-	(X86_CR0_PG | X86_CR0_PE | X86_CR0_WP | X86_CR0_NE | X86_CR0_TS \
-	 | X86_CR0_MP)
+	(enable_unrestricted_guest ? KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST \
+	: KVM_VM_CR0_ALWAYS_ON_RESTRICTED_GUEST)
 #define KVM_GUEST_CR4_MASK						\
 	(X86_CR4_VME | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_PGE | X86_CR4_VMXE)
 #define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 498f944..c73da02 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -55,6 +55,7 @@
 #define SECONDARY_EXEC_ENABLE_EPT               0x00000002
 #define SECONDARY_EXEC_ENABLE_VPID              0x00000020
 #define SECONDARY_EXEC_WBINVD_EXITING		0x00000040
+#define SECONDARY_EXEC_UNRESTRICTED_GUEST	0x00000080
 
 
 #define PIN_BASED_EXT_INTR_MASK                 0x00000001
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 25f1239..703d2c4 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -50,6 +50,10 @@ module_param_named(flexpriority, flexpriority_enabled, bool, S_IRUGO);
 static int __read_mostly enable_ept = 1;
 module_param_named(ept, enable_ept, bool, S_IRUGO);
 
+static int __read_mostly enable_unrestricted_guest = 1;
+module_param_named(unrestricted_guest,
+			enable_unrestricted_guest, bool, S_IRUGO);
+
 static int __read_mostly emulate_invalid_guest_state = 0;
 module_param(emulate_invalid_guest_state, bool, S_IRUGO);
 
@@ -268,6 +272,12 @@ static inline int cpu_has_vmx_ept(void)
 		SECONDARY_EXEC_ENABLE_EPT;
 }
 
+static inline int cpu_has_vmx_unrestricted_guest(void)
+{
+	return vmcs_config.cpu_based_2nd_exec_ctrl &
+		SECONDARY_EXEC_UNRESTRICTED_GUEST;
+}
+
 static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm)
 {
 	return flexpriority_enabled &&
@@ -731,7 +741,7 @@ static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
 
 static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
 {
-	if (vcpu->arch.rmode.active)
+	if (vcpu->arch.rmode.active && !enable_unrestricted_guest)
 		rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
 	vmcs_writel(GUEST_RFLAGS, rflags);
 }
@@ -1195,7 +1205,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 		opt2 = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
 			SECONDARY_EXEC_WBINVD_EXITING |
 			SECONDARY_EXEC_ENABLE_VPID |
-			SECONDARY_EXEC_ENABLE_EPT;
+			SECONDARY_EXEC_ENABLE_EPT |
+			SECONDARY_EXEC_UNRESTRICTED_GUEST;
 		if (adjust_vmx_controls(min2, opt2,
 					MSR_IA32_VMX_PROCBASED_CTLS2,
 					&_cpu_based_2nd_exec_control) < 0)
@@ -1325,8 +1336,13 @@ static __init int hardware_setup(void)
 	if (!cpu_has_vmx_vpid())
 		enable_vpid = 0;
 
-	if (!cpu_has_vmx_ept())
+	if (!cpu_has_vmx_ept()) {
 		enable_ept = 0;
+		enable_unrestricted_guest = 0;
+	}
+
+	if (!cpu_has_vmx_unrestricted_guest())
+		enable_unrestricted_guest = 0;
 
 	if (!cpu_has_vmx_flexpriority())
 		flexpriority_enabled = 0;
@@ -1363,9 +1379,17 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
 	unsigned long flags;
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
-	vmx->emulation_required = 1;
 	vcpu->arch.rmode.active = 0;
 
+	update_exception_bitmap(vcpu);
+
+	if (enable_unrestricted_guest) {
+		vmx->emulation_required = 0;
+		return;
+	}
+
+	vmx->emulation_required = 1;
+
 	vmcs_writel(GUEST_TR_BASE, vcpu->arch.rmode.tr.base);
 	vmcs_write32(GUEST_TR_LIMIT, vcpu->arch.rmode.tr.limit);
 	vmcs_write32(GUEST_TR_AR_BYTES, vcpu->arch.rmode.tr.ar);
@@ -1378,8 +1402,6 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
 	vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~X86_CR4_VME) |
 			(vmcs_readl(CR4_READ_SHADOW) & X86_CR4_VME));
 
-	update_exception_bitmap(vcpu);
-
 	if (emulate_invalid_guest_state)
 		return;
 
@@ -1425,8 +1447,15 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
 	unsigned long flags;
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
-	vmx->emulation_required = 1;
 	vcpu->arch.rmode.active = 1;
+	update_exception_bitmap(vcpu);
+
+	if (enable_unrestricted_guest) {
+		vmx->emulation_required = 0;
+		goto continue_rmode;
+	}
+
+	vmx->emulation_required = 1;
 
 	vcpu->arch.rmode.tr.base = vmcs_readl(GUEST_TR_BASE);
 	vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm));
@@ -1445,7 +1474,6 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
 
 	vmcs_writel(GUEST_RFLAGS, flags);
 	vmcs_writel(GUEST_CR4, vmcs_readl(GUEST_CR4) | X86_CR4_VME);
-	update_exception_bitmap(vcpu);
 
 	if (emulate_invalid_guest_state)
 		goto continue_rmode;
@@ -1563,7 +1591,6 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
 			      CPU_BASED_CR3_STORE_EXITING));
 		vcpu->arch.cr0 = cr0;
 		vmx_set_cr4(vcpu, vcpu->arch.cr4);
-		*hw_cr0 |= X86_CR0_PE | X86_CR0_PG;
 		*hw_cr0 &= ~X86_CR0_WP;
 	} else if (!is_paging(vcpu)) {
 		/* From nonpaging to paging */
@@ -1733,6 +1760,23 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var)
 	return ar;
 }
 
+static inline u32 get_segment_ar(int seg)
+{
+	if (!enable_unrestricted_guest)
+		return 0xf3;
+
+	switch (seg) {
+	case VCPU_SREG_CS:
+		return 0x9b;
+	case VCPU_SREG_TR:
+		return 0x8b;
+	case VCPU_SREG_LDTR:
+		return 0x82;
+	default:
+		return 0x93;
+	}
+}
+
 static void vmx_set_segment(struct kvm_vcpu *vcpu,
 			    struct kvm_segment *var, int seg)
 {
@@ -1755,7 +1799,7 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
 		 */
 		if (var->base == 0xffff0000 && var->selector == 0xf000)
 			vmcs_writel(sf->base, 0xf0000);
-		ar = 0xf3;
+		ar = get_segment_ar(seg);
 	} else
 		ar = vmx_segment_access_rights(var);
 	vmcs_write32(sf->ar_bytes, ar);
@@ -2058,7 +2102,7 @@ static void seg_setup(int seg)
 	vmcs_write16(sf->selector, 0);
 	vmcs_writel(sf->base, 0);
 	vmcs_write32(sf->limit, 0xffff);
-	vmcs_write32(sf->ar_bytes, 0xf3);
+	vmcs_write32(sf->ar_bytes, get_segment_ar(seg));
 }
 
 static int alloc_apic_access_page(struct kvm *kvm)
@@ -2201,6 +2245,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 			exec_control &= ~SECONDARY_EXEC_ENABLE_VPID;
 		if (!enable_ept)
 			exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
+		if (!enable_unrestricted_guest)
+			exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
 		vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
 	}
 

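For reference, the default access-rights values picked by the new
get_segment_ar() helper decode as follows under the VMX segment AR-byte
format (my reading, not spelled out in the patch): 0x9b is a present,
DPL-0, accessed execute/read code segment; 0x93 is a present, DPL-0,
accessed read/write data segment; 0x8b is a busy 32-bit TSS; 0x82 is an
LDT; and the previous blanket value 0xf3 is a DPL-3 read/write data
segment, as required for the vm86 container.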