All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] kvm-unit-tests : The first version of VMX nested test case
@ 2013-07-16  9:27 Arthur Chunqi Li
  2013-07-16  9:35 ` Arthur Chunqi Li
  2013-07-16 10:28 ` Paolo Bonzini
  0 siblings, 2 replies; 14+ messages in thread
From: Arthur Chunqi Li @ 2013-07-16  9:27 UTC (permalink / raw)
  To: kvm; +Cc: jan.kiszka, gleb, pbonzini, Arthur Chunqi Li

This is the first version for VMX nested environment test case. It
contains the basic VMX instructions test cases, including VMXON/
VMXOFF/VMXPTRLD/VMXPTRST/VMCLEAR/VMLAUNCH/VMRESUME/VMCALL. This patch
also tests the basic execution routine in VMX nested environment and
lets the VM print "Hello World" to confirm that it ran successfully.

New files added:
x86/vmx.h : contains all VMX related macro declarations
x86/vmx.c : main file for VMX nested test case

Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
---
 config-x86-common.mak |    2 +
 config-x86_64.mak     |    1 +
 lib/x86/msr.h         |    5 +
 x86/cstart64.S        |    4 +
 x86/unittests.cfg     |    6 +
 x86/vmx.c             |  568 +++++++++++++++++++++++++++++++++++++++++++++++++
 x86/vmx.h             |  406 +++++++++++++++++++++++++++++++++++
 7 files changed, 992 insertions(+)
 create mode 100644 x86/vmx.c
 create mode 100644 x86/vmx.h

diff --git a/config-x86-common.mak b/config-x86-common.mak
index 455032b..34a41e1 100644
--- a/config-x86-common.mak
+++ b/config-x86-common.mak
@@ -101,6 +101,8 @@ $(TEST_DIR)/asyncpf.elf: $(cstart.o) $(TEST_DIR)/asyncpf.o
 
 $(TEST_DIR)/pcid.elf: $(cstart.o) $(TEST_DIR)/pcid.o
 
+$(TEST_DIR)/vmx.elf: $(cstart.o) $(TEST_DIR)/vmx.o
+
 arch_clean:
 	$(RM) $(TEST_DIR)/*.o $(TEST_DIR)/*.flat $(TEST_DIR)/*.elf \
 	$(TEST_DIR)/.*.d $(TEST_DIR)/lib/.*.d $(TEST_DIR)/lib/*.o
diff --git a/config-x86_64.mak b/config-x86_64.mak
index 4e525f5..bb8ee89 100644
--- a/config-x86_64.mak
+++ b/config-x86_64.mak
@@ -9,5 +9,6 @@ tests = $(TEST_DIR)/access.flat $(TEST_DIR)/apic.flat \
 	  $(TEST_DIR)/xsave.flat $(TEST_DIR)/rmap_chain.flat \
 	  $(TEST_DIR)/pcid.flat
 tests += $(TEST_DIR)/svm.flat
+tests += $(TEST_DIR)/vmx.flat
 
 include config-x86-common.mak
diff --git a/lib/x86/msr.h b/lib/x86/msr.h
index 509a421..281255a 100644
--- a/lib/x86/msr.h
+++ b/lib/x86/msr.h
@@ -396,6 +396,11 @@
 #define MSR_IA32_VMX_VMCS_ENUM          0x0000048a
 #define MSR_IA32_VMX_PROCBASED_CTLS2    0x0000048b
 #define MSR_IA32_VMX_EPT_VPID_CAP       0x0000048c
+#define MSR_IA32_VMX_TRUE_PIN		0x0000048d
+#define MSR_IA32_VMX_TRUE_PROC		0x0000048e
+#define MSR_IA32_VMX_TRUE_EXIT		0x0000048f
+#define MSR_IA32_VMX_TRUE_ENTRY		0x00000490
+
 
 /* AMD-V MSRs */
 
diff --git a/x86/cstart64.S b/x86/cstart64.S
index 24df5f8..0fe76da 100644
--- a/x86/cstart64.S
+++ b/x86/cstart64.S
@@ -4,6 +4,10 @@
 .globl boot_idt
 boot_idt = 0
 
+.globl idt_descr
+.globl tss_descr
+.globl gdt64_desc
+
 ipi_vector = 0x20
 
 max_cpus = 64
diff --git a/x86/unittests.cfg b/x86/unittests.cfg
index bc9643e..e846739 100644
--- a/x86/unittests.cfg
+++ b/x86/unittests.cfg
@@ -149,3 +149,9 @@ extra_params = --append "10000000 `date +%s`"
 file = pcid.flat
 extra_params = -cpu qemu64,+pcid
 arch = x86_64
+
+[vmx]
+file = vmx.flat
+extra_params = -cpu Nehalem,+vmx
+arch = x86_64
+
diff --git a/x86/vmx.c b/x86/vmx.c
new file mode 100644
index 0000000..0435746
--- /dev/null
+++ b/x86/vmx.c
@@ -0,0 +1,568 @@
+#include "libcflat.h"
+#include "processor.h"
+#include "vm.h"
+#include "desc.h"
+#include "vmx.h"
+#include "msr.h"
+#include "smp.h"
+#include "io.h"
+
+
+int fails = 0, tests = 0;
+u32 *vmxon_region;
+struct vmcs *vmcs_root;
+void *io_bmp1, *io_bmp2;
+void *msr_bmp;
+u32 vpid_ctr;
+char *guest_stack, *host_stack;
+char *guest_syscall_stack, *host_syscall_stack;
+u32 ctrl_pin, ctrl_enter, ctrl_exit, ctrl_cpu[2];
+ulong fix_cr0_set, fix_cr0_clr;
+ulong fix_cr4_set, fix_cr4_clr;
+struct regs regs;
+
+extern u64 gdt64_desc[];
+extern u64 idt_descr[];
+extern u64 tss_descr[];
+extern void *entry_vmx;
+extern void *entry_sysenter;
+extern void *entry_guest;
+
+void report(const char *name, int result)
+{
+	++tests;
+	if (result)
+		printf("PASS: %s\n", name);
+	else {
+		printf("FAIL: %s\n", name);
+		++fails;
+	}
+}
+
+inline u64 get_rflags(void)
+{
+	u64 r;
+	asm volatile("pushf; pop %0\n\t" : "=q"(r) : : "cc");
+	return r;
+}
+
+inline void set_rflags(u64 r)
+{
+	asm volatile("push %0; popf\n\t" : : "q"(r) : "cc");
+}
+
+int vmcs_clear(struct vmcs *vmcs)
+{
+	bool ret;
+	asm volatile ("vmclear %1; seta %0" : "=q" (ret) : "m" (vmcs) : "cc");
+	return !ret;
+}
+
+u64 vmcs_read(enum Encoding enc)
+{
+	u64 val;
+	asm volatile ("vmread %1, %0" : "=rm" (val) : "r" ((u64)enc) : "cc");
+	return val;
+}
+
+int vmcs_write(enum Encoding enc, u64 val)
+{
+	bool ret;
+	asm volatile ("vmwrite %1, %2; seta %0"
+		: "=q"(ret) : "rm" (val), "r" ((u64)enc) : "cc");
+	return !ret;
+}
+
+int make_vmcs_current(struct vmcs *vmcs)
+{
+	bool ret;
+
+	asm volatile ("vmptrld %1; seta %0" : "=q" (ret) : "m" (vmcs) : "cc");
+	return !ret;
+}
+
+int save_vmcs(struct vmcs **vmcs)
+{
+	bool ret;
+
+	asm volatile ("vmptrst %1; seta %0" : "=q" (ret) : "m" (*vmcs) : "cc");
+	return !ret;
+}
+
+/* entry_vmx */
+asm(
+	".align	4, 0x90\n\t"
+	".globl	entry_vmx\n\t"
+	"entry_vmx:\n\t"
+	SAVE_GPR
+	"	call	vmx_handler\n\t"
+	LOAD_GPR
+	"	vmresume\n\t"
+);
+
+/* entry_sysenter */
+asm(
+	".align	4, 0x90\n\t"
+	".globl	entry_sysenter\n\t"
+	"entry_sysenter:\n\t"
+	SAVE_GPR
+	"	and	$0xf, %rax\n\t"
+	"	push	%rax\n\t"
+	"	call	syscall_handler\n\t"
+);
+
+void syscall_handler(u64 syscall_no)
+{
+	printf("Here in syscall_handler, syscall_no = %d\n", syscall_no);
+}
+
+void vmx_run()
+{
+	bool ret;
+	printf("Now run vm.\n\n");
+	asm volatile("vmlaunch;seta %0\n\t" : "=m"(ret));
+	printf("VMLAUNCH error, ret=%d\n", ret);
+}
+
+void vmx_resume()
+{
+	asm volatile(LOAD_GPR
+		"vmresume\n\t");
+	/* VMRESUME fails if we reach here */
+}
+
+void print_vmexit_info()
+{
+	u64 guest_rip, guest_rsp;
+	ulong reason = vmcs_read(EXI_REASON) & 0xff;
+	ulong exit_qual = vmcs_read(EXI_QUALIFICATION);
+	guest_rip = vmcs_read(GUEST_RIP);
+	guest_rsp = vmcs_read(GUEST_RSP);
+	printf("VMEXIT info:\n");
+	printf("\tvmexit reason = %d\n", reason);
+	printf("\texit qualification = 0x%x\n", exit_qual);
+	printf("\tBit 31 of reason = %x\n", (vmcs_read(EXI_REASON) >> 31) & 1);
+	printf("\tguest_rip = 0x%llx\n", guest_rip);
+	printf("\tRAX=0x%llx    RBX=0x%llx    RCX=0x%llx    RDX=0x%llx\n",
+		regs.rax, regs.rbx, regs.rcx, regs.rdx);
+	printf("\tRSP=0x%llx    RBP=0x%llx    RSI=0x%llx    RDI=0x%llx\n",
+		guest_rsp, regs.rbp, regs.rsi, regs.rdi);
+	printf("\tR8 =0x%llx    R9 =0x%llx    R10=0x%llx    R11=0x%llx\n",
+		regs.r8, regs.r9, regs.r10, regs.r11);
+	printf("\tR12=0x%llx    R13=0x%llx    R14=0x%llx    R15=0x%llx\n",
+		regs.r12, regs.r13, regs.r14, regs.r15);
+}
+
+void test_vmclear(void)
+{
+	u64 rflags;
+
+	rflags = get_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;
+	set_rflags(rflags);
+	report("test vmclear", vmcs_clear(vmcs_root) == 0);
+}
+
+void test_vmxoff(void)
+{
+	bool ret;
+	u64 rflags;
+
+	rflags = get_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;
+	set_rflags(rflags);
+	asm volatile("vmxoff; seta %0\n\t" : "=q"(ret) : : "cc");
+	report("test vmxoff", ret);
+}
+
+void vmx_exit(void)
+{
+	test_vmxoff();
+	printf("\nSUMMARY: %d tests, %d failures\n", tests, fails);
+	exit(fails ? -1 : 0);
+}
+
+void vmx_handler()
+{
+	u64 guest_rip;
+	ulong reason = vmcs_read(EXI_REASON) & 0xff;
+
+	if ((read_cr4() & CR4_PAE) && (read_cr0() & CR0_PG)
+		&& !(rdmsr(MSR_EFER) & EFER_LMA))
+		printf("ERROR : PDPTEs should be checked\n");
+
+	guest_rip = vmcs_read(GUEST_RIP);
+
+	switch (reason) {
+	case VMX_VMCALL:
+		switch (regs.rax) {
+		case TEST_VMRESUME:
+			regs.rax = 0xFFFF;
+			break;
+		default:
+			printf("ERROR : Invalid VMCALL param : %d\n", regs.rax);
+		}
+		vmcs_write(GUEST_RIP, guest_rip + 3);
+		goto vmx_resume;
+	case VMX_IO:
+		print_vmexit_info();
+		break;
+	case VMX_HLT:
+		printf("\nVM exit.\n");
+		vmx_exit();
+		/* Should not reach here */
+		goto vmx_exit;
+	case VMX_EXC_NMI:
+	case VMX_EXTINT:
+	case VMX_INVLPG:
+	case VMX_CR:
+	case VMX_EPT_VIOLATION:
+	default:
+		break;
+	}
+	printf("ERROR : Unhandled vmx exit.\n");
+	print_vmexit_info();
+vmx_exit:
+	exit(-1);
+vmx_resume:
+	vmx_resume();
+	/* Should not reach here */
+	exit(-1);
+}
+
+void test_vmresume()
+{
+	u64 rax;
+	u64 rsp, resume_rsp;
+
+	rax = 0;
+	asm volatile("mov %%rsp, %0\n\t" : "=r"(rsp));
+	asm volatile("mov %2, %%rax\n\t"
+		"vmcall\n\t"
+		"mov %%rax, %0\n\t"
+		"mov %%rsp, %1\n\t"
+		: "=r"(rax), "=r"(resume_rsp)
+		: "g"(TEST_VMRESUME));
+	report("test vmresume", (rax == 0xFFFF) && (rsp == resume_rsp));
+}
+
+/* entry_guest */
+asm(
+	".align	4, 0x90\n\t"
+	".globl	entry_guest\n\t"
+	"entry_guest:\n\t"
+	"	call	guest_main\n\t"
+	"	hlt\n\t"
+);
+
+void guest_main(void)
+{
+	/* If reach here, VMLAUNCH runs OK */
+	report("test vmlaunch", 1);
+	printf("cr0 in guest = %llx\n", read_cr0());
+	printf("cr3 in guest = %llx\n", read_cr3());
+	printf("cr4 in guest = %llx\n", read_cr4());
+	printf("\nHello World!\n");
+	test_vmresume();
+}
+
+void init_vmcs_ctrl(void)
+{
+	/* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */
+	/* 26.2.1.1 */
+	vmcs_write(PIN_CONTROLS, ctrl_pin);
+	/* Disable VMEXIT of IO instruction */
+	vmcs_write(CPU_EXEC_CTRL0, ctrl_cpu[0]);
+	if (ctrl_cpu_rev[0].set & CPU_SECONDARY) {
+		ctrl_cpu[1] |= ctrl_cpu_rev[1].set & ctrl_cpu_rev[1].clr;
+		vmcs_write(CPU_EXEC_CTRL1, ctrl_cpu[1]);
+	}
+	vmcs_write(CR3_TARGET_COUNT, 0);
+	io_bmp1 = alloc_page();
+	io_bmp2 = alloc_page();
+	memset(io_bmp1, 0, PAGE_SIZE);
+	memset(io_bmp2, 0, PAGE_SIZE);
+	vmcs_write(IO_BITMAP_A, (u64)io_bmp1);
+	vmcs_write(IO_BITMAP_B, (u64)io_bmp2);
+	msr_bmp = alloc_page();
+	memset(msr_bmp, 0, PAGE_SIZE);
+	vmcs_write(MSR_BITMAP, (u64)msr_bmp);
+	vmcs_write(VPID, ++vpid_ctr);
+}
+
+void init_vmcs_host(void)
+{
+	/* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */
+	/* 26.2.1.2 */
+	vmcs_write(HOST_EFER, rdmsr(MSR_EFER));
+
+	/* 26.2.1.3 */
+	vmcs_write(ENT_CONTROLS, ctrl_enter);
+	vmcs_write(EXI_CONTROLS, ctrl_exit);
+
+	/* 26.2.2 */
+	vmcs_write(HOST_CR0, read_cr0());
+	vmcs_write(HOST_CR3, read_cr3());
+	vmcs_write(HOST_CR4, read_cr4());
+	vmcs_write(HOST_SYSENTER_ESP,
+		(u64)(host_syscall_stack + PAGE_SIZE - 1));
+	vmcs_write(HOST_SYSENTER_EIP, (u64)(&entry_sysenter));
+	vmcs_write(HOST_SYSENTER_CS,  SEL_KERN_CODE_64);
+
+	/* 26.2.3 */
+	vmcs_write(HOST_SEL_CS, SEL_KERN_CODE_64);
+	vmcs_write(HOST_SEL_SS, SEL_KERN_DATA_64);
+	vmcs_write(HOST_SEL_DS, SEL_KERN_DATA_64);
+	vmcs_write(HOST_SEL_ES, SEL_KERN_DATA_64);
+	vmcs_write(HOST_SEL_FS, SEL_KERN_DATA_64);
+	vmcs_write(HOST_SEL_GS, SEL_KERN_DATA_64);
+	vmcs_write(HOST_SEL_TR, SEL_TSS_RUN);
+	vmcs_write(HOST_BASE_TR,   (u64)tss_descr);
+	vmcs_write(HOST_BASE_GDTR, (u64)gdt64_desc);
+	vmcs_write(HOST_BASE_IDTR, (u64)idt_descr);
+	vmcs_write(HOST_BASE_FS, 0);
+	vmcs_write(HOST_BASE_GS, 0);
+
+	/* Set other vmcs area */
+	vmcs_write(PF_ERROR_MASK, 0);
+	vmcs_write(PF_ERROR_MATCH, 0);
+	vmcs_write(VMCS_LINK_PTR, ~0ul);
+	vmcs_write(VMCS_LINK_PTR_HI, ~0ul);
+	vmcs_write(HOST_RSP, (u64)(host_stack + PAGE_SIZE - 1));
+	vmcs_write(HOST_RIP, (u64)(&entry_vmx));
+}
+
+void init_vmcs_guest(void)
+{
+	/* 26.3 CHECKING AND LOADING GUEST STATE */
+	ulong guest_cr0, guest_cr4, guest_cr3;
+	/* 26.3.1.1 */
+	guest_cr0 = read_cr0();
+	guest_cr4 = read_cr4();
+	guest_cr3 = read_cr3();
+	if (ctrl_enter & ENT_GUEST_64) {
+		guest_cr0 |= CR0_PG;
+		guest_cr4 |= CR4_PAE;
+	}
+	if ((ctrl_enter & ENT_GUEST_64) == 0)
+		guest_cr4 &= (~CR4_PCIDE);
+	if (guest_cr0 & CR0_PG)
+		guest_cr0 |= CR0_PE;
+	vmcs_write(GUEST_CR0, guest_cr0);
+	vmcs_write(GUEST_CR3, guest_cr3);
+	vmcs_write(GUEST_CR4, guest_cr4);
+	vmcs_write(GUEST_SYSENTER_CS,  SEL_KERN_CODE_64);
+	vmcs_write(GUEST_SYSENTER_ESP,
+		(u64)(guest_syscall_stack + PAGE_SIZE - 1));
+	vmcs_write(GUEST_SYSENTER_EIP, (u64)(&entry_sysenter));
+	vmcs_write(GUEST_DR7, 0);
+	vmcs_write(GUEST_EFER, rdmsr(MSR_EFER));
+
+	/* 26.3.1.2 */
+	vmcs_write(GUEST_SEL_CS, SEL_KERN_CODE_64);
+	vmcs_write(GUEST_SEL_SS, SEL_KERN_DATA_64);
+	vmcs_write(GUEST_SEL_DS, SEL_KERN_DATA_64);
+	vmcs_write(GUEST_SEL_ES, SEL_KERN_DATA_64);
+	vmcs_write(GUEST_SEL_FS, SEL_KERN_DATA_64);
+	vmcs_write(GUEST_SEL_GS, SEL_KERN_DATA_64);
+	vmcs_write(GUEST_SEL_TR, SEL_TSS_RUN);
+	vmcs_write(GUEST_SEL_LDTR, 0);
+
+	vmcs_write(GUEST_BASE_CS, 0);
+	vmcs_write(GUEST_BASE_ES, 0);
+	vmcs_write(GUEST_BASE_SS, 0);
+	vmcs_write(GUEST_BASE_DS, 0);
+	vmcs_write(GUEST_BASE_FS, 0);
+	vmcs_write(GUEST_BASE_GS, 0);
+	vmcs_write(GUEST_BASE_TR,   (u64)tss_descr);
+	vmcs_write(GUEST_BASE_LDTR, 0);
+
+	vmcs_write(GUEST_LIMIT_CS, 0xFFFFFFFF);
+	vmcs_write(GUEST_LIMIT_DS, 0xFFFFFFFF);
+	vmcs_write(GUEST_LIMIT_ES, 0xFFFFFFFF);
+	vmcs_write(GUEST_LIMIT_SS, 0xFFFFFFFF);
+	vmcs_write(GUEST_LIMIT_FS, 0xFFFFFFFF);
+	vmcs_write(GUEST_LIMIT_GS, 0xFFFFFFFF);
+	vmcs_write(GUEST_LIMIT_LDTR, 0xffff);
+	vmcs_write(GUEST_LIMIT_TR, ((struct descr *)tss_descr)->limit);
+
+	vmcs_write(GUEST_AR_CS, 0xa09b);
+	vmcs_write(GUEST_AR_DS, 0xc093);
+	vmcs_write(GUEST_AR_ES, 0xc093);
+	vmcs_write(GUEST_AR_FS, 0xc093);
+	vmcs_write(GUEST_AR_GS, 0xc093);
+	vmcs_write(GUEST_AR_SS, 0xc093);
+	vmcs_write(GUEST_AR_LDTR, 0x82);
+	vmcs_write(GUEST_AR_TR, 0x8b);
+
+	/* 26.3.1.3 */
+	vmcs_write(GUEST_BASE_GDTR, (u64)gdt64_desc);
+	vmcs_write(GUEST_BASE_IDTR, (u64)idt_descr);
+	vmcs_write(GUEST_LIMIT_GDTR,
+		((struct descr *)gdt64_desc)->limit & 0xffff);
+	vmcs_write(GUEST_LIMIT_IDTR,
+		((struct descr *)idt_descr)->limit & 0xffff);
+
+	/* 26.3.1.4 */
+	vmcs_write(GUEST_RIP, (u64)(&entry_guest));
+	vmcs_write(GUEST_RSP, (u64)(guest_stack + PAGE_SIZE - 1));
+	vmcs_write(GUEST_RFLAGS, 0x2);
+
+	/* 26.3.1.5 */
+	vmcs_write(GUEST_ACTV_STATE, 0);
+	vmcs_write(GUEST_INTR_STATE, 0);
+}
+
+int init_vmcs(struct vmcs **vmcs)
+{
+	*vmcs = alloc_page();
+	memset(*vmcs, 0, PAGE_SIZE);
+	(*vmcs)->revision_id = basic.revision;
+	/* vmclear first to init vmcs */
+	if (vmcs_clear(*vmcs)) {
+		printf("%s : vmcs_clear error\n", __func__);
+		return 1;
+	}
+
+	if (make_vmcs_current(*vmcs)) {
+		printf("%s : make_vmcs_current error\n", __func__);
+		return 1;
+	}
+
+	/* All settings to pin/exit/enter/cpu
+	   control fields should be placed here */
+	ctrl_pin |= PIN_EXTINT | PIN_NMI | PIN_VIRT_NMI;
+	ctrl_exit = EXI_LOAD_EFER | EXI_HOST_64;
+	ctrl_enter = (ENT_LOAD_EFER | ENT_GUEST_64);
+	ctrl_cpu[0] |= CPU_HLT;
+	/* Disable IO instruction VMEXIT now */
+	ctrl_cpu[0] &= (~(CPU_IO | CPU_IO_BITMAP));
+	ctrl_cpu[1] = 0;
+
+	ctrl_pin = (ctrl_pin | ctrl_pin_rev.set) & ctrl_pin_rev.clr;
+	ctrl_enter = (ctrl_enter | ctrl_enter_rev.set) & ctrl_enter_rev.clr;
+	ctrl_exit = (ctrl_exit | ctrl_exit_rev.set) & ctrl_exit_rev.clr;
+	ctrl_cpu[0] = (ctrl_cpu[0] | ctrl_cpu_rev[0].set) & ctrl_cpu_rev[0].clr;
+
+	init_vmcs_ctrl();
+	init_vmcs_host();
+	init_vmcs_guest();
+	return 0;
+}
+
+void init_vmx(void)
+{
+	vmxon_region = alloc_page();
+	memset(vmxon_region, 0, PAGE_SIZE);
+
+	fix_cr0_set =  rdmsr(MSR_IA32_VMX_CR0_FIXED0);
+	fix_cr0_clr =  rdmsr(MSR_IA32_VMX_CR0_FIXED1);
+	fix_cr4_set =  rdmsr(MSR_IA32_VMX_CR4_FIXED0);
+	fix_cr4_clr = rdmsr(MSR_IA32_VMX_CR4_FIXED1);
+	basic.val = rdmsr(MSR_IA32_VMX_BASIC);
+	ctrl_pin_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PIN
+			: MSR_IA32_VMX_PINBASED_CTLS);
+	ctrl_exit_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_EXIT
+			: MSR_IA32_VMX_EXIT_CTLS);
+	ctrl_enter_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_ENTRY
+			: MSR_IA32_VMX_ENTRY_CTLS);
+	ctrl_cpu_rev[0].val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PROC
+			: MSR_IA32_VMX_PROCBASED_CTLS);
+	if (ctrl_cpu_rev[0].set & CPU_SECONDARY)
+		ctrl_cpu_rev[1].val = rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2);
+	if (ctrl_cpu_rev[1].set & CPU_EPT || ctrl_cpu_rev[1].set & CPU_VPID)
+		ept_vpid.val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP);
+
+	write_cr0((read_cr0() & fix_cr0_clr) | fix_cr0_set);
+	write_cr4((read_cr4() & fix_cr4_clr) | fix_cr4_set | CR4_VMXE);
+
+	*vmxon_region = basic.revision;
+
+	guest_stack = alloc_page();
+	memset(guest_stack, 0, PAGE_SIZE);
+	guest_syscall_stack = alloc_page();
+	memset(guest_syscall_stack, 0, PAGE_SIZE);
+	host_stack = alloc_page();
+	memset(host_stack, 0, PAGE_SIZE);
+	host_syscall_stack = alloc_page();
+	memset(host_syscall_stack, 0, PAGE_SIZE);
+}
+
+int test_vmx_capability(void)
+{
+	struct cpuid r;
+	u64 ret1, ret2;
+	r = cpuid(1);
+	ret1 = ((r.c) >> 5) & 1;
+	ret2 = ((rdmsr(MSR_IA32_FEATURE_CONTROL) & 0x5) == 0x5);
+	report("test vmx capability", ret1 & ret2);
+	/* TODO: Fix here after patches are accepted */
+	return 0;
+	return !(ret1 & ret2);
+}
+
+int test_vmxon(void)
+{
+	bool ret;
+	u64 rflags;
+
+	rflags = get_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;
+	set_rflags(rflags);
+	asm volatile ("vmxon %1; seta %0\n\t"
+		: "=q"(ret) : "m"(vmxon_region) : "cc");
+	report("test vmxon", ret);
+	/* TODO: Change here after bug fixed */
+	return 0;
+	/* return !ret; */
+}
+
+void test_vmptrld(void)
+{
+	u64 rflags;
+	struct vmcs *vmcs;
+
+	vmcs = alloc_page();
+	vmcs->revision_id = basic.revision;
+	rflags = get_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;
+	set_rflags(rflags);
+	report("test vmptrld", make_vmcs_current(vmcs) == 0);
+}
+
+void test_vmptrst(void)
+{
+	u64 rflags;
+	int ret;
+	struct vmcs *vmcs1, *vmcs2;
+
+	vmcs1 = alloc_page();
+	memset(vmcs1, 0, PAGE_SIZE);
+	init_vmcs(&vmcs1);
+	rflags = get_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;
+	set_rflags(rflags);
+	ret = save_vmcs(&vmcs2);
+	report("test vmptrst", (!ret) && (vmcs1 == vmcs2));
+}
+
+int main(void)
+{
+	setup_vm();
+	setup_idt();
+
+	if (test_vmx_capability() != 0) {
+		printf("ERROR : vmx not supported, check +vmx option\n");
+		goto exit;
+	}
+	init_vmx();
+	if (test_vmxon() != 0)
+		goto exit;
+	test_vmptrld();
+	test_vmclear();
+	test_vmptrst();
+	init_vmcs(&vmcs_root);
+
+	vmx_run();
+	/* Should not reach here */
+	report("test vmlaunch", 0);
+
+exit:
+	printf("\nSUMMARY: %d tests, %d failures\n", tests, fails);
+	return fails ? 1 : 0;
+}
diff --git a/x86/vmx.h b/x86/vmx.h
new file mode 100644
index 0000000..167815d
--- /dev/null
+++ b/x86/vmx.h
@@ -0,0 +1,406 @@
+#ifndef __HYPERVISOR_H
+#define __HYPERVISOR_H
+
+#include "libcflat.h"
+
+struct vmcs {
+	u32 revision_id; /* vmcs revision identifier */
+	u32 abort; /* VMX-abort indicator */
+	/* VMCS data */
+	char data[0];
+};
+
+struct regs {
+	u64 rax;
+	u64 rcx;
+	u64 rdx;
+	u64 rbx;
+	u64 cr2;
+	u64 rbp;
+	u64 rsi;
+	u64 rdi;
+	u64 r8;
+	u64 r9;
+	u64 r10;
+	u64 r11;
+	u64 r12;
+	u64 r13;
+	u64 r14;
+	u64 r15;
+};
+
+static union vmx_basic {
+	u64 val;
+	struct {
+		u32 revision;
+		u32	size:13,
+			: 3,
+			width:1,
+			dual:1,
+			type:4,
+			insouts:1,
+			ctrl:1;
+	};
+} basic;
+
+static union vmx_ctrl_pin {
+	u64 val;
+	struct {
+		u32 set, clr;
+	};
+} ctrl_pin_rev;
+
+static union vmx_ctrl_cpu {
+	u64 val;
+	struct {
+		u32 set, clr;
+	};
+} ctrl_cpu_rev[2];
+
+static union vmx_ctrl_exit {
+	u64 val;
+	struct {
+		u32 set, clr;
+	};
+} ctrl_exit_rev;
+
+static union vmx_ctrl_ent {
+	u64 val;
+	struct {
+		u32 set, clr;
+	};
+} ctrl_enter_rev;
+
+static union vmx_ept_vpid {
+	u64 val;
+	struct {
+		u32:16,
+			super:2,
+			: 2,
+			invept:1,
+			: 11;
+		u32	invvpid:1;
+	};
+} ept_vpid;
+
+struct descr {
+	u16 limit;
+	u64 addr;
+};
+
+enum Encoding {
+	/* 16-Bit Control Fields */
+	VPID			= 0x0000ul,
+	/* Posted-interrupt notification vector */
+	PINV			= 0x0002ul,
+	/* EPTP index */
+	EPTP_IDX		= 0x0004ul,
+
+	/* 16-Bit Guest State Fields */
+	GUEST_SEL_ES		= 0x0800ul,
+	GUEST_SEL_CS		= 0x0802ul,
+	GUEST_SEL_SS		= 0x0804ul,
+	GUEST_SEL_DS		= 0x0806ul,
+	GUEST_SEL_FS		= 0x0808ul,
+	GUEST_SEL_GS		= 0x080aul,
+	GUEST_SEL_LDTR		= 0x080cul,
+	GUEST_SEL_TR		= 0x080eul,
+	GUEST_INT_STATUS	= 0x0810ul,
+
+	/* 16-Bit Host State Fields */
+	HOST_SEL_ES		= 0x0c00ul,
+	HOST_SEL_CS		= 0x0c02ul,
+	HOST_SEL_SS		= 0x0c04ul,
+	HOST_SEL_DS		= 0x0c06ul,
+	HOST_SEL_FS		= 0x0c08ul,
+	HOST_SEL_GS		= 0x0c0aul,
+	HOST_SEL_TR		= 0x0c0cul,
+
+	/* 64-Bit Control Fields */
+	IO_BITMAP_A		= 0x2000ul,
+	IO_BITMAP_B		= 0x2002ul,
+	MSR_BITMAP		= 0x2004ul,
+	EXIT_MSR_ST_ADDR	= 0x2006ul,
+	EXIT_MSR_LD_ADDR	= 0x2008ul,
+	ENTER_MSR_LD_ADDR	= 0x200aul,
+	VMCS_EXEC_PTR		= 0x200cul,
+	TSC_OFFSET		= 0x2010ul,
+	TSC_OFFSET_HI		= 0x2011ul,
+	APIC_VIRT_ADDR		= 0x2012ul,
+	APIC_ACCS_ADDR		= 0x2014ul,
+	EPTP			= 0x201aul,
+	EPTP_HI			= 0x201bul,
+
+	/* 64-Bit Readonly Data Field */
+	INFO_PHYS_ADDR		= 0x2400ul,
+
+	/* 64-Bit Guest State */
+	VMCS_LINK_PTR		= 0x2800ul,
+	VMCS_LINK_PTR_HI	= 0x2801ul,
+	GUEST_DEBUGCTL		= 0x2802ul,
+	GUEST_DEBUGCTL_HI	= 0x2803ul,
+	GUEST_EFER		= 0x2806ul,
+	GUEST_PERF_GLOBAL_CTRL	= 0x2808ul,
+	GUEST_PDPTE		= 0x280aul,
+
+	/* 64-Bit Host State */
+	HOST_EFER		= 0x2c02ul,
+	HOST_PERF_GLOBAL_CTRL	= 0x2c04ul,
+
+	/* 32-Bit Control Fields */
+	PIN_CONTROLS		= 0x4000ul,
+	CPU_EXEC_CTRL0		= 0x4002ul,
+	EXC_BITMAP		= 0x4004ul,
+	PF_ERROR_MASK		= 0x4006ul,
+	PF_ERROR_MATCH		= 0x4008ul,
+	CR3_TARGET_COUNT	= 0x400aul,
+	EXI_CONTROLS		= 0x400cul,
+	EXI_MSR_ST_CNT		= 0x400eul,
+	EXI_MSR_LD_CNT		= 0x4010ul,
+	ENT_CONTROLS		= 0x4012ul,
+	ENT_MSR_LD_CNT		= 0x4014ul,
+	ENT_INTR_INFO		= 0x4016ul,
+	ENT_INTR_ERROR		= 0x4018ul,
+	ENT_INST_LEN		= 0x401aul,
+	TPR_THRESHOLD		= 0x401cul,
+	CPU_EXEC_CTRL1		= 0x401eul,
+
+	/* 32-Bit R/O Data Fields */
+	VMX_INST_ERROR		= 0x4400ul,
+	EXI_REASON		= 0x4402ul,
+	EXI_INTR_INFO		= 0x4404ul,
+	EXI_INTR_ERROR		= 0x4406ul,
+	IDT_VECT_INFO		= 0x4408ul,
+	IDT_VECT_ERROR		= 0x440aul,
+	EXI_INST_LEN		= 0x440cul,
+	EXI_INST_INFO		= 0x440eul,
+
+	/* 32-Bit Guest State Fields */
+	GUEST_LIMIT_ES		= 0x4800ul,
+	GUEST_LIMIT_CS		= 0x4802ul,
+	GUEST_LIMIT_SS		= 0x4804ul,
+	GUEST_LIMIT_DS		= 0x4806ul,
+	GUEST_LIMIT_FS		= 0x4808ul,
+	GUEST_LIMIT_GS		= 0x480aul,
+	GUEST_LIMIT_LDTR	= 0x480cul,
+	GUEST_LIMIT_TR		= 0x480eul,
+	GUEST_LIMIT_GDTR	= 0x4810ul,
+	GUEST_LIMIT_IDTR	= 0x4812ul,
+	GUEST_AR_ES		= 0x4814ul,
+	GUEST_AR_CS		= 0x4816ul,
+	GUEST_AR_SS		= 0x4818ul,
+	GUEST_AR_DS		= 0x481aul,
+	GUEST_AR_FS		= 0x481cul,
+	GUEST_AR_GS		= 0x481eul,
+	GUEST_AR_LDTR		= 0x4820ul,
+	GUEST_AR_TR		= 0x4822ul,
+	GUEST_INTR_STATE	= 0x4824ul,
+	GUEST_ACTV_STATE	= 0x4826ul,
+	GUEST_SMBASE		= 0x4828ul,
+	GUEST_SYSENTER_CS	= 0x482aul,
+
+	/* 32-Bit Host State Fields */
+	HOST_SYSENTER_CS	= 0x4c00ul,
+
+	/* Natural-Width Control Fields */
+	CR0_MASK		= 0x6000ul,
+	CR4_MASK		= 0x6002ul,
+	CR0_READ_SHADOW	= 0x6004ul,
+	CR4_READ_SHADOW	= 0x6006ul,
+	CR3_TARGET_0		= 0x6008ul,
+	CR3_TARGET_1		= 0x600aul,
+	CR3_TARGET_2		= 0x600cul,
+	CR3_TARGET_3		= 0x600eul,
+
+	/* Natural-Width R/O Data Fields */
+	EXI_QUALIFICATION	= 0x6400ul,
+	IO_RCX			= 0x6402ul,
+	IO_RSI			= 0x6404ul,
+	IO_RDI			= 0x6406ul,
+	IO_RIP			= 0x6408ul,
+	GUEST_LINEAR_ADDRESS	= 0x640aul,
+
+	/* Natural-Width Guest State Fields */
+	GUEST_CR0		= 0x6800ul,
+	GUEST_CR3		= 0x6802ul,
+	GUEST_CR4		= 0x6804ul,
+	GUEST_BASE_ES		= 0x6806ul,
+	GUEST_BASE_CS		= 0x6808ul,
+	GUEST_BASE_SS		= 0x680aul,
+	GUEST_BASE_DS		= 0x680cul,
+	GUEST_BASE_FS		= 0x680eul,
+	GUEST_BASE_GS		= 0x6810ul,
+	GUEST_BASE_LDTR		= 0x6812ul,
+	GUEST_BASE_TR		= 0x6814ul,
+	GUEST_BASE_GDTR		= 0x6816ul,
+	GUEST_BASE_IDTR		= 0x6818ul,
+	GUEST_DR7		= 0x681aul,
+	GUEST_RSP		= 0x681cul,
+	GUEST_RIP		= 0x681eul,
+	GUEST_RFLAGS		= 0x6820ul,
+	GUEST_PENDING_DEBUG	= 0x6822ul,
+	GUEST_SYSENTER_ESP	= 0x6824ul,
+	GUEST_SYSENTER_EIP	= 0x6826ul,
+
+	/* Natural-Width Host State Fields */
+	HOST_CR0		= 0x6c00ul,
+	HOST_CR3		= 0x6c02ul,
+	HOST_CR4		= 0x6c04ul,
+	HOST_BASE_FS		= 0x6c06ul,
+	HOST_BASE_GS		= 0x6c08ul,
+	HOST_BASE_TR		= 0x6c0aul,
+	HOST_BASE_GDTR		= 0x6c0cul,
+	HOST_BASE_IDTR		= 0x6c0eul,
+	HOST_SYSENTER_ESP	= 0x6c10ul,
+	HOST_SYSENTER_EIP	= 0x6c12ul,
+	HOST_RSP		= 0x6c14ul,
+	HOST_RIP		= 0x6c16ul
+};
+
+enum Reason {
+	VMX_EXC_NMI		= 0,
+	VMX_EXTINT		= 1,
+	VMX_TRIPLE_FAULT	= 2,
+	VMX_INIT		= 3,
+	VMX_SIPI		= 4,
+	VMX_SMI_IO		= 5,
+	VMX_SMI_OTHER		= 6,
+	VMX_INTR_WINDOW		= 7,
+	VMX_NMI_WINDOW		= 8,
+	VMX_TASK_SWITCH		= 9,
+	VMX_CPUID		= 10,
+	VMX_GETSEC		= 11,
+	VMX_HLT			= 12,
+	VMX_INVD		= 13,
+	VMX_INVLPG		= 14,
+	VMX_RDPMC		= 15,
+	VMX_RDTSC		= 16,
+	VMX_RSM			= 17,
+	VMX_VMCALL		= 18,
+	VMX_VMCLEAR		= 19,
+	VMX_VMLAUNCH		= 20,
+	VMX_VMPTRLD		= 21,
+	VMX_VMPTRST		= 22,
+	VMX_VMREAD		= 23,
+	VMX_VMRESUME		= 24,
+	VMX_VMWRITE		= 25,
+	VMX_VMXOFF		= 26,
+	VMX_VMXON		= 27,
+	VMX_CR			= 28,
+	VMX_DR			= 29,
+	VMX_IO			= 30,
+	VMX_RDMSR		= 31,
+	VMX_WRMSR		= 32,
+	VMX_FAIL_STATE		= 33,
+	VMX_FAIL_MSR		= 34,
+	VMX_MWAIT		= 36,
+	VMX_MTF			= 37,
+	VMX_MONITOR		= 39,
+	VMX_PAUSE		= 40,
+	VMX_FAIL_MCHECK		= 41,
+	VMX_TPR_THRESHOLD	= 43,
+	VMX_APIC_ACCESS		= 44,
+	VMX_GDTR_IDTR		= 46,
+	VMX_LDTR_TR		= 47,
+	VMX_EPT_VIOLATION	= 48,
+	VMX_EPT_MISCONFIG	= 49,
+	VMX_INVEPT		= 50,
+	VMX_PREEMPT		= 52,
+	VMX_INVVPID		= 53,
+	VMX_WBINVD		= 54,
+	VMX_XSETBV		= 55
+};
+
+#define X86_EFLAGS_CF	0x00000001 /* Carry Flag */
+#define X86_EFLAGS_ZF	0x00000040 /* Zero Flag */
+
+enum Ctrl_exi {
+	EXI_HOST_64             = 1UL << 9,
+	EXI_LOAD_PERF		= 1UL << 12,
+	EXI_INTA                = 1UL << 15,
+	EXI_LOAD_EFER           = 1UL << 21,
+};
+
+enum Ctrl_ent {
+	ENT_GUEST_64            = 1UL << 9,
+	ENT_LOAD_EFER           = 1UL << 15,
+};
+
+enum Ctrl_pin {
+	PIN_EXTINT              = 1ul << 0,
+	PIN_NMI                 = 1ul << 3,
+	PIN_VIRT_NMI            = 1ul << 5,
+};
+
+enum Ctrl0 {
+	CPU_INTR_WINDOW		= 1ul << 2,
+	CPU_HLT			= 1ul << 7,
+	CPU_INVLPG		= 1ul << 9,
+	CPU_CR3_LOAD		= 1ul << 15,
+	CPU_CR3_STORE		= 1ul << 16,
+	CPU_TPR_SHADOW		= 1ul << 21,
+	CPU_NMI_WINDOW		= 1ul << 22,
+	CPU_IO			= 1ul << 24,
+	CPU_IO_BITMAP		= 1ul << 25,
+	CPU_SECONDARY		= 1ul << 31,
+};
+
+enum Ctrl1 {
+	CPU_EPT			= 1ul << 1,
+	CPU_VPID		= 1ul << 5,
+	CPU_URG			= 1ul << 7,
+};
+
+#define SEL_NULL_DESC		0x0
+#define SEL_KERN_CODE_64	0x8
+#define SEL_KERN_DATA_64	0x10
+#define SEL_USER_CODE_64	0x18
+#define SEL_USER_DATA_64	0x20
+#define SEL_CODE_32		0x28
+#define SEL_DATA_32		0x30
+#define SEL_CODE_16		0x38
+#define SEL_DATA_16		0x40
+#define SEL_TSS_RUN		0x48
+
+#define SAVE_GPR				\
+	"xchg %rax, regs\n\t"			\
+	"xchg %rbx, regs+0x8\n\t"		\
+	"xchg %rcx, regs+0x10\n\t"		\
+	"xchg %rdx, regs+0x18\n\t"		\
+	"xchg %rbp, regs+0x28\n\t"		\
+	"xchg %rsi, regs+0x30\n\t"		\
+	"xchg %rdi, regs+0x38\n\t"		\
+	"xchg %r8, regs+0x40\n\t"		\
+	"xchg %r9, regs+0x48\n\t"		\
+	"xchg %r10, regs+0x50\n\t"		\
+	"xchg %r11, regs+0x58\n\t"		\
+	"xchg %r12, regs+0x60\n\t"		\
+	"xchg %r13, regs+0x68\n\t"		\
+	"xchg %r14, regs+0x70\n\t"		\
+	"xchg %r15, regs+0x78\n\t"
+
+#define LOAD_GPR	SAVE_GPR
+
+#define CR0_PE		(1ul << 0)
+#define CR0_PG		(1ul << 31)
+#define CR4_VMXE	(1ul << 0)
+#define CR4_PAE		(1ul << 5)
+#define CR4_PCIDE	(1ul << 17)
+
+#define VMX_IO_SIZE_MASK		0x7
+#define _VMX_IO_BYTE			1
+#define _VMX_IO_WORD			2
+#define _VMX_IO_LONG			3
+#define VMX_IO_DIRECTION_MASK		(1ul << 3)
+#define VMX_IO_IN			(1ul << 3)
+#define VMX_IO_OUT			0
+#define VMX_IO_STRING			(1ul << 4)
+#define VMX_IO_REP			(1ul << 5)
+#define VMX_IO_OPRAND_DX		(1ul << 6)
+#define VMX_IO_PORT_MASK		0xFFFF0000
+#define VMX_IO_PORT_SHIFT		16
+
+#define TEST_VMRESUME		0x1001
+
+#endif
+
-- 
1.7.9.5


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH] kvm-unit-tests : The first version of VMX nested test case
  2013-07-16  9:27 [PATCH] kvm-unit-tests : The first version of VMX nested test case Arthur Chunqi Li
@ 2013-07-16  9:35 ` Arthur Chunqi Li
  2013-07-16  9:45   ` Gleb Natapov
  2013-07-16 10:28 ` Paolo Bonzini
  1 sibling, 1 reply; 14+ messages in thread
From: Arthur Chunqi Li @ 2013-07-16  9:35 UTC (permalink / raw)
  To: kvm; +Cc: Jan Kiszka, Gleb Natapov, Paolo Bonzini, Arthur Chunqi Li

Hi there,
This is a version calling for comments. Some minor changes should be
made before the final commit (see the TODOs), because those places are
related to bugs I reported in previous weeks whose fixes have not yet
been accepted. These bugs all concern unexpected corner cases, and the
hypervisor can still run if we simply ignore them. Once all the bugs
are fixed, everything will be OK.

Arthur

On Tue, Jul 16, 2013 at 5:27 PM, Arthur Chunqi Li <yzt356@gmail.com> wrote:
> This is the first version for VMX nested environment test case. It
> contains the basic VMX instructions test cases, including VMXON/
> VMXOFF/VMXPTRLD/VMXPTRST/VMCLEAR/VMLAUNCH/VMRESUME/VMCALL. This patch
> also tests the basic execution routine in VMX nested environment and
> let the VM print "Hello World" to inform its successfully run.
>
> New files added:
> x86/vmx.h : contains all VMX related macro declerations
> x86/vmx.c : main file for VMX nested test case
>
> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
> ---
>  config-x86-common.mak |    2 +
>  config-x86_64.mak     |    1 +
>  lib/x86/msr.h         |    5 +
>  x86/cstart64.S        |    4 +
>  x86/unittests.cfg     |    6 +
>  x86/vmx.c             |  568 +++++++++++++++++++++++++++++++++++++++++++++++++
>  x86/vmx.h             |  406 +++++++++++++++++++++++++++++++++++
>  7 files changed, 992 insertions(+)
>  create mode 100644 x86/vmx.c
>  create mode 100644 x86/vmx.h
>
> diff --git a/config-x86-common.mak b/config-x86-common.mak
> index 455032b..34a41e1 100644
> --- a/config-x86-common.mak
> +++ b/config-x86-common.mak
> @@ -101,6 +101,8 @@ $(TEST_DIR)/asyncpf.elf: $(cstart.o) $(TEST_DIR)/asyncpf.o
>
>  $(TEST_DIR)/pcid.elf: $(cstart.o) $(TEST_DIR)/pcid.o
>
> +$(TEST_DIR)/vmx.elf: $(cstart.o) $(TEST_DIR)/vmx.o
> +
>  arch_clean:
>         $(RM) $(TEST_DIR)/*.o $(TEST_DIR)/*.flat $(TEST_DIR)/*.elf \
>         $(TEST_DIR)/.*.d $(TEST_DIR)/lib/.*.d $(TEST_DIR)/lib/*.o
> diff --git a/config-x86_64.mak b/config-x86_64.mak
> index 4e525f5..bb8ee89 100644
> --- a/config-x86_64.mak
> +++ b/config-x86_64.mak
> @@ -9,5 +9,6 @@ tests = $(TEST_DIR)/access.flat $(TEST_DIR)/apic.flat \
>           $(TEST_DIR)/xsave.flat $(TEST_DIR)/rmap_chain.flat \
>           $(TEST_DIR)/pcid.flat
>  tests += $(TEST_DIR)/svm.flat
> +tests += $(TEST_DIR)/vmx.flat
>
>  include config-x86-common.mak
> diff --git a/lib/x86/msr.h b/lib/x86/msr.h
> index 509a421..281255a 100644
> --- a/lib/x86/msr.h
> +++ b/lib/x86/msr.h
> @@ -396,6 +396,11 @@
>  #define MSR_IA32_VMX_VMCS_ENUM          0x0000048a
>  #define MSR_IA32_VMX_PROCBASED_CTLS2    0x0000048b
>  #define MSR_IA32_VMX_EPT_VPID_CAP       0x0000048c
> +#define MSR_IA32_VMX_TRUE_PIN          0x0000048d
> +#define MSR_IA32_VMX_TRUE_PROC         0x0000048e
> +#define MSR_IA32_VMX_TRUE_EXIT         0x0000048f
> +#define MSR_IA32_VMX_TRUE_ENTRY                0x00000490
> +
>
>  /* AMD-V MSRs */
>
> diff --git a/x86/cstart64.S b/x86/cstart64.S
> index 24df5f8..0fe76da 100644
> --- a/x86/cstart64.S
> +++ b/x86/cstart64.S
> @@ -4,6 +4,10 @@
>  .globl boot_idt
>  boot_idt = 0
>
> +.globl idt_descr
> +.globl tss_descr
> +.globl gdt64_desc
> +
>  ipi_vector = 0x20
>
>  max_cpus = 64
> diff --git a/x86/unittests.cfg b/x86/unittests.cfg
> index bc9643e..e846739 100644
> --- a/x86/unittests.cfg
> +++ b/x86/unittests.cfg
> @@ -149,3 +149,9 @@ extra_params = --append "10000000 `date +%s`"
>  file = pcid.flat
>  extra_params = -cpu qemu64,+pcid
>  arch = x86_64
> +
> +[vmx]
> +file = vmx.flat
> +extra_params = -cpu Nehalem,+vmx
> +arch = x86_64
> +
> diff --git a/x86/vmx.c b/x86/vmx.c
> new file mode 100644
> index 0000000..0435746
> --- /dev/null
> +++ b/x86/vmx.c
> @@ -0,0 +1,568 @@
> +#include "libcflat.h"
> +#include "processor.h"
> +#include "vm.h"
> +#include "desc.h"
> +#include "vmx.h"
> +#include "msr.h"
> +#include "smp.h"
> +#include "io.h"
> +
> +
> +int fails = 0, tests = 0;
> +u32 *vmxon_region;
> +struct vmcs *vmcs_root;
> +void *io_bmp1, *io_bmp2;
> +void *msr_bmp;
> +u32 vpid_ctr;
> +char *guest_stack, *host_stack;
> +char *guest_syscall_stack, *host_syscall_stack;
> +u32 ctrl_pin, ctrl_enter, ctrl_exit, ctrl_cpu[2];
> +ulong fix_cr0_set, fix_cr0_clr;
> +ulong fix_cr4_set, fix_cr4_clr;
> +struct regs regs;
> +
> +extern u64 gdt64_desc[];
> +extern u64 idt_descr[];
> +extern u64 tss_descr[];
> +extern void *entry_vmx;
> +extern void *entry_sysenter;
> +extern void *entry_guest;
> +
> +void report(const char *name, int result)
> +{
> +       ++tests;
> +       if (result)
> +               printf("PASS: %s\n", name);
> +       else {
> +               printf("FAIL: %s\n", name);
> +               ++fails;
> +       }
> +}
> +
> +inline u64 get_rflags(void)
> +{
> +       u64 r;
> +       asm volatile("pushf; pop %0\n\t" : "=q"(r) : : "cc");
> +       return r;
> +}
> +
> +inline void set_rflags(u64 r)
> +{
> +       asm volatile("push %0; popf\n\t" : : "q"(r) : "cc");
> +}
> +
> +int vmcs_clear(struct vmcs *vmcs)
> +{
> +       bool ret;
> +       asm volatile ("vmclear %1; seta %0" : "=q" (ret) : "m" (vmcs) : "cc");
> +       return !ret;
> +}
> +
> +u64 vmcs_read(enum Encoding enc)
> +{
> +       u64 val;
> +       asm volatile ("vmread %1, %0" : "=rm" (val) : "r" ((u64)enc) : "cc");
> +       return val;
> +}
> +
> +int vmcs_write(enum Encoding enc, u64 val)
> +{
> +       bool ret;
> +       asm volatile ("vmwrite %1, %2; seta %0"
> +               : "=q"(ret) : "rm" (val), "r" ((u64)enc) : "cc");
> +       return !ret;
> +}
> +
> +int make_vmcs_current(struct vmcs *vmcs)
> +{
> +       bool ret;
> +
> +       asm volatile ("vmptrld %1; seta %0" : "=q" (ret) : "m" (vmcs) : "cc");
> +       return !ret;
> +}
> +
> +int save_vmcs(struct vmcs **vmcs)
> +{
> +       bool ret;
> +
> +       asm volatile ("vmptrst %1; seta %0" : "=q" (ret) : "m" (*vmcs) : "cc");
> +       return !ret;
> +}
> +
> +/* entry_vmx */
> +asm(
> +       ".align 4, 0x90\n\t"
> +       ".globl entry_vmx\n\t"
> +       "entry_vmx:\n\t"
> +       SAVE_GPR
> +       "       call    vmx_handler\n\t"
> +       LOAD_GPR
> +       "       vmresume\n\t"
> +);
> +
> +/* entry_sysenter */
> +asm(
> +       ".align 4, 0x90\n\t"
> +       ".globl entry_sysenter\n\t"
> +       "entry_sysenter:\n\t"
> +       SAVE_GPR
> +       "       and     $0xf, %rax\n\t"
> +       "       push    %rax\n\t"
> +       "       call    syscall_handler\n\t"
> +);
> +
> +void syscall_handler(u64 syscall_no)
> +{
> +       printf("Here in syscall_handler, syscall_no = %d\n", syscall_no);
> +}
> +
> +void vmx_run()
> +{
> +       bool ret;
> +       printf("Now run vm.\n\n");
> +       asm volatile("vmlaunch;seta %0\n\t" : "=m"(ret));
> +       printf("VMLAUNCH error, ret=%d\n", ret);
> +}
> +
> +void vmx_resume()
> +{
> +       asm volatile(LOAD_GPR
> +               "vmresume\n\t");
> +       /* VMRESUME fail if reach here */
> +}
> +
> +void print_vmexit_info()
> +{
> +       u64 guest_rip, guest_rsp;
> +       ulong reason = vmcs_read(EXI_REASON) & 0xff;
> +       ulong exit_qual = vmcs_read(EXI_QUALIFICATION);
> +       guest_rip = vmcs_read(GUEST_RIP);
> +       guest_rsp = vmcs_read(GUEST_RSP);
> +       printf("VMEXIT info:\n");
> +       printf("\tvmexit reason = %d\n", reason);
> +       printf("\texit qualification = 0x%x\n", exit_qual);
> +       printf("\tBit 31 of reason = %x\n", (vmcs_read(EXI_REASON) >> 31) & 1);
> +       printf("\tguest_rip = 0x%llx\n", guest_rip);
> +       printf("\tRAX=0x%llx    RBX=0x%llx    RCX=0x%llx    RDX=0x%llx\n",
> +               regs.rax, regs.rbx, regs.rcx, regs.rdx);
> +       printf("\tRSP=0x%llx    RBP=0x%llx    RSI=0x%llx    RDI=0x%llx\n",
> +               guest_rsp, regs.rbp, regs.rsi, regs.rdi);
> +       printf("\tR8 =0x%llx    R9 =0x%llx    R10=0x%llx    R11=0x%llx\n",
> +               regs.r8, regs.r9, regs.r10, regs.r11);
> +       printf("\tR12=0x%llx    R13=0x%llx    R14=0x%llx    R15=0x%llx\n",
> +               regs.r12, regs.r13, regs.r14, regs.r15);
> +}
> +
> +void test_vmclear(void)
> +{
> +       u64 rflags;
> +
> +       rflags = get_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;
> +       set_rflags(rflags);
> +       report("test vmclear", vmcs_clear(vmcs_root) == 0);
> +}
> +
> +void test_vmxoff(void)
> +{
> +       bool ret;
> +       u64 rflags;
> +
> +       rflags = get_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;
> +       set_rflags(rflags);
> +       asm volatile("vmxoff; seta %0\n\t" : "=q"(ret) : : "cc");
> +       report("test vmxoff", ret);
> +}
> +
> +void vmx_exit(void)
> +{
> +       test_vmxoff();
> +       printf("\nSUMMARY: %d tests, %d failures\n", tests, fails);
> +       exit(fails ? -1 : 0);
> +}
> +
> +void vmx_handler()
> +{
> +       u64 guest_rip;
> +       ulong reason = vmcs_read(EXI_REASON) & 0xff;
> +
> +       if ((read_cr4() & CR4_PAE) && (read_cr0() & CR0_PG)
> +               && !(rdmsr(MSR_EFER) & EFER_LMA))
> +               printf("ERROR : PDPTEs should be checked\n");
> +
> +       guest_rip = vmcs_read(GUEST_RIP);
> +
> +       switch (reason) {
> +       case VMX_VMCALL:
> +               switch (regs.rax) {
> +               case TEST_VMRESUME:
> +                       regs.rax = 0xFFFF;
> +                       break;
> +               default:
> +                       printf("ERROR : Invalid VMCALL param : %d\n", regs.rax);
> +               }
> +               vmcs_write(GUEST_RIP, guest_rip + 3);
> +               goto vmx_resume;
> +       case VMX_IO:
> +               print_vmexit_info();
> +               break;
> +       case VMX_HLT:
> +               printf("\nVM exit.\n");
> +               vmx_exit();
> +               /* Should not reach here */
> +               goto vmx_exit;
> +       case VMX_EXC_NMI:
> +       case VMX_EXTINT:
> +       case VMX_INVLPG:
> +       case VMX_CR:
> +       case VMX_EPT_VIOLATION:
> +       default:
> +               break;
> +       }
> +       printf("ERROR : Unhandled vmx exit.\n");
> +       print_vmexit_info();
> +vmx_exit:
> +       exit(-1);
> +vmx_resume:
> +       vmx_resume();
> +       /* Should not reach here */
> +       exit(-1);
> +}
> +
> +void test_vmresume()
> +{
> +       u64 rax;
> +       u64 rsp, resume_rsp;
> +
> +       rax = 0;
> +       asm volatile("mov %%rsp, %0\n\t" : "=r"(rsp));
> +       asm volatile("mov %2, %%rax\n\t"
> +               "vmcall\n\t"
> +               "mov %%rax, %0\n\t"
> +               "mov %%rsp, %1\n\t"
> +               : "=r"(rax), "=r"(resume_rsp)
> +               : "g"(TEST_VMRESUME));
> +       report("test vmresume", (rax == 0xFFFF) && (rsp == resume_rsp));
> +}
> +
> +/* entry_guest */
> +asm(
> +       ".align 4, 0x90\n\t"
> +       ".globl entry_guest\n\t"
> +       "entry_guest:\n\t"
> +       "       call    guest_main\n\t"
> +       "       hlt\n\t"
> +);
> +
> +void guest_main(void)
> +{
> +       /* If reach here, VMLAUNCH runs OK */
> +       report("test vmlaunch", 1);
> +       printf("cr0 in guest = %llx\n", read_cr0());
> +       printf("cr3 in guest = %llx\n", read_cr3());
> +       printf("cr4 in guest = %llx\n", read_cr4());
> +       printf("\nHello World!\n");
> +       test_vmresume();
> +}
> +
> +void init_vmcs_ctrl(void)
> +{
> +       /* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */
> +       /* 26.2.1.1 */
> +       vmcs_write(PIN_CONTROLS, ctrl_pin);
> +       /* Disable VMEXIT of IO instruction */
> +       vmcs_write(CPU_EXEC_CTRL0, ctrl_cpu[0]);
> +       if (ctrl_cpu_rev[0].set & CPU_SECONDARY) {
> +               ctrl_cpu[1] |= ctrl_cpu_rev[1].set & ctrl_cpu_rev[1].clr;
> +               vmcs_write(CPU_EXEC_CTRL1, ctrl_cpu[1]);
> +       }
> +       vmcs_write(CR3_TARGET_COUNT, 0);
> +       io_bmp1 = alloc_page();
> +       io_bmp2 = alloc_page();
> +       memset(io_bmp1, 0, PAGE_SIZE);
> +       memset(io_bmp2, 0, PAGE_SIZE);
> +       vmcs_write(IO_BITMAP_A, (u64)io_bmp1);
> +       vmcs_write(IO_BITMAP_B, (u64)io_bmp2);
> +       msr_bmp = alloc_page();
> +       memset(msr_bmp, 0, PAGE_SIZE);
> +       vmcs_write(MSR_BITMAP, (u64)msr_bmp);
> +       vmcs_write(VPID, ++vpid_ctr);
> +}
> +
> +void init_vmcs_host(void)
> +{
> +       /* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */
> +       /* 26.2.1.2 */
> +       vmcs_write(HOST_EFER, rdmsr(MSR_EFER));
> +
> +       /* 26.2.1.3 */
> +       vmcs_write(ENT_CONTROLS, ctrl_enter);
> +       vmcs_write(EXI_CONTROLS, ctrl_exit);
> +
> +       /* 26.2.2 */
> +       vmcs_write(HOST_CR0, read_cr0());
> +       vmcs_write(HOST_CR3, read_cr3());
> +       vmcs_write(HOST_CR4, read_cr4());
> +       vmcs_write(HOST_SYSENTER_ESP,
> +               (u64)(host_syscall_stack + PAGE_SIZE - 1));
> +       vmcs_write(HOST_SYSENTER_EIP, (u64)(&entry_sysenter));
> +       vmcs_write(HOST_SYSENTER_CS,  SEL_KERN_CODE_64);
> +
> +       /* 26.2.3 */
> +       vmcs_write(HOST_SEL_CS, SEL_KERN_CODE_64);
> +       vmcs_write(HOST_SEL_SS, SEL_KERN_DATA_64);
> +       vmcs_write(HOST_SEL_DS, SEL_KERN_DATA_64);
> +       vmcs_write(HOST_SEL_ES, SEL_KERN_DATA_64);
> +       vmcs_write(HOST_SEL_FS, SEL_KERN_DATA_64);
> +       vmcs_write(HOST_SEL_GS, SEL_KERN_DATA_64);
> +       vmcs_write(HOST_SEL_TR, SEL_TSS_RUN);
> +       vmcs_write(HOST_BASE_TR,   (u64)tss_descr);
> +       vmcs_write(HOST_BASE_GDTR, (u64)gdt64_desc);
> +       vmcs_write(HOST_BASE_IDTR, (u64)idt_descr);
> +       vmcs_write(HOST_BASE_FS, 0);
> +       vmcs_write(HOST_BASE_GS, 0);
> +
> +       /* Set other vmcs area */
> +       vmcs_write(PF_ERROR_MASK, 0);
> +       vmcs_write(PF_ERROR_MATCH, 0);
> +       vmcs_write(VMCS_LINK_PTR, ~0ul);
> +       vmcs_write(VMCS_LINK_PTR_HI, ~0ul);
> +       vmcs_write(HOST_RSP, (u64)(host_stack + PAGE_SIZE - 1));
> +       vmcs_write(HOST_RIP, (u64)(&entry_vmx));
> +}
> +
> +void init_vmcs_guest(void)
> +{
> +       /* 26.3 CHECKING AND LOADING GUEST STATE */
> +       ulong guest_cr0, guest_cr4, guest_cr3;
> +       /* 26.3.1.1 */
> +       guest_cr0 = read_cr0();
> +       guest_cr4 = read_cr4();
> +       guest_cr3 = read_cr3();
> +       if (ctrl_enter & ENT_GUEST_64) {
> +               guest_cr0 |= CR0_PG;
> +               guest_cr4 |= CR4_PAE;
> +       }
> +       if ((ctrl_enter & ENT_GUEST_64) == 0)
> +               guest_cr4 &= (~CR4_PCIDE);
> +       if (guest_cr0 & CR0_PG)
> +               guest_cr0 |= CR0_PE;
> +       vmcs_write(GUEST_CR0, guest_cr0);
> +       vmcs_write(GUEST_CR3, guest_cr3);
> +       vmcs_write(GUEST_CR4, guest_cr4);
> +       vmcs_write(GUEST_SYSENTER_CS,  SEL_KERN_CODE_64);
> +       vmcs_write(GUEST_SYSENTER_ESP,
> +               (u64)(guest_syscall_stack + PAGE_SIZE - 1));
> +       vmcs_write(GUEST_SYSENTER_EIP, (u64)(&entry_sysenter));
> +       vmcs_write(GUEST_DR7, 0);
> +       vmcs_write(GUEST_EFER, rdmsr(MSR_EFER));
> +
> +       /* 26.3.1.2 */
> +       vmcs_write(GUEST_SEL_CS, SEL_KERN_CODE_64);
> +       vmcs_write(GUEST_SEL_SS, SEL_KERN_DATA_64);
> +       vmcs_write(GUEST_SEL_DS, SEL_KERN_DATA_64);
> +       vmcs_write(GUEST_SEL_ES, SEL_KERN_DATA_64);
> +       vmcs_write(GUEST_SEL_FS, SEL_KERN_DATA_64);
> +       vmcs_write(GUEST_SEL_GS, SEL_KERN_DATA_64);
> +       vmcs_write(GUEST_SEL_TR, SEL_TSS_RUN);
> +       vmcs_write(GUEST_SEL_LDTR, 0);
> +
> +       vmcs_write(GUEST_BASE_CS, 0);
> +       vmcs_write(GUEST_BASE_ES, 0);
> +       vmcs_write(GUEST_BASE_SS, 0);
> +       vmcs_write(GUEST_BASE_DS, 0);
> +       vmcs_write(GUEST_BASE_FS, 0);
> +       vmcs_write(GUEST_BASE_GS, 0);
> +       vmcs_write(GUEST_BASE_TR,   (u64)tss_descr);
> +       vmcs_write(GUEST_BASE_LDTR, 0);
> +
> +       vmcs_write(GUEST_LIMIT_CS, 0xFFFFFFFF);
> +       vmcs_write(GUEST_LIMIT_DS, 0xFFFFFFFF);
> +       vmcs_write(GUEST_LIMIT_ES, 0xFFFFFFFF);
> +       vmcs_write(GUEST_LIMIT_SS, 0xFFFFFFFF);
> +       vmcs_write(GUEST_LIMIT_FS, 0xFFFFFFFF);
> +       vmcs_write(GUEST_LIMIT_GS, 0xFFFFFFFF);
> +       vmcs_write(GUEST_LIMIT_LDTR, 0xffff);
> +       vmcs_write(GUEST_LIMIT_TR, ((struct descr *)tss_descr)->limit);
> +
> +       vmcs_write(GUEST_AR_CS, 0xa09b);
> +       vmcs_write(GUEST_AR_DS, 0xc093);
> +       vmcs_write(GUEST_AR_ES, 0xc093);
> +       vmcs_write(GUEST_AR_FS, 0xc093);
> +       vmcs_write(GUEST_AR_GS, 0xc093);
> +       vmcs_write(GUEST_AR_SS, 0xc093);
> +       vmcs_write(GUEST_AR_LDTR, 0x82);
> +       vmcs_write(GUEST_AR_TR, 0x8b);
> +
> +       /* 26.3.1.3 */
> +       vmcs_write(GUEST_BASE_GDTR, (u64)gdt64_desc);
> +       vmcs_write(GUEST_BASE_IDTR, (u64)idt_descr);
> +       vmcs_write(GUEST_LIMIT_GDTR,
> +               ((struct descr *)gdt64_desc)->limit & 0xffff);
> +       vmcs_write(GUEST_LIMIT_IDTR,
> +               ((struct descr *)idt_descr)->limit & 0xffff);
> +
> +       /* 26.3.1.4 */
> +       vmcs_write(GUEST_RIP, (u64)(&entry_guest));
> +       vmcs_write(GUEST_RSP, (u64)(guest_stack + PAGE_SIZE - 1));
> +       vmcs_write(GUEST_RFLAGS, 0x2);
> +
> +       /* 26.3.1.5 */
> +       vmcs_write(GUEST_ACTV_STATE, 0);
> +       vmcs_write(GUEST_INTR_STATE, 0);
> +}
> +
> +int init_vmcs(struct vmcs **vmcs)
> +{
> +       *vmcs = alloc_page();
> +       memset(*vmcs, 0, PAGE_SIZE);
> +       (*vmcs)->revision_id = basic.revision;
> +       /* vmclear first to init vmcs */
> +       if (vmcs_clear(*vmcs)) {
> +               printf("%s : vmcs_clear error\n", __func__);
> +               return 1;
> +       }
> +
> +       if (make_vmcs_current(*vmcs)) {
> +               printf("%s : make_vmcs_current error\n", __func__);
> +               return 1;
> +       }
> +
> +       /* All settings to pin/exit/enter/cpu
> +          control fields should place here */
> +       ctrl_pin |= PIN_EXTINT | PIN_NMI | PIN_VIRT_NMI;
> +       ctrl_exit = EXI_LOAD_EFER | EXI_HOST_64;
> +       ctrl_enter = (ENT_LOAD_EFER | ENT_GUEST_64);
> +       ctrl_cpu[0] |= CPU_HLT;
> +       /* DIsable IO instruction VMEXIT now */
> +       ctrl_cpu[0] &= (~(CPU_IO | CPU_IO_BITMAP));
> +       ctrl_cpu[1] = 0;
> +
> +       ctrl_pin = (ctrl_pin | ctrl_pin_rev.set) & ctrl_pin_rev.clr;
> +       ctrl_enter = (ctrl_enter | ctrl_enter_rev.set) & ctrl_enter_rev.clr;
> +       ctrl_exit = (ctrl_exit | ctrl_exit_rev.set) & ctrl_exit_rev.clr;
> +       ctrl_cpu[0] = (ctrl_cpu[0] | ctrl_cpu_rev[0].set) & ctrl_cpu_rev[0].clr;
> +
> +       init_vmcs_ctrl();
> +       init_vmcs_host();
> +       init_vmcs_guest();
> +       return 0;
> +}
> +
> +void init_vmx(void)
> +{
> +       vmxon_region = alloc_page();
> +       memset(vmxon_region, 0, PAGE_SIZE);
> +
> +       fix_cr0_set =  rdmsr(MSR_IA32_VMX_CR0_FIXED0);
> +       fix_cr0_clr =  rdmsr(MSR_IA32_VMX_CR0_FIXED1);
> +       fix_cr4_set =  rdmsr(MSR_IA32_VMX_CR4_FIXED0);
> +       fix_cr4_clr = rdmsr(MSR_IA32_VMX_CR4_FIXED1);
> +       basic.val = rdmsr(MSR_IA32_VMX_BASIC);
> +       ctrl_pin_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PIN
> +                       : MSR_IA32_VMX_PINBASED_CTLS);
> +       ctrl_exit_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_EXIT
> +                       : MSR_IA32_VMX_EXIT_CTLS);
> +       ctrl_enter_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_ENTRY
> +                       : MSR_IA32_VMX_ENTRY_CTLS);
> +       ctrl_cpu_rev[0].val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PROC
> +                       : MSR_IA32_VMX_PROCBASED_CTLS);
> +       if (ctrl_cpu_rev[0].set & CPU_SECONDARY)
> +               ctrl_cpu_rev[1].val = rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2);
> +       if (ctrl_cpu_rev[1].set & CPU_EPT || ctrl_cpu_rev[1].set & CPU_VPID)
> +               ept_vpid.val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP);
> +
> +       write_cr0((read_cr0() & fix_cr0_clr) | fix_cr0_set);
> +       write_cr4((read_cr4() & fix_cr4_clr) | fix_cr4_set | CR4_VMXE);
> +
> +       *vmxon_region = basic.revision;
> +
> +       guest_stack = alloc_page();
> +       memset(guest_stack, 0, PAGE_SIZE);
> +       guest_syscall_stack = alloc_page();
> +       memset(guest_syscall_stack, 0, PAGE_SIZE);
> +       host_stack = alloc_page();
> +       memset(host_stack, 0, PAGE_SIZE);
> +       host_syscall_stack = alloc_page();
> +       memset(host_syscall_stack, 0, PAGE_SIZE);
> +}
> +
> +int test_vmx_capability(void)
> +{
> +       struct cpuid r;
> +       u64 ret1, ret2;
> +       r = cpuid(1);
> +       ret1 = ((r.c) >> 5) & 1;
> +       ret2 = ((rdmsr(MSR_IA32_FEATURE_CONTROL) & 0x5) == 0x5);
> +       report("test vmx capability", ret1 & ret2);
> +       /* TODO: Fix here after patches are accepted */
> +       return 0;
> +       return !(ret1 & ret2);
> +}
> +
> +int test_vmxon(void)
> +{
> +       bool ret;
> +       u64 rflags;
> +
> +       rflags = get_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;
> +       set_rflags(rflags);
> +       asm volatile ("vmxon %1; seta %0\n\t"
> +               : "=q"(ret) : "m"(vmxon_region) : "cc");
> +       report("test vmxon", ret);
> +       /* TODO: Change here after bug fixed */
> +       return 0;
> +       /* return !ret; */
> +}
> +
> +void test_vmptrld(void)
> +{
> +       u64 rflags;
> +       struct vmcs *vmcs;
> +
> +       vmcs = alloc_page();
> +       vmcs->revision_id = basic.revision;
> +       rflags = get_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;
> +       set_rflags(rflags);
> +       report("test vmptrld", make_vmcs_current(vmcs) == 0);
> +}
> +
> +void test_vmptrst(void)
> +{
> +       u64 rflags;
> +       int ret;
> +       struct vmcs *vmcs1, *vmcs2;
> +
> +       vmcs1 = alloc_page();
> +       memset(vmcs1, 0, PAGE_SIZE);
> +       init_vmcs(&vmcs1);
> +       rflags = get_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;
> +       set_rflags(rflags);
> +       ret = save_vmcs(&vmcs2);
> +       report("test vmptrst", (!ret) && (vmcs1 == vmcs2));
> +}
> +
> +int main(void)
> +{
> +       setup_vm();
> +       setup_idt();
> +
> +       if (test_vmx_capability() != 0) {
> +               printf("ERROR : vmx not supported, check +vmx option\n");
> +               goto exit;
> +       }
> +       init_vmx();
> +       if (test_vmxon() != 0)
> +               goto exit;
> +       test_vmptrld();
> +       test_vmclear();
> +       test_vmptrst();
> +       init_vmcs(&vmcs_root);
> +
> +       vmx_run();
> +       /* Should not reach here */
> +       report("test vmlaunch", 0);
> +
> +exit:
> +       printf("\nSUMMARY: %d tests, %d failures\n", tests, fails);
> +       return fails ? 1 : 0;
> +}
> diff --git a/x86/vmx.h b/x86/vmx.h
> new file mode 100644
> index 0000000..167815d
> --- /dev/null
> +++ b/x86/vmx.h
> @@ -0,0 +1,406 @@
> +#ifndef __HYPERVISOR_H
> +#define __HYPERVISOR_H
> +
> +#include "libcflat.h"
> +
> +struct vmcs {
> +       u32 revision_id; /* vmcs revision identifier */
> +       u32 abort; /* VMX-abort indicator */
> +       /* VMCS data */
> +       char data[0];
> +};
> +
> +struct regs {
> +       u64 rax;
> +       u64 rcx;
> +       u64 rdx;
> +       u64 rbx;
> +       u64 cr2;
> +       u64 rbp;
> +       u64 rsi;
> +       u64 rdi;
> +       u64 r8;
> +       u64 r9;
> +       u64 r10;
> +       u64 r11;
> +       u64 r12;
> +       u64 r13;
> +       u64 r14;
> +       u64 r15;
> +};
> +
> +static union vmx_basic {
> +       u64 val;
> +       struct {
> +               u32 revision;
> +               u32     size:13,
> +                       : 3,
> +                       width:1,
> +                       dual:1,
> +                       type:4,
> +                       insouts:1,
> +                       ctrl:1;
> +       };
> +} basic;
> +
> +static union vmx_ctrl_pin {
> +       u64 val;
> +       struct {
> +               u32 set, clr;
> +       };
> +} ctrl_pin_rev;
> +
> +static union vmx_ctrl_cpu {
> +       u64 val;
> +       struct {
> +               u32 set, clr;
> +       };
> +} ctrl_cpu_rev[2];
> +
> +static union vmx_ctrl_exit {
> +       u64 val;
> +       struct {
> +               u32 set, clr;
> +       };
> +} ctrl_exit_rev;
> +
> +static union vmx_ctrl_ent {
> +       u64 val;
> +       struct {
> +               u32 set, clr;
> +       };
> +} ctrl_enter_rev;
> +
> +static union vmx_ept_vpid {
> +       u64 val;
> +       struct {
> +               u32:16,
> +                       super:2,
> +                       : 2,
> +                       invept:1,
> +                       : 11;
> +               u32     invvpid:1;
> +       };
> +} ept_vpid;
> +
> +struct descr {
> +       u16 limit;
> +       u64 addr;
> +};
> +
> +enum Encoding {
> +       /* 16-Bit Control Fields */
> +       VPID                    = 0x0000ul,
> +       /* Posted-interrupt notification vector */
> +       PINV                    = 0x0002ul,
> +       /* EPTP index */
> +       EPTP_IDX                = 0x0004ul,
> +
> +       /* 16-Bit Guest State Fields */
> +       GUEST_SEL_ES            = 0x0800ul,
> +       GUEST_SEL_CS            = 0x0802ul,
> +       GUEST_SEL_SS            = 0x0804ul,
> +       GUEST_SEL_DS            = 0x0806ul,
> +       GUEST_SEL_FS            = 0x0808ul,
> +       GUEST_SEL_GS            = 0x080aul,
> +       GUEST_SEL_LDTR          = 0x080cul,
> +       GUEST_SEL_TR            = 0x080eul,
> +       GUEST_INT_STATUS        = 0x0810ul,
> +
> +       /* 16-Bit Host State Fields */
> +       HOST_SEL_ES             = 0x0c00ul,
> +       HOST_SEL_CS             = 0x0c02ul,
> +       HOST_SEL_SS             = 0x0c04ul,
> +       HOST_SEL_DS             = 0x0c06ul,
> +       HOST_SEL_FS             = 0x0c08ul,
> +       HOST_SEL_GS             = 0x0c0aul,
> +       HOST_SEL_TR             = 0x0c0cul,
> +
> +       /* 64-Bit Control Fields */
> +       IO_BITMAP_A             = 0x2000ul,
> +       IO_BITMAP_B             = 0x2002ul,
> +       MSR_BITMAP              = 0x2004ul,
> +       EXIT_MSR_ST_ADDR        = 0x2006ul,
> +       EXIT_MSR_LD_ADDR        = 0x2008ul,
> +       ENTER_MSR_LD_ADDR       = 0x200aul,
> +       VMCS_EXEC_PTR           = 0x200cul,
> +       TSC_OFFSET              = 0x2010ul,
> +       TSC_OFFSET_HI           = 0x2011ul,
> +       APIC_VIRT_ADDR          = 0x2012ul,
> +       APIC_ACCS_ADDR          = 0x2014ul,
> +       EPTP                    = 0x201aul,
> +       EPTP_HI                 = 0x201bul,
> +
> +       /* 64-Bit Readonly Data Field */
> +       INFO_PHYS_ADDR          = 0x2400ul,
> +
> +       /* 64-Bit Guest State */
> +       VMCS_LINK_PTR           = 0x2800ul,
> +       VMCS_LINK_PTR_HI        = 0x2801ul,
> +       GUEST_DEBUGCTL          = 0x2802ul,
> +       GUEST_DEBUGCTL_HI       = 0x2803ul,
> +       GUEST_EFER              = 0x2806ul,
> +       GUEST_PERF_GLOBAL_CTRL  = 0x2808ul,
> +       GUEST_PDPTE             = 0x280aul,
> +
> +       /* 64-Bit Host State */
> +       HOST_EFER               = 0x2c02ul,
> +       HOST_PERF_GLOBAL_CTRL   = 0x2c04ul,
> +
> +       /* 32-Bit Control Fields */
> +       PIN_CONTROLS            = 0x4000ul,
> +       CPU_EXEC_CTRL0          = 0x4002ul,
> +       EXC_BITMAP              = 0x4004ul,
> +       PF_ERROR_MASK           = 0x4006ul,
> +       PF_ERROR_MATCH          = 0x4008ul,
> +       CR3_TARGET_COUNT        = 0x400aul,
> +       EXI_CONTROLS            = 0x400cul,
> +       EXI_MSR_ST_CNT          = 0x400eul,
> +       EXI_MSR_LD_CNT          = 0x4010ul,
> +       ENT_CONTROLS            = 0x4012ul,
> +       ENT_MSR_LD_CNT          = 0x4014ul,
> +       ENT_INTR_INFO           = 0x4016ul,
> +       ENT_INTR_ERROR          = 0x4018ul,
> +       ENT_INST_LEN            = 0x401aul,
> +       TPR_THRESHOLD           = 0x401cul,
> +       CPU_EXEC_CTRL1          = 0x401eul,
> +
> +       /* 32-Bit R/O Data Fields */
> +       VMX_INST_ERROR          = 0x4400ul,
> +       EXI_REASON              = 0x4402ul,
> +       EXI_INTR_INFO           = 0x4404ul,
> +       EXI_INTR_ERROR          = 0x4406ul,
> +       IDT_VECT_INFO           = 0x4408ul,
> +       IDT_VECT_ERROR          = 0x440aul,
> +       EXI_INST_LEN            = 0x440cul,
> +       EXI_INST_INFO           = 0x440eul,
> +
> +       /* 32-Bit Guest State Fields */
> +       GUEST_LIMIT_ES          = 0x4800ul,
> +       GUEST_LIMIT_CS          = 0x4802ul,
> +       GUEST_LIMIT_SS          = 0x4804ul,
> +       GUEST_LIMIT_DS          = 0x4806ul,
> +       GUEST_LIMIT_FS          = 0x4808ul,
> +       GUEST_LIMIT_GS          = 0x480aul,
> +       GUEST_LIMIT_LDTR        = 0x480cul,
> +       GUEST_LIMIT_TR          = 0x480eul,
> +       GUEST_LIMIT_GDTR        = 0x4810ul,
> +       GUEST_LIMIT_IDTR        = 0x4812ul,
> +       GUEST_AR_ES             = 0x4814ul,
> +       GUEST_AR_CS             = 0x4816ul,
> +       GUEST_AR_SS             = 0x4818ul,
> +       GUEST_AR_DS             = 0x481aul,
> +       GUEST_AR_FS             = 0x481cul,
> +       GUEST_AR_GS             = 0x481eul,
> +       GUEST_AR_LDTR           = 0x4820ul,
> +       GUEST_AR_TR             = 0x4822ul,
> +       GUEST_INTR_STATE        = 0x4824ul,
> +       GUEST_ACTV_STATE        = 0x4826ul,
> +       GUEST_SMBASE            = 0x4828ul,
> +       GUEST_SYSENTER_CS       = 0x482aul,
> +
> +       /* 32-Bit Host State Fields */
> +       HOST_SYSENTER_CS        = 0x4c00ul,
> +
> +       /* Natural-Width Control Fields */
> +       CR0_MASK                = 0x6000ul,
> +       CR4_MASK                = 0x6002ul,
> +       CR0_READ_SHADOW = 0x6004ul,
> +       CR4_READ_SHADOW = 0x6006ul,
> +       CR3_TARGET_0            = 0x6008ul,
> +       CR3_TARGET_1            = 0x600aul,
> +       CR3_TARGET_2            = 0x600cul,
> +       CR3_TARGET_3            = 0x600eul,
> +
> +       /* Natural-Width R/O Data Fields */
> +       EXI_QUALIFICATION       = 0x6400ul,
> +       IO_RCX                  = 0x6402ul,
> +       IO_RSI                  = 0x6404ul,
> +       IO_RDI                  = 0x6406ul,
> +       IO_RIP                  = 0x6408ul,
> +       GUEST_LINEAR_ADDRESS    = 0x640aul,
> +
> +       /* Natural-Width Guest State Fields */
> +       GUEST_CR0               = 0x6800ul,
> +       GUEST_CR3               = 0x6802ul,
> +       GUEST_CR4               = 0x6804ul,
> +       GUEST_BASE_ES           = 0x6806ul,
> +       GUEST_BASE_CS           = 0x6808ul,
> +       GUEST_BASE_SS           = 0x680aul,
> +       GUEST_BASE_DS           = 0x680cul,
> +       GUEST_BASE_FS           = 0x680eul,
> +       GUEST_BASE_GS           = 0x6810ul,
> +       GUEST_BASE_LDTR         = 0x6812ul,
> +       GUEST_BASE_TR           = 0x6814ul,
> +       GUEST_BASE_GDTR         = 0x6816ul,
> +       GUEST_BASE_IDTR         = 0x6818ul,
> +       GUEST_DR7               = 0x681aul,
> +       GUEST_RSP               = 0x681cul,
> +       GUEST_RIP               = 0x681eul,
> +       GUEST_RFLAGS            = 0x6820ul,
> +       GUEST_PENDING_DEBUG     = 0x6822ul,
> +       GUEST_SYSENTER_ESP      = 0x6824ul,
> +       GUEST_SYSENTER_EIP      = 0x6826ul,
> +
> +       /* Natural-Width Host State Fields */
> +       HOST_CR0                = 0x6c00ul,
> +       HOST_CR3                = 0x6c02ul,
> +       HOST_CR4                = 0x6c04ul,
> +       HOST_BASE_FS            = 0x6c06ul,
> +       HOST_BASE_GS            = 0x6c08ul,
> +       HOST_BASE_TR            = 0x6c0aul,
> +       HOST_BASE_GDTR          = 0x6c0cul,
> +       HOST_BASE_IDTR          = 0x6c0eul,
> +       HOST_SYSENTER_ESP       = 0x6c10ul,
> +       HOST_SYSENTER_EIP       = 0x6c12ul,
> +       HOST_RSP                = 0x6c14ul,
> +       HOST_RIP                = 0x6c16ul
> +};
> +
> +enum Reason {
> +       VMX_EXC_NMI             = 0,
> +       VMX_EXTINT              = 1,
> +       VMX_TRIPLE_FAULT        = 2,
> +       VMX_INIT                = 3,
> +       VMX_SIPI                = 4,
> +       VMX_SMI_IO              = 5,
> +       VMX_SMI_OTHER           = 6,
> +       VMX_INTR_WINDOW         = 7,
> +       VMX_NMI_WINDOW          = 8,
> +       VMX_TASK_SWITCH         = 9,
> +       VMX_CPUID               = 10,
> +       VMX_GETSEC              = 11,
> +       VMX_HLT                 = 12,
> +       VMX_INVD                = 13,
> +       VMX_INVLPG              = 14,
> +       VMX_RDPMC               = 15,
> +       VMX_RDTSC               = 16,
> +       VMX_RSM                 = 17,
> +       VMX_VMCALL              = 18,
> +       VMX_VMCLEAR             = 19,
> +       VMX_VMLAUNCH            = 20,
> +       VMX_VMPTRLD             = 21,
> +       VMX_VMPTRST             = 22,
> +       VMX_VMREAD              = 23,
> +       VMX_VMRESUME            = 24,
> +       VMX_VMWRITE             = 25,
> +       VMX_VMXOFF              = 26,
> +       VMX_VMXON               = 27,
> +       VMX_CR                  = 28,
> +       VMX_DR                  = 29,
> +       VMX_IO                  = 30,
> +       VMX_RDMSR               = 31,
> +       VMX_WRMSR               = 32,
> +       VMX_FAIL_STATE          = 33,
> +       VMX_FAIL_MSR            = 34,
> +       VMX_MWAIT               = 36,
> +       VMX_MTF                 = 37,
> +       VMX_MONITOR             = 39,
> +       VMX_PAUSE               = 40,
> +       VMX_FAIL_MCHECK         = 41,
> +       VMX_TPR_THRESHOLD       = 43,
> +       VMX_APIC_ACCESS         = 44,
> +       VMX_GDTR_IDTR           = 46,
> +       VMX_LDTR_TR             = 47,
> +       VMX_EPT_VIOLATION       = 48,
> +       VMX_EPT_MISCONFIG       = 49,
> +       VMX_INVEPT              = 50,
> +       VMX_PREEMPT             = 52,
> +       VMX_INVVPID             = 53,
> +       VMX_WBINVD              = 54,
> +       VMX_XSETBV              = 55
> +};
> +
> +#define X86_EFLAGS_CF  0x00000001 /* Carry Flag */
> +#define X86_EFLAGS_ZF  0x00000040 /* Zero Flag */
> +
> +enum Ctrl_exi {
> +       EXI_HOST_64             = 1UL << 9,
> +       EXI_LOAD_PERF           = 1UL << 12,
> +       EXI_INTA                = 1UL << 15,
> +       EXI_LOAD_EFER           = 1UL << 21,
> +};
> +
> +enum Ctrl_ent {
> +       ENT_GUEST_64            = 1UL << 9,
> +       ENT_LOAD_EFER           = 1UL << 15,
> +};
> +
> +enum Ctrl_pin {
> +       PIN_EXTINT              = 1ul << 0,
> +       PIN_NMI                 = 1ul << 3,
> +       PIN_VIRT_NMI            = 1ul << 5,
> +};
> +
> +enum Ctrl0 {
> +       CPU_INTR_WINDOW         = 1ul << 2,
> +       CPU_HLT                 = 1ul << 7,
> +       CPU_INVLPG              = 1ul << 9,
> +       CPU_CR3_LOAD            = 1ul << 15,
> +       CPU_CR3_STORE           = 1ul << 16,
> +       CPU_TPR_SHADOW          = 1ul << 21,
> +       CPU_NMI_WINDOW          = 1ul << 22,
> +       CPU_IO                  = 1ul << 24,
> +       CPU_IO_BITMAP           = 1ul << 25,
> +       CPU_SECONDARY           = 1ul << 31,
> +};
> +
> +enum Ctrl1 {
> +       CPU_EPT                 = 1ul << 1,
> +       CPU_VPID                = 1ul << 5,
> +       CPU_URG                 = 1ul << 7,
> +};
> +
> +#define SEL_NULL_DESC          0x0
> +#define SEL_KERN_CODE_64       0x8
> +#define SEL_KERN_DATA_64       0x10
> +#define SEL_USER_CODE_64       0x18
> +#define SEL_USER_DATA_64       0x20
> +#define SEL_CODE_32            0x28
> +#define SEL_DATA_32            0x30
> +#define SEL_CODE_16            0x38
> +#define SEL_DATA_16            0x40
> +#define SEL_TSS_RUN            0x48
> +
> +#define SAVE_GPR                               \
> +       "xchg %rax, regs\n\t"                   \
> +       "xchg %rbx, regs+0x8\n\t"               \
> +       "xchg %rcx, regs+0x10\n\t"              \
> +       "xchg %rdx, regs+0x18\n\t"              \
> +       "xchg %rbp, regs+0x28\n\t"              \
> +       "xchg %rsi, regs+0x30\n\t"              \
> +       "xchg %rdi, regs+0x38\n\t"              \
> +       "xchg %r8, regs+0x40\n\t"               \
> +       "xchg %r9, regs+0x48\n\t"               \
> +       "xchg %r10, regs+0x50\n\t"              \
> +       "xchg %r11, regs+0x58\n\t"              \
> +       "xchg %r12, regs+0x60\n\t"              \
> +       "xchg %r13, regs+0x68\n\t"              \
> +       "xchg %r14, regs+0x70\n\t"              \
> +       "xchg %r15, regs+0x78\n\t"
> +
> +#define LOAD_GPR       SAVE_GPR
> +
> +#define CR0_PE         (1ul << 0)
> +#define CR0_PG         (1ul << 31)
> +#define CR4_VMXE       (1ul << 0)
> +#define CR4_PAE                (1ul << 5)
> +#define CR4_PCIDE      (1ul << 17)
> +
> +#define VMX_IO_SIZE_MASK               0x7
> +#define _VMX_IO_BYTE                   1
> +#define _VMX_IO_WORD                   2
> +#define _VMX_IO_LONG                   3
> +#define VMX_IO_DIRECTION_MASK          (1ul << 3)
> +#define VMX_IO_IN                      (1ul << 3)
> +#define VMX_IO_OUT                     0
> +#define VMX_IO_STRING                  (1ul << 4)
> +#define VMX_IO_REP                     (1ul << 5)
> +#define VMX_IO_OPRAND_DX               (1ul << 6)
> +#define VMX_IO_PORT_MASK               0xFFFF0000
> +#define VMX_IO_PORT_SHIFT              16
> +
> +#define TEST_VMRESUME          0x1001
> +
> +#endif
> +
> --
> 1.7.9.5
>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] kvm-unit-tests : The first version of VMX nested test case
  2013-07-16  9:35 ` Arthur Chunqi Li
@ 2013-07-16  9:45   ` Gleb Natapov
  2013-07-16  9:53     ` Arthur Chunqi Li
  0 siblings, 1 reply; 14+ messages in thread
From: Gleb Natapov @ 2013-07-16  9:45 UTC (permalink / raw)
  To: Arthur Chunqi Li; +Cc: kvm, Jan Kiszka, Paolo Bonzini

On Tue, Jul 16, 2013 at 05:35:16PM +0800, Arthur Chunqi Li wrote:
> Hi there,
> This is a version calling for comments. Some minor changes should be
Add RFC before PATCH for such submission please.

> done before final commitment (TODOs in it), because these places are
> related to the bugs I have committed in the previous weeks and the
> relevant patches are not accepted. But these bugs are all about some
I am aware of only one patch. Did I miss/forgot something?

> unexpected occasions and the hypervisor can run if we simply ignore
> them. After all bugs fixed, everything will be OK.
> 
> Arthur
> 

--
			Gleb.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] kvm-unit-tests : The first version of VMX nested test case
  2013-07-16  9:45   ` Gleb Natapov
@ 2013-07-16  9:53     ` Arthur Chunqi Li
  2013-07-16  9:58       ` Gleb Natapov
  0 siblings, 1 reply; 14+ messages in thread
From: Arthur Chunqi Li @ 2013-07-16  9:53 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm, Jan Kiszka, Paolo Bonzini

On Tue, Jul 16, 2013 at 5:45 PM, Gleb Natapov <gleb@redhat.com> wrote:
> On Tue, Jul 16, 2013 at 05:35:16PM +0800, Arthur Chunqi Li wrote:
>> Hi there,
>> This is a version calling for comments. Some minor changes should be
> Add RFC before PATCH for such submission please.
>
>> done before final commitment (TODOs in it), because these places are
>> related to the bugs I have commited in the previous weeks and the
>> relevant patches are not accpeted. But these bugs are all about some
> I am aware of only one patch. Did I miss/forgot something?
I have only committed one patch related to VMX tests. The other patches
cited here are about bug fixes such as
[http://www.mail-archive.com/kvm@vger.kernel.org/msg92932.html]
[http://www.mail-archive.com/kvm@vger.kernel.org/msg93046.html], which
will cause some test cases fail in this patch.

I have been discussing with Jan in the past weeks because most of the
questions are technical affairs, which I don't think are suitable to
discuss in the community. This is a rather mature version and I think
I can commit it with some minor changes.
>
>> unexpected occasions and the hypervisor can run if we simply ignore
>> them. After all bugs fixed, everything will be OK.
>>
>> Arthur
>>
>
> --
>                         Gleb.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] kvm-unit-tests : The first version of VMX nested test case
  2013-07-16  9:53     ` Arthur Chunqi Li
@ 2013-07-16  9:58       ` Gleb Natapov
  0 siblings, 0 replies; 14+ messages in thread
From: Gleb Natapov @ 2013-07-16  9:58 UTC (permalink / raw)
  To: Arthur Chunqi Li; +Cc: kvm, Jan Kiszka, Paolo Bonzini

On Tue, Jul 16, 2013 at 05:53:56PM +0800, Arthur Chunqi Li wrote:
> On Tue, Jul 16, 2013 at 5:45 PM, Gleb Natapov <gleb@redhat.com> wrote:
> > On Tue, Jul 16, 2013 at 05:35:16PM +0800, Arthur Chunqi Li wrote:
> >> Hi there,
> >> This is a version calling for comments. Some minor changes should be
> > Add RFC before PATCH for such submission please.
> >
> >> done before final commitment (TODOs in it), because these places are
> >> related to the bugs I have committed in the previous weeks and the
> >> relevant patches are not accepted. But these bugs are all about some
> > I am aware of only one patch. Did I miss/forgot something?
> I have only committed one patch related to VMX tests. The other patches
> cited here are about bug fixes such as
> [http://www.mail-archive.com/kvm@vger.kernel.org/msg92932.html]
> [http://www.mail-archive.com/kvm@vger.kernel.org/msg93046.html], which
> will cause some test cases fail in this patch.
> 
Those are applied to queue already. I am aware of one that is not
applied yet but it looks fine to me, waiting for Paolo's review.

> I have been discussing with Jan in the past weeks because most of the
> questions are technical affairs, which I don't think it's suitable to
> discuss in the community. This is a rather mature version and I think
> I can commit it with some minor changes.
> >
> >> unexpected occasions and the hypervisor can run if we simply ignore
> >> them. After all bugs fixed, everything will be OK.
> >>
> >> Arthur
> >>
> >
> > --
> >                         Gleb.

--
			Gleb.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] kvm-unit-tests : The first version of VMX nested test case
  2013-07-16  9:27 [PATCH] kvm-unit-tests : The first version of VMX nested test case Arthur Chunqi Li
  2013-07-16  9:35 ` Arthur Chunqi Li
@ 2013-07-16 10:28 ` Paolo Bonzini
  2013-07-16 11:47   ` Arthur Chunqi Li
  2013-07-16 15:20   ` Gleb Natapov
  1 sibling, 2 replies; 14+ messages in thread
From: Paolo Bonzini @ 2013-07-16 10:28 UTC (permalink / raw)
  To: Arthur Chunqi Li; +Cc: kvm, jan.kiszka, gleb

Il 16/07/2013 11:27, Arthur Chunqi Li ha scritto:
> This is the first version for VMX nested environment test case. It
> contains the basic VMX instructions test cases, including VMXON/
> VMXOFF/VMXPTRLD/VMXPTRST/VMCLEAR/VMLAUNCH/VMRESUME/VMCALL. This patch
> also tests the basic execution routine in VMX nested environment and
> let the VM print "Hello World" to inform its successfully run.
> 
> New files added:
> x86/vmx.h : contains all VMX related macro declerations
> x86/vmx.c : main file for VMX nested test case
> 
> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
> ---
>  config-x86-common.mak |    2 +
>  config-x86_64.mak     |    1 +
>  lib/x86/msr.h         |    5 +
>  x86/cstart64.S        |    4 +
>  x86/unittests.cfg     |    6 +
>  x86/vmx.c             |  568 +++++++++++++++++++++++++++++++++++++++++++++++++
>  x86/vmx.h             |  406 +++++++++++++++++++++++++++++++++++
>  7 files changed, 992 insertions(+)
>  create mode 100644 x86/vmx.c
>  create mode 100644 x86/vmx.h
> 
> diff --git a/config-x86-common.mak b/config-x86-common.mak
> index 455032b..34a41e1 100644
> --- a/config-x86-common.mak
> +++ b/config-x86-common.mak
> @@ -101,6 +101,8 @@ $(TEST_DIR)/asyncpf.elf: $(cstart.o) $(TEST_DIR)/asyncpf.o
>  
>  $(TEST_DIR)/pcid.elf: $(cstart.o) $(TEST_DIR)/pcid.o
>  
> +$(TEST_DIR)/vmx.elf: $(cstart.o) $(TEST_DIR)/vmx.o
> +
>  arch_clean:
>  	$(RM) $(TEST_DIR)/*.o $(TEST_DIR)/*.flat $(TEST_DIR)/*.elf \
>  	$(TEST_DIR)/.*.d $(TEST_DIR)/lib/.*.d $(TEST_DIR)/lib/*.o
> diff --git a/config-x86_64.mak b/config-x86_64.mak
> index 4e525f5..bb8ee89 100644
> --- a/config-x86_64.mak
> +++ b/config-x86_64.mak
> @@ -9,5 +9,6 @@ tests = $(TEST_DIR)/access.flat $(TEST_DIR)/apic.flat \
>  	  $(TEST_DIR)/xsave.flat $(TEST_DIR)/rmap_chain.flat \
>  	  $(TEST_DIR)/pcid.flat
>  tests += $(TEST_DIR)/svm.flat
> +tests += $(TEST_DIR)/vmx.flat
>  
>  include config-x86-common.mak
> diff --git a/lib/x86/msr.h b/lib/x86/msr.h
> index 509a421..281255a 100644
> --- a/lib/x86/msr.h
> +++ b/lib/x86/msr.h
> @@ -396,6 +396,11 @@
>  #define MSR_IA32_VMX_VMCS_ENUM          0x0000048a
>  #define MSR_IA32_VMX_PROCBASED_CTLS2    0x0000048b
>  #define MSR_IA32_VMX_EPT_VPID_CAP       0x0000048c
> +#define MSR_IA32_VMX_TRUE_PIN		0x0000048d
> +#define MSR_IA32_VMX_TRUE_PROC		0x0000048e
> +#define MSR_IA32_VMX_TRUE_EXIT		0x0000048f
> +#define MSR_IA32_VMX_TRUE_ENTRY		0x00000490
> +
>  
>  /* AMD-V MSRs */
>  
> diff --git a/x86/cstart64.S b/x86/cstart64.S
> index 24df5f8..0fe76da 100644
> --- a/x86/cstart64.S
> +++ b/x86/cstart64.S
> @@ -4,6 +4,10 @@
>  .globl boot_idt
>  boot_idt = 0
>  
> +.globl idt_descr
> +.globl tss_descr
> +.globl gdt64_desc
> +
>  ipi_vector = 0x20
>  
>  max_cpus = 64
> diff --git a/x86/unittests.cfg b/x86/unittests.cfg
> index bc9643e..e846739 100644
> --- a/x86/unittests.cfg
> +++ b/x86/unittests.cfg
> @@ -149,3 +149,9 @@ extra_params = --append "10000000 `date +%s`"
>  file = pcid.flat
>  extra_params = -cpu qemu64,+pcid
>  arch = x86_64
> +
> +[vmx]
> +file = vmx.flat
> +extra_params = -cpu Nehalem,+vmx

Should this use "-cpu host" instead? (Or "-cpu host,+vmx", I don't
remember).

> +arch = x86_64
> +
> diff --git a/x86/vmx.c b/x86/vmx.c
> new file mode 100644
> index 0000000..0435746
> --- /dev/null
> +++ b/x86/vmx.c
> @@ -0,0 +1,568 @@
> +#include "libcflat.h"
> +#include "processor.h"
> +#include "vm.h"
> +#include "desc.h"
> +#include "vmx.h"
> +#include "msr.h"
> +#include "smp.h"
> +#include "io.h"
> +
> +
> +int fails = 0, tests = 0;
> +u32 *vmxon_region;
> +struct vmcs *vmcs_root;
> +void *io_bmp1, *io_bmp2;
> +void *msr_bmp;
> +u32 vpid_ctr;
> +char *guest_stack, *host_stack;
> +char *guest_syscall_stack, *host_syscall_stack;
> +u32 ctrl_pin, ctrl_enter, ctrl_exit, ctrl_cpu[2];
> +ulong fix_cr0_set, fix_cr0_clr;
> +ulong fix_cr4_set, fix_cr4_clr;
> +struct regs regs;
> +
> +extern u64 gdt64_desc[];
> +extern u64 idt_descr[];
> +extern u64 tss_descr[];
> +extern void *entry_vmx;
> +extern void *entry_sysenter;
> +extern void *entry_guest;
> +
> +void report(const char *name, int result)
> +{
> +	++tests;
> +	if (result)
> +		printf("PASS: %s\n", name);
> +	else {
> +		printf("FAIL: %s\n", name);
> +		++fails;
> +	}
> +}
> +
> +inline u64 get_rflags(void)
> +{
> +	u64 r;
> +	asm volatile("pushf; pop %0\n\t" : "=q"(r) : : "cc");
> +	return r;
> +}
> +
> +inline void set_rflags(u64 r)
> +{
> +	asm volatile("push %0; popf\n\t" : : "q"(r) : "cc");
> +}
> +
> +int vmcs_clear(struct vmcs *vmcs)
> +{
> +	bool ret;
> +	asm volatile ("vmclear %1; seta %0" : "=q" (ret) : "m" (vmcs) : "cc");

You can use "setbe", it's clearer and avoids the ! in the return statement.

We should later add tests for failure conditions, since failing to
detect errors could give rise to L2->L1 attack vectors.  When we do so,
we will have to distinguish CF from ZF.


> +	return !ret;
> +}
> +
> +u64 vmcs_read(enum Encoding enc)
> +{
> +	u64 val;
> +	asm volatile ("vmread %1, %0" : "=rm" (val) : "r" ((u64)enc) : "cc");
> +	return val;
> +}
> +
> +int vmcs_write(enum Encoding enc, u64 val)
> +{
> +	bool ret;
> +	asm volatile ("vmwrite %1, %2; seta %0"
> +		: "=q"(ret) : "rm" (val), "r" ((u64)enc) : "cc");
> +	return !ret;
> +}
> +
> +int make_vmcs_current(struct vmcs *vmcs)
> +{
> +	bool ret;
> +
> +	asm volatile ("vmptrld %1; seta %0" : "=q" (ret) : "m" (vmcs) : "cc");
> +	return !ret;
> +}
> +
> +int save_vmcs(struct vmcs **vmcs)
> +{
> +	bool ret;
> +
> +	asm volatile ("vmptrst %1; seta %0" : "=q" (ret) : "m" (*vmcs) : "cc");
> +	return !ret;
> +}
> +
> +/* entry_vmx */
> +asm(
> +	".align	4, 0x90\n\t"
> +	".globl	entry_vmx\n\t"
> +	"entry_vmx:\n\t"
> +	SAVE_GPR
> +	"	call	vmx_handler\n\t"
> +	LOAD_GPR
> +	"	vmresume\n\t"
> +);
> +
> +/* entry_sysenter */
> +asm(
> +	".align	4, 0x90\n\t"
> +	".globl	entry_sysenter\n\t"
> +	"entry_sysenter:\n\t"
> +	SAVE_GPR
> +	"	and	$0xf, %rax\n\t"
> +	"	push	%rax\n\t"
> +	"	call	syscall_handler\n\t"
> +);
> +
> +void syscall_handler(u64 syscall_no)
> +{
> +	printf("Here in syscall_handler, syscall_no = %d\n", syscall_no);
> +}
> +
> +void vmx_run()
> +{
> +	bool ret;
> +	printf("Now run vm.\n\n");
> +	asm volatile("vmlaunch;seta %0\n\t" : "=m"(ret));
> +	printf("VMLAUNCH error, ret=%d\n", ret);
> +}
> +
> +void vmx_resume()
> +{
> +	asm volatile(LOAD_GPR
> +		"vmresume\n\t");
> +	/* VMRESUME fail if reach here */
> +}
> +
> +void print_vmexit_info()
> +{
> +	u64 guest_rip, guest_rsp;
> +	ulong reason = vmcs_read(EXI_REASON) & 0xff;
> +	ulong exit_qual = vmcs_read(EXI_QUALIFICATION);
> +	guest_rip = vmcs_read(GUEST_RIP);
> +	guest_rsp = vmcs_read(GUEST_RSP);
> +	printf("VMEXIT info:\n");
> +	printf("\tvmexit reason = %d\n", reason);
> +	printf("\texit qualification = 0x%x\n", exit_qual);
> +	printf("\tBit 31 of reason = %x\n", (vmcs_read(EXI_REASON) >> 31) & 1);
> +	printf("\tguest_rip = 0x%llx\n", guest_rip);
> +	printf("\tRAX=0x%llx    RBX=0x%llx    RCX=0x%llx    RDX=0x%llx\n",
> +		regs.rax, regs.rbx, regs.rcx, regs.rdx);
> +	printf("\tRSP=0x%llx    RBP=0x%llx    RSI=0x%llx    RDI=0x%llx\n",
> +		guest_rsp, regs.rbp, regs.rsi, regs.rdi);
> +	printf("\tR8 =0x%llx    R9 =0x%llx    R10=0x%llx    R11=0x%llx\n",
> +		regs.r8, regs.r9, regs.r10, regs.r11);
> +	printf("\tR12=0x%llx    R13=0x%llx    R14=0x%llx    R15=0x%llx\n",
> +		regs.r12, regs.r13, regs.r14, regs.r15);
> +}
> +
> +void test_vmclear(void)
> +{
> +	u64 rflags;
> +
> +	rflags = get_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;
> +	set_rflags(rflags);
> +	report("test vmclear", vmcs_clear(vmcs_root) == 0);
> +}
> +
> +void test_vmxoff(void)
> +{
> +	bool ret;
> +	u64 rflags;
> +
> +	rflags = get_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;
> +	set_rflags(rflags);
> +	asm volatile("vmxoff; seta %0\n\t" : "=q"(ret) : : "cc");
> +	report("test vmxoff", ret);
> +}
> +
> +void vmx_exit(void)
> +{
> +	test_vmxoff();
> +	printf("\nSUMMARY: %d tests, %d failures\n", tests, fails);
> +	exit(fails ? -1 : 0);
> +}

Can you try to jump back to main, and do test_vmxoff there?  This will
avoid having to write our tests in callback style, which is a pain.
Basically something similar to setjmp/longjmp.  In main:

	if (setjmp(jmpbuf) == 0) {
		vmx_run();
		/* Should not reach here */
		report("test vmlaunch", 0);
	}
	test_vmxoff();

exit:
	printf("\nSUMMARY: %d tests, %d failures\n", tests, fails);
	return fails ? 1 : 0;

In vmx_handler:

	case VMX_HLT:
		printf("\nVM exit.\n");
		longjmp(jmpbuf, 1);

> +void vmx_handler()
> +{
> +	u64 guest_rip;
> +	ulong reason = vmcs_read(EXI_REASON) & 0xff;
> +
> +	if ((read_cr4() & CR4_PAE) && (read_cr0() & CR0_PG)
> +		&& !(rdmsr(MSR_EFER) & EFER_LMA))
> +		printf("ERROR : PDPTEs should be checked\n");
> +
> +	guest_rip = vmcs_read(GUEST_RIP);
> +
> +	switch (reason) {
> +	case VMX_VMCALL:
> +		switch (regs.rax) {
> +		case TEST_VMRESUME:
> +			regs.rax = 0xFFFF;
> +			break;
> +		default:
> +			printf("ERROR : Invalid VMCALL param : %d\n", regs.rax);
> +		}
> +		vmcs_write(GUEST_RIP, guest_rip + 3);
> +		goto vmx_resume;
> +	case VMX_IO:
> +		print_vmexit_info();
> +		break;
> +	case VMX_HLT:
> +		printf("\nVM exit.\n");
> +		vmx_exit();
> +		/* Should not reach here */
> +		goto vmx_exit;
> +	case VMX_EXC_NMI:
> +	case VMX_EXTINT:
> +	case VMX_INVLPG:
> +	case VMX_CR:
> +	case VMX_EPT_VIOLATION:
> +	default:
> +		break;
> +	}
> +	printf("ERROR : Unhandled vmx exit.\n");
> +	print_vmexit_info();
> +vmx_exit:
> +	exit(-1);
> +vmx_resume:
> +	vmx_resume();
> +	/* Should not reach here */
> +	exit(-1);
> +}
> +
> +void test_vmresume()
> +{
> +	u64 rax;
> +	u64 rsp, resume_rsp;
> +
> +	rax = 0;
> +	asm volatile("mov %%rsp, %0\n\t" : "=r"(rsp));
> +	asm volatile("mov %2, %%rax\n\t"
> +		"vmcall\n\t"
> +		"mov %%rax, %0\n\t"
> +		"mov %%rsp, %1\n\t"
> +		: "=r"(rax), "=r"(resume_rsp)
> +		: "g"(TEST_VMRESUME));
> +	report("test vmresume", (rax == 0xFFFF) && (rsp == resume_rsp));
> +}
> +
> +/* entry_guest */
> +asm(
> +	".align	4, 0x90\n\t"
> +	".globl	entry_guest\n\t"
> +	"entry_guest:\n\t"
> +	"	call	guest_main\n\t"
> +	"	hlt\n\t"
> +);
> +
> +void guest_main(void)
> +{
> +	/* If reach here, VMLAUNCH runs OK */
> +	report("test vmlaunch", 1);
> +	printf("cr0 in guest = %llx\n", read_cr0());
> +	printf("cr3 in guest = %llx\n", read_cr3());
> +	printf("cr4 in guest = %llx\n", read_cr4());
> +	printf("\nHello World!\n");
> +	test_vmresume();
> +}
> +
> +void init_vmcs_ctrl(void)
> +{
> +	/* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */
> +	/* 26.2.1.1 */
> +	vmcs_write(PIN_CONTROLS, ctrl_pin);
> +	/* Disable VMEXIT of IO instruction */
> +	vmcs_write(CPU_EXEC_CTRL0, ctrl_cpu[0]);
> +	if (ctrl_cpu_rev[0].set & CPU_SECONDARY) {
> +		ctrl_cpu[1] |= ctrl_cpu_rev[1].set & ctrl_cpu_rev[1].clr;
> +		vmcs_write(CPU_EXEC_CTRL1, ctrl_cpu[1]);
> +	}
> +	vmcs_write(CR3_TARGET_COUNT, 0);
> +	io_bmp1 = alloc_page();
> +	io_bmp2 = alloc_page();
> +	memset(io_bmp1, 0, PAGE_SIZE);
> +	memset(io_bmp2, 0, PAGE_SIZE);
> +	vmcs_write(IO_BITMAP_A, (u64)io_bmp1);
> +	vmcs_write(IO_BITMAP_B, (u64)io_bmp2);
> +	msr_bmp = alloc_page();
> +	memset(msr_bmp, 0, PAGE_SIZE);
> +	vmcs_write(MSR_BITMAP, (u64)msr_bmp);
> +	vmcs_write(VPID, ++vpid_ctr);
> +}
> +
> +void init_vmcs_host(void)
> +{
> +	/* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */
> +	/* 26.2.1.2 */
> +	vmcs_write(HOST_EFER, rdmsr(MSR_EFER));
> +
> +	/* 26.2.1.3 */
> +	vmcs_write(ENT_CONTROLS, ctrl_enter);
> +	vmcs_write(EXI_CONTROLS, ctrl_exit);
> +
> +	/* 26.2.2 */
> +	vmcs_write(HOST_CR0, read_cr0());
> +	vmcs_write(HOST_CR3, read_cr3());
> +	vmcs_write(HOST_CR4, read_cr4());
> +	vmcs_write(HOST_SYSENTER_ESP,
> +		(u64)(host_syscall_stack + PAGE_SIZE - 1));
> +	vmcs_write(HOST_SYSENTER_EIP, (u64)(&entry_sysenter));
> +	vmcs_write(HOST_SYSENTER_CS,  SEL_KERN_CODE_64);
> +
> +	/* 26.2.3 */
> +	vmcs_write(HOST_SEL_CS, SEL_KERN_CODE_64);
> +	vmcs_write(HOST_SEL_SS, SEL_KERN_DATA_64);
> +	vmcs_write(HOST_SEL_DS, SEL_KERN_DATA_64);
> +	vmcs_write(HOST_SEL_ES, SEL_KERN_DATA_64);
> +	vmcs_write(HOST_SEL_FS, SEL_KERN_DATA_64);
> +	vmcs_write(HOST_SEL_GS, SEL_KERN_DATA_64);
> +	vmcs_write(HOST_SEL_TR, SEL_TSS_RUN);
> +	vmcs_write(HOST_BASE_TR,   (u64)tss_descr);
> +	vmcs_write(HOST_BASE_GDTR, (u64)gdt64_desc);
> +	vmcs_write(HOST_BASE_IDTR, (u64)idt_descr);
> +	vmcs_write(HOST_BASE_FS, 0);
> +	vmcs_write(HOST_BASE_GS, 0);
> +
> +	/* Set other vmcs area */
> +	vmcs_write(PF_ERROR_MASK, 0);
> +	vmcs_write(PF_ERROR_MATCH, 0);
> +	vmcs_write(VMCS_LINK_PTR, ~0ul);
> +	vmcs_write(VMCS_LINK_PTR_HI, ~0ul);
> +	vmcs_write(HOST_RSP, (u64)(host_stack + PAGE_SIZE - 1));
> +	vmcs_write(HOST_RIP, (u64)(&entry_vmx));
> +}
> +
> +void init_vmcs_guest(void)
> +{
> +	/* 26.3 CHECKING AND LOADING GUEST STATE */
> +	ulong guest_cr0, guest_cr4, guest_cr3;
> +	/* 26.3.1.1 */
> +	guest_cr0 = read_cr0();
> +	guest_cr4 = read_cr4();
> +	guest_cr3 = read_cr3();
> +	if (ctrl_enter & ENT_GUEST_64) {
> +		guest_cr0 |= CR0_PG;
> +		guest_cr4 |= CR4_PAE;
> +	}
> +	if ((ctrl_enter & ENT_GUEST_64) == 0)
> +		guest_cr4 &= (~CR4_PCIDE);
> +	if (guest_cr0 & CR0_PG)
> +		guest_cr0 |= CR0_PE;
> +	vmcs_write(GUEST_CR0, guest_cr0);
> +	vmcs_write(GUEST_CR3, guest_cr3);
> +	vmcs_write(GUEST_CR4, guest_cr4);
> +	vmcs_write(GUEST_SYSENTER_CS,  SEL_KERN_CODE_64);
> +	vmcs_write(GUEST_SYSENTER_ESP,
> +		(u64)(guest_syscall_stack + PAGE_SIZE - 1));
> +	vmcs_write(GUEST_SYSENTER_EIP, (u64)(&entry_sysenter));
> +	vmcs_write(GUEST_DR7, 0);
> +	vmcs_write(GUEST_EFER, rdmsr(MSR_EFER));
> +
> +	/* 26.3.1.2 */
> +	vmcs_write(GUEST_SEL_CS, SEL_KERN_CODE_64);
> +	vmcs_write(GUEST_SEL_SS, SEL_KERN_DATA_64);
> +	vmcs_write(GUEST_SEL_DS, SEL_KERN_DATA_64);
> +	vmcs_write(GUEST_SEL_ES, SEL_KERN_DATA_64);
> +	vmcs_write(GUEST_SEL_FS, SEL_KERN_DATA_64);
> +	vmcs_write(GUEST_SEL_GS, SEL_KERN_DATA_64);
> +	vmcs_write(GUEST_SEL_TR, SEL_TSS_RUN);
> +	vmcs_write(GUEST_SEL_LDTR, 0);
> +
> +	vmcs_write(GUEST_BASE_CS, 0);
> +	vmcs_write(GUEST_BASE_ES, 0);
> +	vmcs_write(GUEST_BASE_SS, 0);
> +	vmcs_write(GUEST_BASE_DS, 0);
> +	vmcs_write(GUEST_BASE_FS, 0);
> +	vmcs_write(GUEST_BASE_GS, 0);
> +	vmcs_write(GUEST_BASE_TR,   (u64)tss_descr);
> +	vmcs_write(GUEST_BASE_LDTR, 0);
> +
> +	vmcs_write(GUEST_LIMIT_CS, 0xFFFFFFFF);
> +	vmcs_write(GUEST_LIMIT_DS, 0xFFFFFFFF);
> +	vmcs_write(GUEST_LIMIT_ES, 0xFFFFFFFF);
> +	vmcs_write(GUEST_LIMIT_SS, 0xFFFFFFFF);
> +	vmcs_write(GUEST_LIMIT_FS, 0xFFFFFFFF);
> +	vmcs_write(GUEST_LIMIT_GS, 0xFFFFFFFF);
> +	vmcs_write(GUEST_LIMIT_LDTR, 0xffff);
> +	vmcs_write(GUEST_LIMIT_TR, ((struct descr *)tss_descr)->limit);
> +
> +	vmcs_write(GUEST_AR_CS, 0xa09b);
> +	vmcs_write(GUEST_AR_DS, 0xc093);
> +	vmcs_write(GUEST_AR_ES, 0xc093);
> +	vmcs_write(GUEST_AR_FS, 0xc093);
> +	vmcs_write(GUEST_AR_GS, 0xc093);
> +	vmcs_write(GUEST_AR_SS, 0xc093);
> +	vmcs_write(GUEST_AR_LDTR, 0x82);
> +	vmcs_write(GUEST_AR_TR, 0x8b);
> +
> +	/* 26.3.1.3 */
> +	vmcs_write(GUEST_BASE_GDTR, (u64)gdt64_desc);
> +	vmcs_write(GUEST_BASE_IDTR, (u64)idt_descr);
> +	vmcs_write(GUEST_LIMIT_GDTR,
> +		((struct descr *)gdt64_desc)->limit & 0xffff);
> +	vmcs_write(GUEST_LIMIT_IDTR,
> +		((struct descr *)idt_descr)->limit & 0xffff);
> +
> +	/* 26.3.1.4 */
> +	vmcs_write(GUEST_RIP, (u64)(&entry_guest));
> +	vmcs_write(GUEST_RSP, (u64)(guest_stack + PAGE_SIZE - 1));
> +	vmcs_write(GUEST_RFLAGS, 0x2);
> +
> +	/* 26.3.1.5 */
> +	vmcs_write(GUEST_ACTV_STATE, 0);
> +	vmcs_write(GUEST_INTR_STATE, 0);
> +}
> +
> +int init_vmcs(struct vmcs **vmcs)
> +{
> +	*vmcs = alloc_page();
> +	memset(*vmcs, 0, PAGE_SIZE);
> +	(*vmcs)->revision_id = basic.revision;
> +	/* vmclear first to init vmcs */
> +	if (vmcs_clear(*vmcs)) {
> +		printf("%s : vmcs_clear error\n", __func__);
> +		return 1;
> +	}
> +
> +	if (make_vmcs_current(*vmcs)) {
> +		printf("%s : make_vmcs_current error\n", __func__);
> +		return 1;
> +	}
> +
> +	/* All settings to pin/exit/enter/cpu
> +	   control fields should place here */

should be placed here

> +	ctrl_pin |= PIN_EXTINT | PIN_NMI | PIN_VIRT_NMI;
> +	ctrl_exit = EXI_LOAD_EFER | EXI_HOST_64;
> +	ctrl_enter = (ENT_LOAD_EFER | ENT_GUEST_64);
> +	ctrl_cpu[0] |= CPU_HLT;
> +	/* DIsable IO instruction VMEXIT now */
> +	ctrl_cpu[0] &= (~(CPU_IO | CPU_IO_BITMAP));
> +	ctrl_cpu[1] = 0;
> +
> +	ctrl_pin = (ctrl_pin | ctrl_pin_rev.set) & ctrl_pin_rev.clr;
> +	ctrl_enter = (ctrl_enter | ctrl_enter_rev.set) & ctrl_enter_rev.clr;
> +	ctrl_exit = (ctrl_exit | ctrl_exit_rev.set) & ctrl_exit_rev.clr;
> +	ctrl_cpu[0] = (ctrl_cpu[0] | ctrl_cpu_rev[0].set) & ctrl_cpu_rev[0].clr;
> +
> +	init_vmcs_ctrl();
> +	init_vmcs_host();
> +	init_vmcs_guest();
> +	return 0;
> +}
> +
> +void init_vmx(void)
> +{
> +	vmxon_region = alloc_page();
> +	memset(vmxon_region, 0, PAGE_SIZE);
> +
> +	fix_cr0_set =  rdmsr(MSR_IA32_VMX_CR0_FIXED0);
> +	fix_cr0_clr =  rdmsr(MSR_IA32_VMX_CR0_FIXED1);
> +	fix_cr4_set =  rdmsr(MSR_IA32_VMX_CR4_FIXED0);
> +	fix_cr4_clr = rdmsr(MSR_IA32_VMX_CR4_FIXED1);
> +	basic.val = rdmsr(MSR_IA32_VMX_BASIC);
> +	ctrl_pin_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PIN
> +			: MSR_IA32_VMX_PINBASED_CTLS);
> +	ctrl_exit_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_EXIT
> +			: MSR_IA32_VMX_EXIT_CTLS);
> +	ctrl_enter_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_ENTRY
> +			: MSR_IA32_VMX_ENTRY_CTLS);
> +	ctrl_cpu_rev[0].val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PROC
> +			: MSR_IA32_VMX_PROCBASED_CTLS);
> +	if (ctrl_cpu_rev[0].set & CPU_SECONDARY)
> +		ctrl_cpu_rev[1].val = rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2);
> +	if (ctrl_cpu_rev[1].set & CPU_EPT || ctrl_cpu_rev[1].set & CPU_VPID)
> +		ept_vpid.val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP);
> +
> +	write_cr0((read_cr0() & fix_cr0_clr) | fix_cr0_set);
> +	write_cr4((read_cr4() & fix_cr4_clr) | fix_cr4_set | CR4_VMXE);
> +
> +	*vmxon_region = basic.revision;
> +
> +	guest_stack = alloc_page();
> +	memset(guest_stack, 0, PAGE_SIZE);
> +	guest_syscall_stack = alloc_page();
> +	memset(guest_syscall_stack, 0, PAGE_SIZE);
> +	host_stack = alloc_page();
> +	memset(host_stack, 0, PAGE_SIZE);
> +	host_syscall_stack = alloc_page();
> +	memset(host_syscall_stack, 0, PAGE_SIZE);
> +}
> +
> +int test_vmx_capability(void)
> +{
> +	struct cpuid r;
> +	u64 ret1, ret2;
> +	r = cpuid(1);
> +	ret1 = ((r.c) >> 5) & 1;
> +	ret2 = ((rdmsr(MSR_IA32_FEATURE_CONTROL) & 0x5) == 0x5);
> +	report("test vmx capability", ret1 & ret2);
> +	/* TODO: Fix here after patches are accepted */

It's fine to include a failing test.

> +	return 0;
> +	return !(ret1 & ret2);
> +}
> +
> +int test_vmxon(void)
> +{
> +	bool ret;
> +	u64 rflags;
> +
> +	rflags = get_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;
> +	set_rflags(rflags);
> +	asm volatile ("vmxon %1; seta %0\n\t"
> +		: "=q"(ret) : "m"(vmxon_region) : "cc");
> +	report("test vmxon", ret);
> +	/* TODO: Change here after bug fixed */
> +	return 0;
> +	/* return !ret; */
> +}
> +
> +void test_vmptrld(void)
> +{
> +	u64 rflags;
> +	struct vmcs *vmcs;
> +
> +	vmcs = alloc_page();
> +	vmcs->revision_id = basic.revision;
> +	rflags = get_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;
> +	set_rflags(rflags);
> +	report("test vmptrld", make_vmcs_current(vmcs) == 0);
> +}
> +
> +void test_vmptrst(void)
> +{
> +	u64 rflags;
> +	int ret;
> +	struct vmcs *vmcs1, *vmcs2;
> +
> +	vmcs1 = alloc_page();
> +	memset(vmcs1, 0, PAGE_SIZE);
> +	init_vmcs(&vmcs1);
> +	rflags = get_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;
> +	set_rflags(rflags);
> +	ret = save_vmcs(&vmcs2);
> +	report("test vmptrst", (!ret) && (vmcs1 == vmcs2));
> +}
> +
> +int main(void)
> +{
> +	setup_vm();
> +	setup_idt();
> +
> +	if (test_vmx_capability() != 0) {
> +		printf("ERROR : vmx not supported, check +vmx option\n");
> +		goto exit;
> +	}
> +	init_vmx();
> +	if (test_vmxon() != 0)
> +		goto exit;
> +	test_vmptrld();
> +	test_vmclear();
> +	test_vmptrst();
> +	init_vmcs(&vmcs_root);
> +
> +	vmx_run();
> +	/* Should not reach here */
> +	report("test vmlaunch", 0);
> +
> +exit:
> +	printf("\nSUMMARY: %d tests, %d failures\n", tests, fails);
> +	return fails ? 1 : 0;
> +}
> diff --git a/x86/vmx.h b/x86/vmx.h
> new file mode 100644
> index 0000000..167815d
> --- /dev/null
> +++ b/x86/vmx.h
> @@ -0,0 +1,406 @@
> +#ifndef __HYPERVISOR_H
> +#define __HYPERVISOR_H
> +
> +#include "libcflat.h"
> +
> +struct vmcs {
> +	u32 revision_id; /* vmcs revision identifier */
> +	u32 abort; /* VMX-abort indicator */
> +	/* VMCS data */
> +	char data[0];
> +};
> +
> +struct regs {
> +	u64 rax;
> +	u64 rcx;
> +	u64 rdx;
> +	u64 rbx;
> +	u64 cr2;
> +	u64 rbp;
> +	u64 rsi;
> +	u64 rdi;
> +	u64 r8;
> +	u64 r9;
> +	u64 r10;
> +	u64 r11;
> +	u64 r12;
> +	u64 r13;
> +	u64 r14;
> +	u64 r15;
> +};
> +
> +static union vmx_basic {
> +	u64 val;
> +	struct {
> +		u32 revision;
> +		u32	size:13,
> +			: 3,
> +			width:1,
> +			dual:1,
> +			type:4,
> +			insouts:1,
> +			ctrl:1;
> +	};
> +} basic;
> +
> +static union vmx_ctrl_pin {
> +	u64 val;
> +	struct {
> +		u32 set, clr;
> +	};
> +} ctrl_pin_rev;
> +
> +static union vmx_ctrl_cpu {
> +	u64 val;
> +	struct {
> +		u32 set, clr;
> +	};
> +} ctrl_cpu_rev[2];
> +
> +static union vmx_ctrl_exit {
> +	u64 val;
> +	struct {
> +		u32 set, clr;
> +	};
> +} ctrl_exit_rev;
> +
> +static union vmx_ctrl_ent {
> +	u64 val;
> +	struct {
> +		u32 set, clr;
> +	};
> +} ctrl_enter_rev;
> +
> +static union vmx_ept_vpid {
> +	u64 val;
> +	struct {
> +		u32:16,
> +			super:2,
> +			: 2,
> +			invept:1,
> +			: 11;
> +		u32	invvpid:1;
> +	};
> +} ept_vpid;
> +
> +struct descr {
> +	u16 limit;
> +	u64 addr;
> +};
> +
> +enum Encoding {
> +	/* 16-Bit Control Fields */
> +	VPID			= 0x0000ul,
> +	/* Posted-interrupt notification vector */
> +	PINV			= 0x0002ul,
> +	/* EPTP index */
> +	EPTP_IDX		= 0x0004ul,
> +
> +	/* 16-Bit Guest State Fields */
> +	GUEST_SEL_ES		= 0x0800ul,
> +	GUEST_SEL_CS		= 0x0802ul,
> +	GUEST_SEL_SS		= 0x0804ul,
> +	GUEST_SEL_DS		= 0x0806ul,
> +	GUEST_SEL_FS		= 0x0808ul,
> +	GUEST_SEL_GS		= 0x080aul,
> +	GUEST_SEL_LDTR		= 0x080cul,
> +	GUEST_SEL_TR		= 0x080eul,
> +	GUEST_INT_STATUS	= 0x0810ul,
> +
> +	/* 16-Bit Host State Fields */
> +	HOST_SEL_ES		= 0x0c00ul,
> +	HOST_SEL_CS		= 0x0c02ul,
> +	HOST_SEL_SS		= 0x0c04ul,
> +	HOST_SEL_DS		= 0x0c06ul,
> +	HOST_SEL_FS		= 0x0c08ul,
> +	HOST_SEL_GS		= 0x0c0aul,
> +	HOST_SEL_TR		= 0x0c0cul,
> +
> +	/* 64-Bit Control Fields */
> +	IO_BITMAP_A		= 0x2000ul,
> +	IO_BITMAP_B		= 0x2002ul,
> +	MSR_BITMAP		= 0x2004ul,
> +	EXIT_MSR_ST_ADDR	= 0x2006ul,
> +	EXIT_MSR_LD_ADDR	= 0x2008ul,
> +	ENTER_MSR_LD_ADDR	= 0x200aul,
> +	VMCS_EXEC_PTR		= 0x200cul,
> +	TSC_OFFSET		= 0x2010ul,
> +	TSC_OFFSET_HI		= 0x2011ul,
> +	APIC_VIRT_ADDR		= 0x2012ul,
> +	APIC_ACCS_ADDR		= 0x2014ul,
> +	EPTP			= 0x201aul,
> +	EPTP_HI			= 0x201bul,
> +
> +	/* 64-Bit Readonly Data Field */
> +	INFO_PHYS_ADDR		= 0x2400ul,
> +
> +	/* 64-Bit Guest State */
> +	VMCS_LINK_PTR		= 0x2800ul,
> +	VMCS_LINK_PTR_HI	= 0x2801ul,
> +	GUEST_DEBUGCTL		= 0x2802ul,
> +	GUEST_DEBUGCTL_HI	= 0x2803ul,
> +	GUEST_EFER		= 0x2806ul,
> +	GUEST_PERF_GLOBAL_CTRL	= 0x2808ul,
> +	GUEST_PDPTE		= 0x280aul,
> +
> +	/* 64-Bit Host State */
> +	HOST_EFER		= 0x2c02ul,
> +	HOST_PERF_GLOBAL_CTRL	= 0x2c04ul,
> +
> +	/* 32-Bit Control Fields */
> +	PIN_CONTROLS		= 0x4000ul,
> +	CPU_EXEC_CTRL0		= 0x4002ul,
> +	EXC_BITMAP		= 0x4004ul,
> +	PF_ERROR_MASK		= 0x4006ul,
> +	PF_ERROR_MATCH		= 0x4008ul,
> +	CR3_TARGET_COUNT	= 0x400aul,
> +	EXI_CONTROLS		= 0x400cul,
> +	EXI_MSR_ST_CNT		= 0x400eul,
> +	EXI_MSR_LD_CNT		= 0x4010ul,
> +	ENT_CONTROLS		= 0x4012ul,
> +	ENT_MSR_LD_CNT		= 0x4014ul,
> +	ENT_INTR_INFO		= 0x4016ul,
> +	ENT_INTR_ERROR		= 0x4018ul,
> +	ENT_INST_LEN		= 0x401aul,
> +	TPR_THRESHOLD		= 0x401cul,
> +	CPU_EXEC_CTRL1		= 0x401eul,
> +
> +	/* 32-Bit R/O Data Fields */
> +	VMX_INST_ERROR		= 0x4400ul,
> +	EXI_REASON		= 0x4402ul,
> +	EXI_INTR_INFO		= 0x4404ul,
> +	EXI_INTR_ERROR		= 0x4406ul,
> +	IDT_VECT_INFO		= 0x4408ul,
> +	IDT_VECT_ERROR		= 0x440aul,
> +	EXI_INST_LEN		= 0x440cul,
> +	EXI_INST_INFO		= 0x440eul,
> +
> +	/* 32-Bit Guest State Fields */
> +	GUEST_LIMIT_ES		= 0x4800ul,
> +	GUEST_LIMIT_CS		= 0x4802ul,
> +	GUEST_LIMIT_SS		= 0x4804ul,
> +	GUEST_LIMIT_DS		= 0x4806ul,
> +	GUEST_LIMIT_FS		= 0x4808ul,
> +	GUEST_LIMIT_GS		= 0x480aul,
> +	GUEST_LIMIT_LDTR	= 0x480cul,
> +	GUEST_LIMIT_TR		= 0x480eul,
> +	GUEST_LIMIT_GDTR	= 0x4810ul,
> +	GUEST_LIMIT_IDTR	= 0x4812ul,
> +	GUEST_AR_ES		= 0x4814ul,
> +	GUEST_AR_CS		= 0x4816ul,
> +	GUEST_AR_SS		= 0x4818ul,
> +	GUEST_AR_DS		= 0x481aul,
> +	GUEST_AR_FS		= 0x481cul,
> +	GUEST_AR_GS		= 0x481eul,
> +	GUEST_AR_LDTR		= 0x4820ul,
> +	GUEST_AR_TR		= 0x4822ul,
> +	GUEST_INTR_STATE	= 0x4824ul,
> +	GUEST_ACTV_STATE	= 0x4826ul,
> +	GUEST_SMBASE		= 0x4828ul,
> +	GUEST_SYSENTER_CS	= 0x482aul,
> +
> +	/* 32-Bit Host State Fields */
> +	HOST_SYSENTER_CS	= 0x4c00ul,
> +
> +	/* Natural-Width Control Fields */
> +	CR0_MASK		= 0x6000ul,
> +	CR4_MASK		= 0x6002ul,
> +	CR0_READ_SHADOW	= 0x6004ul,
> +	CR4_READ_SHADOW	= 0x6006ul,
> +	CR3_TARGET_0		= 0x6008ul,
> +	CR3_TARGET_1		= 0x600aul,
> +	CR3_TARGET_2		= 0x600cul,
> +	CR3_TARGET_3		= 0x600eul,
> +
> +	/* Natural-Width R/O Data Fields */
> +	EXI_QUALIFICATION	= 0x6400ul,
> +	IO_RCX			= 0x6402ul,
> +	IO_RSI			= 0x6404ul,
> +	IO_RDI			= 0x6406ul,
> +	IO_RIP			= 0x6408ul,
> +	GUEST_LINEAR_ADDRESS	= 0x640aul,
> +
> +	/* Natural-Width Guest State Fields */
> +	GUEST_CR0		= 0x6800ul,
> +	GUEST_CR3		= 0x6802ul,
> +	GUEST_CR4		= 0x6804ul,
> +	GUEST_BASE_ES		= 0x6806ul,
> +	GUEST_BASE_CS		= 0x6808ul,
> +	GUEST_BASE_SS		= 0x680aul,
> +	GUEST_BASE_DS		= 0x680cul,
> +	GUEST_BASE_FS		= 0x680eul,
> +	GUEST_BASE_GS		= 0x6810ul,
> +	GUEST_BASE_LDTR		= 0x6812ul,
> +	GUEST_BASE_TR		= 0x6814ul,
> +	GUEST_BASE_GDTR		= 0x6816ul,
> +	GUEST_BASE_IDTR		= 0x6818ul,
> +	GUEST_DR7		= 0x681aul,
> +	GUEST_RSP		= 0x681cul,
> +	GUEST_RIP		= 0x681eul,
> +	GUEST_RFLAGS		= 0x6820ul,
> +	GUEST_PENDING_DEBUG	= 0x6822ul,
> +	GUEST_SYSENTER_ESP	= 0x6824ul,
> +	GUEST_SYSENTER_EIP	= 0x6826ul,
> +
> +	/* Natural-Width Host State Fields */
> +	HOST_CR0		= 0x6c00ul,
> +	HOST_CR3		= 0x6c02ul,
> +	HOST_CR4		= 0x6c04ul,
> +	HOST_BASE_FS		= 0x6c06ul,
> +	HOST_BASE_GS		= 0x6c08ul,
> +	HOST_BASE_TR		= 0x6c0aul,
> +	HOST_BASE_GDTR		= 0x6c0cul,
> +	HOST_BASE_IDTR		= 0x6c0eul,
> +	HOST_SYSENTER_ESP	= 0x6c10ul,
> +	HOST_SYSENTER_EIP	= 0x6c12ul,
> +	HOST_RSP		= 0x6c14ul,
> +	HOST_RIP		= 0x6c16ul
> +};
> +
> +enum Reason {
> +	VMX_EXC_NMI		= 0,
> +	VMX_EXTINT		= 1,
> +	VMX_TRIPLE_FAULT	= 2,
> +	VMX_INIT		= 3,
> +	VMX_SIPI		= 4,
> +	VMX_SMI_IO		= 5,
> +	VMX_SMI_OTHER		= 6,
> +	VMX_INTR_WINDOW		= 7,
> +	VMX_NMI_WINDOW		= 8,
> +	VMX_TASK_SWITCH		= 9,
> +	VMX_CPUID		= 10,
> +	VMX_GETSEC		= 11,
> +	VMX_HLT			= 12,
> +	VMX_INVD		= 13,
> +	VMX_INVLPG		= 14,
> +	VMX_RDPMC		= 15,
> +	VMX_RDTSC		= 16,
> +	VMX_RSM			= 17,
> +	VMX_VMCALL		= 18,
> +	VMX_VMCLEAR		= 19,
> +	VMX_VMLAUNCH		= 20,
> +	VMX_VMPTRLD		= 21,
> +	VMX_VMPTRST		= 22,
> +	VMX_VMREAD		= 23,
> +	VMX_VMRESUME		= 24,
> +	VMX_VMWRITE		= 25,
> +	VMX_VMXOFF		= 26,
> +	VMX_VMXON		= 27,
> +	VMX_CR			= 28,
> +	VMX_DR			= 29,
> +	VMX_IO			= 30,
> +	VMX_RDMSR		= 31,
> +	VMX_WRMSR		= 32,
> +	VMX_FAIL_STATE		= 33,
> +	VMX_FAIL_MSR		= 34,
> +	VMX_MWAIT		= 36,
> +	VMX_MTF			= 37,
> +	VMX_MONITOR		= 39,
> +	VMX_PAUSE		= 40,
> +	VMX_FAIL_MCHECK		= 41,
> +	VMX_TPR_THRESHOLD	= 43,
> +	VMX_APIC_ACCESS		= 44,
> +	VMX_GDTR_IDTR		= 46,
> +	VMX_LDTR_TR		= 47,
> +	VMX_EPT_VIOLATION	= 48,
> +	VMX_EPT_MISCONFIG	= 49,
> +	VMX_INVEPT		= 50,
> +	VMX_PREEMPT		= 52,
> +	VMX_INVVPID		= 53,
> +	VMX_WBINVD		= 54,
> +	VMX_XSETBV		= 55
> +};
> +
> +#define X86_EFLAGS_CF	0x00000001 /* Carry Flag */
> +#define X86_EFLAGS_ZF	0x00000040 /* Zero Flag */
> +
> +enum Ctrl_exi {
> +	EXI_HOST_64             = 1UL << 9,
> +	EXI_LOAD_PERF		= 1UL << 12,
> +	EXI_INTA                = 1UL << 15,
> +	EXI_LOAD_EFER           = 1UL << 21,
> +};
> +
> +enum Ctrl_ent {
> +	ENT_GUEST_64            = 1UL << 9,
> +	ENT_LOAD_EFER           = 1UL << 15,
> +};
> +
> +enum Ctrl_pin {
> +	PIN_EXTINT              = 1ul << 0,
> +	PIN_NMI                 = 1ul << 3,
> +	PIN_VIRT_NMI            = 1ul << 5,
> +};
> +
> +enum Ctrl0 {
> +	CPU_INTR_WINDOW		= 1ul << 2,
> +	CPU_HLT			= 1ul << 7,
> +	CPU_INVLPG		= 1ul << 9,
> +	CPU_CR3_LOAD		= 1ul << 15,
> +	CPU_CR3_STORE		= 1ul << 16,
> +	CPU_TPR_SHADOW		= 1ul << 21,
> +	CPU_NMI_WINDOW		= 1ul << 22,
> +	CPU_IO			= 1ul << 24,
> +	CPU_IO_BITMAP		= 1ul << 25,
> +	CPU_SECONDARY		= 1ul << 31,
> +};
> +
> +enum Ctrl1 {
> +	CPU_EPT			= 1ul << 1,
> +	CPU_VPID		= 1ul << 5,
> +	CPU_URG			= 1ul << 7,
> +};
> +
> +#define SEL_NULL_DESC		0x0
> +#define SEL_KERN_CODE_64	0x8
> +#define SEL_KERN_DATA_64	0x10
> +#define SEL_USER_CODE_64	0x18
> +#define SEL_USER_DATA_64	0x20
> +#define SEL_CODE_32		0x28
> +#define SEL_DATA_32		0x30
> +#define SEL_CODE_16		0x38
> +#define SEL_DATA_16		0x40
> +#define SEL_TSS_RUN		0x48
> +
> +#define SAVE_GPR				\
> +	"xchg %rax, regs\n\t"			\
> +	"xchg %rbx, regs+0x8\n\t"		\
> +	"xchg %rcx, regs+0x10\n\t"		\
> +	"xchg %rdx, regs+0x18\n\t"		\
> +	"xchg %rbp, regs+0x28\n\t"		\
> +	"xchg %rsi, regs+0x30\n\t"		\
> +	"xchg %rdi, regs+0x38\n\t"		\
> +	"xchg %r8, regs+0x40\n\t"		\
> +	"xchg %r9, regs+0x48\n\t"		\
> +	"xchg %r10, regs+0x50\n\t"		\
> +	"xchg %r11, regs+0x58\n\t"		\
> +	"xchg %r12, regs+0x60\n\t"		\
> +	"xchg %r13, regs+0x68\n\t"		\
> +	"xchg %r14, regs+0x70\n\t"		\
> +	"xchg %r15, regs+0x78\n\t"
> +
> +#define LOAD_GPR	SAVE_GPR
> +
> +#define CR0_PE		(1ul << 0)
> +#define CR0_PG		(1ul << 31)
> +#define CR4_VMXE	(1ul << 0)
> +#define CR4_PAE		(1ul << 5)
> +#define CR4_PCIDE	(1ul << 17)
> +
> +#define VMX_IO_SIZE_MASK		0x7
> +#define _VMX_IO_BYTE			1
> +#define _VMX_IO_WORD			2
> +#define _VMX_IO_LONG			3
> +#define VMX_IO_DIRECTION_MASK		(1ul << 3)
> +#define VMX_IO_IN			(1ul << 3)
> +#define VMX_IO_OUT			0
> +#define VMX_IO_STRING			(1ul << 4)
> +#define VMX_IO_REP			(1ul << 5)
> +#define VMX_IO_OPRAND_DX		(1ul << 6)
> +#define VMX_IO_PORT_MASK		0xFFFF0000
> +#define VMX_IO_PORT_SHIFT		16
> +
> +#define TEST_VMRESUME		0x1001
> +
> +#endif
> +
> 


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] kvm-unit-tests : The first version of VMX nested test case
  2013-07-16 10:28 ` Paolo Bonzini
@ 2013-07-16 11:47   ` Arthur Chunqi Li
  2013-07-16 11:58     ` Paolo Bonzini
  2013-07-16 15:20   ` Gleb Natapov
  1 sibling, 1 reply; 14+ messages in thread
From: Arthur Chunqi Li @ 2013-07-16 11:47 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: kvm, Jan Kiszka, Gleb Natapov

Hi Paolo,

On Tue, Jul 16, 2013 at 6:28 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
> Il 16/07/2013 11:27, Arthur Chunqi Li ha scritto:
>> This is the first version for VMX nested environment test case. It
>> contains the basic VMX instructions test cases, including VMXON/
>> VMXOFF/VMXPTRLD/VMXPTRST/VMCLEAR/VMLAUNCH/VMRESUME/VMCALL. This patch
>> also tests the basic execution routine in VMX nested environment and
>> let the VM print "Hello World" to inform its successfully run.
>>
>> New files added:
>> x86/vmx.h : contains all VMX related macro declerations
>> x86/vmx.c : main file for VMX nested test case
>>
>> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
>> ---
>>  config-x86-common.mak |    2 +
>>  config-x86_64.mak     |    1 +
>>  lib/x86/msr.h         |    5 +
>>  x86/cstart64.S        |    4 +
>>  x86/unittests.cfg     |    6 +
>>  x86/vmx.c             |  568 +++++++++++++++++++++++++++++++++++++++++++++++++
>>  x86/vmx.h             |  406 +++++++++++++++++++++++++++++++++++
>>  7 files changed, 992 insertions(+)
>>  create mode 100644 x86/vmx.c
>>  create mode 100644 x86/vmx.h
>>
>> diff --git a/config-x86-common.mak b/config-x86-common.mak
>> index 455032b..34a41e1 100644
>> --- a/config-x86-common.mak
>> +++ b/config-x86-common.mak
>> @@ -101,6 +101,8 @@ $(TEST_DIR)/asyncpf.elf: $(cstart.o) $(TEST_DIR)/asyncpf.o
>>
>>  $(TEST_DIR)/pcid.elf: $(cstart.o) $(TEST_DIR)/pcid.o
>>
>> +$(TEST_DIR)/vmx.elf: $(cstart.o) $(TEST_DIR)/vmx.o
>> +
>>  arch_clean:
>>       $(RM) $(TEST_DIR)/*.o $(TEST_DIR)/*.flat $(TEST_DIR)/*.elf \
>>       $(TEST_DIR)/.*.d $(TEST_DIR)/lib/.*.d $(TEST_DIR)/lib/*.o
>> diff --git a/config-x86_64.mak b/config-x86_64.mak
>> index 4e525f5..bb8ee89 100644
>> --- a/config-x86_64.mak
>> +++ b/config-x86_64.mak
>> @@ -9,5 +9,6 @@ tests = $(TEST_DIR)/access.flat $(TEST_DIR)/apic.flat \
>>         $(TEST_DIR)/xsave.flat $(TEST_DIR)/rmap_chain.flat \
>>         $(TEST_DIR)/pcid.flat
>>  tests += $(TEST_DIR)/svm.flat
>> +tests += $(TEST_DIR)/vmx.flat
>>
>>  include config-x86-common.mak
>> diff --git a/lib/x86/msr.h b/lib/x86/msr.h
>> index 509a421..281255a 100644
>> --- a/lib/x86/msr.h
>> +++ b/lib/x86/msr.h
>> @@ -396,6 +396,11 @@
>>  #define MSR_IA32_VMX_VMCS_ENUM          0x0000048a
>>  #define MSR_IA32_VMX_PROCBASED_CTLS2    0x0000048b
>>  #define MSR_IA32_VMX_EPT_VPID_CAP       0x0000048c
>> +#define MSR_IA32_VMX_TRUE_PIN                0x0000048d
>> +#define MSR_IA32_VMX_TRUE_PROC               0x0000048e
>> +#define MSR_IA32_VMX_TRUE_EXIT               0x0000048f
>> +#define MSR_IA32_VMX_TRUE_ENTRY              0x00000490
>> +
>>
>>  /* AMD-V MSRs */
>>
>> diff --git a/x86/cstart64.S b/x86/cstart64.S
>> index 24df5f8..0fe76da 100644
>> --- a/x86/cstart64.S
>> +++ b/x86/cstart64.S
>> @@ -4,6 +4,10 @@
>>  .globl boot_idt
>>  boot_idt = 0
>>
>> +.globl idt_descr
>> +.globl tss_descr
>> +.globl gdt64_desc
>> +
>>  ipi_vector = 0x20
>>
>>  max_cpus = 64
>> diff --git a/x86/unittests.cfg b/x86/unittests.cfg
>> index bc9643e..e846739 100644
>> --- a/x86/unittests.cfg
>> +++ b/x86/unittests.cfg
>> @@ -149,3 +149,9 @@ extra_params = --append "10000000 `date +%s`"
>>  file = pcid.flat
>>  extra_params = -cpu qemu64,+pcid
>>  arch = x86_64
>> +
>> +[vmx]
>> +file = vmx.flat
>> +extra_params = -cpu Nehalem,+vmx
>
> Should this use "-cpu host" instead? (Or "-cpu host,+vmx", I don't
> remember).
>
>> +arch = x86_64
>> +
>> diff --git a/x86/vmx.c b/x86/vmx.c
>> new file mode 100644
>> index 0000000..0435746
>> --- /dev/null
>> +++ b/x86/vmx.c
>> @@ -0,0 +1,568 @@
>> +#include "libcflat.h"
>> +#include "processor.h"
>> +#include "vm.h"
>> +#include "desc.h"
>> +#include "vmx.h"
>> +#include "msr.h"
>> +#include "smp.h"
>> +#include "io.h"
>> +
>> +
>> +int fails = 0, tests = 0;
>> +u32 *vmxon_region;
>> +struct vmcs *vmcs_root;
>> +void *io_bmp1, *io_bmp2;
>> +void *msr_bmp;
>> +u32 vpid_ctr;
>> +char *guest_stack, *host_stack;
>> +char *guest_syscall_stack, *host_syscall_stack;
>> +u32 ctrl_pin, ctrl_enter, ctrl_exit, ctrl_cpu[2];
>> +ulong fix_cr0_set, fix_cr0_clr;
>> +ulong fix_cr4_set, fix_cr4_clr;
>> +struct regs regs;
>> +
>> +extern u64 gdt64_desc[];
>> +extern u64 idt_descr[];
>> +extern u64 tss_descr[];
>> +extern void *entry_vmx;
>> +extern void *entry_sysenter;
>> +extern void *entry_guest;
>> +
>> +void report(const char *name, int result)
>> +{
>> +     ++tests;
>> +     if (result)
>> +             printf("PASS: %s\n", name);
>> +     else {
>> +             printf("FAIL: %s\n", name);
>> +             ++fails;
>> +     }
>> +}
>> +
>> +inline u64 get_rflags(void)
>> +{
>> +     u64 r;
>> +     asm volatile("pushf; pop %0\n\t" : "=q"(r) : : "cc");
>> +     return r;
>> +}
>> +
>> +inline void set_rflags(u64 r)
>> +{
>> +     asm volatile("push %0; popf\n\t" : : "q"(r) : "cc");
>> +}
>> +
>> +int vmcs_clear(struct vmcs *vmcs)
>> +{
>> +     bool ret;
>> +     asm volatile ("vmclear %1; seta %0" : "=q" (ret) : "m" (vmcs) : "cc");
>
> You can use "setbe", it's clearer and avoids the ! in the return statement.
>
> We should later add tests for failure conditions, since failing to
> detect errors could give rise to L2->L1 attack vectors.  When we do so,
> we will have to distinguish CF from ZF.
>
>
>> +     return !ret;
>> +}
>> +
>> +u64 vmcs_read(enum Encoding enc)
>> +{
>> +     u64 val;
>> +     asm volatile ("vmread %1, %0" : "=rm" (val) : "r" ((u64)enc) : "cc");
>> +     return val;
>> +}
>> +
>> +int vmcs_write(enum Encoding enc, u64 val)
>> +{
>> +     bool ret;
>> +     asm volatile ("vmwrite %1, %2; seta %0"
>> +             : "=q"(ret) : "rm" (val), "r" ((u64)enc) : "cc");
>> +     return !ret;
>> +}
>> +
>> +int make_vmcs_current(struct vmcs *vmcs)
>> +{
>> +     bool ret;
>> +
>> +     asm volatile ("vmptrld %1; seta %0" : "=q" (ret) : "m" (vmcs) : "cc");
>> +     return !ret;
>> +}
>> +
>> +int save_vmcs(struct vmcs **vmcs)
>> +{
>> +     bool ret;
>> +
>> +     asm volatile ("vmptrst %1; seta %0" : "=q" (ret) : "m" (*vmcs) : "cc");
>> +     return !ret;
>> +}
>> +
>> +/* entry_vmx */
>> +asm(
>> +     ".align 4, 0x90\n\t"
>> +     ".globl entry_vmx\n\t"
>> +     "entry_vmx:\n\t"
>> +     SAVE_GPR
>> +     "       call    vmx_handler\n\t"
>> +     LOAD_GPR
>> +     "       vmresume\n\t"
>> +);
>> +
>> +/* entry_sysenter */
>> +asm(
>> +     ".align 4, 0x90\n\t"
>> +     ".globl entry_sysenter\n\t"
>> +     "entry_sysenter:\n\t"
>> +     SAVE_GPR
>> +     "       and     $0xf, %rax\n\t"
>> +     "       push    %rax\n\t"
>> +     "       call    syscall_handler\n\t"
>> +);
>> +
>> +void syscall_handler(u64 syscall_no)
>> +{
>> +     printf("Here in syscall_handler, syscall_no = %d\n", syscall_no);
>> +}
>> +
>> +void vmx_run()
>> +{
>> +     bool ret;
>> +     printf("Now run vm.\n\n");
>> +     asm volatile("vmlaunch;seta %0\n\t" : "=m"(ret));
>> +     printf("VMLAUNCH error, ret=%d\n", ret);
>> +}
>> +
>> +void vmx_resume()
>> +{
>> +     asm volatile(LOAD_GPR
>> +             "vmresume\n\t");
>> +     /* VMRESUME fail if reach here */
>> +}
>> +
>> +void print_vmexit_info()
>> +{
>> +     u64 guest_rip, guest_rsp;
>> +     ulong reason = vmcs_read(EXI_REASON) & 0xff;
>> +     ulong exit_qual = vmcs_read(EXI_QUALIFICATION);
>> +     guest_rip = vmcs_read(GUEST_RIP);
>> +     guest_rsp = vmcs_read(GUEST_RSP);
>> +     printf("VMEXIT info:\n");
>> +     printf("\tvmexit reason = %d\n", reason);
>> +     printf("\texit qualification = 0x%x\n", exit_qual);
>> +     printf("\tBit 31 of reason = %x\n", (vmcs_read(EXI_REASON) >> 31) & 1);
>> +     printf("\tguest_rip = 0x%llx\n", guest_rip);
>> +     printf("\tRAX=0x%llx    RBX=0x%llx    RCX=0x%llx    RDX=0x%llx\n",
>> +             regs.rax, regs.rbx, regs.rcx, regs.rdx);
>> +     printf("\tRSP=0x%llx    RBP=0x%llx    RSI=0x%llx    RDI=0x%llx\n",
>> +             guest_rsp, regs.rbp, regs.rsi, regs.rdi);
>> +     printf("\tR8 =0x%llx    R9 =0x%llx    R10=0x%llx    R11=0x%llx\n",
>> +             regs.r8, regs.r9, regs.r10, regs.r11);
>> +     printf("\tR12=0x%llx    R13=0x%llx    R14=0x%llx    R15=0x%llx\n",
>> +             regs.r12, regs.r13, regs.r14, regs.r15);
>> +}
>> +
>> +void test_vmclear(void)
>> +{
>> +     u64 rflags;
>> +
>> +     rflags = get_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;
>> +     set_rflags(rflags);
>> +     report("test vmclear", vmcs_clear(vmcs_root) == 0);
>> +}
>> +
>> +void test_vmxoff(void)
>> +{
>> +     bool ret;
>> +     u64 rflags;
>> +
>> +     rflags = get_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;
>> +     set_rflags(rflags);
>> +     asm volatile("vmxoff; seta %0\n\t" : "=q"(ret) : : "cc");
>> +     report("test vmxoff", ret);
>> +}
>> +
>> +void vmx_exit(void)
>> +{
>> +     test_vmxoff();
>> +     printf("\nSUMMARY: %d tests, %d failures\n", tests, fails);
>> +     exit(fails ? -1 : 0);
>> +}
>
> Can you try to jump back to main, and do test_vmxoff there?  This will
> avoid having to write our tests in callback style, which is a pain.
> Basically something similar to setjmp/longjmp.  In main:
>
>         if (setjmp(jmpbuf) == 0) {
>                 vmx_run();
>                 /* Should not reach here */
>                 report("test vmlaunch", 0);
>         }
>         test_vmxoff();
>
> exit:
>         printf("\nSUMMARY: %d tests, %d failures\n", tests, fails);
>         return fails ? 1 : 0;
>
> In vmx_handler:
>
>         case VMX_HLT:
>                 printf("\nVM exit.\n");
>                 longjmp(jmpbuf, 1);
>
setjmp and longjmp are not implemented in our test environment, and
these two functions are highly dependent on the architecture. Do you
think we need to write generic code that works for both 32-bit and
64-bit, or just write a specific implementation for this test case?
>> +void vmx_handler()
>> +{
>> +     u64 guest_rip;
>> +     ulong reason = vmcs_read(EXI_REASON) & 0xff;
>> +
>> +     if ((read_cr4() & CR4_PAE) && (read_cr0() & CR0_PG)
>> +             && !(rdmsr(MSR_EFER) & EFER_LMA))
>> +             printf("ERROR : PDPTEs should be checked\n");
>> +
>> +     guest_rip = vmcs_read(GUEST_RIP);
>> +
>> +     switch (reason) {
>> +     case VMX_VMCALL:
>> +             switch (regs.rax) {
>> +             case TEST_VMRESUME:
>> +                     regs.rax = 0xFFFF;
>> +                     break;
>> +             default:
>> +                     printf("ERROR : Invalid VMCALL param : %d\n", regs.rax);
>> +             }
>> +             vmcs_write(GUEST_RIP, guest_rip + 3);
>> +             goto vmx_resume;
>> +     case VMX_IO:
>> +             print_vmexit_info();
>> +             break;
>> +     case VMX_HLT:
>> +             printf("\nVM exit.\n");
>> +             vmx_exit();
>> +             /* Should not reach here */
>> +             goto vmx_exit;
>> +     case VMX_EXC_NMI:
>> +     case VMX_EXTINT:
>> +     case VMX_INVLPG:
>> +     case VMX_CR:
>> +     case VMX_EPT_VIOLATION:
>> +     default:
>> +             break;
>> +     }
>> +     printf("ERROR : Unhandled vmx exit.\n");
>> +     print_vmexit_info();
>> +vmx_exit:
>> +     exit(-1);
>> +vmx_resume:
>> +     vmx_resume();
>> +     /* Should not reach here */
>> +     exit(-1);
>> +}
>> +
>> +void test_vmresume()
>> +{
>> +     u64 rax;
>> +     u64 rsp, resume_rsp;
>> +
>> +     rax = 0;
>> +     asm volatile("mov %%rsp, %0\n\t" : "=r"(rsp));
>> +     asm volatile("mov %2, %%rax\n\t"
>> +             "vmcall\n\t"
>> +             "mov %%rax, %0\n\t"
>> +             "mov %%rsp, %1\n\t"
>> +             : "=r"(rax), "=r"(resume_rsp)
>> +             : "g"(TEST_VMRESUME));
>> +     report("test vmresume", (rax == 0xFFFF) && (rsp == resume_rsp));
>> +}
>> +
>> +/* entry_guest */
>> +asm(
>> +     ".align 4, 0x90\n\t"
>> +     ".globl entry_guest\n\t"
>> +     "entry_guest:\n\t"
>> +     "       call    guest_main\n\t"
>> +     "       hlt\n\t"
>> +);
>> +
>> +void guest_main(void)
>> +{
>> +     /* If reach here, VMLAUNCH runs OK */
>> +     report("test vmlaunch", 1);
>> +     printf("cr0 in guest = %llx\n", read_cr0());
>> +     printf("cr3 in guest = %llx\n", read_cr3());
>> +     printf("cr4 in guest = %llx\n", read_cr4());
>> +     printf("\nHello World!\n");
>> +     test_vmresume();
>> +}
>> +
>> +void init_vmcs_ctrl(void)
>> +{
>> +     /* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */
>> +     /* 26.2.1.1 */
>> +     vmcs_write(PIN_CONTROLS, ctrl_pin);
>> +     /* Disable VMEXIT of IO instruction */
>> +     vmcs_write(CPU_EXEC_CTRL0, ctrl_cpu[0]);
>> +     if (ctrl_cpu_rev[0].set & CPU_SECONDARY) {
>> +             ctrl_cpu[1] |= ctrl_cpu_rev[1].set & ctrl_cpu_rev[1].clr;
>> +             vmcs_write(CPU_EXEC_CTRL1, ctrl_cpu[1]);
>> +     }
>> +     vmcs_write(CR3_TARGET_COUNT, 0);
>> +     io_bmp1 = alloc_page();
>> +     io_bmp2 = alloc_page();
>> +     memset(io_bmp1, 0, PAGE_SIZE);
>> +     memset(io_bmp2, 0, PAGE_SIZE);
>> +     vmcs_write(IO_BITMAP_A, (u64)io_bmp1);
>> +     vmcs_write(IO_BITMAP_B, (u64)io_bmp2);
>> +     msr_bmp = alloc_page();
>> +     memset(msr_bmp, 0, PAGE_SIZE);
>> +     vmcs_write(MSR_BITMAP, (u64)msr_bmp);
>> +     vmcs_write(VPID, ++vpid_ctr);
>> +}
>> +
>> +void init_vmcs_host(void)
>> +{
>> +     /* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */
>> +     /* 26.2.1.2 */
>> +     vmcs_write(HOST_EFER, rdmsr(MSR_EFER));
>> +
>> +     /* 26.2.1.3 */
>> +     vmcs_write(ENT_CONTROLS, ctrl_enter);
>> +     vmcs_write(EXI_CONTROLS, ctrl_exit);
>> +
>> +     /* 26.2.2 */
>> +     vmcs_write(HOST_CR0, read_cr0());
>> +     vmcs_write(HOST_CR3, read_cr3());
>> +     vmcs_write(HOST_CR4, read_cr4());
>> +     vmcs_write(HOST_SYSENTER_ESP,
>> +             (u64)(host_syscall_stack + PAGE_SIZE - 1));
>> +     vmcs_write(HOST_SYSENTER_EIP, (u64)(&entry_sysenter));
>> +     vmcs_write(HOST_SYSENTER_CS,  SEL_KERN_CODE_64);
>> +
>> +     /* 26.2.3 */
>> +     vmcs_write(HOST_SEL_CS, SEL_KERN_CODE_64);
>> +     vmcs_write(HOST_SEL_SS, SEL_KERN_DATA_64);
>> +     vmcs_write(HOST_SEL_DS, SEL_KERN_DATA_64);
>> +     vmcs_write(HOST_SEL_ES, SEL_KERN_DATA_64);
>> +     vmcs_write(HOST_SEL_FS, SEL_KERN_DATA_64);
>> +     vmcs_write(HOST_SEL_GS, SEL_KERN_DATA_64);
>> +     vmcs_write(HOST_SEL_TR, SEL_TSS_RUN);
>> +     vmcs_write(HOST_BASE_TR,   (u64)tss_descr);
>> +     vmcs_write(HOST_BASE_GDTR, (u64)gdt64_desc);
>> +     vmcs_write(HOST_BASE_IDTR, (u64)idt_descr);
>> +     vmcs_write(HOST_BASE_FS, 0);
>> +     vmcs_write(HOST_BASE_GS, 0);
>> +
>> +     /* Set other vmcs area */
>> +     vmcs_write(PF_ERROR_MASK, 0);
>> +     vmcs_write(PF_ERROR_MATCH, 0);
>> +     vmcs_write(VMCS_LINK_PTR, ~0ul);
>> +     vmcs_write(VMCS_LINK_PTR_HI, ~0ul);
>> +     vmcs_write(HOST_RSP, (u64)(host_stack + PAGE_SIZE - 1));
>> +     vmcs_write(HOST_RIP, (u64)(&entry_vmx));
>> +}
>> +
>> +void init_vmcs_guest(void)
>> +{
>> +     /* 26.3 CHECKING AND LOADING GUEST STATE */
>> +     ulong guest_cr0, guest_cr4, guest_cr3;
>> +     /* 26.3.1.1 */
>> +     guest_cr0 = read_cr0();
>> +     guest_cr4 = read_cr4();
>> +     guest_cr3 = read_cr3();
>> +     if (ctrl_enter & ENT_GUEST_64) {
>> +             guest_cr0 |= CR0_PG;
>> +             guest_cr4 |= CR4_PAE;
>> +     }
>> +     if ((ctrl_enter & ENT_GUEST_64) == 0)
>> +             guest_cr4 &= (~CR4_PCIDE);
>> +     if (guest_cr0 & CR0_PG)
>> +             guest_cr0 |= CR0_PE;
>> +     vmcs_write(GUEST_CR0, guest_cr0);
>> +     vmcs_write(GUEST_CR3, guest_cr3);
>> +     vmcs_write(GUEST_CR4, guest_cr4);
>> +     vmcs_write(GUEST_SYSENTER_CS,  SEL_KERN_CODE_64);
>> +     vmcs_write(GUEST_SYSENTER_ESP,
>> +             (u64)(guest_syscall_stack + PAGE_SIZE - 1));
>> +     vmcs_write(GUEST_SYSENTER_EIP, (u64)(&entry_sysenter));
>> +     vmcs_write(GUEST_DR7, 0);
>> +     vmcs_write(GUEST_EFER, rdmsr(MSR_EFER));
>> +
>> +     /* 26.3.1.2 */
>> +     vmcs_write(GUEST_SEL_CS, SEL_KERN_CODE_64);
>> +     vmcs_write(GUEST_SEL_SS, SEL_KERN_DATA_64);
>> +     vmcs_write(GUEST_SEL_DS, SEL_KERN_DATA_64);
>> +     vmcs_write(GUEST_SEL_ES, SEL_KERN_DATA_64);
>> +     vmcs_write(GUEST_SEL_FS, SEL_KERN_DATA_64);
>> +     vmcs_write(GUEST_SEL_GS, SEL_KERN_DATA_64);
>> +     vmcs_write(GUEST_SEL_TR, SEL_TSS_RUN);
>> +     vmcs_write(GUEST_SEL_LDTR, 0);
>> +
>> +     vmcs_write(GUEST_BASE_CS, 0);
>> +     vmcs_write(GUEST_BASE_ES, 0);
>> +     vmcs_write(GUEST_BASE_SS, 0);
>> +     vmcs_write(GUEST_BASE_DS, 0);
>> +     vmcs_write(GUEST_BASE_FS, 0);
>> +     vmcs_write(GUEST_BASE_GS, 0);
>> +     vmcs_write(GUEST_BASE_TR,   (u64)tss_descr);
>> +     vmcs_write(GUEST_BASE_LDTR, 0);
>> +
>> +     vmcs_write(GUEST_LIMIT_CS, 0xFFFFFFFF);
>> +     vmcs_write(GUEST_LIMIT_DS, 0xFFFFFFFF);
>> +     vmcs_write(GUEST_LIMIT_ES, 0xFFFFFFFF);
>> +     vmcs_write(GUEST_LIMIT_SS, 0xFFFFFFFF);
>> +     vmcs_write(GUEST_LIMIT_FS, 0xFFFFFFFF);
>> +     vmcs_write(GUEST_LIMIT_GS, 0xFFFFFFFF);
>> +     vmcs_write(GUEST_LIMIT_LDTR, 0xffff);
>> +     vmcs_write(GUEST_LIMIT_TR, ((struct descr *)tss_descr)->limit);
>> +
>> +     vmcs_write(GUEST_AR_CS, 0xa09b);
>> +     vmcs_write(GUEST_AR_DS, 0xc093);
>> +     vmcs_write(GUEST_AR_ES, 0xc093);
>> +     vmcs_write(GUEST_AR_FS, 0xc093);
>> +     vmcs_write(GUEST_AR_GS, 0xc093);
>> +     vmcs_write(GUEST_AR_SS, 0xc093);
>> +     vmcs_write(GUEST_AR_LDTR, 0x82);
>> +     vmcs_write(GUEST_AR_TR, 0x8b);
>> +
>> +     /* 26.3.1.3 */
>> +     vmcs_write(GUEST_BASE_GDTR, (u64)gdt64_desc);
>> +     vmcs_write(GUEST_BASE_IDTR, (u64)idt_descr);
>> +     vmcs_write(GUEST_LIMIT_GDTR,
>> +             ((struct descr *)gdt64_desc)->limit & 0xffff);
>> +     vmcs_write(GUEST_LIMIT_IDTR,
>> +             ((struct descr *)idt_descr)->limit & 0xffff);
>> +
>> +     /* 26.3.1.4 */
>> +     vmcs_write(GUEST_RIP, (u64)(&entry_guest));
>> +     vmcs_write(GUEST_RSP, (u64)(guest_stack + PAGE_SIZE - 1));
>> +     vmcs_write(GUEST_RFLAGS, 0x2);
>> +
>> +     /* 26.3.1.5 */
>> +     vmcs_write(GUEST_ACTV_STATE, 0);
>> +     vmcs_write(GUEST_INTR_STATE, 0);
>> +}
>> +
>> +int init_vmcs(struct vmcs **vmcs)
>> +{
>> +     *vmcs = alloc_page();
>> +     memset(*vmcs, 0, PAGE_SIZE);
>> +     (*vmcs)->revision_id = basic.revision;
>> +     /* vmclear first to init vmcs */
>> +     if (vmcs_clear(*vmcs)) {
>> +             printf("%s : vmcs_clear error\n", __func__);
>> +             return 1;
>> +     }
>> +
>> +     if (make_vmcs_current(*vmcs)) {
>> +             printf("%s : make_vmcs_current error\n", __func__);
>> +             return 1;
>> +     }
>> +
>> +     /* All settings to pin/exit/enter/cpu
>> +        control fields should place here */
>
> should be placed here
>
>> +     ctrl_pin |= PIN_EXTINT | PIN_NMI | PIN_VIRT_NMI;
>> +     ctrl_exit = EXI_LOAD_EFER | EXI_HOST_64;
>> +     ctrl_enter = (ENT_LOAD_EFER | ENT_GUEST_64);
>> +     ctrl_cpu[0] |= CPU_HLT;
>> +     /* DIsable IO instruction VMEXIT now */
>> +     ctrl_cpu[0] &= (~(CPU_IO | CPU_IO_BITMAP));
>> +     ctrl_cpu[1] = 0;
>> +
>> +     ctrl_pin = (ctrl_pin | ctrl_pin_rev.set) & ctrl_pin_rev.clr;
>> +     ctrl_enter = (ctrl_enter | ctrl_enter_rev.set) & ctrl_enter_rev.clr;
>> +     ctrl_exit = (ctrl_exit | ctrl_exit_rev.set) & ctrl_exit_rev.clr;
>> +     ctrl_cpu[0] = (ctrl_cpu[0] | ctrl_cpu_rev[0].set) & ctrl_cpu_rev[0].clr;
>> +
>> +     init_vmcs_ctrl();
>> +     init_vmcs_host();
>> +     init_vmcs_guest();
>> +     return 0;
>> +}
>> +
>> +void init_vmx(void)
>> +{
>> +     vmxon_region = alloc_page();
>> +     memset(vmxon_region, 0, PAGE_SIZE);
>> +
>> +     fix_cr0_set =  rdmsr(MSR_IA32_VMX_CR0_FIXED0);
>> +     fix_cr0_clr =  rdmsr(MSR_IA32_VMX_CR0_FIXED1);
>> +     fix_cr4_set =  rdmsr(MSR_IA32_VMX_CR4_FIXED0);
>> +     fix_cr4_clr = rdmsr(MSR_IA32_VMX_CR4_FIXED1);
>> +     basic.val = rdmsr(MSR_IA32_VMX_BASIC);
>> +     ctrl_pin_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PIN
>> +                     : MSR_IA32_VMX_PINBASED_CTLS);
>> +     ctrl_exit_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_EXIT
>> +                     : MSR_IA32_VMX_EXIT_CTLS);
>> +     ctrl_enter_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_ENTRY
>> +                     : MSR_IA32_VMX_ENTRY_CTLS);
>> +     ctrl_cpu_rev[0].val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PROC
>> +                     : MSR_IA32_VMX_PROCBASED_CTLS);
>> +     if (ctrl_cpu_rev[0].set & CPU_SECONDARY)
>> +             ctrl_cpu_rev[1].val = rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2);
>> +     if (ctrl_cpu_rev[1].set & CPU_EPT || ctrl_cpu_rev[1].set & CPU_VPID)
>> +             ept_vpid.val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP);
>> +
>> +     write_cr0((read_cr0() & fix_cr0_clr) | fix_cr0_set);
>> +     write_cr4((read_cr4() & fix_cr4_clr) | fix_cr4_set | CR4_VMXE);
>> +
>> +     *vmxon_region = basic.revision;
>> +
>> +     guest_stack = alloc_page();
>> +     memset(guest_stack, 0, PAGE_SIZE);
>> +     guest_syscall_stack = alloc_page();
>> +     memset(guest_syscall_stack, 0, PAGE_SIZE);
>> +     host_stack = alloc_page();
>> +     memset(host_stack, 0, PAGE_SIZE);
>> +     host_syscall_stack = alloc_page();
>> +     memset(host_syscall_stack, 0, PAGE_SIZE);
>> +}
>> +
>> +int test_vmx_capability(void)
>> +{
>> +     struct cpuid r;
>> +     u64 ret1, ret2;
>> +     r = cpuid(1);
>> +     ret1 = ((r.c) >> 5) & 1;
>> +     ret2 = ((rdmsr(MSR_IA32_FEATURE_CONTROL) & 0x5) == 0x5);
>> +     report("test vmx capability", ret1 & ret2);
>> +     /* TODO: Fix here after patches are accepted */
>
> It's fine to include a failing test.
>
>> +     return 0;
>> +     return !(ret1 & ret2);
>> +}
>> +
>> +int test_vmxon(void)
>> +{
>> +     bool ret;
>> +     u64 rflags;
>> +
>> +     rflags = get_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;
>> +     set_rflags(rflags);
>> +     asm volatile ("vmxon %1; seta %0\n\t"
>> +             : "=q"(ret) : "m"(vmxon_region) : "cc");
>> +     report("test vmxon", ret);
>> +     /* TODO: Change here after bug fixed */
>> +     return 0;
>> +     /* return !ret; */
>> +}
>> +
>> +void test_vmptrld(void)
>> +{
>> +     u64 rflags;
>> +     struct vmcs *vmcs;
>> +
>> +     vmcs = alloc_page();
>> +     vmcs->revision_id = basic.revision;
>> +     rflags = get_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;
>> +     set_rflags(rflags);
>> +     report("test vmptrld", make_vmcs_current(vmcs) == 0);
>> +}
>> +
>> +void test_vmptrst(void)
>> +{
>> +     u64 rflags;
>> +     int ret;
>> +     struct vmcs *vmcs1, *vmcs2;
>> +
>> +     vmcs1 = alloc_page();
>> +     memset(vmcs1, 0, PAGE_SIZE);
>> +     init_vmcs(&vmcs1);
>> +     rflags = get_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;
>> +     set_rflags(rflags);
>> +     ret = save_vmcs(&vmcs2);
>> +     report("test vmptrst", (!ret) && (vmcs1 == vmcs2));
>> +}
>> +
>> +int main(void)
>> +{
>> +     setup_vm();
>> +     setup_idt();
>> +
>> +     if (test_vmx_capability() != 0) {
>> +             printf("ERROR : vmx not supported, check +vmx option\n");
>> +             goto exit;
>> +     }
>> +     init_vmx();
>> +     if (test_vmxon() != 0)
>> +             goto exit;
>> +     test_vmptrld();
>> +     test_vmclear();
>> +     test_vmptrst();
>> +     init_vmcs(&vmcs_root);
>> +
>> +     vmx_run();
>> +     /* Should not reach here */
>> +     report("test vmlaunch", 0);
>> +
>> +exit:
>> +     printf("\nSUMMARY: %d tests, %d failures\n", tests, fails);
>> +     return fails ? 1 : 0;
>> +}
>> diff --git a/x86/vmx.h b/x86/vmx.h
>> new file mode 100644
>> index 0000000..167815d
>> --- /dev/null
>> +++ b/x86/vmx.h
>> @@ -0,0 +1,406 @@
>> +#ifndef __HYPERVISOR_H
>> +#define __HYPERVISOR_H
>> +
>> +#include "libcflat.h"
>> +
>> +struct vmcs {
>> +     u32 revision_id; /* vmcs revision identifier */
>> +     u32 abort; /* VMX-abort indicator */
>> +     /* VMCS data */
>> +     char data[0];
>> +};
>> +
>> +struct regs {
>> +     u64 rax;
>> +     u64 rcx;
>> +     u64 rdx;
>> +     u64 rbx;
>> +     u64 cr2;
>> +     u64 rbp;
>> +     u64 rsi;
>> +     u64 rdi;
>> +     u64 r8;
>> +     u64 r9;
>> +     u64 r10;
>> +     u64 r11;
>> +     u64 r12;
>> +     u64 r13;
>> +     u64 r14;
>> +     u64 r15;
>> +};
>> +
>> +static union vmx_basic {
>> +     u64 val;
>> +     struct {
>> +             u32 revision;
>> +             u32     size:13,
>> +                     : 3,
>> +                     width:1,
>> +                     dual:1,
>> +                     type:4,
>> +                     insouts:1,
>> +                     ctrl:1;
>> +     };
>> +} basic;
>> +
>> +static union vmx_ctrl_pin {
>> +     u64 val;
>> +     struct {
>> +             u32 set, clr;
>> +     };
>> +} ctrl_pin_rev;
>> +
>> +static union vmx_ctrl_cpu {
>> +     u64 val;
>> +     struct {
>> +             u32 set, clr;
>> +     };
>> +} ctrl_cpu_rev[2];
>> +
>> +static union vmx_ctrl_exit {
>> +     u64 val;
>> +     struct {
>> +             u32 set, clr;
>> +     };
>> +} ctrl_exit_rev;
>> +
>> +static union vmx_ctrl_ent {
>> +     u64 val;
>> +     struct {
>> +             u32 set, clr;
>> +     };
>> +} ctrl_enter_rev;
>> +
>> +static union vmx_ept_vpid {
>> +     u64 val;
>> +     struct {
>> +             u32:16,
>> +                     super:2,
>> +                     : 2,
>> +                     invept:1,
>> +                     : 11;
>> +             u32     invvpid:1;
>> +     };
>> +} ept_vpid;
>> +
>> +struct descr {
>> +     u16 limit;
>> +     u64 addr;
>> +};
>> +
>> +enum Encoding {
>> +     /* 16-Bit Control Fields */
>> +     VPID                    = 0x0000ul,
>> +     /* Posted-interrupt notification vector */
>> +     PINV                    = 0x0002ul,
>> +     /* EPTP index */
>> +     EPTP_IDX                = 0x0004ul,
>> +
>> +     /* 16-Bit Guest State Fields */
>> +     GUEST_SEL_ES            = 0x0800ul,
>> +     GUEST_SEL_CS            = 0x0802ul,
>> +     GUEST_SEL_SS            = 0x0804ul,
>> +     GUEST_SEL_DS            = 0x0806ul,
>> +     GUEST_SEL_FS            = 0x0808ul,
>> +     GUEST_SEL_GS            = 0x080aul,
>> +     GUEST_SEL_LDTR          = 0x080cul,
>> +     GUEST_SEL_TR            = 0x080eul,
>> +     GUEST_INT_STATUS        = 0x0810ul,
>> +
>> +     /* 16-Bit Host State Fields */
>> +     HOST_SEL_ES             = 0x0c00ul,
>> +     HOST_SEL_CS             = 0x0c02ul,
>> +     HOST_SEL_SS             = 0x0c04ul,
>> +     HOST_SEL_DS             = 0x0c06ul,
>> +     HOST_SEL_FS             = 0x0c08ul,
>> +     HOST_SEL_GS             = 0x0c0aul,
>> +     HOST_SEL_TR             = 0x0c0cul,
>> +
>> +     /* 64-Bit Control Fields */
>> +     IO_BITMAP_A             = 0x2000ul,
>> +     IO_BITMAP_B             = 0x2002ul,
>> +     MSR_BITMAP              = 0x2004ul,
>> +     EXIT_MSR_ST_ADDR        = 0x2006ul,
>> +     EXIT_MSR_LD_ADDR        = 0x2008ul,
>> +     ENTER_MSR_LD_ADDR       = 0x200aul,
>> +     VMCS_EXEC_PTR           = 0x200cul,
>> +     TSC_OFFSET              = 0x2010ul,
>> +     TSC_OFFSET_HI           = 0x2011ul,
>> +     APIC_VIRT_ADDR          = 0x2012ul,
>> +     APIC_ACCS_ADDR          = 0x2014ul,
>> +     EPTP                    = 0x201aul,
>> +     EPTP_HI                 = 0x201bul,
>> +
>> +     /* 64-Bit Readonly Data Field */
>> +     INFO_PHYS_ADDR          = 0x2400ul,
>> +
>> +     /* 64-Bit Guest State */
>> +     VMCS_LINK_PTR           = 0x2800ul,
>> +     VMCS_LINK_PTR_HI        = 0x2801ul,
>> +     GUEST_DEBUGCTL          = 0x2802ul,
>> +     GUEST_DEBUGCTL_HI       = 0x2803ul,
>> +     GUEST_EFER              = 0x2806ul,
>> +     GUEST_PERF_GLOBAL_CTRL  = 0x2808ul,
>> +     GUEST_PDPTE             = 0x280aul,
>> +
>> +     /* 64-Bit Host State */
>> +     HOST_EFER               = 0x2c02ul,
>> +     HOST_PERF_GLOBAL_CTRL   = 0x2c04ul,
>> +
>> +     /* 32-Bit Control Fields */
>> +     PIN_CONTROLS            = 0x4000ul,
>> +     CPU_EXEC_CTRL0          = 0x4002ul,
>> +     EXC_BITMAP              = 0x4004ul,
>> +     PF_ERROR_MASK           = 0x4006ul,
>> +     PF_ERROR_MATCH          = 0x4008ul,
>> +     CR3_TARGET_COUNT        = 0x400aul,
>> +     EXI_CONTROLS            = 0x400cul,
>> +     EXI_MSR_ST_CNT          = 0x400eul,
>> +     EXI_MSR_LD_CNT          = 0x4010ul,
>> +     ENT_CONTROLS            = 0x4012ul,
>> +     ENT_MSR_LD_CNT          = 0x4014ul,
>> +     ENT_INTR_INFO           = 0x4016ul,
>> +     ENT_INTR_ERROR          = 0x4018ul,
>> +     ENT_INST_LEN            = 0x401aul,
>> +     TPR_THRESHOLD           = 0x401cul,
>> +     CPU_EXEC_CTRL1          = 0x401eul,
>> +
>> +     /* 32-Bit R/O Data Fields */
>> +     VMX_INST_ERROR          = 0x4400ul,
>> +     EXI_REASON              = 0x4402ul,
>> +     EXI_INTR_INFO           = 0x4404ul,
>> +     EXI_INTR_ERROR          = 0x4406ul,
>> +     IDT_VECT_INFO           = 0x4408ul,
>> +     IDT_VECT_ERROR          = 0x440aul,
>> +     EXI_INST_LEN            = 0x440cul,
>> +     EXI_INST_INFO           = 0x440eul,
>> +
>> +     /* 32-Bit Guest State Fields */
>> +     GUEST_LIMIT_ES          = 0x4800ul,
>> +     GUEST_LIMIT_CS          = 0x4802ul,
>> +     GUEST_LIMIT_SS          = 0x4804ul,
>> +     GUEST_LIMIT_DS          = 0x4806ul,
>> +     GUEST_LIMIT_FS          = 0x4808ul,
>> +     GUEST_LIMIT_GS          = 0x480aul,
>> +     GUEST_LIMIT_LDTR        = 0x480cul,
>> +     GUEST_LIMIT_TR          = 0x480eul,
>> +     GUEST_LIMIT_GDTR        = 0x4810ul,
>> +     GUEST_LIMIT_IDTR        = 0x4812ul,
>> +     GUEST_AR_ES             = 0x4814ul,
>> +     GUEST_AR_CS             = 0x4816ul,
>> +     GUEST_AR_SS             = 0x4818ul,
>> +     GUEST_AR_DS             = 0x481aul,
>> +     GUEST_AR_FS             = 0x481cul,
>> +     GUEST_AR_GS             = 0x481eul,
>> +     GUEST_AR_LDTR           = 0x4820ul,
>> +     GUEST_AR_TR             = 0x4822ul,
>> +     GUEST_INTR_STATE        = 0x4824ul,
>> +     GUEST_ACTV_STATE        = 0x4826ul,
>> +     GUEST_SMBASE            = 0x4828ul,
>> +     GUEST_SYSENTER_CS       = 0x482aul,
>> +
>> +     /* 32-Bit Host State Fields */
>> +     HOST_SYSENTER_CS        = 0x4c00ul,
>> +
>> +     /* Natural-Width Control Fields */
>> +     CR0_MASK                = 0x6000ul,
>> +     CR4_MASK                = 0x6002ul,
>> +     CR0_READ_SHADOW = 0x6004ul,
>> +     CR4_READ_SHADOW = 0x6006ul,
>> +     CR3_TARGET_0            = 0x6008ul,
>> +     CR3_TARGET_1            = 0x600aul,
>> +     CR3_TARGET_2            = 0x600cul,
>> +     CR3_TARGET_3            = 0x600eul,
>> +
>> +     /* Natural-Width R/O Data Fields */
>> +     EXI_QUALIFICATION       = 0x6400ul,
>> +     IO_RCX                  = 0x6402ul,
>> +     IO_RSI                  = 0x6404ul,
>> +     IO_RDI                  = 0x6406ul,
>> +     IO_RIP                  = 0x6408ul,
>> +     GUEST_LINEAR_ADDRESS    = 0x640aul,
>> +
>> +     /* Natural-Width Guest State Fields */
>> +     GUEST_CR0               = 0x6800ul,
>> +     GUEST_CR3               = 0x6802ul,
>> +     GUEST_CR4               = 0x6804ul,
>> +     GUEST_BASE_ES           = 0x6806ul,
>> +     GUEST_BASE_CS           = 0x6808ul,
>> +     GUEST_BASE_SS           = 0x680aul,
>> +     GUEST_BASE_DS           = 0x680cul,
>> +     GUEST_BASE_FS           = 0x680eul,
>> +     GUEST_BASE_GS           = 0x6810ul,
>> +     GUEST_BASE_LDTR         = 0x6812ul,
>> +     GUEST_BASE_TR           = 0x6814ul,
>> +     GUEST_BASE_GDTR         = 0x6816ul,
>> +     GUEST_BASE_IDTR         = 0x6818ul,
>> +     GUEST_DR7               = 0x681aul,
>> +     GUEST_RSP               = 0x681cul,
>> +     GUEST_RIP               = 0x681eul,
>> +     GUEST_RFLAGS            = 0x6820ul,
>> +     GUEST_PENDING_DEBUG     = 0x6822ul,
>> +     GUEST_SYSENTER_ESP      = 0x6824ul,
>> +     GUEST_SYSENTER_EIP      = 0x6826ul,
>> +
>> +     /* Natural-Width Host State Fields */
>> +     HOST_CR0                = 0x6c00ul,
>> +     HOST_CR3                = 0x6c02ul,
>> +     HOST_CR4                = 0x6c04ul,
>> +     HOST_BASE_FS            = 0x6c06ul,
>> +     HOST_BASE_GS            = 0x6c08ul,
>> +     HOST_BASE_TR            = 0x6c0aul,
>> +     HOST_BASE_GDTR          = 0x6c0cul,
>> +     HOST_BASE_IDTR          = 0x6c0eul,
>> +     HOST_SYSENTER_ESP       = 0x6c10ul,
>> +     HOST_SYSENTER_EIP       = 0x6c12ul,
>> +     HOST_RSP                = 0x6c14ul,
>> +     HOST_RIP                = 0x6c16ul
>> +};
>> +
>> +enum Reason {
>> +     VMX_EXC_NMI             = 0,
>> +     VMX_EXTINT              = 1,
>> +     VMX_TRIPLE_FAULT        = 2,
>> +     VMX_INIT                = 3,
>> +     VMX_SIPI                = 4,
>> +     VMX_SMI_IO              = 5,
>> +     VMX_SMI_OTHER           = 6,
>> +     VMX_INTR_WINDOW         = 7,
>> +     VMX_NMI_WINDOW          = 8,
>> +     VMX_TASK_SWITCH         = 9,
>> +     VMX_CPUID               = 10,
>> +     VMX_GETSEC              = 11,
>> +     VMX_HLT                 = 12,
>> +     VMX_INVD                = 13,
>> +     VMX_INVLPG              = 14,
>> +     VMX_RDPMC               = 15,
>> +     VMX_RDTSC               = 16,
>> +     VMX_RSM                 = 17,
>> +     VMX_VMCALL              = 18,
>> +     VMX_VMCLEAR             = 19,
>> +     VMX_VMLAUNCH            = 20,
>> +     VMX_VMPTRLD             = 21,
>> +     VMX_VMPTRST             = 22,
>> +     VMX_VMREAD              = 23,
>> +     VMX_VMRESUME            = 24,
>> +     VMX_VMWRITE             = 25,
>> +     VMX_VMXOFF              = 26,
>> +     VMX_VMXON               = 27,
>> +     VMX_CR                  = 28,
>> +     VMX_DR                  = 29,
>> +     VMX_IO                  = 30,
>> +     VMX_RDMSR               = 31,
>> +     VMX_WRMSR               = 32,
>> +     VMX_FAIL_STATE          = 33,
>> +     VMX_FAIL_MSR            = 34,
>> +     VMX_MWAIT               = 36,
>> +     VMX_MTF                 = 37,
>> +     VMX_MONITOR             = 39,
>> +     VMX_PAUSE               = 40,
>> +     VMX_FAIL_MCHECK         = 41,
>> +     VMX_TPR_THRESHOLD       = 43,
>> +     VMX_APIC_ACCESS         = 44,
>> +     VMX_GDTR_IDTR           = 46,
>> +     VMX_LDTR_TR             = 47,
>> +     VMX_EPT_VIOLATION       = 48,
>> +     VMX_EPT_MISCONFIG       = 49,
>> +     VMX_INVEPT              = 50,
>> +     VMX_PREEMPT             = 52,
>> +     VMX_INVVPID             = 53,
>> +     VMX_WBINVD              = 54,
>> +     VMX_XSETBV              = 55
>> +};
>> +
>> +#define X86_EFLAGS_CF        0x00000001 /* Carry Flag */
>> +#define X86_EFLAGS_ZF        0x00000040 /* Zero Flag */
>> +
>> +enum Ctrl_exi {
>> +     EXI_HOST_64             = 1UL << 9,
>> +     EXI_LOAD_PERF           = 1UL << 12,
>> +     EXI_INTA                = 1UL << 15,
>> +     EXI_LOAD_EFER           = 1UL << 21,
>> +};
>> +
>> +enum Ctrl_ent {
>> +     ENT_GUEST_64            = 1UL << 9,
>> +     ENT_LOAD_EFER           = 1UL << 15,
>> +};
>> +
>> +enum Ctrl_pin {
>> +     PIN_EXTINT              = 1ul << 0,
>> +     PIN_NMI                 = 1ul << 3,
>> +     PIN_VIRT_NMI            = 1ul << 5,
>> +};
>> +
>> +enum Ctrl0 {
>> +     CPU_INTR_WINDOW         = 1ul << 2,
>> +     CPU_HLT                 = 1ul << 7,
>> +     CPU_INVLPG              = 1ul << 9,
>> +     CPU_CR3_LOAD            = 1ul << 15,
>> +     CPU_CR3_STORE           = 1ul << 16,
>> +     CPU_TPR_SHADOW          = 1ul << 21,
>> +     CPU_NMI_WINDOW          = 1ul << 22,
>> +     CPU_IO                  = 1ul << 24,
>> +     CPU_IO_BITMAP           = 1ul << 25,
>> +     CPU_SECONDARY           = 1ul << 31,
>> +};
>> +
>> +enum Ctrl1 {
>> +     CPU_EPT                 = 1ul << 1,
>> +     CPU_VPID                = 1ul << 5,
>> +     CPU_URG                 = 1ul << 7,
>> +};
>> +
>> +#define SEL_NULL_DESC                0x0
>> +#define SEL_KERN_CODE_64     0x8
>> +#define SEL_KERN_DATA_64     0x10
>> +#define SEL_USER_CODE_64     0x18
>> +#define SEL_USER_DATA_64     0x20
>> +#define SEL_CODE_32          0x28
>> +#define SEL_DATA_32          0x30
>> +#define SEL_CODE_16          0x38
>> +#define SEL_DATA_16          0x40
>> +#define SEL_TSS_RUN          0x48
>> +
>> +#define SAVE_GPR                             \
>> +     "xchg %rax, regs\n\t"                   \
>> +     "xchg %rbx, regs+0x8\n\t"               \
>> +     "xchg %rcx, regs+0x10\n\t"              \
>> +     "xchg %rdx, regs+0x18\n\t"              \
>> +     "xchg %rbp, regs+0x28\n\t"              \
>> +     "xchg %rsi, regs+0x30\n\t"              \
>> +     "xchg %rdi, regs+0x38\n\t"              \
>> +     "xchg %r8, regs+0x40\n\t"               \
>> +     "xchg %r9, regs+0x48\n\t"               \
>> +     "xchg %r10, regs+0x50\n\t"              \
>> +     "xchg %r11, regs+0x58\n\t"              \
>> +     "xchg %r12, regs+0x60\n\t"              \
>> +     "xchg %r13, regs+0x68\n\t"              \
>> +     "xchg %r14, regs+0x70\n\t"              \
>> +     "xchg %r15, regs+0x78\n\t"
>> +
>> +#define LOAD_GPR     SAVE_GPR
>> +
>> +#define CR0_PE               (1ul << 0)
>> +#define CR0_PG               (1ul << 31)
>> +#define CR4_VMXE     (1ul << 0)
>> +#define CR4_PAE              (1ul << 5)
>> +#define CR4_PCIDE    (1ul << 17)
>> +
>> +#define VMX_IO_SIZE_MASK             0x7
>> +#define _VMX_IO_BYTE                 1
>> +#define _VMX_IO_WORD                 2
>> +#define _VMX_IO_LONG                 3
>> +#define VMX_IO_DIRECTION_MASK                (1ul << 3)
>> +#define VMX_IO_IN                    (1ul << 3)
>> +#define VMX_IO_OUT                   0
>> +#define VMX_IO_STRING                        (1ul << 4)
>> +#define VMX_IO_REP                   (1ul << 5)
>> +#define VMX_IO_OPRAND_DX             (1ul << 6)
>> +#define VMX_IO_PORT_MASK             0xFFFF0000
>> +#define VMX_IO_PORT_SHIFT            16
>> +
>> +#define TEST_VMRESUME                0x1001
>> +
>> +#endif
>> +
>>
>



--
Arthur Chunqi Li
Department of Computer Science
School of EECS
Peking University
Beijing, China

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] kvm-unit-tests : The first version of VMX nested test case
  2013-07-16 11:47   ` Arthur Chunqi Li
@ 2013-07-16 11:58     ` Paolo Bonzini
  0 siblings, 0 replies; 14+ messages in thread
From: Paolo Bonzini @ 2013-07-16 11:58 UTC (permalink / raw)
  To: Arthur Chunqi Li; +Cc: kvm, Jan Kiszka, Gleb Natapov

Il 16/07/2013 13:47, Arthur Chunqi Li ha scritto:
> setjmp and longjmp are not implemented in our test environment, and
> these two functions are highly dependent on the architecture. Do you
> think we need to write general code for both 32-bit and 64-bit, or
> just write a specific one for this test case?

Supporting it in x86-64 is enough for now.

Paolo

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] kvm-unit-tests : The first version of VMX nested test case
  2013-07-16 10:28 ` Paolo Bonzini
  2013-07-16 11:47   ` Arthur Chunqi Li
@ 2013-07-16 15:20   ` Gleb Natapov
  2013-07-16 15:29     ` Arthur Chunqi Li
  1 sibling, 1 reply; 14+ messages in thread
From: Gleb Natapov @ 2013-07-16 15:20 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: Arthur Chunqi Li, kvm, jan.kiszka

On Tue, Jul 16, 2013 at 12:28:05PM +0200, Paolo Bonzini wrote:
> > +void vmx_exit(void)
> > +{
> > +	test_vmxoff();
> > +	printf("\nSUMMARY: %d tests, %d failures\n", tests, fails);
> > +	exit(fails ? -1 : 0);
> > +}
> 
> Can you try to jump back to main, and do test_vmxoff there?  This will
> avoid having to write our tests in callback style, which is a pain.
> Basically something similar to setjmp/longjmp.  In main:
> 
> 	if (setjmp(jmpbuf) == 0) {
> 		vmx_run();
> 		/* Should not reach here */
> 		report("test vmlaunch", 0);
> 	}
> 	test_vmxoff();
> 
> exit:
> 	printf("\nSUMMARY: %d tests, %d failures\n", tests, fails);
> 	return fails ? 1 : 0;
> 
> In vmx_handler:
> 
> 	case VMX_HLT:
> 		printf("\nVM exit.\n");
> 		longjmp(jmpbuf, 1);
> 
Why not just make vmexit occur after vmlaunch/vmresume, like KVM does? It
will make the code much more straightforward and easier to follow.

--
			Gleb.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] kvm-unit-tests : The first version of VMX nested test case
  2013-07-16 15:20   ` Gleb Natapov
@ 2013-07-16 15:29     ` Arthur Chunqi Li
  2013-07-16 16:45       ` Gleb Natapov
  0 siblings, 1 reply; 14+ messages in thread
From: Arthur Chunqi Li @ 2013-07-16 15:29 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: Paolo Bonzini, kvm, Jan Kiszka

On Tue, Jul 16, 2013 at 11:20 PM, Gleb Natapov <gleb@redhat.com> wrote:
> On Tue, Jul 16, 2013 at 12:28:05PM +0200, Paolo Bonzini wrote:
>> > +void vmx_exit(void)
>> > +{
>> > +   test_vmxoff();
>> > +   printf("\nSUMMARY: %d tests, %d failures\n", tests, fails);
>> > +   exit(fails ? -1 : 0);
>> > +}
>>
>> Can you try to jump back to main, and do test_vmxoff there?  This will
>> avoid having to write our tests in callback style, which is a pain.
>> Basically something similar to setjmp/longjmp.  In main:
>>
>>       if (setjmp(jmpbuf) == 0) {
>>               vmx_run();
>>               /* Should not reach here */
>>               report("test vmlaunch", 0);
>>       }
>>       test_vmxoff();
>>
>> exit:
>>       printf("\nSUMMARY: %d tests, %d failures\n", tests, fails);
>>       return fails ? 1 : 0;
>>
>> In vmx_handler:
>>
>>       case VMX_HLT:
>>               printf("\nVM exit.\n");
>>               longjmp(jmpbuf, 1);
>>
> Why not just make vmexit occur after vmlaunch/vmresume like KVM does. It
> will make code much more straightforward and easer to follow.
The concept "easier to follow" may have different meanings from
different points of view. This approach puts all the test cases in the
main function instead of scattering them everywhere, which is another
view of "easy to follow". As this is just a test case, I prefer this one.

Besides, this way we can start another VM right after the previous one
simply in the main function. This is flexible if we want to test
re-entry into VMX mode or the like.

Arthur
>
> --
>                         Gleb.



--
Arthur Chunqi Li
Department of Computer Science
School of EECS
Peking University
Beijing, China

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] kvm-unit-tests : The first version of VMX nested test case
  2013-07-16 15:29     ` Arthur Chunqi Li
@ 2013-07-16 16:45       ` Gleb Natapov
  2013-07-16 17:13         ` Arthur Chunqi Li
  0 siblings, 1 reply; 14+ messages in thread
From: Gleb Natapov @ 2013-07-16 16:45 UTC (permalink / raw)
  To: Arthur Chunqi Li; +Cc: Paolo Bonzini, kvm, Jan Kiszka

On Tue, Jul 16, 2013 at 11:29:20PM +0800, Arthur Chunqi Li wrote:
> On Tue, Jul 16, 2013 at 11:20 PM, Gleb Natapov <gleb@redhat.com> wrote:
> > On Tue, Jul 16, 2013 at 12:28:05PM +0200, Paolo Bonzini wrote:
> >> > +void vmx_exit(void)
> >> > +{
> >> > +   test_vmxoff();
> >> > +   printf("\nSUMMARY: %d tests, %d failures\n", tests, fails);
> >> > +   exit(fails ? -1 : 0);
> >> > +}
> >>
> >> Can you try to jump back to main, and do test_vmxoff there?  This will
> >> avoid having to write our tests in callback style, which is a pain.
> >> Basically something similar to setjmp/longjmp.  In main:
> >>
> >>       if (setjmp(jmpbuf) == 0) {
> >>               vmx_run();
> >>               /* Should not reach here */
> >>               report("test vmlaunch", 0);
> >>       }
> >>       test_vmxoff();
> >>
> >> exit:
> >>       printf("\nSUMMARY: %d tests, %d failures\n", tests, fails);
> >>       return fails ? 1 : 0;
> >>
> >> In vmx_handler:
> >>
> >>       case VMX_HLT:
> >>               printf("\nVM exit.\n");
> >>               longjmp(jmpbuf, 1);
> >>
> > Why not just make vmexit occur after vmlaunch/vmresume like KVM does. It
> > will make code much more straightforward and easer to follow.
> The concept "easier to follow" may have different meanings in
> different view. This achievement puts all the test cases in main
> function instead of scattering everywhere, which is another view to
> "easy to follow". As this is just a test case, I prefer this one.
> 
I do not see why what I propose will prevent you to put all tests into main.

vmx_run() will looks like that:

   vmlaunch
   while(1) {
       vmresume
         <---- vmexit jumps here
       switch(exit reason) {
          case reason1:
          break;
          case reason2:
          break;
          case HLT
          return;
       }
   }          
          
> Besides, this way we can start another VM following the previous one
> simply in main function. This is flexible if we want to test re-enter
> to VMX mode or so.
> 
That's what I am missing. How do one writes more tests now?

I was thinking about interface like that:

guest_func_test1()
{
}

tes1t_exit_handlers[] = {test1_handle_hlt, test1_handle_exception, ....}

main()
{

   init_vmcs(); /* generic stuff */
   init_vmcs_test1(); /* test1 related stuff */
   r = run_in_guest(guest_func_test1, test1_exit_handlers);
   report("test1", r);
}

--
			Gleb.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] kvm-unit-tests : The first version of VMX nested test case
  2013-07-16 16:45       ` Gleb Natapov
@ 2013-07-16 17:13         ` Arthur Chunqi Li
  2013-07-16 17:24           ` Paolo Bonzini
  2013-07-16 17:47           ` Gleb Natapov
  0 siblings, 2 replies; 14+ messages in thread
From: Arthur Chunqi Li @ 2013-07-16 17:13 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: Paolo Bonzini, kvm, Jan Kiszka

On Wed, Jul 17, 2013 at 12:45 AM, Gleb Natapov <gleb@redhat.com> wrote:
> On Tue, Jul 16, 2013 at 11:29:20PM +0800, Arthur Chunqi Li wrote:
>> On Tue, Jul 16, 2013 at 11:20 PM, Gleb Natapov <gleb@redhat.com> wrote:
>> > On Tue, Jul 16, 2013 at 12:28:05PM +0200, Paolo Bonzini wrote:
>> >> > +void vmx_exit(void)
>> >> > +{
>> >> > +   test_vmxoff();
>> >> > +   printf("\nSUMMARY: %d tests, %d failures\n", tests, fails);
>> >> > +   exit(fails ? -1 : 0);
>> >> > +}
>> >>
>> >> Can you try to jump back to main, and do test_vmxoff there?  This will
>> >> avoid having to write our tests in callback style, which is a pain.
>> >> Basically something similar to setjmp/longjmp.  In main:
>> >>
>> >>       if (setjmp(jmpbuf) == 0) {
>> >>               vmx_run();
>> >>               /* Should not reach here */
>> >>               report("test vmlaunch", 0);
>> >>       }
>> >>       test_vmxoff();
>> >>
>> >> exit:
>> >>       printf("\nSUMMARY: %d tests, %d failures\n", tests, fails);
>> >>       return fails ? 1 : 0;
>> >>
>> >> In vmx_handler:
>> >>
>> >>       case VMX_HLT:
>> >>               printf("\nVM exit.\n");
>> >>               longjmp(jmpbuf, 1);
>> >>
>> > Why not just make vmexit occur after vmlaunch/vmresume like KVM does. It
>> > will make code much more straightforward and easer to follow.
>> The concept "easier to follow" may have different meanings in
>> different view. This achievement puts all the test cases in main
>> function instead of scattering everywhere, which is another view to
>> "easy to follow". As this is just a test case, I prefer this one.
>>
> I do not see why what I propose will prevent you to put all tests into main.
>
> vmx_run() will looks like that:
>
>    vmlaunch
>    while(1) {
>        vmresume
>          <---- vmexit jumps here
>        switch(exit reason) {
>           case reason1:
>           break;
>           case reason2:
>           break;
>           case HLT
>           return;
>        }
>    }
Yes, this reminds me of some KVM code I have read before. This mixes
vmlaunch/resume and vmx_handler into one piece of code. Although it is
a good way to explicitly show the execution sequence, it increases the
LOC in one function.
>
>> Besides, this way we can start another VM following the previous one
>> simply in main function. This is flexible if we want to test re-enter
>> to VMX mode or so.
>>
> That's what I am missing. How do one writes more tests now?
>
> I was thinking about interface like that:
>
> guest_func_test1()
> {
> }
>
> tes1t_exit_handlers[] = {test1_handle_hlt, test1_handle_exception, ....}
>
> main()
> {
>
>    init_vmcs(); /* generic stuff */
>    init_vmcs_test1(); /* test1 related stuff */
>    r = run_in_guest(guest_func_test1, test1_exit_handlers);
>    report("test1", r);
> }
>
I have thought about this question and I'm not quite sure how best to
solve it yet. I see two ways. The first is that we just leave vmx.c as
the test suite for the VMX instructions and the basic execution
routine, and develop other test cases in other files. Since all other
nested VMX tests are independent of the basic routine, and it is hard
for us to put all nested VMX test cases in one file, we just let this
file do simple things and reuse some of its functions in the other
nested VMX test suites. Your proposal for adding new test cases can be
implemented in those other test suites.

The other way, in contrast, is not to split the nested VMX test cases
at all. This method may result in a HUGE vmx.c file, in which tests for
different parts are not clearly separated.

Actually, I prefer the former solution.
> --
>                         Gleb.


Besides, there is also another "pseudo" bug in PATCH 2/2, here:

+int test_vmx_capability(void)
+{
+       struct cpuid r;
+       u64 ret1, ret2;
+       r = cpuid(1);
+       ret1 = ((r.c) >> 5) & 1;
+       ret2 = ((rdmsr(MSR_IA32_FEATURE_CONTROL) & 0x5) == 0x5);
+       report("test vmx capability", ret1 & ret2);
+       return !(ret1 & ret2);
+}

The IA32_FEATURE_CONTROL MSR should be set by SeaBIOS. Since there is
no patch for SeaBIOS yet, and software can also set this MSR as long as
its lock bit is unset, I prefer to change it like this:

+int test_vmx_capability(void)
+{
+       struct cpuid r;
+       u64 ret1, ret2;
+       u64 ia32_feature_control;
+       r = cpuid(1);
+       ret1 = ((r.c) >> 5) & 1;
+       ia32_feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL);
+       ret2 = ((ia32_feature_control & 0x5) == 0x5);
+       if ((!ret2) && ((ia32_feature_control & 0x1) == 0)){
+               wrmsr(MSR_IA32_FEATURE_CONTROL, 0x5);
+               ia32_feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL);
+               ret2 = ((ia32_feature_control & 0x5) == 0x5);
+       }
+       report("test vmx capability", ret1 & ret2);
+       return !(ret1 & ret2);
+}


Arthur
--
Arthur Chunqi Li
Department of Computer Science
School of EECS
Peking University
Beijing, China

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] kvm-unit-tests : The first version of VMX nested test case
  2013-07-16 17:13         ` Arthur Chunqi Li
@ 2013-07-16 17:24           ` Paolo Bonzini
  2013-07-16 17:47           ` Gleb Natapov
  1 sibling, 0 replies; 14+ messages in thread
From: Paolo Bonzini @ 2013-07-16 17:24 UTC (permalink / raw)
  To: Arthur Chunqi Li; +Cc: Gleb Natapov, kvm, Jan Kiszka

Il 16/07/2013 19:13, Arthur Chunqi Li ha scritto:
> On Wed, Jul 17, 2013 at 12:45 AM, Gleb Natapov <gleb@redhat.com> wrote:
>> On Tue, Jul 16, 2013 at 11:29:20PM +0800, Arthur Chunqi Li wrote:
>>> On Tue, Jul 16, 2013 at 11:20 PM, Gleb Natapov <gleb@redhat.com> wrote:
>>>> On Tue, Jul 16, 2013 at 12:28:05PM +0200, Paolo Bonzini wrote:
>>>>>> +void vmx_exit(void)
>>>>>> +{
>>>>>> +   test_vmxoff();
>>>>>> +   printf("\nSUMMARY: %d tests, %d failures\n", tests, fails);
>>>>>> +   exit(fails ? -1 : 0);
>>>>>> +}
>>>>>
>>>>> Can you try to jump back to main, and do test_vmxoff there?  This will
>>>>> avoid having to write our tests in callback style, which is a pain.
>>>>> Basically something similar to setjmp/longjmp.  In main:
>>>>>
>>>>>       if (setjmp(jmpbuf) == 0) {
>>>>>               vmx_run();
>>>>>               /* Should not reach here */
>>>>>               report("test vmlaunch", 0);
>>>>>       }
>>>>>       test_vmxoff();
>>>>>
>>>>> exit:
>>>>>       printf("\nSUMMARY: %d tests, %d failures\n", tests, fails);
>>>>>       return fails ? 1 : 0;
>>>>>
>>>>> In vmx_handler:
>>>>>
>>>>>       case VMX_HLT:
>>>>>               printf("\nVM exit.\n");
>>>>>               longjmp(jmpbuf, 1);
>>>>>
>>>> Why not just make vmexit occur after vmlaunch/vmresume like KVM does. It
>>>> will make code much more straightforward and easer to follow.
>>> The concept "easier to follow" may have different meanings in
>>> different view. This achievement puts all the test cases in main
>>> function instead of scattering everywhere, which is another view to
>>> "easy to follow". As this is just a test case, I prefer this one.
>>>
>> I do not see why what I propose will prevent you to put all tests into main.
>>
>> vmx_run() will looks like that:
>>
>>    vmlaunch
>>    while(1) {
>>        vmresume
>>          <---- vmexit jumps here
>>        switch(exit reason) {
>>           case reason1:
>>           break;
>>           case reason2:
>>           break;
>>           case HLT
>>           return;
>>        }
>>    }
> Yes, this recalls me some KVM codes I have read before. This mixes
> vmlaunch/resume and vmx_handler into one piece of code. It is a good
> way to explicitly show the execution sequence though, it increases LOC
> in one function.
>>
>>> Besides, this way we can start another VM following the previous one
>>> simply in main function. This is flexible if we want to test re-enter
>>> to VMX mode or so.
>>>
>> That's what I am missing. How do one writes more tests now?
>>
>> I was thinking about interface like that:
>>
>> guest_func_test1()
>> {
>> }
>>
>> tes1t_exit_handlers[] = {test1_handle_hlt, test1_handle_exception, ....}
>>
>> main()
>> {
>>
>>    init_vmcs(); /* generic stuff */
>>    init_vmcs_test1(); /* test1 related stuff */
>>    r = run_in_guest(guest_func_test1, test1_exit_handlers);
>>    report("test1", r);
>> }
>>
> I have thought about this question and I'm not quite sure how to solve
> it now.

Why can't you just use a different vmx_handler (e.g. with an indirect
call in entry_vmx) for each test (as in Gleb's test1_exit_handlers)?
run_in_guest would prepare the function pointers and do

	init_vmcs(&vmcs_root);

	if (setjmp(env) == 0){
		vmx_run();
		/* Should not reach here */
		report("test vmlaunch", 0);
	}

as in your current testcase.

vmx.c would be a "library", and testcases could be either grouped in the
same file or spread across many of them, as you see fit.

Paolo

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] kvm-unit-tests : The first version of VMX nested test case
  2013-07-16 17:13         ` Arthur Chunqi Li
  2013-07-16 17:24           ` Paolo Bonzini
@ 2013-07-16 17:47           ` Gleb Natapov
  1 sibling, 0 replies; 14+ messages in thread
From: Gleb Natapov @ 2013-07-16 17:47 UTC (permalink / raw)
  To: Arthur Chunqi Li; +Cc: Paolo Bonzini, kvm, Jan Kiszka

On Wed, Jul 17, 2013 at 01:13:56AM +0800, Arthur Chunqi Li wrote:
> On Wed, Jul 17, 2013 at 12:45 AM, Gleb Natapov <gleb@redhat.com> wrote:
> > On Tue, Jul 16, 2013 at 11:29:20PM +0800, Arthur Chunqi Li wrote:
> >> On Tue, Jul 16, 2013 at 11:20 PM, Gleb Natapov <gleb@redhat.com> wrote:
> >> > On Tue, Jul 16, 2013 at 12:28:05PM +0200, Paolo Bonzini wrote:
> >> >> > +void vmx_exit(void)
> >> >> > +{
> >> >> > +   test_vmxoff();
> >> >> > +   printf("\nSUMMARY: %d tests, %d failures\n", tests, fails);
> >> >> > +   exit(fails ? -1 : 0);
> >> >> > +}
> >> >>
> >> >> Can you try to jump back to main, and do test_vmxoff there?  This will
> >> >> avoid having to write our tests in callback style, which is a pain.
> >> >> Basically something similar to setjmp/longjmp.  In main:
> >> >>
> >> >>       if (setjmp(jmpbuf) == 0) {
> >> >>               vmx_run();
> >> >>               /* Should not reach here */
> >> >>               report("test vmlaunch", 0);
> >> >>       }
> >> >>       test_vmxoff();
> >> >>
> >> >> exit:
> >> >>       printf("\nSUMMARY: %d tests, %d failures\n", tests, fails);
> >> >>       return fails ? 1 : 0;
> >> >>
> >> >> In vmx_handler:
> >> >>
> >> >>       case VMX_HLT:
> >> >>               printf("\nVM exit.\n");
> >> >>               longjmp(jmpbuf, 1);
> >> >>
> >> > Why not just make vmexit occur after vmlaunch/vmresume like KVM does. It
> >> > will make the code much more straightforward and easier to follow.
> >> The concept "easier to follow" may have different meanings in
> >> different view. This achievement puts all the test cases in main
> >> function instead of scattering everywhere, which is another view to
> >> "easy to follow". As this is just a test case, I prefer this one.
> >>
> > I do not see why what I propose will prevent you to put all tests into main.
> >
> > vmx_run() will look like this:
> >
> >    vmlaunch
> >    while(1) {
> >        vmresume
> >          <---- vmexit jumps here
> >        switch(exit reason) {
> >           case reason1:
> >           break;
> >           case reason2:
> >           break;
> >           case HLT
> >           return;
> >        }
> >    }
> Yes, this reminds me of some KVM code I have read before. It mixes
> vmlaunch/resume and vmx_handler into one piece of code. Though it is a
> good way to explicitly show the execution sequence, it increases the
> LOC in one function.
LOC in one function is not an issue to be considered at all. Besides you
can put the switch into separate vmx_handler() function, or have an
array of vmexits.

> >
> >> Besides, this way we can start another VM following the previous one
> >> simply in main function. This is flexible if we want to test re-enter
> >> to VMX mode or so.
> >>
> > That's what I am missing. How does one write more tests now?
> >
> > I was thinking about interface like that:
> >
> > guest_func_test1()
> > {
> > }
> >
> > test1_exit_handlers[] = {test1_handle_hlt, test1_handle_exception, ....}
> >
> > main()
> > {
> >
> >    init_vmcs(); /* generic stuff */
> >    init_vmcs_test1(); /* test1 related stuff */
> >    r = run_in_guest(guest_func_test1, test1_exit_handlers);
> >    report("test1", r);
> > }
> >
> I have thought about this question and I'm not quite sure how to solve
> it now. I see two ways. The first is that we just leave vmx.c as the
> test suite for the VMX instructions and execution routine, and develop
> other test cases in other files. Since all other tests of nested vmx
> are independent of the basic routine, and it is hard for us to put all
> test cases for nested VMX in one file, we just let this file do simple
> things and reuse some of its functions in other test suites of nested
> vmx. Your proposal of adding new test cases can be implemented in
> other test suites.
> 
> The other way, in contrast, is not splitting the nested vmx test cases.
> This method may result in a HUGE vmx.c file, and tests for different
> parts would not be distinct.
> 
> Actually, I prefer the former solution.
I do not think we need separate infrastructure just to test basic
instructions. Actually, testing those is the less interesting part (well,
just to solely test vmlaunch/vmresume we would likely have to write
hundreds of tests to verify that all the things that should cause failure
do so, but we will likely settle for only a couple). I am not worried
about vmx.c becoming a huge file as long as writing a test is an easy and
more or less self-contained task.

--
			Gleb.

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2013-07-16 17:47 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-07-16  9:27 [PATCH] kvm-unit-tests : The first version of VMX nested test case Arthur Chunqi Li
2013-07-16  9:35 ` Arthur Chunqi Li
2013-07-16  9:45   ` Gleb Natapov
2013-07-16  9:53     ` Arthur Chunqi Li
2013-07-16  9:58       ` Gleb Natapov
2013-07-16 10:28 ` Paolo Bonzini
2013-07-16 11:47   ` Arthur Chunqi Li
2013-07-16 11:58     ` Paolo Bonzini
2013-07-16 15:20   ` Gleb Natapov
2013-07-16 15:29     ` Arthur Chunqi Li
2013-07-16 16:45       ` Gleb Natapov
2013-07-16 17:13         ` Arthur Chunqi Li
2013-07-16 17:24           ` Paolo Bonzini
2013-07-16 17:47           ` Gleb Natapov

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.