All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC PATCH] kvm-unit-tests : Basic architecture of VMX nested test case
@ 2013-07-17 18:54 Arthur Chunqi Li
  2013-07-18  5:52 ` Paolo Bonzini
  0 siblings, 1 reply; 24+ messages in thread
From: Arthur Chunqi Li @ 2013-07-17 18:54 UTC (permalink / raw)
  To: kvm; +Cc: jan.kiszka, gleb, pbonzini, Arthur Chunqi Li

This is the first version of VMX nested environment. It contains the
basic VMX instructions test cases, including VMXON/VMXOFF/VMXPTRLD/
VMXPTRST/VMCLEAR/VMLAUNCH/VMRESUME/VMCALL. This patchalso tests the
basic execution routine in VMX nested environment andlet the VM print
"Hello World" to inform its successfully run.

The first release also includes a test suite for vmenter (vmlaunch and
vmresume). Besides, hypercall mechanism is included and currently it is
used to invoke VM normal exit.

New files added:
x86/vmx.h : contains all VMX related macro declerations
x86/vmx.c : main file for VMX nested test case

Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
---
 config-x86-common.mak |    2 +
 config-x86_64.mak     |    1 +
 lib/x86/msr.h         |    5 +
 x86/cstart64.S        |    4 +
 x86/unittests.cfg     |    6 +
 x86/vmx.c             |  681 +++++++++++++++++++++++++++++++++++++++++++++++++
 x86/vmx.h             |  455 +++++++++++++++++++++++++++++++++
 7 files changed, 1154 insertions(+)
 create mode 100644 x86/vmx.c
 create mode 100644 x86/vmx.h

diff --git a/config-x86-common.mak b/config-x86-common.mak
index 455032b..34a41e1 100644
--- a/config-x86-common.mak
+++ b/config-x86-common.mak
@@ -101,6 +101,8 @@ $(TEST_DIR)/asyncpf.elf: $(cstart.o) $(TEST_DIR)/asyncpf.o
 
 $(TEST_DIR)/pcid.elf: $(cstart.o) $(TEST_DIR)/pcid.o
 
+$(TEST_DIR)/vmx.elf: $(cstart.o) $(TEST_DIR)/vmx.o
+
 arch_clean:
 	$(RM) $(TEST_DIR)/*.o $(TEST_DIR)/*.flat $(TEST_DIR)/*.elf \
 	$(TEST_DIR)/.*.d $(TEST_DIR)/lib/.*.d $(TEST_DIR)/lib/*.o
diff --git a/config-x86_64.mak b/config-x86_64.mak
index 4e525f5..bb8ee89 100644
--- a/config-x86_64.mak
+++ b/config-x86_64.mak
@@ -9,5 +9,6 @@ tests = $(TEST_DIR)/access.flat $(TEST_DIR)/apic.flat \
 	  $(TEST_DIR)/xsave.flat $(TEST_DIR)/rmap_chain.flat \
 	  $(TEST_DIR)/pcid.flat
 tests += $(TEST_DIR)/svm.flat
+tests += $(TEST_DIR)/vmx.flat
 
 include config-x86-common.mak
diff --git a/lib/x86/msr.h b/lib/x86/msr.h
index 509a421..281255a 100644
--- a/lib/x86/msr.h
+++ b/lib/x86/msr.h
@@ -396,6 +396,11 @@
 #define MSR_IA32_VMX_VMCS_ENUM          0x0000048a
 #define MSR_IA32_VMX_PROCBASED_CTLS2    0x0000048b
 #define MSR_IA32_VMX_EPT_VPID_CAP       0x0000048c
+#define MSR_IA32_VMX_TRUE_PIN		0x0000048d
+#define MSR_IA32_VMX_TRUE_PROC		0x0000048e
+#define MSR_IA32_VMX_TRUE_EXIT		0x0000048f
+#define MSR_IA32_VMX_TRUE_ENTRY		0x00000490
+
 
 /* AMD-V MSRs */
 
diff --git a/x86/cstart64.S b/x86/cstart64.S
index 24df5f8..0fe76da 100644
--- a/x86/cstart64.S
+++ b/x86/cstart64.S
@@ -4,6 +4,10 @@
 .globl boot_idt
 boot_idt = 0
 
+.globl idt_descr
+.globl tss_descr
+.globl gdt64_desc
+
 ipi_vector = 0x20
 
 max_cpus = 64
diff --git a/x86/unittests.cfg b/x86/unittests.cfg
index bc9643e..85c36aa 100644
--- a/x86/unittests.cfg
+++ b/x86/unittests.cfg
@@ -149,3 +149,9 @@ extra_params = --append "10000000 `date +%s`"
 file = pcid.flat
 extra_params = -cpu qemu64,+pcid
 arch = x86_64
+
+[vmx]
+file = vmx.flat
+extra_params = -cpu host,+vmx
+arch = x86_64
+
diff --git a/x86/vmx.c b/x86/vmx.c
new file mode 100644
index 0000000..a1023b1
--- /dev/null
+++ b/x86/vmx.c
@@ -0,0 +1,681 @@
+#include "libcflat.h"
+#include "processor.h"
+#include "vm.h"
+#include "desc.h"
+#include "vmx.h"
+#include "msr.h"
+#include "smp.h"
+#include "io.h"
+
+int fails = 0, tests = 0;
+u32 *vmxon_region;
+struct vmcs *vmcs_root;
+u32 vpid_cnt;
+void *guest_stack, *guest_syscall_stack;
+u32 ctrl_pin, ctrl_enter, ctrl_exit, ctrl_cpu[2];
+ulong fix_cr0_set, fix_cr0_clr;
+ulong fix_cr4_set, fix_cr4_clr;
+struct regs regs;
+struct vmx_test *current;
+u64 hypercall_field = 0;
+
+extern u64 gdt64_desc[];
+extern u64 idt_descr[];
+extern u64 tss_descr[];
+extern void *vmx_return;
+extern void *entry_sysenter;
+extern void *guest_entry;
+
+void report(const char *name, int result)
+{
+	++tests;
+	if (result)
+		printf("PASS: %s\n", name);
+	else {
+		printf("FAIL: %s\n", name);
+		++fails;
+	}
+}
+
+int vmcs_clear(struct vmcs *vmcs)
+{
+	bool ret;
+	asm volatile ("vmclear %1; setbe %0" : "=q" (ret) : "m" (vmcs) : "cc");
+	return ret;
+}
+
+u64 vmcs_read(enum Encoding enc)
+{
+	u64 val;
+	asm volatile ("vmread %1, %0" : "=rm" (val) : "r" ((u64)enc) : "cc");
+	return val;
+}
+
+int vmcs_write(enum Encoding enc, u64 val)
+{
+	bool ret;
+	asm volatile ("vmwrite %1, %2; setbe %0"
+		: "=q"(ret) : "rm" (val), "r" ((u64)enc) : "cc");
+	return ret;
+}
+
+int make_vmcs_current(struct vmcs *vmcs)
+{
+	bool ret;
+
+	asm volatile ("vmptrld %1; setbe %0" : "=q" (ret) : "m" (vmcs) : "cc");
+	return ret;
+}
+
+int save_vmcs(struct vmcs **vmcs)
+{
+	bool ret;
+
+	asm volatile ("vmptrst %1; setbe %0" : "=q" (ret) : "m" (*vmcs) : "cc");
+	return ret;
+}
+
+/* entry_sysenter */
+asm(
+	".align	4, 0x90\n\t"
+	".globl	entry_sysenter\n\t"
+	"entry_sysenter:\n\t"
+	SAVE_GPR
+	"	and	$0xf, %rax\n\t"
+	"	push	%rax\n\t"
+	"	call	syscall_handler\n\t"
+	LOAD_GPR
+	"	vmresume\n\t"
+);
+
+void syscall_handler(u64 syscall_no)
+{
+	current->syscall_handler(syscall_no);
+}
+
+static inline int vmx_on()
+{
+	bool ret;
+	asm volatile ("vmxon %1; setbe %0\n\t"
+		: "=q"(ret) : "m"(vmxon_region) : "cc");
+	return ret;
+}
+
+static inline int vmx_off()
+{
+	bool ret;
+	asm volatile("vmxoff; setbe %0\n\t"
+		: "=q"(ret) : : "cc");
+	return ret;
+}
+
+void print_vmexit_info()
+{
+	u64 guest_rip, guest_rsp;
+	ulong reason = vmcs_read(EXI_REASON) & 0xff;
+	ulong exit_qual = vmcs_read(EXI_QUALIFICATION);
+	guest_rip = vmcs_read(GUEST_RIP);
+	guest_rsp = vmcs_read(GUEST_RSP);
+	printf("VMEXIT info:\n");
+	printf("\tvmexit reason = %d\n", reason);
+	printf("\texit qualification = 0x%x\n", exit_qual);
+	printf("\tBit 31 of reason = %x\n", (vmcs_read(EXI_REASON) >> 31) & 1);
+	printf("\tguest_rip = 0x%llx\n", guest_rip);
+	printf("\tRAX=0x%llx    RBX=0x%llx    RCX=0x%llx    RDX=0x%llx\n",
+		regs.rax, regs.rbx, regs.rcx, regs.rdx);
+	printf("\tRSP=0x%llx    RBP=0x%llx    RSI=0x%llx    RDI=0x%llx\n",
+		guest_rsp, regs.rbp, regs.rsi, regs.rdi);
+	printf("\tR8 =0x%llx    R9 =0x%llx    R10=0x%llx    R11=0x%llx\n",
+		regs.r8, regs.r9, regs.r10, regs.r11);
+	printf("\tR12=0x%llx    R13=0x%llx    R14=0x%llx    R15=0x%llx\n",
+		regs.r12, regs.r13, regs.r14, regs.r15);
+}
+
+void test_vmclear(void)
+{
+	u64 rflags;
+
+	rflags = get_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;
+	set_rflags(rflags);
+	report("test vmclear", vmcs_clear(vmcs_root) == 0);
+}
+
+void test_vmxoff(void)
+{
+	int ret;
+	u64 rflags;
+
+	rflags = get_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;
+	set_rflags(rflags);
+	ret = vmx_off();
+	report("test vmxoff", !ret);
+}
+
+/* guest_entry */
+asm(
+	".align	4, 0x90\n\t"
+	".globl	entry_guest\n\t"
+	"guest_entry:\n\t"
+	"	call guest_main\n\t"
+	"	mov $1, %edi\n\t"
+	"	call hypercall\n\t"
+);
+
+void guest_main(void)
+{
+	current->guest_main();
+}
+
+void init_vmcs_ctrl(void)
+{
+	/* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */
+	/* 26.2.1.1 */
+	vmcs_write(PIN_CONTROLS, ctrl_pin);
+	/* Disable VMEXIT of IO instruction */
+	vmcs_write(CPU_EXEC_CTRL0, ctrl_cpu[0]);
+	if (ctrl_cpu_rev[0].set & CPU_SECONDARY) {
+		ctrl_cpu[1] |= ctrl_cpu_rev[1].set & ctrl_cpu_rev[1].clr;
+		vmcs_write(CPU_EXEC_CTRL1, ctrl_cpu[1]);
+	}
+	vmcs_write(CR3_TARGET_COUNT, 0);
+	vmcs_write(VPID, ++vpid_cnt);
+}
+
+void init_vmcs_host(void)
+{
+	/* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */
+	/* 26.2.1.2 */
+	vmcs_write(HOST_EFER, rdmsr(MSR_EFER));
+
+	/* 26.2.1.3 */
+	vmcs_write(ENT_CONTROLS, ctrl_enter);
+	vmcs_write(EXI_CONTROLS, ctrl_exit);
+
+	/* 26.2.2 */
+	vmcs_write(HOST_CR0, read_cr0());
+	vmcs_write(HOST_CR3, read_cr3());
+	vmcs_write(HOST_CR4, read_cr4());
+	vmcs_write(HOST_SYSENTER_EIP, (u64)(&entry_sysenter));
+	vmcs_write(HOST_SYSENTER_CS,  SEL_KERN_CODE_64);
+
+	/* 26.2.3 */
+	vmcs_write(HOST_SEL_CS, SEL_KERN_CODE_64);
+	vmcs_write(HOST_SEL_SS, SEL_KERN_DATA_64);
+	vmcs_write(HOST_SEL_DS, SEL_KERN_DATA_64);
+	vmcs_write(HOST_SEL_ES, SEL_KERN_DATA_64);
+	vmcs_write(HOST_SEL_FS, SEL_KERN_DATA_64);
+	vmcs_write(HOST_SEL_GS, SEL_KERN_DATA_64);
+	vmcs_write(HOST_SEL_TR, SEL_TSS_RUN);
+	vmcs_write(HOST_BASE_TR,   (u64)tss_descr);
+	vmcs_write(HOST_BASE_GDTR, (u64)gdt64_desc);
+	vmcs_write(HOST_BASE_IDTR, (u64)idt_descr);
+	vmcs_write(HOST_BASE_FS, 0);
+	vmcs_write(HOST_BASE_GS, 0);
+
+	/* Set other vmcs area */
+	vmcs_write(PF_ERROR_MASK, 0);
+	vmcs_write(PF_ERROR_MATCH, 0);
+	vmcs_write(VMCS_LINK_PTR, ~0ul);
+	vmcs_write(VMCS_LINK_PTR_HI, ~0ul);
+	vmcs_write(HOST_RIP, (u64)(&vmx_return));
+}
+
+void init_vmcs_guest(void)
+{
+	/* 26.3 CHECKING AND LOADING GUEST STATE */
+	ulong guest_cr0, guest_cr4, guest_cr3;
+	/* 26.3.1.1 */
+	guest_cr0 = read_cr0();
+	guest_cr4 = read_cr4();
+	guest_cr3 = read_cr3();
+	if (ctrl_enter & ENT_GUEST_64) {
+		guest_cr0 |= CR0_PG;
+		guest_cr4 |= CR4_PAE;
+	}
+	if ((ctrl_enter & ENT_GUEST_64) == 0)
+		guest_cr4 &= (~CR4_PCIDE);
+	if (guest_cr0 & CR0_PG)
+		guest_cr0 |= CR0_PE;
+	vmcs_write(GUEST_CR0, guest_cr0);
+	vmcs_write(GUEST_CR3, guest_cr3);
+	vmcs_write(GUEST_CR4, guest_cr4);
+	vmcs_write(GUEST_SYSENTER_CS,  SEL_KERN_CODE_64);
+	vmcs_write(GUEST_SYSENTER_ESP,
+		(u64)(guest_syscall_stack + PAGE_SIZE - 1));
+	vmcs_write(GUEST_SYSENTER_EIP, (u64)(&entry_sysenter));
+	vmcs_write(GUEST_DR7, 0);
+	vmcs_write(GUEST_EFER, rdmsr(MSR_EFER));
+
+	/* 26.3.1.2 */
+	vmcs_write(GUEST_SEL_CS, SEL_KERN_CODE_64);
+	vmcs_write(GUEST_SEL_SS, SEL_KERN_DATA_64);
+	vmcs_write(GUEST_SEL_DS, SEL_KERN_DATA_64);
+	vmcs_write(GUEST_SEL_ES, SEL_KERN_DATA_64);
+	vmcs_write(GUEST_SEL_FS, SEL_KERN_DATA_64);
+	vmcs_write(GUEST_SEL_GS, SEL_KERN_DATA_64);
+	vmcs_write(GUEST_SEL_TR, SEL_TSS_RUN);
+	vmcs_write(GUEST_SEL_LDTR, 0);
+
+	vmcs_write(GUEST_BASE_CS, 0);
+	vmcs_write(GUEST_BASE_ES, 0);
+	vmcs_write(GUEST_BASE_SS, 0);
+	vmcs_write(GUEST_BASE_DS, 0);
+	vmcs_write(GUEST_BASE_FS, 0);
+	vmcs_write(GUEST_BASE_GS, 0);
+	vmcs_write(GUEST_BASE_TR,   (u64)tss_descr);
+	vmcs_write(GUEST_BASE_LDTR, 0);
+
+	vmcs_write(GUEST_LIMIT_CS, 0xFFFFFFFF);
+	vmcs_write(GUEST_LIMIT_DS, 0xFFFFFFFF);
+	vmcs_write(GUEST_LIMIT_ES, 0xFFFFFFFF);
+	vmcs_write(GUEST_LIMIT_SS, 0xFFFFFFFF);
+	vmcs_write(GUEST_LIMIT_FS, 0xFFFFFFFF);
+	vmcs_write(GUEST_LIMIT_GS, 0xFFFFFFFF);
+	vmcs_write(GUEST_LIMIT_LDTR, 0xffff);
+	vmcs_write(GUEST_LIMIT_TR, ((struct descr *)tss_descr)->limit);
+
+	vmcs_write(GUEST_AR_CS, 0xa09b);
+	vmcs_write(GUEST_AR_DS, 0xc093);
+	vmcs_write(GUEST_AR_ES, 0xc093);
+	vmcs_write(GUEST_AR_FS, 0xc093);
+	vmcs_write(GUEST_AR_GS, 0xc093);
+	vmcs_write(GUEST_AR_SS, 0xc093);
+	vmcs_write(GUEST_AR_LDTR, 0x82);
+	vmcs_write(GUEST_AR_TR, 0x8b);
+
+	/* 26.3.1.3 */
+	vmcs_write(GUEST_BASE_GDTR, (u64)gdt64_desc);
+	vmcs_write(GUEST_BASE_IDTR, (u64)idt_descr);
+	vmcs_write(GUEST_LIMIT_GDTR,
+		((struct descr *)gdt64_desc)->limit & 0xffff);
+	vmcs_write(GUEST_LIMIT_IDTR,
+		((struct descr *)idt_descr)->limit & 0xffff);
+
+	/* 26.3.1.4 */
+	vmcs_write(GUEST_RIP, (u64)(&guest_entry));
+	vmcs_write(GUEST_RSP, (u64)(guest_stack + PAGE_SIZE - 1));
+	vmcs_write(GUEST_RFLAGS, 0x2);
+
+	/* 26.3.1.5 */
+	vmcs_write(GUEST_ACTV_STATE, 0);
+	vmcs_write(GUEST_INTR_STATE, 0);
+}
+
+int init_vmcs(struct vmcs **vmcs)
+{
+	*vmcs = alloc_page();
+	memset(*vmcs, 0, PAGE_SIZE);
+	(*vmcs)->revision_id = basic.revision;
+	/* vmclear first to init vmcs */
+	if (vmcs_clear(*vmcs)) {
+		printf("%s : vmcs_clear error\n", __func__);
+		return 1;
+	}
+
+	if (make_vmcs_current(*vmcs)) {
+		printf("%s : make_vmcs_current error\n", __func__);
+		return 1;
+	}
+
+	/* All settings to pin/exit/enter/cpu
+	   control fields should be placed here */
+	ctrl_pin |= PIN_EXTINT | PIN_NMI | PIN_VIRT_NMI;
+	ctrl_exit = EXI_LOAD_EFER | EXI_HOST_64;
+	ctrl_enter = (ENT_LOAD_EFER | ENT_GUEST_64);
+	ctrl_cpu[0] |= CPU_HLT;
+	/* DIsable IO instruction VMEXIT now */
+	ctrl_cpu[0] &= (~(CPU_IO | CPU_IO_BITMAP));
+	ctrl_cpu[1] = 0;
+
+	ctrl_pin = (ctrl_pin | ctrl_pin_rev.set) & ctrl_pin_rev.clr;
+	ctrl_enter = (ctrl_enter | ctrl_enter_rev.set) & ctrl_enter_rev.clr;
+	ctrl_exit = (ctrl_exit | ctrl_exit_rev.set) & ctrl_exit_rev.clr;
+	ctrl_cpu[0] = (ctrl_cpu[0] | ctrl_cpu_rev[0].set) & ctrl_cpu_rev[0].clr;
+
+	init_vmcs_ctrl();
+	init_vmcs_host();
+	init_vmcs_guest();
+	return 0;
+}
+
+void init_vmx(void)
+{
+	vmxon_region = alloc_page();
+	memset(vmxon_region, 0, PAGE_SIZE);
+
+	fix_cr0_set =  rdmsr(MSR_IA32_VMX_CR0_FIXED0);
+	fix_cr0_clr =  rdmsr(MSR_IA32_VMX_CR0_FIXED1);
+	fix_cr4_set =  rdmsr(MSR_IA32_VMX_CR4_FIXED0);
+	fix_cr4_clr = rdmsr(MSR_IA32_VMX_CR4_FIXED1);
+	basic.val = rdmsr(MSR_IA32_VMX_BASIC);
+	ctrl_pin_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PIN
+			: MSR_IA32_VMX_PINBASED_CTLS);
+	ctrl_exit_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_EXIT
+			: MSR_IA32_VMX_EXIT_CTLS);
+	ctrl_enter_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_ENTRY
+			: MSR_IA32_VMX_ENTRY_CTLS);
+	ctrl_cpu_rev[0].val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PROC
+			: MSR_IA32_VMX_PROCBASED_CTLS);
+	if (ctrl_cpu_rev[0].set & CPU_SECONDARY)
+		ctrl_cpu_rev[1].val = rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2);
+	if (ctrl_cpu_rev[1].set & CPU_EPT || ctrl_cpu_rev[1].set & CPU_VPID)
+		ept_vpid.val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP);
+
+	write_cr0((read_cr0() & fix_cr0_clr) | fix_cr0_set);
+	write_cr4((read_cr4() & fix_cr4_clr) | fix_cr4_set | CR4_VMXE);
+
+	*vmxon_region = basic.revision;
+
+	guest_stack = alloc_page();
+	memset(guest_stack, 0, PAGE_SIZE);
+	guest_syscall_stack = alloc_page();
+	memset(guest_syscall_stack, 0, PAGE_SIZE);
+}
+
+int test_vmx_capability(void)
+{
+	struct cpuid r;
+	u64 ret1, ret2;
+	u64 ia32_feature_control;
+	r = cpuid(1);
+	ret1 = ((r.c) >> 5) & 1;
+	ia32_feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL);
+	ret2 = ((ia32_feature_control & 0x5) == 0x5);
+	if ((!ret2) && ((ia32_feature_control & 0x1) == 0)) {
+		wrmsr(MSR_IA32_FEATURE_CONTROL, 0x5);
+		ia32_feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL);
+		ret2 = ((ia32_feature_control & 0x5) == 0x5);
+	}
+	report("test vmx capability", ret1 & ret2);
+	return !(ret1 & ret2);
+}
+
+int test_vmxon(void)
+{
+	int ret;
+	u64 rflags;
+
+	rflags = get_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;
+	set_rflags(rflags);
+	ret = vmx_on();
+	report("test vmxon", !ret);
+	return ret;
+}
+
+void test_vmptrld(void)
+{
+	u64 rflags;
+	struct vmcs *vmcs;
+
+	vmcs = alloc_page();
+	vmcs->revision_id = basic.revision;
+	rflags = get_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;
+	set_rflags(rflags);
+	report("test vmptrld", make_vmcs_current(vmcs) == 0);
+}
+
+void test_vmptrst(void)
+{
+	u64 rflags;
+	int ret;
+	struct vmcs *vmcs1, *vmcs2;
+
+	vmcs1 = alloc_page();
+	memset(vmcs1, 0, PAGE_SIZE);
+	init_vmcs(&vmcs1);
+	rflags = get_rflags() | X86_EFLAGS_CF | X86_EFLAGS_ZF;
+	set_rflags(rflags);
+	ret = save_vmcs(&vmcs2);
+	report("test vmptrst", (!ret) && (vmcs1 == vmcs2));
+}
+
+/* This function can only be called in guest */
+void hypercall(u32 hypercall_no)
+{
+	u64 val = 0;
+	val = (hypercall_no & HYPERCALL_MASK) | HYPERCALL_BIT;
+	hypercall_field = val;
+	asm volatile("vmcall\n\t");
+}
+
+bool is_hypercall()
+{
+	ulong reason, hyper_bit;
+
+	reason = vmcs_read(EXI_REASON) & 0xff;
+	hyper_bit = hypercall_field & HYPERCALL_BIT;
+	if (reason == VMX_VMCALL && hyper_bit)
+		return true;
+	return false;
+}
+
+int handle_hypercall()
+{
+	ulong hypercall_no;
+
+	hypercall_no = hypercall_field & HYPERCALL_MASK;
+	hypercall_field = 0;
+	switch (hypercall_no) {
+	case HYPERCALL_VMEXIT:
+		return VMX_TEST_VMEXIT;
+	default:
+		printf("ERROR : Invalid hypercall number : %d\n", hypercall_no);
+	}
+	return VMX_TEST_EXIT;
+}
+
+int exit_handler()
+{
+	int ret;
+
+	current->exits++;
+	current->guest_regs = regs;
+	if (is_hypercall())
+		ret = handle_hypercall();
+	else
+		ret = current->exit_handler();
+	regs = current->guest_regs;
+	switch (ret) {
+	case VMX_TEST_VMEXIT:
+	case VMX_TEST_RESUME:
+		return ret;
+	case VMX_TEST_EXIT:
+		break;
+	default:
+		printf("ERROR : Invalid exit_handler return val %d.\n"
+			, ret);
+	}
+	print_vmexit_info();
+	exit(-1);
+	return 0;
+}
+
+int vmx_run()
+{
+	bool ret;
+	u32 eax;
+	u64 rsp;
+
+	asm volatile("mov %%rsp, %0\n\t" : "=r"(rsp));
+	vmcs_write(HOST_RSP, rsp);
+	asm volatile("vmlaunch;seta %0\n\t" : "=m"(ret));
+	if (ret) {
+		printf("%s : vmlaunch failed.\n", __func__);
+		return 1;
+	}
+	while (1) {
+		asm volatile(
+			LOAD_GPR_C
+			"vmresume;seta %0\n\t"
+			: "=m"(ret));
+		if (ret) {
+			printf("%s : vmresume failed.\n", __func__);
+			return 1;
+		}
+		asm volatile(
+			".global vmx_return\n\t"
+			"vmx_return:\n\t"
+			SAVE_GPR_C
+			"call exit_handler\n\t"
+			"mov %%eax, %0\n\t"
+			: "=r"(eax)
+		);
+		switch (eax) {
+		case VMX_TEST_VMEXIT:
+			return 0;
+		case VMX_TEST_RESUME:
+			break;
+		default:
+			printf("%s : unhandled ret from exit_handler.\n", __func__);
+			return 1;
+		}
+	}
+	return 0;
+}
+
+int test_run(struct vmx_test *test)
+{
+	if (test->name == NULL)
+		test->name = "(no name)";
+	if (vmx_on()) {
+		printf("%s : vmxon failed.\n", __func__);
+		return 1;
+	}
+	init_vmcs(&(test->vmcs));
+	/* Directly call test->init is ok here, init_vmcs has done
+	   vmcs init, vmclear and vmptrld*/
+	if (test->init)
+		test->init(test->vmcs);
+	test->exits = 0;
+	current = test;
+	regs = test->guest_regs;
+	printf("\nTest suite : %s\n", test->name);
+	vmx_run();
+	if (vmx_off()) {
+		printf("%s : vmxoff failed.\n", __func__);
+		return 1;
+	}
+	return 0;
+}
+
+void basic_init()
+{
+}
+
+void basic_guest_main()
+{
+	/* Here is null guest_main, print Hello World */
+	printf("\tHello World, this is null_guest_main!\n");
+}
+
+int basic_exit_handler()
+{
+	u64 guest_rip;
+	ulong reason;
+
+	guest_rip = vmcs_read(GUEST_RIP);
+	reason = vmcs_read(EXI_REASON) & 0xff;
+
+	switch (reason) {
+	case VMX_VMCALL:
+		print_vmexit_info();
+		vmcs_write(GUEST_RIP, guest_rip + 3);
+		return VMX_TEST_RESUME;
+	default:
+		break;
+	}
+	printf("ERROR : Unhandled vmx exit.\n");
+	print_vmexit_info();
+	return VMX_TEST_EXIT;
+}
+
+void basic_syscall_handler(u64 syscall_no)
+{
+}
+
+void vmenter_main()
+{
+	u64 rax;
+	u64 rsp, resume_rsp;
+
+	report("test vmlaunch", 1);
+
+	asm volatile(
+		"mov %%rsp, %0\n\t"
+		"mov %3, %%rax\n\t"
+		"vmcall\n\t"
+		"mov %%rax, %1\n\t"
+		"mov %%rsp, %2\n\t"
+		: "=r"(rsp), "=r"(rax), "=r"(resume_rsp)
+		: "g"(0xABCD));
+	report("test vmresume", (rax == 0xFFFF) && (rsp == resume_rsp));
+}
+
+int vmenter_exit_handler()
+{
+	u64 guest_rip;
+	ulong reason;
+
+	guest_rip = vmcs_read(GUEST_RIP);
+	reason = vmcs_read(EXI_REASON) & 0xff;
+	switch (reason) {
+	case VMX_VMCALL:
+		if (current->guest_regs.rax != 0xABCD) {
+			report("test vmresume", 0);
+			return VMX_TEST_VMEXIT;
+		}
+		current->guest_regs.rax = 0xFFFF;
+		vmcs_write(GUEST_RIP, guest_rip + 3);
+		return VMX_TEST_RESUME;
+	default:
+		report("test vmresume", 0);
+		print_vmexit_info();
+	}
+	return VMX_TEST_VMEXIT;
+}
+
+
+/* name/init/guest_main/exit_handler/syscall_handler/guest_regs
+   basic_* just implement some basic functions */
+static struct vmx_test vmx_tests[] = {
+	{ "null", basic_init, basic_guest_main, basic_exit_handler,
+		basic_syscall_handler, {0} },
+	{ "vmenter", basic_init, vmenter_main, vmenter_exit_handler,
+		basic_syscall_handler, {0} },
+};
+
+int main(void)
+{
+	int i;
+
+	setup_vm();
+	setup_idt();
+
+	if (test_vmx_capability() != 0) {
+		printf("ERROR : vmx not supported, check +vmx option\n");
+		goto exit;
+	}
+	init_vmx();
+	/* Set basic test ctxt the same as "null" */
+	current = &vmx_tests[0];
+	if (test_vmxon() != 0)
+		goto exit;
+	test_vmptrld();
+	test_vmclear();
+	test_vmptrst();
+	init_vmcs(&vmcs_root);
+	if (vmx_run()) {
+		report("test vmlaunch", 0);
+		goto exit;
+	}
+	test_vmxoff();
+
+	for (i = 1; i < ARRAY_SIZE(vmx_tests); ++i) {
+		if (test_run(&vmx_tests[i]))
+			goto exit;
+	}
+
+exit:
+	printf("\nSUMMARY: %d tests, %d failures\n", tests, fails);
+	return fails ? 1 : 0;
+}
diff --git a/x86/vmx.h b/x86/vmx.h
new file mode 100644
index 0000000..80c7f70
--- /dev/null
+++ b/x86/vmx.h
@@ -0,0 +1,455 @@
+#ifndef __HYPERVISOR_H
+#define __HYPERVISOR_H
+
+#include "libcflat.h"
+
+struct vmcs {
+	u32 revision_id; /* vmcs revision identifier */
+	u32 abort; /* VMX-abort indicator */
+	/* VMCS data */
+	char data[0];
+};
+
+struct regs {
+	u64 rax;
+	u64 rcx;
+	u64 rdx;
+	u64 rbx;
+	u64 cr2;
+	u64 rbp;
+	u64 rsi;
+	u64 rdi;
+	u64 r8;
+	u64 r9;
+	u64 r10;
+	u64 r11;
+	u64 r12;
+	u64 r13;
+	u64 r14;
+	u64 r15;
+};
+
+struct vmx_test {
+	const char *name;
+	void (*init)(struct vmcs *vmcs);
+	void (*guest_main)();
+	int (*exit_handler)();
+	void (*syscall_handler)(u64 syscall_no);
+	struct regs guest_regs;
+	struct vmcs *vmcs;
+	int exits;
+};
+
+static union vmx_basic {
+	u64 val;
+	struct {
+		u32 revision;
+		u32	size:13,
+			: 3,
+			width:1,
+			dual:1,
+			type:4,
+			insouts:1,
+			ctrl:1;
+	};
+} basic;
+
+static union vmx_ctrl_pin {
+	u64 val;
+	struct {
+		u32 set, clr;
+	};
+} ctrl_pin_rev;
+
+static union vmx_ctrl_cpu {
+	u64 val;
+	struct {
+		u32 set, clr;
+	};
+} ctrl_cpu_rev[2];
+
+static union vmx_ctrl_exit {
+	u64 val;
+	struct {
+		u32 set, clr;
+	};
+} ctrl_exit_rev;
+
+static union vmx_ctrl_ent {
+	u64 val;
+	struct {
+		u32 set, clr;
+	};
+} ctrl_enter_rev;
+
+static union vmx_ept_vpid {
+	u64 val;
+	struct {
+		u32:16,
+			super:2,
+			: 2,
+			invept:1,
+			: 11;
+		u32	invvpid:1;
+	};
+} ept_vpid;
+
+struct descr {
+	u16 limit;
+	u64 addr;
+};
+
+enum Encoding {
+	/* 16-Bit Control Fields */
+	VPID			= 0x0000ul,
+	/* Posted-interrupt notification vector */
+	PINV			= 0x0002ul,
+	/* EPTP index */
+	EPTP_IDX		= 0x0004ul,
+
+	/* 16-Bit Guest State Fields */
+	GUEST_SEL_ES		= 0x0800ul,
+	GUEST_SEL_CS		= 0x0802ul,
+	GUEST_SEL_SS		= 0x0804ul,
+	GUEST_SEL_DS		= 0x0806ul,
+	GUEST_SEL_FS		= 0x0808ul,
+	GUEST_SEL_GS		= 0x080aul,
+	GUEST_SEL_LDTR		= 0x080cul,
+	GUEST_SEL_TR		= 0x080eul,
+	GUEST_INT_STATUS	= 0x0810ul,
+
+	/* 16-Bit Host State Fields */
+	HOST_SEL_ES		= 0x0c00ul,
+	HOST_SEL_CS		= 0x0c02ul,
+	HOST_SEL_SS		= 0x0c04ul,
+	HOST_SEL_DS		= 0x0c06ul,
+	HOST_SEL_FS		= 0x0c08ul,
+	HOST_SEL_GS		= 0x0c0aul,
+	HOST_SEL_TR		= 0x0c0cul,
+
+	/* 64-Bit Control Fields */
+	IO_BITMAP_A		= 0x2000ul,
+	IO_BITMAP_B		= 0x2002ul,
+	MSR_BITMAP		= 0x2004ul,
+	EXIT_MSR_ST_ADDR	= 0x2006ul,
+	EXIT_MSR_LD_ADDR	= 0x2008ul,
+	ENTER_MSR_LD_ADDR	= 0x200aul,
+	VMCS_EXEC_PTR		= 0x200cul,
+	TSC_OFFSET		= 0x2010ul,
+	TSC_OFFSET_HI		= 0x2011ul,
+	APIC_VIRT_ADDR		= 0x2012ul,
+	APIC_ACCS_ADDR		= 0x2014ul,
+	EPTP			= 0x201aul,
+	EPTP_HI			= 0x201bul,
+
+	/* 64-Bit Readonly Data Field */
+	INFO_PHYS_ADDR		= 0x2400ul,
+
+	/* 64-Bit Guest State */
+	VMCS_LINK_PTR		= 0x2800ul,
+	VMCS_LINK_PTR_HI	= 0x2801ul,
+	GUEST_DEBUGCTL		= 0x2802ul,
+	GUEST_DEBUGCTL_HI	= 0x2803ul,
+	GUEST_EFER		= 0x2806ul,
+	GUEST_PERF_GLOBAL_CTRL	= 0x2808ul,
+	GUEST_PDPTE		= 0x280aul,
+
+	/* 64-Bit Host State */
+	HOST_EFER		= 0x2c02ul,
+	HOST_PERF_GLOBAL_CTRL	= 0x2c04ul,
+
+	/* 32-Bit Control Fields */
+	PIN_CONTROLS		= 0x4000ul,
+	CPU_EXEC_CTRL0		= 0x4002ul,
+	EXC_BITMAP		= 0x4004ul,
+	PF_ERROR_MASK		= 0x4006ul,
+	PF_ERROR_MATCH		= 0x4008ul,
+	CR3_TARGET_COUNT	= 0x400aul,
+	EXI_CONTROLS		= 0x400cul,
+	EXI_MSR_ST_CNT		= 0x400eul,
+	EXI_MSR_LD_CNT		= 0x4010ul,
+	ENT_CONTROLS		= 0x4012ul,
+	ENT_MSR_LD_CNT		= 0x4014ul,
+	ENT_INTR_INFO		= 0x4016ul,
+	ENT_INTR_ERROR		= 0x4018ul,
+	ENT_INST_LEN		= 0x401aul,
+	TPR_THRESHOLD		= 0x401cul,
+	CPU_EXEC_CTRL1		= 0x401eul,
+
+	/* 32-Bit R/O Data Fields */
+	VMX_INST_ERROR		= 0x4400ul,
+	EXI_REASON		= 0x4402ul,
+	EXI_INTR_INFO		= 0x4404ul,
+	EXI_INTR_ERROR		= 0x4406ul,
+	IDT_VECT_INFO		= 0x4408ul,
+	IDT_VECT_ERROR		= 0x440aul,
+	EXI_INST_LEN		= 0x440cul,
+	EXI_INST_INFO		= 0x440eul,
+
+	/* 32-Bit Guest State Fields */
+	GUEST_LIMIT_ES		= 0x4800ul,
+	GUEST_LIMIT_CS		= 0x4802ul,
+	GUEST_LIMIT_SS		= 0x4804ul,
+	GUEST_LIMIT_DS		= 0x4806ul,
+	GUEST_LIMIT_FS		= 0x4808ul,
+	GUEST_LIMIT_GS		= 0x480aul,
+	GUEST_LIMIT_LDTR	= 0x480cul,
+	GUEST_LIMIT_TR		= 0x480eul,
+	GUEST_LIMIT_GDTR	= 0x4810ul,
+	GUEST_LIMIT_IDTR	= 0x4812ul,
+	GUEST_AR_ES		= 0x4814ul,
+	GUEST_AR_CS		= 0x4816ul,
+	GUEST_AR_SS		= 0x4818ul,
+	GUEST_AR_DS		= 0x481aul,
+	GUEST_AR_FS		= 0x481cul,
+	GUEST_AR_GS		= 0x481eul,
+	GUEST_AR_LDTR		= 0x4820ul,
+	GUEST_AR_TR		= 0x4822ul,
+	GUEST_INTR_STATE	= 0x4824ul,
+	GUEST_ACTV_STATE	= 0x4826ul,
+	GUEST_SMBASE		= 0x4828ul,
+	GUEST_SYSENTER_CS	= 0x482aul,
+
+	/* 32-Bit Host State Fields */
+	HOST_SYSENTER_CS	= 0x4c00ul,
+
+	/* Natural-Width Control Fields */
+	CR0_MASK		= 0x6000ul,
+	CR4_MASK		= 0x6002ul,
+	CR0_READ_SHADOW	= 0x6004ul,
+	CR4_READ_SHADOW	= 0x6006ul,
+	CR3_TARGET_0		= 0x6008ul,
+	CR3_TARGET_1		= 0x600aul,
+	CR3_TARGET_2		= 0x600cul,
+	CR3_TARGET_3		= 0x600eul,
+
+	/* Natural-Width R/O Data Fields */
+	EXI_QUALIFICATION	= 0x6400ul,
+	IO_RCX			= 0x6402ul,
+	IO_RSI			= 0x6404ul,
+	IO_RDI			= 0x6406ul,
+	IO_RIP			= 0x6408ul,
+	GUEST_LINEAR_ADDRESS	= 0x640aul,
+
+	/* Natural-Width Guest State Fields */
+	GUEST_CR0		= 0x6800ul,
+	GUEST_CR3		= 0x6802ul,
+	GUEST_CR4		= 0x6804ul,
+	GUEST_BASE_ES		= 0x6806ul,
+	GUEST_BASE_CS		= 0x6808ul,
+	GUEST_BASE_SS		= 0x680aul,
+	GUEST_BASE_DS		= 0x680cul,
+	GUEST_BASE_FS		= 0x680eul,
+	GUEST_BASE_GS		= 0x6810ul,
+	GUEST_BASE_LDTR		= 0x6812ul,
+	GUEST_BASE_TR		= 0x6814ul,
+	GUEST_BASE_GDTR		= 0x6816ul,
+	GUEST_BASE_IDTR		= 0x6818ul,
+	GUEST_DR7		= 0x681aul,
+	GUEST_RSP		= 0x681cul,
+	GUEST_RIP		= 0x681eul,
+	GUEST_RFLAGS		= 0x6820ul,
+	GUEST_PENDING_DEBUG	= 0x6822ul,
+	GUEST_SYSENTER_ESP	= 0x6824ul,
+	GUEST_SYSENTER_EIP	= 0x6826ul,
+
+	/* Natural-Width Host State Fields */
+	HOST_CR0		= 0x6c00ul,
+	HOST_CR3		= 0x6c02ul,
+	HOST_CR4		= 0x6c04ul,
+	HOST_BASE_FS		= 0x6c06ul,
+	HOST_BASE_GS		= 0x6c08ul,
+	HOST_BASE_TR		= 0x6c0aul,
+	HOST_BASE_GDTR		= 0x6c0cul,
+	HOST_BASE_IDTR		= 0x6c0eul,
+	HOST_SYSENTER_ESP	= 0x6c10ul,
+	HOST_SYSENTER_EIP	= 0x6c12ul,
+	HOST_RSP		= 0x6c14ul,
+	HOST_RIP		= 0x6c16ul
+};
+
+enum Reason {
+	VMX_EXC_NMI		= 0,
+	VMX_EXTINT		= 1,
+	VMX_TRIPLE_FAULT	= 2,
+	VMX_INIT		= 3,
+	VMX_SIPI		= 4,
+	VMX_SMI_IO		= 5,
+	VMX_SMI_OTHER		= 6,
+	VMX_INTR_WINDOW		= 7,
+	VMX_NMI_WINDOW		= 8,
+	VMX_TASK_SWITCH		= 9,
+	VMX_CPUID		= 10,
+	VMX_GETSEC		= 11,
+	VMX_HLT			= 12,
+	VMX_INVD		= 13,
+	VMX_INVLPG		= 14,
+	VMX_RDPMC		= 15,
+	VMX_RDTSC		= 16,
+	VMX_RSM			= 17,
+	VMX_VMCALL		= 18,
+	VMX_VMCLEAR		= 19,
+	VMX_VMLAUNCH		= 20,
+	VMX_VMPTRLD		= 21,
+	VMX_VMPTRST		= 22,
+	VMX_VMREAD		= 23,
+	VMX_VMRESUME		= 24,
+	VMX_VMWRITE		= 25,
+	VMX_VMXOFF		= 26,
+	VMX_VMXON		= 27,
+	VMX_CR			= 28,
+	VMX_DR			= 29,
+	VMX_IO			= 30,
+	VMX_RDMSR		= 31,
+	VMX_WRMSR		= 32,
+	VMX_FAIL_STATE		= 33,
+	VMX_FAIL_MSR		= 34,
+	VMX_MWAIT		= 36,
+	VMX_MTF			= 37,
+	VMX_MONITOR		= 39,
+	VMX_PAUSE		= 40,
+	VMX_FAIL_MCHECK		= 41,
+	VMX_TPR_THRESHOLD	= 43,
+	VMX_APIC_ACCESS		= 44,
+	VMX_GDTR_IDTR		= 46,
+	VMX_LDTR_TR		= 47,
+	VMX_EPT_VIOLATION	= 48,
+	VMX_EPT_MISCONFIG	= 49,
+	VMX_INVEPT		= 50,
+	VMX_PREEMPT		= 52,
+	VMX_INVVPID		= 53,
+	VMX_WBINVD		= 54,
+	VMX_XSETBV		= 55
+};
+
+#define X86_EFLAGS_CF	0x00000001 /* Carry Flag */
+#define X86_EFLAGS_ZF	0x00000040 /* Zero Flag */
+
+enum Ctrl_exi {
+	EXI_HOST_64             = 1UL << 9,
+	EXI_LOAD_PERF		= 1UL << 12,
+	EXI_INTA                = 1UL << 15,
+	EXI_LOAD_EFER           = 1UL << 21,
+};
+
+enum Ctrl_ent {
+	ENT_GUEST_64            = 1UL << 9,
+	ENT_LOAD_EFER           = 1UL << 15,
+};
+
+enum Ctrl_pin {
+	PIN_EXTINT              = 1ul << 0,
+	PIN_NMI                 = 1ul << 3,
+	PIN_VIRT_NMI            = 1ul << 5,
+};
+
+enum Ctrl0 {
+	CPU_INTR_WINDOW		= 1ul << 2,
+	CPU_HLT			= 1ul << 7,
+	CPU_INVLPG		= 1ul << 9,
+	CPU_CR3_LOAD		= 1ul << 15,
+	CPU_CR3_STORE		= 1ul << 16,
+	CPU_TPR_SHADOW		= 1ul << 21,
+	CPU_NMI_WINDOW		= 1ul << 22,
+	CPU_IO			= 1ul << 24,
+	CPU_IO_BITMAP		= 1ul << 25,
+	CPU_SECONDARY		= 1ul << 31,
+};
+
+enum Ctrl1 {
+	CPU_EPT			= 1ul << 1,
+	CPU_VPID		= 1ul << 5,
+	CPU_URG			= 1ul << 7,
+};
+
+#define SEL_NULL_DESC		0x0
+#define SEL_KERN_CODE_64	0x8
+#define SEL_KERN_DATA_64	0x10
+#define SEL_USER_CODE_64	0x18
+#define SEL_USER_DATA_64	0x20
+#define SEL_CODE_32		0x28
+#define SEL_DATA_32		0x30
+#define SEL_CODE_16		0x38
+#define SEL_DATA_16		0x40
+#define SEL_TSS_RUN		0x48
+
+#define SAVE_GPR				\
+	"xchg %rax, regs\n\t"			\
+	"xchg %rbx, regs+0x8\n\t"		\
+	"xchg %rcx, regs+0x10\n\t"		\
+	"xchg %rdx, regs+0x18\n\t"		\
+	"xchg %rbp, regs+0x28\n\t"		\
+	"xchg %rsi, regs+0x30\n\t"		\
+	"xchg %rdi, regs+0x38\n\t"		\
+	"xchg %r8, regs+0x40\n\t"		\
+	"xchg %r9, regs+0x48\n\t"		\
+	"xchg %r10, regs+0x50\n\t"		\
+	"xchg %r11, regs+0x58\n\t"		\
+	"xchg %r12, regs+0x60\n\t"		\
+	"xchg %r13, regs+0x68\n\t"		\
+	"xchg %r14, regs+0x70\n\t"		\
+	"xchg %r15, regs+0x78\n\t"
+
+#define LOAD_GPR	SAVE_GPR
+
+#define SAVE_GPR_C				\
+	"xchg %%rax, regs\n\t"			\
+	"xchg %%rbx, regs+0x8\n\t"		\
+	"xchg %%rcx, regs+0x10\n\t"		\
+	"xchg %%rdx, regs+0x18\n\t"		\
+	"xchg %%rbp, regs+0x28\n\t"		\
+	"xchg %%rsi, regs+0x30\n\t"		\
+	"xchg %%rdi, regs+0x38\n\t"		\
+	"xchg %%r8, regs+0x40\n\t"		\
+	"xchg %%r9, regs+0x48\n\t"		\
+	"xchg %%r10, regs+0x50\n\t"		\
+	"xchg %%r11, regs+0x58\n\t"		\
+	"xchg %%r12, regs+0x60\n\t"		\
+	"xchg %%r13, regs+0x68\n\t"		\
+	"xchg %%r14, regs+0x70\n\t"		\
+	"xchg %%r15, regs+0x78\n\t"
+
+#define LOAD_GPR_C	SAVE_GPR_C
+
+#define CR0_PE		(1ul << 0)
+#define CR0_PG		(1ul << 31)
+#define CR4_VMXE	(1ul << 0)
+#define CR4_PAE		(1ul << 5)
+#define CR4_PCIDE	(1ul << 17)
+
+#define VMX_IO_SIZE_MASK		0x7
+#define _VMX_IO_BYTE			1
+#define _VMX_IO_WORD			2
+#define _VMX_IO_LONG			3
+#define VMX_IO_DIRECTION_MASK		(1ul << 3)
+#define VMX_IO_IN			(1ul << 3)
+#define VMX_IO_OUT			0
+#define VMX_IO_STRING			(1ul << 4)
+#define VMX_IO_REP			(1ul << 5)
+#define VMX_IO_OPRAND_DX		(1ul << 6)
+#define VMX_IO_PORT_MASK		0xFFFF0000
+#define VMX_IO_PORT_SHIFT		16
+
+#define VMX_TEST_VMEXIT		1
+#define VMX_TEST_EXIT		2
+#define VMX_TEST_RESUME		3
+
+#define HYPERCALL_BIT		(1ul << 12)
+#define HYPERCALL_MASK		0xFFF
+#define HYPERCALL_VMEXIT	0x1
+
+
+inline u64 get_rflags(void)
+{
+	u64 r;
+	asm volatile("pushf; pop %0\n\t" : "=q"(r) : : "cc");
+	return r;
+}
+
+inline void set_rflags(u64 r)
+{
+	asm volatile("push %0; popf\n\t" : : "q"(r) : "cc");
+}
+
+#endif
+
-- 
1.7.9.5


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* Re: [RFC PATCH] kvm-unit-tests : Basic architecture of VMX nested test case
  2013-07-17 18:54 [RFC PATCH] kvm-unit-tests : Basic architecture of VMX nested test case Arthur Chunqi Li
@ 2013-07-18  5:52 ` Paolo Bonzini
  2013-07-18  7:26   ` Gleb Natapov
  0 siblings, 1 reply; 24+ messages in thread
From: Paolo Bonzini @ 2013-07-18  5:52 UTC (permalink / raw)
  To: Arthur Chunqi Li; +Cc: kvm, jan.kiszka, gleb

Il 17/07/2013 20:54, Arthur Chunqi Li ha scritto:
> +/* entry_sysenter */
> +asm(
> +	".align	4, 0x90\n\t"
> +	".globl	entry_sysenter\n\t"
> +	"entry_sysenter:\n\t"
> +	SAVE_GPR
> +	"	and	$0xf, %rax\n\t"
> +	"	push	%rax\n\t"

push should be wrong here, the first argument is in %rdi.

> +	"	call	syscall_handler\n\t"
> +	LOAD_GPR
> +	"	vmresume\n\t"
> +);
> +
> +int exit_handler()

This (and syscall_handler too) needs __attribute__((__used__)) because
it's only used from assembly.

Please add "static" to all functions except main, it triggers better
compiler optimization and warnings and it will avoid nasty surprises in
the future if the compiler becomes smarter.

> +{
> +	int ret;
> +
> +	current->exits++;
> +	current->guest_regs = regs;
> +	if (is_hypercall())
> +		ret = handle_hypercall();
> +	else
> +		ret = current->exit_handler();
> +	regs = current->guest_regs;
> +	switch (ret) {
> +	case VMX_TEST_VMEXIT:
> +	case VMX_TEST_RESUME:
> +		return ret;
> +	case VMX_TEST_EXIT:
> +		break;
> +	default:
> +		printf("ERROR : Invalid exit_handler return val %d.\n"
> +			, ret);
> +	}

Here have to propagate the exit codes through multiple levels, because
we're not using setjmp/longjmp.  Not a big deal.  My worries with this
version are below.

> +	print_vmexit_info();
> +	exit(-1);
> +	return 0;
> +}
> +
> +int vmx_run()
> +{
> +	bool ret;
> +	u32 eax;
> +	u64 rsp;
> +
> +	asm volatile("mov %%rsp, %0\n\t" : "=r"(rsp));
> +	vmcs_write(HOST_RSP, rsp);
> +	asm volatile("vmlaunch;seta %0\n\t" : "=m"(ret));

setbe (this path is probably untested because it never happens in practice).

Also, missing memory clobber.

> +	if (ret) {
> +		printf("%s : vmlaunch failed.\n", __func__);
> +		return 1;
> +	}
> +	while (1) {
> +		asm volatile(
> +			LOAD_GPR_C
> +			"vmresume;seta %0\n\t"
> +			: "=m"(ret));

setbe and missing memory clobber.

> +		if (ret) {
> +			printf("%s : vmresume failed.\n", __func__);
> +			return 1;
> +		}
> +		asm volatile(
> +			".global vmx_return\n\t"

.global should not be needed.

> +			"vmx_return:\n\t"
> +			SAVE_GPR_C
> +			"call exit_handler\n\t"
> +			"mov %%eax, %0\n\t"
> +			: "=r"(eax)
> +		);

I had written a long explanation here about why I don't trust the
compiler to do the right thing, and ideas about how to fix that.  But in
the end the only workable solution is a single assembly blob like vmx.c
in KVM to do vmlaunch/vmresume, so I'll get right to the point:

   *  the "similarity with KVM code" and "as little assembly as  *
   *  possible" goals are mutually exclusive                     *

and for a testsuite I'd prefer the latter---which means I'd still favor
setjmp/longjmp.

Now, here is the long explanation.

I must admit that the code looks nice.  There are some nits I'd like to
see done differently (such as putting vmx_return at the beginning of the
while (1), and the vmresume asm at the end), but it is indeed nice.

However, it is also very deceiving, because the processor is not doing
what the code says.  If I see:

       vmlaunch();
       exit(-1);

it is clear that magic happens in vmlaunch().  If I see

       asm volatile("vmlaunch;setbe %0\n\t" : "=m"(ret));
       if (ret) {
           ...
       }
       asm("vmx_return:")

it is absolutely not clear that the setbe and "if" are skipped on a
successful vmlaunch.  So, at the very least you need a comment before
those "if (ret)" to document the control flow, or you can use a jmp like
this:

       asm volatile goto ("vmlaunch;jmp %0\n\t"
                          : : : "memory" : vmlaunch_error);
       if (0) {
           vmlaunch_error: ...
       }

The unconditional jump and "asm goto" make it clear that magic happens.

By the way, "asm goto" is also needed to tell the compiler that the
vmlaunch and vmresume asms reenter at vmx_resume(*).  Without it, there
is no guarantee that rsp is the same when you save it and at the
vmx_resume entry point (in fact, I'd feel safer if you did the vmwrite
to HOST_RSP and vmlaunch in the same "asm goto".

Using "asm goto" to tell the compiler about vmx_resume poses another
problem.  "asm goto" takes a C label, vmx_resume is an assembly label.
The compiler might put extra instruction between the C label and
vmx_resume, for example to read back values it has spilled to the stack.

Overall, I don't trust the compiler to compile this code right
(especially if we later include a 32-bit version) and the only solution
is to write the whole thing in assembly.  If you want to write the logic
in C, you need the setjmp/longjmp version.  That version needs no such
guarantee because all the trampolines are away from the hands of the
compiler.

Paolo

> +		switch (eax) {
> +		case VMX_TEST_VMEXIT:
> +			return 0;
> +		case VMX_TEST_RESUME:
> +			break;
> +		default:
> +			printf("%s : unhandled ret from exit_handler.\n", __func__);
> +			return 1;
> +		}
> +	}
> +	return 0;
> +}


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC PATCH] kvm-unit-tests : Basic architecture of VMX nested test case
  2013-07-18  5:52 ` Paolo Bonzini
@ 2013-07-18  7:26   ` Gleb Natapov
  2013-07-18 10:47     ` Paolo Bonzini
  0 siblings, 1 reply; 24+ messages in thread
From: Gleb Natapov @ 2013-07-18  7:26 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: Arthur Chunqi Li, kvm, jan.kiszka

On Thu, Jul 18, 2013 at 07:52:21AM +0200, Paolo Bonzini wrote:
> Il 17/07/2013 20:54, Arthur Chunqi Li ha scritto:
> > +	".globl	entry_sysenter\n\t"
> > +	"entry_sysenter:\n\t"
> > +	SAVE_GPR
> > +	"	and	$0xf, %rax\n\t"
> > +	"	push	%rax\n\t"
> 
> push should be wrong here, the first argument is in %rdi.
> 
> > +	"	call	syscall_handler\n\t"
> > +	LOAD_GPR
> > +	"	vmresume\n\t"
> > +);
> > +
> > +int exit_handler()
> 
> This (and syscall_handler too) needs __attribute__((__used__)) because
> it's only used from assembly.
> 
That was my question actually, why is it used from assembly? Calling it
from see should not be a problem.

> Please add "static" to all functions except main, it triggers better
> compiler optimization and warnings and it will avoid nasty surprises in
> the future if the compiler becomes smarter.
> 
> > +{
> > +	int ret;
> > +
> > +	current->exits++;
> > +	current->guest_regs = regs;
> > +	if (is_hypercall())
> > +		ret = handle_hypercall();
> > +	else
> > +		ret = current->exit_handler();
> > +	regs = current->guest_regs;
> > +	switch (ret) {
> > +	case VMX_TEST_VMEXIT:
> > +	case VMX_TEST_RESUME:
> > +		return ret;
> > +	case VMX_TEST_EXIT:
> > +		break;
> > +	default:
> > +		printf("ERROR : Invalid exit_handler return val %d.\n"
> > +			, ret);
> > +	}
> 
> Here have to propagate the exit codes through multiple levels, because
> we're not using setjmp/longjmp.  Not a big deal.  My worries with this
> version are below.
> 
> > +	print_vmexit_info();
> > +	exit(-1);
> > +	return 0;
> > +}
> > +
> > +int vmx_run()
> > +{
> > +	bool ret;
> > +	u32 eax;
> > +	u64 rsp;
> > +
> > +	asm volatile("mov %%rsp, %0\n\t" : "=r"(rsp));
> > +	vmcs_write(HOST_RSP, rsp);
> > +	asm volatile("vmlaunch;seta %0\n\t" : "=m"(ret));
> 
> setbe (this path is probably untested because it never happens in practice).
> 
At least one of the tests should set up wrong vmcs state to verify that
nested vmx handles it correctly. In fact one of the patches that Arthur
have sent to nested vmx fixes exactly that code path.

> Also, missing memory clobber.
> 
> > +	if (ret) {
> > +		printf("%s : vmlaunch failed.\n", __func__);
> > +		return 1;
> > +	}
> > +	while (1) {
> > +		asm volatile(
> > +			LOAD_GPR_C
> > +			"vmresume;seta %0\n\t"
> > +			: "=m"(ret));
> 
> setbe and missing memory clobber.
> 
> > +		if (ret) {
> > +			printf("%s : vmresume failed.\n", __func__);
> > +			return 1;
> > +		}
> > +		asm volatile(
> > +			".global vmx_return\n\t"
> 
> .global should not be needed.
> 
> > +			"vmx_return:\n\t"
> > +			SAVE_GPR_C
> > +			"call exit_handler\n\t"
> > +			"mov %%eax, %0\n\t"
> > +			: "=r"(eax)
> > +		);
> 
> I had written a long explanation here about why I don't trust the
> compiler to do the right thing, and ideas about how to fix that.  But in
> the end the only workable solution is a single assembly blob like vmx.c
> in KVM to do vmlaunch/vmresume, so I'll get right to the point:
> 
>    *  the "similarity with KVM code" and "as little assembly as  *
>    *  possible" goals are mutually exclusive                     *
> 
I never said that code should be similar to KVM code or have as little
assembly as possible :) Reread the thread. The only thing I asked for
is to make code flow linear, if it makes code looks more like KVM or
reduce amount of assembly this is just a bonus.

> and for a testsuite I'd prefer the latter---which means I'd still favor
> setjmp/longjmp.
> 
> Now, here is the long explanation.
> 
> I must admit that the code looks nice.  There are some nits I'd like to
> see done differently (such as putting vmx_return at the beginning of the
> while (1), and the vmresume asm at the end), but it is indeed nice.
> 
Why do you prefer setjmp/longjmp then?

> However, it is also very deceiving, because the processor is not doing
> what the code says.  If I see:
> 
>        vmlaunch();
>        exit(-1);
> 
> it is clear that magic happens in vmlaunch().  If I see
> 
>        asm volatile("vmlaunch;setbe %0\n\t" : "=m"(ret));
>        if (ret) {
>            ...
>        }
>        asm("vmx_return:")
> 
> it is absolutely not clear that the setbe and "if" are skipped on a
> successful vmlaunch.  So, at the very least you need a comment before
> those "if (ret)" to document the control flow, or you can use a jmp like
> this:
> 
>        asm volatile goto ("vmlaunch;jmp %0\n\t"
>                           : : : "memory" : vmlaunch_error);
>        if (0) {
>            vmlaunch_error: ...
>        }
> 
> The unconditional jump and "asm goto" make it clear that magic happens.
Agree, I dislike this magic too, but this is fixed by you suggestion
above about putting vmx_return at the beginning of while(). So the logic
will looks like that:

asm volatile("vmlaunch;setbe %0\n\t" : "=m"(ret));
while(ret) {
   vmx_return:
   SAVE_GPR_C
   eax = exit_handler();
   switch(eax) {
   }
   LOAD_GPR_C
   asm volatile("vmresume;seta %0\n\t" : "=m"(ret));
}


Rewriting the whole guest entry exit path in asm like you suggest bellow
will eliminate the magic too.

> 
> By the way, "asm goto" is also needed to tell the compiler that the
> vmlaunch and vmresume asms reenter at vmx_resume(*).  Without it, there
> is no guarantee that rsp is the same when you save it and at the
> vmx_resume entry point (in fact, I'd feel safer if you did the vmwrite
> to HOST_RSP and vmlaunch in the same "asm goto".
> 
> Using "asm goto" to tell the compiler about vmx_resume poses another
> problem.  "asm goto" takes a C label, vmx_resume is an assembly label.
> The compiler might put extra instruction between the C label and
> vmx_resume, for example to read back values it has spilled to the stack.
> 
> Overall, I don't trust the compiler to compile this code right
> (especially if we later include a 32-bit version) and the only solution
> is to write the whole thing in assembly.  If you want to write the logic
> in C, you need the setjmp/longjmp version.  That version needs no such
> guarantee because all the trampolines are away from the hands of the
> compiler.
> 
I much prefer one big asm statement than many small asm statement
scattered among two or three C lines.

--
			Gleb.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC PATCH] kvm-unit-tests : Basic architecture of VMX nested test case
  2013-07-18  7:26   ` Gleb Natapov
@ 2013-07-18 10:47     ` Paolo Bonzini
  2013-07-18 11:06       ` Gleb Natapov
  0 siblings, 1 reply; 24+ messages in thread
From: Paolo Bonzini @ 2013-07-18 10:47 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: Arthur Chunqi Li, kvm, jan.kiszka

Il 18/07/2013 09:26, Gleb Natapov ha scritto:
> > I had written a long explanation here about why I don't trust the
> > compiler to do the right thing, and ideas about how to fix that.  But in
> > the end the only workable solution is a single assembly blob like vmx.c
> > in KVM to do vmlaunch/vmresume, so I'll get right to the point:
> > 
> >    *  the "similarity with KVM code" and "as little assembly as  *
> >    *  possible" goals are mutually exclusive                     *
> 
> I never said that code should be similar to KVM code or have as little
> assembly as possible :) Reread the thread. The only thing I asked for
> is to make code flow linear, if it makes code looks more like KVM or
> reduce amount of assembly this is just a bonus.

Point taken.

> > and for a testsuite I'd prefer the latter---which means I'd still favor
> > setjmp/longjmp.
> > 
> > Now, here is the long explanation.
> > 
> > I must admit that the code looks nice.  There are some nits I'd like to
> > see done differently (such as putting vmx_return at the beginning of the
> > while (1), and the vmresume asm at the end), but it is indeed nice.
>
> Why do you prefer setjmp/longjmp then?

Because it is still deceiving, and I dislike the deceit more than I like
the linear code flow.

> Agree, I dislike this magic too, but this is fixed by you suggestion
> above about putting vmx_return at the beginning of while(). So the logic
> will looks like that:
> 
> asm volatile("vmlaunch;setbe %0\n\t" : "=m"(ret));
> while(ret) {

while(!ret) if you use setbe, of course.

>    vmx_return:
>    SAVE_GPR_C
>    eax = exit_handler();
>    switch(eax) {
>    }
>    LOAD_GPR_C
>    asm volatile("vmresume;seta %0\n\t" : "=m"(ret));
> }

It is still somewhat magic: the "while (ret)" is only there to please
the compiler, and you rely on the compiler not changing %rsp between the
vmlaunch and the vmx_return label.  Minor nit, you cannot anymore print
different error messages for vmlaunch vs. vmresume failure.

In the end the choice is between "all in asm" and "small asm trampoline"
(which also happens to use setjmp/longjmp, but perhaps Arthur can
propagate return codes without using setjmp/longjmp, too).

> Rewriting the whole guest entry exit path in asm like you suggest bellow
> will eliminate the magic too.

> I much prefer one big asm statement than many small asm statement
> scattered among two or three C lines.

It's not many asm statements, it's just a dozen lines:


	align	4, 0x90
	entry_vmx:
		SAVE_GPR
		call default_exit_handler
		/* Should not reach here*/
		mov $1, %eax
		call exit
	.align	4, 0x90
	entry_sysenter:
		SAVE_GPR
		and	$0xf, %eax
		mov	%eax, %edi
		call	default_syscall_handler
		/* Arthur, is something missing here? :)) */

Paolo

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC PATCH] kvm-unit-tests : Basic architecture of VMX nested test case
  2013-07-18 10:47     ` Paolo Bonzini
@ 2013-07-18 11:06       ` Gleb Natapov
  2013-07-18 12:08         ` Paolo Bonzini
  0 siblings, 1 reply; 24+ messages in thread
From: Gleb Natapov @ 2013-07-18 11:06 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: Arthur Chunqi Li, kvm, jan.kiszka

On Thu, Jul 18, 2013 at 12:47:46PM +0200, Paolo Bonzini wrote:
> > > and for a testsuite I'd prefer the latter---which means I'd still favor
> > > setjmp/longjmp.
> > > 
> > > Now, here is the long explanation.
> > > 
> > > I must admit that the code looks nice.  There are some nits I'd like to
> > > see done differently (such as putting vmx_return at the beginning of the
> > > while (1), and the vmresume asm at the end), but it is indeed nice.
> >
> > Why do you prefer setjmp/longjmp then?
> 
> Because it is still deceiving, and I dislike the deceit more than I like
> the linear code flow.
> 
What is deceiving about it? Of course for someone who has no idea how
vmx works the code will not be obvious, but why should we care. For
someone who knows what is deceiving about returning into the same
function guest was launched from by using VMX mechanism instead of
longjmp()?

> > Agree, I dislike this magic too, but this is fixed by you suggestion
> > above about putting vmx_return at the beginning of while(). So the logic
> > will looks like that:
> > 
> > asm volatile("vmlaunch;setbe %0\n\t" : "=m"(ret));
> > while(ret) {
> 
> while(!ret) if you use setbe, of course.
> 
Yeah, this not meant to be compilable code :)

> >    vmx_return:
> >    SAVE_GPR_C
> >    eax = exit_handler();
> >    switch(eax) {
> >    }
> >    LOAD_GPR_C
> >    asm volatile("vmresume;seta %0\n\t" : "=m"(ret));
> > }
> 
> It is still somewhat magic: the "while (ret)" is only there to please
No, it it there to catch vmlaunch/vmresume errors.

> the compiler, and you rely on the compiler not changing %rsp between the
> vmlaunch and the vmx_return label.  Minor nit, you cannot anymore print
HOST_RSP should be loaded on each guest entry.

> different error messages for vmlaunch vs. vmresume failure.
Just because the same variable is used to store error values :)
Make vmlaunch use err1 and vmresume err2 and do "while (!err1 & !err2)"

> 
> In the end the choice is between "all in asm" and "small asm trampoline"
> (which also happens to use setjmp/longjmp, but perhaps Arthur can
> propagate return codes without using setjmp/longjmp, too).
> 
> > Rewriting the whole guest entry exit path in asm like you suggest bellow
> > will eliminate the magic too.
> 
> > I much prefer one big asm statement than many small asm statement
> > scattered among two or three C lines.
> 
> It's not many asm statements, it's just a dozen lines:
> 
I am not talking about overall file, but the how vmx_run() is written:
asm()
C code
asm()
C code
...

I much prefer:
C code

big asm() blob

C code.


--
			Gleb.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC PATCH] kvm-unit-tests : Basic architecture of VMX nested test case
  2013-07-18 11:06       ` Gleb Natapov
@ 2013-07-18 12:08         ` Paolo Bonzini
  2013-07-18 14:11           ` Arthur Chunqi Li
  2013-07-18 19:57           ` Gleb Natapov
  0 siblings, 2 replies; 24+ messages in thread
From: Paolo Bonzini @ 2013-07-18 12:08 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: Arthur Chunqi Li, kvm, jan.kiszka

Il 18/07/2013 13:06, Gleb Natapov ha scritto:
> On Thu, Jul 18, 2013 at 12:47:46PM +0200, Paolo Bonzini wrote:
>>>> and for a testsuite I'd prefer the latter---which means I'd still favor
>>>> setjmp/longjmp.
>>>>
>>>> Now, here is the long explanation.
>>>>
>>>> I must admit that the code looks nice.  There are some nits I'd like to
>>>> see done differently (such as putting vmx_return at the beginning of the
>>>> while (1), and the vmresume asm at the end), but it is indeed nice.
>>>
>>> Why do you prefer setjmp/longjmp then?
>>
>> Because it is still deceiving, and I dislike the deceit more than I like
>> the linear code flow.
>>
> What is deceiving about it? Of course for someone who has no idea how
> vmx works the code will not be obvious, but why should we care. For
> someone who knows what is deceiving about returning into the same
> function guest was launched from by using VMX mechanism

The way the code is written is deceiving.  If I see

  asm("vmlaunch; seta %0")
  while (ret)

I expect HOST_RIP to point at the seta or somewhere near, not at a
completely different label somewhere else.

>> instead of longjmp()?

Look again at the setjmp/longjmp version.  longjmp is not used to handle
vmexit.  It is used to jump back from the vmexit handler to main, which
is exactly what setjmp/longjmp is meant for.

>> It is still somewhat magic: the "while (ret)" is only there to please
>> the compiler
>
> No, it it there to catch vmlaunch/vmresume errors.

You could change it to "while (0)" and it would still work.  That's what
I mean by "only there to please the compiler".

>> the compiler, and you rely on the compiler not changing %rsp between the
>> vmlaunch and the vmx_return label.  Minor nit, you cannot anymore print
> HOST_RSP should be loaded on each guest entry.

Right, but my point is: without a big asm blob, you don't know the right
value to load.  It depends on the generated code.  And the big asm blob
limits a lot the "code looks nice" value of this approach.

>> different error messages for vmlaunch vs. vmresume failure.
> Just because the same variable is used to store error values :)
> Make vmlaunch use err1 and vmresume err2 and do "while (!err1 & !err2)"

Yeah. :)

>> In the end the choice is between "all in asm" and "small asm trampoline"
>> (which also happens to use setjmp/longjmp, but perhaps Arthur can
>> propagate return codes without using setjmp/longjmp, too).
>>
>>> Rewriting the whole guest entry exit path in asm like you suggest bellow
>>> will eliminate the magic too.
>>
>>> I much prefer one big asm statement than many small asm statement
>>> scattered among two or three C lines.
>>
>> It's not many asm statements, it's just a dozen lines:
>>
> I am not talking about overall file, but the how vmx_run() is written:
> asm()
> C code
> asm()
> C code
> ...
> 
> I much prefer:
> C code
> 
> big asm() blob
> 
> C code.

Me too.  But the trampoline version is neither, it is

static inline void vmlaunch() { asm("vmlaunch") }
static inline void vmresume() { asm("vmresume") }
small asm() blob
C code

Paolo


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC PATCH] kvm-unit-tests : Basic architecture of VMX nested test case
  2013-07-18 12:08         ` Paolo Bonzini
@ 2013-07-18 14:11           ` Arthur Chunqi Li
  2013-07-18 19:57           ` Gleb Natapov
  1 sibling, 0 replies; 24+ messages in thread
From: Arthur Chunqi Li @ 2013-07-18 14:11 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: Gleb Natapov, kvm, Jan Kiszka

On Thu, Jul 18, 2013 at 8:08 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
> Il 18/07/2013 13:06, Gleb Natapov ha scritto:
>> On Thu, Jul 18, 2013 at 12:47:46PM +0200, Paolo Bonzini wrote:
>>>>> and for a testsuite I'd prefer the latter---which means I'd still favor
>>>>> setjmp/longjmp.
>>>>>
>>>>> Now, here is the long explanation.
>>>>>
>>>>> I must admit that the code looks nice.  There are some nits I'd like to
>>>>> see done differently (such as putting vmx_return at the beginning of the
>>>>> while (1), and the vmresume asm at the end), but it is indeed nice.
>>>>
>>>> Why do you prefer setjmp/longjmp then?
>>>
>>> Because it is still deceiving, and I dislike the deceit more than I like
>>> the linear code flow.
>>>
>> What is deceiving about it? Of course for someone who has no idea how
>> vmx works the code will not be obvious, but why should we care. For
>> someone who knows what is deceiving about returning into the same
>> function guest was launched from by using VMX mechanism
>
> The way the code is written is deceiving.  If I see
>
>   asm("vmlaunch; seta %0")
>   while (ret)
>
> I expect HOST_RIP to point at the seta or somewhere near, not at a
> completely different label somewhere else.
Here for me, I prefer a separate asm blob of entry_vmx instead of one
"hidden" in a C function, which may make it much harder to trace for
someone not familiar with vmx in KVM.
>
>>> instead of longjmp()?
>
> Look again at the setjmp/longjmp version.  longjmp is not used to handle
> vmexit.  It is used to jump back from the vmexit handler to main, which
> is exactly what setjmp/longjmp is meant for.
>
>>> It is still somewhat magic: the "while (ret)" is only there to please
>>> the compiler
>>
>> No, it it there to catch vmlaunch/vmresume errors.
>
> You could change it to "while (0)" and it would still work.  That's what
> I mean by "only there to please the compiler".
>
>>> the compiler, and you rely on the compiler not changing %rsp between the
>>> vmlaunch and the vmx_return label.  Minor nit, you cannot anymore print
>> HOST_RSP should be loaded on each guest entry.
>
> Right, but my point is: without a big asm blob, you don't know the right
> value to load.  It depends on the generated code.  And the big asm blob
> limits a lot the "code looks nice" value of this approach.
>
>>> different error messages for vmlaunch vs. vmresume failure.
>> Just because the same variable is used to store error values :)
>> Make vmlaunch use err1 and vmresume err2 and do "while (!err1 & !err2)"
>
> Yeah. :)
>
>>> In the end the choice is between "all in asm" and "small asm trampoline"
>>> (which also happens to use setjmp/longjmp, but perhaps Arthur can
>>> propagate return codes without using setjmp/longjmp, too).
>>>
>>>> Rewriting the whole guest entry exit path in asm like you suggest bellow
>>>> will eliminate the magic too.
>>>
>>>> I much prefer one big asm statement than many small asm statement
>>>> scattered among two or three C lines.
>>>
>>> It's not many asm statements, it's just a dozen lines:
>>>
>> I am not talking about overall file, but the how vmx_run() is written:
>> asm()
>> C code
>> asm()
>> C code
>> ...
>>
>> I much prefer:
>> C code
>>
>> big asm() blob
>>
>> C code.
>
> Me too.  But the trampoline version is neither, it is
>
> static inline void vmlaunch() { asm("vmlaunch") }
> static inline void vmresume() { asm("vmresume") }
> small asm() blob
> C code
So this is a style problem on the basis of right code generation
indeed. When I write codes of this version, it occurs that the
compiler optimized some of my codes and something goes wrong. If we
use C style, we need setjmp/longjmp, otherwise we need big asm blob to
forbid compiler optimization.

I prefer to Paolo indeed as to my writing the two versions. Writing
asm in C is sometimes uncomfortable (e.g. %rax and %%rax, and
considering the C codes before and after the asm blob). Actually, we
can hide setjmp in vmx_on and longjmp in the asm codes after executing
exit_handler, thus we just need to call vmx_on to enter VM and return
a designed value (e.g. -1) in exit_handler to exit VM. Then any
following codes don't need to care about setjmp/longjmp.

Arthur
>
> Paolo
>

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC PATCH] kvm-unit-tests : Basic architecture of VMX nested test case
  2013-07-18 12:08         ` Paolo Bonzini
  2013-07-18 14:11           ` Arthur Chunqi Li
@ 2013-07-18 19:57           ` Gleb Natapov
  2013-07-19  6:42             ` Paolo Bonzini
  1 sibling, 1 reply; 24+ messages in thread
From: Gleb Natapov @ 2013-07-18 19:57 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: Arthur Chunqi Li, kvm, jan.kiszka

On Thu, Jul 18, 2013 at 02:08:51PM +0200, Paolo Bonzini wrote:
> Il 18/07/2013 13:06, Gleb Natapov ha scritto:
> > On Thu, Jul 18, 2013 at 12:47:46PM +0200, Paolo Bonzini wrote:
> >>>> and for a testsuite I'd prefer the latter---which means I'd still favor
> >>>> setjmp/longjmp.
> >>>>
> >>>> Now, here is the long explanation.
> >>>>
> >>>> I must admit that the code looks nice.  There are some nits I'd like to
> >>>> see done differently (such as putting vmx_return at the beginning of the
> >>>> while (1), and the vmresume asm at the end), but it is indeed nice.
> >>>
> >>> Why do you prefer setjmp/longjmp then?
> >>
> >> Because it is still deceiving, and I dislike the deceit more than I like
> >> the linear code flow.
> >>
> > What is deceiving about it? Of course for someone who has no idea how
> > vmx works the code will not be obvious, but why should we care. For
> > someone who knows what is deceiving about returning into the same
> > function guest was launched from by using VMX mechanism
> 
> The way the code is written is deceiving.  If I see
> 
>   asm("vmlaunch; seta %0")
>   while (ret)
> 
> I expect HOST_RIP to point at the seta or somewhere near, not at a
> completely different label somewhere else.
> 
Why would you expect that assuming you know what vmlaunch is? So it is OK
when HOST_RIP point to somewhere outside a function, but "deceiving" if
it point 3 lines down in the same function? I have a hard time following
this logic.

> >> instead of longjmp()?
> 
> Look again at the setjmp/longjmp version.  longjmp is not used to handle
> vmexit.  It is used to jump back from the vmexit handler to main, which
> is exactly what setjmp/longjmp is meant for.
> 
That's because simple return will not work in that version, this is
artifact of how vmexit was done.

> >> It is still somewhat magic: the "while (ret)" is only there to please
> >> the compiler
> >
> > No, it it there to catch vmlaunch/vmresume errors.
> 
> You could change it to "while (0)" and it would still work.  That's what
> I mean by "only there to please the compiler".
But while (1) will not, so the code is executed, it is not just for
compiler there, but it is executed only if vmlaunch/vmresume fails.

> 
> >> the compiler, and you rely on the compiler not changing %rsp between the
> >> vmlaunch and the vmx_return label.  Minor nit, you cannot anymore print
> > HOST_RSP should be loaded on each guest entry.
> 
> Right, but my point is: without a big asm blob, you don't know the right
> value to load.  It depends on the generated code.  And the big asm blob
> limits a lot the "code looks nice" value of this approach.
> 
I said it number of times already, this is not about "code looking nice",
"code looks like KVM" or use less assembler as possible", this is about
linear data flow. It is not fun chasing code execution path. Yes, you
can argue that vmlaunch/vmresume inherently non linear, but there is a
difference between skipping one instruction and remain in the same
function and on the same stack, or jump completely to a different
context.

Speaking about KVM. Guest enter/exit assembly blob is around ~50 lines
if assembly code and more then half of that is saving restoring context.
And the code plays some tricks there for optimization that we do not
need to do here, so I expect the code to be even smaller, not much more
then 10 lines of assembly excluding state save/restore.

> >> different error messages for vmlaunch vs. vmresume failure.
> > Just because the same variable is used to store error values :)
> > Make vmlaunch use err1 and vmresume err2 and do "while (!err1 & !err2)"
> 
> Yeah. :)
> 
> >> In the end the choice is between "all in asm" and "small asm trampoline"
> >> (which also happens to use setjmp/longjmp, but perhaps Arthur can
> >> propagate return codes without using setjmp/longjmp, too).
> >>
> >>> Rewriting the whole guest entry exit path in asm like you suggest bellow
> >>> will eliminate the magic too.
> >>
> >>> I much prefer one big asm statement than many small asm statement
> >>> scattered among two or three C lines.
> >>
> >> It's not many asm statements, it's just a dozen lines:
> >>
> > I am not talking about overall file, but the how vmx_run() is written:
> > asm()
> > C code
> > asm()
> > C code
> > ...
> > 
> > I much prefer:
> > C code
> > 
> > big asm() blob
> > 
> > C code.
> 
> Me too.  But the trampoline version is neither, it is
> 
> static inline void vmlaunch() { asm("vmlaunch") }
> static inline void vmresume() { asm("vmresume") }
> small asm() blob
I is small only because context save restore is hidden behind
macro and 4 asm instructions (vmlaunch/seta/vmresume/seta) a hidden
somewhere else. The actually differences in asm instruction between both
version will not be bigger then a couple of lines (if we will not take
setjmp/longjmp implementation into account :)), but I do not even see
why we discuss this argument since minimizing asm instructions here is
not an point. We should not use more then needed to achieve the goal of
course, but design should not be around number of assembly lines.

> C code
> 
> Paolo

--
			Gleb.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC PATCH] kvm-unit-tests : Basic architecture of VMX nested test case
  2013-07-18 19:57           ` Gleb Natapov
@ 2013-07-19  6:42             ` Paolo Bonzini
  2013-07-19  9:40               ` Gleb Natapov
  0 siblings, 1 reply; 24+ messages in thread
From: Paolo Bonzini @ 2013-07-19  6:42 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: Arthur Chunqi Li, kvm, jan.kiszka

Il 18/07/2013 21:57, Gleb Natapov ha scritto:
> On Thu, Jul 18, 2013 at 02:08:51PM +0200, Paolo Bonzini wrote:
>> Il 18/07/2013 13:06, Gleb Natapov ha scritto:
>>> On Thu, Jul 18, 2013 at 12:47:46PM +0200, Paolo Bonzini wrote:
>>>>>> and for a testsuite I'd prefer the latter---which means I'd still favor
>>>>>> setjmp/longjmp.
>>>>>>
>>>>>> Now, here is the long explanation.
>>>>>>
>>>>>> I must admit that the code looks nice.  There are some nits I'd like to
>>>>>> see done differently (such as putting vmx_return at the beginning of the
>>>>>> while (1), and the vmresume asm at the end), but it is indeed nice.
>>>>>
>>>>> Why do you prefer setjmp/longjmp then?
>>>>
>>>> Because it is still deceiving, and I dislike the deceit more than I like
>>>> the linear code flow.
>>>>
>>> What is deceiving about it? Of course for someone who has no idea how
>>> vmx works the code will not be obvious, but why should we care. For
>>> someone who knows what is deceiving about returning into the same
>>> function guest was launched from by using VMX mechanism
>>
>> The way the code is written is deceiving.  If I see
>>
>>   asm("vmlaunch; seta %0")
>>   while (ret)
>>
>> I expect HOST_RIP to point at the seta or somewhere near, not at a
>> completely different label somewhere else.
>>
> Why would you expect that assuming you know what vmlaunch is?

Because this is written in C, and I know trying to fool the compiler is
a losing game.  So my reaction is "okay, HOST_RIP must be set so that
code will not jump around".  If I see

   asm("vmlaunch")
   exit(-1)

the reaction is the opposite: "hmm, anything that jumps around would
have a hard time with the compiler, there must be some assembly
trampoline somewhere; let's check what HOST_RIP is".

>>>> instead of longjmp()?
>>
>> Look again at the setjmp/longjmp version.  longjmp is not used to handle
>> vmexit.  It is used to jump back from the vmexit handler to main, which
>> is exactly what setjmp/longjmp is meant for.
>>
> That's because simple return will not work in that version, this is
> artifact of how vmexit was done.

I think it can be made to work without setjmp/longjmp, but the code
would be ugly.

>>>> the compiler, and you rely on the compiler not changing %rsp between the
>>>> vmlaunch and the vmx_return label.  Minor nit, you cannot anymore print
>>> HOST_RSP should be loaded on each guest entry.
>>
>> Right, but my point is: without a big asm blob, you don't know the right
>> value to load.  It depends on the generated code.  And the big asm blob
>> limits a lot the "code looks nice" value of this approach.
>>
> I said it number of times already, this is not about "code looking nice",
> "code looks like KVM" or use less assembler as possible", this is about
> linear data flow. It is not fun chasing code execution path. Yes, you
> can argue that vmlaunch/vmresume inherently non linear, but there is a
> difference between skipping one instruction and remain in the same
> function and on the same stack, or jump completely to a different
> context.

I don't see anything bad in jumping completely to a different context.
The guest and host are sort of like two coroutines, they hardly have any
connection with the code that called vmlaunch.

> The actually differences in asm instruction between both
> version will not be bigger then a couple of lines (if we will not take
> setjmp/longjmp implementation into account :)),

I was waiting for this parenthetical remark to appear. ;)

> but I do not even see
> why we discuss this argument since minimizing asm instructions here is
> not an point. We should not use more then needed to achieve the goal of
> course, but design should not be around number of assembly lines.

I agree, I only mentioned it because you talked about

   asm
   C
   asm
   C

and this is not what the setjmp/longjmp code looks like---using inlines
for asm as if they were builtin functions is good, interspersing asm and
C in the same function is bad.

Paolo

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC PATCH] kvm-unit-tests : Basic architecture of VMX nested test case
  2013-07-19  6:42             ` Paolo Bonzini
@ 2013-07-19  9:40               ` Gleb Natapov
  2013-07-19 12:06                 ` Paolo Bonzini
  0 siblings, 1 reply; 24+ messages in thread
From: Gleb Natapov @ 2013-07-19  9:40 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: Arthur Chunqi Li, kvm, jan.kiszka

On Fri, Jul 19, 2013 at 08:42:20AM +0200, Paolo Bonzini wrote:
> Il 18/07/2013 21:57, Gleb Natapov ha scritto:
> > On Thu, Jul 18, 2013 at 02:08:51PM +0200, Paolo Bonzini wrote:
> >> Il 18/07/2013 13:06, Gleb Natapov ha scritto:
> >>> On Thu, Jul 18, 2013 at 12:47:46PM +0200, Paolo Bonzini wrote:
> >>>>>> and for a testsuite I'd prefer the latter---which means I'd still favor
> >>>>>> setjmp/longjmp.
> >>>>>>
> >>>>>> Now, here is the long explanation.
> >>>>>>
> >>>>>> I must admit that the code looks nice.  There are some nits I'd like to
> >>>>>> see done differently (such as putting vmx_return at the beginning of the
> >>>>>> while (1), and the vmresume asm at the end), but it is indeed nice.
> >>>>>
> >>>>> Why do you prefer setjmp/longjmp then?
> >>>>
> >>>> Because it is still deceiving, and I dislike the deceit more than I like
> >>>> the linear code flow.
> >>>>
> >>> What is deceiving about it? Of course for someone who has no idea how
> >>> vmx works the code will not be obvious, but why should we care. For
> >>> someone who knows what is deceiving about returning into the same
> >>> function guest was launched from by using VMX mechanism
> >>
> >> The way the code is written is deceiving.  If I see
> >>
> >>   asm("vmlaunch; seta %0")
> >>   while (ret)
> >>
> >> I expect HOST_RIP to point at the seta or somewhere near, not at a
> >> completely different label somewhere else.
> >>
> > Why would you expect that assuming you know what vmlaunch is?
> 
> Because this is written in C, and I know trying to fool the compiler is
> a losing game.  So my reaction is "okay, HOST_RIP must be set so that
> code will not jump around".  If I see
> 
>    asm("vmlaunch")
>    exit(-1)
> 
> the reaction is the opposite: "hmm, anything that jumps around would
> have a hard time with the compiler, there must be some assembly
> trampoline somewhere; let's check what HOST_RIP is".
> 
We do try to fool compiler often even without vmx: code patching. This is
why asm goto was invented, no? So, like you said in previous emails,
asm goto is appropriate way here to tell compiler what is going on.

> >>>> instead of longjmp()?
> >>
> >> Look again at the setjmp/longjmp version.  longjmp is not used to handle
> >> vmexit.  It is used to jump back from the vmexit handler to main, which
> >> is exactly what setjmp/longjmp is meant for.
> >>
> > That's because simple return will not work in that version, this is
> > artifact of how vmexit was done.
> 
> I think it can be made to work without setjmp/longjmp, but the code
> would be ugly.
> 
> >>>> the compiler, and you rely on the compiler not changing %rsp between the
> >>>> vmlaunch and the vmx_return label.  Minor nit, you cannot anymore print
> >>> HOST_RSP should be loaded on each guest entry.
> >>
> >> Right, but my point is: without a big asm blob, you don't know the right
> >> value to load.  It depends on the generated code.  And the big asm blob
> >> limits a lot the "code looks nice" value of this approach.
> >>
> > I said it number of times already, this is not about "code looking nice",
> > "code looks like KVM" or use less assembler as possible", this is about
> > linear data flow. It is not fun chasing code execution path. Yes, you
> > can argue that vmlaunch/vmresume inherently non linear, but there is a
> > difference between skipping one instruction and remain in the same
> > function and on the same stack, or jump completely to a different
> > context.
> 
> I don't see anything bad in jumping completely to a different context.
> The guest and host are sort of like two coroutines, they hardly have any
> connection with the code that called vmlaunch.
You can see it as two coroutines, or you can see it as linear logic:
  do host stuff
  enter guest
  do guest stuff
  exit guest
  continue doing host stuff

Both can be implemented, I prefer linear one. I would prefer linear one
to coroutine in any code design, no connection to vmx. Sometimes
coroutine are better than alternative (and in those cases alternative is
never a linear logic), but this is not the case.


> 
> > The actually differences in asm instruction between both
> > version will not be bigger then a couple of lines (if we will not take
> > setjmp/longjmp implementation into account :)),
> 
> I was waiting for this parenthetical remark to appear. ;)
> 
I've put a smile there :) I realize this argument is not completely fair,
but for the sake of argument everything goes!

--
			Gleb.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC PATCH] kvm-unit-tests : Basic architecture of VMX nested test case
  2013-07-19  9:40               ` Gleb Natapov
@ 2013-07-19 12:06                 ` Paolo Bonzini
  2013-07-24  6:11                   ` Arthur Chunqi Li
  0 siblings, 1 reply; 24+ messages in thread
From: Paolo Bonzini @ 2013-07-19 12:06 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: Arthur Chunqi Li, kvm, jan.kiszka

Il 19/07/2013 11:40, Gleb Natapov ha scritto:
>> Because this is written in C, and I know trying to fool the compiler is
>> a losing game.  So my reaction is "okay, HOST_RIP must be set so that
>> code will not jump around".  If I see
>>
>>    asm("vmlaunch")
>>    exit(-1)
>>
>> the reaction is the opposite: "hmm, anything that jumps around would
>> have a hard time with the compiler, there must be some assembly
>> trampoline somewhere; let's check what HOST_RIP is".
>>
> We do try to fool compiler often even without vmx: code patching. This is
> why asm goto was invented, no? So, like you said in previous emails,
> asm goto is appropriate way here to tell compiler what is going on.

Code patching is "only" reimplementing an existing C structure (if/else)
in a different manner.  Here the actual code flow (location of HOST_RIP
and value of HOST_RSP) cannot be expressed correctly to the compiler
because we do not use the C label (we use an asm label).

I don't think asm goto can be made to work for vmx_return, though if we
go for a big blob it could be useful to jump to the error handling code.

>> I don't see anything bad in jumping completely to a different context.
>> The guest and host are sort of like two coroutines, they hardly have any
>> connection with the code that called vmlaunch.
> You can see it as two coroutines, or you can see it as linear logic:
>   do host stuff
>   enter guest
>   do guest stuff
>   exit guest
>   continue doing host stuff
> 
> Both can be implemented, I prefer linear one. I would prefer linear one
> to coroutine in any code design, no connection to vmx. Sometimes
> coroutine are better than alternative (and in those cases alternative is
> never a linear logic), but this is not the case.

Fair enough.

As things stand, we have only one version that works reliably with
past/present/future compilers, and that is the one with setjmp/longjmp.
 A v5 would be needed anyway.  If Arthur (and Jan) want to write the
vmentry as a big asm blob, that's fine by me.  Still, some variety adds
value in a testsuite: think of a hypothetic nested VMX implementation
that ignores HOST_RIP and just falls through to the next host %rip, we
want that to fail the tests! (*)

    (*) In fact, I think this must be a requirement even if Arthur
        goes for a big asm blob.

If they prefer to keep setjmp/longjmp and fix the few remaining nits, I
think that should be acceptable anyway.  It would even make sense to
have multiple vmentries if you can show they stress the hypervisor
differently.

In any case, I think we all agree (Arthur too) that this RFC doing mixed
asm and C is the worst of both worlds.

>>> The actually differences in asm instruction between both
>>> version will not be bigger then a couple of lines (if we will not take
>>> setjmp/longjmp implementation into account :)),
>>
>> I was waiting for this parenthetical remark to appear. ;)
>>
> I've put a smile there :) I realize this argument is not completely fair,
> but for the sake of argument everything goes!

Yes, I caught the irony. ;)

Paolo

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC PATCH] kvm-unit-tests : Basic architecture of VMX nested test case
  2013-07-19 12:06                 ` Paolo Bonzini
@ 2013-07-24  6:11                   ` Arthur Chunqi Li
  2013-07-24  6:40                     ` Paolo Bonzini
  0 siblings, 1 reply; 24+ messages in thread
From: Arthur Chunqi Li @ 2013-07-24  6:11 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: Gleb Natapov, kvm, Jan Kiszka

Hi Gleb and Paolo,
What about organizing vmx_run() as follows:

static int vmx_run()
{
    u32 eax;
    bool ret;

    vmcs_write(HOST_RSP, get_rsp());
    ret = vmlaunch();
    while (!ret) {
        asm volatile(
            "vmx_return:\n\t"
            SAVE_GPR
        );
        eax = exit_handler();
        switch (eax) {
        case VMX_TEST_VMEXIT:
            return 0;
        case VMX_TEST_RESUME:
            break;
        default:
            printf("%s : unhandled ret from exit_handler.\n", __func__);
            return 1;
        }
        ret = vmresume();
    }
    printf("%s : vmenter failed.\n", __func__);
    return 1;
}

Arthur

On Fri, Jul 19, 2013 at 8:06 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
> Il 19/07/2013 11:40, Gleb Natapov ha scritto:
>>> Because this is written in C, and I know trying to fool the compiler is
>>> a losing game.  So my reaction is "okay, HOST_RIP must be set so that
>>> code will not jump around".  If I see
>>>
>>>    asm("vmlaunch")
>>>    exit(-1)
>>>
>>> the reaction is the opposite: "hmm, anything that jumps around would
>>> have a hard time with the compiler, there must be some assembly
>>> trampoline somewhere; let's check what HOST_RIP is".
>>>
>> We do try to fool compiler often even without vmx: code patching. This is
>> why asm goto was invented, no? So, like you said in previous emails,
>> asm goto is appropriate way here to tell compiler what is going on.
>
> Code patching is "only" reimplementing an existing C structure (if/else)
> in a different manner.  Here the actual code flow (location of HOST_RIP
> and value of HOST_RSP) cannot be expressed correctly to the compiler
> because we do not use the C label (we use an asm label).
>
> I don't think asm goto can be made to work for vmx_return, though if we
> go for a big blob it could be useful to jump to the error handling code.
>
>>> I don't see anything bad in jumping completely to a different context.
>>> The guest and host are sort of like two coroutines, they hardly have any
>>> connection with the code that called vmlaunch.
>> You can see it as two coroutines, or you can see it as linear logic:
>>   do host stuff
>>   enter guest
>>   do guest stuff
>>   exit guest
>>   continue doing host stuff
>>
>> Both can be implemented, I prefer linear one. I would prefer linear one
>> to coroutine in any code design, no connection to vmx. Sometimes
>> coroutine are better than alternative (and in those cases alternative is
>> never a linear logic), but this is not the case.
>
> Fair enough.
>
> As things stand, we have only one version that works reliably with
> past/present/future compilers, and that is the one with setjmp/longjmp.
>  A v5 would be needed anyway.  If Arthur (and Jan) want to write the
> vmentry as a big asm blob, that's fine by me.  Still, some variety adds
> value in a testsuite: think of a hypothetic nested VMX implementation
> that ignores HOST_RIP and just falls through to the next host %rip, we
> want that to fail the tests! (*)
>
>     (*) In fact, I think this must be a requirement even if Arthur
>         goes for a big asm blob.
>
> If they prefer to keep setjmp/longjmp and fix the few remaining nits, I
> think that should be acceptable anyway.  It would even make sense to
> have multiple vmentries if you can show they stress the hypervisor
> differently.
>
> In any case, I think we all agree (Arthur too) that this RFC doing mixed
> asm and C is the worst of both worlds.
>
>>>> The actually differences in asm instruction between both
>>>> version will not be bigger then a couple of lines (if we will not take
>>>> setjmp/longjmp implementation into account :)),
>>>
>>> I was waiting for this parenthetical remark to appear. ;)
>>>
>> I've put a smile there :) I realize this argument is not completely fair,
>> but for the sake of argument everything goes!
>
> Yes, I caught the irony. ;)
>
> Paolo



-- 
Arthur Chunqi Li
Department of Computer Science
School of EECS
Peking University
Beijing, China

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC PATCH] kvm-unit-tests : Basic architecture of VMX nested test case
  2013-07-24  6:11                   ` Arthur Chunqi Li
@ 2013-07-24  6:40                     ` Paolo Bonzini
  2013-07-24  6:46                       ` Arthur Chunqi Li
  0 siblings, 1 reply; 24+ messages in thread
From: Paolo Bonzini @ 2013-07-24  6:40 UTC (permalink / raw)
  To: Arthur Chunqi Li; +Cc: Gleb Natapov, kvm, Jan Kiszka

Il 24/07/2013 08:11, Arthur Chunqi Li ha scritto:
> 
> static int vmx_run()
> {
>     u32 eax;
>     bool ret;
> 
>     vmcs_write(HOST_RSP, get_rsp());
>     ret = vmlaunch();

The compiler can still change rsp between here...

>     while (!ret) {
>         asm volatile(
>             "vmx_return:\n\t"

... and here.

If you want to write it in C, the only thing that can be after
vmlaunch/vmresume is "exit()".  Else it has to be asm.

Paolo

>             SAVE_GPR
>         );
>         eax = exit_handler();
>         switch (eax) {
>         case VMX_TEST_VMEXIT:
>             return 0;
>         case VMX_TEST_RESUME:
>             break;
>         default:
>             printf("%s : unhandled ret from exit_handler.\n", __func__);
>             return 1;
>         }
>         ret = vmresume();
>     }
>     printf("%s : vmenter failed.\n", __func__);
>     return 1;
> }


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC PATCH] kvm-unit-tests : Basic architecture of VMX nested test case
  2013-07-24  6:40                     ` Paolo Bonzini
@ 2013-07-24  6:46                       ` Arthur Chunqi Li
  2013-07-24  6:48                         ` Paolo Bonzini
  0 siblings, 1 reply; 24+ messages in thread
From: Arthur Chunqi Li @ 2013-07-24  6:46 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: Gleb Natapov, kvm, Jan Kiszka

On Wed, Jul 24, 2013 at 2:40 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
> Il 24/07/2013 08:11, Arthur Chunqi Li ha scritto:
>>
>> static int vmx_run()
>> {
>>     u32 eax;
>>     bool ret;
>>
>>     vmcs_write(HOST_RSP, get_rsp());
>>     ret = vmlaunch();
>
> The compiler can still change rsp between here...
>
>>     while (!ret) {
>>         asm volatile(
>>             "vmx_return:\n\t"
>
> ... and here.
>
> If you want to write it in C, the only thing that can be after
> vmlaunch/vmresume is "exit()".  Else it has to be asm.
Actually, you mean we need to write all the codes in asm to avoid
changing to rsp, right?

Arthur
>
> Paolo
>
>>             SAVE_GPR
>>         );
>>         eax = exit_handler();
>>         switch (eax) {
>>         case VMX_TEST_VMEXIT:
>>             return 0;
>>         case VMX_TEST_RESUME:
>>             break;
>>         default:
>>             printf("%s : unhandled ret from exit_handler.\n", __func__);
>>             return 1;
>>         }
>>         ret = vmresume();
>>     }
>>     printf("%s : vmenter failed.\n", __func__);
>>     return 1;
>> }
>

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC PATCH] kvm-unit-tests : Basic architecture of VMX nested test case
  2013-07-24  6:46                       ` Arthur Chunqi Li
@ 2013-07-24  6:48                         ` Paolo Bonzini
  2013-07-24  8:48                           ` Arthur Chunqi Li
  0 siblings, 1 reply; 24+ messages in thread
From: Paolo Bonzini @ 2013-07-24  6:48 UTC (permalink / raw)
  To: Arthur Chunqi Li; +Cc: Gleb Natapov, kvm, Jan Kiszka

Il 24/07/2013 08:46, Arthur Chunqi Li ha scritto:
> On Wed, Jul 24, 2013 at 2:40 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
>> Il 24/07/2013 08:11, Arthur Chunqi Li ha scritto:
>>>
>>> static int vmx_run()
>>> {
>>>     u32 eax;
>>>     bool ret;
>>>
>>>     vmcs_write(HOST_RSP, get_rsp());
>>>     ret = vmlaunch();
>>
>> The compiler can still change rsp between here...
>>
>>>     while (!ret) {
>>>         asm volatile(
>>>             "vmx_return:\n\t"
>>
>> ... and here.
>>
>> If you want to write it in C, the only thing that can be after
>> vmlaunch/vmresume is "exit()".  Else it has to be asm.
> Actually, you mean we need to write all the codes in asm to avoid
> changing to rsp, right?

Not necessarily all the code.  It is also ok to use setjmp/longjmp with
a small asm trampoline, because this method won't care about the exact
rsp values that are used.  But if you want to do as Gleb said, and put
vmx_return just after vmlaunch, it has to be all asm as in KVM's
arch/x86/kvm/vmx.c.

Paolo

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC PATCH] kvm-unit-tests : Basic architecture of VMX nested test case
  2013-07-24  6:48                         ` Paolo Bonzini
@ 2013-07-24  8:48                           ` Arthur Chunqi Li
  2013-07-24  8:53                             ` Jan Kiszka
  2013-07-24  9:16                             ` Paolo Bonzini
  0 siblings, 2 replies; 24+ messages in thread
From: Arthur Chunqi Li @ 2013-07-24  8:48 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: Gleb Natapov, kvm, Jan Kiszka

So as what Gleb said, what about the following codes:

static int vmx_run2()
{
    u32 eax;
    bool ret;

    asm volatile(
        "mov %%rsp, %%rsi\n\t"
        "mov %2, %%edi\n\t"
        "call vmcs_write\n\t"
        "vmlaunch\n\t"
        "setbe %0\n\t"
        "jne 4f\n\t"

        "vmx_return:\n\t"
        SAVE_GPR_C
        "call exit_handler\n\t"
        "cmp %3, %%eax\n\t"
        "je 2f\n\t"
        "cmp %4, %%eax\n\t"
        "je 1f\n\t"
        "jmp 3f\n\t"

        /* VMX_TEST_RESUME */
        "1:\n\t"
        LOAD_GPR_C
        "vmresume\n\t"
        "setbe %0\n\t"
        "jne 4f\n\t"
        /* VMX_TEST_VMEXIT */
        "2:\n\t"
        "mov $0, %1\n\t"
        "jmp 5f\n\t"
        /* undefined ret from exit_handler */
        "3:\n\t"
        "mov $2, %1\n\t"
        "jmp 5f\n\t"
        /* vmlaunch/vmresume failed, exit */
        "4:\n\t"
        "mov $1, %1\n\t"
        "5:\n\t"
        : "=r"(ret), "=r"(eax)
        : "i"(HOST_RSP), "i"(VMX_TEST_VMEXIT),
          "i"(VMX_TEST_RESUME)
        : "rax", "rbx", "rdi", "rsi",
           "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
          "memory", "cc"
    );
    switch (eax) {
    case 0:
        return 0;
    case 1:
        printf("%s : vmenter failed.\n", __func__);
        break;
    default:
        printf("%s : unhandled ret from exit_handler.\n", __func__);
        break;
    }
    return 1;
}

On Wed, Jul 24, 2013 at 2:48 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
> Il 24/07/2013 08:46, Arthur Chunqi Li ha scritto:
>> On Wed, Jul 24, 2013 at 2:40 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
>>> Il 24/07/2013 08:11, Arthur Chunqi Li ha scritto:
>>>>
>>>> static int vmx_run()
>>>> {
>>>>     u32 eax;
>>>>     bool ret;
>>>>
>>>>     vmcs_write(HOST_RSP, get_rsp());
>>>>     ret = vmlaunch();
>>>
>>> The compiler can still change rsp between here...
>>>
>>>>     while (!ret) {
>>>>         asm volatile(
>>>>             "vmx_return:\n\t"
>>>
>>> ... and here.
>>>
>>> If you want to write it in C, the only thing that can be after
>>> vmlaunch/vmresume is "exit()".  Else it has to be asm.
>> Actually, you mean we need to write all the codes in asm to avoid
>> changing to rsp, right?
>
> Not necessarily all the code.  It is also ok to use setjmp/longjmp with
> a small asm trampoline, because this method won't care about the exact
> rsp values that are used.  But if you want to do as Gleb said, and put
> vmx_return just after vmlaunch, it has to be all asm as in KVM's
> arch/x86/kvm/vmx.c.
>
> Paolo



-- 
Arthur Chunqi Li
Department of Computer Science
School of EECS
Peking University
Beijing, China

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC PATCH] kvm-unit-tests : Basic architecture of VMX nested test case
  2013-07-24  8:48                           ` Arthur Chunqi Li
@ 2013-07-24  8:53                             ` Jan Kiszka
  2013-07-24  9:16                             ` Paolo Bonzini
  1 sibling, 0 replies; 24+ messages in thread
From: Jan Kiszka @ 2013-07-24  8:53 UTC (permalink / raw)
  To: Arthur Chunqi Li; +Cc: Paolo Bonzini, Gleb Natapov, kvm

[-- Attachment #1: Type: text/plain, Size: 3037 bytes --]

On 2013-07-24 10:48, Arthur Chunqi Li wrote:
> So as what Gleb said, what about the following codes:
> 
> static int vmx_run2()
> {
>     u32 eax;
>     bool ret;
> 
>     asm volatile(
>         "mov %%rsp, %%rsi\n\t"
>         "mov %2, %%edi\n\t"
>         "call vmcs_write\n\t"
>         "vmlaunch\n\t"

Just like in KVM, provide a flag to the asm block that selects vmlaunch
or vmresume, then grab all the required information on return and leave
the asm block quickly again.

Jan

>         "setbe %0\n\t"
>         "jne 4f\n\t"
> 
>         "vmx_return:\n\t"
>         SAVE_GPR_C
>         "call exit_handler\n\t"
>         "cmp %3, %%eax\n\t"
>         "je 2f\n\t"
>         "cmp %4, %%eax\n\t"
>         "je 1f\n\t"
>         "jmp 3f\n\t"
> 
>         /* VMX_TEST_RESUME */
>         "1:\n\t"
>         LOAD_GPR_C
>         "vmresume\n\t"
>         "setbe %0\n\t"
>         "jne 4f\n\t"
>         /* VMX_TEST_VMEXIT */
>         "2:\n\t"
>         "mov $0, %1\n\t"
>         "jmp 5f\n\t"
>         /* undefined ret from exit_handler */
>         "3:\n\t"
>         "mov $2, %1\n\t"
>         "jmp 5f\n\t"
>         /* vmlaunch/vmresume failed, exit */
>         "4:\n\t"
>         "mov $1, %1\n\t"
>         "5:\n\t"
>         : "=r"(ret), "=r"(eax)
>         : "i"(HOST_RSP), "i"(VMX_TEST_VMEXIT),
>           "i"(VMX_TEST_RESUME)
>         : "rax", "rbx", "rdi", "rsi",
>            "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
>           "memory", "cc"
>     );
>     switch (eax) {
>     case 0:
>         return 0;
>     case 1:
>         printf("%s : vmenter failed.\n", __func__);
>         break;
>     default:
>         printf("%s : unhandled ret from exit_handler.\n", __func__);
>         break;
>     }
>     return 1;
> }
> 
> On Wed, Jul 24, 2013 at 2:48 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
>> Il 24/07/2013 08:46, Arthur Chunqi Li ha scritto:
>>> On Wed, Jul 24, 2013 at 2:40 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
>>>> Il 24/07/2013 08:11, Arthur Chunqi Li ha scritto:
>>>>>
>>>>> static int vmx_run()
>>>>> {
>>>>>     u32 eax;
>>>>>     bool ret;
>>>>>
>>>>>     vmcs_write(HOST_RSP, get_rsp());
>>>>>     ret = vmlaunch();
>>>>
>>>> The compiler can still change rsp between here...
>>>>
>>>>>     while (!ret) {
>>>>>         asm volatile(
>>>>>             "vmx_return:\n\t"
>>>>
>>>> ... and here.
>>>>
>>>> If you want to write it in C, the only thing that can be after
>>>> vmlaunch/vmresume is "exit()".  Else it has to be asm.
>>> Actually, you mean we need to write all the codes in asm to avoid
>>> changing to rsp, right?
>>
>> Not necessarily all the code.  It is also ok to use setjmp/longjmp with
>> a small asm trampoline, because this method won't care about the exact
>> rsp values that are used.  But if you want to do as Gleb said, and put
>> vmx_return just after vmlaunch, it has to be all asm as in KVM's
>> arch/x86/kvm/vmx.c.
>>
>> Paolo
> 
> 
> 



[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 263 bytes --]

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC PATCH] kvm-unit-tests : Basic architecture of VMX nested test case
  2013-07-24  8:48                           ` Arthur Chunqi Li
  2013-07-24  8:53                             ` Jan Kiszka
@ 2013-07-24  9:16                             ` Paolo Bonzini
  2013-07-24  9:56                               ` Arthur Chunqi Li
  1 sibling, 1 reply; 24+ messages in thread
From: Paolo Bonzini @ 2013-07-24  9:16 UTC (permalink / raw)
  To: Arthur Chunqi Li; +Cc: Gleb Natapov, kvm, Jan Kiszka

Il 24/07/2013 10:48, Arthur Chunqi Li ha scritto:
> So as what Gleb said, what about the following codes:
> 
> static int vmx_run2()
> {
>     u32 eax;
>     bool ret;
> 
>     asm volatile(
>         "mov %%rsp, %%rsi\n\t"
>         "mov %2, %%edi\n\t"
>         "call vmcs_write\n\t"
>         "vmlaunch\n\t"
>         "setbe %0\n\t"
>         "jne 4f\n\t"

setbe doesn't set the flags, so you need jbe here (and then you can have
a "mov $1, %0" at label 4 instead of using setbe).

> 
>         "vmx_return:\n\t"
>         SAVE_GPR_C
>         "call exit_handler\n\t"
>         "cmp %3, %%eax\n\t"
>         "je 2f\n\t"
>         "cmp %4, %%eax\n\t"
>         "je 1f\n\t"
>         "jmp 3f\n\t"
> 
>         /* VMX_TEST_RESUME */
>         "1:\n\t"
>         LOAD_GPR_C
>         "vmresume\n\t"

You need a SAVE_GPR_C here.  Then it is better to put the jbe at the
vmx_return label:

    ... do vmlaunch or vmreturn as Jan said ...
 vmx_return:
    SAVE_GPR_C
    jbe 4f
    call exit_handler
    etc.

Here is the relevant code from KVM that Jan was referring to:

                "jne 1f \n\t"
                __ex(ASM_VMX_VMLAUNCH) "\n\t"
                "jmp 2f \n\t"
                "1: " __ex(ASM_VMX_VMRESUME) "\n\t"
                "2: "

I'd prefer to have a "jbe vmx_return; ud2" after the vmlaunch/vmresume,
in order to test that the hypervisor respects HOST_RIP.

Paolo

>         "setbe %0\n\t"
>         "jne 4f\n\t"
>         /* VMX_TEST_VMEXIT */
>         "2:\n\t"
>         "mov $0, %1\n\t"
>         "jmp 5f\n\t"
>         /* undefined ret from exit_handler */
>         "3:\n\t"
>         "mov $2, %1\n\t"
>         "jmp 5f\n\t"
>         /* vmlaunch/vmresume failed, exit */
>         "4:\n\t"
>         "mov $1, %1\n\t"
>         "5:\n\t"
>         : "=r"(ret), "=r"(eax)
>         : "i"(HOST_RSP), "i"(VMX_TEST_VMEXIT),
>           "i"(VMX_TEST_RESUME)
>         : "rax", "rbx", "rdi", "rsi",
>            "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
>           "memory", "cc"
>     );
>     switch (eax) {
>     case 0:
>         return 0;
>     case 1:
>         printf("%s : vmenter failed.\n", __func__);
>         break;
>     default:
>         printf("%s : unhandled ret from exit_handler.\n", __func__);
>         break;
>     }
>     return 1;
> }
> 
> On Wed, Jul 24, 2013 at 2:48 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
>> Il 24/07/2013 08:46, Arthur Chunqi Li ha scritto:
>>> On Wed, Jul 24, 2013 at 2:40 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
>>>> Il 24/07/2013 08:11, Arthur Chunqi Li ha scritto:
>>>>>
>>>>> static int vmx_run()
>>>>> {
>>>>>     u32 eax;
>>>>>     bool ret;
>>>>>
>>>>>     vmcs_write(HOST_RSP, get_rsp());
>>>>>     ret = vmlaunch();
>>>>
>>>> The compiler can still change rsp between here...
>>>>
>>>>>     while (!ret) {
>>>>>         asm volatile(
>>>>>             "vmx_return:\n\t"
>>>>
>>>> ... and here.
>>>>
>>>> If you want to write it in C, the only thing that can be after
>>>> vmlaunch/vmresume is "exit()".  Else it has to be asm.
>>> Actually, you mean we need to write all the codes in asm to avoid
>>> changing to rsp, right?
>>
>> Not necessarily all the code.  It is also ok to use setjmp/longjmp with
>> a small asm trampoline, because this method won't care about the exact
>> rsp values that are used.  But if you want to do as Gleb said, and put
>> vmx_return just after vmlaunch, it has to be all asm as in KVM's
>> arch/x86/kvm/vmx.c.
>>
>> Paolo
> 
> 
> 


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC PATCH] kvm-unit-tests : Basic architecture of VMX nested test case
  2013-07-24  9:16                             ` Paolo Bonzini
@ 2013-07-24  9:56                               ` Arthur Chunqi Li
  2013-07-24 10:03                                 ` Jan Kiszka
  0 siblings, 1 reply; 24+ messages in thread
From: Arthur Chunqi Li @ 2013-07-24  9:56 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: Gleb Natapov, kvm, Jan Kiszka

So what about this one. I merged all the exit reason to "ret" and
remove the flag detection after vmlaunch/vmresume (because I think
this detection is useless). Currently we support only one guest, so
variant "launched" is located in vmx_run(). If we want to support
multiple guest, we could move it to some structures (e.g.
environment_ctxt). Now I just put it here.

static int vmx_run()
{
    u32 ret = 0;
    bool launched = 0;

    asm volatile(
        "mov %%rsp, %%rsi\n\t"
        "mov %2, %%edi\n\t"
        "call vmcs_write\n\t"

        "0: "
        LOAD_GPR_C
        "cmp $0, %1\n\t"
        "jne 1f\n\t"
        "vmlaunch\n\t"
        SAVE_GPR_C
        /* vmlaunch error, return VMX_TEST_LAUNCH_ERR */
        "mov %3, %0\n\t"
        "jmp 2f\n\t"
        "1: "
        "vmresume\n\t"
        SAVE_GPR_C
        /* vmresume error, return VMX_TEST_RESUME_ERR */
        "mov %4, %0\n\t"
        "jmp 2f\n\t"
        "vmx_return:\n\t"
        SAVE_GPR_C
        "call exit_handler\n\t"
        /* set launched = 1 */
        "mov $0x1, %1\n\t"
        "cmp %5, %%eax\n\t"
        "je 0b\n\t"
        "mov %%eax, %0\n\t"
        "2: "
        : "=r"(ret), "=r"(launched)
        : "i"(HOST_RSP), "i"(VMX_TEST_LAUNCH_ERR),
          "i"(VMX_TEST_RESUME_ERR), "i"(VMX_TEST_RESUME)
        : "rax", "rbx", "rdi", "rsi",
          "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
          "memory", "cc"
    );
    switch (ret) {
    case VMX_TEST_VMEXIT:
        return 0;
    case VMX_TEST_LAUNCH_ERR:
        printf("%s : vmlaunch failed.\n", __func__);
        break;
    case VMX_TEST_RESUME_ERR:
        printf("%s : vmresume failed.\n", __func__);
        break;
    default:
        printf("%s : unhandled ret from exit_handler, ret=%d.\n",
__func__, ret);
        break;
    }
    return 1;
}

On Wed, Jul 24, 2013 at 5:16 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
> Il 24/07/2013 10:48, Arthur Chunqi Li ha scritto:
>> So as what Gleb said, what about the following codes:
>>
>> static int vmx_run2()
>> {
>>     u32 eax;
>>     bool ret;
>>
>>     asm volatile(
>>         "mov %%rsp, %%rsi\n\t"
>>         "mov %2, %%edi\n\t"
>>         "call vmcs_write\n\t"
>>         "vmlaunch\n\t"
>>         "setbe %0\n\t"
>>         "jne 4f\n\t"
>
> setbe doesn't set the flags, so you need jbe here (and then you can have
> a "mov $1, %0" at label 4 instead of using setbe).
>
>>
>>         "vmx_return:\n\t"
>>         SAVE_GPR_C
>>         "call exit_handler\n\t"
>>         "cmp %3, %%eax\n\t"
>>         "je 2f\n\t"
>>         "cmp %4, %%eax\n\t"
>>         "je 1f\n\t"
>>         "jmp 3f\n\t"
>>
>>         /* VMX_TEST_RESUME */
>>         "1:\n\t"
>>         LOAD_GPR_C
>>         "vmresume\n\t"
>
> You need a SAVE_GPR_C here.  Then it is better to put the jbe at the
> vmx_return label:
>
>     ... do vmlaunch or vmreturn as Jan said ...
>  vmx_return:
>     SAVE_GPR_C
>     jbe 4f
>     call exit_handler
>     etc.
>
> Here is the relevant code from KVM that Jan was referring to:
>
>                 "jne 1f \n\t"
>                 __ex(ASM_VMX_VMLAUNCH) "\n\t"
>                 "jmp 2f \n\t"
>                 "1: " __ex(ASM_VMX_VMRESUME) "\n\t"
>                 "2: "
>
> I'd prefer to have a "jbe vmx_return; ud2" after the vmlaunch/vmresume,
> in order to test that the hypervisor respects HOST_RIP.
>
> Paolo
>
>>         "setbe %0\n\t"
>>         "jne 4f\n\t"
>>         /* VMX_TEST_VMEXIT */
>>         "2:\n\t"
>>         "mov $0, %1\n\t"
>>         "jmp 5f\n\t"
>>         /* undefined ret from exit_handler */
>>         "3:\n\t"
>>         "mov $2, %1\n\t"
>>         "jmp 5f\n\t"
>>         /* vmlaunch/vmresume failed, exit */
>>         "4:\n\t"
>>         "mov $1, %1\n\t"
>>         "5:\n\t"
>>         : "=r"(ret), "=r"(eax)
>>         : "i"(HOST_RSP), "i"(VMX_TEST_VMEXIT),
>>           "i"(VMX_TEST_RESUME)
>>         : "rax", "rbx", "rdi", "rsi",
>>            "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
>>           "memory", "cc"
>>     );
>>     switch (eax) {
>>     case 0:
>>         return 0;
>>     case 1:
>>         printf("%s : vmenter failed.\n", __func__);
>>         break;
>>     default:
>>         printf("%s : unhandled ret from exit_handler.\n", __func__);
>>         break;
>>     }
>>     return 1;
>> }
>>
>> On Wed, Jul 24, 2013 at 2:48 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
>>> Il 24/07/2013 08:46, Arthur Chunqi Li ha scritto:
>>>> On Wed, Jul 24, 2013 at 2:40 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
>>>>> Il 24/07/2013 08:11, Arthur Chunqi Li ha scritto:
>>>>>>
>>>>>> static int vmx_run()
>>>>>> {
>>>>>>     u32 eax;
>>>>>>     bool ret;
>>>>>>
>>>>>>     vmcs_write(HOST_RSP, get_rsp());
>>>>>>     ret = vmlaunch();
>>>>>
>>>>> The compiler can still change rsp between here...
>>>>>
>>>>>>     while (!ret) {
>>>>>>         asm volatile(
>>>>>>             "vmx_return:\n\t"
>>>>>
>>>>> ... and here.
>>>>>
>>>>> If you want to write it in C, the only thing that can be after
>>>>> vmlaunch/vmresume is "exit()".  Else it has to be asm.
>>>> Actually, you mean we need to write all the codes in asm to avoid
>>>> changing to rsp, right?
>>>
>>> Not necessarily all the code.  It is also ok to use setjmp/longjmp with
>>> a small asm trampoline, because this method won't care about the exact
>>> rsp values that are used.  But if you want to do as Gleb said, and put
>>> vmx_return just after vmlaunch, it has to be all asm as in KVM's
>>> arch/x86/kvm/vmx.c.
>>>
>>> Paolo
>>
>>
>>
>



-- 
Arthur Chunqi Li
Department of Computer Science
School of EECS
Peking University
Beijing, China

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC PATCH] kvm-unit-tests : Basic architecture of VMX nested test case
  2013-07-24  9:56                               ` Arthur Chunqi Li
@ 2013-07-24 10:03                                 ` Jan Kiszka
  2013-07-24 10:16                                   ` Arthur Chunqi Li
  0 siblings, 1 reply; 24+ messages in thread
From: Jan Kiszka @ 2013-07-24 10:03 UTC (permalink / raw)
  To: Arthur Chunqi Li; +Cc: Paolo Bonzini, Gleb Natapov, kvm

[-- Attachment #1: Type: text/plain, Size: 2168 bytes --]

On 2013-07-24 11:56, Arthur Chunqi Li wrote:
> So what about this one. I merged all the exit reason to "ret" and
> remove the flag detection after vmlaunch/vmresume (because I think
> this detection is useless). Currently we support only one guest, so
> variant "launched" is located in vmx_run(). If we want to support
> multiple guest, we could move it to some structures (e.g.
> environment_ctxt). Now I just put it here.
> 
> static int vmx_run()
> {
>     u32 ret = 0;
>     bool launched = 0;
> 
>     asm volatile(
>         "mov %%rsp, %%rsi\n\t"
>         "mov %2, %%edi\n\t"
>         "call vmcs_write\n\t"
> 
>         "0: "
>         LOAD_GPR_C
>         "cmp $0, %1\n\t"
>         "jne 1f\n\t"
>         "vmlaunch\n\t"
>         SAVE_GPR_C
>         /* vmlaunch error, return VMX_TEST_LAUNCH_ERR */
>         "mov %3, %0\n\t"
>         "jmp 2f\n\t"
>         "1: "
>         "vmresume\n\t"
>         SAVE_GPR_C
>         /* vmresume error, return VMX_TEST_RESUME_ERR */
>         "mov %4, %0\n\t"
>         "jmp 2f\n\t"

Where do you store the flags now? You may want to differentiate / test
if ZF of CF is set.

Jan

>         "vmx_return:\n\t"
>         SAVE_GPR_C
>         "call exit_handler\n\t"
>         /* set launched = 1 */
>         "mov $0x1, %1\n\t"
>         "cmp %5, %%eax\n\t"
>         "je 0b\n\t"
>         "mov %%eax, %0\n\t"
>         "2: "
>         : "=r"(ret), "=r"(launched)
>         : "i"(HOST_RSP), "i"(VMX_TEST_LAUNCH_ERR),
>           "i"(VMX_TEST_RESUME_ERR), "i"(VMX_TEST_RESUME)
>         : "rax", "rbx", "rdi", "rsi",
>           "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
>           "memory", "cc"
>     );
>     switch (ret) {
>     case VMX_TEST_VMEXIT:
>         return 0;
>     case VMX_TEST_LAUNCH_ERR:
>         printf("%s : vmlaunch failed.\n", __func__);
>         break;
>     case VMX_TEST_RESUME_ERR:
>         printf("%s : vmresume failed.\n", __func__);
>         break;
>     default:
>         printf("%s : unhandled ret from exit_handler, ret=%d.\n",
> __func__, ret);
>         break;
>     }
>     return 1;
> }
> 



[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 263 bytes --]

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC PATCH] kvm-unit-tests : Basic architecture of VMX nested test case
  2013-07-24 10:03                                 ` Jan Kiszka
@ 2013-07-24 10:16                                   ` Arthur Chunqi Li
  2013-07-24 10:24                                     ` Jan Kiszka
  0 siblings, 1 reply; 24+ messages in thread
From: Arthur Chunqi Li @ 2013-07-24 10:16 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: Paolo Bonzini, Gleb Natapov, kvm

On Wed, Jul 24, 2013 at 6:03 PM, Jan Kiszka <jan.kiszka@web.de> wrote:
> On 2013-07-24 11:56, Arthur Chunqi Li wrote:
>> So what about this one. I merged all the exit reason to "ret" and
>> remove the flag detection after vmlaunch/vmresume (because I think
>> this detection is useless). Currently we support only one guest, so
>> variant "launched" is located in vmx_run(). If we want to support
>> multiple guest, we could move it to some structures (e.g.
>> environment_ctxt). Now I just put it here.
>>
>> static int vmx_run()
>> {
>>     u32 ret = 0;
>>     bool launched = 0;
>>
>>     asm volatile(
>>         "mov %%rsp, %%rsi\n\t"
>>         "mov %2, %%edi\n\t"
>>         "call vmcs_write\n\t"
>>
>>         "0: "
>>         LOAD_GPR_C
>>         "cmp $0, %1\n\t"
>>         "jne 1f\n\t"
>>         "vmlaunch\n\t"
>>         SAVE_GPR_C
>>         /* vmlaunch error, return VMX_TEST_LAUNCH_ERR */
>>         "mov %3, %0\n\t"
>>         "jmp 2f\n\t"
>>         "1: "
>>         "vmresume\n\t"
>>         SAVE_GPR_C
>>         /* vmresume error, return VMX_TEST_RESUME_ERR */
>>         "mov %4, %0\n\t"
>>         "jmp 2f\n\t"
>
> Where do you store the flags now? You may want to differentiate / test
> if ZF of CF is set.
I store the flags as a global variant. You mean I need to detect ZF/CF
after vmlaunch/vmresume?

Arthur
>
> Jan
>
>>         "vmx_return:\n\t"
>>         SAVE_GPR_C
>>         "call exit_handler\n\t"
>>         /* set launched = 1 */
>>         "mov $0x1, %1\n\t"
>>         "cmp %5, %%eax\n\t"
>>         "je 0b\n\t"
>>         "mov %%eax, %0\n\t"
>>         "2: "
>>         : "=r"(ret), "=r"(launched)
>>         : "i"(HOST_RSP), "i"(VMX_TEST_LAUNCH_ERR),
>>           "i"(VMX_TEST_RESUME_ERR), "i"(VMX_TEST_RESUME)
>>         : "rax", "rbx", "rdi", "rsi",
>>           "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
>>           "memory", "cc"
>>     );
>>     switch (ret) {
>>     case VMX_TEST_VMEXIT:
>>         return 0;
>>     case VMX_TEST_LAUNCH_ERR:
>>         printf("%s : vmlaunch failed.\n", __func__);
>>         break;
>>     case VMX_TEST_RESUME_ERR:
>>         printf("%s : vmresume failed.\n", __func__);
>>         break;
>>     default:
>>         printf("%s : unhandled ret from exit_handler, ret=%d.\n",
>> __func__, ret);
>>         break;
>>     }
>>     return 1;
>> }
>>
>
>



-- 
Arthur Chunqi Li
Department of Computer Science
School of EECS
Peking University
Beijing, China

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC PATCH] kvm-unit-tests : Basic architecture of VMX nested test case
  2013-07-24 10:16                                   ` Arthur Chunqi Li
@ 2013-07-24 10:24                                     ` Jan Kiszka
  2013-07-24 11:20                                       ` Arthur Chunqi Li
  0 siblings, 1 reply; 24+ messages in thread
From: Jan Kiszka @ 2013-07-24 10:24 UTC (permalink / raw)
  To: Arthur Chunqi Li; +Cc: Paolo Bonzini, Gleb Natapov, kvm

[-- Attachment #1: Type: text/plain, Size: 1549 bytes --]

On 2013-07-24 12:16, Arthur Chunqi Li wrote:
> On Wed, Jul 24, 2013 at 6:03 PM, Jan Kiszka <jan.kiszka@web.de> wrote:
>> On 2013-07-24 11:56, Arthur Chunqi Li wrote:
>>> So what about this one. I merged all the exit reason to "ret" and
>>> remove the flag detection after vmlaunch/vmresume (because I think
>>> this detection is useless). Currently we support only one guest, so
>>> variant "launched" is located in vmx_run(). If we want to support
>>> multiple guest, we could move it to some structures (e.g.
>>> environment_ctxt). Now I just put it here.
>>>
>>> static int vmx_run()
>>> {
>>>     u32 ret = 0;
>>>     bool launched = 0;
>>>
>>>     asm volatile(
>>>         "mov %%rsp, %%rsi\n\t"
>>>         "mov %2, %%edi\n\t"
>>>         "call vmcs_write\n\t"
>>>
>>>         "0: "
>>>         LOAD_GPR_C
>>>         "cmp $0, %1\n\t"
>>>         "jne 1f\n\t"
>>>         "vmlaunch\n\t"
>>>         SAVE_GPR_C
>>>         /* vmlaunch error, return VMX_TEST_LAUNCH_ERR */
>>>         "mov %3, %0\n\t"
>>>         "jmp 2f\n\t"
>>>         "1: "
>>>         "vmresume\n\t"
>>>         SAVE_GPR_C
>>>         /* vmresume error, return VMX_TEST_RESUME_ERR */
>>>         "mov %4, %0\n\t"
>>>         "jmp 2f\n\t"
>>
>> Where do you store the flags now? You may want to differentiate / test
>> if ZF of CF is set.
> I store the flags as a global variant. You mean I need to detect ZF/CF
> after vmlaunch/vmresume?

Yes - if you want to check correct emulation of those instructions
completely.

Jan



[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 263 bytes --]

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC PATCH] kvm-unit-tests : Basic architecture of VMX nested test case
  2013-07-24 10:24                                     ` Jan Kiszka
@ 2013-07-24 11:20                                       ` Arthur Chunqi Li
  2013-07-24 11:25                                         ` Jan Kiszka
  0 siblings, 1 reply; 24+ messages in thread
From: Arthur Chunqi Li @ 2013-07-24 11:20 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: Paolo Bonzini, Gleb Natapov, kvm

And what about this version:

static int vmx_run()
{
    u32 ret = 0;

    asm volatile(
        "mov %%rsp, %%rsi\n\t"
        "mov %2, %%edi\n\t"
        "call vmcs_write\n\t"

        "0: "
        LOAD_GPR_C
        "cmpl $0, %1\n\t"
        "jne 1f\n\t"
        "vmlaunch;seta %1\n\t"
        /* vmlaunch error, return VMX_TEST_LAUNCH_ERR */
        "movl %3, %0\n\t"
        SAVE_GPR_C
        "jmp 2f\n\t"
        "1: "
        "vmresume;seta %1\n\t"
        /* vmresume error, return VMX_TEST_RESUME_ERR */
        "movl %4, %0\n\t"
        SAVE_GPR_C
        "jmp 2f\n\t"

        "vmx_return:\n\t"
        SAVE_GPR_C
        "call exit_handler\n\t"
        /* set launched = 1 */
        "movl $0x1, %1\n\t"
        /* jump to resume when VMX_TEST_RESUME */
        "cmp %5, %%eax\n\t"
        "je 0b\n\t"
        "mov %%eax, %0\n\t"
        "2: "
        : "=m"(ret), "=m"(launched)
        : "i"(HOST_RSP), "i"(VMX_TEST_LAUNCH_ERR),
          "i"(VMX_TEST_RESUME_ERR), "i"(VMX_TEST_RESUME)
        : "rax", "rbx", "rdi", "rsi",
         "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
          "memory", "cc"
    );
    switch (ret) {
    case VMX_TEST_VMEXIT:
        launched = 0;
        return 0;
    case VMX_TEST_LAUNCH_ERR:
        printf("%s : vmlaunch failed.\n", __func__);
        if (launched != 0)
            printf("\tvmlaunch set flags error\n");
        report("test vmlaunch", 0);
        break;
    case VMX_TEST_RESUME_ERR:
        printf("%s : vmresume failed.\n", __func__);
        if (launched != 0)
            printf("\tvmlaunch set flags error\n");
        report("test vmresume", 0);
        break;
    default:
        launched = 0;
        printf("%s : unhandled ret from exit_handler, ret=%d.\n",
__func__, ret);
        break;
    }
    return 1;
}

On Wed, Jul 24, 2013 at 6:24 PM, Jan Kiszka <jan.kiszka@web.de> wrote:
> On 2013-07-24 12:16, Arthur Chunqi Li wrote:
>> On Wed, Jul 24, 2013 at 6:03 PM, Jan Kiszka <jan.kiszka@web.de> wrote:
>>> On 2013-07-24 11:56, Arthur Chunqi Li wrote:
>>>> So what about this one. I merged all the exit reason to "ret" and
>>>> remove the flag detection after vmlaunch/vmresume (because I think
>>>> this detection is useless). Currently we support only one guest, so
>>>> variant "launched" is located in vmx_run(). If we want to support
>>>> multiple guest, we could move it to some structures (e.g.
>>>> environment_ctxt). Now I just put it here.
>>>>
>>>> static int vmx_run()
>>>> {
>>>>     u32 ret = 0;
>>>>     bool launched = 0;
>>>>
>>>>     asm volatile(
>>>>         "mov %%rsp, %%rsi\n\t"
>>>>         "mov %2, %%edi\n\t"
>>>>         "call vmcs_write\n\t"
>>>>
>>>>         "0: "
>>>>         LOAD_GPR_C
>>>>         "cmp $0, %1\n\t"
>>>>         "jne 1f\n\t"
>>>>         "vmlaunch\n\t"
>>>>         SAVE_GPR_C
>>>>         /* vmlaunch error, return VMX_TEST_LAUNCH_ERR */
>>>>         "mov %3, %0\n\t"
>>>>         "jmp 2f\n\t"
>>>>         "1: "
>>>>         "vmresume\n\t"
>>>>         SAVE_GPR_C
>>>>         /* vmresume error, return VMX_TEST_RESUME_ERR */
>>>>         "mov %4, %0\n\t"
>>>>         "jmp 2f\n\t"
>>>
>>> Where do you store the flags now? You may want to differentiate / test
>>> if ZF of CF is set.
>> I store the flags as a global variant. You mean I need to detect ZF/CF
>> after vmlaunch/vmresume?
>
> Yes - if you want to check correct emulation of those instructions
> completely.
>
> Jan
>
>



-- 
Arthur Chunqi Li
Department of Computer Science
School of EECS
Peking University
Beijing, China

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC PATCH] kvm-unit-tests : Basic architecture of VMX nested test case
  2013-07-24 11:20                                       ` Arthur Chunqi Li
@ 2013-07-24 11:25                                         ` Jan Kiszka
  0 siblings, 0 replies; 24+ messages in thread
From: Jan Kiszka @ 2013-07-24 11:25 UTC (permalink / raw)
  To: Arthur Chunqi Li; +Cc: Paolo Bonzini, Gleb Natapov, kvm

[-- Attachment #1: Type: text/plain, Size: 548 bytes --]

On 2013-07-24 13:20, Arthur Chunqi Li wrote:
> And what about this version:
> 
> static int vmx_run()
> {
>     u32 ret = 0;
> 
>     asm volatile(
>         "mov %%rsp, %%rsi\n\t"
>         "mov %2, %%edi\n\t"
>         "call vmcs_write\n\t"
> 
>         "0: "
>         LOAD_GPR_C
>         "cmpl $0, %1\n\t"
>         "jne 1f\n\t"
>         "vmlaunch;seta %1\n\t"

That doesn't differentiate between CF and ZF (so that you can check if
vmlaunch set the right flag).

Also, only one instruction per line, please.

Jan



[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 263 bytes --]

^ permalink raw reply	[flat|nested] 24+ messages in thread

end of thread, other threads:[~2013-07-24 11:25 UTC | newest]

Thread overview: 24+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-07-17 18:54 [RFC PATCH] kvm-unit-tests : Basic architecture of VMX nested test case Arthur Chunqi Li
2013-07-18  5:52 ` Paolo Bonzini
2013-07-18  7:26   ` Gleb Natapov
2013-07-18 10:47     ` Paolo Bonzini
2013-07-18 11:06       ` Gleb Natapov
2013-07-18 12:08         ` Paolo Bonzini
2013-07-18 14:11           ` Arthur Chunqi Li
2013-07-18 19:57           ` Gleb Natapov
2013-07-19  6:42             ` Paolo Bonzini
2013-07-19  9:40               ` Gleb Natapov
2013-07-19 12:06                 ` Paolo Bonzini
2013-07-24  6:11                   ` Arthur Chunqi Li
2013-07-24  6:40                     ` Paolo Bonzini
2013-07-24  6:46                       ` Arthur Chunqi Li
2013-07-24  6:48                         ` Paolo Bonzini
2013-07-24  8:48                           ` Arthur Chunqi Li
2013-07-24  8:53                             ` Jan Kiszka
2013-07-24  9:16                             ` Paolo Bonzini
2013-07-24  9:56                               ` Arthur Chunqi Li
2013-07-24 10:03                                 ` Jan Kiszka
2013-07-24 10:16                                   ` Arthur Chunqi Li
2013-07-24 10:24                                     ` Jan Kiszka
2013-07-24 11:20                                       ` Arthur Chunqi Li
2013-07-24 11:25                                         ` Jan Kiszka

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.