* [PATCH 0/4] HVM: produce better binary code
@ 2013-08-23 13:58 Jan Beulich
  2013-08-23 14:01 ` [PATCH 1/4] VMX: streamline entry.S code Jan Beulich
                   ` (4 more replies)
  0 siblings, 5 replies; 60+ messages in thread
From: Jan Beulich @ 2013-08-23 13:58 UTC (permalink / raw)
  To: xen-devel; +Cc: Keir Fraser

1: VMX: streamline entry.S code
2: VMX: move various uses of UD2 out of fast paths
3: VMX: use proper instruction mnemonics if assembler supports them
4: SVM: streamline entry.S code

Signed-off-by: Jan Beulich <jbeulich@suse.com>


* [PATCH 1/4] VMX: streamline entry.S code
  2013-08-23 13:58 [PATCH 0/4] HVM: produce better binary code Jan Beulich
@ 2013-08-23 14:01 ` Jan Beulich
  2013-08-26 10:44   ` Andrew Cooper
  2013-08-29 11:01   ` Tim Deegan
  2013-08-23 14:02 ` [PATCH 2/4] VMX: move various uses of UD2 out of fast paths Jan Beulich
                   ` (3 subsequent siblings)
  4 siblings, 2 replies; 60+ messages in thread
From: Jan Beulich @ 2013-08-23 14:01 UTC (permalink / raw)
  To: xen-devel; +Cc: Keir Fraser, Eddie Dong, Jun Nakajima

- move stuff easily/better done in C into C code
- re-arrange code paths so that no redundant GET_CURRENT() would remain
  on the fast paths
- move long latency operations earlier
- slightly defer disabling interrupts on the VM entry path
- use ENTRY() instead of open coding it

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/hvm/vmx/entry.S
+++ b/xen/arch/x86/hvm/vmx/entry.S
@@ -28,109 +28,66 @@
 
 #define VMRESUME     .byte 0x0f,0x01,0xc3
 #define VMLAUNCH     .byte 0x0f,0x01,0xc2
-#define VMREAD(off)  .byte 0x0f,0x78,0x47,((off)-UREGS_rip)
-#define VMWRITE(off) .byte 0x0f,0x79,0x47,((off)-UREGS_rip)
 
-/* VMCS field encodings */
-#define GUEST_RSP    0x681c
-#define GUEST_RIP    0x681e
-#define GUEST_RFLAGS 0x6820
-
-        ALIGN
-.globl vmx_asm_vmexit_handler
-vmx_asm_vmexit_handler:
+ENTRY(vmx_asm_vmexit_handler)
         push %rdi
         push %rsi
         push %rdx
         push %rcx
         push %rax
+        mov  %cr2,%rax
         push %r8
         push %r9
         push %r10
         push %r11
         push %rbx
+        GET_CURRENT(%rbx)
         push %rbp
         push %r12
         push %r13
         push %r14
         push %r15
 
-        GET_CURRENT(%rbx)
-
         movb $1,VCPU_vmx_launched(%rbx)
-
-        lea  UREGS_rip(%rsp),%rdi
-        mov  $GUEST_RIP,%eax
-        /*VMREAD(UREGS_rip)*/
-        .byte 0x0f,0x78,0x07  /* vmread %rax,(%rdi) */
-        mov  $GUEST_RSP,%eax
-        VMREAD(UREGS_rsp)
-        mov  $GUEST_RFLAGS,%eax
-        VMREAD(UREGS_eflags)
-
-        mov  %cr2,%rax
         mov  %rax,VCPU_hvm_guest_cr2(%rbx)
 
-#ifndef NDEBUG
-        mov  $0xbeef,%ax
-        mov  %ax,UREGS_error_code(%rsp)
-        mov  %ax,UREGS_entry_vector(%rsp)
-        mov  %ax,UREGS_saved_upcall_mask(%rsp)
-        mov  %ax,UREGS_cs(%rsp)
-        mov  %ax,UREGS_ds(%rsp)
-        mov  %ax,UREGS_es(%rsp)
-        mov  %ax,UREGS_fs(%rsp)
-        mov  %ax,UREGS_gs(%rsp)
-        mov  %ax,UREGS_ss(%rsp)
-#endif
-
         mov  %rsp,%rdi
         call vmx_vmexit_handler
 
-.globl vmx_asm_do_vmentry
-vmx_asm_do_vmentry:
+.Lvmx_do_vmentry:
         call vmx_intr_assist
         call nvmx_switch_guest
         ASSERT_NOT_IN_ATOMIC
 
-        GET_CURRENT(%rbx)
-        cli
-
         mov  VCPU_processor(%rbx),%eax
-        shl  $IRQSTAT_shift,%eax
         lea  irq_stat+IRQSTAT_softirq_pending(%rip),%rdx
-        cmpl $0,(%rdx,%rax,1)
+        xor  %ecx,%ecx
+        shl  $IRQSTAT_shift,%eax
+        cli
+        cmp  %ecx,(%rdx,%rax,1)
         jnz  .Lvmx_process_softirqs
 
-        testb $0xff,VCPU_vmx_emulate(%rbx)
-        jnz .Lvmx_goto_emulator
-        testb $0xff,VCPU_vmx_realmode(%rbx)
-UNLIKELY_START(nz, realmode)
-        cmpw $0,VCPU_vm86_seg_mask(%rbx)
+        cmp  %cl,VCPU_vmx_emulate(%rbx)
+        jne .Lvmx_goto_emulator
+        cmp  %cl,VCPU_vmx_realmode(%rbx)
+UNLIKELY_START(ne, realmode)
+        cmp  %cx,VCPU_vm86_seg_mask(%rbx)
         jnz .Lvmx_goto_emulator
         mov  %rsp,%rdi
         call vmx_enter_realmode
 UNLIKELY_END(realmode)
 
+        mov  %rsp,%rdi
         call vmx_vmenter_helper
         mov  VCPU_hvm_guest_cr2(%rbx),%rax
-        mov  %rax,%cr2
-
-        lea  UREGS_rip(%rsp),%rdi
-        mov  $GUEST_RIP,%eax
-        /*VMWRITE(UREGS_rip)*/
-        .byte 0x0f,0x79,0x07  /* vmwrite (%rdi),%rax */
-        mov  $GUEST_RSP,%eax
-        VMWRITE(UREGS_rsp)
-        mov  $GUEST_RFLAGS,%eax
-        VMWRITE(UREGS_eflags)
 
-        cmpb $0,VCPU_vmx_launched(%rbx)
         pop  %r15
         pop  %r14
         pop  %r13
         pop  %r12
         pop  %rbp
+        mov  %rax,%cr2
+        cmpb $0,VCPU_vmx_launched(%rbx)
         pop  %rbx
         pop  %r11
         pop  %r10
@@ -155,13 +112,17 @@ UNLIKELY_END(realmode)
         call vm_launch_fail
         ud2
 
+ENTRY(vmx_asm_do_vmentry)
+        GET_CURRENT(%rbx)
+        jmp  .Lvmx_do_vmentry
+
 .Lvmx_goto_emulator:
         sti
         mov  %rsp,%rdi
         call vmx_realmode
-        jmp  vmx_asm_do_vmentry
+        jmp  .Lvmx_do_vmentry
 
 .Lvmx_process_softirqs:
         sti
         call do_softirq
-        jmp  vmx_asm_do_vmentry
+        jmp  .Lvmx_do_vmentry
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -2375,6 +2375,12 @@ void vmx_vmexit_handler(struct cpu_user_
     unsigned long exit_qualification, inst_len = 0;
     struct vcpu *v = current;
 
+    regs->rip    = __vmread(GUEST_RIP);
+    regs->rsp    = __vmread(GUEST_RSP);
+    regs->rflags = __vmread(GUEST_RFLAGS);
+
+    hvm_invalidate_regs_fields(regs);
+
     if ( paging_mode_hap(v->domain) && hvm_paging_enabled(v) )
         v->arch.hvm_vcpu.guest_cr[3] = v->arch.hvm_vcpu.hw_cr[3] =
             __vmread(GUEST_CR3);
@@ -2868,7 +2874,7 @@ out:
         nvmx_idtv_handling();
 }
 
-void vmx_vmenter_helper(void)
+void vmx_vmenter_helper(const struct cpu_user_regs *regs)
 {
     struct vcpu *curr = current;
     u32 new_asid, old_asid;
@@ -2910,6 +2916,10 @@ void vmx_vmenter_helper(void)
 
  out:
     HVMTRACE_ND(VMENTRY, 0, 1/*cycles*/, 0, 0, 0, 0, 0, 0, 0);
+
+    __vmwrite(GUEST_RIP,    regs->rip);
+    __vmwrite(GUEST_RSP,    regs->rsp);
+    __vmwrite(GUEST_RFLAGS, regs->rflags);
 }
 
 /*
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -439,6 +439,21 @@ static inline void hvm_set_info_guest(st
 
 int hvm_debug_op(struct vcpu *v, int32_t op);
 
+static inline void hvm_invalidate_regs_fields(struct cpu_user_regs *regs)
+{
+#ifndef NDEBUG
+    regs->error_code = 0xbeef;
+    regs->entry_vector = 0xbeef;
+    regs->saved_upcall_mask = 0xbf;
+    regs->cs = 0xbeef;
+    regs->ss = 0xbeef;
+    regs->ds = 0xbeef;
+    regs->es = 0xbeef;
+    regs->fs = 0xbeef;
+    regs->gs = 0xbeef;
+#endif
+}
+
 int hvm_hap_nested_page_fault(paddr_t gpa,
                               bool_t gla_valid, unsigned long gla,
                               bool_t access_r,




* [PATCH 2/4] VMX: move various uses of UD2 out of fast paths
  2013-08-23 13:58 [PATCH 0/4] HVM: produce better binary code Jan Beulich
  2013-08-23 14:01 ` [PATCH 1/4] VMX: streamline entry.S code Jan Beulich
@ 2013-08-23 14:02 ` Jan Beulich
  2013-08-23 22:06   ` Andrew Cooper
  2013-08-23 14:03 ` [PATCH 3/4] VMX: use proper instruction mnemonics if assembler supports them Jan Beulich
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 60+ messages in thread
From: Jan Beulich @ 2013-08-23 14:02 UTC (permalink / raw)
  To: xen-devel; +Cc: Keir Fraser, Eddie Dong, Jun Nakajima

... at once making conditional forward jumps, which are statically
predicted to be not taken, only used for the unlikely (error) cases.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
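
For readers less familiar with the mechanism, a minimal usage sketch follows
(hypothetical example, not part of the patch; the real users are the __vm*()
wrappers changed below). It assumes asm_defns.h is included and the comments
describe the non-clang .subsection variant:

    /* Hypothetical illustration only: crash on an out-of-line ud2 if "ok" is
     * zero (the unlikely error case), otherwise fall straight through. */
    static inline void unlikely_crash_if_zero(unsigned long ok)
    {
        asm volatile ( "test %0, %0\n\t"
                       UNLIKELY_START(e, example) /* je .Lunlikely... - forward, predicted not taken */
                       "\tud2\n"                  /* emitted in .subsection 1, off the fast path */
                       UNLIKELY_END_SECTION       /* back to .subsection 0 */
                       :
                       : "r" (ok)
                       : "memory" );
    }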

--- a/xen/include/asm-x86/asm_defns.h
+++ b/xen/include/asm-x86/asm_defns.h
@@ -67,6 +67,30 @@ void ret_from_intr(void);
 #define ASSERT_NOT_IN_ATOMIC
 #endif
 
+#else
+
+#ifdef __clang__ /* clang's builtin assember can't do .subsection */
+
+#define UNLIKELY_START_SECTION ".pushsection .fixup,\"ax\""
+#define UNLIKELY_END_SECTION   ".popsection"
+
+#else
+
+#define UNLIKELY_START_SECTION ".subsection 1"
+#define UNLIKELY_END_SECTION   ".subsection 0"
+
+#endif
+
+#define UNLIKELY_START(cond, tag)          \
+        "j" #cond " .Lunlikely%=.tag;\n\t" \
+        UNLIKELY_START_SECTION "\n"        \
+        ".Lunlikely%=.tag:"
+
+#define UNLIKELY_END(tag)                  \
+        "jmp .Llikely%=.tag;\n\t"          \
+        UNLIKELY_END_SECTION "\n"          \
+        ".Llikely%=.tag:"
+
 #endif
 
 #endif /* __X86_ASM_DEFNS_H__ */
--- a/xen/include/asm-x86/hvm/vmx/vmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h
@@ -285,7 +285,9 @@ static inline void __vmptrld(u64 addr)
     asm volatile ( VMPTRLD_OPCODE
                    MODRM_EAX_06
                    /* CF==1 or ZF==1 --> crash (ud2) */
-                   "ja 1f ; ud2 ; 1:\n"
+                   UNLIKELY_START(be, vmptrld)
+                   "\tud2\n"
+                   UNLIKELY_END_SECTION
                    :
                    : "a" (&addr)
                    : "memory");
@@ -296,7 +298,9 @@ static inline void __vmpclear(u64 addr)
     asm volatile ( VMCLEAR_OPCODE
                    MODRM_EAX_06
                    /* CF==1 or ZF==1 --> crash (ud2) */
-                   "ja 1f ; ud2 ; 1:\n"
+                   UNLIKELY_START(be, vmclear)
+                   "\tud2\n"
+                   UNLIKELY_END_SECTION
                    :
                    : "a" (&addr)
                    : "memory");
@@ -309,7 +313,9 @@ static inline unsigned long __vmread(uns
     asm volatile ( VMREAD_OPCODE
                    MODRM_EAX_ECX
                    /* CF==1 or ZF==1 --> crash (ud2) */
-                   "ja 1f ; ud2 ; 1:\n"
+                   UNLIKELY_START(be, vmread)
+                   "\tud2\n"
+                   UNLIKELY_END_SECTION
                    : "=c" (ecx)
                    : "a" (field)
                    : "memory");
@@ -322,7 +328,9 @@ static inline void __vmwrite(unsigned lo
     asm volatile ( VMWRITE_OPCODE
                    MODRM_EAX_ECX
                    /* CF==1 or ZF==1 --> crash (ud2) */
-                   "ja 1f ; ud2 ; 1:\n"
+                   UNLIKELY_START(be, vmwrite)
+                   "\tud2\n"
+                   UNLIKELY_END_SECTION
                    : 
                    : "a" (field) , "c" (value)
                    : "memory");
@@ -360,7 +368,9 @@ static inline void __invept(int type, u6
     asm volatile ( INVEPT_OPCODE
                    MODRM_EAX_08
                    /* CF==1 or ZF==1 --> crash (ud2) */
-                   "ja 1f ; ud2 ; 1:\n"
+                   UNLIKELY_START(be, invept)
+                   "\tud2\n"
+                   UNLIKELY_END_SECTION
                    :
                    : "a" (&operand), "c" (type)
                    : "memory" );
@@ -377,7 +387,9 @@ static inline void __invvpid(int type, u
     /* Fix up #UD exceptions which occur when TLBs are flushed before VMXON. */
     asm volatile ( "1: " INVVPID_OPCODE MODRM_EAX_08
                    /* CF==1 or ZF==1 --> crash (ud2) */
-                   "ja 2f ; ud2 ; 2:\n"
+                   "2: " UNLIKELY_START(be, invvpid)
+                   "\tud2\n"
+                   UNLIKELY_END_SECTION "\n"
                    _ASM_EXTABLE(1b, 2b)
                    :
                    : "a" (&operand), "c" (type)





* [PATCH 3/4] VMX: use proper instruction mnemonics if assembler supports them
  2013-08-23 13:58 [PATCH 0/4] HVM: produce better binary code Jan Beulich
  2013-08-23 14:01 ` [PATCH 1/4] VMX: streamline entry.S code Jan Beulich
  2013-08-23 14:02 ` [PATCH 2/4] VMX: move various uses of UD2 out of fast paths Jan Beulich
@ 2013-08-23 14:03 ` Jan Beulich
  2013-08-24 22:18   ` Andrew Cooper
  2013-08-26  9:03   ` [PATCH v2 " Jan Beulich
  2013-08-23 14:04 ` [PATCH 4/4] SVM: streamline entry.S code Jan Beulich
  2013-09-04 10:06 ` Ping: [PATCH 0/4] HVM: produce better binary code Jan Beulich
  4 siblings, 2 replies; 60+ messages in thread
From: Jan Beulich @ 2013-08-23 14:03 UTC (permalink / raw)
  To: xen-devel; +Cc: Keir Fraser, Eddie Dong, Jun Nakajima

With the hex byte emission we were taking away a good part of
flexibility from the compiler, as for simplicity reasons these were
built using fixed operands. All half way modern build environments
would allow using the mnemonics (but we can't disable the hex variants
yet, since the binutils around at the time gcc 4.1 got released didn't
support these yet).

I didn't convert __vmread() yet because that would, just like for
__vmread_safe(), imply converting to a macro so that the output operand
can be the caller supplied variable rather than an intermediate one. As
that would require touching all invocation points of __vmread() (of
which there are quite a few), I'd first like to be certain the approach
is acceptable; the main question being whether the now conditional code
might be considered to cause future maintenance issues, and the second
being that of parameter/argument ordering (here I made __vmread_safe()
match __vmwrite(), but one could also take the position that read and
write should use the inverse order of one another, in line with the
actual instruction operands).

Additionally I was quite puzzled to find that all the asm()-s involved
here have memory clobbers - what are they needed for? Or can they be
dropped at least in some cases?
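
To make the caller-side impact concrete, here is a hypothetical helper
(illustration only; the real conversions in this patch are vmr() and
shadow_to_vvmcs()) showing how a caller changes from receiving the value
plus a separate error code to passing the destination variable directly
and testing the boolean result:

    /* Hypothetical helper, shown only to illustrate the interface change. */
    static unsigned long read_field_or_zero(unsigned long field)
    {
        unsigned long val;

        /*
         * Old interface (function):
         *     int rc;
         *     val = __vmread_safe(field, &rc);
         *     return rc ? 0 : val;
         *
         * New interface (macro): "val" is filled in directly and the
         * expression evaluates to 1 on success, 0 on failure.
         */
        return __vmread_safe(field, val) ? val : 0;
    }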

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/Config.mk
+++ b/Config.mk
@@ -4,6 +4,12 @@ ifeq ($(filter /%,$(XEN_ROOT)),)
 $(error XEN_ROOT must be absolute)
 endif
 
+# Convenient variables
+comma   := ,
+squote  := '
+empty   :=
+space   := $(empty) $(empty)
+
 # fallback for older make
 realpath = $(wildcard $(foreach file,$(1),$(shell cd -P $(dir $(file)) && echo "$$PWD/$(notdir $(file))")))
 
@@ -128,6 +134,21 @@ endef
 check-$(gcc) = $(call cc-ver-check,CC,0x040100,"Xen requires at least gcc-4.1")
 $(eval $(check-y))
 
+# as-insn: Check whether assembler supports an instruction.
+# Usage: cflags-y += $(call as-insn "insn",option-yes,option-no)
+as-insn = $(if $(shell echo 'void _(void) { asm volatile ( $(2) ); }' \
+                       | $(1) -c -x c -o /dev/null - 2>&1),$(4),$(3))
+
+# as-insn-check: Add an option to compilation flags, but only if insn is
+#                supported by assembler.
+# Usage: $(call as-insn-check CFLAGS,CC,"nop",-DHAVE_GAS_NOP)
+as-insn-check = $(eval $(call as-insn-check-closure,$(1),$(2),$(3),$(4)))
+define as-insn-check-closure
+    ifeq ($$(call as-insn,$$($(2)),$(3),y,n),y)
+        $(1) += $(4)
+    endif
+endef
+
 define buildmakevars2shellvars
     export PREFIX="$(PREFIX)";                                            \
     export XEN_SCRIPT_DIR="$(XEN_SCRIPT_DIR)";                            \
--- a/xen/arch/x86/Rules.mk
+++ b/xen/arch/x86/Rules.mk
@@ -28,6 +28,8 @@ CFLAGS += -msoft-float
 
 $(call cc-options-add,CFLAGS,CC,$(EMBEDDED_EXTRA_CFLAGS))
 $(call cc-option-add,CFLAGS,CC,-Wnested-externs)
+$(call as-insn-check,CFLAGS,CC,"vmcall",-DHAVE_GAS_VMX)
+$(call as-insn-check,CFLAGS,CC,"invept (%rax)$$(comma)%rax",-DHAVE_GAS_EPT)
 
 ifeq ($(supervisor_mode_kernel),y)
 CFLAGS += -DCONFIG_X86_SUPERVISOR_MODE_KERNEL=1
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -1292,12 +1292,11 @@ void vmx_do_resume(struct vcpu *v)
     reset_stack_and_jump(vmx_asm_do_vmentry);
 }
 
-static unsigned long vmr(unsigned long field)
+static inline unsigned long vmr(unsigned long field)
 {
-    int rc;
     unsigned long val;
-    val = __vmread_safe(field, &rc);
-    return rc ? 0 : val;
+
+    return __vmread_safe(field, val) ? val : 0;
 }
 
 static void vmx_dump_sel(char *name, uint32_t selector)
--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -951,13 +951,11 @@ fallback:
         vvmcs_to_shadow(vvmcs, field[i]);
 }
 
-static void shadow_to_vvmcs(void *vvmcs, unsigned int field)
+static inline void shadow_to_vvmcs(void *vvmcs, unsigned int field)
 {
     u64 value;
-    int rc;
 
-    value = __vmread_safe(field, &rc);
-    if ( !rc )
+    if ( __vmread_safe(field, value) )
         __set_vvmcs(vvmcs, field, value);
 }
 
--- a/xen/include/asm-x86/hvm/vmx/vmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h
@@ -282,27 +282,44 @@ extern uint8_t posted_intr_vector;
 
 static inline void __vmptrld(u64 addr)
 {
-    asm volatile ( VMPTRLD_OPCODE
-                   MODRM_EAX_06
+    asm volatile (
+#ifdef HAVE_GAS_VMX
+                   "vmptrld %0\n"
+#else
+                   VMPTRLD_OPCODE MODRM_EAX_06
+#endif
                    /* CF==1 or ZF==1 --> crash (ud2) */
                    UNLIKELY_START(be, vmptrld)
                    "\tud2\n"
                    UNLIKELY_END_SECTION
                    :
+#ifdef HAVE_GAS_VMX
+                   : "m" (addr)
+#else
                    : "a" (&addr)
+#endif
                    : "memory");
 }
 
 static inline void __vmpclear(u64 addr)
 {
-    asm volatile ( VMCLEAR_OPCODE
-                   MODRM_EAX_06
+    asm volatile (
+#ifdef HAVE_GAS_VMX
+                   "vmclear %0\n"
+#else
+                   VMCLEAR_OPCODE MODRM_EAX_06
+#endif
                    /* CF==1 or ZF==1 --> crash (ud2) */
                    UNLIKELY_START(be, vmclear)
                    "\tud2\n"
                    UNLIKELY_END_SECTION
+#ifdef HAVE_GAS_VMX
+                   : "+m" (addr)
+                   :
+#else
                    :
                    : "a" (&addr)
+#endif
                    : "memory");
 }
 
@@ -325,31 +342,58 @@ static inline unsigned long __vmread(uns
 
 static inline void __vmwrite(unsigned long field, unsigned long value)
 {
-    asm volatile ( VMWRITE_OPCODE
-                   MODRM_EAX_ECX
+    asm volatile (
+#ifdef HAVE_GAS_VMX
+                   "vmwrite %1, %0\n"
+#else
+                   VMWRITE_OPCODE MODRM_EAX_ECX
+#endif
                    /* CF==1 or ZF==1 --> crash (ud2) */
                    UNLIKELY_START(be, vmwrite)
                    "\tud2\n"
                    UNLIKELY_END_SECTION
                    : 
+#ifdef HAVE_GAS_VMX
+                   : "r" (field) , "rm" (value)
+#else
                    : "a" (field) , "c" (value)
+#endif
                    : "memory");
 }
 
-static inline unsigned long __vmread_safe(unsigned long field, int *error)
-{
-    unsigned long ecx;
-
-    asm volatile ( VMREAD_OPCODE
-                   MODRM_EAX_ECX
-                   /* CF==1 or ZF==1 --> rc = -1 */
-                   "setna %b0 ; neg %0"
-                   : "=q" (*error), "=c" (ecx)
-                   : "0" (0), "a" (field)
-                   : "memory");
-
-    return ecx;
-}
+#ifdef HAVE_GAS_VMX
+#define __vmread_safe(field, value) ({ \
+    bool_t okay_; \
+    if ( sizeof(value) == sizeof(long) ) \
+        asm volatile ( "vmread %q2, %1\n\t" \
+                       /* CF==1 or ZF==1 --> rc = 0 */ \
+                       "setnbe %0" \
+                       : "=qm" (okay_), "=rm" (value) \
+                       : "r" (field) \
+                       : "memory"); \
+    else \
+    { \
+        asm volatile ( "vmread %q2, %q1\n\t" \
+                       /* CF==1 or ZF==1 --> rc = 0 */ \
+                       "setnbe %0" \
+                       : "=qm" (okay_), "=r" (value) \
+                       : "r" (field) \
+                       : "memory"); \
+    } \
+    okay_; \
+})
+#else
+#define __vmread_safe(field, value) ({ \
+    bool_t okay_; \
+    asm volatile ( VMREAD_OPCODE MODRM_EAX_ECX \
+                   /* CF==1 or ZF==1 --> rc = 0 */ \
+                   "setnbe %0" \
+                   : "=qm" (okay_), "=c" (value) \
+                   : "a" (field) \
+                   : "memory"); \
+    okay_; \
+})
+#endif
 
 static inline void __invept(int type, u64 eptp, u64 gpa)
 {
@@ -365,14 +409,22 @@ static inline void __invept(int type, u6
          !cpu_has_vmx_ept_invept_single_context )
         type = INVEPT_ALL_CONTEXT;
 
-    asm volatile ( INVEPT_OPCODE
-                   MODRM_EAX_08
+    asm volatile (
+#ifdef HAVE_GAS_EPT
+                   "invept %0, %q1\n"
+#else
+                   INVEPT_OPCODE MODRM_EAX_08
+#endif
                    /* CF==1 or ZF==1 --> crash (ud2) */
                    UNLIKELY_START(be, invept)
                    "\tud2\n"
                    UNLIKELY_END_SECTION
                    :
+#ifdef HAVE_GAS_EPT
+                   : "m" (operand), "r" (type)
+#else
                    : "a" (&operand), "c" (type)
+#endif
                    : "memory" );
 }
 
@@ -385,14 +437,23 @@ static inline void __invvpid(int type, u
     } __attribute__ ((packed)) operand = {vpid, 0, gva};
 
     /* Fix up #UD exceptions which occur when TLBs are flushed before VMXON. */
-    asm volatile ( "1: " INVVPID_OPCODE MODRM_EAX_08
+    asm volatile ( "1: "
+#ifdef HAVE_GAS_EPT
+                   "invvpid %0, %q1\n"
+#else
+                   INVVPID_OPCODE MODRM_EAX_08
+#endif
                    /* CF==1 or ZF==1 --> crash (ud2) */
                    "2: " UNLIKELY_START(be, invvpid)
                    "\tud2\n"
                    UNLIKELY_END_SECTION "\n"
                    _ASM_EXTABLE(1b, 2b)
                    :
+#ifdef HAVE_GAS_EPT
+                   : "m" (operand), "r" (type)
+#else
                    : "a" (&operand), "c" (type)
+#endif
                    : "memory" );
 }
 




* [PATCH 4/4] SVM: streamline entry.S code
  2013-08-23 13:58 [PATCH 0/4] HVM: produce better binary code Jan Beulich
                   ` (2 preceding siblings ...)
  2013-08-23 14:03 ` [PATCH 3/4] VMX: use proper instruction mnemonics if assembler supports them Jan Beulich
@ 2013-08-23 14:04 ` Jan Beulich
  2013-08-26 16:20   ` Andrew Cooper
                     ` (3 more replies)
  2013-09-04 10:06 ` Ping: [PATCH 0/4] HVM: produce better binary code Jan Beulich
  4 siblings, 4 replies; 60+ messages in thread
From: Jan Beulich @ 2013-08-23 14:04 UTC (permalink / raw)
  To: xen-devel; +Cc: Keir Fraser, Boris Ostrovsky, Jacob Shin, suravee.suthikulpanit

- move stuff easily/better done in C into C code
- re-arrange code paths so that no redundant GET_CURRENT() would remain
  on the fast paths
- move long latency operations earlier
- slightly defer disabling global interrupts on the VM entry path

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/hvm/svm/entry.S
+++ b/xen/arch/x86/hvm/svm/entry.S
@@ -32,28 +32,34 @@
 #define CLGI   .byte 0x0F,0x01,0xDD
 
 ENTRY(svm_asm_do_resume)
+        GET_CURRENT(%rbx)
+.Lsvm_do_resume:
         call svm_intr_assist
         mov  %rsp,%rdi
         call nsvm_vcpu_switch
         ASSERT_NOT_IN_ATOMIC
 
-        GET_CURRENT(%rbx)
-        CLGI
-
         mov  VCPU_processor(%rbx),%eax
-        shl  $IRQSTAT_shift,%eax
         lea  irq_stat+IRQSTAT_softirq_pending(%rip),%rdx
-        cmpl $0,(%rdx,%rax,1)
+        xor  %ecx,%ecx
+        shl  $IRQSTAT_shift,%eax
+        CLGI
+        cmp  %ecx,(%rdx,%rax,1)
         jne  .Lsvm_process_softirqs
 
-        testb $0, VCPU_nsvm_hap_enabled(%rbx)
-UNLIKELY_START(nz, nsvm_hap)
-        mov  VCPU_nhvm_p2m(%rbx),%rax
-        test %rax,%rax
+        cmp  %cl,VCPU_nsvm_hap_enabled(%rbx)
+UNLIKELY_START(ne, nsvm_hap)
+        cmp  %rcx,VCPU_nhvm_p2m(%rbx)
         sete %al
-        andb VCPU_nhvm_guestmode(%rbx),%al
-        jnz  .Lsvm_nsvm_no_p2m
-UNLIKELY_END(nsvm_hap)
+        test VCPU_nhvm_guestmode(%rbx),%al
+        UNLIKELY_DONE(z, nsvm_hap)
+        /*
+         * Someone shot down our nested p2m table; go round again
+         * and nsvm_vcpu_switch() will fix it for us.
+         */
+        STGI
+        jmp  .Lsvm_do_resume
+__UNLIKELY_END(nsvm_hap)
 
         call svm_asid_handle_vmrun
 
@@ -72,13 +78,12 @@ UNLIKELY_END(svm_trace)
         mov  UREGS_eflags(%rsp),%rax
         mov  %rax,VMCB_rflags(%rcx)
 
-        mov  VCPU_svm_vmcb_pa(%rbx),%rax
-
         pop  %r15
         pop  %r14
         pop  %r13
         pop  %r12
         pop  %rbp
+        mov  VCPU_svm_vmcb_pa(%rbx),%rax
         pop  %rbx
         pop  %r11
         pop  %r10
@@ -92,25 +97,26 @@ UNLIKELY_END(svm_trace)
 
         VMRUN
 
+        GET_CURRENT(%rax)
         push %rdi
         push %rsi
         push %rdx
         push %rcx
+        mov  VCPU_svm_vmcb(%rax),%rcx
         push %rax
         push %r8
         push %r9
         push %r10
         push %r11
         push %rbx
+        mov  %rax,%rbx
         push %rbp
         push %r12
         push %r13
         push %r14
         push %r15
 
-        GET_CURRENT(%rbx)
         movb $0,VCPU_svm_vmcb_in_sync(%rbx)
-        mov  VCPU_svm_vmcb(%rbx),%rcx
         mov  VMCB_rax(%rcx),%rax
         mov  %rax,UREGS_rax(%rsp)
         mov  VMCB_rip(%rcx),%rax
@@ -120,33 +126,14 @@ UNLIKELY_END(svm_trace)
         mov  VMCB_rflags(%rcx),%rax
         mov  %rax,UREGS_eflags(%rsp)
 
-#ifndef NDEBUG
-        mov  $0xbeef,%ax
-        mov  %ax,UREGS_error_code(%rsp)
-        mov  %ax,UREGS_entry_vector(%rsp)
-        mov  %ax,UREGS_saved_upcall_mask(%rsp)
-        mov  %ax,UREGS_cs(%rsp)
-        mov  %ax,UREGS_ds(%rsp)
-        mov  %ax,UREGS_es(%rsp)
-        mov  %ax,UREGS_fs(%rsp)
-        mov  %ax,UREGS_gs(%rsp)
-        mov  %ax,UREGS_ss(%rsp)
-#endif
-
         STGI
 .globl svm_stgi_label
 svm_stgi_label:
         mov  %rsp,%rdi
         call svm_vmexit_handler
-        jmp  svm_asm_do_resume
+        jmp  .Lsvm_do_resume
 
 .Lsvm_process_softirqs:
         STGI
         call do_softirq
-        jmp  svm_asm_do_resume
-
-.Lsvm_nsvm_no_p2m:
-        /* Someone shot down our nested p2m table; go round again
-         * and nsvm_vcpu_switch() will fix it for us. */
-        STGI
-        jmp  svm_asm_do_resume
+        jmp  .Lsvm_do_resume
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -2069,6 +2069,8 @@ void svm_vmexit_handler(struct cpu_user_
     vintr_t intr;
     bool_t vcpu_guestmode = 0;
 
+    hvm_invalidate_regs_fields(regs);
+
     if ( paging_mode_hap(v->domain) )
         v->arch.hvm_vcpu.guest_cr[3] = v->arch.hvm_vcpu.hw_cr[3] =
             vmcb_get_cr3(vmcb);
--- a/xen/include/asm-x86/asm_defns.h
+++ b/xen/include/asm-x86/asm_defns.h
@@ -39,11 +39,17 @@ void ret_from_intr(void);
         .subsection 1;            \
         .Lunlikely.tag:
 
-#define UNLIKELY_END(tag)         \
-        jmp .Llikely.tag;         \
+#define UNLIKELY_DONE(cond, tag)  \
+        j##cond .Llikely.tag
+
+#define __UNLIKELY_END(tag)       \
         .subsection 0;            \
         .Llikely.tag:
 
+#define UNLIKELY_END(tag)         \
+        UNLIKELY_DONE(mp, tag);   \
+        __UNLIKELY_END(tag)
+
 #define STACK_CPUINFO_FIELD(field) (STACK_SIZE-CPUINFO_sizeof+CPUINFO_##field)
 #define GET_STACK_BASE(reg)                       \
         movq $~(STACK_SIZE-1),reg;                \




* Re: [PATCH 2/4] VMX: move various uses of UD2 out of fast paths
  2013-08-23 14:02 ` [PATCH 2/4] VMX: move various uses of UD2 out of fast paths Jan Beulich
@ 2013-08-23 22:06   ` Andrew Cooper
  2013-08-26  8:50     ` Jan Beulich
  2013-08-26  8:58     ` [PATCH v2 " Jan Beulich
  0 siblings, 2 replies; 60+ messages in thread
From: Andrew Cooper @ 2013-08-23 22:06 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel, Keir Fraser, Eddie Dong, Jun Nakajima


On 23/08/2013 15:02, Jan Beulich wrote:
> ... at once making conditional forward jumps, which are statically
> predicted to be not taken, only used for the unlikely (error) cases.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

I presume it is intentional to create many different symbols for
different ud2 instructions right next to each other?  It isn't clear
from the commit message, but would certainly be the logical choice for
debugging an issue arising from such a bug.

As each of these ud2 instructions will never result in continued normal
execution, is it necessary to have the 'likely' tag and jump back?  I
can't see any non-buggy case where they would be executed, in which case
they are superfluous.

>
> --- a/xen/include/asm-x86/asm_defns.h
> +++ b/xen/include/asm-x86/asm_defns.h
> @@ -67,6 +67,30 @@ void ret_from_intr(void);
>  #define ASSERT_NOT_IN_ATOMIC
>  #endif
>  
> +#else
> +
> +#ifdef __clang__ /* clang's builtin assember can't do .subsection */
> +
> +#define UNLIKELY_START_SECTION ".pushsection .fixup,\"ax\""
> +#define UNLIKELY_END_SECTION   ".popsection"
> +
> +#else
> +
> +#define UNLIKELY_START_SECTION ".subsection 1"
> +#define UNLIKELY_END_SECTION   ".subsection 0"
> +
> +#endif
> +
> +#define UNLIKELY_START(cond, tag)          \
> +        "j" #cond " .Lunlikely%=.tag;\n\t" \
> +        UNLIKELY_START_SECTION "\n"        \
> +        ".Lunlikely%=.tag:"
> +
> +#define UNLIKELY_END(tag)                  \
> +        "jmp .Llikely%=.tag;\n\t"          \
> +        UNLIKELY_END_SECTION "\n"          \
> +        ".Llikely%=.tag:"
> +
>  #endif
>  
>  #endif /* __X86_ASM_DEFNS_H__ */
> --- a/xen/include/asm-x86/hvm/vmx/vmx.h
> +++ b/xen/include/asm-x86/hvm/vmx/vmx.h
> @@ -285,7 +285,9 @@ static inline void __vmptrld(u64 addr)
>      asm volatile ( VMPTRLD_OPCODE
>                     MODRM_EAX_06
>                     /* CF==1 or ZF==1 --> crash (ud2) */
> -                   "ja 1f ; ud2 ; 1:\n"
> +                   UNLIKELY_START(be, vmptrld)
> +                   "\tud2\n"
> +                   UNLIKELY_END_SECTION
>                     :
>                     : "a" (&addr)
>                     : "memory");
> @@ -296,7 +298,9 @@ static inline void __vmpclear(u64 addr)
>      asm volatile ( VMCLEAR_OPCODE
>                     MODRM_EAX_06
>                     /* CF==1 or ZF==1 --> crash (ud2) */
> -                   "ja 1f ; ud2 ; 1:\n"
> +                   UNLIKELY_START(be, vmclear)
> +                   "\tud2\n"
> +                   UNLIKELY_END_SECTION
>                     :
>                     : "a" (&addr)
>                     : "memory");
> @@ -309,7 +313,9 @@ static inline unsigned long __vmread(uns
>      asm volatile ( VMREAD_OPCODE
>                     MODRM_EAX_ECX
>                     /* CF==1 or ZF==1 --> crash (ud2) */
> -                   "ja 1f ; ud2 ; 1:\n"
> +                   UNLIKELY_START(be, vmread)
> +                   "\tud2\n"
> +                   UNLIKELY_END_SECTION
>                     : "=c" (ecx)
>                     : "a" (field)
>                     : "memory");
> @@ -322,7 +328,9 @@ static inline void __vmwrite(unsigned lo
>      asm volatile ( VMWRITE_OPCODE
>                     MODRM_EAX_ECX
>                     /* CF==1 or ZF==1 --> crash (ud2) */
> -                   "ja 1f ; ud2 ; 1:\n"
> +                   UNLIKELY_START(be, vmwrite)
> +                   "\tud2\n"
> +                   UNLIKELY_END_SECTION
>                     : 
>                     : "a" (field) , "c" (value)
>                     : "memory");
> @@ -360,7 +368,9 @@ static inline void __invept(int type, u6
>      asm volatile ( INVEPT_OPCODE
>                     MODRM_EAX_08
>                     /* CF==1 or ZF==1 --> crash (ud2) */
> -                   "ja 1f ; ud2 ; 1:\n"
> +                   UNLIKELY_START(be, invept)
> +                   "\tud2\n"
> +                   UNLIKELY_END_SECTION
>                     :
>                     : "a" (&operand), "c" (type)
>                     : "memory" );
> @@ -377,7 +387,9 @@ static inline void __invvpid(int type, u
>      /* Fix up #UD exceptions which occur when TLBs are flushed before VMXON. */
>      asm volatile ( "1: " INVVPID_OPCODE MODRM_EAX_08
>                     /* CF==1 or ZF==1 --> crash (ud2) */
> -                   "ja 2f ; ud2 ; 2:\n"
> +                   "2: " UNLIKELY_START(be, invvpid)
> +                   "\tud2\n"
> +                   UNLIKELY_END_SECTION "\n"
>                     _ASM_EXTABLE(1b, 2b)

Is this logic still correct?

To my reading, the logic used to say "if there was an exception while
executing INVVPID, then jump to 2: to fix up." where 2: happens to be
the end of the inline assembly, bypassing the ud2.

After the change, the jbe (formed from UNLIKELY_START) is part of the
fixup section, which possibly includes the ud2.  As the state of the
flags while executing the INVVPID is unknown, this leaves an unknown
chance of "fixing up" to a ud2 instruction.

~Andrew

>                     :
>                     : "a" (&operand), "c" (type)
>
>
>
>
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel


[-- Attachment #1.2: Type: text/html, Size: 6177 bytes --]

[-- Attachment #2: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 3/4] VMX: use proper instruction mnemonics if assembler supports them
  2013-08-23 14:03 ` [PATCH 3/4] VMX: use proper instruction mnemonics if assembler supports them Jan Beulich
@ 2013-08-24 22:18   ` Andrew Cooper
  2013-08-26  9:06     ` Jan Beulich
  2013-08-26  9:03   ` [PATCH v2 " Jan Beulich
  1 sibling, 1 reply; 60+ messages in thread
From: Andrew Cooper @ 2013-08-24 22:18 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel, Keir Fraser, Eddie Dong, Jun Nakajima


[-- Attachment #1.1: Type: text/plain, Size: 11040 bytes --]

On 23/08/2013 15:03, Jan Beulich wrote:
> With the hex byte emission we were taking away a good part of
> flexibility from the compiler, as for simplicity reasons these were
> built using fixed operands. All half way modern build environments
> would allow using the mnemonics (but we can't disable the hex variants
> yet, since the binutils around at the time gcc 4.1 got released didn't
> support these yet).
>
> I didn't convert __vmread() yet because that would, just like for
> __vmread_safe(), imply converting to a macro so that the output operand
> can be the caller supplied variable rather than an intermediate one. As
> that would require touching all invocation points of __vmread() (of
> which there are quite a few), I'd first like to be certain the approach
> is acceptable; the main question being whether the now conditional code
> might be considered to cause future maintenance issues, and the second
> being that of parameter/argument ordering (here I made __vmread_safe()
> match __vmwrite(), but one could also take the position that read and
> write should use the inverse order of one another, in line with the
> actual instruction operands).
>
> Additionally I was quite puzzled to find that all the asm()-s involved
> here have memory clobbers - what are they needed for? Or can they be
> dropped at least in some cases?
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

I certainly like and encourage the idea of getting rid of the fixed
registers due to the hex literals.  However, are macros really the
correct solution?

__vmread_safe(field, value) now has non-function-like semantics for value.

To me, the more obvious (and as far as I can tell, functionally
identical) approach would be:

unsigned long always_inline __vmread_safe(unsigned long field, unsigned
long *value)

The compiler, given the correct constraints for value, could resolve the
pointer back to the caller as it inlined the function, and calls would
look like "__vmread_safe(field, &value);" which looks rather more natural.

As for the memory clobbers, I see no indication from the VMX instruction
reference that vmread or inv{ept,vpid} have relevant side effects
requiring a clobber.  For the load functions and vmwrite, there is a
possibilty of a memory clobber if someone has peaked into the relevant
VMCS page(s).  However, doing so defeats the purpose of vmread/write, so
as far as I can tell, we could argue away the clobbers on those grounds
alone.
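
As a purely illustrative sketch of what dropping the clobber would mean for
e.g. __invept() - assuming HAVE_GAS_EPT and the UNLIKELY_* macros from patch
2, and leaving open whether doing so is actually safe:

static inline void __invept_noclobber(int type, u64 eptp)  /* hypothetical */
{
    struct {
        u64 eptp, gpa;
    } __attribute__ ((packed)) operand = { eptp, 0 };

    asm volatile ( "invept %0, %q1\n"
                   /* CF==1 or ZF==1 --> crash (ud2) */
                   UNLIKELY_START(be, invept_nc)
                   "\tud2\n"
                   UNLIKELY_END_SECTION
                   :
                   : "m" (operand), "r" (type) );  /* no "memory" clobber */
}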

~Andrew

>
> --- a/Config.mk
> +++ b/Config.mk
> @@ -4,6 +4,12 @@ ifeq ($(filter /%,$(XEN_ROOT)),)
>  $(error XEN_ROOT must be absolute)
>  endif
>  
> +# Convenient variables
> +comma   := ,
> +squote  := '
> +empty   :=
> +space   := $(empty) $(empty)
> +
>  # fallback for older make
>  realpath = $(wildcard $(foreach file,$(1),$(shell cd -P $(dir $(file)) && echo "$$PWD/$(notdir $(file))")))
>  
> @@ -128,6 +134,21 @@ endef
>  check-$(gcc) = $(call cc-ver-check,CC,0x040100,"Xen requires at least gcc-4.1")
>  $(eval $(check-y))
>  
> +# as-insn: Check whether assembler supports an instruction.
> +# Usage: cflags-y += $(call as-insn "insn",option-yes,option-no)
> +as-insn = $(if $(shell echo 'void _(void) { asm volatile ( $(2) ); }' \
> +                       | $(1) -c -x c -o /dev/null - 2>&1),$(4),$(3))
> +
> +# as-insn-check: Add an option to compilation flags, but only if insn is
> +#                supported by assembler.
> +# Usage: $(call as-insn-check CFLAGS,CC,"nop",-DHAVE_GAS_NOP)
> +as-insn-check = $(eval $(call as-insn-check-closure,$(1),$(2),$(3),$(4)))
> +define as-insn-check-closure
> +    ifeq ($$(call as-insn,$$($(2)),$(3),y,n),y)
> +        $(1) += $(4)
> +    endif
> +endef
> +
>  define buildmakevars2shellvars
>      export PREFIX="$(PREFIX)";                                            \
>      export XEN_SCRIPT_DIR="$(XEN_SCRIPT_DIR)";                            \
> --- a/xen/arch/x86/Rules.mk
> +++ b/xen/arch/x86/Rules.mk
> @@ -28,6 +28,8 @@ CFLAGS += -msoft-float
>  
>  $(call cc-options-add,CFLAGS,CC,$(EMBEDDED_EXTRA_CFLAGS))
>  $(call cc-option-add,CFLAGS,CC,-Wnested-externs)
> +$(call as-insn-check,CFLAGS,CC,"vmcall",-DHAVE_GAS_VMX)
> +$(call as-insn-check,CFLAGS,CC,"invept (%rax)$$(comma)%rax",-DHAVE_GAS_EPT)
>  
>  ifeq ($(supervisor_mode_kernel),y)
>  CFLAGS += -DCONFIG_X86_SUPERVISOR_MODE_KERNEL=1
> --- a/xen/arch/x86/hvm/vmx/vmcs.c
> +++ b/xen/arch/x86/hvm/vmx/vmcs.c
> @@ -1292,12 +1292,11 @@ void vmx_do_resume(struct vcpu *v)
>      reset_stack_and_jump(vmx_asm_do_vmentry);
>  }
>  
> -static unsigned long vmr(unsigned long field)
> +static inline unsigned long vmr(unsigned long field)
>  {
> -    int rc;
>      unsigned long val;
> -    val = __vmread_safe(field, &rc);
> -    return rc ? 0 : val;
> +
> +    return __vmread_safe(field, val) ? val : 0;
>  }
>  
>  static void vmx_dump_sel(char *name, uint32_t selector)
> --- a/xen/arch/x86/hvm/vmx/vvmx.c
> +++ b/xen/arch/x86/hvm/vmx/vvmx.c
> @@ -951,13 +951,11 @@ fallback:
>          vvmcs_to_shadow(vvmcs, field[i]);
>  }
>  
> -static void shadow_to_vvmcs(void *vvmcs, unsigned int field)
> +static inline void shadow_to_vvmcs(void *vvmcs, unsigned int field)
>  {
>      u64 value;
> -    int rc;
>  
> -    value = __vmread_safe(field, &rc);
> -    if ( !rc )
> +    if ( __vmread_safe(field, value) )
>          __set_vvmcs(vvmcs, field, value);
>  }
>  
> --- a/xen/include/asm-x86/hvm/vmx/vmx.h
> +++ b/xen/include/asm-x86/hvm/vmx/vmx.h
> @@ -282,27 +282,44 @@ extern uint8_t posted_intr_vector;
>  
>  static inline void __vmptrld(u64 addr)
>  {
> -    asm volatile ( VMPTRLD_OPCODE
> -                   MODRM_EAX_06
> +    asm volatile (
> +#ifdef HAVE_GAS_VMX
> +                   "vmptrld %0\n"
> +#else
> +                   VMPTRLD_OPCODE MODRM_EAX_06
> +#endif
>                     /* CF==1 or ZF==1 --> crash (ud2) */
>                     UNLIKELY_START(be, vmptrld)
>                     "\tud2\n"
>                     UNLIKELY_END_SECTION
>                     :
> +#ifdef HAVE_GAS_VMX
> +                   : "m" (addr)
> +#else
>                     : "a" (&addr)
> +#endif
>                     : "memory");
>  }
>  
>  static inline void __vmpclear(u64 addr)
>  {
> -    asm volatile ( VMCLEAR_OPCODE
> -                   MODRM_EAX_06
> +    asm volatile (
> +#ifdef HAVE_GAS_VMX
> +                   "vmclear %0\n"
> +#else
> +                   VMCLEAR_OPCODE MODRM_EAX_06
> +#endif
>                     /* CF==1 or ZF==1 --> crash (ud2) */
>                     UNLIKELY_START(be, vmclear)
>                     "\tud2\n"
>                     UNLIKELY_END_SECTION
> +#ifdef HAVE_GAS_VMX
> +                   : "+m" (addr)
> +                   :
> +#else
>                     :
>                     : "a" (&addr)
> +#endif
>                     : "memory");
>  }
>  
> @@ -325,31 +342,58 @@ static inline unsigned long __vmread(uns
>  
>  static inline void __vmwrite(unsigned long field, unsigned long value)
>  {
> -    asm volatile ( VMWRITE_OPCODE
> -                   MODRM_EAX_ECX
> +    asm volatile (
> +#ifdef HAVE_GAS_VMX
> +                   "vmwrite %1, %0\n"
> +#else
> +                   VMWRITE_OPCODE MODRM_EAX_ECX
> +#endif
>                     /* CF==1 or ZF==1 --> crash (ud2) */
>                     UNLIKELY_START(be, vmwrite)
>                     "\tud2\n"
>                     UNLIKELY_END_SECTION
>                     : 
> +#ifdef HAVE_GAS_VMX
> +                   : "r" (field) , "rm" (value)
> +#else
>                     : "a" (field) , "c" (value)
> +#endif
>                     : "memory");
>  }
>  
> -static inline unsigned long __vmread_safe(unsigned long field, int *error)
> -{
> -    unsigned long ecx;
> -
> -    asm volatile ( VMREAD_OPCODE
> -                   MODRM_EAX_ECX
> -                   /* CF==1 or ZF==1 --> rc = -1 */
> -                   "setna %b0 ; neg %0"
> -                   : "=q" (*error), "=c" (ecx)
> -                   : "0" (0), "a" (field)
> -                   : "memory");
> -
> -    return ecx;
> -}
> +#ifdef HAVE_GAS_VMX
> +#define __vmread_safe(field, value) ({ \
> +    bool_t okay_; \
> +    if ( sizeof(value) == sizeof(long) ) \
> +        asm volatile ( "vmread %q2, %1\n\t" \
> +                       /* CF==1 or ZF==1 --> rc = 0 */ \
> +                       "setnbe %0" \
> +                       : "=qm" (okay_), "=rm" (value) \
> +                       : "r" (field) \
> +                       : "memory"); \
> +    else \
> +    { \
> +        asm volatile ( "vmread %q2, %q1\n\t" \
> +                       /* CF==1 or ZF==1 --> rc = 0 */ \
> +                       "setnbe %0" \
> +                       : "=qm" (okay_), "=r" (value) \
> +                       : "r" (field) \
> +                       : "memory"); \
> +    } \
> +    okay_; \
> +})
> +#else
> +#define __vmread_safe(field, value) ({ \
> +    bool_t okay_; \
> +    asm volatile ( VMREAD_OPCODE MODRM_EAX_ECX \
> +                   /* CF==1 or ZF==1 --> rc = 0 */ \
> +                   "setnbe %0" \
> +                   : "=qm" (okay_), "=c" (value) \
> +                   : "a" (field) \
> +                   : "memory"); \
> +    okay_; \
> +})
> +#endif
>  
>  static inline void __invept(int type, u64 eptp, u64 gpa)
>  {
> @@ -365,14 +409,22 @@ static inline void __invept(int type, u6
>           !cpu_has_vmx_ept_invept_single_context )
>          type = INVEPT_ALL_CONTEXT;
>  
> -    asm volatile ( INVEPT_OPCODE
> -                   MODRM_EAX_08
> +    asm volatile (
> +#ifdef HAVE_GAS_EPT
> +                   "invept %0, %q1\n"
> +#else
> +                   INVEPT_OPCODE MODRM_EAX_08
> +#endif
>                     /* CF==1 or ZF==1 --> crash (ud2) */
>                     UNLIKELY_START(be, invept)
>                     "\tud2\n"
>                     UNLIKELY_END_SECTION
>                     :
> +#ifdef HAVE_GAS_EPT
> +                   : "m" (operand), "r" (type)
> +#else
>                     : "a" (&operand), "c" (type)
> +#endif
>                     : "memory" );
>  }
>  
> @@ -385,14 +437,23 @@ static inline void __invvpid(int type, u
>      } __attribute__ ((packed)) operand = {vpid, 0, gva};
>  
>      /* Fix up #UD exceptions which occur when TLBs are flushed before VMXON. */
> -    asm volatile ( "1: " INVVPID_OPCODE MODRM_EAX_08
> +    asm volatile ( "1: "
> +#ifdef HAVE_GAS_EPT
> +                   "invvpid %0, %q1\n"
> +#else
> +                   INVVPID_OPCODE MODRM_EAX_08
> +#endif
>                     /* CF==1 or ZF==1 --> crash (ud2) */
>                     "2: " UNLIKELY_START(be, invvpid)
>                     "\tud2\n"
>                     UNLIKELY_END_SECTION "\n"
>                     _ASM_EXTABLE(1b, 2b)
>                     :
> +#ifdef HAVE_GAS_EPT
> +                   : "m" (operand), "r" (type)
> +#else
>                     : "a" (&operand), "c" (type)
> +#endif
>                     : "memory" );
>  }
>  
>
>
>
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel


[-- Attachment #1.2: Type: text/html, Size: 11551 bytes --]

[-- Attachment #2: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 2/4] VMX: move various uses of UD2 out of fast paths
  2013-08-23 22:06   ` Andrew Cooper
@ 2013-08-26  8:50     ` Jan Beulich
  2013-08-26  9:07       ` Andrew Cooper
  2013-08-26  8:58     ` [PATCH v2 " Jan Beulich
  1 sibling, 1 reply; 60+ messages in thread
From: Jan Beulich @ 2013-08-26  8:50 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: xen-devel, Keir Fraser, Eddie Dong, Jun Nakajima

>>> On 24.08.13 at 00:06, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
> On 23/08/2013 15:02, Jan Beulich wrote:
>> ... at once making conditional forward jumps, which are statically
>> predicted to be not taken, only used for the unlikely (error) cases.
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> 
> I presume it is intentional to create many different symbols for
> different ud2 instructions right next to each other?  It isn't clear
> from the commit message, but would certainly be the logical choice for
> debugging an issue arising from such a bug.

These are labels that don't make it into the object file's symbol
table anyway, so their names don't matter (but need to all be
distinct to avoid duplicate names at assembly time).

The addresses were chosen to be all different (rather than
having a single global UD2 that all of the failure paths jump
to) in order to be able to re-associate an eventual crash with
its origin. As that isn't different from how the code behaved
before, I don't see a need for mentioning this in the description.
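
Purely as an illustration (not part of the patch): the %= in the label names
expands to a number unique to each asm instance, so even two uses of the same
tag within one function get distinct labels, and hence distinct ud2 addresses:

static inline void two_writes_example(unsigned long f1, unsigned long f2)
{
    __vmwrite(f1, 0);  /* emits e.g. "jbe .Lunlikely37.vmwrite" ... "ud2" */
    __vmwrite(f2, 0);  /* emits e.g. "jbe .Lunlikely38.vmwrite" ... "ud2" */
}

(The label numbers are of course arbitrary, and the helper is made up just
for the example.)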

> As each of these ud2 instructions will never result in continued normal
> execution, is it necessary to have the 'likely' tag and jump back?  I
> cant see any non-buggy case where they would be executed, at which point
> they are superfluous.

Which "likely" tag are you talking about? They all use
UNLIKELY_END_SECTION rather than UNLIKELY_END, so there's
no extra code being generated afaict.
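
For reference, a rough sketch of the expansion (assuming the non-clang
.subsection variant and a made-up flag-setting example):

static inline void unlikely_example(unsigned long x)  /* hypothetical */
{
    asm volatile ( "cmp $1, %0\n\t"            /* sets the flags           */
                   UNLIKELY_START(be, example) /* jbe to the .subsection 1 */
                                               /* block, i.e. out of line  */
                   "\tud2\n"                   /* never falls through      */
                   UNLIKELY_END_SECTION        /* just ".subsection 0"     */
                   :
                   : "r" (x) );
}

The fast path therefore ends up as just the cmp plus a statically-not-taken
jbe; only UNLIKELY_END(tag) would emit an extra jmp back to a .Llikely
label, which would indeed be pointless after ud2.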

>> @@ -377,7 +387,9 @@ static inline void __invvpid(int type, u
>>      /* Fix up #UD exceptions which occur when TLBs are flushed before VMXON. */
>>      asm volatile ( "1: " INVVPID_OPCODE MODRM_EAX_08
>>                     /* CF==1 or ZF==1 --> crash (ud2) */
>> -                   "ja 2f ; ud2 ; 2:\n"
>> +                   "2: " UNLIKELY_START(be, invvpid)
>> +                   "\tud2\n"
>> +                   UNLIKELY_END_SECTION "\n"
>>                     _ASM_EXTABLE(1b, 2b)
> 
> Is this logic still correct?
> 
> To my reading, the logic used to say "if there was an exception while
> executing INVVPID, then jump to 2: to fix up." where 2: happens to be
> the end of the inline assembly, bypassing the ud2.
> 
> After the change, the jbe (formed from UNLIKELY_START) is part of the
> fixup section, which possibly includes the ud2.  As the state of the
> flags while executing the INVVPID as unknown, this leaves an unknown
> chance of "fixing up" to a ud2 instruction.

Indeed, I screwed this up. Hence there'll be a v2 of this patch
shortly. Thanks for spotting!

Jan

^ permalink raw reply	[flat|nested] 60+ messages in thread

* [PATCH v2 2/4] VMX: move various uses of UD2 out of fast paths
  2013-08-23 22:06   ` Andrew Cooper
  2013-08-26  8:50     ` Jan Beulich
@ 2013-08-26  8:58     ` Jan Beulich
  2013-08-26  9:09       ` Andrew Cooper
  2013-08-29 11:08       ` Tim Deegan
  1 sibling, 2 replies; 60+ messages in thread
From: Jan Beulich @ 2013-08-26  8:58 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper, Keir Fraser, Eddie Dong, Jun Nakajima

[-- Attachment #1: Type: text/plain, Size: 4084 bytes --]

... at once making conditional forward jumps, which are statically
predicted to be not taken, only used for the unlikely (error) cases.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v2: Fix #UD recovery for INVVPID (unintended flow change spotted by Andrew Cooper).

--- a/xen/include/asm-x86/asm_defns.h
+++ b/xen/include/asm-x86/asm_defns.h
@@ -67,6 +67,30 @@ void ret_from_intr(void);
 #define ASSERT_NOT_IN_ATOMIC
 #endif
 
+#else
+
+#ifdef __clang__ /* clang's builtin assember can't do .subsection */
+
+#define UNLIKELY_START_SECTION ".pushsection .fixup,\"ax\""
+#define UNLIKELY_END_SECTION   ".popsection"
+
+#else
+
+#define UNLIKELY_START_SECTION ".subsection 1"
+#define UNLIKELY_END_SECTION   ".subsection 0"
+
+#endif
+
+#define UNLIKELY_START(cond, tag)          \
+        "j" #cond " .Lunlikely%=.tag;\n\t" \
+        UNLIKELY_START_SECTION "\n"        \
+        ".Lunlikely%=.tag:"
+
+#define UNLIKELY_END(tag)                  \
+        "jmp .Llikely%=.tag;\n\t"          \
+        UNLIKELY_END_SECTION "\n"          \
+        ".Llikely%=.tag:"
+
 #endif
 
 #endif /* __X86_ASM_DEFNS_H__ */
--- a/xen/include/asm-x86/hvm/vmx/vmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h
@@ -285,7 +285,9 @@ static inline void __vmptrld(u64 addr)
     asm volatile ( VMPTRLD_OPCODE
                    MODRM_EAX_06
                    /* CF==1 or ZF==1 --> crash (ud2) */
-                   "ja 1f ; ud2 ; 1:\n"
+                   UNLIKELY_START(be, vmptrld)
+                   "\tud2\n"
+                   UNLIKELY_END_SECTION
                    :
                    : "a" (&addr)
                    : "memory");
@@ -296,7 +298,9 @@ static inline void __vmpclear(u64 addr)
     asm volatile ( VMCLEAR_OPCODE
                    MODRM_EAX_06
                    /* CF==1 or ZF==1 --> crash (ud2) */
-                   "ja 1f ; ud2 ; 1:\n"
+                   UNLIKELY_START(be, vmclear)
+                   "\tud2\n"
+                   UNLIKELY_END_SECTION
                    :
                    : "a" (&addr)
                    : "memory");
@@ -309,7 +313,9 @@ static inline unsigned long __vmread(uns
     asm volatile ( VMREAD_OPCODE
                    MODRM_EAX_ECX
                    /* CF==1 or ZF==1 --> crash (ud2) */
-                   "ja 1f ; ud2 ; 1:\n"
+                   UNLIKELY_START(be, vmread)
+                   "\tud2\n"
+                   UNLIKELY_END_SECTION
                    : "=c" (ecx)
                    : "a" (field)
                    : "memory");
@@ -322,7 +328,9 @@ static inline void __vmwrite(unsigned lo
     asm volatile ( VMWRITE_OPCODE
                    MODRM_EAX_ECX
                    /* CF==1 or ZF==1 --> crash (ud2) */
-                   "ja 1f ; ud2 ; 1:\n"
+                   UNLIKELY_START(be, vmwrite)
+                   "\tud2\n"
+                   UNLIKELY_END_SECTION
                    : 
                    : "a" (field) , "c" (value)
                    : "memory");
@@ -360,7 +368,9 @@ static inline void __invept(int type, u6
     asm volatile ( INVEPT_OPCODE
                    MODRM_EAX_08
                    /* CF==1 or ZF==1 --> crash (ud2) */
-                   "ja 1f ; ud2 ; 1:\n"
+                   UNLIKELY_START(be, invept)
+                   "\tud2\n"
+                   UNLIKELY_END_SECTION
                    :
                    : "a" (&operand), "c" (type)
                    : "memory" );
@@ -377,7 +387,10 @@ static inline void __invvpid(int type, u
     /* Fix up #UD exceptions which occur when TLBs are flushed before VMXON. */
     asm volatile ( "1: " INVVPID_OPCODE MODRM_EAX_08
                    /* CF==1 or ZF==1 --> crash (ud2) */
-                   "ja 2f ; ud2 ; 2:\n"
+                   UNLIKELY_START(be, invvpid)
+                   "\tud2\n"
+                   UNLIKELY_END_SECTION "\n"
+                   "2:"
                    _ASM_EXTABLE(1b, 2b)
                    :
                    : "a" (&operand), "c" (type)




[-- Attachment #2: VMX-ud2-out-of-line.patch --]
[-- Type: text/plain, Size: 4129 bytes --]

VMX: move various uses of UD2 out of fast paths

... at once making conditional forward jumps, which are statically
predicted to be not taken, only used for the unlikely (error) cases.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v2: Fix #UD recovery for INVVPID (unintended flow change spotted by Andrew Cooper).

--- a/xen/include/asm-x86/asm_defns.h
+++ b/xen/include/asm-x86/asm_defns.h
@@ -67,6 +67,30 @@ void ret_from_intr(void);
 #define ASSERT_NOT_IN_ATOMIC
 #endif
 
+#else
+
+#ifdef __clang__ /* clang's builtin assember can't do .subsection */
+
+#define UNLIKELY_START_SECTION ".pushsection .fixup,\"ax\""
+#define UNLIKELY_END_SECTION   ".popsection"
+
+#else
+
+#define UNLIKELY_START_SECTION ".subsection 1"
+#define UNLIKELY_END_SECTION   ".subsection 0"
+
+#endif
+
+#define UNLIKELY_START(cond, tag)          \
+        "j" #cond " .Lunlikely%=.tag;\n\t" \
+        UNLIKELY_START_SECTION "\n"        \
+        ".Lunlikely%=.tag:"
+
+#define UNLIKELY_END(tag)                  \
+        "jmp .Llikely%=.tag;\n\t"          \
+        UNLIKELY_END_SECTION "\n"          \
+        ".Llikely%=.tag:"
+
 #endif
 
 #endif /* __X86_ASM_DEFNS_H__ */
--- a/xen/include/asm-x86/hvm/vmx/vmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h
@@ -285,7 +285,9 @@ static inline void __vmptrld(u64 addr)
     asm volatile ( VMPTRLD_OPCODE
                    MODRM_EAX_06
                    /* CF==1 or ZF==1 --> crash (ud2) */
-                   "ja 1f ; ud2 ; 1:\n"
+                   UNLIKELY_START(be, vmptrld)
+                   "\tud2\n"
+                   UNLIKELY_END_SECTION
                    :
                    : "a" (&addr)
                    : "memory");
@@ -296,7 +298,9 @@ static inline void __vmpclear(u64 addr)
     asm volatile ( VMCLEAR_OPCODE
                    MODRM_EAX_06
                    /* CF==1 or ZF==1 --> crash (ud2) */
-                   "ja 1f ; ud2 ; 1:\n"
+                   UNLIKELY_START(be, vmclear)
+                   "\tud2\n"
+                   UNLIKELY_END_SECTION
                    :
                    : "a" (&addr)
                    : "memory");
@@ -309,7 +313,9 @@ static inline unsigned long __vmread(uns
     asm volatile ( VMREAD_OPCODE
                    MODRM_EAX_ECX
                    /* CF==1 or ZF==1 --> crash (ud2) */
-                   "ja 1f ; ud2 ; 1:\n"
+                   UNLIKELY_START(be, vmread)
+                   "\tud2\n"
+                   UNLIKELY_END_SECTION
                    : "=c" (ecx)
                    : "a" (field)
                    : "memory");
@@ -322,7 +328,9 @@ static inline void __vmwrite(unsigned lo
     asm volatile ( VMWRITE_OPCODE
                    MODRM_EAX_ECX
                    /* CF==1 or ZF==1 --> crash (ud2) */
-                   "ja 1f ; ud2 ; 1:\n"
+                   UNLIKELY_START(be, vmwrite)
+                   "\tud2\n"
+                   UNLIKELY_END_SECTION
                    : 
                    : "a" (field) , "c" (value)
                    : "memory");
@@ -360,7 +368,9 @@ static inline void __invept(int type, u6
     asm volatile ( INVEPT_OPCODE
                    MODRM_EAX_08
                    /* CF==1 or ZF==1 --> crash (ud2) */
-                   "ja 1f ; ud2 ; 1:\n"
+                   UNLIKELY_START(be, invept)
+                   "\tud2\n"
+                   UNLIKELY_END_SECTION
                    :
                    : "a" (&operand), "c" (type)
                    : "memory" );
@@ -377,7 +387,10 @@ static inline void __invvpid(int type, u
     /* Fix up #UD exceptions which occur when TLBs are flushed before VMXON. */
     asm volatile ( "1: " INVVPID_OPCODE MODRM_EAX_08
                    /* CF==1 or ZF==1 --> crash (ud2) */
-                   "ja 2f ; ud2 ; 2:\n"
+                   UNLIKELY_START(be, invvpid)
+                   "\tud2\n"
+                   UNLIKELY_END_SECTION "\n"
+                   "2:"
                    _ASM_EXTABLE(1b, 2b)
                    :
                    : "a" (&operand), "c" (type)

[-- Attachment #3: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 60+ messages in thread

* [PATCH v2 3/4] VMX: use proper instruction mnemonics if assembler supports them
  2013-08-23 14:03 ` [PATCH 3/4] VMX: use proper instruction mnemonics if assembler supports them Jan Beulich
  2013-08-24 22:18   ` Andrew Cooper
@ 2013-08-26  9:03   ` Jan Beulich
  1 sibling, 0 replies; 60+ messages in thread
From: Jan Beulich @ 2013-08-26  9:03 UTC (permalink / raw)
  To: xen-devel; +Cc: Keir Fraser, Eddie Dong, Jun Nakajima

[-- Attachment #1: Type: text/plain, Size: 9600 bytes --]

With the hex byte emission we were taking away a good part of
flexibility from the compiler, as for simplicity reasons these were
built using fixed operands. All half way modern build environments
would allow using the mnemonics (but we can't disable the hex variants
yet, since the binutils around at the time gcc 4.1 got released didn't
support these yet).

I didn't convert __vmread() yet because that would, just like for
__vmread_safe(), imply converting to a macro so that the output operand
can be the caller supplied variable rather than an intermediate one. As
that would require touching all invocation points of __vmread() (of
which there are quite a few), I'd first like to be certain the approach
is acceptable; the main question being whether the now conditional code
might be considered to cause future maintenance issues, and the second
being that of parameter/argument ordering (here I made __vmread_safe()
match __vmwrite(), but one could also take the position that read and
write should use the inverse order of one another, in line with the
actual instruction operands).

Additionally I was quite puzzled to find that all the asm()-s involved
here have memory clobbers - what are they needed for? Or can they be
dropped at least in some cases?

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v2: Fix up context after changed patch 2 of this series.

--- a/Config.mk
+++ b/Config.mk
@@ -4,6 +4,12 @@ ifeq ($(filter /%,$(XEN_ROOT)),)
 $(error XEN_ROOT must be absolute)
 endif
 
+# Convenient variables
+comma   := ,
+squote  := '
+empty   :=
+space   := $(empty) $(empty)
+
 # fallback for older make
 realpath = $(wildcard $(foreach file,$(1),$(shell cd -P $(dir $(file)) && echo "$$PWD/$(notdir $(file))")))
 
@@ -128,6 +134,21 @@ endef
 check-$(gcc) = $(call cc-ver-check,CC,0x040100,"Xen requires at least gcc-4.1")
 $(eval $(check-y))
 
+# as-insn: Check whether assembler supports an instruction.
+# Usage: cflags-y += $(call as-insn "insn",option-yes,option-no)
+as-insn = $(if $(shell echo 'void _(void) { asm volatile ( $(2) ); }' \
+                       | $(1) -c -x c -o /dev/null - 2>&1),$(4),$(3))
+
+# as-insn-check: Add an option to compilation flags, but only if insn is
+#                supported by assembler.
+# Usage: $(call as-insn-check CFLAGS,CC,"nop",-DHAVE_GAS_NOP)
+as-insn-check = $(eval $(call as-insn-check-closure,$(1),$(2),$(3),$(4)))
+define as-insn-check-closure
+    ifeq ($$(call as-insn,$$($(2)),$(3),y,n),y)
+        $(1) += $(4)
+    endif
+endef
+
 define buildmakevars2shellvars
     export PREFIX="$(PREFIX)";                                            \
     export XEN_SCRIPT_DIR="$(XEN_SCRIPT_DIR)";                            \
--- a/xen/arch/x86/Rules.mk
+++ b/xen/arch/x86/Rules.mk
@@ -28,6 +28,8 @@ CFLAGS += -msoft-float
 
 $(call cc-options-add,CFLAGS,CC,$(EMBEDDED_EXTRA_CFLAGS))
 $(call cc-option-add,CFLAGS,CC,-Wnested-externs)
+$(call as-insn-check,CFLAGS,CC,"vmcall",-DHAVE_GAS_VMX)
+$(call as-insn-check,CFLAGS,CC,"invept (%rax)$$(comma)%rax",-DHAVE_GAS_EPT)
 
 ifeq ($(supervisor_mode_kernel),y)
 CFLAGS += -DCONFIG_X86_SUPERVISOR_MODE_KERNEL=1
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -1292,12 +1292,11 @@ void vmx_do_resume(struct vcpu *v)
     reset_stack_and_jump(vmx_asm_do_vmentry);
 }
 
-static unsigned long vmr(unsigned long field)
+static inline unsigned long vmr(unsigned long field)
 {
-    int rc;
     unsigned long val;
-    val = __vmread_safe(field, &rc);
-    return rc ? 0 : val;
+
+    return __vmread_safe(field, val) ? val : 0;
 }
 
 static void vmx_dump_sel(char *name, uint32_t selector)
--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -951,13 +951,11 @@ fallback:
         vvmcs_to_shadow(vvmcs, field[i]);
 }
 
-static void shadow_to_vvmcs(void *vvmcs, unsigned int field)
+static inline void shadow_to_vvmcs(void *vvmcs, unsigned int field)
 {
     u64 value;
-    int rc;
 
-    value = __vmread_safe(field, &rc);
-    if ( !rc )
+    if ( __vmread_safe(field, value) )
         __set_vvmcs(vvmcs, field, value);
 }
 
--- a/xen/include/asm-x86/hvm/vmx/vmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h
@@ -282,27 +282,44 @@ extern uint8_t posted_intr_vector;
 
 static inline void __vmptrld(u64 addr)
 {
-    asm volatile ( VMPTRLD_OPCODE
-                   MODRM_EAX_06
+    asm volatile (
+#ifdef HAVE_GAS_VMX
+                   "vmptrld %0\n"
+#else
+                   VMPTRLD_OPCODE MODRM_EAX_06
+#endif
                    /* CF==1 or ZF==1 --> crash (ud2) */
                    UNLIKELY_START(be, vmptrld)
                    "\tud2\n"
                    UNLIKELY_END_SECTION
                    :
+#ifdef HAVE_GAS_VMX
+                   : "m" (addr)
+#else
                    : "a" (&addr)
+#endif
                    : "memory");
 }
 
 static inline void __vmpclear(u64 addr)
 {
-    asm volatile ( VMCLEAR_OPCODE
-                   MODRM_EAX_06
+    asm volatile (
+#ifdef HAVE_GAS_VMX
+                   "vmclear %0\n"
+#else
+                   VMCLEAR_OPCODE MODRM_EAX_06
+#endif
                    /* CF==1 or ZF==1 --> crash (ud2) */
                    UNLIKELY_START(be, vmclear)
                    "\tud2\n"
                    UNLIKELY_END_SECTION
+#ifdef HAVE_GAS_VMX
+                   : "+m" (addr)
+                   :
+#else
                    :
                    : "a" (&addr)
+#endif
                    : "memory");
 }
 
@@ -325,31 +342,58 @@ static inline unsigned long __vmread(uns
 
 static inline void __vmwrite(unsigned long field, unsigned long value)
 {
-    asm volatile ( VMWRITE_OPCODE
-                   MODRM_EAX_ECX
+    asm volatile (
+#ifdef HAVE_GAS_VMX
+                   "vmwrite %1, %0\n"
+#else
+                   VMWRITE_OPCODE MODRM_EAX_ECX
+#endif
                    /* CF==1 or ZF==1 --> crash (ud2) */
                    UNLIKELY_START(be, vmwrite)
                    "\tud2\n"
                    UNLIKELY_END_SECTION
                    : 
+#ifdef HAVE_GAS_VMX
+                   : "r" (field) , "rm" (value)
+#else
                    : "a" (field) , "c" (value)
+#endif
                    : "memory");
 }
 
-static inline unsigned long __vmread_safe(unsigned long field, int *error)
-{
-    unsigned long ecx;
-
-    asm volatile ( VMREAD_OPCODE
-                   MODRM_EAX_ECX
-                   /* CF==1 or ZF==1 --> rc = -1 */
-                   "setna %b0 ; neg %0"
-                   : "=q" (*error), "=c" (ecx)
-                   : "0" (0), "a" (field)
-                   : "memory");
-
-    return ecx;
-}
+#ifdef HAVE_GAS_VMX
+#define __vmread_safe(field, value) ({ \
+    bool_t okay_; \
+    if ( sizeof(value) == sizeof(long) ) \
+        asm volatile ( "vmread %q2, %1\n\t" \
+                       /* CF==1 or ZF==1 --> rc = 0 */ \
+                       "setnbe %0" \
+                       : "=qm" (okay_), "=rm" (value) \
+                       : "r" (field) \
+                       : "memory"); \
+    else \
+    { \
+        asm volatile ( "vmread %q2, %q1\n\t" \
+                       /* CF==1 or ZF==1 --> rc = 0 */ \
+                       "setnbe %0" \
+                       : "=qm" (okay_), "=r" (value) \
+                       : "r" (field) \
+                       : "memory"); \
+    } \
+    okay_; \
+})
+#else
+#define __vmread_safe(field, value) ({ \
+    bool_t okay_; \
+    asm volatile ( VMREAD_OPCODE MODRM_EAX_ECX \
+                   /* CF==1 or ZF==1 --> rc = 0 */ \
+                   "setnbe %0" \
+                   : "=qm" (okay_), "=c" (value) \
+                   : "a" (field) \
+                   : "memory"); \
+    okay_; \
+})
+#endif
 
 static inline void __invept(int type, u64 eptp, u64 gpa)
 {
@@ -365,14 +409,22 @@ static inline void __invept(int type, u6
          !cpu_has_vmx_ept_invept_single_context )
         type = INVEPT_ALL_CONTEXT;
 
-    asm volatile ( INVEPT_OPCODE
-                   MODRM_EAX_08
+    asm volatile (
+#ifdef HAVE_GAS_EPT
+                   "invept %0, %q1\n"
+#else
+                   INVEPT_OPCODE MODRM_EAX_08
+#endif
                    /* CF==1 or ZF==1 --> crash (ud2) */
                    UNLIKELY_START(be, invept)
                    "\tud2\n"
                    UNLIKELY_END_SECTION
                    :
+#ifdef HAVE_GAS_EPT
+                   : "m" (operand), "r" (type)
+#else
                    : "a" (&operand), "c" (type)
+#endif
                    : "memory" );
 }
 
@@ -385,7 +437,12 @@ static inline void __invvpid(int type, u
     } __attribute__ ((packed)) operand = {vpid, 0, gva};
 
     /* Fix up #UD exceptions which occur when TLBs are flushed before VMXON. */
-    asm volatile ( "1: " INVVPID_OPCODE MODRM_EAX_08
+    asm volatile ( "1: "
+#ifdef HAVE_GAS_EPT
+                   "invvpid %0, %q1\n"
+#else
+                   INVVPID_OPCODE MODRM_EAX_08
+#endif
                    /* CF==1 or ZF==1 --> crash (ud2) */
                    UNLIKELY_START(be, invvpid)
                    "\tud2\n"
@@ -393,7 +450,11 @@ static inline void __invvpid(int type, u
                    "2:"
                    _ASM_EXTABLE(1b, 2b)
                    :
+#ifdef HAVE_GAS_EPT
+                   : "m" (operand), "r" (type)
+#else
                    : "a" (&operand), "c" (type)
+#endif
                    : "memory" );
 }
 



[-- Attachment #2: VMX-use-mnemonics.patch --]
[-- Type: text/plain, Size: 9664 bytes --]

VMX: use proper instruction mnemonics if assembler supports them

With the hex byte emission we were taking away a good part of
flexibility from the compiler, as for simplicity reasons these were
built using fixed operands. All half way modern build environments
would allow using the mnemonics (but we can't disable the hex variants
yet, since the binutils around at the time gcc 4.1 got released didn't
support these yet).

I didn't convert __vmread() yet because that would, just like for
__vmread_safe(), imply converting to a macro so that the output operand
can be the caller supplied variable rather than an intermediate one. As
that would require touching all invocation points of __vmread() (of
which there are quite a few), I'd first like to be certain the approach
is acceptable; the main question being whether the now conditional code
might be considered to cause future maintenance issues, and the second
being that of parameter/argument ordering (here I made __vmread_safe()
match __vmwrite(), but one could also take the position that read and
write should use the inverse order of one another, in line with the
actual instruction operands).

Additionally I was quite puzzled to find that all the asm()-s involved
here have memory clobbers - what are they needed for? Or can they be
dropped at least in some cases?

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v2: Fix up context after changed patch 2 of this series.

--- a/Config.mk
+++ b/Config.mk
@@ -4,6 +4,12 @@ ifeq ($(filter /%,$(XEN_ROOT)),)
 $(error XEN_ROOT must be absolute)
 endif
 
+# Convenient variables
+comma   := ,
+squote  := '
+empty   :=
+space   := $(empty) $(empty)
+
 # fallback for older make
 realpath = $(wildcard $(foreach file,$(1),$(shell cd -P $(dir $(file)) && echo "$$PWD/$(notdir $(file))")))
 
@@ -128,6 +134,21 @@ endef
 check-$(gcc) = $(call cc-ver-check,CC,0x040100,"Xen requires at least gcc-4.1")
 $(eval $(check-y))
 
+# as-insn: Check whether assembler supports an instruction.
+# Usage: cflags-y += $(call as-insn "insn",option-yes,option-no)
+as-insn = $(if $(shell echo 'void _(void) { asm volatile ( $(2) ); }' \
+                       | $(1) -c -x c -o /dev/null - 2>&1),$(4),$(3))
+
+# as-insn-check: Add an option to compilation flags, but only if insn is
+#                supported by assembler.
+# Usage: $(call as-insn-check CFLAGS,CC,"nop",-DHAVE_GAS_NOP)
+as-insn-check = $(eval $(call as-insn-check-closure,$(1),$(2),$(3),$(4)))
+define as-insn-check-closure
+    ifeq ($$(call as-insn,$$($(2)),$(3),y,n),y)
+        $(1) += $(4)
+    endif
+endef
+
 define buildmakevars2shellvars
     export PREFIX="$(PREFIX)";                                            \
     export XEN_SCRIPT_DIR="$(XEN_SCRIPT_DIR)";                            \
--- a/xen/arch/x86/Rules.mk
+++ b/xen/arch/x86/Rules.mk
@@ -28,6 +28,8 @@ CFLAGS += -msoft-float
 
 $(call cc-options-add,CFLAGS,CC,$(EMBEDDED_EXTRA_CFLAGS))
 $(call cc-option-add,CFLAGS,CC,-Wnested-externs)
+$(call as-insn-check,CFLAGS,CC,"vmcall",-DHAVE_GAS_VMX)
+$(call as-insn-check,CFLAGS,CC,"invept (%rax)$$(comma)%rax",-DHAVE_GAS_EPT)
 
 ifeq ($(supervisor_mode_kernel),y)
 CFLAGS += -DCONFIG_X86_SUPERVISOR_MODE_KERNEL=1
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -1292,12 +1292,11 @@ void vmx_do_resume(struct vcpu *v)
     reset_stack_and_jump(vmx_asm_do_vmentry);
 }
 
-static unsigned long vmr(unsigned long field)
+static inline unsigned long vmr(unsigned long field)
 {
-    int rc;
     unsigned long val;
-    val = __vmread_safe(field, &rc);
-    return rc ? 0 : val;
+
+    return __vmread_safe(field, val) ? val : 0;
 }
 
 static void vmx_dump_sel(char *name, uint32_t selector)
--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -951,13 +951,11 @@ fallback:
         vvmcs_to_shadow(vvmcs, field[i]);
 }
 
-static void shadow_to_vvmcs(void *vvmcs, unsigned int field)
+static inline void shadow_to_vvmcs(void *vvmcs, unsigned int field)
 {
     u64 value;
-    int rc;
 
-    value = __vmread_safe(field, &rc);
-    if ( !rc )
+    if ( __vmread_safe(field, value) )
         __set_vvmcs(vvmcs, field, value);
 }
 
--- a/xen/include/asm-x86/hvm/vmx/vmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h
@@ -282,27 +282,44 @@ extern uint8_t posted_intr_vector;
 
 static inline void __vmptrld(u64 addr)
 {
-    asm volatile ( VMPTRLD_OPCODE
-                   MODRM_EAX_06
+    asm volatile (
+#ifdef HAVE_GAS_VMX
+                   "vmptrld %0\n"
+#else
+                   VMPTRLD_OPCODE MODRM_EAX_06
+#endif
                    /* CF==1 or ZF==1 --> crash (ud2) */
                    UNLIKELY_START(be, vmptrld)
                    "\tud2\n"
                    UNLIKELY_END_SECTION
                    :
+#ifdef HAVE_GAS_VMX
+                   : "m" (addr)
+#else
                    : "a" (&addr)
+#endif
                    : "memory");
 }
 
 static inline void __vmpclear(u64 addr)
 {
-    asm volatile ( VMCLEAR_OPCODE
-                   MODRM_EAX_06
+    asm volatile (
+#ifdef HAVE_GAS_VMX
+                   "vmclear %0\n"
+#else
+                   VMCLEAR_OPCODE MODRM_EAX_06
+#endif
                    /* CF==1 or ZF==1 --> crash (ud2) */
                    UNLIKELY_START(be, vmclear)
                    "\tud2\n"
                    UNLIKELY_END_SECTION
+#ifdef HAVE_GAS_VMX
+                   : "+m" (addr)
+                   :
+#else
                    :
                    : "a" (&addr)
+#endif
                    : "memory");
 }
 
@@ -325,31 +342,58 @@ static inline unsigned long __vmread(uns
 
 static inline void __vmwrite(unsigned long field, unsigned long value)
 {
-    asm volatile ( VMWRITE_OPCODE
-                   MODRM_EAX_ECX
+    asm volatile (
+#ifdef HAVE_GAS_VMX
+                   "vmwrite %1, %0\n"
+#else
+                   VMWRITE_OPCODE MODRM_EAX_ECX
+#endif
                    /* CF==1 or ZF==1 --> crash (ud2) */
                    UNLIKELY_START(be, vmwrite)
                    "\tud2\n"
                    UNLIKELY_END_SECTION
                    : 
+#ifdef HAVE_GAS_VMX
+                   : "r" (field) , "rm" (value)
+#else
                    : "a" (field) , "c" (value)
+#endif
                    : "memory");
 }
 
-static inline unsigned long __vmread_safe(unsigned long field, int *error)
-{
-    unsigned long ecx;
-
-    asm volatile ( VMREAD_OPCODE
-                   MODRM_EAX_ECX
-                   /* CF==1 or ZF==1 --> rc = -1 */
-                   "setna %b0 ; neg %0"
-                   : "=q" (*error), "=c" (ecx)
-                   : "0" (0), "a" (field)
-                   : "memory");
-
-    return ecx;
-}
+#ifdef HAVE_GAS_VMX
+#define __vmread_safe(field, value) ({ \
+    bool_t okay_; \
+    if ( sizeof(value) == sizeof(long) ) \
+        asm volatile ( "vmread %q2, %1\n\t" \
+                       /* CF==1 or ZF==1 --> rc = 0 */ \
+                       "setnbe %0" \
+                       : "=qm" (okay_), "=rm" (value) \
+                       : "r" (field) \
+                       : "memory"); \
+    else \
+    { \
+        asm volatile ( "vmread %q2, %q1\n\t" \
+                       /* CF==1 or ZF==1 --> rc = 0 */ \
+                       "setnbe %0" \
+                       : "=qm" (okay_), "=r" (value) \
+                       : "r" (field) \
+                       : "memory"); \
+    } \
+    okay_; \
+})
+#else
+#define __vmread_safe(field, value) ({ \
+    bool_t okay_; \
+    asm volatile ( VMREAD_OPCODE MODRM_EAX_ECX \
+                   /* CF==1 or ZF==1 --> rc = 0 */ \
+                   "setnbe %0" \
+                   : "=qm" (okay_), "=c" (value) \
+                   : "a" (field) \
+                   : "memory"); \
+    okay_; \
+})
+#endif
 
 static inline void __invept(int type, u64 eptp, u64 gpa)
 {
@@ -365,14 +409,22 @@ static inline void __invept(int type, u6
          !cpu_has_vmx_ept_invept_single_context )
         type = INVEPT_ALL_CONTEXT;
 
-    asm volatile ( INVEPT_OPCODE
-                   MODRM_EAX_08
+    asm volatile (
+#ifdef HAVE_GAS_EPT
+                   "invept %0, %q1\n"
+#else
+                   INVEPT_OPCODE MODRM_EAX_08
+#endif
                    /* CF==1 or ZF==1 --> crash (ud2) */
                    UNLIKELY_START(be, invept)
                    "\tud2\n"
                    UNLIKELY_END_SECTION
                    :
+#ifdef HAVE_GAS_EPT
+                   : "m" (operand), "r" (type)
+#else
                    : "a" (&operand), "c" (type)
+#endif
                    : "memory" );
 }
 
@@ -385,7 +437,12 @@ static inline void __invvpid(int type, u
     } __attribute__ ((packed)) operand = {vpid, 0, gva};
 
     /* Fix up #UD exceptions which occur when TLBs are flushed before VMXON. */
-    asm volatile ( "1: " INVVPID_OPCODE MODRM_EAX_08
+    asm volatile ( "1: "
+#ifdef HAVE_GAS_EPT
+                   "invvpid %0, %q1\n"
+#else
+                   INVVPID_OPCODE MODRM_EAX_08
+#endif
                    /* CF==1 or ZF==1 --> crash (ud2) */
                    UNLIKELY_START(be, invvpid)
                    "\tud2\n"
@@ -393,7 +450,11 @@ static inline void __invvpid(int type, u
                    "2:"
                    _ASM_EXTABLE(1b, 2b)
                    :
+#ifdef HAVE_GAS_EPT
+                   : "m" (operand), "r" (type)
+#else
                    : "a" (&operand), "c" (type)
+#endif
                    : "memory" );
 }
 

[-- Attachment #3: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 3/4] VMX: use proper instruction mnemonics if assembler supports them
  2013-08-24 22:18   ` Andrew Cooper
@ 2013-08-26  9:06     ` Jan Beulich
  2013-08-26  9:25       ` Andrew Cooper
  0 siblings, 1 reply; 60+ messages in thread
From: Jan Beulich @ 2013-08-26  9:06 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: xen-devel, Keir Fraser, Eddie Dong, Jun Nakajima

>>> On 25.08.13 at 00:18, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
> I certainly like and encourage of the idea of getting rid of the fixed
> registers due to the hex literals.  However, are macros really the
> correct solution?
> 
> __vmread_safe(field, value) now has non-function like semantics for value.
> 
> To me, the more obvious (and as far as I can tell, functionally
> identical) approach would be for
> 
> unsigned long always_inline __vmread_safe(unsigned long field, unsigned
> long *value)
> 
> The compiler, given the correct constraints for value could resolve the
> pointer back to the caller as it inlined the function, and calls would
> look like "__vmread_safe(field, &value);" which looks rather more natural.

The constraints aren't the problem - the use of sizeof(value) inside
the macro is. You can't pass both pointers to 32-bit and 64-bit data
items into the function, and then have the function guess what the
type is to be.
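
A hypothetical illustration of the point (the helper is made up for the
example and relies on the macro as posted; the comments refer to the
HAVE_GAS_VMX variant):

static inline u64 vmread_width_example(unsigned long field)
{
    u64 wide;
    u32 narrow;

    if ( !__vmread_safe(field, wide) )    /* sizeof == sizeof(long): "=rm" branch */
        wide = 0;
    if ( !__vmread_safe(field, narrow) )  /* narrower type: "%q" register branch  */
        narrow = 0;

    return wide + narrow;
}

A function taking, say, "unsigned long *value" would instead fix the width at
its prototype and lose that per-caller type information.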

> As for the memory clobbers, I see no indication from the VMX instruction
> reference that vmread or inv{ept,vpid} have relevant side effects
> requiring a clobber.  For the load functions and vmwrite, there is a
> possibilty of a memory clobber if someone has peaked into the relevant
> VMCS page(s).  However, doing so defeats the purpose of vmread/write, so
> as far as I can tell, we could argue away the clobbers on those grounds
> alone.

Good; I'm still hoping to hear an opinion from Intel engineers too.

Jan

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 2/4] VMX: move various uses of UD2 out of fast paths
  2013-08-26  8:50     ` Jan Beulich
@ 2013-08-26  9:07       ` Andrew Cooper
  0 siblings, 0 replies; 60+ messages in thread
From: Andrew Cooper @ 2013-08-26  9:07 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel, Keir Fraser, Eddie Dong, Jun Nakajima

On 26/08/2013 09:50, Jan Beulich wrote:
>>>> On 24.08.13 at 00:06, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
>> On 23/08/2013 15:02, Jan Beulich wrote:
>>> ... at once making conditional forward jumps, which are statically
>>> predicted to be not taken, only used for the unlikely (error) cases.
>>>
>>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>> I presume it is intentional to create many different symbols for
>> different ud2 instructions right next to each other?  It isn't clear
>> from the commit message, but would certainly be the logical choice for
>> debugging an issue arising from such a bug.
> These are labels that don't make it into the object file's symbol
> table anyway, so their names don't matter (but need to all be
> distinct to avoid duplicate names at assembly time).
>
> The addresses were chosen to be all different (rather than
> having a single global UD2 that all of the failure paths jump
> to) in order to be able to re-associate an eventual crash with
> its origin. As that isn't different from how the code behaved
> before, I don't see a need for mentioning this in the description.
>
>> As each of these ud2 instructions will never result in continued normal
>> execution, is it necessary to have the 'likely' tag and jump back?  I
>> cant see any non-buggy case where they would be executed, at which point
>> they are superfluous.
> Which "likely" tag are you talking about? They all use
> UNLIKELY_END_SECTION rather than UNLIKELY_END, so there's
> no extra code being generated afaict.

That would be me misreading the code.  Sorry for the noise.

~Andrew

>
>>> @@ -377,7 +387,9 @@ static inline void __invvpid(int type, u
>>>      /* Fix up #UD exceptions which occur when TLBs are flushed before VMXON. */
>>>      asm volatile ( "1: " INVVPID_OPCODE MODRM_EAX_08
>>>                     /* CF==1 or ZF==1 --> crash (ud2) */
>>> -                   "ja 2f ; ud2 ; 2:\n"
>>> +                   "2: " UNLIKELY_START(be, invvpid)
>>> +                   "\tud2\n"
>>> +                   UNLIKELY_END_SECTION "\n"
>>>                     _ASM_EXTABLE(1b, 2b)
>> Is this logic still correct?
>>
>> To my reading, the logic used to say "if there was an exception while
>> executing INVVPID, then jump to 2: to fix up." where 2: happens to be
>> the end of the inline assembly, bypassing the ud2.
>>
>> After the change, the jbe (formed from UNLIKELY_START) is part of the
>> fixup section, which possibly includes the ud2.  As the state of the
>> flags while executing the INVVPID as unknown, this leaves an unknown
>> chance of "fixing up" to a ud2 instruction.
> Indeed, I screwed this up. Hence there'll be a v2 of this patch
> shortly. Thanks for spotting!
>
> Jan
>

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH v2 2/4] VMX: move various uses of UD2 out of fast paths
  2013-08-26  8:58     ` [PATCH v2 " Jan Beulich
@ 2013-08-26  9:09       ` Andrew Cooper
  2013-08-29 11:08       ` Tim Deegan
  1 sibling, 0 replies; 60+ messages in thread
From: Andrew Cooper @ 2013-08-26  9:09 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel, Keir Fraser, Eddie Dong, Jun Nakajima


[-- Attachment #1.1: Type: text/plain, Size: 4427 bytes --]

On 26/08/2013 09:58, Jan Beulich wrote:
> ... at once making conditional forward jumps, which are statically
> predicted to be not taken, only used for the unlikely (error) cases.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

> ---
> v2: Fix #UD recovery for INVVPID (unintended flow change spotted by Andrew Cooper).
>
> --- a/xen/include/asm-x86/asm_defns.h
> +++ b/xen/include/asm-x86/asm_defns.h
> @@ -67,6 +67,30 @@ void ret_from_intr(void);
>  #define ASSERT_NOT_IN_ATOMIC
>  #endif
>  
> +#else
> +
> +#ifdef __clang__ /* clang's builtin assember can't do .subsection */
> +
> +#define UNLIKELY_START_SECTION ".pushsection .fixup,\"ax\""
> +#define UNLIKELY_END_SECTION   ".popsection"
> +
> +#else
> +
> +#define UNLIKELY_START_SECTION ".subsection 1"
> +#define UNLIKELY_END_SECTION   ".subsection 0"
> +
> +#endif
> +
> +#define UNLIKELY_START(cond, tag)          \
> +        "j" #cond " .Lunlikely%=.tag;\n\t" \
> +        UNLIKELY_START_SECTION "\n"        \
> +        ".Lunlikely%=.tag:"
> +
> +#define UNLIKELY_END(tag)                  \
> +        "jmp .Llikely%=.tag;\n\t"          \
> +        UNLIKELY_END_SECTION "\n"          \
> +        ".Llikely%=.tag:"
> +
>  #endif
>  
>  #endif /* __X86_ASM_DEFNS_H__ */
> --- a/xen/include/asm-x86/hvm/vmx/vmx.h
> +++ b/xen/include/asm-x86/hvm/vmx/vmx.h
> @@ -285,7 +285,9 @@ static inline void __vmptrld(u64 addr)
>      asm volatile ( VMPTRLD_OPCODE
>                     MODRM_EAX_06
>                     /* CF==1 or ZF==1 --> crash (ud2) */
> -                   "ja 1f ; ud2 ; 1:\n"
> +                   UNLIKELY_START(be, vmptrld)
> +                   "\tud2\n"
> +                   UNLIKELY_END_SECTION
>                     :
>                     : "a" (&addr)
>                     : "memory");
> @@ -296,7 +298,9 @@ static inline void __vmpclear(u64 addr)
>      asm volatile ( VMCLEAR_OPCODE
>                     MODRM_EAX_06
>                     /* CF==1 or ZF==1 --> crash (ud2) */
> -                   "ja 1f ; ud2 ; 1:\n"
> +                   UNLIKELY_START(be, vmclear)
> +                   "\tud2\n"
> +                   UNLIKELY_END_SECTION
>                     :
>                     : "a" (&addr)
>                     : "memory");
> @@ -309,7 +313,9 @@ static inline unsigned long __vmread(uns
>      asm volatile ( VMREAD_OPCODE
>                     MODRM_EAX_ECX
>                     /* CF==1 or ZF==1 --> crash (ud2) */
> -                   "ja 1f ; ud2 ; 1:\n"
> +                   UNLIKELY_START(be, vmread)
> +                   "\tud2\n"
> +                   UNLIKELY_END_SECTION
>                     : "=c" (ecx)
>                     : "a" (field)
>                     : "memory");
> @@ -322,7 +328,9 @@ static inline void __vmwrite(unsigned lo
>      asm volatile ( VMWRITE_OPCODE
>                     MODRM_EAX_ECX
>                     /* CF==1 or ZF==1 --> crash (ud2) */
> -                   "ja 1f ; ud2 ; 1:\n"
> +                   UNLIKELY_START(be, vmwrite)
> +                   "\tud2\n"
> +                   UNLIKELY_END_SECTION
>                     : 
>                     : "a" (field) , "c" (value)
>                     : "memory");
> @@ -360,7 +368,9 @@ static inline void __invept(int type, u6
>      asm volatile ( INVEPT_OPCODE
>                     MODRM_EAX_08
>                     /* CF==1 or ZF==1 --> crash (ud2) */
> -                   "ja 1f ; ud2 ; 1:\n"
> +                   UNLIKELY_START(be, invept)
> +                   "\tud2\n"
> +                   UNLIKELY_END_SECTION
>                     :
>                     : "a" (&operand), "c" (type)
>                     : "memory" );
> @@ -377,7 +387,10 @@ static inline void __invvpid(int type, u
>      /* Fix up #UD exceptions which occur when TLBs are flushed before VMXON. */
>      asm volatile ( "1: " INVVPID_OPCODE MODRM_EAX_08
>                     /* CF==1 or ZF==1 --> crash (ud2) */
> -                   "ja 2f ; ud2 ; 2:\n"
> +                   UNLIKELY_START(be, invvpid)
> +                   "\tud2\n"
> +                   UNLIKELY_END_SECTION "\n"
> +                   "2:"
>                     _ASM_EXTABLE(1b, 2b)
>                     :
>                     : "a" (&operand), "c" (type)
>
>
>
>
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel


[-- Attachment #1.2: Type: text/html, Size: 5167 bytes --]

[-- Attachment #2: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 3/4] VMX: use proper instruction mnemonics if assembler supports them
  2013-08-26  9:06     ` Jan Beulich
@ 2013-08-26  9:25       ` Andrew Cooper
  2013-08-26  9:41         ` Jan Beulich
  2013-08-26 10:18         ` [PATCH v3 " Jan Beulich
  0 siblings, 2 replies; 60+ messages in thread
From: Andrew Cooper @ 2013-08-26  9:25 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel, Keir Fraser, Eddie Dong, Jun Nakajima

On 26/08/2013 10:06, Jan Beulich wrote:
>>>> On 25.08.13 at 00:18, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
>> I certainly like and encourage of the idea of getting rid of the fixed
>> registers due to the hex literals.  However, are macros really the
>> correct solution?
>>
>> __vmread_safe(field, value) now has non-function like semantics for value.
>>
>> To me, the more obvious (and as far as I can tell, functionally
>> identical) approach would be for
>>
>> unsigned long always_inline __vmread_safe(unsigned long field, unsigned
>> long *value)
>>
>> The compiler, given the correct constraints for value could resolve the
>> pointer back to the caller as it inlined the function, and calls would
>> look like "__vmread_safe(field, &value);" which looks rather more natural.
> The constraints aren't the problem - the use of sizeof(value) inside
> the macro is. You can't pass both pointers to 32-bit and 64-bit data
> items into the function, and then have the function guess what the
> type is to be.

Ok (wrt the explanation), but I am not sure the result is correct.

VMREAD has a single encoding, 0f 78, and the size of the reg/mem operand
depends on long mode or not (where compat mode forces a #UD).

From SDM Vol 3 30.3 "VMREAD":

"If the VMCS field specified by the source operand is shorter than this
effective operand size, the high bits of the destination operand are
cleared to 0. If the VMCS field is longer, then the high bits of the
field are not read."

So passing a pointer to a 32bit value into this function/macro in long
mode will result in the adjacent 32 bits being clobbered, which should
best be avoided.
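
For concreteness, a tiny stand-alone C sketch (purely illustrative, not
the Xen code) of what that clobbering looks like - a 64-bit store landing
on what is really a 32-bit slot takes its neighbour with it:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        struct { uint32_t field; uint32_t neighbour; } s = { 0, 0xdeadbeef };
        uint64_t fake_vmread_result = 0x1234;

        /* Stand-in for a 64-bit VMREAD hitting a 32-bit destination;
         * the 8-byte copy deliberately overruns the 32-bit slot. */
        memcpy(&s.field, &fake_vmread_result, sizeof(fake_vmread_result));
        printf("neighbour: %#x\n", (unsigned)s.neighbour); /* no longer 0xdeadbeef */
        return 0;
    }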

~Andrew

>
>> As for the memory clobbers, I see no indication from the VMX instruction
>> reference that vmread or inv{ept,vpid} have relevant side effects
>> requiring a clobber.  For the load functions and vmwrite, there is a
>> possibility of a memory clobber if someone has peeked into the relevant
>> VMCS page(s).  However, doing so defeats the purpose of vmread/write, so
>> as far as I can tell, we could argue away the clobbers on those grounds
>> alone.
> Good; I'm still hoping to hear an opinion from Intel engineers too.
>
> Jan
>

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 3/4] VMX: use proper instruction mnemonics if assembler supports them
  2013-08-26  9:25       ` Andrew Cooper
@ 2013-08-26  9:41         ` Jan Beulich
  2013-08-26 10:18         ` [PATCH v3 " Jan Beulich
  1 sibling, 0 replies; 60+ messages in thread
From: Jan Beulich @ 2013-08-26  9:41 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: xen-devel, Keir Fraser, Eddie Dong, Jun Nakajima

>>> On 26.08.13 at 11:25, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
> On 26/08/2013 10:06, Jan Beulich wrote:
>> The constraints aren't the problem - the use of sizeof(value) inside
>> the macro is. You can't pass both pointers to 32-bit and 64-bit data
>> items into the function, and then have the function guess what the
>> type is to be.
> 
> Ok (wrt the explanation), but I am not sure the result is correct.
> 
> VMREAD has a single encoding, 0f 78, and the size of the reg/mem operand
> depends on long mode or not (where compat mode forces a #UD).
> 
> From SDM Vol 3 30.3 "VMREAD":
> 
> "If the VMCS field specified by the source operand is shorter than this
> effective operand size, the high bits of the destination operand are
> cleared to 0. If the VMCS field is longer, then the high bits of the
> field are not read."
> 
> So passing a pointer to a 32bit value into this function/macro in long
> mode will result in the adjacent 32 bits being clobbered, which should
> best be avoided.

Well spotted (and hence the __vmread_safe() construct is buggy
altogether). Since I know I had looked at the instruction page to
see whether it has 32- and 64-bit variants, I can only blame this
on their inconsistent documentation: Vol 2, where all the other
instructions live anyway, never uses the description field to outline
the modes in which a certain encoding is valid - they have a
separate column for this purpose. And I (mis-)interpreted the
absence of that column...

Anyway - v3 will need to be cut, and then I'll of course see to
making this a function instead of a macro. Luckily the patch in its
entirety is not incorrect, as the two users of __vmread_safe()
use 64-bit values. But as soon as the concept was extended to
__vmread() the bug would have become obvious (and likely even
at compile time already).

Jan

^ permalink raw reply	[flat|nested] 60+ messages in thread

* [PATCH v3 3/4] VMX: use proper instruction mnemonics if assembler supports them
  2013-08-26  9:25       ` Andrew Cooper
  2013-08-26  9:41         ` Jan Beulich
@ 2013-08-26 10:18         ` Jan Beulich
  2013-08-26 13:05           ` Andrew Cooper
  1 sibling, 1 reply; 60+ messages in thread
From: Jan Beulich @ 2013-08-26 10:18 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper, Keir Fraser, Eddie Dong, Jun Nakajima

[-- Attachment #1: Type: text/plain, Size: 9178 bytes --]

With the hex byte emission we were taking away a good part of
flexibility from the compiler, as for simplicity reasons these were
built using fixed operands. All halfway modern build environments
would allow using the mnemonics (but we can't disable the hex variants
yet, since the binutils around at the time gcc 4.1 got released didn't
support these yet).

I didn't convert __vmread() yet because that would, just like for
__vmread_safe(), imply converting to a macro so that the output operand
can be the caller supplied variable rather than an intermediate one. As
that would require touching all invocation points of __vmread() (of
which there are quite a few), I'd first like to be certain the approach
is acceptable; the main question being whether the now conditional code
might be considered to cause future maintenance issues, and the second
being that of parameter/argument ordering (here I made __vmread_safe()
match __vmwrite(), but one could also take the position that read and
write should use the inverse order of one another, in line with the
actual instruction operands).

Additionally I was quite puzzled to find that all the asm()-s involved
here have memory clobbers - what are they needed for? Or can they be
dropped at least in some cases?
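
For reference, what a "memory" clobber actually guarantees (a minimal
sketch, nothing VMX-specific): pending stores must be completed before
the asm, and memory values must be reloaded afterwards.

    /* Sketch only, not Xen code: with the clobber the compiler must
     * finish the store before the asm and reload *p afterwards; without
     * it, it could legitimately just return the constant 1. */
    int clobber_demo(int *p)
    {
        *p = 1;
        asm volatile ( "" ::: "memory" );
        return *p;
    }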

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v3: vmread/vmwrite only have a single fixed-size encoding in each
    execution mode (pointed out by Andrew Cooper); convert
    __vmread_safe() back to a function

--- a/Config.mk
+++ b/Config.mk
@@ -4,6 +4,12 @@ ifeq ($(filter /%,$(XEN_ROOT)),)
 $(error XEN_ROOT must be absolute)
 endif
 
+# Convenient variables
+comma   := ,
+squote  := '
+empty   :=
+space   := $(empty) $(empty)
+
 # fallback for older make
 realpath = $(wildcard $(foreach file,$(1),$(shell cd -P $(dir $(file)) && echo "$$PWD/$(notdir $(file))")))
 
@@ -128,6 +134,21 @@ endef
 check-$(gcc) = $(call cc-ver-check,CC,0x040100,"Xen requires at least gcc-4.1")
 $(eval $(check-y))
 
+# as-insn: Check whether assembler supports an instruction.
+# Usage: cflags-y += $(call as-insn "insn",option-yes,option-no)
+as-insn = $(if $(shell echo 'void _(void) { asm volatile ( $(2) ); }' \
+                       | $(1) -c -x c -o /dev/null - 2>&1),$(4),$(3))
+
+# as-insn-check: Add an option to compilation flags, but only if insn is
+#                supported by assembler.
+# Usage: $(call as-insn-check CFLAGS,CC,"nop",-DHAVE_GAS_NOP)
+as-insn-check = $(eval $(call as-insn-check-closure,$(1),$(2),$(3),$(4)))
+define as-insn-check-closure
+    ifeq ($$(call as-insn,$$($(2)),$(3),y,n),y)
+        $(1) += $(4)
+    endif
+endef
+
 define buildmakevars2shellvars
     export PREFIX="$(PREFIX)";                                            \
     export XEN_SCRIPT_DIR="$(XEN_SCRIPT_DIR)";                            \
--- a/xen/arch/x86/Rules.mk
+++ b/xen/arch/x86/Rules.mk
@@ -28,6 +28,8 @@ CFLAGS += -msoft-float
 
 $(call cc-options-add,CFLAGS,CC,$(EMBEDDED_EXTRA_CFLAGS))
 $(call cc-option-add,CFLAGS,CC,-Wnested-externs)
+$(call as-insn-check,CFLAGS,CC,"vmcall",-DHAVE_GAS_VMX)
+$(call as-insn-check,CFLAGS,CC,"invept (%rax)$$(comma)%rax",-DHAVE_GAS_EPT)
 
 ifeq ($(supervisor_mode_kernel),y)
 CFLAGS += -DCONFIG_X86_SUPERVISOR_MODE_KERNEL=1
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -1292,12 +1292,11 @@ void vmx_do_resume(struct vcpu *v)
     reset_stack_and_jump(vmx_asm_do_vmentry);
 }
 
-static unsigned long vmr(unsigned long field)
+static inline unsigned long vmr(unsigned long field)
 {
-    int rc;
     unsigned long val;
-    val = __vmread_safe(field, &rc);
-    return rc ? 0 : val;
+
+    return __vmread_safe(field, &val) ? val : 0;
 }
 
 static void vmx_dump_sel(char *name, uint32_t selector)
--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -951,13 +951,11 @@ fallback:
         vvmcs_to_shadow(vvmcs, field[i]);
 }
 
-static void shadow_to_vvmcs(void *vvmcs, unsigned int field)
+static inline void shadow_to_vvmcs(void *vvmcs, unsigned int field)
 {
-    u64 value;
-    int rc;
+    unsigned long value;
 
-    value = __vmread_safe(field, &rc);
-    if ( !rc )
+    if ( __vmread_safe(field, &value) )
         __set_vvmcs(vvmcs, field, value);
 }
 
--- a/xen/include/asm-x86/hvm/vmx/vmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h
@@ -282,27 +282,44 @@ extern uint8_t posted_intr_vector;
 
 static inline void __vmptrld(u64 addr)
 {
-    asm volatile ( VMPTRLD_OPCODE
-                   MODRM_EAX_06
+    asm volatile (
+#ifdef HAVE_GAS_VMX
+                   "vmptrld %0\n"
+#else
+                   VMPTRLD_OPCODE MODRM_EAX_06
+#endif
                    /* CF==1 or ZF==1 --> crash (ud2) */
                    UNLIKELY_START(be, vmptrld)
                    "\tud2\n"
                    UNLIKELY_END_SECTION
                    :
+#ifdef HAVE_GAS_VMX
+                   : "m" (addr)
+#else
                    : "a" (&addr)
+#endif
                    : "memory");
 }
 
 static inline void __vmpclear(u64 addr)
 {
-    asm volatile ( VMCLEAR_OPCODE
-                   MODRM_EAX_06
+    asm volatile (
+#ifdef HAVE_GAS_VMX
+                   "vmclear %0\n"
+#else
+                   VMCLEAR_OPCODE MODRM_EAX_06
+#endif
                    /* CF==1 or ZF==1 --> crash (ud2) */
                    UNLIKELY_START(be, vmclear)
                    "\tud2\n"
                    UNLIKELY_END_SECTION
+#ifdef HAVE_GAS_VMX
+                   : "+m" (addr)
+                   :
+#else
                    :
                    : "a" (&addr)
+#endif
                    : "memory");
 }
 
@@ -325,30 +342,47 @@ static inline unsigned long __vmread(uns
 
 static inline void __vmwrite(unsigned long field, unsigned long value)
 {
-    asm volatile ( VMWRITE_OPCODE
-                   MODRM_EAX_ECX
+    asm volatile (
+#ifdef HAVE_GAS_VMX
+                   "vmwrite %1, %0\n"
+#else
+                   VMWRITE_OPCODE MODRM_EAX_ECX
+#endif
                    /* CF==1 or ZF==1 --> crash (ud2) */
                    UNLIKELY_START(be, vmwrite)
                    "\tud2\n"
                    UNLIKELY_END_SECTION
                    : 
+#ifdef HAVE_GAS_VMX
+                   : "r" (field) , "rm" (value)
+#else
                    : "a" (field) , "c" (value)
+#endif
                    : "memory");
 }
 
-static inline unsigned long __vmread_safe(unsigned long field, int *error)
+static inline bool_t __vmread_safe(unsigned long field, unsigned long *value)
 {
-    unsigned long ecx;
+    bool_t okay;
 
-    asm volatile ( VMREAD_OPCODE
-                   MODRM_EAX_ECX
-                   /* CF==1 or ZF==1 --> rc = -1 */
-                   "setna %b0 ; neg %0"
-                   : "=q" (*error), "=c" (ecx)
-                   : "0" (0), "a" (field)
+    asm volatile (
+#ifdef HAVE_GAS_VMX
+                   "vmread %q2, %1\n\t"
+#else
+                   VMREAD_OPCODE MODRM_EAX_ECX
+#endif
+                   /* CF==1 or ZF==1 --> rc = 0 */
+                   "setnbe %0"
+#ifdef HAVE_GAS_VMX
+                   : "=qm" (okay), "=rm" (*value)
+                   : "r" (field)
+#else
+                   : "=qm" (okay), "=c" (*value)
+                   : "a" (field)
+#endif
                    : "memory");
 
-    return ecx;
+    return okay;
 }
 
 static inline void __invept(int type, u64 eptp, u64 gpa)
@@ -365,14 +399,22 @@ static inline void __invept(int type, u6
          !cpu_has_vmx_ept_invept_single_context )
         type = INVEPT_ALL_CONTEXT;
 
-    asm volatile ( INVEPT_OPCODE
-                   MODRM_EAX_08
+    asm volatile (
+#ifdef HAVE_GAS_EPT
+                   "invept %0, %q1\n"
+#else
+                   INVEPT_OPCODE MODRM_EAX_08
+#endif
                    /* CF==1 or ZF==1 --> crash (ud2) */
                    UNLIKELY_START(be, invept)
                    "\tud2\n"
                    UNLIKELY_END_SECTION
                    :
+#ifdef HAVE_GAS_EPT
+                   : "m" (operand), "r" (type)
+#else
                    : "a" (&operand), "c" (type)
+#endif
                    : "memory" );
 }
 
@@ -385,7 +427,12 @@ static inline void __invvpid(int type, u
     } __attribute__ ((packed)) operand = {vpid, 0, gva};
 
     /* Fix up #UD exceptions which occur when TLBs are flushed before VMXON. */
-    asm volatile ( "1: " INVVPID_OPCODE MODRM_EAX_08
+    asm volatile ( "1: "
+#ifdef HAVE_GAS_EPT
+                   "invvpid %0, %q1\n"
+#else
+                   INVVPID_OPCODE MODRM_EAX_08
+#endif
                    /* CF==1 or ZF==1 --> crash (ud2) */
                    UNLIKELY_START(be, invvpid)
                    "\tud2\n"
@@ -393,7 +440,11 @@ static inline void __invvpid(int type, u
                    "2:"
                    _ASM_EXTABLE(1b, 2b)
                    :
+#ifdef HAVE_GAS_EPT
+                   : "m" (operand), "r" (type)
+#else
                    : "a" (&operand), "c" (type)
+#endif
                    : "memory" );
 }
 




^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 1/4] VMX: streamline entry.S code
  2013-08-23 14:01 ` [PATCH 1/4] VMX: streamline entry.S code Jan Beulich
@ 2013-08-26 10:44   ` Andrew Cooper
  2013-08-26 11:01     ` Jan Beulich
  2013-08-29 11:01   ` Tim Deegan
  1 sibling, 1 reply; 60+ messages in thread
From: Andrew Cooper @ 2013-08-26 10:44 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel, Keir Fraser, Eddie Dong, Jun Nakajima


[-- Attachment #1.1: Type: text/plain, Size: 7952 bytes --]

On 23/08/2013 15:01, Jan Beulich wrote:
> - move stuff easily/better done in C into C code
> - re-arrange code paths so that no redundant GET_CURRENT() would remain
>   on the fast paths
> - move long latency operations earlier
> - slightly defer disabling interrupts on the VM entry path
> - use ENTRY() instead of open coding it
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

This is quite tough to review as all the changes are mixed together, but
I think I have got it all now.

WRT moving things to C, and use of ENTRY(), fine.

>
> --- a/xen/arch/x86/hvm/vmx/entry.S
> +++ b/xen/arch/x86/hvm/vmx/entry.S
> @@ -28,109 +28,66 @@
>  
>  #define VMRESUME     .byte 0x0f,0x01,0xc3
>  #define VMLAUNCH     .byte 0x0f,0x01,0xc2
> -#define VMREAD(off)  .byte 0x0f,0x78,0x47,((off)-UREGS_rip)
> -#define VMWRITE(off) .byte 0x0f,0x79,0x47,((off)-UREGS_rip)
>  
> -/* VMCS field encodings */
> -#define GUEST_RSP    0x681c
> -#define GUEST_RIP    0x681e
> -#define GUEST_RFLAGS 0x6820
> -
> -        ALIGN
> -.globl vmx_asm_vmexit_handler
> -vmx_asm_vmexit_handler:
> +ENTRY(vmx_asm_vmexit_handler)
>          push %rdi
>          push %rsi
>          push %rdx
>          push %rcx
>          push %rax
> +        mov  %cr2,%rax

I presume this is a long latency instruction.  Do you have a source of
numbers for this? (more for interest, as I can easily accept that it
would be a longer operation than the surrounding ones)

>          push %r8
>          push %r9
>          push %r10
>          push %r11
>          push %rbx
> +        GET_CURRENT(%rbx)

This seems a little less obvious.  I presume you are just breaking true
read-after-write data hazard on %rbx ?

>          push %rbp
>          push %r12
>          push %r13
>          push %r14
>          push %r15
>  
> -        GET_CURRENT(%rbx)
> -
>          movb $1,VCPU_vmx_launched(%rbx)
> -
> -        lea  UREGS_rip(%rsp),%rdi
> -        mov  $GUEST_RIP,%eax
> -        /*VMREAD(UREGS_rip)*/
> -        .byte 0x0f,0x78,0x07  /* vmread %rax,(%rdi) */
> -        mov  $GUEST_RSP,%eax
> -        VMREAD(UREGS_rsp)
> -        mov  $GUEST_RFLAGS,%eax
> -        VMREAD(UREGS_eflags)
> -
> -        mov  %cr2,%rax
>          mov  %rax,VCPU_hvm_guest_cr2(%rbx)
>  
> -#ifndef NDEBUG
> -        mov  $0xbeef,%ax
> -        mov  %ax,UREGS_error_code(%rsp)
> -        mov  %ax,UREGS_entry_vector(%rsp)
> -        mov  %ax,UREGS_saved_upcall_mask(%rsp)
> -        mov  %ax,UREGS_cs(%rsp)
> -        mov  %ax,UREGS_ds(%rsp)
> -        mov  %ax,UREGS_es(%rsp)
> -        mov  %ax,UREGS_fs(%rsp)
> -        mov  %ax,UREGS_gs(%rsp)
> -        mov  %ax,UREGS_ss(%rsp)
> -#endif
> -
>          mov  %rsp,%rdi
>          call vmx_vmexit_handler
>  
> -.globl vmx_asm_do_vmentry
> -vmx_asm_do_vmentry:

If you move the ENTRY(vmx_asm_do_vmentry) up from below, you should be
able to completely drop the jmp in it.  However...

> +.Lvmx_do_vmentry:
>          call vmx_intr_assist
>          call nvmx_switch_guest
>          ASSERT_NOT_IN_ATOMIC
>  
> -        GET_CURRENT(%rbx)
> -        cli

The movement of this cli indicates a possible issue.

If we have softirqs pending, we jump to .Lvmx_process_softirqs, which
calls do_softirq, and then jumps back to the top of .Lvmx_do_vmentry,
which reruns the top of do_vmentry with interrupts now enabled.

First of all, I can't see anything in vmx_intr_assist or
nvmx_switch_guest which should require calling multiple times on a
vmentry.  They are also expecting to be called with interrupts disabled
(although I can't spot anything obvious in the callpath which would be
affected).

Perhaps the jumps vmx_goto_emulator and vmx_process_softirqs should turn
into calls, to prevent repeatedly rerunning the top of vmx_do_vmentry?
Then, ENTRY(vmx_do_vmentry) and .Lvmx_do_vmentry could be collapsed
together.

> -
>          mov  VCPU_processor(%rbx),%eax
> -        shl  $IRQSTAT_shift,%eax
>          lea  irq_stat+IRQSTAT_softirq_pending(%rip),%rdx
> -        cmpl $0,(%rdx,%rax,1)
> +        xor  %ecx,%ecx
> +        shl  $IRQSTAT_shift,%eax
> +        cli
> +        cmp  %ecx,(%rdx,%rax,1)
>          jnz  .Lvmx_process_softirqs
>  
> -        testb $0xff,VCPU_vmx_emulate(%rbx)
> -        jnz .Lvmx_goto_emulator
> -        testb $0xff,VCPU_vmx_realmode(%rbx)
> -UNLIKELY_START(nz, realmode)
> -        cmpw $0,VCPU_vm86_seg_mask(%rbx)
> +        cmp  %cl,VCPU_vmx_emulate(%rbx)
> +        jne .Lvmx_goto_emulator
> +        cmp  %cl,VCPU_vmx_realmode(%rbx)
> +UNLIKELY_START(ne, realmode)
> +        cmp  %cx,VCPU_vm86_seg_mask(%rbx)
>          jnz .Lvmx_goto_emulator
>          mov  %rsp,%rdi
>          call vmx_enter_realmode
>  UNLIKELY_END(realmode)
>  
> +        mov  %rsp,%rdi
>          call vmx_vmenter_helper
>          mov  VCPU_hvm_guest_cr2(%rbx),%rax
> -        mov  %rax,%cr2
> -
> -        lea  UREGS_rip(%rsp),%rdi
> -        mov  $GUEST_RIP,%eax
> -        /*VMWRITE(UREGS_rip)*/
> -        .byte 0x0f,0x79,0x07  /* vmwrite (%rdi),%rax */
> -        mov  $GUEST_RSP,%eax
> -        VMWRITE(UREGS_rsp)
> -        mov  $GUEST_RFLAGS,%eax
> -        VMWRITE(UREGS_eflags)
>  
> -        cmpb $0,VCPU_vmx_launched(%rbx)
>          pop  %r15
>          pop  %r14
>          pop  %r13
>          pop  %r12
>          pop  %rbp
> +        mov  %rax,%cr2
> +        cmpb $0,VCPU_vmx_launched(%rbx)

Again, I presume the move of "mov %rax,%cr2" is about the %rax data hazard?

~Andrew

>          pop  %rbx
>          pop  %r11
>          pop  %r10
> @@ -155,13 +112,17 @@ UNLIKELY_END(realmode)
>          call vm_launch_fail
>          ud2
>  
> +ENTRY(vmx_asm_do_vmentry)
> +        GET_CURRENT(%rbx)
> +        jmp  .Lvmx_do_vmentry
> +
>  .Lvmx_goto_emulator:
>          sti
>          mov  %rsp,%rdi
>          call vmx_realmode
> -        jmp  vmx_asm_do_vmentry
> +        jmp  .Lvmx_do_vmentry
>  
>  .Lvmx_process_softirqs:
>          sti
>          call do_softirq
> -        jmp  vmx_asm_do_vmentry
> +        jmp  .Lvmx_do_vmentry
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -2375,6 +2375,12 @@ void vmx_vmexit_handler(struct cpu_user_
>      unsigned long exit_qualification, inst_len = 0;
>      struct vcpu *v = current;
>  
> +    regs->rip    = __vmread(GUEST_RIP);
> +    regs->rsp    = __vmread(GUEST_RSP);
> +    regs->rflags = __vmread(GUEST_RFLAGS);
> +
> +    hvm_invalidate_regs_fields(regs);
> +
>      if ( paging_mode_hap(v->domain) && hvm_paging_enabled(v) )
>          v->arch.hvm_vcpu.guest_cr[3] = v->arch.hvm_vcpu.hw_cr[3] =
>              __vmread(GUEST_CR3);
> @@ -2868,7 +2874,7 @@ out:
>          nvmx_idtv_handling();
>  }
>  
> -void vmx_vmenter_helper(void)
> +void vmx_vmenter_helper(const struct cpu_user_regs *regs)
>  {
>      struct vcpu *curr = current;
>      u32 new_asid, old_asid;
> @@ -2910,6 +2916,10 @@ void vmx_vmenter_helper(void)
>  
>   out:
>      HVMTRACE_ND(VMENTRY, 0, 1/*cycles*/, 0, 0, 0, 0, 0, 0, 0);
> +
> +    __vmwrite(GUEST_RIP,    regs->rip);
> +    __vmwrite(GUEST_RSP,    regs->rsp);
> +    __vmwrite(GUEST_RFLAGS, regs->rflags);
>  }
>  
>  /*
> --- a/xen/include/asm-x86/hvm/hvm.h
> +++ b/xen/include/asm-x86/hvm/hvm.h
> @@ -439,6 +439,21 @@ static inline void hvm_set_info_guest(st
>  
>  int hvm_debug_op(struct vcpu *v, int32_t op);
>  
> +static inline void hvm_invalidate_regs_fields(struct cpu_user_regs *regs)
> +{
> +#ifndef NDEBUG
> +    regs->error_code = 0xbeef;
> +    regs->entry_vector = 0xbeef;
> +    regs->saved_upcall_mask = 0xbf;
> +    regs->cs = 0xbeef;
> +    regs->ss = 0xbeef;
> +    regs->ds = 0xbeef;
> +    regs->es = 0xbeef;
> +    regs->fs = 0xbeef;
> +    regs->gs = 0xbeef;
> +#endif
> +}
> +
>  int hvm_hap_nested_page_fault(paddr_t gpa,
>                                bool_t gla_valid, unsigned long gla,
>                                bool_t access_r,
>
>
>
>



^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 1/4] VMX: streamline entry.S code
  2013-08-26 10:44   ` Andrew Cooper
@ 2013-08-26 11:01     ` Jan Beulich
  2013-08-26 11:48       ` Andrew Cooper
  0 siblings, 1 reply; 60+ messages in thread
From: Jan Beulich @ 2013-08-26 11:01 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: xen-devel, Keir Fraser, Eddie Dong, Jun Nakajima

>>> On 26.08.13 at 12:44, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
>> +ENTRY(vmx_asm_vmexit_handler)
>>          push %rdi
>>          push %rsi
>>          push %rdx
>>          push %rcx
>>          push %rax
>> +        mov  %cr2,%rax
> 
> I presume this is a long latency instruction.  Do you have a source of
> numbers for this? (more for interest, as I can easily accept that it
> would be a longer operation than the surrounding ones)

I don't think I ever saw any precise numbers for control register
accesses for anything newer than 386es or maybe 486es. But
control register accesses are slow (not least because they are
serializing).

>>          push %r8
>>          push %r9
>>          push %r10
>>          push %r11
>>          push %rbx
>> +        GET_CURRENT(%rbx)
> 
> This seems a little less obvious.  I presume you are just breaking true
> read-after-write data hazard on %rbx ?

No, this is to hide the latency between loading %rbx and use of
it in the address of a memory access.

>> -.globl vmx_asm_do_vmentry
>> -vmx_asm_do_vmentry:
> 
> If you move the ENTRY(vmx_asm_do_vmentry) up from below, you should be
> able to completely drop the jmp in it.

That would be possible, at the expense of added padding. I prefer
it the way it is now, as vmx_asm_do_vmentry is not performance
critical (as being used exactly once per HVM vCPU).

>  However...
> 
>> +.Lvmx_do_vmentry:
>>          call vmx_intr_assist
>>          call nvmx_switch_guest
>>          ASSERT_NOT_IN_ATOMIC
>>  
>> -        GET_CURRENT(%rbx)
>> -        cli
> 
> The movement of this cli indicates a possible issue.
> 
> If we have softirqs pending, we jump to .Lvmx_process_softirqs, which
> calls do_softirq, and then jumps back to the top of .Lvmx_do_vmentry,
> which reruns the top of do_vmentry with interrupts now enabled.

That was this way already before. The "cli" got moved only past
some address calculation (which clearly doesn't need to be done
with interrupts disabled).

> First of all, I can't see anything in vmx_intr_assist or
> nvmx_switch_guest which should require calling multiple times on a
> vmentry.  They are also expecting to be called with interrupts disabled
> (although I can't spot anything obvious in the callpath which would be
> affected).

And both of these functions had been called before disabling
interrupts.

>> -        cmpb $0,VCPU_vmx_launched(%rbx)
>>          pop  %r15
>>          pop  %r14
>>          pop  %r13
>>          pop  %r12
>>          pop  %rbp
>> +        mov  %rax,%cr2
>> +        cmpb $0,VCPU_vmx_launched(%rbx)
> 
> Again, I presume the move of "mov %rax,%cr2" is about the %rax data hazard?

The %cr2 write's move is indeed debatable - I tried to get it farther
away from the producer of the data in %rax, but it's not clear
whether that's very useful. The second purpose was to get
something interleaved with the many "pop"s, so that the CPU can
get busy other than just its memory load ports. If controversial
I'm fine with undoing that change.

Jan

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 1/4] VMX: streamline entry.S code
  2013-08-26 11:01     ` Jan Beulich
@ 2013-08-26 11:48       ` Andrew Cooper
  2013-08-26 13:12         ` Jan Beulich
  0 siblings, 1 reply; 60+ messages in thread
From: Andrew Cooper @ 2013-08-26 11:48 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel, Keir Fraser, Eddie Dong, Jun Nakajima

On 26/08/2013 12:01, Jan Beulich wrote:
>>>          push %r8
>>>          push %r9
>>>          push %r10
>>>          push %r11
>>>          push %rbx
>>> +        GET_CURRENT(%rbx)
>> This seems a little less obvious.  I presume you are just breaking true
>> read-after-write data hazard on %rbx ?
> No, this is to hide the latency between loading %rbx and use of
> it in the address of a memory access.

Right - so we are talking about the same thing.

>
>>> -.globl vmx_asm_do_vmentry
>>> -vmx_asm_do_vmentry:
>> If you move the ENTRY(vmx_asm_do_vmentry) up from below, you should be
>> able to completely drop the jmp in it.
> That would be possible, at the expense of added padding. I prefer
> it the way it is now, as vmx_asm_do_vmentry is not performance
> critical (as being used exactly once per HVM vCPU).

There are a number of places where we have ENTRY()-like constructs but
don't want the padding with it.

Would an __ENTRY() macro go down well?  I can spin a patch for it.

>>  However...
>>
>>> +.Lvmx_do_vmentry:
>>>          call vmx_intr_assist
>>>          call nvmx_switch_guest
>>>          ASSERT_NOT_IN_ATOMIC
>>>  
>>> -        GET_CURRENT(%rbx)
>>> -        cli
>> The movement of this cli indicates a possible issue.
>>
>> If we have softirqs pending, we jump to .Lvmx_process_softirqs, which
>> calls do_softirq, and then jumps back to the top of .Lvmx_do_vmentry,
>> which reruns the top of do_vmentry with interrupts now enabled.
> That was this way already before. The "cli" got moved only past
> some address calculation (which clearly doesn't need to be done
> with interrupts disabled).

Sorry - I wasn't clear.  It was simply the cli moving that caused
me to notice, rather than the behaviour actually changing.

>
>> First of all, I can't see anything in vmx_intr_assist or
>> nvmx_switch_guest which should require calling multiple times on a
>> vmentry.  They are also expecting to be called with interrupts disabled
>> (although I can't spot anything obvious in the callpath which would be
>> affected).
> And both of these functions had been called before disabling
> interrupts.

I need more coffee - I had mentally swapped cli and sti.

My point about re-executing it does still apply.  Looking at the code, I
do not believe it is correct to be executing vmx_intr_assist or
nvmx_switch_guest multiple times on a context switch to an HVM VCPU. 
vmx_intr_assist at the very least has a huge amount of work to do before
it considers exiting.

It does appear that there is possible interaction between do_softirq()
and vmx_intr_assist(), at which point vmx_intr_assist() should be run
after do_softirq(), which removes the apparently redundant run with
interrupts enabled.

>
>>> -        cmpb $0,VCPU_vmx_launched(%rbx)
>>>          pop  %r15
>>>          pop  %r14
>>>          pop  %r13
>>>          pop  %r12
>>>          pop  %rbp
>>> +        mov  %rax,%cr2
>>> +        cmpb $0,VCPU_vmx_launched(%rbx)
>> Again, I presume the move of "mov %rax,%cr2" is about the %rax data hazard?
> The %cr2 write's move is indeed debatable - I tried to get it farther
> away from the producer of the data in %rax, but it's not clear
> whether that's very useful. The second purpose was to get
> something interleaved with the many "pop"s, so that the CPU can
> get busy other than just its memory load ports. If controversial
> I'm fine with undoing that change.
>
> Jan

From my understanding of a serialising instruction, it forces the
completion of all previous instructions before starting, and prevents
the issue of any subsequent instructions until it itself has completed.

Therefore, I doubt it has the intended effect.

~Andrew

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH v3 3/4] VMX: use proper instruction mnemonics if assembler supports them
  2013-08-26 10:18         ` [PATCH v3 " Jan Beulich
@ 2013-08-26 13:05           ` Andrew Cooper
  2013-08-26 13:20             ` Jan Beulich
  2013-08-26 14:03             ` [PATCH v4 " Jan Beulich
  0 siblings, 2 replies; 60+ messages in thread
From: Andrew Cooper @ 2013-08-26 13:05 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel, Keir Fraser, Eddie Dong, Jun Nakajima

On 26/08/2013 11:18, Jan Beulich wrote:
> --- a/xen/include/asm-x86/hvm/vmx/vmx.h
> +++ b/xen/include/asm-x86/hvm/vmx/vmx.h
> @@ -282,27 +282,44 @@ extern uint8_t posted_intr_vector;
>  
>  static inline void __vmptrld(u64 addr)
>  {
> -    asm volatile ( VMPTRLD_OPCODE
> -                   MODRM_EAX_06
> +    asm volatile (
> +#ifdef HAVE_GAS_VMX
> +                   "vmptrld %0\n"
> +#else
> +                   VMPTRLD_OPCODE MODRM_EAX_06
> +#endif
>                     /* CF==1 or ZF==1 --> crash (ud2) */
>                     UNLIKELY_START(be, vmptrld)
>                     "\tud2\n"
>                     UNLIKELY_END_SECTION
>                     :
> +#ifdef HAVE_GAS_VMX
> +                   : "m" (addr)
> +#else
>                     : "a" (&addr)
> +#endif
>                     : "memory");
>  }
>  
>  static inline void __vmpclear(u64 addr)
>  {
> -    asm volatile ( VMCLEAR_OPCODE
> -                   MODRM_EAX_06
> +    asm volatile (
> +#ifdef HAVE_GAS_VMX
> +                   "vmclear %0\n"
> +#else
> +                   VMCLEAR_OPCODE MODRM_EAX_06
> +#endif
>                     /* CF==1 or ZF==1 --> crash (ud2) */
>                     UNLIKELY_START(be, vmclear)
>                     "\tud2\n"
>                     UNLIKELY_END_SECTION
> +#ifdef HAVE_GAS_VMX
> +                   : "+m" (addr)

Why is this "+m" ?  (and specifically different to some of its counterparts)

> +                   :
> +#else
>                     :
>                     : "a" (&addr)
> +#endif
>                     : "memory");
>  }
>  
> @@ -325,30 +342,47 @@ static inline unsigned long __vmread(uns
>  
>  static inline void __vmwrite(unsigned long field, unsigned long value)
>  {
> -    asm volatile ( VMWRITE_OPCODE
> -                   MODRM_EAX_ECX
> +    asm volatile (
> +#ifdef HAVE_GAS_VMX
> +                   "vmwrite %1, %0\n"
> +#else
> +                   VMWRITE_OPCODE MODRM_EAX_ECX
> +#endif
>                     /* CF==1 or ZF==1 --> crash (ud2) */
>                     UNLIKELY_START(be, vmwrite)
>                     "\tud2\n"
>                     UNLIKELY_END_SECTION
>                     : 
> +#ifdef HAVE_GAS_VMX
> +                   : "r" (field) , "rm" (value)
> +#else
>                     : "a" (field) , "c" (value)
> +#endif
>                     : "memory");
>  }
>  
> -static inline unsigned long __vmread_safe(unsigned long field, int *error)
> +static inline bool_t __vmread_safe(unsigned long field, unsigned long *value)
>  {
> -    unsigned long ecx;
> +    bool_t okay;
>  
> -    asm volatile ( VMREAD_OPCODE
> -                   MODRM_EAX_ECX
> -                   /* CF==1 or ZF==1 --> rc = -1 */
> -                   "setna %b0 ; neg %0"
> -                   : "=q" (*error), "=c" (ecx)
> -                   : "0" (0), "a" (field)
> +    asm volatile (
> +#ifdef HAVE_GAS_VMX
> +                   "vmread %q2, %1\n\t"
> +#else
> +                   VMREAD_OPCODE MODRM_EAX_ECX
> +#endif
> +                   /* CF==1 or ZF==1 --> rc = 0 */
> +                   "setnbe %0"
> +#ifdef HAVE_GAS_VMX
> +                   : "=qm" (okay), "=rm" (*value)
> +                   : "r" (field)
> +#else
> +                   : "=qm" (okay), "=c" (*value)
> +                   : "a" (field)
> +#endif

Why the %q2 qualifier with "r" (field)?  Would it not be cleaner as %2
and "q" (field) ?  (And similar lower down)

(My understanding of asm constraints is not fantastic, so I apologise if
there is a good and obvious reason for this)

Otherwise, looking much cleaner. :)

~Andrew

>                     : "memory");
>  
> -    return ecx;
> +    return okay;
>  }
>  
>  static inline void __invept(int type, u64 eptp, u64 gpa)
> @@ -365,14 +399,22 @@ static inline void __invept(int type, u6
>           !cpu_has_vmx_ept_invept_single_context )
>          type = INVEPT_ALL_CONTEXT;
>  
> -    asm volatile ( INVEPT_OPCODE
> -                   MODRM_EAX_08
> +    asm volatile (
> +#ifdef HAVE_GAS_EPT
> +                   "invept %0, %q1\n"
> +#else
> +                   INVEPT_OPCODE MODRM_EAX_08
> +#endif
>                     /* CF==1 or ZF==1 --> crash (ud2) */
>                     UNLIKELY_START(be, invept)
>                     "\tud2\n"
>                     UNLIKELY_END_SECTION
>                     :
> +#ifdef HAVE_GAS_EPT
> +                   : "m" (operand), "r" (type)
> +#else
>                     : "a" (&operand), "c" (type)
> +#endif
>                     : "memory" );
>  }
>  
> @@ -385,7 +427,12 @@ static inline void __invvpid(int type, u
>      } __attribute__ ((packed)) operand = {vpid, 0, gva};
>  
>      /* Fix up #UD exceptions which occur when TLBs are flushed before VMXON. */
> -    asm volatile ( "1: " INVVPID_OPCODE MODRM_EAX_08
> +    asm volatile ( "1: "
> +#ifdef HAVE_GAS_EPT
> +                   "invvpid %0, %q1\n"
> +#else
> +                   INVVPID_OPCODE MODRM_EAX_08
> +#endif
>                     /* CF==1 or ZF==1 --> crash (ud2) */
>                     UNLIKELY_START(be, invvpid)
>                     "\tud2\n"
> @@ -393,7 +440,11 @@ static inline void __invvpid(int type, u
>                     "2:"
>                     _ASM_EXTABLE(1b, 2b)
>                     :
> +#ifdef HAVE_GAS_EPT
> +                   : "m" (operand), "r" (type)
> +#else
>                     : "a" (&operand), "c" (type)
> +#endif
>                     : "memory" );
>  }
>  

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 1/4] VMX: streamline entry.S code
  2013-08-26 11:48       ` Andrew Cooper
@ 2013-08-26 13:12         ` Jan Beulich
  2013-08-26 13:22           ` Andrew Cooper
  0 siblings, 1 reply; 60+ messages in thread
From: Jan Beulich @ 2013-08-26 13:12 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: xen-devel, Keir Fraser, Eddie Dong, Jun Nakajima

>>> On 26.08.13 at 13:48, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
> On 26/08/2013 12:01, Jan Beulich wrote:
>>>> -.globl vmx_asm_do_vmentry
>>>> -vmx_asm_do_vmentry:
>>> If you move the ENTRY(vmx_asm_do_vmentry) up from below, you should be
>>> able to completely drop the jmp in it.
>> That would be possible, at the expense of added padding. I prefer
>> it the way it is now, as vmx_asm_do_vmentry is not performance
>> critical (as being used exactly once per HVM vCPU).
> 
> There are a number of places where we have ENTRY()-like constructs but
> don't want the padding with it.
> 
> Would an __ENTRY() macro go down well?  I can spin a patch for it.

x86 Linux has GLOBAL() for that purpose - I'd like this better than
__ENTRY(), both from a name space perspective and for
describing its purpose.

> My point about re-executing it does still apply.  Looking at the code, I
> do not believe it is correct to be executing vmx_intr_assist or
> nvmx_switch_guest multiple times on a context switch to an HVM VCPU. 
> vmx_intr_assist at the very least has a huge amount of work to do before
> it considers exiting.
> 
> It does appear that there is possible interaction between do_softirq()
> and vmx_intr_assist(), at which point vmx_intr_assist() should be run
> after do_softirq(), which removes the apparently redundant run with
> interrupts enabled.

None of this seems related to the patch anymore - if you think
there's more stuff that needs changing, let's discuss this in a
separate thread.

>> The %cr2 write's move is indeed debatable - I tried to get it farther
>> away from the producer of the data in %rax, but it's not clear
>> whether that's very useful. The second purpose was to get
>> something interleaved with the many "pop"s, so that the CPU can
>> get busy other than just its memory load ports. If controversial
>> I'm fine with undoing that change.
> 
> From my understanding of a serialising instruction, it forces the
> completion of all previous instructions before starting, and prevents
> the issue of any subsequent instructions until it itself has completed.
> 
> Therefore, I doubt it has the intended effect.

Wait - this is again also a separation from the producer of the
data. Whether modern CPUs can deal with that I'm not sure,
but it surely doesn't hurt to hide eventual latency.

Jan

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH v3 3/4] VMX: use proper instruction mnemonics if assembler supports them
  2013-08-26 13:05           ` Andrew Cooper
@ 2013-08-26 13:20             ` Jan Beulich
  2013-08-26 14:03             ` [PATCH v4 " Jan Beulich
  1 sibling, 0 replies; 60+ messages in thread
From: Jan Beulich @ 2013-08-26 13:20 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: xen-devel, Keir Fraser, Eddie Dong, Jun Nakajima

>>> On 26.08.13 at 15:05, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
> On 26/08/2013 11:18, Jan Beulich wrote:
>>  static inline void __vmpclear(u64 addr)
>>  {
>> -    asm volatile ( VMCLEAR_OPCODE
>> -                   MODRM_EAX_06
>> +    asm volatile (
>> +#ifdef HAVE_GAS_VMX
>> +                   "vmclear %0\n"
>> +#else
>> +                   VMCLEAR_OPCODE MODRM_EAX_06
>> +#endif
>>                     /* CF==1 or ZF==1 --> crash (ud2) */
>>                     UNLIKELY_START(be, vmclear)
>>                     "\tud2\n"
>>                     UNLIKELY_END_SECTION
>> +#ifdef HAVE_GAS_VMX
>> +                   : "+m" (addr)
> 
> Why is this "+m" ?  (and specifically different to some of its counterparts)

Apparently I misread "The operand of this instruction is always 64 bits
and is always in memory. If the operand is the current-VMCS
pointer, then that pointer is made invalid (set to
FFFFFFFF_FFFFFFFFH)" to mean that the operand gets overwritten
rather than the CPU-internal pointer.
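
Purely as a generic illustration of the difference (nothing VMX-specific):
with "m" the compiler may assume the slot is left intact, with "+m" it
must assume the asm rewrote it.

    /* Sketch only: "m" = input-only memory operand, "+m" = read-write. */
    static inline void uses_ro(const unsigned long *p)
    {
        asm volatile ( "" : : "m" (*p) );
    }

    static inline void uses_rw(unsigned long *p)
    {
        asm volatile ( "" : "+m" (*p) );
    }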

>> +    asm volatile (
>> +#ifdef HAVE_GAS_VMX
>> +                   "vmread %q2, %1\n\t"
>> +#else
>> +                   VMREAD_OPCODE MODRM_EAX_ECX
>> +#endif
>> +                   /* CF==1 or ZF==1 --> rc = 0 */
>> +                   "setnbe %0"
>> +#ifdef HAVE_GAS_VMX
>> +                   : "=qm" (okay), "=rm" (*value)
>> +                   : "r" (field)
>> +#else
>> +                   : "=qm" (okay), "=c" (*value)
>> +                   : "a" (field)
>> +#endif
> 
> Why the %q2 qualifier with "r" (field)?  Would it not be cleaner as %2
> and "q" (field) ?  (And similar lower down)

That q was just a leftover from the macro variant. It's not wrong,
but confusing. I'll remove it.
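
For reference (generic sketch, not the Xen code): the "q" operand modifier
merely makes gcc print the 64-bit name of whatever register it picked,
e.g. %rax rather than %eax, so with a 64-bit operand it changes nothing.

    /* Sketch only: %q0 forces the 64-bit register name in the output. */
    static inline unsigned long zero_reg(void)
    {
        unsigned long r;
        asm ( "xor %q0, %q0" : "=r" (r) );
        return r;
    }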

Jan

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 1/4] VMX: streamline entry.S code
  2013-08-26 13:12         ` Jan Beulich
@ 2013-08-26 13:22           ` Andrew Cooper
  0 siblings, 0 replies; 60+ messages in thread
From: Andrew Cooper @ 2013-08-26 13:22 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel, Keir Fraser, Eddie Dong, Jun Nakajima

On 26/08/2013 14:12, Jan Beulich wrote:
>>>> On 26.08.13 at 13:48, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
>> On 26/08/2013 12:01, Jan Beulich wrote:
>>>>> -.globl vmx_asm_do_vmentry
>>>>> -vmx_asm_do_vmentry:
>>>> If you move the ENTRY(vmx_asm_do_vmentry) up from below, you should be
>>>> able to completely drop the jmp in it.
>>> That would be possible, at the expense of added padding. I prefer
>>> it the way it is now, as vmx_asm_do_vmentry is not performance
>>> critical (as being used exactly once per HVM vCPU).
>> There are a number of places where we have ENTRY()-like constructs but
>> don't want the padding with it.
>>
>> Would an __ENTRY() macro go down well?  I can spin a patch for it.
> x86 Linux has GLOBAL() for that purpose - I'd like this better than
> __ENTRY(), both from a name space perspective and for
> describing its purpose.

Ok - I will spin a patch.

>
>> My point about re-executing it does still apply.  Looking at the code, I
>> do not believe it is correct to be executing vmx_intr_assist or
>> nvmx_switch_guest multiple times on a context switch to an HVM VCPU. 
>> vmx_intr_assist at the very least has a huge amount of work to do before
>> it considers exiting.
>>
>> It does appear that there is possible interaction between do_softirq()
>> and vmx_intr_assist(), at which point vmx_intr_assist() should be run
>> after do_softirq(), which removes the apparently redundant run with
>> interrupts enabled.
> None of this seems related to the patch anymore - if you think
> there's more stuff that needs changing, let's discuss this in a
> separate thread.

Certainly.

>
>>> The %cr2 write's move is indeed debatable - I tried to get it farther
>>> away from the producer of the data in %rax, but it's not clear
>>> whether that's very useful. The second purpose was to get
>>> something interleaved with the many "pop"s, so that the CPU can
>>> get busy other than just its memory load ports. If controversial
>>> I'm fine with undoing that change.
>> From my understanding of a serialising instruction, it forces the
>> completion of all previous instructions before starting, and prevents
>> the issue of any subsequent instructions until it itself has completed.
>>
>> Therefore, I doubt it has the intended effect.
> Wait - this is again also a separation from the producer of the
> data. Whether modern CPUs can deal with that I'm not sure,
> but it surely doesn't hurt to hide eventual latency.
>
> Jan
>

For non-serialising instructions, it is a good idea (and likely something a
compiler would do anyway).  Moving the GET_CURRENT() will probably be quite
effective as most subsequent instructions depend on it.

Serialising instructions on the other hand will not be affected by these
issues (given their nature), but I would prefer to defer judgement to
someone who has a better idea of the microarchitectural implications.

Either way, as the concerns are now just down to playing with the
optimal static instruction scheduling,

Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

^ permalink raw reply	[flat|nested] 60+ messages in thread

* [PATCH v4 3/4] VMX: use proper instruction mnemonics if assembler supports them
  2013-08-26 13:05           ` Andrew Cooper
  2013-08-26 13:20             ` Jan Beulich
@ 2013-08-26 14:03             ` Jan Beulich
  2013-08-26 14:18               ` Andrew Cooper
  1 sibling, 1 reply; 60+ messages in thread
From: Jan Beulich @ 2013-08-26 14:03 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper, Keir Fraser, Eddie Dong, Jun Nakajima

[-- Attachment #1: Type: text/plain, Size: 9150 bytes --]

With the hex byte emission we were taking away a good part of
flexibility from the compiler, as for simplicity reasons these were
built using fixed operands. All halfway modern build environments
would allow using the mnemonics (but we can't disable the hex variants
yet, since the binutils around at the time gcc 4.1 got released didn't
support these yet).

I didn't convert __vmread() yet because that would, just like for
__vmread_safe(), imply converting to a macro so that the output operand
can be the caller supplied variable rather than an intermediate one. As
that would require touching all invocation points of __vmread() (of
which there are quite a few), I'd first like to be certain the approach
is acceptable; the main question being whether the now conditional code
might be considered to cause future maintenance issues, and the second
being that of parameter/argument ordering (here I made __vmread_safe()
match __vmwrite(), but one could also take the position that read and
write should use the inverse order of one another, in line with the
actual instruction operands).

Additionally I was quite puzzled to find that all the asm()-s involved
here have memory clobbers - what are they needed for? Or can they be
dropped at least in some cases?

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v4: drop a bogus "q" operand qualifier from the __vmread_safe() inline
    assembly; operand of vmclear is only an input (both pointed out by
    Andrew Cooper).

--- a/Config.mk
+++ b/Config.mk
@@ -4,6 +4,12 @@ ifeq ($(filter /%,$(XEN_ROOT)),)
 $(error XEN_ROOT must be absolute)
 endif
 
+# Convenient variables
+comma   := ,
+squote  := '
+empty   :=
+space   := $(empty) $(empty)
+
 # fallback for older make
 realpath = $(wildcard $(foreach file,$(1),$(shell cd -P $(dir $(file)) && echo "$$PWD/$(notdir $(file))")))
 
@@ -128,6 +134,21 @@ endef
 check-$(gcc) = $(call cc-ver-check,CC,0x040100,"Xen requires at least gcc-4.1")
 $(eval $(check-y))
 
+# as-insn: Check whether assembler supports an instruction.
+# Usage: cflags-y += $(call as-insn "insn",option-yes,option-no)
+as-insn = $(if $(shell echo 'void _(void) { asm volatile ( $(2) ); }' \
+                       | $(1) -c -x c -o /dev/null - 2>&1),$(4),$(3))
+
+# as-insn-check: Add an option to compilation flags, but only if insn is
+#                supported by assembler.
+# Usage: $(call as-insn-check CFLAGS,CC,"nop",-DHAVE_GAS_NOP)
+as-insn-check = $(eval $(call as-insn-check-closure,$(1),$(2),$(3),$(4)))
+define as-insn-check-closure
+    ifeq ($$(call as-insn,$$($(2)),$(3),y,n),y)
+        $(1) += $(4)
+    endif
+endef
+
 define buildmakevars2shellvars
     export PREFIX="$(PREFIX)";                                            \
     export XEN_SCRIPT_DIR="$(XEN_SCRIPT_DIR)";                            \
--- a/xen/arch/x86/Rules.mk
+++ b/xen/arch/x86/Rules.mk
@@ -28,6 +28,8 @@ CFLAGS += -msoft-float
 
 $(call cc-options-add,CFLAGS,CC,$(EMBEDDED_EXTRA_CFLAGS))
 $(call cc-option-add,CFLAGS,CC,-Wnested-externs)
+$(call as-insn-check,CFLAGS,CC,"vmcall",-DHAVE_GAS_VMX)
+$(call as-insn-check,CFLAGS,CC,"invept (%rax)$$(comma)%rax",-DHAVE_GAS_EPT)
 
 ifeq ($(supervisor_mode_kernel),y)
 CFLAGS += -DCONFIG_X86_SUPERVISOR_MODE_KERNEL=1
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -1292,12 +1292,11 @@ void vmx_do_resume(struct vcpu *v)
     reset_stack_and_jump(vmx_asm_do_vmentry);
 }
 
-static unsigned long vmr(unsigned long field)
+static inline unsigned long vmr(unsigned long field)
 {
-    int rc;
     unsigned long val;
-    val = __vmread_safe(field, &rc);
-    return rc ? 0 : val;
+
+    return __vmread_safe(field, &val) ? val : 0;
 }
 
 static void vmx_dump_sel(char *name, uint32_t selector)
--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -951,13 +951,11 @@ fallback:
         vvmcs_to_shadow(vvmcs, field[i]);
 }
 
-static void shadow_to_vvmcs(void *vvmcs, unsigned int field)
+static inline void shadow_to_vvmcs(void *vvmcs, unsigned int field)
 {
-    u64 value;
-    int rc;
+    unsigned long value;
 
-    value = __vmread_safe(field, &rc);
-    if ( !rc )
+    if ( __vmread_safe(field, &value) )
         __set_vvmcs(vvmcs, field, value);
 }
 
--- a/xen/include/asm-x86/hvm/vmx/vmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h
@@ -282,27 +282,43 @@ extern uint8_t posted_intr_vector;
 
 static inline void __vmptrld(u64 addr)
 {
-    asm volatile ( VMPTRLD_OPCODE
-                   MODRM_EAX_06
+    asm volatile (
+#ifdef HAVE_GAS_VMX
+                   "vmptrld %0\n"
+#else
+                   VMPTRLD_OPCODE MODRM_EAX_06
+#endif
                    /* CF==1 or ZF==1 --> crash (ud2) */
                    UNLIKELY_START(be, vmptrld)
                    "\tud2\n"
                    UNLIKELY_END_SECTION
                    :
+#ifdef HAVE_GAS_VMX
+                   : "m" (addr)
+#else
                    : "a" (&addr)
+#endif
                    : "memory");
 }
 
 static inline void __vmpclear(u64 addr)
 {
-    asm volatile ( VMCLEAR_OPCODE
-                   MODRM_EAX_06
+    asm volatile (
+#ifdef HAVE_GAS_VMX
+                   "vmclear %0\n"
+#else
+                   VMCLEAR_OPCODE MODRM_EAX_06
+#endif
                    /* CF==1 or ZF==1 --> crash (ud2) */
                    UNLIKELY_START(be, vmclear)
                    "\tud2\n"
                    UNLIKELY_END_SECTION
                    :
+#ifdef HAVE_GAS_VMX
+                   : "m" (addr)
+#else
                    : "a" (&addr)
+#endif
                    : "memory");
 }
 
@@ -325,30 +341,47 @@ static inline unsigned long __vmread(uns
 
 static inline void __vmwrite(unsigned long field, unsigned long value)
 {
-    asm volatile ( VMWRITE_OPCODE
-                   MODRM_EAX_ECX
+    asm volatile (
+#ifdef HAVE_GAS_VMX
+                   "vmwrite %1, %0\n"
+#else
+                   VMWRITE_OPCODE MODRM_EAX_ECX
+#endif
                    /* CF==1 or ZF==1 --> crash (ud2) */
                    UNLIKELY_START(be, vmwrite)
                    "\tud2\n"
                    UNLIKELY_END_SECTION
                    : 
+#ifdef HAVE_GAS_VMX
+                   : "r" (field) , "rm" (value)
+#else
                    : "a" (field) , "c" (value)
+#endif
                    : "memory");
 }
 
-static inline unsigned long __vmread_safe(unsigned long field, int *error)
+static inline bool_t __vmread_safe(unsigned long field, unsigned long *value)
 {
-    unsigned long ecx;
+    bool_t okay;
 
-    asm volatile ( VMREAD_OPCODE
-                   MODRM_EAX_ECX
-                   /* CF==1 or ZF==1 --> rc = -1 */
-                   "setna %b0 ; neg %0"
-                   : "=q" (*error), "=c" (ecx)
-                   : "0" (0), "a" (field)
+    asm volatile (
+#ifdef HAVE_GAS_VMX
+                   "vmread %2, %1\n\t"
+#else
+                   VMREAD_OPCODE MODRM_EAX_ECX
+#endif
+                   /* CF==1 or ZF==1 --> rc = 0 */
+                   "setnbe %0"
+#ifdef HAVE_GAS_VMX
+                   : "=qm" (okay), "=rm" (*value)
+                   : "r" (field)
+#else
+                   : "=qm" (okay), "=c" (*value)
+                   : "a" (field)
+#endif
                    : "memory");
 
-    return ecx;
+    return okay;
 }
 
 static inline void __invept(int type, u64 eptp, u64 gpa)
@@ -365,14 +398,22 @@ static inline void __invept(int type, u6
          !cpu_has_vmx_ept_invept_single_context )
         type = INVEPT_ALL_CONTEXT;
 
-    asm volatile ( INVEPT_OPCODE
-                   MODRM_EAX_08
+    asm volatile (
+#ifdef HAVE_GAS_EPT
+                   "invept %0, %q1\n"
+#else
+                   INVEPT_OPCODE MODRM_EAX_08
+#endif
                    /* CF==1 or ZF==1 --> crash (ud2) */
                    UNLIKELY_START(be, invept)
                    "\tud2\n"
                    UNLIKELY_END_SECTION
                    :
+#ifdef HAVE_GAS_EPT
+                   : "m" (operand), "r" (type)
+#else
                    : "a" (&operand), "c" (type)
+#endif
                    : "memory" );
 }
 
@@ -385,7 +426,12 @@ static inline void __invvpid(int type, u
     } __attribute__ ((packed)) operand = {vpid, 0, gva};
 
     /* Fix up #UD exceptions which occur when TLBs are flushed before VMXON. */
-    asm volatile ( "1: " INVVPID_OPCODE MODRM_EAX_08
+    asm volatile ( "1: "
+#ifdef HAVE_GAS_EPT
+                   "invvpid %0, %q1\n"
+#else
+                   INVVPID_OPCODE MODRM_EAX_08
+#endif
                    /* CF==1 or ZF==1 --> crash (ud2) */
                    UNLIKELY_START(be, invvpid)
                    "\tud2\n"
@@ -393,7 +439,11 @@ static inline void __invvpid(int type, u
                    "2:"
                    _ASM_EXTABLE(1b, 2b)
                    :
+#ifdef HAVE_GAS_EPT
+                   : "m" (operand), "r" (type)
+#else
                    : "a" (&operand), "c" (type)
+#endif
                    : "memory" );
 }
 



[-- Attachment #2: VMX-use-mnemonics.patch --]
[-- Type: text/plain, Size: 9214 bytes --]

VMX: use proper instruction mnemonics if assembler supports them

With the hex byte emission we were taking away a good part of
flexibility from the compiler, as for simplicity reasons these were
built using fixed operands. All half way modern build environments
would allow using the mnemonics (but we can't disable the hex variants
yet, since the binutils around at the time gcc 4.1 got released didn't
support these yet).

I didn't convert __vmread() yet because that would, just like for
__vmread_safe(), imply converting to a macro so that the output operand
can be the caller supplied variable rather than an intermediate one. As
that would require touching all invocation points of __vmread() (of
which there are quite a few), I'd first like to be certain the approach
is acceptable; the main question being whether the now conditional code
might be considered to cause future maintenance issues, and the second
being that of parameter/argument ordering (here I made __vmread_safe()
match __vmwrite(), but one could also take the position that read and
write should use the inverse order of one another, in line with the
actual instruction operands).

Additionally I was quite puzzled to find that all the asm()-s involved
here have memory clobbers - what are they needed for? Or can they be
dropped at least in some cases?

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v4: drop a bogus "q" operand qualifier from the __vmread_safe() inline
    assembly; operand of vmclear is only an input (both pointed out by
    Andrew Cooper).

--- a/Config.mk
+++ b/Config.mk
@@ -4,6 +4,12 @@ ifeq ($(filter /%,$(XEN_ROOT)),)
 $(error XEN_ROOT must be absolute)
 endif
 
+# Convenient variables
+comma   := ,
+squote  := '
+empty   :=
+space   := $(empty) $(empty)
+
 # fallback for older make
 realpath = $(wildcard $(foreach file,$(1),$(shell cd -P $(dir $(file)) && echo "$$PWD/$(notdir $(file))")))
 
@@ -128,6 +134,21 @@ endef
 check-$(gcc) = $(call cc-ver-check,CC,0x040100,"Xen requires at least gcc-4.1")
 $(eval $(check-y))
 
+# as-insn: Check whether assembler supports an instruction.
+# Usage: cflags-y += $(call as-insn "insn",option-yes,option-no)
+as-insn = $(if $(shell echo 'void _(void) { asm volatile ( $(2) ); }' \
+                       | $(1) -c -x c -o /dev/null - 2>&1),$(4),$(3))
+
+# as-insn-check: Add an option to compilation flags, but only if insn is
+#                supported by assembler.
+# Usage: $(call as-insn-check CFLAGS,CC,"nop",-DHAVE_GAS_NOP)
+as-insn-check = $(eval $(call as-insn-check-closure,$(1),$(2),$(3),$(4)))
+define as-insn-check-closure
+    ifeq ($$(call as-insn,$$($(2)),$(3),y,n),y)
+        $(1) += $(4)
+    endif
+endef
+
 define buildmakevars2shellvars
     export PREFIX="$(PREFIX)";                                            \
     export XEN_SCRIPT_DIR="$(XEN_SCRIPT_DIR)";                            \
--- a/xen/arch/x86/Rules.mk
+++ b/xen/arch/x86/Rules.mk
@@ -28,6 +28,8 @@ CFLAGS += -msoft-float
 
 $(call cc-options-add,CFLAGS,CC,$(EMBEDDED_EXTRA_CFLAGS))
 $(call cc-option-add,CFLAGS,CC,-Wnested-externs)
+$(call as-insn-check,CFLAGS,CC,"vmcall",-DHAVE_GAS_VMX)
+$(call as-insn-check,CFLAGS,CC,"invept (%rax)$$(comma)%rax",-DHAVE_GAS_EPT)
 
 ifeq ($(supervisor_mode_kernel),y)
 CFLAGS += -DCONFIG_X86_SUPERVISOR_MODE_KERNEL=1
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -1292,12 +1292,11 @@ void vmx_do_resume(struct vcpu *v)
     reset_stack_and_jump(vmx_asm_do_vmentry);
 }
 
-static unsigned long vmr(unsigned long field)
+static inline unsigned long vmr(unsigned long field)
 {
-    int rc;
     unsigned long val;
-    val = __vmread_safe(field, &rc);
-    return rc ? 0 : val;
+
+    return __vmread_safe(field, &val) ? val : 0;
 }
 
 static void vmx_dump_sel(char *name, uint32_t selector)
--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -951,13 +951,11 @@ fallback:
         vvmcs_to_shadow(vvmcs, field[i]);
 }
 
-static void shadow_to_vvmcs(void *vvmcs, unsigned int field)
+static inline void shadow_to_vvmcs(void *vvmcs, unsigned int field)
 {
-    u64 value;
-    int rc;
+    unsigned long value;
 
-    value = __vmread_safe(field, &rc);
-    if ( !rc )
+    if ( __vmread_safe(field, &value) )
         __set_vvmcs(vvmcs, field, value);
 }
 
--- a/xen/include/asm-x86/hvm/vmx/vmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h
@@ -282,27 +282,43 @@ extern uint8_t posted_intr_vector;
 
 static inline void __vmptrld(u64 addr)
 {
-    asm volatile ( VMPTRLD_OPCODE
-                   MODRM_EAX_06
+    asm volatile (
+#ifdef HAVE_GAS_VMX
+                   "vmptrld %0\n"
+#else
+                   VMPTRLD_OPCODE MODRM_EAX_06
+#endif
                    /* CF==1 or ZF==1 --> crash (ud2) */
                    UNLIKELY_START(be, vmptrld)
                    "\tud2\n"
                    UNLIKELY_END_SECTION
                    :
+#ifdef HAVE_GAS_VMX
+                   : "m" (addr)
+#else
                    : "a" (&addr)
+#endif
                    : "memory");
 }
 
 static inline void __vmpclear(u64 addr)
 {
-    asm volatile ( VMCLEAR_OPCODE
-                   MODRM_EAX_06
+    asm volatile (
+#ifdef HAVE_GAS_VMX
+                   "vmclear %0\n"
+#else
+                   VMCLEAR_OPCODE MODRM_EAX_06
+#endif
                    /* CF==1 or ZF==1 --> crash (ud2) */
                    UNLIKELY_START(be, vmclear)
                    "\tud2\n"
                    UNLIKELY_END_SECTION
                    :
+#ifdef HAVE_GAS_VMX
+                   : "m" (addr)
+#else
                    : "a" (&addr)
+#endif
                    : "memory");
 }
 
@@ -325,30 +341,47 @@ static inline unsigned long __vmread(uns
 
 static inline void __vmwrite(unsigned long field, unsigned long value)
 {
-    asm volatile ( VMWRITE_OPCODE
-                   MODRM_EAX_ECX
+    asm volatile (
+#ifdef HAVE_GAS_VMX
+                   "vmwrite %1, %0\n"
+#else
+                   VMWRITE_OPCODE MODRM_EAX_ECX
+#endif
                    /* CF==1 or ZF==1 --> crash (ud2) */
                    UNLIKELY_START(be, vmwrite)
                    "\tud2\n"
                    UNLIKELY_END_SECTION
                    : 
+#ifdef HAVE_GAS_VMX
+                   : "r" (field) , "rm" (value)
+#else
                    : "a" (field) , "c" (value)
+#endif
                    : "memory");
 }
 
-static inline unsigned long __vmread_safe(unsigned long field, int *error)
+static inline bool_t __vmread_safe(unsigned long field, unsigned long *value)
 {
-    unsigned long ecx;
+    bool_t okay;
 
-    asm volatile ( VMREAD_OPCODE
-                   MODRM_EAX_ECX
-                   /* CF==1 or ZF==1 --> rc = -1 */
-                   "setna %b0 ; neg %0"
-                   : "=q" (*error), "=c" (ecx)
-                   : "0" (0), "a" (field)
+    asm volatile (
+#ifdef HAVE_GAS_VMX
+                   "vmread %2, %1\n\t"
+#else
+                   VMREAD_OPCODE MODRM_EAX_ECX
+#endif
+                   /* CF==1 or ZF==1 --> rc = 0 */
+                   "setnbe %0"
+#ifdef HAVE_GAS_VMX
+                   : "=qm" (okay), "=rm" (*value)
+                   : "r" (field)
+#else
+                   : "=qm" (okay), "=c" (*value)
+                   : "a" (field)
+#endif
                    : "memory");
 
-    return ecx;
+    return okay;
 }
 
 static inline void __invept(int type, u64 eptp, u64 gpa)
@@ -365,14 +398,22 @@ static inline void __invept(int type, u6
          !cpu_has_vmx_ept_invept_single_context )
         type = INVEPT_ALL_CONTEXT;
 
-    asm volatile ( INVEPT_OPCODE
-                   MODRM_EAX_08
+    asm volatile (
+#ifdef HAVE_GAS_EPT
+                   "invept %0, %q1\n"
+#else
+                   INVEPT_OPCODE MODRM_EAX_08
+#endif
                    /* CF==1 or ZF==1 --> crash (ud2) */
                    UNLIKELY_START(be, invept)
                    "\tud2\n"
                    UNLIKELY_END_SECTION
                    :
+#ifdef HAVE_GAS_EPT
+                   : "m" (operand), "r" (type)
+#else
                    : "a" (&operand), "c" (type)
+#endif
                    : "memory" );
 }
 
@@ -385,7 +426,12 @@ static inline void __invvpid(int type, u
     } __attribute__ ((packed)) operand = {vpid, 0, gva};
 
     /* Fix up #UD exceptions which occur when TLBs are flushed before VMXON. */
-    asm volatile ( "1: " INVVPID_OPCODE MODRM_EAX_08
+    asm volatile ( "1: "
+#ifdef HAVE_GAS_EPT
+                   "invvpid %0, %q1\n"
+#else
+                   INVVPID_OPCODE MODRM_EAX_08
+#endif
                    /* CF==1 or ZF==1 --> crash (ud2) */
                    UNLIKELY_START(be, invvpid)
                    "\tud2\n"
@@ -393,7 +439,11 @@ static inline void __invvpid(int type, u
                    "2:"
                    _ASM_EXTABLE(1b, 2b)
                    :
+#ifdef HAVE_GAS_EPT
+                   : "m" (operand), "r" (type)
+#else
                    : "a" (&operand), "c" (type)
+#endif
                    : "memory" );
 }
 

[-- Attachment #3: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH v4 3/4] VMX: use proper instruction mnemonics if assembler supports them
  2013-08-26 14:03             ` [PATCH v4 " Jan Beulich
@ 2013-08-26 14:18               ` Andrew Cooper
  2013-08-26 14:29                 ` Jan Beulich
  2013-08-26 15:31                 ` [PATCH v5 " Jan Beulich
  0 siblings, 2 replies; 60+ messages in thread
From: Andrew Cooper @ 2013-08-26 14:18 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel, Keir Fraser, Eddie Dong, Jun Nakajima

On 26/08/2013 15:03, Jan Beulich wrote:
> -static inline unsigned long __vmread_safe(unsigned long field, int *error)
> +static inline bool_t __vmread_safe(unsigned long field, unsigned long *value)
>  {
> -    unsigned long ecx;
> +    bool_t okay;
>  
> -    asm volatile ( VMREAD_OPCODE
> -                   MODRM_EAX_ECX
> -                   /* CF==1 or ZF==1 --> rc = -1 */
> -                   "setna %b0 ; neg %0"
> -                   : "=q" (*error), "=c" (ecx)
> -                   : "0" (0), "a" (field)
> +    asm volatile (
> +#ifdef HAVE_GAS_VMX
> +                   "vmread %2, %1\n\t"
> +#else
> +                   VMREAD_OPCODE MODRM_EAX_ECX
> +#endif
> +                   /* CF==1 or ZF==1 --> rc = 0 */
> +                   "setnbe %0"
> +#ifdef HAVE_GAS_VMX
> +                   : "=qm" (okay), "=rm" (*value)
> +                   : "r" (field)
> +#else
> +                   : "=qm" (okay), "=c" (*value)
> +                   : "a" (field)
> +#endif

From what I can work out while googling, the q constraint is equivalent
to the r constraint for 64bit code.

For consistency's sake, I would suggest "=rm" (okay) here
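
i.e., reusing the surrounding operands, something along these lines
(sketch of the HAVE_GAS_VMX branch only; the patch as posted keeps "=qm"):

    asm volatile ( "vmread %2, %1\n\t"
                   "setnbe %0"
                   : "=rm" (okay), "=rm" (*value)
                   : "r" (field)
                   : "memory" );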

>                     : "memory");
>  
> -    return ecx;
> +    return okay;
>  }
>  
>  static inline void __invept(int type, u64 eptp, u64 gpa)
> @@ -365,14 +398,22 @@ static inline void __invept(int type, u6
>           !cpu_has_vmx_ept_invept_single_context )
>          type = INVEPT_ALL_CONTEXT;
>  
> -    asm volatile ( INVEPT_OPCODE
> -                   MODRM_EAX_08
> +    asm volatile (
> +#ifdef HAVE_GAS_EPT
> +                   "invept %0, %q1\n"

Another stray q

> +#else
> +                   INVEPT_OPCODE MODRM_EAX_08
> +#endif
>                     /* CF==1 or ZF==1 --> crash (ud2) */
>                     UNLIKELY_START(be, invept)
>                     "\tud2\n"
>                     UNLIKELY_END_SECTION
>                     :
> +#ifdef HAVE_GAS_EPT
> +                   : "m" (operand), "r" (type)
> +#else
>                     : "a" (&operand), "c" (type)
> +#endif
>                     : "memory" );
>  }
>  
> @@ -385,7 +426,12 @@ static inline void __invvpid(int type, u
>      } __attribute__ ((packed)) operand = {vpid, 0, gva};
>  
>      /* Fix up #UD exceptions which occur when TLBs are flushed before VMXON. */
> -    asm volatile ( "1: " INVVPID_OPCODE MODRM_EAX_08
> +    asm volatile ( "1: "
> +#ifdef HAVE_GAS_EPT
> +                   "invvpid %0, %q1\n"

And another

~Andrew

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH v4 3/4] VMX: use proper instruction mnemonics if assembler supports them
  2013-08-26 14:18               ` Andrew Cooper
@ 2013-08-26 14:29                 ` Jan Beulich
  2013-08-26 15:07                   ` Andrew Cooper
  2013-08-26 15:31                 ` [PATCH v5 " Jan Beulich
  1 sibling, 1 reply; 60+ messages in thread
From: Jan Beulich @ 2013-08-26 14:29 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: xen-devel, Keir Fraser, Eddie Dong, Jun Nakajima

>>> On 26.08.13 at 16:18, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
> On 26/08/2013 15:03, Jan Beulich wrote:
>> +    asm volatile (
>> +#ifdef HAVE_GAS_VMX
>> +                   "vmread %2, %1\n\t"
>> +#else
>> +                   VMREAD_OPCODE MODRM_EAX_ECX
>> +#endif
>> +                   /* CF==1 or ZF==1 --> rc = 0 */
>> +                   "setnbe %0"
>> +#ifdef HAVE_GAS_VMX
>> +                   : "=qm" (okay), "=rm" (*value)
>> +                   : "r" (field)
>> +#else
>> +                   : "=qm" (okay), "=c" (*value)
>> +                   : "a" (field)
>> +#endif
> 
> From what I can work out while googling, the q constraint is equivalent
> to the r constraint for 64bit code.
> 
> For consistency sake, I would suggest "=rm" (okay) here

And I'd like to keep it the way it is for generality's  sake (i.e. not
making the code more 32-bit unclean than we need to).

>> @@ -365,14 +398,22 @@ static inline void __invept(int type, u6
>>           !cpu_has_vmx_ept_invept_single_context )
>>          type = INVEPT_ALL_CONTEXT;
>>  
>> -    asm volatile ( INVEPT_OPCODE
>> -                   MODRM_EAX_08
>> +    asm volatile (
>> +#ifdef HAVE_GAS_EPT
>> +                   "invept %0, %q1\n"
> 
> Another stray q

No - operand 1 is of type "int", and while the high 32 bits get
ignored (i.e. we don't need to do any zero- or sign-extension), we
still need to specify the 64-bit register name here. Or wait - I
thought it would ignore the upper bits, but it's not documented to.
In which case this involves more than just dropping the q modifier.
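
For illustration (reusing __invept()'s locals, and keeping the "int"
parameter for the moment):

    /*
     * "%1"  -> 32-bit register name (e.g. %ecx): not a valid invept
     *          operand in long mode, so gas rejects it.
     * "%q1" -> 64-bit name (%rcx): accepted, but with an "int" operand
     *          the register's upper 32 bits aren't guaranteed to be clear.
     */
    asm volatile ( "invept %0, %q1" :: "m" (operand), "r" (type) : "memory" );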

>> @@ -385,7 +426,12 @@ static inline void __invvpid(int type, u
>>      } __attribute__ ((packed)) operand = {vpid, 0, gva};
>>  
>>      /* Fix up #UD exceptions which occur when TLBs are flushed before VMXON. */
>> -    asm volatile ( "1: " INVVPID_OPCODE MODRM_EAX_08
>> +    asm volatile ( "1: "
>> +#ifdef HAVE_GAS_EPT
>> +                   "invvpid %0, %q1\n"
> 
> And another

Same here.

Jan

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH v4 3/4] VMX: use proper instruction mnemonics if assembler supports them
  2013-08-26 14:29                 ` Jan Beulich
@ 2013-08-26 15:07                   ` Andrew Cooper
  2013-08-26 15:10                     ` Andrew Cooper
  2013-08-26 15:29                     ` Jan Beulich
  0 siblings, 2 replies; 60+ messages in thread
From: Andrew Cooper @ 2013-08-26 15:07 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel, Keir Fraser, Eddie Dong, Jun Nakajima

On 26/08/2013 15:29, Jan Beulich wrote:
>>>> On 26.08.13 at 16:18, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
>> On 26/08/2013 15:03, Jan Beulich wrote:
>>> +    asm volatile (
>>> +#ifdef HAVE_GAS_VMX
>>> +                   "vmread %2, %1\n\t"
>>> +#else
>>> +                   VMREAD_OPCODE MODRM_EAX_ECX
>>> +#endif
>>> +                   /* CF==1 or ZF==1 --> rc = 0 */
>>> +                   "setnbe %0"
>>> +#ifdef HAVE_GAS_VMX
>>> +                   : "=qm" (okay), "=rm" (*value)
>>> +                   : "r" (field)
>>> +#else
>>> +                   : "=qm" (okay), "=c" (*value)
>>> +                   : "a" (field)
>>> +#endif
>> From what I can work out while googling, the q constraint is equivalent
>> to the r constraint for 64bit code.
>>
>> For consistency sake, I would suggest "=rm" (okay) here
> And I'd like to keep it the way it is for generality's  sake (i.e. not
> making the code more 32-bit unclean than we need to).

Ok

>
>>> @@ -365,14 +398,22 @@ static inline void __invept(int type, u6
>>>           !cpu_has_vmx_ept_invept_single_context )
>>>          type = INVEPT_ALL_CONTEXT;
>>>  
>>> -    asm volatile ( INVEPT_OPCODE
>>> -                   MODRM_EAX_08
>>> +    asm volatile (
>>> +#ifdef HAVE_GAS_EPT
>>> +                   "invept %0, %q1\n"
>> Another stray q
> No - operand 1 is of type "int", and while the high 32 bits get
> ignored (i.e. we don't need to do any zero- or sign-extension), we
> still need to specify the 64-bit register name here. Or wait - I
> thought it would ignore the upper bits, but it's not documented to.
> In which case this involves more than just dropping the q modifier.

I was more referring to having a q in the instruction, yet an "r" in the
parameter list.  I would suggest

INV{EPT,VPID} is strictly defined to take r64 as the "type" parameter in
long mode.  Invalid/unsupported values found in this register can be
detected based on the state of EFLAGS afterwards.

Therefore, I would suggest possibly changing "int type" to "unsigned
long type" if we are going to the effort of getting this correct.  It
shouldn't make a difference currently, as all calls use appropriate
INVEPT_*_CONTEXT defines.

As for the flags, should we be including "cc" to the clobber list as
each of the VM*/INV* instructions explicitly sets the flags.  I would
hope that the toolchain is pessimistic enough to not trust the state of
the flags across some inline assembly, but I can't find any hard
information one way or another.
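
i.e. the prototypes would become something like:

    static inline void __invept(unsigned long type, u64 eptp, u64 gpa);
    static inline void __invvpid(unsigned long type, u16 vpid, u64 gva);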

~Andrew

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH v4 3/4] VMX: use proper instruction mnemonics if assembler supports them
  2013-08-26 15:07                   ` Andrew Cooper
@ 2013-08-26 15:10                     ` Andrew Cooper
  2013-08-26 15:30                       ` Jan Beulich
  2013-08-26 15:29                     ` Jan Beulich
  1 sibling, 1 reply; 60+ messages in thread
From: Andrew Cooper @ 2013-08-26 15:10 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel, Keir Fraser, Eddie Dong, Jun Nakajima

On 26/08/2013 16:07, Andrew Cooper wrote:
> On 26/08/2013 15:29, Jan Beulich wrote:
>>>>> On 26.08.13 at 16:18, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
>>> On 26/08/2013 15:03, Jan Beulich wrote:
>>>> +    asm volatile (
>>>> +#ifdef HAVE_GAS_VMX
>>>> +                   "vmread %2, %1\n\t"
>>>> +#else
>>>> +                   VMREAD_OPCODE MODRM_EAX_ECX
>>>> +#endif
>>>> +                   /* CF==1 or ZF==1 --> rc = 0 */
>>>> +                   "setnbe %0"
>>>> +#ifdef HAVE_GAS_VMX
>>>> +                   : "=qm" (okay), "=rm" (*value)
>>>> +                   : "r" (field)
>>>> +#else
>>>> +                   : "=qm" (okay), "=c" (*value)
>>>> +                   : "a" (field)
>>>> +#endif
>>> From what I can work out while googling, the q constraint is equivalent
>>> to the r constraint for 64bit code.
>>>
>>> For consistency sake, I would suggest "=rm" (okay) here
>> And I'd like to keep it the way it is for generality's  sake (i.e. not
>> making the code more 32-bit unclean than we need to).
> Ok
>
>>>> @@ -365,14 +398,22 @@ static inline void __invept(int type, u6
>>>>           !cpu_has_vmx_ept_invept_single_context )
>>>>          type = INVEPT_ALL_CONTEXT;
>>>>  
>>>> -    asm volatile ( INVEPT_OPCODE
>>>> -                   MODRM_EAX_08
>>>> +    asm volatile (
>>>> +#ifdef HAVE_GAS_EPT
>>>> +                   "invept %0, %q1\n"
>>> Another stray q
>> No - operand 1 is of type "int", and while the high 32 bits get
>> ignored (i.e. we don't need to do any zero- or sign-extension), we
>> still need to specify the 64-bit register name here. Or wait - I
>> thought it would ignore the upper bits, but it's not documented to.
>> In which case this involves more than just dropping the q modifier.
> I was more referring to having a q in the instruction, yet an "r" in the
> parameter list.  I would suggest

Sorry - sent early.  I would suggest putting all constraints beside
their values, rather than mixing them in amongst the asm code itself,
but do admit that this is only a matter of preference.

>
> INV{EPT,VPID} is strictly defined to take r64 as the "type" parameter in
> long mode.  Invalid/unsupported values found in this register can be
> detected based on the state of EFLAGS afterwards.
>
> Therefore, I would suggest possibly changing "int type" to "unsigned
> long type" if we are going to the effort of getting this correct.  It
> shouldn't make a difference currently, as all calls use appropriate
> INVEPT_*_CONTEXT defines.
>
> As for the flags, should we be including "cc" to the clobber list as
> each of the VM*/INV* instructions explicitly sets the flags.  I would
> hope that the toolchain is pessimistic enough to not trust the state of
> the flags across some inline assembly, but I can't find any hard
> information one way or another.
>
> ~Andrew
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH v4 3/4] VMX: use proper instruction mnemonics if assembler supports them
  2013-08-26 15:07                   ` Andrew Cooper
  2013-08-26 15:10                     ` Andrew Cooper
@ 2013-08-26 15:29                     ` Jan Beulich
  2013-08-26 15:33                       ` Andrew Cooper
  1 sibling, 1 reply; 60+ messages in thread
From: Jan Beulich @ 2013-08-26 15:29 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: xen-devel, Keir Fraser, Eddie Dong, Jun Nakajima

>>> On 26.08.13 at 17:07, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
> I was more referring to having a q in the instruction, yet an "r" in the
> parameter list.  I would suggest

So you perhaps mix up "q" as a constraint (meaning byte registers)
and "q" as an operand qualifier (meaning the 64-bit register
regardless of the operand size of the respective asm() operand)?
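
A standalone illustration of the two (hypothetical snippet, not taken from
the patch):

    bool_t ok;
    int type = 1;

    /* "q" as a constraint: restrict the operand to a byte-addressable
     * register, so the setcc below can name its 8-bit form (%al, %bl, ...);
     * it simply latches whatever the current flags happen to be. */
    asm volatile ( "setnbe %0" : "=q" (ok) );

    /* "q" as an operand qualifier: emit the 64-bit register name for
     * operand 0 even though its C type is a 32-bit int (the register's
     * upper half is unspecified here). */
    asm volatile ( "not %q0" : "+r" (type) );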

> INV{EPT,VPID} is strictly defined to take r64 as the "type" parameter in
> long mode.  Invalid/unsupported values found in this register can be
> detected based on the state of EFLAGS afterwards.
> 
> Therefore, I would suggest possibly changing "int type" to "unsigned
> long type" if we are going to the effort of getting this correct.  It
> shouldn't make a difference currently, as all calls use appropriate
> INVEPT_*_CONTEXT defines.

Yes, as already indicated. v5 is about to be on its way.

> As for the flags, should we be including "cc" to the clobber list as
> each of the VM*/INV* instructions explicitly sets the flags.  I would
> hope that the toolchain is pessimistic enough to not trust the state of
> the flags across some inline assembly, but I can't find any hard
> information one way or another.

Yes, on x86 at least it is never necessary to list cc explicitly.
Which at times is sad, but here it's to our benefit.
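
I.e. (sketch, reusing __vmpclear()'s operand) gcc treats these two the same
on x86, the flags being assumed clobbered by any asm() anyway:

    asm volatile ( "vmclear %0" :: "m" (addr) : "memory" );
    asm volatile ( "vmclear %0" :: "m" (addr) : "memory", "cc" );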

Jan

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH v4 3/4] VMX: use proper instruction mnemonics if assembler supports them
  2013-08-26 15:10                     ` Andrew Cooper
@ 2013-08-26 15:30                       ` Jan Beulich
  0 siblings, 0 replies; 60+ messages in thread
From: Jan Beulich @ 2013-08-26 15:30 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: xen-devel, Keir Fraser, Eddie Dong, Jun Nakajima

>>> On 26.08.13 at 17:10, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
> On 26/08/2013 16:07, Andrew Cooper wrote:
>> On 26/08/2013 15:29, Jan Beulich wrote:
>>>>>> On 26.08.13 at 16:18, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
>>>> On 26/08/2013 15:03, Jan Beulich wrote:
>>>>> @@ -365,14 +398,22 @@ static inline void __invept(int type, u6
>>>>>           !cpu_has_vmx_ept_invept_single_context )
>>>>>          type = INVEPT_ALL_CONTEXT;
>>>>>  
>>>>> -    asm volatile ( INVEPT_OPCODE
>>>>> -                   MODRM_EAX_08
>>>>> +    asm volatile (
>>>>> +#ifdef HAVE_GAS_EPT
>>>>> +                   "invept %0, %q1\n"
>>>> Another stray q
>>> No - operand 1 is of type "int", and while the high 32 bits get
>>> ignored (i.e. we don't need to do any zero- or sign-extension), we
>>> still need to specify the 64-bit register name here. Or wait - I
>>> thought it would ignore the upper bits, but it's not documented to.
>>> In which case this involves more than just dropping the q modifier.
>> I was more referring to having a q in the instruction, yet an "r" in the
>> parameter list.  I would suggest
> 
> Sorry - sent early.  I would suggest putting all constraints beside
> their values, rather than mixing them in amongst the asm code itself,
> but do admit that this is only a matter of preference.

That statement of yours seems to underline that you're mixing up
the meanings of q depending on where it's actually used.

Jan

^ permalink raw reply	[flat|nested] 60+ messages in thread

* [PATCH v5 3/4] VMX: use proper instruction mnemonics if assembler supports them
  2013-08-26 14:18               ` Andrew Cooper
  2013-08-26 14:29                 ` Jan Beulich
@ 2013-08-26 15:31                 ` Jan Beulich
  2013-08-26 15:36                   ` Andrew Cooper
  2013-08-29 11:47                   ` Tim Deegan
  1 sibling, 2 replies; 60+ messages in thread
From: Jan Beulich @ 2013-08-26 15:31 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper, Keir Fraser, Eddie Dong, Jun Nakajima

[-- Attachment #1: Type: text/plain, Size: 9525 bytes --]

With the hex byte emission we were taking away a good part of
flexibility from the compiler, as for simplicity reasons these were
built using fixed operands. All half way modern build environments
would allow using the mnemonics (but we can't disable the hex variants
yet, since the binutils around at the time gcc 4.1 got released didn't
support these yet).

I didn't convert __vmread() yet because that would, just like for
__vmread_safe(), imply converting to a macro so that the output operand
can be the caller supplied variable rather than an intermediate one. As
that would require touching all invocation points of __vmread() (of
which there are quite a few), I'd first like to be certain the approach
is acceptable; the main question being whether the now conditional code
might be considered to cause future maintenance issues, and the second
being that of parameter/argument ordering (here I made __vmread_safe()
match __vmwrite(), but one could also take the position that read and
write should use the inverse order of one another, in line with the
actual instruction operands).

Additionally I was quite puzzled to find that all the asm()-s involved
here have memory clobbers - what are they needed for? Or can they be
dropped at least in some cases?

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v5: drop "q" operand qualifiers from __invept() and __invvpid() inline
    assembly (pointed out as questionable by Andrew Cooper), changing
    the types of their "type" parameters to "unsigned long" (which is
    by itself a fix to the original code)
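
Call sites are unaffected by that type change, e.g. (illustrative
invocation only):

    __invept(INVEPT_ALL_CONTEXT, 0, 0);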

--- a/Config.mk
+++ b/Config.mk
@@ -4,6 +4,12 @@ ifeq ($(filter /%,$(XEN_ROOT)),)
 $(error XEN_ROOT must be absolute)
 endif
 
+# Convenient variables
+comma   := ,
+squote  := '
+empty   :=
+space   := $(empty) $(empty)
+
 # fallback for older make
 realpath = $(wildcard $(foreach file,$(1),$(shell cd -P $(dir $(file)) && echo "$$PWD/$(notdir $(file))")))
 
@@ -128,6 +134,21 @@ endef
 check-$(gcc) = $(call cc-ver-check,CC,0x040100,"Xen requires at least gcc-4.1")
 $(eval $(check-y))
 
+# as-insn: Check whether assembler supports an instruction.
+# Usage: cflags-y += $(call as-insn "insn",option-yes,option-no)
+as-insn = $(if $(shell echo 'void _(void) { asm volatile ( $(2) ); }' \
+                       | $(1) -c -x c -o /dev/null - 2>&1),$(4),$(3))
+
+# as-insn-check: Add an option to compilation flags, but only if insn is
+#                supported by assembler.
+# Usage: $(call as-insn-check CFLAGS,CC,"nop",-DHAVE_GAS_NOP)
+as-insn-check = $(eval $(call as-insn-check-closure,$(1),$(2),$(3),$(4)))
+define as-insn-check-closure
+    ifeq ($$(call as-insn,$$($(2)),$(3),y,n),y)
+        $(1) += $(4)
+    endif
+endef
+
 define buildmakevars2shellvars
     export PREFIX="$(PREFIX)";                                            \
     export XEN_SCRIPT_DIR="$(XEN_SCRIPT_DIR)";                            \
--- a/xen/arch/x86/Rules.mk
+++ b/xen/arch/x86/Rules.mk
@@ -28,6 +28,8 @@ CFLAGS += -msoft-float
 
 $(call cc-options-add,CFLAGS,CC,$(EMBEDDED_EXTRA_CFLAGS))
 $(call cc-option-add,CFLAGS,CC,-Wnested-externs)
+$(call as-insn-check,CFLAGS,CC,"vmcall",-DHAVE_GAS_VMX)
+$(call as-insn-check,CFLAGS,CC,"invept (%rax)$$(comma)%rax",-DHAVE_GAS_EPT)
 
 ifeq ($(supervisor_mode_kernel),y)
 CFLAGS += -DCONFIG_X86_SUPERVISOR_MODE_KERNEL=1
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -1292,12 +1292,11 @@ void vmx_do_resume(struct vcpu *v)
     reset_stack_and_jump(vmx_asm_do_vmentry);
 }
 
-static unsigned long vmr(unsigned long field)
+static inline unsigned long vmr(unsigned long field)
 {
-    int rc;
     unsigned long val;
-    val = __vmread_safe(field, &rc);
-    return rc ? 0 : val;
+
+    return __vmread_safe(field, &val) ? val : 0;
 }
 
 static void vmx_dump_sel(char *name, uint32_t selector)
--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -951,13 +951,11 @@ fallback:
         vvmcs_to_shadow(vvmcs, field[i]);
 }
 
-static void shadow_to_vvmcs(void *vvmcs, unsigned int field)
+static inline void shadow_to_vvmcs(void *vvmcs, unsigned int field)
 {
-    u64 value;
-    int rc;
+    unsigned long value;
 
-    value = __vmread_safe(field, &rc);
-    if ( !rc )
+    if ( __vmread_safe(field, &value) )
         __set_vvmcs(vvmcs, field, value);
 }
 
--- a/xen/include/asm-x86/hvm/vmx/vmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h
@@ -282,27 +282,43 @@ extern uint8_t posted_intr_vector;
 
 static inline void __vmptrld(u64 addr)
 {
-    asm volatile ( VMPTRLD_OPCODE
-                   MODRM_EAX_06
+    asm volatile (
+#ifdef HAVE_GAS_VMX
+                   "vmptrld %0\n"
+#else
+                   VMPTRLD_OPCODE MODRM_EAX_06
+#endif
                    /* CF==1 or ZF==1 --> crash (ud2) */
                    UNLIKELY_START(be, vmptrld)
                    "\tud2\n"
                    UNLIKELY_END_SECTION
                    :
+#ifdef HAVE_GAS_VMX
+                   : "m" (addr)
+#else
                    : "a" (&addr)
+#endif
                    : "memory");
 }
 
 static inline void __vmpclear(u64 addr)
 {
-    asm volatile ( VMCLEAR_OPCODE
-                   MODRM_EAX_06
+    asm volatile (
+#ifdef HAVE_GAS_VMX
+                   "vmclear %0\n"
+#else
+                   VMCLEAR_OPCODE MODRM_EAX_06
+#endif
                    /* CF==1 or ZF==1 --> crash (ud2) */
                    UNLIKELY_START(be, vmclear)
                    "\tud2\n"
                    UNLIKELY_END_SECTION
                    :
+#ifdef HAVE_GAS_VMX
+                   : "m" (addr)
+#else
                    : "a" (&addr)
+#endif
                    : "memory");
 }
 
@@ -325,33 +341,50 @@ static inline unsigned long __vmread(uns
 
 static inline void __vmwrite(unsigned long field, unsigned long value)
 {
-    asm volatile ( VMWRITE_OPCODE
-                   MODRM_EAX_ECX
+    asm volatile (
+#ifdef HAVE_GAS_VMX
+                   "vmwrite %1, %0\n"
+#else
+                   VMWRITE_OPCODE MODRM_EAX_ECX
+#endif
                    /* CF==1 or ZF==1 --> crash (ud2) */
                    UNLIKELY_START(be, vmwrite)
                    "\tud2\n"
                    UNLIKELY_END_SECTION
                    : 
+#ifdef HAVE_GAS_VMX
+                   : "r" (field) , "rm" (value)
+#else
                    : "a" (field) , "c" (value)
+#endif
                    : "memory");
 }
 
-static inline unsigned long __vmread_safe(unsigned long field, int *error)
+static inline bool_t __vmread_safe(unsigned long field, unsigned long *value)
 {
-    unsigned long ecx;
+    bool_t okay;
 
-    asm volatile ( VMREAD_OPCODE
-                   MODRM_EAX_ECX
-                   /* CF==1 or ZF==1 --> rc = -1 */
-                   "setna %b0 ; neg %0"
-                   : "=q" (*error), "=c" (ecx)
-                   : "0" (0), "a" (field)
+    asm volatile (
+#ifdef HAVE_GAS_VMX
+                   "vmread %2, %1\n\t"
+#else
+                   VMREAD_OPCODE MODRM_EAX_ECX
+#endif
+                   /* CF==1 or ZF==1 --> rc = 0 */
+                   "setnbe %0"
+#ifdef HAVE_GAS_VMX
+                   : "=qm" (okay), "=rm" (*value)
+                   : "r" (field)
+#else
+                   : "=qm" (okay), "=c" (*value)
+                   : "a" (field)
+#endif
                    : "memory");
 
-    return ecx;
+    return okay;
 }
 
-static inline void __invept(int type, u64 eptp, u64 gpa)
+static inline void __invept(unsigned long type, u64 eptp, u64 gpa)
 {
     struct {
         u64 eptp, gpa;
@@ -365,18 +398,26 @@ static inline void __invept(int type, u6
          !cpu_has_vmx_ept_invept_single_context )
         type = INVEPT_ALL_CONTEXT;
 
-    asm volatile ( INVEPT_OPCODE
-                   MODRM_EAX_08
+    asm volatile (
+#ifdef HAVE_GAS_EPT
+                   "invept %0, %1\n"
+#else
+                   INVEPT_OPCODE MODRM_EAX_08
+#endif
                    /* CF==1 or ZF==1 --> crash (ud2) */
                    UNLIKELY_START(be, invept)
                    "\tud2\n"
                    UNLIKELY_END_SECTION
                    :
+#ifdef HAVE_GAS_EPT
+                   : "m" (operand), "r" (type)
+#else
                    : "a" (&operand), "c" (type)
+#endif
                    : "memory" );
 }
 
-static inline void __invvpid(int type, u16 vpid, u64 gva)
+static inline void __invvpid(unsigned long type, u16 vpid, u64 gva)
 {
     struct {
         u64 vpid:16;
@@ -385,7 +426,12 @@ static inline void __invvpid(int type, u
     } __attribute__ ((packed)) operand = {vpid, 0, gva};
 
     /* Fix up #UD exceptions which occur when TLBs are flushed before VMXON. */
-    asm volatile ( "1: " INVVPID_OPCODE MODRM_EAX_08
+    asm volatile ( "1: "
+#ifdef HAVE_GAS_EPT
+                   "invvpid %0, %1\n"
+#else
+                   INVVPID_OPCODE MODRM_EAX_08
+#endif
                    /* CF==1 or ZF==1 --> crash (ud2) */
                    UNLIKELY_START(be, invvpid)
                    "\tud2\n"
@@ -393,7 +439,11 @@ static inline void __invvpid(int type, u
                    "2:"
                    _ASM_EXTABLE(1b, 2b)
                    :
+#ifdef HAVE_GAS_EPT
+                   : "m" (operand), "r" (type)
+#else
                    : "a" (&operand), "c" (type)
+#endif
                    : "memory" );
 }
 



[-- Attachment #2: VMX-use-mnemonics.patch --]
[-- Type: text/plain, Size: 9589 bytes --]

VMX: use proper instruction mnemonics if assembler supports them

With the hex byte emission we were taking away a good part of
flexibility from the compiler, as for simplicity reasons these were
built using fixed operands. All half way modern build environments
would allow using the mnemonics (but we can't disable the hex variants
yet, since the binutils around at the time gcc 4.1 got released didn't
support these yet).

I didn't convert __vmread() yet because that would, just like for
__vmread_safe(), imply converting to a macro so that the output operand
can be the caller supplied variable rather than an intermediate one. As
that would require touching all invocation points of __vmread() (of
which there are quite a few), I'd first like to be certain the approach
is acceptable; the main question being whether the now conditional code
might be considered to cause future maintenance issues, and the second
being that of parameter/argument ordering (here I made __vmread_safe()
match __vmwrite(), but one could also take the position that read and
write should use the inverse order of one another, in line with the
actual instruction operands).

Additionally I was quite puzzled to find that all the asm()-s involved
here have memory clobbers - what are they needed for? Or can they be
dropped at least in some cases?

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v5: drop "q" operand qualifiers from __invept() and __invvpid() inline
    assembly (pointed out as questionable by Andrew Cooper), changing
    the types of their "type" parameters to "unsigned long" (which is
    by itself a fix to the original code)

--- a/Config.mk
+++ b/Config.mk
@@ -4,6 +4,12 @@ ifeq ($(filter /%,$(XEN_ROOT)),)
 $(error XEN_ROOT must be absolute)
 endif
 
+# Convenient variables
+comma   := ,
+squote  := '
+empty   :=
+space   := $(empty) $(empty)
+
 # fallback for older make
 realpath = $(wildcard $(foreach file,$(1),$(shell cd -P $(dir $(file)) && echo "$$PWD/$(notdir $(file))")))
 
@@ -128,6 +134,21 @@ endef
 check-$(gcc) = $(call cc-ver-check,CC,0x040100,"Xen requires at least gcc-4.1")
 $(eval $(check-y))
 
+# as-insn: Check whether assembler supports an instruction.
+# Usage: cflags-y += $(call as-insn "insn",option-yes,option-no)
+as-insn = $(if $(shell echo 'void _(void) { asm volatile ( $(2) ); }' \
+                       | $(1) -c -x c -o /dev/null - 2>&1),$(4),$(3))
+
+# as-insn-check: Add an option to compilation flags, but only if insn is
+#                supported by assembler.
+# Usage: $(call as-insn-check CFLAGS,CC,"nop",-DHAVE_GAS_NOP)
+as-insn-check = $(eval $(call as-insn-check-closure,$(1),$(2),$(3),$(4)))
+define as-insn-check-closure
+    ifeq ($$(call as-insn,$$($(2)),$(3),y,n),y)
+        $(1) += $(4)
+    endif
+endef
+
 define buildmakevars2shellvars
     export PREFIX="$(PREFIX)";                                            \
     export XEN_SCRIPT_DIR="$(XEN_SCRIPT_DIR)";                            \
--- a/xen/arch/x86/Rules.mk
+++ b/xen/arch/x86/Rules.mk
@@ -28,6 +28,8 @@ CFLAGS += -msoft-float
 
 $(call cc-options-add,CFLAGS,CC,$(EMBEDDED_EXTRA_CFLAGS))
 $(call cc-option-add,CFLAGS,CC,-Wnested-externs)
+$(call as-insn-check,CFLAGS,CC,"vmcall",-DHAVE_GAS_VMX)
+$(call as-insn-check,CFLAGS,CC,"invept (%rax)$$(comma)%rax",-DHAVE_GAS_EPT)
 
 ifeq ($(supervisor_mode_kernel),y)
 CFLAGS += -DCONFIG_X86_SUPERVISOR_MODE_KERNEL=1
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -1292,12 +1292,11 @@ void vmx_do_resume(struct vcpu *v)
     reset_stack_and_jump(vmx_asm_do_vmentry);
 }
 
-static unsigned long vmr(unsigned long field)
+static inline unsigned long vmr(unsigned long field)
 {
-    int rc;
     unsigned long val;
-    val = __vmread_safe(field, &rc);
-    return rc ? 0 : val;
+
+    return __vmread_safe(field, &val) ? val : 0;
 }
 
 static void vmx_dump_sel(char *name, uint32_t selector)
--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -951,13 +951,11 @@ fallback:
         vvmcs_to_shadow(vvmcs, field[i]);
 }
 
-static void shadow_to_vvmcs(void *vvmcs, unsigned int field)
+static inline void shadow_to_vvmcs(void *vvmcs, unsigned int field)
 {
-    u64 value;
-    int rc;
+    unsigned long value;
 
-    value = __vmread_safe(field, &rc);
-    if ( !rc )
+    if ( __vmread_safe(field, &value) )
         __set_vvmcs(vvmcs, field, value);
 }
 
--- a/xen/include/asm-x86/hvm/vmx/vmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h
@@ -282,27 +282,43 @@ extern uint8_t posted_intr_vector;
 
 static inline void __vmptrld(u64 addr)
 {
-    asm volatile ( VMPTRLD_OPCODE
-                   MODRM_EAX_06
+    asm volatile (
+#ifdef HAVE_GAS_VMX
+                   "vmptrld %0\n"
+#else
+                   VMPTRLD_OPCODE MODRM_EAX_06
+#endif
                    /* CF==1 or ZF==1 --> crash (ud2) */
                    UNLIKELY_START(be, vmptrld)
                    "\tud2\n"
                    UNLIKELY_END_SECTION
                    :
+#ifdef HAVE_GAS_VMX
+                   : "m" (addr)
+#else
                    : "a" (&addr)
+#endif
                    : "memory");
 }
 
 static inline void __vmpclear(u64 addr)
 {
-    asm volatile ( VMCLEAR_OPCODE
-                   MODRM_EAX_06
+    asm volatile (
+#ifdef HAVE_GAS_VMX
+                   "vmclear %0\n"
+#else
+                   VMCLEAR_OPCODE MODRM_EAX_06
+#endif
                    /* CF==1 or ZF==1 --> crash (ud2) */
                    UNLIKELY_START(be, vmclear)
                    "\tud2\n"
                    UNLIKELY_END_SECTION
                    :
+#ifdef HAVE_GAS_VMX
+                   : "m" (addr)
+#else
                    : "a" (&addr)
+#endif
                    : "memory");
 }
 
@@ -325,33 +341,50 @@ static inline unsigned long __vmread(uns
 
 static inline void __vmwrite(unsigned long field, unsigned long value)
 {
-    asm volatile ( VMWRITE_OPCODE
-                   MODRM_EAX_ECX
+    asm volatile (
+#ifdef HAVE_GAS_VMX
+                   "vmwrite %1, %0\n"
+#else
+                   VMWRITE_OPCODE MODRM_EAX_ECX
+#endif
                    /* CF==1 or ZF==1 --> crash (ud2) */
                    UNLIKELY_START(be, vmwrite)
                    "\tud2\n"
                    UNLIKELY_END_SECTION
                    : 
+#ifdef HAVE_GAS_VMX
+                   : "r" (field) , "rm" (value)
+#else
                    : "a" (field) , "c" (value)
+#endif
                    : "memory");
 }
 
-static inline unsigned long __vmread_safe(unsigned long field, int *error)
+static inline bool_t __vmread_safe(unsigned long field, unsigned long *value)
 {
-    unsigned long ecx;
+    bool_t okay;
 
-    asm volatile ( VMREAD_OPCODE
-                   MODRM_EAX_ECX
-                   /* CF==1 or ZF==1 --> rc = -1 */
-                   "setna %b0 ; neg %0"
-                   : "=q" (*error), "=c" (ecx)
-                   : "0" (0), "a" (field)
+    asm volatile (
+#ifdef HAVE_GAS_VMX
+                   "vmread %2, %1\n\t"
+#else
+                   VMREAD_OPCODE MODRM_EAX_ECX
+#endif
+                   /* CF==1 or ZF==1 --> rc = 0 */
+                   "setnbe %0"
+#ifdef HAVE_GAS_VMX
+                   : "=qm" (okay), "=rm" (*value)
+                   : "r" (field)
+#else
+                   : "=qm" (okay), "=c" (*value)
+                   : "a" (field)
+#endif
                    : "memory");
 
-    return ecx;
+    return okay;
 }
 
-static inline void __invept(int type, u64 eptp, u64 gpa)
+static inline void __invept(unsigned long type, u64 eptp, u64 gpa)
 {
     struct {
         u64 eptp, gpa;
@@ -365,18 +398,26 @@ static inline void __invept(int type, u6
          !cpu_has_vmx_ept_invept_single_context )
         type = INVEPT_ALL_CONTEXT;
 
-    asm volatile ( INVEPT_OPCODE
-                   MODRM_EAX_08
+    asm volatile (
+#ifdef HAVE_GAS_EPT
+                   "invept %0, %1\n"
+#else
+                   INVEPT_OPCODE MODRM_EAX_08
+#endif
                    /* CF==1 or ZF==1 --> crash (ud2) */
                    UNLIKELY_START(be, invept)
                    "\tud2\n"
                    UNLIKELY_END_SECTION
                    :
+#ifdef HAVE_GAS_EPT
+                   : "m" (operand), "r" (type)
+#else
                    : "a" (&operand), "c" (type)
+#endif
                    : "memory" );
 }
 
-static inline void __invvpid(int type, u16 vpid, u64 gva)
+static inline void __invvpid(unsigned long type, u16 vpid, u64 gva)
 {
     struct {
         u64 vpid:16;
@@ -385,7 +426,12 @@ static inline void __invvpid(int type, u
     } __attribute__ ((packed)) operand = {vpid, 0, gva};
 
     /* Fix up #UD exceptions which occur when TLBs are flushed before VMXON. */
-    asm volatile ( "1: " INVVPID_OPCODE MODRM_EAX_08
+    asm volatile ( "1: "
+#ifdef HAVE_GAS_EPT
+                   "invvpid %0, %1\n"
+#else
+                   INVVPID_OPCODE MODRM_EAX_08
+#endif
                    /* CF==1 or ZF==1 --> crash (ud2) */
                    UNLIKELY_START(be, invvpid)
                    "\tud2\n"
@@ -393,7 +439,11 @@ static inline void __invvpid(int type, u
                    "2:"
                    _ASM_EXTABLE(1b, 2b)
                    :
+#ifdef HAVE_GAS_EPT
+                   : "m" (operand), "r" (type)
+#else
                    : "a" (&operand), "c" (type)
+#endif
                    : "memory" );
 }
 

[-- Attachment #3: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH v4 3/4] VMX: use proper instruction mnemonics if assembler supports them
  2013-08-26 15:29                     ` Jan Beulich
@ 2013-08-26 15:33                       ` Andrew Cooper
  0 siblings, 0 replies; 60+ messages in thread
From: Andrew Cooper @ 2013-08-26 15:33 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel, Keir Fraser, Eddie Dong, Jun Nakajima

On 26/08/2013 16:29, Jan Beulich wrote:
>>>> On 26.08.13 at 17:07, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
>> I was more referring to having a q in the instruction, yet an "r" in the
>> parameter list.  I would suggest
> So you perhaps mix up "q" as a constraint (meaning byte registers)
> and "q" as an operand qualifier (meaning the 64-bit register
> regardless of the operand size of the respective asm() operand)?

I clearly am.  Sorry for the confusion.

~Andrew

>
>> INV{EPT,VPID} is strictly defined to take r64 as the "type" parameter in
>> long mode.  Invalid/unsupported values found in this register can be
>> detected based on the state of EFLAGS afterwards.
>>
>> Therefore, I would suggest possibly changing "int type" to "unsigned
>> long type" if we are going to the effort of getting this correct.  It
>> shouldn't make a difference currently, as all calls use appropriate
>> INVEPT_*_CONTEXT defines.
> Yes, as already indicated. v5 is about to be on its way.
>
>> As for the flags, should we be including "cc" to the clobber list as
>> each of the VM*/INV* instructions explicitly sets the flags.  I would
>> hope that the toolchain is pessimistic enough to not trust the state of
>> the flags across some inline assembly, but I can't find any hard
>> information one way or another.
> Yes, on x86 at least it is never necessary to list cc explicitly.
> Which at times is sad, but here's it's to our benefit.
>
> Jan
>

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH v5 3/4] VMX: use proper instruction mnemonics if assembler supports them
  2013-08-26 15:31                 ` [PATCH v5 " Jan Beulich
@ 2013-08-26 15:36                   ` Andrew Cooper
  2013-08-29 11:47                   ` Tim Deegan
  1 sibling, 0 replies; 60+ messages in thread
From: Andrew Cooper @ 2013-08-26 15:36 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel, Keir Fraser, Eddie Dong, Jun Nakajima

On 26/08/2013 16:31, Jan Beulich wrote:
> With the hex byte emission we were taking away a good part of
> flexibility from the compiler, as for simplicity reasons these were
> built using fixed operands. All half way modern build environments
> would allow using the mnemonics (but we can't disable the hex variants
> yet, since the binutils around at the time gcc 4.1 got released didn't
> support these yet).
>
> I didn't convert __vmread() yet because that would, just like for
> __vmread_safe(), imply converting to a macro so that the output operand
> can be the caller supplied variable rather than an intermediate one. As
> that would require touching all invocation points of __vmread() (of
> which there are quite a few), I'd first like to be certain the approach
> is acceptable; the main question being whether the now conditional code
> might be considered to cause future maintenance issues, and the second
> being that of parameter/argument ordering (here I made __vmread_safe()
> match __vmwrite(), but one could also take the position that read and
> write should use the inverse order of one another, in line with the
> actual instruction operands).
>
> Additionally I was quite puzzled to find that all the asm()-s involved
> here have memory clobbers - what are they needed for? Or can they be
> dropped at least in some cases?
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

> ---
> v5: drop "q" operand qualifiers from __invept() and __invvpid() inline
>     assembly (pointed out as questionable by Andrew Cooper), changing
>     the types of their "type" parameters to "unsigned long" (which is
>     by itself a fix to the original code)
>
> --- a/Config.mk
> +++ b/Config.mk
> @@ -4,6 +4,12 @@ ifeq ($(filter /%,$(XEN_ROOT)),)
>  $(error XEN_ROOT must be absolute)
>  endif
>  
> +# Convenient variables
> +comma   := ,
> +squote  := '
> +empty   :=
> +space   := $(empty) $(empty)
> +
>  # fallback for older make
>  realpath = $(wildcard $(foreach file,$(1),$(shell cd -P $(dir $(file)) && echo "$$PWD/$(notdir $(file))")))
>  
> @@ -128,6 +134,21 @@ endef
>  check-$(gcc) = $(call cc-ver-check,CC,0x040100,"Xen requires at least gcc-4.1")
>  $(eval $(check-y))
>  
> +# as-insn: Check whether assembler supports an instruction.
> +# Usage: cflags-y += $(call as-insn "insn",option-yes,option-no)
> +as-insn = $(if $(shell echo 'void _(void) { asm volatile ( $(2) ); }' \
> +                       | $(1) -c -x c -o /dev/null - 2>&1),$(4),$(3))
> +
> +# as-insn-check: Add an option to compilation flags, but only if insn is
> +#                supported by assembler.
> +# Usage: $(call as-insn-check CFLAGS,CC,"nop",-DHAVE_GAS_NOP)
> +as-insn-check = $(eval $(call as-insn-check-closure,$(1),$(2),$(3),$(4)))
> +define as-insn-check-closure
> +    ifeq ($$(call as-insn,$$($(2)),$(3),y,n),y)
> +        $(1) += $(4)
> +    endif
> +endef
> +
>  define buildmakevars2shellvars
>      export PREFIX="$(PREFIX)";                                            \
>      export XEN_SCRIPT_DIR="$(XEN_SCRIPT_DIR)";                            \
> --- a/xen/arch/x86/Rules.mk
> +++ b/xen/arch/x86/Rules.mk
> @@ -28,6 +28,8 @@ CFLAGS += -msoft-float
>  
>  $(call cc-options-add,CFLAGS,CC,$(EMBEDDED_EXTRA_CFLAGS))
>  $(call cc-option-add,CFLAGS,CC,-Wnested-externs)
> +$(call as-insn-check,CFLAGS,CC,"vmcall",-DHAVE_GAS_VMX)
> +$(call as-insn-check,CFLAGS,CC,"invept (%rax)$$(comma)%rax",-DHAVE_GAS_EPT)
>  
>  ifeq ($(supervisor_mode_kernel),y)
>  CFLAGS += -DCONFIG_X86_SUPERVISOR_MODE_KERNEL=1
> --- a/xen/arch/x86/hvm/vmx/vmcs.c
> +++ b/xen/arch/x86/hvm/vmx/vmcs.c
> @@ -1292,12 +1292,11 @@ void vmx_do_resume(struct vcpu *v)
>      reset_stack_and_jump(vmx_asm_do_vmentry);
>  }
>  
> -static unsigned long vmr(unsigned long field)
> +static inline unsigned long vmr(unsigned long field)
>  {
> -    int rc;
>      unsigned long val;
> -    val = __vmread_safe(field, &rc);
> -    return rc ? 0 : val;
> +
> +    return __vmread_safe(field, &val) ? val : 0;
>  }
>  
>  static void vmx_dump_sel(char *name, uint32_t selector)
> --- a/xen/arch/x86/hvm/vmx/vvmx.c
> +++ b/xen/arch/x86/hvm/vmx/vvmx.c
> @@ -951,13 +951,11 @@ fallback:
>          vvmcs_to_shadow(vvmcs, field[i]);
>  }
>  
> -static void shadow_to_vvmcs(void *vvmcs, unsigned int field)
> +static inline void shadow_to_vvmcs(void *vvmcs, unsigned int field)
>  {
> -    u64 value;
> -    int rc;
> +    unsigned long value;
>  
> -    value = __vmread_safe(field, &rc);
> -    if ( !rc )
> +    if ( __vmread_safe(field, &value) )
>          __set_vvmcs(vvmcs, field, value);
>  }
>  
> --- a/xen/include/asm-x86/hvm/vmx/vmx.h
> +++ b/xen/include/asm-x86/hvm/vmx/vmx.h
> @@ -282,27 +282,43 @@ extern uint8_t posted_intr_vector;
>  
>  static inline void __vmptrld(u64 addr)
>  {
> -    asm volatile ( VMPTRLD_OPCODE
> -                   MODRM_EAX_06
> +    asm volatile (
> +#ifdef HAVE_GAS_VMX
> +                   "vmptrld %0\n"
> +#else
> +                   VMPTRLD_OPCODE MODRM_EAX_06
> +#endif
>                     /* CF==1 or ZF==1 --> crash (ud2) */
>                     UNLIKELY_START(be, vmptrld)
>                     "\tud2\n"
>                     UNLIKELY_END_SECTION
>                     :
> +#ifdef HAVE_GAS_VMX
> +                   : "m" (addr)
> +#else
>                     : "a" (&addr)
> +#endif
>                     : "memory");
>  }
>  
>  static inline void __vmpclear(u64 addr)
>  {
> -    asm volatile ( VMCLEAR_OPCODE
> -                   MODRM_EAX_06
> +    asm volatile (
> +#ifdef HAVE_GAS_VMX
> +                   "vmclear %0\n"
> +#else
> +                   VMCLEAR_OPCODE MODRM_EAX_06
> +#endif
>                     /* CF==1 or ZF==1 --> crash (ud2) */
>                     UNLIKELY_START(be, vmclear)
>                     "\tud2\n"
>                     UNLIKELY_END_SECTION
>                     :
> +#ifdef HAVE_GAS_VMX
> +                   : "m" (addr)
> +#else
>                     : "a" (&addr)
> +#endif
>                     : "memory");
>  }
>  
> @@ -325,33 +341,50 @@ static inline unsigned long __vmread(uns
>  
>  static inline void __vmwrite(unsigned long field, unsigned long value)
>  {
> -    asm volatile ( VMWRITE_OPCODE
> -                   MODRM_EAX_ECX
> +    asm volatile (
> +#ifdef HAVE_GAS_VMX
> +                   "vmwrite %1, %0\n"
> +#else
> +                   VMWRITE_OPCODE MODRM_EAX_ECX
> +#endif
>                     /* CF==1 or ZF==1 --> crash (ud2) */
>                     UNLIKELY_START(be, vmwrite)
>                     "\tud2\n"
>                     UNLIKELY_END_SECTION
>                     : 
> +#ifdef HAVE_GAS_VMX
> +                   : "r" (field) , "rm" (value)
> +#else
>                     : "a" (field) , "c" (value)
> +#endif
>                     : "memory");
>  }
>  
> -static inline unsigned long __vmread_safe(unsigned long field, int *error)
> +static inline bool_t __vmread_safe(unsigned long field, unsigned long *value)
>  {
> -    unsigned long ecx;
> +    bool_t okay;
>  
> -    asm volatile ( VMREAD_OPCODE
> -                   MODRM_EAX_ECX
> -                   /* CF==1 or ZF==1 --> rc = -1 */
> -                   "setna %b0 ; neg %0"
> -                   : "=q" (*error), "=c" (ecx)
> -                   : "0" (0), "a" (field)
> +    asm volatile (
> +#ifdef HAVE_GAS_VMX
> +                   "vmread %2, %1\n\t"
> +#else
> +                   VMREAD_OPCODE MODRM_EAX_ECX
> +#endif
> +                   /* CF==1 or ZF==1 --> rc = 0 */
> +                   "setnbe %0"
> +#ifdef HAVE_GAS_VMX
> +                   : "=qm" (okay), "=rm" (*value)
> +                   : "r" (field)
> +#else
> +                   : "=qm" (okay), "=c" (*value)
> +                   : "a" (field)
> +#endif
>                     : "memory");
>  
> -    return ecx;
> +    return okay;
>  }
>  
> -static inline void __invept(int type, u64 eptp, u64 gpa)
> +static inline void __invept(unsigned long type, u64 eptp, u64 gpa)
>  {
>      struct {
>          u64 eptp, gpa;
> @@ -365,18 +398,26 @@ static inline void __invept(int type, u6
>           !cpu_has_vmx_ept_invept_single_context )
>          type = INVEPT_ALL_CONTEXT;
>  
> -    asm volatile ( INVEPT_OPCODE
> -                   MODRM_EAX_08
> +    asm volatile (
> +#ifdef HAVE_GAS_EPT
> +                   "invept %0, %1\n"
> +#else
> +                   INVEPT_OPCODE MODRM_EAX_08
> +#endif
>                     /* CF==1 or ZF==1 --> crash (ud2) */
>                     UNLIKELY_START(be, invept)
>                     "\tud2\n"
>                     UNLIKELY_END_SECTION
>                     :
> +#ifdef HAVE_GAS_EPT
> +                   : "m" (operand), "r" (type)
> +#else
>                     : "a" (&operand), "c" (type)
> +#endif
>                     : "memory" );
>  }
>  
> -static inline void __invvpid(int type, u16 vpid, u64 gva)
> +static inline void __invvpid(unsigned long type, u16 vpid, u64 gva)
>  {
>      struct {
>          u64 vpid:16;
> @@ -385,7 +426,12 @@ static inline void __invvpid(int type, u
>      } __attribute__ ((packed)) operand = {vpid, 0, gva};
>  
>      /* Fix up #UD exceptions which occur when TLBs are flushed before VMXON. */
> -    asm volatile ( "1: " INVVPID_OPCODE MODRM_EAX_08
> +    asm volatile ( "1: "
> +#ifdef HAVE_GAS_EPT
> +                   "invvpid %0, %1\n"
> +#else
> +                   INVVPID_OPCODE MODRM_EAX_08
> +#endif
>                     /* CF==1 or ZF==1 --> crash (ud2) */
>                     UNLIKELY_START(be, invvpid)
>                     "\tud2\n"
> @@ -393,7 +439,11 @@ static inline void __invvpid(int type, u
>                     "2:"
>                     _ASM_EXTABLE(1b, 2b)
>                     :
> +#ifdef HAVE_GAS_EPT
> +                   : "m" (operand), "r" (type)
> +#else
>                     : "a" (&operand), "c" (type)
> +#endif
>                     : "memory" );
>  }
>  
>
>

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 4/4] SVM: streamline entry.S code
  2013-08-23 14:04 ` [PATCH 4/4] SVM: streamline entry.S code Jan Beulich
@ 2013-08-26 16:20   ` Andrew Cooper
  2013-08-26 17:20     ` Keir Fraser
  2013-08-26 21:47   ` Andrew Cooper
                     ` (2 subsequent siblings)
  3 siblings, 1 reply; 60+ messages in thread
From: Andrew Cooper @ 2013-08-26 16:20 UTC (permalink / raw)
  To: Jan Beulich
  Cc: xen-devel, Boris Ostrovsky, Keir Fraser, Jacob Shin,
	suravee.suthikulpanit


[-- Attachment #1.1: Type: text/plain, Size: 6055 bytes --]

On 23/08/2013 15:04, Jan Beulich wrote:
> - move stuff easily/better done in C into C code
> - re-arrange code paths so that no redundant GET_CURRENT() would remain
>   on the fast paths
> - move long latency operations earlier
> - slightly defer disabling global interrupts on the VM entry path
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>
> --- a/xen/arch/x86/hvm/svm/entry.S
> +++ b/xen/arch/x86/hvm/svm/entry.S
> @@ -32,28 +32,34 @@
>  #define CLGI   .byte 0x0F,0x01,0xDD
>  
>  ENTRY(svm_asm_do_resume)
> +        GET_CURRENT(%rbx)
> +.Lsvm_do_resume:
>          call svm_intr_assist
>          mov  %rsp,%rdi
>          call nsvm_vcpu_switch
>          ASSERT_NOT_IN_ATOMIC
>  
> -        GET_CURRENT(%rbx)
> -        CLGI
> -
>          mov  VCPU_processor(%rbx),%eax
> -        shl  $IRQSTAT_shift,%eax
>          lea  irq_stat+IRQSTAT_softirq_pending(%rip),%rdx
> -        cmpl $0,(%rdx,%rax,1)
> +        xor  %ecx,%ecx
> +        shl  $IRQSTAT_shift,%eax
> +        CLGI
> +        cmp  %ecx,(%rdx,%rax,1)
>          jne  .Lsvm_process_softirqs
>  
> -        testb $0, VCPU_nsvm_hap_enabled(%rbx)
> -UNLIKELY_START(nz, nsvm_hap)
> -        mov  VCPU_nhvm_p2m(%rbx),%rax
> -        test %rax,%rax
> +        cmp  %cl,VCPU_nsvm_hap_enabled(%rbx)
> +UNLIKELY_START(ne, nsvm_hap)
> +        cmp  %rcx,VCPU_nhvm_p2m(%rbx)
>          sete %al
> -        andb VCPU_nhvm_guestmode(%rbx),%al
> -        jnz  .Lsvm_nsvm_no_p2m
> -UNLIKELY_END(nsvm_hap)
> +        test VCPU_nhvm_guestmode(%rbx),%al
> +        UNLIKELY_DONE(z, nsvm_hap)
> +        /*
> +         * Someone shot down our nested p2m table; go round again
> +         * and nsvm_vcpu_switch() will fix it for us.
> +         */
> +        STGI
> +        jmp  .Lsvm_do_resume
> +__UNLIKELY_END(nsvm_hap)
>  
>          call svm_asid_handle_vmrun
>  
> @@ -72,13 +78,12 @@ UNLIKELY_END(svm_trace)
>          mov  UREGS_eflags(%rsp),%rax
>          mov  %rax,VMCB_rflags(%rcx)
>  
> -        mov  VCPU_svm_vmcb_pa(%rbx),%rax
> -
>          pop  %r15
>          pop  %r14
>          pop  %r13
>          pop  %r12
>          pop  %rbp
> +        mov  VCPU_svm_vmcb_pa(%rbx),%rax
>          pop  %rbx
>          pop  %r11
>          pop  %r10
> @@ -92,25 +97,26 @@ UNLIKELY_END(svm_trace)
>  
>          VMRUN
>  
> +        GET_CURRENT(%rax)
>          push %rdi
>          push %rsi
>          push %rdx
>          push %rcx
> +        mov  VCPU_svm_vmcb(%rax),%rcx
>          push %rax

Having read the manual several times, I am now more and more confused
about this.

My reading of the AMD programmer manual vol 3 indicates that %rax after
VMRUN completes will be the host %rax, i.e. VCPU_svm_vmcb_pa.

However, I can't find anywhere in the code which overwrites regs->rax
from vmcb->rax, which I would have thought would cause utter devastation
in combination with the generic functions working with a cpu_user_regs
structure.

The alternative is that %rax after VMRUN is actually the guest %rax, at
which point the pushes used to do the correct thing, but are now broken
by this patch clobbering it before being saved.

Can someone with more knowledge please confirm?  I really hope I have
overlooked something in the code.

~Andrew

>          push %r8
>          push %r9
>          push %r10
>          push %r11
>          push %rbx
> +        mov  %rax,%rbx
>          push %rbp
>          push %r12
>          push %r13
>          push %r14
>          push %r15
>  
> -        GET_CURRENT(%rbx)
>          movb $0,VCPU_svm_vmcb_in_sync(%rbx)
> -        mov  VCPU_svm_vmcb(%rbx),%rcx
>          mov  VMCB_rax(%rcx),%rax
>          mov  %rax,UREGS_rax(%rsp)
>          mov  VMCB_rip(%rcx),%rax
> @@ -120,33 +126,14 @@ UNLIKELY_END(svm_trace)
>          mov  VMCB_rflags(%rcx),%rax
>          mov  %rax,UREGS_eflags(%rsp)
>  
> -#ifndef NDEBUG
> -        mov  $0xbeef,%ax
> -        mov  %ax,UREGS_error_code(%rsp)
> -        mov  %ax,UREGS_entry_vector(%rsp)
> -        mov  %ax,UREGS_saved_upcall_mask(%rsp)
> -        mov  %ax,UREGS_cs(%rsp)
> -        mov  %ax,UREGS_ds(%rsp)
> -        mov  %ax,UREGS_es(%rsp)
> -        mov  %ax,UREGS_fs(%rsp)
> -        mov  %ax,UREGS_gs(%rsp)
> -        mov  %ax,UREGS_ss(%rsp)
> -#endif
> -
>          STGI
>  .globl svm_stgi_label
>  svm_stgi_label:
>          mov  %rsp,%rdi
>          call svm_vmexit_handler
> -        jmp  svm_asm_do_resume
> +        jmp  .Lsvm_do_resume
>  
>  .Lsvm_process_softirqs:
>          STGI
>          call do_softirq
> -        jmp  svm_asm_do_resume
> -
> -.Lsvm_nsvm_no_p2m:
> -        /* Someone shot down our nested p2m table; go round again
> -         * and nsvm_vcpu_switch() will fix it for us. */
> -        STGI
> -        jmp  svm_asm_do_resume
> +        jmp  .Lsvm_do_resume
> --- a/xen/arch/x86/hvm/svm/svm.c
> +++ b/xen/arch/x86/hvm/svm/svm.c
> @@ -2069,6 +2069,8 @@ void svm_vmexit_handler(struct cpu_user_
>      vintr_t intr;
>      bool_t vcpu_guestmode = 0;
>  
> +    hvm_invalidate_regs_fields(regs);
> +
>      if ( paging_mode_hap(v->domain) )
>          v->arch.hvm_vcpu.guest_cr[3] = v->arch.hvm_vcpu.hw_cr[3] =
>              vmcb_get_cr3(vmcb);
> --- a/xen/include/asm-x86/asm_defns.h
> +++ b/xen/include/asm-x86/asm_defns.h
> @@ -39,11 +39,17 @@ void ret_from_intr(void);
>          .subsection 1;            \
>          .Lunlikely.tag:
>  
> -#define UNLIKELY_END(tag)         \
> -        jmp .Llikely.tag;         \
> +#define UNLIKELY_DONE(cond, tag)  \
> +        j##cond .Llikely.tag
> +
> +#define __UNLIKELY_END(tag)       \
>          .subsection 0;            \
>          .Llikely.tag:
>  
> +#define UNLIKELY_END(tag)         \
> +        UNLIKELY_DONE(mp, tag);   \
> +        __UNLIKELY_END(tag)
> +
>  #define STACK_CPUINFO_FIELD(field) (STACK_SIZE-CPUINFO_sizeof+CPUINFO_##field)
>  #define GET_STACK_BASE(reg)                       \
>          movq $~(STACK_SIZE-1),reg;                \
>
>
>
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel



^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 4/4] SVM: streamline entry.S code
  2013-08-26 16:20   ` Andrew Cooper
@ 2013-08-26 17:20     ` Keir Fraser
  2013-08-26 17:46       ` Andrew Cooper
  0 siblings, 1 reply; 60+ messages in thread
From: Keir Fraser @ 2013-08-26 17:20 UTC (permalink / raw)
  To: Andrew Cooper, Jan Beulich
  Cc: xen-devel, Boris Ostrovsky, Jacob Shin, suravee.suthikulpanit

On 26/08/2013 17:20, "Andrew Cooper" <andrew.cooper3@citrix.com> wrote:

> Having read the manual several times, I am now more and more confused about
> this.
>  
>  My reading of the AMD programmer manual vol 3 indicates that %rax after VMRUN
> completes will be the host %rax, i.e. VCPU_svm_vmcb_pa.

Yes.

>  However, I can't find anywhere in the code which overwrites regs->rax from
> vmcb->rax, which I would have thought would cause utter devastation in
> combination with the generic functions working with a cpu_user_regs
> structure.

It's right there in entry.S: 'mov VMCB_rax(%rcx),%rax; mov
%rax,UREGS_rax(%rsp)'

>  The alternative is that %rax after VMRUN is actually the guest %rax, at which
> point the pushes used to do the correct thing, but are now broken by this
> patch clobbering it before being saved.
>  
>  Can someone with more knowledge please confirm?  I really hope I have
> overlooked something in the code.

:)

 -- Keir

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 4/4] SVM: streamline entry.S code
  2013-08-26 17:20     ` Keir Fraser
@ 2013-08-26 17:46       ` Andrew Cooper
  0 siblings, 0 replies; 60+ messages in thread
From: Andrew Cooper @ 2013-08-26 17:46 UTC (permalink / raw)
  To: Keir Fraser
  Cc: xen-devel, Boris Ostrovsky, Jacob Shin, suravee.suthikulpanit,
	Jan Beulich

On 26/08/2013 18:20, Keir Fraser wrote:
>
>>  However, I can't find anywhere in the code which overwrites regs->rax from
>> vmcb->rax, which I would have thought would cause utter devastation in
>> combination with the generic functions working with a cpu_user_regs
>> structure.
> It's right there in entry.S: 'mov VMCB_rax(%rcx),%rax; mov
> %rax,UREGS_rax(%rsp)'

So it is.  I feel quite silly right about now.  I suspect that I have
been staring at this code a bit too long.

~Andrew

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 4/4] SVM: streamline entry.S code
  2013-08-23 14:04 ` [PATCH 4/4] SVM: streamline entry.S code Jan Beulich
  2013-08-26 16:20   ` Andrew Cooper
@ 2013-08-26 21:47   ` Andrew Cooper
  2013-08-27  7:38     ` Jan Beulich
  2013-08-29 11:56   ` Tim Deegan
  2013-09-04 14:39   ` Boris Ostrovsky
  3 siblings, 1 reply; 60+ messages in thread
From: Andrew Cooper @ 2013-08-26 21:47 UTC (permalink / raw)
  To: Jan Beulich
  Cc: xen-devel, Boris Ostrovsky, Keir Fraser, Jacob Shin,
	suravee.suthikulpanit

On 23/08/2013 15:04, Jan Beulich wrote:
> --- a/xen/arch/x86/hvm/svm/entry.S
> +++ b/xen/arch/x86/hvm/svm/entry.S
> @@ -32,28 +32,34 @@
>  #define CLGI   .byte 0x0F,0x01,0xDD
>  
>  ENTRY(svm_asm_do_resume)
> +        GET_CURRENT(%rbx)
> +.Lsvm_do_resume:
>          call svm_intr_assist
>          mov  %rsp,%rdi
>          call nsvm_vcpu_switch
>          ASSERT_NOT_IN_ATOMIC
>  
> -        GET_CURRENT(%rbx)
> -        CLGI
> -
>          mov  VCPU_processor(%rbx),%eax
> -        shl  $IRQSTAT_shift,%eax
>          lea  irq_stat+IRQSTAT_softirq_pending(%rip),%rdx
> -        cmpl $0,(%rdx,%rax,1)
> +        xor  %ecx,%ecx
> +        shl  $IRQSTAT_shift,%eax
> +        CLGI
> +        cmp  %ecx,(%rdx,%rax,1)
>          jne  .Lsvm_process_softirqs
>  
> -        testb $0, VCPU_nsvm_hap_enabled(%rbx)
> -UNLIKELY_START(nz, nsvm_hap)

Isn't this as much a bugfix as an optimisation?  test $0, <anything>
should unconditionally set the zero flag, making the UNLIKELY_START()
section unreachable code.

For what it is worth, the new logic with cmp does appear to be correct.
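
For illustration only (not Xen code, just the C equivalent of the flag
TEST computes):

    /* Sketch: TEST sets ZF from the bitwise AND of its operands. */
    static int test_sets_zf(unsigned char mem, unsigned char imm)
    {
        return (mem & imm) == 0;            /* ZF after "test imm, mem" */
    }

    /* test_sets_zf(x, 0) is 1 for every x, so a jnz (or UNLIKELY_START(nz, ...))
     * guarded by "testb $0, ..." can never be taken. */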

> -        mov  VCPU_nhvm_p2m(%rbx),%rax
> -        test %rax,%rax
> +        cmp  %cl,VCPU_nsvm_hap_enabled(%rbx)
> +UNLIKELY_START(ne, nsvm_hap)
> +        cmp  %rcx,VCPU_nhvm_p2m(%rbx)
>          sete %al
> -        andb VCPU_nhvm_guestmode(%rbx),%al
> -        jnz  .Lsvm_nsvm_no_p2m
> -UNLIKELY_END(nsvm_hap)
> +        test VCPU_nhvm_guestmode(%rbx),%al
> +        UNLIKELY_DONE(z, nsvm_hap)
> +        /*
> +         * Someone shot down our nested p2m table; go round again
> +         * and nsvm_vcpu_switch() will fix it for us.
> +         */
> +        STGI
> +        jmp  .Lsvm_do_resume
> +__UNLIKELY_END(nsvm_hap)
>  
>          call svm_asid_handle_vmrun
>  

The next instruction after this hunk is "cmpb $0,tl_init_done(%rip)",
where %cl could be used, in the same style of changes as above.

> @@ -72,13 +78,12 @@ UNLIKELY_END(svm_trace)
>          mov  UREGS_eflags(%rsp),%rax
>          mov  %rax,VMCB_rflags(%rcx)

This mov could be moved down as well, which will break the data hazard
on %rax.  It might however want a comment with it, as that would
certainly make the "mov  UREGS_eflags(%rsp),%rax; mov 
%rax,VMCB_rflags(%rcx)" pair less obvious.

>  
> -        mov  VCPU_svm_vmcb_pa(%rbx),%rax
> -
>          pop  %r15
>          pop  %r14
>          pop  %r13
>          pop  %r12
>          pop  %rbp
> +        mov  VCPU_svm_vmcb_pa(%rbx),%rax
>          pop  %rbx
>          pop  %r11
>          pop  %r10
> @@ -92,25 +97,26 @@ UNLIKELY_END(svm_trace)
>  
>          VMRUN
>  
> +        GET_CURRENT(%rax)
>          push %rdi
>          push %rsi
>          push %rdx
>          push %rcx
> +        mov  VCPU_svm_vmcb(%rax),%rcx
>          push %rax

As regs->rax is overwritten just below, this could be replaced with "sub
$8,%rsp" which mirrors the the add before VMRUN, and would also help to
break up the queue of pushes in the pipeline.


All of those points were just nitpicking, and the changes are mainly
along a similar line to the VMX.  I still have the same concern about
jumping back to the top of the function, but I shall investigate that
and start a new thread.

Therefore,
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 4/4] SVM: streamline entry.S code
  2013-08-26 21:47   ` Andrew Cooper
@ 2013-08-27  7:38     ` Jan Beulich
  0 siblings, 0 replies; 60+ messages in thread
From: Jan Beulich @ 2013-08-27  7:38 UTC (permalink / raw)
  To: Andrew Cooper
  Cc: xen-devel, Boris Ostrovsky, Keir Fraser, Jacob Shin,
	suravee.suthikulpanit

>>> On 26.08.13 at 23:47, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
> On 23/08/2013 15:04, Jan Beulich wrote:
>> --- a/xen/arch/x86/hvm/svm/entry.S
>> +++ b/xen/arch/x86/hvm/svm/entry.S
>> @@ -32,28 +32,34 @@
>>  #define CLGI   .byte 0x0F,0x01,0xDD
>>  
>>  ENTRY(svm_asm_do_resume)
>> +        GET_CURRENT(%rbx)
>> +.Lsvm_do_resume:
>>          call svm_intr_assist
>>          mov  %rsp,%rdi
>>          call nsvm_vcpu_switch
>>          ASSERT_NOT_IN_ATOMIC
>>  
>> -        GET_CURRENT(%rbx)
>> -        CLGI
>> -
>>          mov  VCPU_processor(%rbx),%eax
>> -        shl  $IRQSTAT_shift,%eax
>>          lea  irq_stat+IRQSTAT_softirq_pending(%rip),%rdx
>> -        cmpl $0,(%rdx,%rax,1)
>> +        xor  %ecx,%ecx
>> +        shl  $IRQSTAT_shift,%eax
>> +        CLGI
>> +        cmp  %ecx,(%rdx,%rax,1)
>>          jne  .Lsvm_process_softirqs
>>  
>> -        testb $0, VCPU_nsvm_hap_enabled(%rbx)
>> -UNLIKELY_START(nz, nsvm_hap)
> 
> Isn't this as much a bugfix as an optimisation?  test $0, <anything>
> should unconditionally set the zero flag, making the UNLIKELY_START()
> section unreachable code.

Indeed, I shall add a note about this to the description.

>>          call svm_asid_handle_vmrun
>>  
> 
> The next instruction after this hunk is "cmpb $0,tl_init_done(%rip)",
> where %cl could be used, in the same style of changes as above.

Yeah, but the call clobbers %rcx.

>> @@ -72,13 +78,12 @@ UNLIKELY_END(svm_trace)
>>          mov  UREGS_eflags(%rsp),%rax
>>          mov  %rax,VMCB_rflags(%rcx)
> 
> This mov could be moved down as well, which will break the data hazard
> on %rax.  It might however want a comment with it, as that would
> certainly make the "mov  UREGS_eflags(%rsp),%rax; mov 
> %rax,VMCB_rflags(%rcx)" pair less obvious.

That would be applicable to the whole series of moves here.
However, I don't think there's a dependency issue here on
modern CPUs (sadly neither Intel's nor AMD's optimization docs
go into enough detail about this), largely thanks to register
renaming.

>> @@ -92,25 +97,26 @@ UNLIKELY_END(svm_trace)
>>  
>>          VMRUN
>>  
>> +        GET_CURRENT(%rax)
>>          push %rdi
>>          push %rsi
>>          push %rdx
>>          push %rcx
>> +        mov  VCPU_svm_vmcb(%rax),%rcx
>>          push %rax
> 
> As regs->rax is overwritten just below, this could be replaced with "sub
> $8,%rsp" which mirrors the the add before VMRUN, and would also help to
> break up the queue of pushes in the pipeline.

But whether the CPU can better handle successive pushes or
pushes (producing and consuming %rsp) interleaved with an
ALU adjustment to %rsp is not clear.

> All of those points were just nitpicking, and the changes are mainly
> along a similar line to the VMX.  I still have the same concern about
> jumping back to the top of the function, but I shall investigate that
> and start a new thread.
> 
> Therefore,
> Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

Thanks.

Jan

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 1/4] VMX: streamline entry.S code
  2013-08-23 14:01 ` [PATCH 1/4] VMX: streamline entry.S code Jan Beulich
  2013-08-26 10:44   ` Andrew Cooper
@ 2013-08-29 11:01   ` Tim Deegan
  2013-08-29 12:35     ` Jan Beulich
  1 sibling, 1 reply; 60+ messages in thread
From: Tim Deegan @ 2013-08-29 11:01 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel, Keir Fraser, Eddie Dong, Jun Nakajima

At 15:01 +0100 on 23 Aug (1377270100), Jan Beulich wrote:
> +ENTRY(vmx_asm_vmexit_handler)
>          push %rdi
>          push %rsi
>          push %rdx
>          push %rcx
>          push %rax
> +        mov  %cr2,%rax
>          push %r8
>          push %r9
>          push %r10
>          push %r11
>          push %rbx
> +        GET_CURRENT(%rbx)

I see this avoids a potential stall below, but OTOH it introduces an
explicit %rsp access into a string of push ops, which is frowned on
(IIRC there's special fast-forwarding of %rsp in chains of pushes).
I'd be inclined to leave it alone unless there's a measurable
improvement, since the code's more readable as-is.

Either way,

Reviewed-by: Tim Deegan <tim@xen.org>

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH v2 2/4] VMX: move various uses of UD2 out of fast paths
  2013-08-26  8:58     ` [PATCH v2 " Jan Beulich
  2013-08-26  9:09       ` Andrew Cooper
@ 2013-08-29 11:08       ` Tim Deegan
  1 sibling, 0 replies; 60+ messages in thread
From: Tim Deegan @ 2013-08-29 11:08 UTC (permalink / raw)
  To: Jan Beulich
  Cc: xen-devel, Keir Fraser, Eddie Dong, Jun Nakajima, Andrew Cooper

At 09:58 +0100 on 26 Aug (1377511092), Jan Beulich wrote:
> ... at once making conditional forward jumps, which are statically
> predicted to be not taken, only used for the unlikely (error) cases.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> ---
> v2: Fix #UD recovery for INVVPID (unintended flow change spotted by Andrew Cooper).
> 

Reviewed-by: Tim Deegan <tim@xen.org>

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH v5 3/4] VMX: use proper instruction mnemonics if assembler supports them
  2013-08-26 15:31                 ` [PATCH v5 " Jan Beulich
  2013-08-26 15:36                   ` Andrew Cooper
@ 2013-08-29 11:47                   ` Tim Deegan
  2013-08-29 12:30                     ` Jan Beulich
  2013-08-29 12:45                     ` Jan Beulich
  1 sibling, 2 replies; 60+ messages in thread
From: Tim Deegan @ 2013-08-29 11:47 UTC (permalink / raw)
  To: Jan Beulich
  Cc: xen-devel, Keir Fraser, Eddie Dong, Jun Nakajima, Andrew Cooper

At 16:31 +0100 on 26 Aug (1377534696), Jan Beulich wrote:
> With the hex byte emission we were taking away a good part of
> flexibility from the compiler, as for simplicity reasons these were
> built using fixed operands. All halfway modern build environments
> would allow using the mnemonics (but we can't disable the hex variants
> yet, since the binutils around at the time gcc 4.1 got released didn't
> support these yet).
> 
> I didn't convert __vmread() yet because that would, just like for
> __vmread_safe(), imply converting to a macro so that the output operand
> can be the caller supplied variable rather than an intermediate one. As
> that would require touching all invocation points of __vmread() (of
> which there are quite a few), I'd first like to be certain the approach
> is acceptable; the main question being whether the now conditional code
> might be considered to cause future maintenance issues, and the second
> being that of parameter/argument ordering (here I made __vmread_safe()
> match __vmwrite(), but one could also take the position that read and
> write should use the inverse order of one another, in line with the
> actual instruction operands).
> 
> Additionally I was quite puzzled to find that all the asm()-s involved
> here have memory clobbers - what are they needed for? Or can they be
> dropped at least in some cases?

The vmread/write ones are, I think, red herrings.  We're not allowed to
make assumptions about the memory state of a loaded VMCS anyway.

invept I think does, to make sure all EPT changes have been written.
invvpid too, for similar reasons.

vmptrld/clear I'm not sure about: if we were to (say) copy a VMCS or
move it we'd need that barrier.  (AFAIK we don't do that but we might be
very surprised if we started).
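
As a minimal sketch of the compiler-side effect (purely illustrative, with a
made-up helper, not the Xen code): the "memory" clobber keeps a preceding
store from being cached in a register or sunk past the asm that stands in
for the flush.

    /* Illustration only -- the empty asm stands in for invept/invvpid. */
    static inline void update_then_flush(unsigned long *pte, unsigned long val)
    {
        *pte = val;                          /* must be emitted before ...      */
        asm volatile ( "" : : : "memory" );  /* ... this compiler-level barrier */
    }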

> -static inline unsigned long __vmread_safe(unsigned long field, int *error)
> +static inline bool_t __vmread_safe(unsigned long field, unsigned long *value)
>  {
> -    unsigned long ecx;
> +    bool_t okay;
>  
> -    asm volatile ( VMREAD_OPCODE
> -                   MODRM_EAX_ECX
> -                   /* CF==1 or ZF==1 --> rc = -1 */
> -                   "setna %b0 ; neg %0"
> -                   : "=q" (*error), "=c" (ecx)
> -                   : "0" (0), "a" (field)
> +    asm volatile (
> +#ifdef HAVE_GAS_VMX
> +                   "vmread %2, %1\n\t"
> +#else
> +                   VMREAD_OPCODE MODRM_EAX_ECX
> +#endif
> +                   /* CF==1 or ZF==1 --> rc = 0 */
> +                   "setnbe %0"

This inversion of the (undocumented) return value could be a nasty
surprise for anyone backporting code that uses __vmread_safe().  Can you
please leave it as it was?

Cheers,

Tim.

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 4/4] SVM: streamline entry.S code
  2013-08-23 14:04 ` [PATCH 4/4] SVM: streamline entry.S code Jan Beulich
  2013-08-26 16:20   ` Andrew Cooper
  2013-08-26 21:47   ` Andrew Cooper
@ 2013-08-29 11:56   ` Tim Deegan
  2013-09-04 14:39   ` Boris Ostrovsky
  3 siblings, 0 replies; 60+ messages in thread
From: Tim Deegan @ 2013-08-29 11:56 UTC (permalink / raw)
  To: Jan Beulich
  Cc: xen-devel, Boris Ostrovsky, Keir Fraser, Jacob Shin,
	suravee.suthikulpanit

At 15:04 +0100 on 23 Aug (1377270250), Jan Beulich wrote:
> - move stuff easily/better done in C into C code
> - re-arrange code paths so that no redundant GET_CURRENT() would remain
>   on the fast paths
> - move long latency operations earlier
> - slightly defer disabling global interrupts on the VM entry path
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Reviewed-by: Tim Deegan <tim@xen.org>

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH v5 3/4] VMX: use proper instruction mnemonics if assembler supports them
  2013-08-29 11:47                   ` Tim Deegan
@ 2013-08-29 12:30                     ` Jan Beulich
  2013-08-29 13:11                       ` Tim Deegan
  2013-08-29 12:45                     ` Jan Beulich
  1 sibling, 1 reply; 60+ messages in thread
From: Jan Beulich @ 2013-08-29 12:30 UTC (permalink / raw)
  To: Tim Deegan
  Cc: Andrew Cooper, Keir Fraser, Eddie Dong, Jun Nakajima, xen-devel

>>> On 29.08.13 at 13:47, Tim Deegan <tim@xen.org> wrote:
> At 16:31 +0100 on 26 Aug (1377534696), Jan Beulich wrote:
>> -static inline unsigned long __vmread_safe(unsigned long field, int *error)
>> +static inline bool_t __vmread_safe(unsigned long field, unsigned long *value)
>>  {
>> -    unsigned long ecx;
>> +    bool_t okay;
>>  
>> -    asm volatile ( VMREAD_OPCODE
>> -                   MODRM_EAX_ECX
>> -                   /* CF==1 or ZF==1 --> rc = -1 */
>> -                   "setna %b0 ; neg %0"
>> -                   : "=q" (*error), "=c" (ecx)
>> -                   : "0" (0), "a" (field)
>> +    asm volatile (
>> +#ifdef HAVE_GAS_VMX
>> +                   "vmread %2, %1\n\t"
>> +#else
>> +                   VMREAD_OPCODE MODRM_EAX_ECX
>> +#endif
>> +                   /* CF==1 or ZF==1 --> rc = 0 */
>> +                   "setnbe %0"
> 
> This inversion of the (undocumented) return value could be a nasty
> surprise for anyone backporting code that uses __vmread_safe().  Can you
> please leave it as it was?

The prior return value was the value read; we can't make this the
return value if at the same time we want to be able to utilize the
instruction's capability to read into a memory location (and not
just a register). And hence the short answer to your question is:
No.
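
For comparison, a caller-side sketch (the __vmread_safe_old prototype and
use() below are just placeholders for this illustration; the new pattern is
the one the shadow_to_vvmcs() hunk switches to):

    typedef unsigned char bool_t;

    unsigned long __vmread_safe_old(unsigned long field, int *error); /* pre-patch   */
    bool_t __vmread_safe(unsigned long field, unsigned long *value);  /* post-patch  */
    void use(unsigned long val);                                      /* placeholder */

    static void old_style(unsigned long field)
    {
        int rc;
        unsigned long val = __vmread_safe_old(field, &rc);

        if ( !rc )                          /* rc == 0 meant success */
            use(val);
    }

    static void new_style(unsigned long field)
    {
        unsigned long val;

        if ( __vmread_safe(field, &val) )   /* non-zero return now means success */
            use(val);
    }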

Jan

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 1/4] VMX: streamline entry.S code
  2013-08-29 11:01   ` Tim Deegan
@ 2013-08-29 12:35     ` Jan Beulich
  0 siblings, 0 replies; 60+ messages in thread
From: Jan Beulich @ 2013-08-29 12:35 UTC (permalink / raw)
  To: Tim Deegan; +Cc: xen-devel, Keir Fraser, Eddie Dong, Jun Nakajima

>>> On 29.08.13 at 13:01, Tim Deegan <tim@xen.org> wrote:
> At 15:01 +0100 on 23 Aug (1377270100), Jan Beulich wrote:
>> +ENTRY(vmx_asm_vmexit_handler)
>>          push %rdi
>>          push %rsi
>>          push %rdx
>>          push %rcx
>>          push %rax
>> +        mov  %cr2,%rax
>>          push %r8
>>          push %r9
>>          push %r10
>>          push %r11
>>          push %rbx
>> +        GET_CURRENT(%rbx)
> 
> I see this avoids a potential stall below, but OTOH it introduces an
> explicit %rsp access into a string of push ops, which is frowned on
> (IIRC there's special fast-forwarding of %rsp in chains of pushes).
> I'd be inclined to leave it alone unless there's a measurable
> improvement, since the code's more readable as-is.

I guess I'll leave it this way, aiming at replacing the pushes by moves
(as we did elsewhere in the 4.3 cycle) at some point.

Jan

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH v5 3/4] VMX: use proper instruction mnemonics if assembler supports them
  2013-08-29 11:47                   ` Tim Deegan
  2013-08-29 12:30                     ` Jan Beulich
@ 2013-08-29 12:45                     ` Jan Beulich
  2013-08-29 13:19                       ` Tim Deegan
  1 sibling, 1 reply; 60+ messages in thread
From: Jan Beulich @ 2013-08-29 12:45 UTC (permalink / raw)
  To: Tim Deegan
  Cc: Andrew Cooper, Keir Fraser, Eddie Dong, Jun Nakajima, xen-devel

>>> On 29.08.13 at 13:47, Tim Deegan <tim@xen.org> wrote:
> At 16:31 +0100 on 26 Aug (1377534696), Jan Beulich wrote:
>> Additionally I was quite puzzled to find that all the asm()-s involved
>> here have memory clobbers - what are they needed for? Or can they be
>> dropped at least in some cases?
> 
> The vmread/write ones are, I think, red herrings.  We're not allowed to
> make assumptions about the memory state of a loaded VMCS anyway.
> 
> invept I think does, to make sure all EPT changes have been written.
> invvpid too, for similar reasons.

Considering that these instructions aren't documented to be
ordered in any way with earlier ones (at least the instruction
pages don't say anything to that effect), wouldn't we rather
need an MFENCE to enforce this?

> vmptrld/clear I'm not sure about: if we were to (say) copy a VMCS or
> move it we'd need that barrier.  (AFAIK we don't do that but we might be
> very surprised if we started).

Similarly here.

Jan

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH v5 3/4] VMX: use proper instruction mnemonics if assembler supports them
  2013-08-29 12:30                     ` Jan Beulich
@ 2013-08-29 13:11                       ` Tim Deegan
  2013-08-29 13:27                         ` Jan Beulich
  0 siblings, 1 reply; 60+ messages in thread
From: Tim Deegan @ 2013-08-29 13:11 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Andrew Cooper, Keir Fraser, Eddie Dong, Jun Nakajima, xen-devel

At 13:30 +0100 on 29 Aug (1377783058), Jan Beulich wrote:
> >>> On 29.08.13 at 13:47, Tim Deegan <tim@xen.org> wrote:
> > At 16:31 +0100 on 26 Aug (1377534696), Jan Beulich wrote:
> >> -static inline unsigned long __vmread_safe(unsigned long field, int *error)
> >> +static inline bool_t __vmread_safe(unsigned long field, unsigned long *value)
> >>  {
> >> -    unsigned long ecx;
> >> +    bool_t okay;
> >>  
> >> -    asm volatile ( VMREAD_OPCODE
> >> -                   MODRM_EAX_ECX
> >> -                   /* CF==1 or ZF==1 --> rc = -1 */
> >> -                   "setna %b0 ; neg %0"
> >> -                   : "=q" (*error), "=c" (ecx)
> >> -                   : "0" (0), "a" (field)
> >> +    asm volatile (
> >> +#ifdef HAVE_GAS_VMX
> >> +                   "vmread %2, %1\n\t"
> >> +#else
> >> +                   VMREAD_OPCODE MODRM_EAX_ECX
> >> +#endif
> >> +                   /* CF==1 or ZF==1 --> rc = 0 */
> >> +                   "setnbe %0"
> > 
> > This inversion of the (undocumented) return value could be a nasty
> > surprise for anyone backporting code that uses __vmread_safe().  Can you
> > please leave it as it was?
> 
> The prior return value was the value read

Sorry, I had somehow missed this.  That's enough to cause compile issues
so I guess it'll be obvious that the function has changed.  Would be
nice if the new version had a comment to say when it returns 0 and when
1.  Either way, Reviewed-by: Tim Deegan <tim@xen.org>

Cheers,

Tim.

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH v5 3/4] VMX: use proper instruction mnemonics if assembler supports them
  2013-08-29 12:45                     ` Jan Beulich
@ 2013-08-29 13:19                       ` Tim Deegan
  0 siblings, 0 replies; 60+ messages in thread
From: Tim Deegan @ 2013-08-29 13:19 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Andrew Cooper, Keir Fraser, Eddie Dong, Jun Nakajima, xen-devel

At 13:45 +0100 on 29 Aug (1377783906), Jan Beulich wrote:
> >>> On 29.08.13 at 13:47, Tim Deegan <tim@xen.org> wrote:
> > At 16:31 +0100 on 26 Aug (1377534696), Jan Beulich wrote:
> >> Additionally I was quite puzzled to find that all the asm()-s involved
> >> here have memory clobbers - what are they needed for? Or can they be
> >> dropped at least in some cases?
> > 
> > The vmread/write ones are, I think, red herrings.  We're not allowed to
> > make assumptions about the memory state of a loaded VMCS anyway.
> > 
> > invept I think does, to make sure all EPT changes have been written.
> > invvpid too, for similar reasons.
> 
> Considering that these instructions aren't documented to be
> ordered in any way with earlier ones (at least the instruction
> pages don't say anything to that effect), wouldn't we rather
> need an MFENCE to enforce this?

invept and invvpid are both serializing (in the list in vol 3A, 8-16). 

> > vmptrld/clear I'm not sure about: if we were to (say) copy a VMCS or
> > move it we'd need that barrier.  (AFAIK we don't do that but we might be
> > very surprised if we started).
> 
> Similarly here.

I think the x86 memory model is strong enough here: as long as the
compiler issues the instructions in the right order they'll be seen in
the right order by all CPUs.

Tim.

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH v5 3/4] VMX: use proper instruction mnemonics if assembler supports them
  2013-08-29 13:11                       ` Tim Deegan
@ 2013-08-29 13:27                         ` Jan Beulich
  2013-08-29 14:02                           ` Tim Deegan
  0 siblings, 1 reply; 60+ messages in thread
From: Jan Beulich @ 2013-08-29 13:27 UTC (permalink / raw)
  To: Tim Deegan
  Cc: Andrew Cooper, Keir Fraser, Eddie Dong, Jun Nakajima, xen-devel

>>> On 29.08.13 at 15:11, Tim Deegan <tim@xen.org> wrote:
> At 13:30 +0100 on 29 Aug (1377783058), Jan Beulich wrote:
>> >>> On 29.08.13 at 13:47, Tim Deegan <tim@xen.org> wrote:
>> > At 16:31 +0100 on 26 Aug (1377534696), Jan Beulich wrote:
>> >> -static inline unsigned long __vmread_safe(unsigned long field, int *error)
>> >> +static inline bool_t __vmread_safe(unsigned long field, unsigned long 
> *value)
>> >>  {
>> >> -    unsigned long ecx;
>> >> +    bool_t okay;
>> >>  
>> >> -    asm volatile ( VMREAD_OPCODE
>> >> -                   MODRM_EAX_ECX
>> >> -                   /* CF==1 or ZF==1 --> rc = -1 */
>> >> -                   "setna %b0 ; neg %0"
>> >> -                   : "=q" (*error), "=c" (ecx)
>> >> -                   : "0" (0), "a" (field)
>> >> +    asm volatile (
>> >> +#ifdef HAVE_GAS_VMX
>> >> +                   "vmread %2, %1\n\t"
>> >> +#else
>> >> +                   VMREAD_OPCODE MODRM_EAX_ECX
>> >> +#endif
>> >> +                   /* CF==1 or ZF==1 --> rc = 0 */
>> >> +                   "setnbe %0"
>> > 
>> > This inversion of the (undocumented) return value could be a nasty
>> > surprise for anyone backporting code that uses __vmread_safe().  Can you
>> > please leave it as it was?
>> 
>> The prior return value was the value read
> 
> Sorry, I had somehow missed this.  That's enough to cause compile issues
> so I guess it'll be obvious that the function has changed.  Would be
> nice if the new version had a comment to say when it returns 0 and when
> 1.

If I had named the used variable e.g. "ret" or "rc", I could see the
need for a comment. It being named "okay", I rather think a
comment would be redundant. But if you're strongly of a different
opinion, I can certainly add such a comment.

Jan

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH v5 3/4] VMX: use proper instruction mnemonics if assembler supports them
  2013-08-29 13:27                         ` Jan Beulich
@ 2013-08-29 14:02                           ` Tim Deegan
  0 siblings, 0 replies; 60+ messages in thread
From: Tim Deegan @ 2013-08-29 14:02 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Andrew Cooper, Keir Fraser, Eddie Dong, Jun Nakajima, xen-devel

At 14:27 +0100 on 29 Aug (1377786433), Jan Beulich wrote:
> >>> On 29.08.13 at 15:11, Tim Deegan <tim@xen.org> wrote:
> > At 13:30 +0100 on 29 Aug (1377783058), Jan Beulich wrote:
> >> >>> On 29.08.13 at 13:47, Tim Deegan <tim@xen.org> wrote:
> >> > At 16:31 +0100 on 26 Aug (1377534696), Jan Beulich wrote:
> >> >> -static inline unsigned long __vmread_safe(unsigned long field, int *error)
> >> >> +static inline bool_t __vmread_safe(unsigned long field, unsigned long 
> > *value)
> >> >>  {
> >> >> -    unsigned long ecx;
> >> >> +    bool_t okay;
> >> >>  
> >> >> -    asm volatile ( VMREAD_OPCODE
> >> >> -                   MODRM_EAX_ECX
> >> >> -                   /* CF==1 or ZF==1 --> rc = -1 */
> >> >> -                   "setna %b0 ; neg %0"
> >> >> -                   : "=q" (*error), "=c" (ecx)
> >> >> -                   : "0" (0), "a" (field)
> >> >> +    asm volatile (
> >> >> +#ifdef HAVE_GAS_VMX
> >> >> +                   "vmread %2, %1\n\t"
> >> >> +#else
> >> >> +                   VMREAD_OPCODE MODRM_EAX_ECX
> >> >> +#endif
> >> >> +                   /* CF==1 or ZF==1 --> rc = 0 */
> >> >> +                   "setnbe %0"
> >> > 
> >> > This inversion of the (undocumented) return value could be a nasty
> >> > surprise for anyone backporting code that uses __vmread_safe().  Can you
> >> > please leave it as it was?
> >> 
> >> The prior return value was the value read
> > 
> > Sorry, I had somehow missed this.  That's enough to cause compile issues
> > so I guess it'll be obvious that the function has changed.  Would be
> > nice if the new version had a comment to say when it returns 0 and when
> > 1.
> 
> If I had named the used variable e.g. "ret" or "rc", I could see the
> need for a comment. It being named "okay", I rather think a
> comment would be redundant. But if you're strongly of a different
> opinion, I can certainly add such a comment.

No, leave it if you prefer.  It occurs to me that the useful case would
be for someone going the other way (with a patch against xen-unstable
and porting to an older tree), which would require a comment in the
version being removed. :)

Tim.

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Ping: [PATCH 0/4] HVM: produce better binary code
  2013-08-23 13:58 [PATCH 0/4] HVM: produce better binary code Jan Beulich
                   ` (3 preceding siblings ...)
  2013-08-23 14:04 ` [PATCH 4/4] SVM: streamline entry.S code Jan Beulich
@ 2013-09-04 10:06 ` Jan Beulich
  2013-09-04 16:16   ` Andrew Cooper
  4 siblings, 1 reply; 60+ messages in thread
From: Jan Beulich @ 2013-09-04 10:06 UTC (permalink / raw)
  To: Jacob Shin, suravee.suthikulpanit, Eddie Dong, Jun Nakajima,
	Boris Ostrovsky
  Cc: Andrew Cooper, Tim Deegan, xen-devel

>>> On 23.08.13 at 15:58, "Jan Beulich" <JBeulich@suse.com> wrote:

While I got two reviews meanwhile for this series (thanks Andrew
and Tim!), ack-s from the maintainers are still missing:

> 1: VMX: streamline entry.S code
> 2: VMX: move various uses of UD2 out of fast paths
> 3: VMX: use proper instruction mnemonics if assembler supports them

Jun, Eddie? (Yes, there had been a couple of revisions to patches
2 and 3, but even their sending was now more than a week ago.)

> 4: SVM: streamline entry.S code

Suravee, Boris, Jacob?

I'm going to wait for perhaps another day or two, and will assume
silent agreement if I don't hear otherwise. I'll similarly assume silent
agreement to the discussed follow-up patches (dropping memory
barriers in a few places as well as converting __vmread() along the
lines of __vmread_safe()) once done, submitted, and reviewed.

Jan

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 4/4] SVM: streamline entry.S code
  2013-08-23 14:04 ` [PATCH 4/4] SVM: streamline entry.S code Jan Beulich
                     ` (2 preceding siblings ...)
  2013-08-29 11:56   ` Tim Deegan
@ 2013-09-04 14:39   ` Boris Ostrovsky
  2013-09-04 14:50     ` Jan Beulich
  3 siblings, 1 reply; 60+ messages in thread
From: Boris Ostrovsky @ 2013-09-04 14:39 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel, Keir Fraser, Jacob Shin, suravee.suthikulpanit

On 08/23/2013 10:04 AM, Jan Beulich wrote:
>           mov  UREGS_eflags(%rsp),%rax
>           mov  %rax,VMCB_rflags(%rcx)
>   
> -        mov  VCPU_svm_vmcb_pa(%rbx),%rax
> -
>           pop  %r15
>           pop  %r14
>           pop  %r13
>           pop  %r12
>           pop  %rbp
> +        mov  VCPU_svm_vmcb_pa(%rbx),%rax
>           pop  %rbx
>           pop  %r11
>           pop  %r10
> @@ -92,25 +97,26 @@ UNLIKELY_END(svm_trace)
>   
>           VMRUN
>   
> +        GET_CURRENT(%rax)
>           push %rdi
>           push %rsi
>           push %rdx
>           push %rcx
> +        mov  VCPU_svm_vmcb(%rax),%rcx
>           push %rax
>           push %r8
>           push %r9
>           push %r10
>           push %r11
>           push %rbx
> +        mov  %rax,%rbx

Can you continue with using %rax as 'current' pointer below and not save it
to %rbx? %rax appears to be a temp register so perhaps you can use, say,
%rdi, for that purpose.

-boris

>           push %rbp
>           push %r12
>           push %r13
>           push %r14
>           push %r15
>   
> -        GET_CURRENT(%rbx)
>           movb $0,VCPU_svm_vmcb_in_sync(%rbx)
> -        mov  VCPU_svm_vmcb(%rbx),%rcx
>           mov  VMCB_rax(%rcx),%rax
>           mov  %rax,UREGS_rax(%rsp)
>           mov  VMCB_rip(%rcx),%rax
> @@ -120,33 +126,14 @@ UNLIKELY_END(svm_trace)
>           mov  VMCB_rflags(%rcx),%rax
>           mov  %rax,UREGS_eflags(%rsp)
>   
> -#ifndef NDEBUG
> -        mov  $0xbeef,%ax
> -        mov  %ax,UREGS_error_code(%rsp)
> -        mov  %ax,UREGS_entry_vector(%rsp)
> -        mov  %ax,UREGS_saved_upcall_mask(%rsp)
> -        mov  %ax,UREGS_cs(%rsp)
> -        mov  %ax,UREGS_ds(%rsp)
> -        mov  %ax,UREGS_es(%rsp)
> -        mov  %ax,UREGS_fs(%rsp)
> -        mov  %ax,UREGS_gs(%rsp)
> -        mov  %ax,UREGS_ss(%rsp)
> -#endif
> -
>           STGI
>   .globl svm_stgi_label
>   svm_stgi_label:
>           mov  %rsp,%rdi
>           call svm_vmexit_handler
> -        jmp  svm_asm_do_resume
> +        jmp  .Lsvm_do_resume
>   

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 4/4] SVM: streamline entry.S code
  2013-09-04 14:39   ` Boris Ostrovsky
@ 2013-09-04 14:50     ` Jan Beulich
  2013-09-04 15:09       ` Boris Ostrovsky
  0 siblings, 1 reply; 60+ messages in thread
From: Jan Beulich @ 2013-09-04 14:50 UTC (permalink / raw)
  To: Boris Ostrovsky; +Cc: xen-devel, Keir Fraser, Jacob Shin, suravee.suthikulpanit

>>> On 04.09.13 at 16:39, Boris Ostrovsky <boris.ostrovsky@oracle.com> wrote:
> On 08/23/2013 10:04 AM, Jan Beulich wrote:
>> @@ -92,25 +97,26 @@ UNLIKELY_END(svm_trace)
>>   
>>           VMRUN
>>   
>> +        GET_CURRENT(%rax)
>>           push %rdi
>>           push %rsi
>>           push %rdx
>>           push %rcx
>> +        mov  VCPU_svm_vmcb(%rax),%rcx
>>           push %rax
>>           push %r8
>>           push %r9
>>           push %r10
>>           push %r11
>>           push %rbx
>> +        mov  %rax,%rbx
> 
> Can you continue with using %rax as 'current' pointer below and not save it
> to %rbx? %rax appears to be a temp register so perhaps you can use, say,
> %rdi, for that purpose.

I'm sorry, but I don't understand what you're asking for.

We actually _want_ "current" to be in a register that's callee-saved,
such that we don't need to reload it after function calls. One goal of
the patch in fact is to eliminate such redundant reloads. But as I'm
not sure what you want, this explanation may be entirely off.

And as a side note: At this point I'm not really expecting requests
for further changes, unless you spot a mistake in the patch. It was
around for review for long enough, and as said it had two reviews
already. And further optimization should go in a separate,
incremental patch.

Jan

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 4/4] SVM: streamline entry.S code
  2013-09-04 14:50     ` Jan Beulich
@ 2013-09-04 15:09       ` Boris Ostrovsky
  2013-09-04 15:20         ` Jan Beulich
  0 siblings, 1 reply; 60+ messages in thread
From: Boris Ostrovsky @ 2013-09-04 15:09 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel, Keir Fraser, Jacob Shin, suravee.suthikulpanit

On 09/04/2013 10:50 AM, Jan Beulich wrote:
>>>> On 04.09.13 at 16:39, Boris Ostrovsky <boris.ostrovsky@oracle.com> wrote:
>> On 08/23/2013 10:04 AM, Jan Beulich wrote:
>>> @@ -92,25 +97,26 @@ UNLIKELY_END(svm_trace)
>>>    
>>>            VMRUN
>>>    
>>> +        GET_CURRENT(%rax)
>>>            push %rdi
>>>            push %rsi
>>>            push %rdx
>>>            push %rcx
>>> +        mov  VCPU_svm_vmcb(%rax),%rcx
>>>            push %rax
>>>            push %r8
>>>            push %r9
>>>            push %r10
>>>            push %r11
>>>            push %rbx
>>> +        mov  %rax,%rbx
>> Can you continue with using %rax as 'current' pointer below and not save it
>> to %rbx? %rax appears to be a temp register so perhaps you can use, say,
>> %rdi, for that purpose.
> I'm sorry, but I don't understand what you're asking for.
>
> We actually _want_ "current" to be in a register that's callee-saved,
> such that we don't need to reload it after function calls. One goal of
> the patch in fact is to eliminate such redundant reloads. But as I'm
> not sure what you want, this explanation may be entirely off.


I meant something like this:

diff --git a/xen/arch/x86/hvm/svm/entry.S b/xen/arch/x86/hvm/svm/entry.S
index 1969629..b362637 100644
--- a/xen/arch/x86/hvm/svm/entry.S
+++ b/xen/arch/x86/hvm/svm/entry.S
@@ -92,10 +92,12 @@ UNLIKELY_END(svm_trace)

          VMRUN

+       GET_CURRENT(%rax)
          push %rdi
          push %rsi
          push %rdx
          push %rcx
+       mov  VCPU_svm_vmcb(%rax),%rcx
          push %rax
          push %r8
          push %r9
@@ -108,17 +110,15 @@ UNLIKELY_END(svm_trace)
          push %r14
          push %r15

-        GET_CURRENT(%rbx)
-        movb $0,VCPU_svm_vmcb_in_sync(%rbx)
-        mov  VCPU_svm_vmcb(%rbx),%rcx
-        mov  VMCB_rax(%rcx),%rax
-        mov  %rax,UREGS_rax(%rsp)
-        mov  VMCB_rip(%rcx),%rax
-        mov  %rax,UREGS_rip(%rsp)
-        mov  VMCB_rsp(%rcx),%rax
-        mov  %rax,UREGS_rsp(%rsp)
-        mov  VMCB_rflags(%rcx),%rax
-        mov  %rax,UREGS_eflags(%rsp)
+        movb $0,VCPU_svm_vmcb_in_sync(%rax)
+        mov  VMCB_rax(%rcx),%rdi
+        mov  %rdi,UREGS_rax(%rsp)
+        mov  VMCB_rip(%rcx),%rdi
+        mov  %rdi,UREGS_rip(%rsp)
+        mov  VMCB_rsp(%rcx),%rdi
+        mov  %rdi,UREGS_rsp(%rsp)
+        mov  VMCB_rflags(%rcx),%rdi
+        mov  %rdi,UREGS_eflags(%rsp)

  #ifndef NDEBUG
          mov  $0xbeef,%ax
ostr@workbase>



%rax is clobbered anyway by ' mov  VMCB_rax(%rcx),%rax'


>
> And as a side note: At this point I'm not really expecting requests
> for further changes, unless you spot a mistake in the patch. It was
> around for review for long enough, and as said it had two reviews
> already. And further optimization should go in a separate,
> incremental patch.
>


Right, this was more of an observation, not a mistake in your patch.
Feel free to ignore it (assuming it's even correct).


-boris

^ permalink raw reply related	[flat|nested] 60+ messages in thread

* Re: [PATCH 4/4] SVM: streamline entry.S code
  2013-09-04 15:09       ` Boris Ostrovsky
@ 2013-09-04 15:20         ` Jan Beulich
  2013-09-04 16:42           ` Boris Ostrovsky
  0 siblings, 1 reply; 60+ messages in thread
From: Jan Beulich @ 2013-09-04 15:20 UTC (permalink / raw)
  To: Boris Ostrovsky; +Cc: xen-devel, Keir Fraser, Jacob Shin, suravee.suthikulpanit

>>> On 04.09.13 at 17:09, Boris Ostrovsky <boris.ostrovsky@oracle.com> wrote:
> I meant something like this:
> 
> diff --git a/xen/arch/x86/hvm/svm/entry.S b/xen/arch/x86/hvm/svm/entry.S
> index 1969629..b362637 100644
> --- a/xen/arch/x86/hvm/svm/entry.S
> +++ b/xen/arch/x86/hvm/svm/entry.S
> @@ -92,10 +92,12 @@ UNLIKELY_END(svm_trace)
> 
>           VMRUN
> 
> +       GET_CURRENT(%rax)
>           push %rdi
>           push %rsi
>           push %rdx
>           push %rcx
> +       mov  VCPU_svm_vmcb(%rax),%rcx
>           push %rax
>           push %r8
>           push %r9
> @@ -108,17 +110,15 @@ UNLIKELY_END(svm_trace)
>           push %r14
>           push %r15
> 
> -        GET_CURRENT(%rbx)
> -        movb $0,VCPU_svm_vmcb_in_sync(%rbx)
> -        mov  VCPU_svm_vmcb(%rbx),%rcx
> -        mov  VMCB_rax(%rcx),%rax
> -        mov  %rax,UREGS_rax(%rsp)
> -        mov  VMCB_rip(%rcx),%rax
> -        mov  %rax,UREGS_rip(%rsp)
> -        mov  VMCB_rsp(%rcx),%rax
> -        mov  %rax,UREGS_rsp(%rsp)
> -        mov  VMCB_rflags(%rcx),%rax
> -        mov  %rax,UREGS_eflags(%rsp)
> +        movb $0,VCPU_svm_vmcb_in_sync(%rax)
> +        mov  VMCB_rax(%rcx),%rdi
> +        mov  %rdi,UREGS_rax(%rsp)
> +        mov  VMCB_rip(%rcx),%rdi
> +        mov  %rdi,UREGS_rip(%rsp)
> +        mov  VMCB_rsp(%rcx),%rdi
> +        mov  %rdi,UREGS_rsp(%rsp)
> +        mov  VMCB_rflags(%rcx),%rdi
> +        mov  %rdi,UREGS_eflags(%rsp)
> 
>   #ifndef NDEBUG
>           mov  $0xbeef,%ax
> ostr@workbase>
> 
> 
> 
> %rax is clobbered anyway by ' mov  VMCB_rax(%rcx),%rax'

But the "current" pointer also isn't needed anymore after the clearing
of VCPU_svm_vmcb_in_sync and before doing the first function call
(it's needed after the function call, but for that it can't be in %rax).

So yes, that clearing of VCPU_svm_vmcb_in_sync could certainly be
done using %rax instead of %rbx, but no other code change would
seem necessary/desirable. If you agree, I'd be willing to do this one
line adjustment.

Jan

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: Ping: [PATCH 0/4] HVM: produce better binary code
  2013-09-04 10:06 ` Ping: [PATCH 0/4] HVM: produce better binary code Jan Beulich
@ 2013-09-04 16:16   ` Andrew Cooper
  2013-09-04 16:30     ` Tim Deegan
  0 siblings, 1 reply; 60+ messages in thread
From: Andrew Cooper @ 2013-09-04 16:16 UTC (permalink / raw)
  To: Jan Beulich
  Cc: suravee.suthikulpanit, Jacob Shin, Eddie Dong, Tim Deegan,
	Jun Nakajima, xen-devel, Boris Ostrovsky

On 04/09/13 11:06, Jan Beulich wrote:
>>>> On 23.08.13 at 15:58, "Jan Beulich" <JBeulich@suse.com> wrote:
> While I got two reviews meanwhile for this series (thanks Andrew
> and Tim!), ack-s from the maintainers are still missing:
>
>> 1: VMX: streamline entry.S code
>> 2: VMX: move various uses of UD2 out of fast paths
>> 3: VMX: use proper instruction mnemonics if assembler supports them
> Jun, Eddie? (Yes, there had been a couple of revisions to patches
> 2 and 3, but even their sending was now more than a week ago.)
>
>> 4: SVM: streamline entry.S code
> Suravee, Boris, Jacob?
>
> I'm going to wait for perhaps another day or two, and will assume
> silent agreement if I don't hear otherwise. I'll similarly assume silent
> agreement to the discussed follow-up patches (dropping memory
> barriers in a few places as well as converting __vmread() along the
> lines of __vmread_safe()) once done, submitted, and reviewed.
>
> Jan
>

One thing I have noticed from subsequent reading of processor manuals is
that reads from control registers are not actually serialising.  It
would be useful to get some comments about the in-processor optimisation
for runs of pushes/pops, to determine whether it is a good idea to
interrupt the run.  Having said that, the chances of any difference
being measurable are slim.

~Andrew

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: Ping: [PATCH 0/4] HVM: produce better binary code
  2013-09-04 16:16   ` Andrew Cooper
@ 2013-09-04 16:30     ` Tim Deegan
  2013-09-05  7:52       ` Jan Beulich
  0 siblings, 1 reply; 60+ messages in thread
From: Tim Deegan @ 2013-09-04 16:30 UTC (permalink / raw)
  To: Andrew Cooper
  Cc: suravee.suthikulpanit, Jacob Shin, Eddie Dong, Jan Beulich,
	Jun Nakajima, xen-devel, Boris Ostrovsky

At 17:16 +0100 on 04 Sep (1378315004), Andrew Cooper wrote:
> On 04/09/13 11:06, Jan Beulich wrote:
> >>>> On 23.08.13 at 15:58, "Jan Beulich" <JBeulich@suse.com> wrote:
> > While I got two reviews meanwhile for this series (thanks Andrew
> > and Tim!), ack-s from the maintainers are still missing:
> >
> >> 1: VMX: streamline entry.S code
> >> 2: VMX: move various uses of UD2 out of fast paths
> >> 3: VMX: use proper instruction mnemonics if assembler supports them
> > Jun, Eddie? (Yes, there had been a couple of revisions to patches
> > 2 and 3, but even their sending was now more than a week ago.)
> >
> >> 4: SVM: streamline entry.S code
> > Suravee, Boris, Jacob?
> >
> > I'm going to wait for perhaps another day or two, and will assume
> > silent agreement if I don't hear otherwise. I'll similarly assume silent
> > agreement to the discussed follow-up patches (dropping memory
> > barriers in a few places as well as converting __vmread() along the
> > lines of __vmread_safe()) once done, submitted, and reviewed.
> >
> > Jan
> >
> 
> One thing I have noticed from subsequent reading of processor manuals is
> that reads from control registers are not actually serialising.  It
> would be useful to get some comments about the in-processor optimisation
> for runs of pushes/pops, to determine whether it is a good idea to
> interrupt the run.

The Intel optimization manual goes into some detail about it.  The short
version is that it introduces a dependence which can otherwise be
finagled away by cunning logic in the instruction decoder.  It's listed
as 'medium impact, medium generality' which is about as much detail as
we can expect without measuring the actual code on the processors we
care about.

I'm not certain, on a modern CPU, whether it's better to issue a series
of PUSHes or a series of %rsp-relative MOVs and a SUB.

Tim.

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 4/4] SVM: streamline entry.S code
  2013-09-04 15:20         ` Jan Beulich
@ 2013-09-04 16:42           ` Boris Ostrovsky
  2013-09-05  7:10             ` Jan Beulich
  0 siblings, 1 reply; 60+ messages in thread
From: Boris Ostrovsky @ 2013-09-04 16:42 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel, Keir Fraser, Jacob Shin, suravee.suthikulpanit

On 09/04/2013 11:20 AM, Jan Beulich wrote:
>>>> On 04.09.13 at 17:09, Boris Ostrovsky <boris.ostrovsky@oracle.com> wrote:
>> I meant something like this:
>>
>> diff --git a/xen/arch/x86/hvm/svm/entry.S b/xen/arch/x86/hvm/svm/entry.S
>> index 1969629..b362637 100644
>> --- a/xen/arch/x86/hvm/svm/entry.S
>> +++ b/xen/arch/x86/hvm/svm/entry.S
>> @@ -92,10 +92,12 @@ UNLIKELY_END(svm_trace)
>>
>>            VMRUN
>>
>> +       GET_CURRENT(%rax)
>>            push %rdi
>>            push %rsi
>>            push %rdx
>>            push %rcx
>> +       mov  VCPU_svm_vmcb(%rax),%rcx
>>            push %rax
>>            push %r8
>>            push %r9
>> @@ -108,17 +110,15 @@ UNLIKELY_END(svm_trace)
>>            push %r14
>>            push %r15
>>
>> -        GET_CURRENT(%rbx)
>> -        movb $0,VCPU_svm_vmcb_in_sync(%rbx)
>> -        mov  VCPU_svm_vmcb(%rbx),%rcx
>> -        mov  VMCB_rax(%rcx),%rax
>> -        mov  %rax,UREGS_rax(%rsp)
>> -        mov  VMCB_rip(%rcx),%rax
>> -        mov  %rax,UREGS_rip(%rsp)
>> -        mov  VMCB_rsp(%rcx),%rax
>> -        mov  %rax,UREGS_rsp(%rsp)
>> -        mov  VMCB_rflags(%rcx),%rax
>> -        mov  %rax,UREGS_eflags(%rsp)
>> +        movb $0,VCPU_svm_vmcb_in_sync(%rax)
>> +        mov  VMCB_rax(%rcx),%rdi
>> +        mov  %rdi,UREGS_rax(%rsp)
>> +        mov  VMCB_rip(%rcx),%rdi
>> +        mov  %rdi,UREGS_rip(%rsp)
>> +        mov  VMCB_rsp(%rcx),%rdi
>> +        mov  %rdi,UREGS_rsp(%rsp)
>> +        mov  VMCB_rflags(%rcx),%rdi
>> +        mov  %rdi,UREGS_eflags(%rsp)
>>
>>    #ifndef NDEBUG
>>            mov  $0xbeef,%ax
>> ostr@workbase>
>>
>>
>>
>> %rax is clobbered anyway by ' mov  VMCB_rax(%rcx),%rax'
> But the "current" pointer also isn't needed anymore after the clearing
> of VCPU_svm_vmcb_in_sync and before doing the first function call
> (it's needed after the function call, but for that it can't be in %rax).
>
> So yes, that clearing of VCPU_svm_vmcb_in_sync could certainly be
> done using %rax instead of %rbx, but no other code change would
> seem necessary/desirable. If you agree, I'd be willing to do this one
> line adjustment.

Oh, yes, that's right, no reason to change other lines.

My only reason for suggesting this change, though, was to eliminate the
'mov  %rax,%rbx' instruction (in your patch, not in the code above).
If you want to keep it in, then I don't think the line adjustment
that you are talking about is needed.



-boris

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH 4/4] SVM: streamline entry.S code
  2013-09-04 16:42           ` Boris Ostrovsky
@ 2013-09-05  7:10             ` Jan Beulich
  0 siblings, 0 replies; 60+ messages in thread
From: Jan Beulich @ 2013-09-05  7:10 UTC (permalink / raw)
  To: Boris Ostrovsky; +Cc: xen-devel, Keir Fraser, Jacob Shin, suravee.suthikulpanit

>>> On 04.09.13 at 18:42, Boris Ostrovsky <boris.ostrovsky@oracle.com> wrote:
> On 09/04/2013 11:20 AM, Jan Beulich wrote:
>>>>> On 04.09.13 at 17:09, Boris Ostrovsky <boris.ostrovsky@oracle.com> wrote:
>>> I meant something like this:
>>>
>>> diff --git a/xen/arch/x86/hvm/svm/entry.S b/xen/arch/x86/hvm/svm/entry.S
>>> index 1969629..b362637 100644
>>> --- a/xen/arch/x86/hvm/svm/entry.S
>>> +++ b/xen/arch/x86/hvm/svm/entry.S
>>> @@ -92,10 +92,12 @@ UNLIKELY_END(svm_trace)
>>>
>>>            VMRUN
>>>
>>> +       GET_CURRENT(%rax)
>>>            push %rdi
>>>            push %rsi
>>>            push %rdx
>>>            push %rcx
>>> +       mov  VCPU_svm_vmcb(%rax),%rcx
>>>            push %rax
>>>            push %r8
>>>            push %r9
>>> @@ -108,17 +110,15 @@ UNLIKELY_END(svm_trace)
>>>            push %r14
>>>            push %r15
>>>
>>> -        GET_CURRENT(%rbx)
>>> -        movb $0,VCPU_svm_vmcb_in_sync(%rbx)
>>> -        mov  VCPU_svm_vmcb(%rbx),%rcx
>>> -        mov  VMCB_rax(%rcx),%rax
>>> -        mov  %rax,UREGS_rax(%rsp)
>>> -        mov  VMCB_rip(%rcx),%rax
>>> -        mov  %rax,UREGS_rip(%rsp)
>>> -        mov  VMCB_rsp(%rcx),%rax
>>> -        mov  %rax,UREGS_rsp(%rsp)
>>> -        mov  VMCB_rflags(%rcx),%rax
>>> -        mov  %rax,UREGS_eflags(%rsp)
>>> +        movb $0,VCPU_svm_vmcb_in_sync(%rax)
>>> +        mov  VMCB_rax(%rcx),%rdi
>>> +        mov  %rdi,UREGS_rax(%rsp)
>>> +        mov  VMCB_rip(%rcx),%rdi
>>> +        mov  %rdi,UREGS_rip(%rsp)
>>> +        mov  VMCB_rsp(%rcx),%rdi
>>> +        mov  %rdi,UREGS_rsp(%rsp)
>>> +        mov  VMCB_rflags(%rcx),%rdi
>>> +        mov  %rdi,UREGS_eflags(%rsp)
>>>
>>>    #ifndef NDEBUG
>>>            mov  $0xbeef,%ax
>>> ostr@workbase>
>>>
>>>
>>>
>>> %rax is clobbered anyway by ' mov  VMCB_rax(%rcx),%rax'
>> But the "current" pointer also isn't needed anymore after the clearing
>> of VCPU_svm_vmcb_in_sync and before doing the first function call
>> (it's needed after the function call, but for that it can't be in %rax).
>>
>> So yes, that clearing of VCPU_svm_vmcb_in_sync could certainly be
>> done using %rax instead of %rbx, but no other code change would
>> seem necessary/desirable. If you agree, I'd be willing to do this one
>> line adjustment.
> 
> Oh, yes, that's right, no reason to change other lines.
> 
> My only reason for suggesting this change, though, was to eliminate the
> 'mov  %rax,%rbx' instruction (in your patch, not in the code above).
> If you want to keep it in, then I don't think the line adjustment
> that you are talking about is needed.

Once again - this is not a question of "want", but a question of
"need": after calling svm_vmexit_handler, execution continues on
to .Lsvm_do_resume, specifically skipping the (more expensive)
(re-)load of %rbx done immediately before that label (which is
there only for svm_asm_do_resume's sake).

The extra one-line adjustment would be to further hide any
latency between loading %rbx and using it in an address
calculation (I left it untouched originally because the latency
of a reg-reg move ought to be much smaller than that of
the original GET_CURRENT(%rbx), which involves a memory read).
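
Purely as an illustration of the shape being discussed (instruction
placement is approximate, not lifted from the actual patch):

    GET_CURRENT(%rax)
    ...                                  /* register saving */
    mov  %rax,%rbx                       /* kept: "current" must survive the call */
    movb $0,VCPU_svm_vmcb_in_sync(%rax)  /* the one-line adjustment: clear via %rax,
                                            pushing the first address use of %rbx
                                            further away from the reg-reg copy */
    ...
    call svm_vmexit_handler              /* %rbx still holds "current" afterwards */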

Jan

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: Ping: [PATCH 0/4] HVM: produce better binary code
  2013-09-04 16:30     ` Tim Deegan
@ 2013-09-05  7:52       ` Jan Beulich
  2013-09-05  7:58         ` Tim Deegan
  0 siblings, 1 reply; 60+ messages in thread
From: Jan Beulich @ 2013-09-05  7:52 UTC (permalink / raw)
  To: Andrew Cooper, Tim Deegan
  Cc: suravee.suthikulpanit, Jacob Shin, Eddie Dong, Jun Nakajima,
	xen-devel, Boris Ostrovsky

>>> On 04.09.13 at 18:30, Tim Deegan <tim@xen.org> wrote:
> I'm not certain, on a modern CPU, whether it's better to issue a series
> of PUSHes or a series of %rsp-relative MOVs and a SUB.

When I did the push/pop -> mov conversion for the non-HVM code
I did some measurements, with the result that the sequence of
moves was never slower but sometimes (usually on older CPUs)
faster. And the obvious benefit of moves is that interleaving them
with other code is less of a risk (in terms of breaking CPU-internal
bypasses/optimizations) than with pushes and pops.
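
A made-up fragment just to show the shape of interleaving being
compared (the offsets, registers and the %cr2 read are illustrative
only, not taken from the series):

    /* unrelated long-latency read slotted between frame-relative stores */
    mov  %rdi,8(%rsp)
    mov  %cr2,%rax
    mov  %rsi,0(%rsp)

    /* the same read landing in the middle of a run of pushes */
    push %rdi
    mov  %cr2,%rax
    push %rsi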

Jan

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: Ping: [PATCH 0/4] HVM: produce better binary code
  2013-09-05  7:52       ` Jan Beulich
@ 2013-09-05  7:58         ` Tim Deegan
  0 siblings, 0 replies; 60+ messages in thread
From: Tim Deegan @ 2013-09-05  7:58 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Eddie Dong, Andrew Cooper, Jacob Shin, Jun Nakajima, xen-devel,
	Boris Ostrovsky, suravee.suthikulpanit

At 08:52 +0100 on 05 Sep (1378371134), Jan Beulich wrote:
> >>> On 04.09.13 at 18:30, Tim Deegan <tim@xen.org> wrote:
> > I'm not certain, on a modern CPU, whether it's better to issue a series
> > of PUSHes or a series of %rsp-relative MOVs and a SUB.
> 
> When I did the push/pop -> mov conversion for the non-HVM code
> I did some measurements, with the result that the sequence of
> moves was never slower but sometimes (usually on older CPUs)
> faster.

Ah, excellent!

Tim.

^ permalink raw reply	[flat|nested] 60+ messages in thread

end of thread, other threads:[~2013-09-05  7:58 UTC | newest]

Thread overview: 60+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-08-23 13:58 [PATCH 0/4] HVM: produce better binary code Jan Beulich
2013-08-23 14:01 ` [PATCH 1/4] VMX: streamline entry.S code Jan Beulich
2013-08-26 10:44   ` Andrew Cooper
2013-08-26 11:01     ` Jan Beulich
2013-08-26 11:48       ` Andrew Cooper
2013-08-26 13:12         ` Jan Beulich
2013-08-26 13:22           ` Andrew Cooper
2013-08-29 11:01   ` Tim Deegan
2013-08-29 12:35     ` Jan Beulich
2013-08-23 14:02 ` [PATCH 2/4] VMX: move various uses of UD2 out of fast paths Jan Beulich
2013-08-23 22:06   ` Andrew Cooper
2013-08-26  8:50     ` Jan Beulich
2013-08-26  9:07       ` Andrew Cooper
2013-08-26  8:58     ` [PATCH v2 " Jan Beulich
2013-08-26  9:09       ` Andrew Cooper
2013-08-29 11:08       ` Tim Deegan
2013-08-23 14:03 ` [PATCH 3/4] VMX: use proper instruction mnemonics if assembler supports them Jan Beulich
2013-08-24 22:18   ` Andrew Cooper
2013-08-26  9:06     ` Jan Beulich
2013-08-26  9:25       ` Andrew Cooper
2013-08-26  9:41         ` Jan Beulich
2013-08-26 10:18         ` [PATCH v3 " Jan Beulich
2013-08-26 13:05           ` Andrew Cooper
2013-08-26 13:20             ` Jan Beulich
2013-08-26 14:03             ` [PATCH v4 " Jan Beulich
2013-08-26 14:18               ` Andrew Cooper
2013-08-26 14:29                 ` Jan Beulich
2013-08-26 15:07                   ` Andrew Cooper
2013-08-26 15:10                     ` Andrew Cooper
2013-08-26 15:30                       ` Jan Beulich
2013-08-26 15:29                     ` Jan Beulich
2013-08-26 15:33                       ` Andrew Cooper
2013-08-26 15:31                 ` [PATCH v5 " Jan Beulich
2013-08-26 15:36                   ` Andrew Cooper
2013-08-29 11:47                   ` Tim Deegan
2013-08-29 12:30                     ` Jan Beulich
2013-08-29 13:11                       ` Tim Deegan
2013-08-29 13:27                         ` Jan Beulich
2013-08-29 14:02                           ` Tim Deegan
2013-08-29 12:45                     ` Jan Beulich
2013-08-29 13:19                       ` Tim Deegan
2013-08-26  9:03   ` [PATCH v2 " Jan Beulich
2013-08-23 14:04 ` [PATCH 4/4] SVM: streamline entry.S code Jan Beulich
2013-08-26 16:20   ` Andrew Cooper
2013-08-26 17:20     ` Keir Fraser
2013-08-26 17:46       ` Andrew Cooper
2013-08-26 21:47   ` Andrew Cooper
2013-08-27  7:38     ` Jan Beulich
2013-08-29 11:56   ` Tim Deegan
2013-09-04 14:39   ` Boris Ostrovsky
2013-09-04 14:50     ` Jan Beulich
2013-09-04 15:09       ` Boris Ostrovsky
2013-09-04 15:20         ` Jan Beulich
2013-09-04 16:42           ` Boris Ostrovsky
2013-09-05  7:10             ` Jan Beulich
2013-09-04 10:06 ` Ping: [PATCH 0/4] HVM: produce better binary code Jan Beulich
2013-09-04 16:16   ` Andrew Cooper
2013-09-04 16:30     ` Tim Deegan
2013-09-05  7:52       ` Jan Beulich
2013-09-05  7:58         ` Tim Deegan
