* [PATCH 0/3] x86: adjust entry frame generation
@ 2012-10-02 15:20 Jan Beulich
2012-10-02 15:26 ` [PATCH 1/3] x86: use MOV instead of PUSH/POP when saving/restoring register state Jan Beulich
` (3 more replies)
0 siblings, 4 replies; 19+ messages in thread
From: Jan Beulich @ 2012-10-02 15:20 UTC (permalink / raw)
To: xen-devel
This set of patches converts the way frames get created from
using PUSHes/POPs to using MOVes, thus allowing (in certain
cases) to avoid saving/restoring part of the register set.
While the place where the (small) win from this comes from varies
between CPUs, the net effect is a 1 to 2% reduction on a
combined interruption entry and exit when the full state save
can be avoided.
1: use MOV instead of PUSH/POP when saving/restoring register state
2: consolidate frame state manipulation functions
3: save only partial register state where possible
Signed-off-by: Jan Beulich <jbeulich@suse.com>
^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCH 1/3] x86: use MOV instead of PUSH/POP when saving/restoring register state
2012-10-02 15:20 [PATCH 0/3] x86: adjust entry frame generation Jan Beulich
@ 2012-10-02 15:26 ` Jan Beulich
2012-10-02 17:01 ` Keir Fraser
2012-10-02 17:57 ` Konrad Rzeszutek Wilk
2012-10-02 15:26 ` [PATCH 2/3] x86: consolidate frame state manipulation functions Jan Beulich
` (2 subsequent siblings)
3 siblings, 2 replies; 19+ messages in thread
From: Jan Beulich @ 2012-10-02 15:26 UTC (permalink / raw)
To: xen-devel
[-- Attachment #1: Type: text/plain, Size: 6324 bytes --]
Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/arch/x86/x86_64/compat/entry.S
+++ b/xen/arch/x86/x86_64/compat/entry.S
@@ -21,8 +21,7 @@ ENTRY(compat_hypercall)
UNLIKELY_START(ne, msi_check)
movl $HYPERCALL_VECTOR,%edi
call check_for_unexpected_msi
- RESTORE_ALL
- SAVE_ALL
+ LOAD_C_CLOBBERED
UNLIKELY_END(msi_check)
GET_CURRENT(%rbx)
@@ -173,8 +172,7 @@ compat_bad_hypercall:
/* %rbx: struct vcpu, interrupts disabled */
compat_restore_all_guest:
ASSERT_INTERRUPTS_DISABLED
- RESTORE_ALL
- addq $8,%rsp
+ RESTORE_ALL adj=8
.Lft0: iretq
.section .fixup,"ax"
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -47,12 +47,10 @@ restore_all_guest:
cmpl $1,%ecx
ja .Lforce_iret
- addq $8,%rsp
- popq %rcx # RIP
- popq %r11 # CS
- cmpw $FLAT_USER_CS32,%r11
- popq %r11 # RFLAGS
- popq %rsp # RSP
+ cmpw $FLAT_USER_CS32,16(%rsp)# CS
+ movq 8(%rsp),%rcx # RIP
+ movq 24(%rsp),%r11 # RFLAGS
+ movq 32(%rsp),%rsp # RSP
je 1f
sysretq
1: sysretl
@@ -101,8 +99,7 @@ failsafe_callback:
ALIGN
/* No special register assumptions. */
restore_all_xen:
- RESTORE_ALL
- addq $8,%rsp
+ RESTORE_ALL adj=8
iretq
/*
@@ -311,8 +308,7 @@ ENTRY(int80_direct_trap)
UNLIKELY_START(ne, msi_check)
movl $0x80,%edi
call check_for_unexpected_msi
- RESTORE_ALL
- SAVE_ALL
+ LOAD_C_CLOBBERED
UNLIKELY_END(msi_check)
GET_CURRENT(%rbx)
--- a/xen/include/asm-x86/x86_64/asm_defns.h
+++ b/xen/include/asm-x86/x86_64/asm_defns.h
@@ -5,11 +5,11 @@
#ifdef CONFIG_FRAME_POINTER
/* Indicate special exception stack frame by inverting the frame pointer. */
-#define SETUP_EXCEPTION_FRAME_POINTER \
- movq %rsp,%rbp; \
+#define SETUP_EXCEPTION_FRAME_POINTER(offs) \
+ leaq offs(%rsp),%rbp; \
notq %rbp
#else
-#define SETUP_EXCEPTION_FRAME_POINTER
+#define SETUP_EXCEPTION_FRAME_POINTER(off)
#endif
#ifndef NDEBUG
@@ -27,40 +27,49 @@
#define ASSERT_INTERRUPTS_DISABLED ASSERT_INTERRUPT_STATUS(z)
#define SAVE_ALL \
+ addq $-(UREGS_error_code-UREGS_r15), %rsp; \
cld; \
- pushq %rdi; \
- pushq %rsi; \
- pushq %rdx; \
- pushq %rcx; \
- pushq %rax; \
- pushq %r8; \
- pushq %r9; \
- pushq %r10; \
- pushq %r11; \
- pushq %rbx; \
- pushq %rbp; \
- SETUP_EXCEPTION_FRAME_POINTER; \
- pushq %r12; \
- pushq %r13; \
- pushq %r14; \
- pushq %r15;
-
-#define RESTORE_ALL \
- popq %r15; \
- popq %r14; \
- popq %r13; \
- popq %r12; \
- popq %rbp; \
- popq %rbx; \
- popq %r11; \
- popq %r10; \
- popq %r9; \
- popq %r8; \
- popq %rax; \
- popq %rcx; \
- popq %rdx; \
- popq %rsi; \
- popq %rdi;
+ movq %rdi,UREGS_rdi(%rsp); \
+ movq %rsi,UREGS_rsi(%rsp); \
+ movq %rdx,UREGS_rdx(%rsp); \
+ movq %rcx,UREGS_rcx(%rsp); \
+ movq %rax,UREGS_rax(%rsp); \
+ movq %r8,UREGS_r8(%rsp); \
+ movq %r9,UREGS_r9(%rsp); \
+ movq %r10,UREGS_r10(%rsp); \
+ movq %r11,UREGS_r11(%rsp); \
+ movq %rbx,UREGS_rbx(%rsp); \
+ movq %rbp,UREGS_rbp(%rsp); \
+ SETUP_EXCEPTION_FRAME_POINTER(UREGS_rbp); \
+ movq %r12,UREGS_r12(%rsp); \
+ movq %r13,UREGS_r13(%rsp); \
+ movq %r14,UREGS_r14(%rsp); \
+ movq %r15,UREGS_r15(%rsp); \
+
+#ifdef __ASSEMBLY__
+.macro LOAD_C_CLOBBERED
+ movq UREGS_r11(%rsp),%r11
+ movq UREGS_r10(%rsp),%r10
+ movq UREGS_r9(%rsp),%r9
+ movq UREGS_r8(%rsp),%r8
+ movq UREGS_rax(%rsp),%rax
+ movq UREGS_rcx(%rsp),%rcx
+ movq UREGS_rdx(%rsp),%rdx
+ movq UREGS_rsi(%rsp),%rsi
+ movq UREGS_rdi(%rsp),%rdi
+.endm
+
+.macro RESTORE_ALL adj=0
+ movq UREGS_r15(%rsp),%r15
+ movq UREGS_r14(%rsp),%r14
+ movq UREGS_r13(%rsp),%r13
+ movq UREGS_r12(%rsp),%r12
+ movq UREGS_rbp(%rsp),%rbp
+ movq UREGS_rbx(%rsp),%rbx
+ LOAD_C_CLOBBERED
+ subq $-(UREGS_error_code-UREGS_r15+\adj), %rsp
+.endm
+#endif
#ifdef PERF_COUNTERS
#define PERFC_INCR(_name,_idx,_cur) \
@@ -94,7 +103,7 @@
__asm__( \
"\n" __ALIGN_STR"\n" \
"common_interrupt:\n\t" \
- STR(SAVE_ALL) \
+ STR(SAVE_ALL) "\n\t" \
"movq %rsp,%rdi\n\t" \
"callq " STR(do_IRQ) "\n\t" \
"jmp ret_from_intr\n");
[-- Attachment #2: x86-save-restore-all-mov.patch --]
[-- Type: text/plain, Size: 6393 bytes --]
x86: use MOV instead of PUSH/POP when saving/restoring register state
Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/arch/x86/x86_64/compat/entry.S
+++ b/xen/arch/x86/x86_64/compat/entry.S
@@ -21,8 +21,7 @@ ENTRY(compat_hypercall)
UNLIKELY_START(ne, msi_check)
movl $HYPERCALL_VECTOR,%edi
call check_for_unexpected_msi
- RESTORE_ALL
- SAVE_ALL
+ LOAD_C_CLOBBERED
UNLIKELY_END(msi_check)
GET_CURRENT(%rbx)
@@ -173,8 +172,7 @@ compat_bad_hypercall:
/* %rbx: struct vcpu, interrupts disabled */
compat_restore_all_guest:
ASSERT_INTERRUPTS_DISABLED
- RESTORE_ALL
- addq $8,%rsp
+ RESTORE_ALL adj=8
.Lft0: iretq
.section .fixup,"ax"
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -47,12 +47,10 @@ restore_all_guest:
cmpl $1,%ecx
ja .Lforce_iret
- addq $8,%rsp
- popq %rcx # RIP
- popq %r11 # CS
- cmpw $FLAT_USER_CS32,%r11
- popq %r11 # RFLAGS
- popq %rsp # RSP
+ cmpw $FLAT_USER_CS32,16(%rsp)# CS
+ movq 8(%rsp),%rcx # RIP
+ movq 24(%rsp),%r11 # RFLAGS
+ movq 32(%rsp),%rsp # RSP
je 1f
sysretq
1: sysretl
@@ -101,8 +99,7 @@ failsafe_callback:
ALIGN
/* No special register assumptions. */
restore_all_xen:
- RESTORE_ALL
- addq $8,%rsp
+ RESTORE_ALL adj=8
iretq
/*
@@ -311,8 +308,7 @@ ENTRY(int80_direct_trap)
UNLIKELY_START(ne, msi_check)
movl $0x80,%edi
call check_for_unexpected_msi
- RESTORE_ALL
- SAVE_ALL
+ LOAD_C_CLOBBERED
UNLIKELY_END(msi_check)
GET_CURRENT(%rbx)
--- a/xen/include/asm-x86/x86_64/asm_defns.h
+++ b/xen/include/asm-x86/x86_64/asm_defns.h
@@ -5,11 +5,11 @@
#ifdef CONFIG_FRAME_POINTER
/* Indicate special exception stack frame by inverting the frame pointer. */
-#define SETUP_EXCEPTION_FRAME_POINTER \
- movq %rsp,%rbp; \
+#define SETUP_EXCEPTION_FRAME_POINTER(offs) \
+ leaq offs(%rsp),%rbp; \
notq %rbp
#else
-#define SETUP_EXCEPTION_FRAME_POINTER
+#define SETUP_EXCEPTION_FRAME_POINTER(off)
#endif
#ifndef NDEBUG
@@ -27,40 +27,49 @@
#define ASSERT_INTERRUPTS_DISABLED ASSERT_INTERRUPT_STATUS(z)
#define SAVE_ALL \
+ addq $-(UREGS_error_code-UREGS_r15), %rsp; \
cld; \
- pushq %rdi; \
- pushq %rsi; \
- pushq %rdx; \
- pushq %rcx; \
- pushq %rax; \
- pushq %r8; \
- pushq %r9; \
- pushq %r10; \
- pushq %r11; \
- pushq %rbx; \
- pushq %rbp; \
- SETUP_EXCEPTION_FRAME_POINTER; \
- pushq %r12; \
- pushq %r13; \
- pushq %r14; \
- pushq %r15;
-
-#define RESTORE_ALL \
- popq %r15; \
- popq %r14; \
- popq %r13; \
- popq %r12; \
- popq %rbp; \
- popq %rbx; \
- popq %r11; \
- popq %r10; \
- popq %r9; \
- popq %r8; \
- popq %rax; \
- popq %rcx; \
- popq %rdx; \
- popq %rsi; \
- popq %rdi;
+ movq %rdi,UREGS_rdi(%rsp); \
+ movq %rsi,UREGS_rsi(%rsp); \
+ movq %rdx,UREGS_rdx(%rsp); \
+ movq %rcx,UREGS_rcx(%rsp); \
+ movq %rax,UREGS_rax(%rsp); \
+ movq %r8,UREGS_r8(%rsp); \
+ movq %r9,UREGS_r9(%rsp); \
+ movq %r10,UREGS_r10(%rsp); \
+ movq %r11,UREGS_r11(%rsp); \
+ movq %rbx,UREGS_rbx(%rsp); \
+ movq %rbp,UREGS_rbp(%rsp); \
+ SETUP_EXCEPTION_FRAME_POINTER(UREGS_rbp); \
+ movq %r12,UREGS_r12(%rsp); \
+ movq %r13,UREGS_r13(%rsp); \
+ movq %r14,UREGS_r14(%rsp); \
+ movq %r15,UREGS_r15(%rsp); \
+
+#ifdef __ASSEMBLY__
+.macro LOAD_C_CLOBBERED
+ movq UREGS_r11(%rsp),%r11
+ movq UREGS_r10(%rsp),%r10
+ movq UREGS_r9(%rsp),%r9
+ movq UREGS_r8(%rsp),%r8
+ movq UREGS_rax(%rsp),%rax
+ movq UREGS_rcx(%rsp),%rcx
+ movq UREGS_rdx(%rsp),%rdx
+ movq UREGS_rsi(%rsp),%rsi
+ movq UREGS_rdi(%rsp),%rdi
+.endm
+
+.macro RESTORE_ALL adj=0
+ movq UREGS_r15(%rsp),%r15
+ movq UREGS_r14(%rsp),%r14
+ movq UREGS_r13(%rsp),%r13
+ movq UREGS_r12(%rsp),%r12
+ movq UREGS_rbp(%rsp),%rbp
+ movq UREGS_rbx(%rsp),%rbx
+ LOAD_C_CLOBBERED
+ subq $-(UREGS_error_code-UREGS_r15+\adj), %rsp
+.endm
+#endif
#ifdef PERF_COUNTERS
#define PERFC_INCR(_name,_idx,_cur) \
@@ -94,7 +103,7 @@
__asm__( \
"\n" __ALIGN_STR"\n" \
"common_interrupt:\n\t" \
- STR(SAVE_ALL) \
+ STR(SAVE_ALL) "\n\t" \
"movq %rsp,%rdi\n\t" \
"callq " STR(do_IRQ) "\n\t" \
"jmp ret_from_intr\n");
[-- Attachment #3: Type: text/plain, Size: 126 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCH 2/3] x86: consolidate frame state manipulation functions
2012-10-02 15:20 [PATCH 0/3] x86: adjust entry frame generation Jan Beulich
2012-10-02 15:26 ` [PATCH 1/3] x86: use MOV instead of PUSH/POP when saving/restoring register state Jan Beulich
@ 2012-10-02 15:26 ` Jan Beulich
2012-10-02 17:01 ` Keir Fraser
2012-10-02 15:27 ` [PATCH 3/3] x86: save/restore only partial register state where possible Jan Beulich
2012-10-30 14:20 ` [PATCH 0/2, v2] x86: adjust entry frame generation Jan Beulich
3 siblings, 1 reply; 19+ messages in thread
From: Jan Beulich @ 2012-10-02 15:26 UTC (permalink / raw)
To: xen-devel, Jan Beulich
[-- Attachment #1: Type: text/plain, Size: 4215 bytes --]
Rather than doing this in multiple places, have a single central
function (decode_register()) to be used by all other code.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -1585,7 +1585,7 @@ int hvm_mov_to_cr(unsigned int cr, unsig
struct vcpu *curr = current;
unsigned long val, *reg;
- if ( (reg = get_x86_gpr(guest_cpu_user_regs(), gpr)) == NULL )
+ if ( (reg = decode_register(gpr, guest_cpu_user_regs(), 0)) == NULL )
{
gdprintk(XENLOG_ERR, "invalid gpr: %u\n", gpr);
goto exit_and_crash;
@@ -1627,7 +1627,7 @@ int hvm_mov_from_cr(unsigned int cr, uns
struct vcpu *curr = current;
unsigned long val = 0, *reg;
- if ( (reg = get_x86_gpr(guest_cpu_user_regs(), gpr)) == NULL )
+ if ( (reg = decode_register(gpr, guest_cpu_user_regs(), 0)) == NULL )
{
gdprintk(XENLOG_ERR, "invalid gpr: %u\n", gpr);
goto exit_and_crash;
--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -223,56 +223,18 @@ void __set_vvmcs(void *vvmcs, u32 vmcs_e
static unsigned long reg_read(struct cpu_user_regs *regs,
enum vmx_regs_enc index)
{
- unsigned long value = 0;
+ unsigned long *pval = decode_register(index, regs, 0);
- switch ( index ) {
- CASE_GET_REG(RAX, eax);
- CASE_GET_REG(RCX, ecx);
- CASE_GET_REG(RDX, edx);
- CASE_GET_REG(RBX, ebx);
- CASE_GET_REG(RBP, ebp);
- CASE_GET_REG(RSI, esi);
- CASE_GET_REG(RDI, edi);
- CASE_GET_REG(RSP, esp);
- CASE_GET_REG(R8, r8);
- CASE_GET_REG(R9, r9);
- CASE_GET_REG(R10, r10);
- CASE_GET_REG(R11, r11);
- CASE_GET_REG(R12, r12);
- CASE_GET_REG(R13, r13);
- CASE_GET_REG(R14, r14);
- CASE_GET_REG(R15, r15);
- default:
- break;
- }
-
- return value;
+ return *pval;
}
static void reg_write(struct cpu_user_regs *regs,
enum vmx_regs_enc index,
unsigned long value)
{
- switch ( index ) {
- CASE_SET_REG(RAX, eax);
- CASE_SET_REG(RCX, ecx);
- CASE_SET_REG(RDX, edx);
- CASE_SET_REG(RBX, ebx);
- CASE_SET_REG(RBP, ebp);
- CASE_SET_REG(RSI, esi);
- CASE_SET_REG(RDI, edi);
- CASE_SET_REG(RSP, esp);
- CASE_SET_REG(R8, r8);
- CASE_SET_REG(R9, r9);
- CASE_SET_REG(R10, r10);
- CASE_SET_REG(R11, r11);
- CASE_SET_REG(R12, r12);
- CASE_SET_REG(R13, r13);
- CASE_SET_REG(R14, r14);
- CASE_SET_REG(R15, r15);
- default:
- break;
- }
+ unsigned long *pval = decode_register(index, regs, 0);
+
+ *pval = value;
}
static inline u32 __n2_exec_control(struct vcpu *v)
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -367,34 +367,6 @@ void vcpu_show_execution_state(struct vc
vcpu_unpause(v);
}
-unsigned long *get_x86_gpr(struct cpu_user_regs *regs, unsigned int modrm_reg)
-{
- void *p;
-
- switch ( modrm_reg )
- {
- case 0: p = &regs->eax; break;
- case 1: p = &regs->ecx; break;
- case 2: p = &regs->edx; break;
- case 3: p = &regs->ebx; break;
- case 4: p = &regs->esp; break;
- case 5: p = &regs->ebp; break;
- case 6: p = &regs->esi; break;
- case 7: p = &regs->edi; break;
- case 8: p = &regs->r8; break;
- case 9: p = &regs->r9; break;
- case 10: p = &regs->r10; break;
- case 11: p = &regs->r11; break;
- case 12: p = &regs->r12; break;
- case 13: p = &regs->r13; break;
- case 14: p = &regs->r14; break;
- case 15: p = &regs->r15; break;
- default: p = NULL; break;
- }
-
- return p;
-}
-
static char *trapstr(int trapnr)
{
static char *strings[] = {
--- a/xen/include/asm-x86/processor.h
+++ b/xen/include/asm-x86/processor.h
@@ -552,8 +552,6 @@ void microcode_set_module(unsigned int);
int microcode_update(XEN_GUEST_HANDLE(const_void), unsigned long len);
int microcode_resume_cpu(int cpu);
-unsigned long *get_x86_gpr(struct cpu_user_regs *regs, unsigned int modrm_reg);
-
#endif /* !__ASSEMBLY__ */
#endif /* __ASM_X86_PROCESSOR_H */
[-- Attachment #2: x86-drop-get_x86_gpr.patch --]
[-- Type: text/plain, Size: 4266 bytes --]
x86: consolidate frame state manipulation functions
Rather than doing this in multiple places, have a single central
function (decode_register()) to be used by all other code.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -1585,7 +1585,7 @@ int hvm_mov_to_cr(unsigned int cr, unsig
struct vcpu *curr = current;
unsigned long val, *reg;
- if ( (reg = get_x86_gpr(guest_cpu_user_regs(), gpr)) == NULL )
+ if ( (reg = decode_register(gpr, guest_cpu_user_regs(), 0)) == NULL )
{
gdprintk(XENLOG_ERR, "invalid gpr: %u\n", gpr);
goto exit_and_crash;
@@ -1627,7 +1627,7 @@ int hvm_mov_from_cr(unsigned int cr, uns
struct vcpu *curr = current;
unsigned long val = 0, *reg;
- if ( (reg = get_x86_gpr(guest_cpu_user_regs(), gpr)) == NULL )
+ if ( (reg = decode_register(gpr, guest_cpu_user_regs(), 0)) == NULL )
{
gdprintk(XENLOG_ERR, "invalid gpr: %u\n", gpr);
goto exit_and_crash;
--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -223,56 +223,18 @@ void __set_vvmcs(void *vvmcs, u32 vmcs_e
static unsigned long reg_read(struct cpu_user_regs *regs,
enum vmx_regs_enc index)
{
- unsigned long value = 0;
+ unsigned long *pval = decode_register(index, regs, 0);
- switch ( index ) {
- CASE_GET_REG(RAX, eax);
- CASE_GET_REG(RCX, ecx);
- CASE_GET_REG(RDX, edx);
- CASE_GET_REG(RBX, ebx);
- CASE_GET_REG(RBP, ebp);
- CASE_GET_REG(RSI, esi);
- CASE_GET_REG(RDI, edi);
- CASE_GET_REG(RSP, esp);
- CASE_GET_REG(R8, r8);
- CASE_GET_REG(R9, r9);
- CASE_GET_REG(R10, r10);
- CASE_GET_REG(R11, r11);
- CASE_GET_REG(R12, r12);
- CASE_GET_REG(R13, r13);
- CASE_GET_REG(R14, r14);
- CASE_GET_REG(R15, r15);
- default:
- break;
- }
-
- return value;
+ return *pval;
}
static void reg_write(struct cpu_user_regs *regs,
enum vmx_regs_enc index,
unsigned long value)
{
- switch ( index ) {
- CASE_SET_REG(RAX, eax);
- CASE_SET_REG(RCX, ecx);
- CASE_SET_REG(RDX, edx);
- CASE_SET_REG(RBX, ebx);
- CASE_SET_REG(RBP, ebp);
- CASE_SET_REG(RSI, esi);
- CASE_SET_REG(RDI, edi);
- CASE_SET_REG(RSP, esp);
- CASE_SET_REG(R8, r8);
- CASE_SET_REG(R9, r9);
- CASE_SET_REG(R10, r10);
- CASE_SET_REG(R11, r11);
- CASE_SET_REG(R12, r12);
- CASE_SET_REG(R13, r13);
- CASE_SET_REG(R14, r14);
- CASE_SET_REG(R15, r15);
- default:
- break;
- }
+ unsigned long *pval = decode_register(index, regs, 0);
+
+ *pval = value;
}
static inline u32 __n2_exec_control(struct vcpu *v)
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -367,34 +367,6 @@ void vcpu_show_execution_state(struct vc
vcpu_unpause(v);
}
-unsigned long *get_x86_gpr(struct cpu_user_regs *regs, unsigned int modrm_reg)
-{
- void *p;
-
- switch ( modrm_reg )
- {
- case 0: p = &regs->eax; break;
- case 1: p = &regs->ecx; break;
- case 2: p = &regs->edx; break;
- case 3: p = &regs->ebx; break;
- case 4: p = &regs->esp; break;
- case 5: p = &regs->ebp; break;
- case 6: p = &regs->esi; break;
- case 7: p = &regs->edi; break;
- case 8: p = &regs->r8; break;
- case 9: p = &regs->r9; break;
- case 10: p = &regs->r10; break;
- case 11: p = &regs->r11; break;
- case 12: p = &regs->r12; break;
- case 13: p = &regs->r13; break;
- case 14: p = &regs->r14; break;
- case 15: p = &regs->r15; break;
- default: p = NULL; break;
- }
-
- return p;
-}
-
static char *trapstr(int trapnr)
{
static char *strings[] = {
--- a/xen/include/asm-x86/processor.h
+++ b/xen/include/asm-x86/processor.h
@@ -552,8 +552,6 @@ void microcode_set_module(unsigned int);
int microcode_update(XEN_GUEST_HANDLE(const_void), unsigned long len);
int microcode_resume_cpu(int cpu);
-unsigned long *get_x86_gpr(struct cpu_user_regs *regs, unsigned int modrm_reg);
-
#endif /* !__ASSEMBLY__ */
#endif /* __ASM_X86_PROCESSOR_H */
[-- Attachment #3: Type: text/plain, Size: 126 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCH 3/3] x86: save/restore only partial register state where possible
2012-10-02 15:20 [PATCH 0/3] x86: adjust entry frame generation Jan Beulich
2012-10-02 15:26 ` [PATCH 1/3] x86: use MOV instead of PUSH/POP when saving/restoring register state Jan Beulich
2012-10-02 15:26 ` [PATCH 2/3] x86: consolidate frame state manipulation functions Jan Beulich
@ 2012-10-02 15:27 ` Jan Beulich
2012-10-02 17:02 ` Keir Fraser
2012-10-30 14:20 ` [PATCH 0/2, v2] x86: adjust entry frame generation Jan Beulich
3 siblings, 1 reply; 19+ messages in thread
From: Jan Beulich @ 2012-10-02 15:27 UTC (permalink / raw)
To: xen-devel
[-- Attachment #1: Type: text/plain, Size: 15891 bytes --]
... and make restore conditional not only upon having saved the state,
but also upon whether saved state was actually modified (and register
values are known to have been preserved).
Note that RBP is unconditionally considered a volatile register (i.e.
irrespective of CONFIG_FRAME_POINTER), since the RBP handling would
become overly complicated due to the need to save/restore it on the
compat mode hypercall path [6th argument].
Note further that for compat mode code paths, saving/restoring R8...R15
is entirely unnecessary - we don't allow those guests to enter 64-bit
mode, and hence they have no way of seeing these registers' contents
(and there consequently also is no information leak, except if the
context saving domctl would be considered such).
Finally, note that this may not properly deal with gdbstub's needs, yet
(but if so, I can't really suggest adjustments, as I don't know that
code).
Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- a/tools/tests/x86_emulator/x86_emulate.c
+++ b/tools/tests/x86_emulator/x86_emulate.c
@@ -10,6 +10,7 @@ typedef bool bool_t;
#define BUG() abort()
#define cpu_has_amd_erratum(nr) 0
+#define mark_regs_dirty(r) ((void)(r))
#include "x86_emulate/x86_emulate.h"
#include "x86_emulate/x86_emulate.c"
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -138,6 +138,7 @@ static void continue_idle_domain(struct
static void continue_nonidle_domain(struct vcpu *v)
{
check_wakeup_from_wait();
+ mark_regs_dirty(guest_cpu_user_regs());
reset_stack_and_jump(ret_from_intr);
}
@@ -1307,7 +1308,7 @@ static void load_segments(struct vcpu *n
if ( test_bit(_VGCF_failsafe_disables_events, &n->arch.vgc_flags) )
vcpu_info(n, evtchn_upcall_mask) = 1;
- regs->entry_vector = TRAP_syscall;
+ regs->entry_vector |= TRAP_syscall;
regs->_eflags &= 0xFFFCBEFFUL;
regs->ss = FLAT_COMPAT_KERNEL_SS;
regs->_esp = (unsigned long)(esp-7);
@@ -1349,7 +1350,7 @@ static void load_segments(struct vcpu *n
if ( test_bit(_VGCF_failsafe_disables_events, &n->arch.vgc_flags) )
vcpu_info(n, evtchn_upcall_mask) = 1;
- regs->entry_vector = TRAP_syscall;
+ regs->entry_vector |= TRAP_syscall;
regs->rflags &= ~(X86_EFLAGS_AC|X86_EFLAGS_VM|X86_EFLAGS_RF|
X86_EFLAGS_NT|X86_EFLAGS_TF);
regs->ss = FLAT_KERNEL_SS;
--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -704,7 +704,7 @@ void irq_complete_move(struct irq_desc *
if (likely(!desc->arch.move_in_progress))
return;
- vector = get_irq_regs()->entry_vector;
+ vector = (u8)get_irq_regs()->entry_vector;
me = smp_processor_id();
if ( vector == desc->arch.vector &&
@@ -798,7 +798,7 @@ void do_IRQ(struct cpu_user_regs *regs)
struct irqaction *action;
uint32_t tsc_in;
struct irq_desc *desc;
- unsigned int vector = regs->entry_vector;
+ unsigned int vector = (u8)regs->entry_vector;
int irq = __get_cpu_var(vector_irq[vector]);
struct cpu_user_regs *old_regs = set_irq_regs(regs);
@@ -892,7 +892,7 @@ void do_IRQ(struct cpu_user_regs *regs)
out:
if ( desc->handler->end )
- desc->handler->end(desc, regs->entry_vector);
+ desc->handler->end(desc, vector);
out_no_end:
spin_unlock(&desc->lock);
out_no_unlock:
@@ -1113,7 +1113,7 @@ static void __do_IRQ_guest(int irq)
struct domain *d;
int i, sp;
struct pending_eoi *peoi = this_cpu(pending_eoi);
- int vector = get_irq_regs()->entry_vector;
+ unsigned int vector = (u8)get_irq_regs()->entry_vector;
if ( unlikely(action->nr_guests == 0) )
{
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -2082,6 +2082,7 @@ static int emulate_privileged_op(struct
goto fail;
if ( admin_io_okay(port, op_bytes, v, regs) )
{
+ mark_regs_dirty(regs);
io_emul(regs);
}
else
@@ -2111,6 +2112,7 @@ static int emulate_privileged_op(struct
goto fail;
if ( admin_io_okay(port, op_bytes, v, regs) )
{
+ mark_regs_dirty(regs);
io_emul(regs);
if ( (op_bytes == 1) && pv_post_outb_hook )
pv_post_outb_hook(port, regs->eax);
--- a/xen/arch/x86/x86_64/compat/entry.S
+++ b/xen/arch/x86/x86_64/compat/entry.S
@@ -14,8 +14,7 @@
ENTRY(compat_hypercall)
pushq $0
- movl $TRAP_syscall,4(%rsp)
- SAVE_ALL
+ SAVE_VOLATILE type=TRAP_syscall compat=1
cmpb $0,untrusted_msi(%rip)
UNLIKELY_START(ne, msi_check)
@@ -126,6 +125,7 @@ compat_test_guest_events:
/* %rbx: struct vcpu */
compat_process_softirqs:
sti
+ andl $~TRAP_regs_partial,UREGS_entry_vector(%rsp)
call do_softirq
jmp compat_test_all_events
@@ -172,7 +172,7 @@ compat_bad_hypercall:
/* %rbx: struct vcpu, interrupts disabled */
compat_restore_all_guest:
ASSERT_INTERRUPTS_DISABLED
- RESTORE_ALL adj=8
+ RESTORE_ALL adj=8 compat=1
.Lft0: iretq
.section .fixup,"ax"
@@ -243,7 +243,7 @@ UNLIKELY_END(compat_syscall_gpf)
ENTRY(compat_sysenter)
movq VCPU_trap_ctxt(%rbx),%rcx
- cmpl $TRAP_gp_fault,UREGS_entry_vector(%rsp)
+ cmpb $TRAP_gp_fault,UREGS_entry_vector(%rsp)
movzwl VCPU_sysenter_sel(%rbx),%eax
movzwl TRAP_gp_fault * TRAPINFO_sizeof + TRAPINFO_cs(%rcx),%ecx
cmovel %ecx,%eax
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -123,9 +123,8 @@ ENTRY(syscall_enter)
movl $FLAT_KERNEL_SS,24(%rsp)
pushq %rcx
pushq $0
- movl $TRAP_syscall,4(%rsp)
- movq 24(%rsp),%r11 /* Re-load user RFLAGS into %r11 before SAVE_ALL */
- SAVE_ALL
+ movq 24(%rsp),%r11 /* Re-load user RFLAGS into %r11 before saving */
+ SAVE_VOLATILE TRAP_syscall
GET_CURRENT(%rbx)
movq VCPU_domain(%rbx),%rcx
testb $1,DOMAIN_is_32bit_pv(%rcx)
@@ -222,6 +221,7 @@ test_guest_events:
/* %rbx: struct vcpu */
process_softirqs:
sti
+ SAVE_PRESERVED
call do_softirq
jmp test_all_events
@@ -275,8 +275,7 @@ sysenter_eflags_saved:
pushq $3 /* ring 3 null cs */
pushq $0 /* null rip */
pushq $0
- movl $TRAP_syscall,4(%rsp)
- SAVE_ALL
+ SAVE_VOLATILE TRAP_syscall
GET_CURRENT(%rbx)
cmpb $0,VCPU_sysenter_disables_events(%rbx)
movq VCPU_sysenter_addr(%rbx),%rax
@@ -286,6 +285,7 @@ sysenter_eflags_saved:
leal (,%rcx,TBF_INTERRUPT),%ecx
UNLIKELY_START(z, sysenter_gpf)
movq VCPU_trap_ctxt(%rbx),%rsi
+ SAVE_PRESERVED
movl $TRAP_gp_fault,UREGS_entry_vector(%rsp)
movl %eax,TRAPBOUNCE_error_code(%rdx)
movq TRAP_gp_fault * TRAPINFO_sizeof + TRAPINFO_eip(%rsi),%rax
@@ -302,7 +302,7 @@ UNLIKELY_END(sysenter_gpf)
ENTRY(int80_direct_trap)
pushq $0
- SAVE_ALL
+ SAVE_VOLATILE 0x80
cmpb $0,untrusted_msi(%rip)
UNLIKELY_START(ne, msi_check)
@@ -331,6 +331,7 @@ int80_slow_path:
* IDT entry with DPL==0.
*/
movl $((0x80 << 3) | 0x2),UREGS_error_code(%rsp)
+ SAVE_PRESERVED
movl $TRAP_gp_fault,UREGS_entry_vector(%rsp)
/* A GPF wouldn't have incremented the instruction pointer. */
subq $2,UREGS_rip(%rsp)
@@ -412,7 +413,7 @@ UNLIKELY_END(bounce_failsafe)
/* Rewrite our stack frame and return to guest-OS mode. */
/* IA32 Ref. Vol. 3: TF, VM, RF and NT flags are cleared on trap. */
/* Also clear AC: alignment checks shouldn't trigger in kernel mode. */
- movl $TRAP_syscall,UREGS_entry_vector+8(%rsp)
+ orl $TRAP_syscall,UREGS_entry_vector+8(%rsp)
andl $~(X86_EFLAGS_AC|X86_EFLAGS_VM|X86_EFLAGS_RF|\
X86_EFLAGS_NT|X86_EFLAGS_TF),UREGS_eflags+8(%rsp)
movq $FLAT_KERNEL_SS,UREGS_ss+8(%rsp)
@@ -477,7 +478,7 @@ handle_exception_saved:
jz exception_with_ints_disabled
sti
1: movq %rsp,%rdi
- movl UREGS_entry_vector(%rsp),%eax
+ movzbl UREGS_entry_vector(%rsp),%eax
leaq exception_table(%rip),%rdx
GET_CURRENT(%rbx)
PERFC_INCR(exceptions, %rax, %rbx)
@@ -518,7 +519,7 @@ exception_with_ints_disabled:
/* No special register assumptions. */
FATAL_exception_with_ints_disabled:
- movl UREGS_entry_vector(%rsp),%edi
+ movzbl UREGS_entry_vector(%rsp),%edi
movq %rsp,%rsi
call fatal_trap
ud2
@@ -624,7 +625,7 @@ handle_ist_exception:
movq %rdi,%rsp
rep movsq
1: movq %rsp,%rdi
- movl UREGS_entry_vector(%rsp),%eax
+ movzbl UREGS_entry_vector(%rsp),%eax
leaq exception_table(%rip),%rdx
callq *(%rdx,%rax,8)
jmp ret_from_intr
--- a/xen/arch/x86/x86_64/traps.c
+++ b/xen/arch/x86/x86_64/traps.c
@@ -67,10 +67,15 @@ static void _show_registers(
regs->rbp, regs->rsp, regs->r8);
printk("r9: %016lx r10: %016lx r11: %016lx\n",
regs->r9, regs->r10, regs->r11);
- printk("r12: %016lx r13: %016lx r14: %016lx\n",
- regs->r12, regs->r13, regs->r14);
- printk("r15: %016lx cr0: %016lx cr4: %016lx\n",
- regs->r15, crs[0], crs[4]);
+ if ( !(regs->entry_vector & TRAP_regs_partial) )
+ {
+ printk("r12: %016lx r13: %016lx r14: %016lx\n",
+ regs->r12, regs->r13, regs->r14);
+ printk("r15: %016lx cr0: %016lx cr4: %016lx\n",
+ regs->r15, crs[0], crs[4]);
+ }
+ else
+ printk("cr0: %016lx cr4: %016lx\n", crs[0], crs[4]);
printk("cr3: %016lx cr2: %016lx\n", crs[3], crs[2]);
printk("ds: %04x es: %04x fs: %04x gs: %04x "
"ss: %04x cs: %04x\n",
@@ -301,7 +306,7 @@ unsigned long do_iret(void)
if ( !(iret_saved.flags & VGCF_in_syscall) )
{
- regs->entry_vector = 0;
+ regs->entry_vector &= ~TRAP_syscall;
regs->r11 = iret_saved.r11;
regs->rcx = iret_saved.rcx;
}
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -1292,10 +1292,10 @@ decode_register(
case 9: p = &regs->r9; break;
case 10: p = &regs->r10; break;
case 11: p = &regs->r11; break;
- case 12: p = &regs->r12; break;
- case 13: p = &regs->r13; break;
- case 14: p = &regs->r14; break;
- case 15: p = &regs->r15; break;
+ case 12: mark_regs_dirty(regs); p = &regs->r12; break;
+ case 13: mark_regs_dirty(regs); p = &regs->r13; break;
+ case 14: mark_regs_dirty(regs); p = &regs->r14; break;
+ case 15: mark_regs_dirty(regs); p = &regs->r15; break;
#endif
default: BUG(); p = NULL; break;
}
--- a/xen/arch/x86/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate.c
@@ -10,6 +10,7 @@
*/
#include <asm/x86_emulate.h>
+#include <asm/asm_defns.h> /* mark_regs_dirty() */
#include <asm/processor.h> /* current_cpu_info */
#include <asm/amd.h> /* cpu_has_amd_erratum() */
--- a/xen/common/wait.c
+++ b/xen/common/wait.c
@@ -124,10 +124,12 @@ void wake_up_all(struct waitqueue_head *
static void __prepare_to_wait(struct waitqueue_vcpu *wqv)
{
- char *cpu_info = (char *)get_cpu_info();
+ struct cpu_info *cpu_info = get_cpu_info();
struct vcpu *curr = current;
unsigned long dummy;
+ u32 entry_vector = cpu_info->guest_cpu_user_regs.entry_vector;
+ cpu_info->guest_cpu_user_regs.entry_vector &= ~TRAP_regs_partial;
ASSERT(wqv->esp == 0);
/* Save current VCPU affinity; force wakeup on *this* CPU only. */
@@ -157,6 +159,8 @@ static void __prepare_to_wait(struct wai
gdprintk(XENLOG_ERR, "Stack too large in %s\n", __FUNCTION__);
domain_crash_synchronous();
}
+
+ cpu_info->guest_cpu_user_regs.entry_vector = entry_vector;
}
static void __finish_wait(struct waitqueue_vcpu *wqv)
--- a/xen/include/asm-x86/x86_64/asm_defns.h
+++ b/xen/include/asm-x86/x86_64/asm_defns.h
@@ -26,6 +26,17 @@
#define ASSERT_INTERRUPTS_ENABLED ASSERT_INTERRUPT_STATUS(nz)
#define ASSERT_INTERRUPTS_DISABLED ASSERT_INTERRUPT_STATUS(z)
+#define _TRAP_regs_partial 16
+#define _TRAP_regs_dirty 17
+#define TRAP_regs_partial (1 << _TRAP_regs_partial)
+#define TRAP_regs_dirty (1 << _TRAP_regs_dirty)
+
+#define mark_regs_dirty(r) ({ \
+ struct cpu_user_regs *r__ = (r); \
+ ASSERT(!((r__)->entry_vector & TRAP_regs_partial)); \
+ r__->entry_vector |= TRAP_regs_dirty; \
+})
+
#define SAVE_ALL \
addq $-(UREGS_error_code-UREGS_r15), %rsp; \
cld; \
@@ -47,11 +58,49 @@
movq %r15,UREGS_r15(%rsp); \
#ifdef __ASSEMBLY__
-.macro LOAD_C_CLOBBERED
+
+.macro SAVE_VOLATILE type compat=0
+.if \compat
+ movl $\type,UREGS_entry_vector-UREGS_error_code(%rsp)
+.else
+ movl $\type|TRAP_regs_partial,\
+ UREGS_entry_vector-UREGS_error_code(%rsp)
+.endif
+ addq $-(UREGS_error_code-UREGS_r15),%rsp
+ cld
+ movq %rdi,UREGS_rdi(%rsp)
+ movq %rsi,UREGS_rsi(%rsp)
+ movq %rdx,UREGS_rdx(%rsp)
+ movq %rcx,UREGS_rcx(%rsp)
+ movq %rax,UREGS_rax(%rsp)
+.if !\compat
+ movq %r8,UREGS_r8(%rsp)
+ movq %r9,UREGS_r9(%rsp)
+ movq %r10,UREGS_r10(%rsp)
+ movq %r11,UREGS_r11(%rsp)
+.endif
+ movq %rbx,UREGS_rbx(%rsp)
+ movq %rbp,UREGS_rbp(%rsp)
+ SETUP_EXCEPTION_FRAME_POINTER(UREGS_rbp)
+.endm
+
+.macro SAVE_PRESERVED
+ btrl $_TRAP_regs_partial,UREGS_entry_vector(%rsp)
+ jnc 987f
+ movq %r12,UREGS_r12(%rsp)
+ movq %r13,UREGS_r13(%rsp)
+ movq %r14,UREGS_r14(%rsp)
+ movq %r15,UREGS_r15(%rsp)
+987:
+.endm
+
+.macro LOAD_C_CLOBBERED compat=0
+.if !\compat
movq UREGS_r11(%rsp),%r11
movq UREGS_r10(%rsp),%r10
movq UREGS_r9(%rsp),%r9
movq UREGS_r8(%rsp),%r8
+.endif
movq UREGS_rax(%rsp),%rax
movq UREGS_rcx(%rsp),%rcx
movq UREGS_rdx(%rsp),%rdx
@@ -59,16 +108,38 @@
movq UREGS_rdi(%rsp),%rdi
.endm
-.macro RESTORE_ALL adj=0
+.macro RESTORE_ALL adj=0 compat=0
+.if !\compat
+ testl $TRAP_regs_dirty,UREGS_entry_vector(%rsp)
+.endif
+ LOAD_C_CLOBBERED \compat
+.if !\compat
+ jz 987f
movq UREGS_r15(%rsp),%r15
movq UREGS_r14(%rsp),%r14
movq UREGS_r13(%rsp),%r13
movq UREGS_r12(%rsp),%r12
- movq UREGS_rbp(%rsp),%rbp
+#ifndef NDEBUG
+ .subsection 1
+987: testl $TRAP_regs_partial,UREGS_entry_vector(%rsp)
+ jnz 987f
+ cmpq UREGS_r15(%rsp),%r15
+ jne 789f
+ cmpq UREGS_r14(%rsp),%r14
+ jne 789f
+ cmpq UREGS_r13(%rsp),%r13
+ jne 789f
+ cmpq UREGS_r12(%rsp),%r12
+ je 987f
+789: ud2
+ .subsection 0
+#endif
+.endif
+987: movq UREGS_rbp(%rsp),%rbp
movq UREGS_rbx(%rsp),%rbx
- LOAD_C_CLOBBERED
subq $-(UREGS_error_code-UREGS_r15+\adj), %rsp
.endm
+
#endif
#ifdef PERF_COUNTERS
[-- Attachment #2: x86-save-restore-reduce.patch --]
[-- Type: text/plain, Size: 15951 bytes --]
x86: save/restore only partial register state where possible
... and make restore conditional not only upon having saved the state,
but also upon whether saved state was actually modified (and register
values are known to have been preserved).
Note that RBP is unconditionally considered a volatile register (i.e.
irrespective of CONFIG_FRAME_POINTER), since the RBP handling would
become overly complicated due to the need to save/restore it on the
compat mode hypercall path [6th argument].
Note further that for compat mode code paths, saving/restoring R8...R15
is entirely unnecessary - we don't allow those guests to enter 64-bit
mode, and hence they have no way of seeing these registers' contents
(and there consequently also is no information leak, except if the
context saving domctl would be considered such).
Finally, note that this may not properly deal with gdbstub's needs, yet
(but if so, I can't really suggest adjustments, as I don't know that
code).
Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- a/tools/tests/x86_emulator/x86_emulate.c
+++ b/tools/tests/x86_emulator/x86_emulate.c
@@ -10,6 +10,7 @@ typedef bool bool_t;
#define BUG() abort()
#define cpu_has_amd_erratum(nr) 0
+#define mark_regs_dirty(r) ((void)(r))
#include "x86_emulate/x86_emulate.h"
#include "x86_emulate/x86_emulate.c"
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -138,6 +138,7 @@ static void continue_idle_domain(struct
static void continue_nonidle_domain(struct vcpu *v)
{
check_wakeup_from_wait();
+ mark_regs_dirty(guest_cpu_user_regs());
reset_stack_and_jump(ret_from_intr);
}
@@ -1307,7 +1308,7 @@ static void load_segments(struct vcpu *n
if ( test_bit(_VGCF_failsafe_disables_events, &n->arch.vgc_flags) )
vcpu_info(n, evtchn_upcall_mask) = 1;
- regs->entry_vector = TRAP_syscall;
+ regs->entry_vector |= TRAP_syscall;
regs->_eflags &= 0xFFFCBEFFUL;
regs->ss = FLAT_COMPAT_KERNEL_SS;
regs->_esp = (unsigned long)(esp-7);
@@ -1349,7 +1350,7 @@ static void load_segments(struct vcpu *n
if ( test_bit(_VGCF_failsafe_disables_events, &n->arch.vgc_flags) )
vcpu_info(n, evtchn_upcall_mask) = 1;
- regs->entry_vector = TRAP_syscall;
+ regs->entry_vector |= TRAP_syscall;
regs->rflags &= ~(X86_EFLAGS_AC|X86_EFLAGS_VM|X86_EFLAGS_RF|
X86_EFLAGS_NT|X86_EFLAGS_TF);
regs->ss = FLAT_KERNEL_SS;
--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -704,7 +704,7 @@ void irq_complete_move(struct irq_desc *
if (likely(!desc->arch.move_in_progress))
return;
- vector = get_irq_regs()->entry_vector;
+ vector = (u8)get_irq_regs()->entry_vector;
me = smp_processor_id();
if ( vector == desc->arch.vector &&
@@ -798,7 +798,7 @@ void do_IRQ(struct cpu_user_regs *regs)
struct irqaction *action;
uint32_t tsc_in;
struct irq_desc *desc;
- unsigned int vector = regs->entry_vector;
+ unsigned int vector = (u8)regs->entry_vector;
int irq = __get_cpu_var(vector_irq[vector]);
struct cpu_user_regs *old_regs = set_irq_regs(regs);
@@ -892,7 +892,7 @@ void do_IRQ(struct cpu_user_regs *regs)
out:
if ( desc->handler->end )
- desc->handler->end(desc, regs->entry_vector);
+ desc->handler->end(desc, vector);
out_no_end:
spin_unlock(&desc->lock);
out_no_unlock:
@@ -1113,7 +1113,7 @@ static void __do_IRQ_guest(int irq)
struct domain *d;
int i, sp;
struct pending_eoi *peoi = this_cpu(pending_eoi);
- int vector = get_irq_regs()->entry_vector;
+ unsigned int vector = (u8)get_irq_regs()->entry_vector;
if ( unlikely(action->nr_guests == 0) )
{
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -2082,6 +2082,7 @@ static int emulate_privileged_op(struct
goto fail;
if ( admin_io_okay(port, op_bytes, v, regs) )
{
+ mark_regs_dirty(regs);
io_emul(regs);
}
else
@@ -2111,6 +2112,7 @@ static int emulate_privileged_op(struct
goto fail;
if ( admin_io_okay(port, op_bytes, v, regs) )
{
+ mark_regs_dirty(regs);
io_emul(regs);
if ( (op_bytes == 1) && pv_post_outb_hook )
pv_post_outb_hook(port, regs->eax);
--- a/xen/arch/x86/x86_64/compat/entry.S
+++ b/xen/arch/x86/x86_64/compat/entry.S
@@ -14,8 +14,7 @@
ENTRY(compat_hypercall)
pushq $0
- movl $TRAP_syscall,4(%rsp)
- SAVE_ALL
+ SAVE_VOLATILE type=TRAP_syscall compat=1
cmpb $0,untrusted_msi(%rip)
UNLIKELY_START(ne, msi_check)
@@ -126,6 +125,7 @@ compat_test_guest_events:
/* %rbx: struct vcpu */
compat_process_softirqs:
sti
+ andl $~TRAP_regs_partial,UREGS_entry_vector(%rsp)
call do_softirq
jmp compat_test_all_events
@@ -172,7 +172,7 @@ compat_bad_hypercall:
/* %rbx: struct vcpu, interrupts disabled */
compat_restore_all_guest:
ASSERT_INTERRUPTS_DISABLED
- RESTORE_ALL adj=8
+ RESTORE_ALL adj=8 compat=1
.Lft0: iretq
.section .fixup,"ax"
@@ -243,7 +243,7 @@ UNLIKELY_END(compat_syscall_gpf)
ENTRY(compat_sysenter)
movq VCPU_trap_ctxt(%rbx),%rcx
- cmpl $TRAP_gp_fault,UREGS_entry_vector(%rsp)
+ cmpb $TRAP_gp_fault,UREGS_entry_vector(%rsp)
movzwl VCPU_sysenter_sel(%rbx),%eax
movzwl TRAP_gp_fault * TRAPINFO_sizeof + TRAPINFO_cs(%rcx),%ecx
cmovel %ecx,%eax
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -123,9 +123,8 @@ ENTRY(syscall_enter)
movl $FLAT_KERNEL_SS,24(%rsp)
pushq %rcx
pushq $0
- movl $TRAP_syscall,4(%rsp)
- movq 24(%rsp),%r11 /* Re-load user RFLAGS into %r11 before SAVE_ALL */
- SAVE_ALL
+ movq 24(%rsp),%r11 /* Re-load user RFLAGS into %r11 before saving */
+ SAVE_VOLATILE TRAP_syscall
GET_CURRENT(%rbx)
movq VCPU_domain(%rbx),%rcx
testb $1,DOMAIN_is_32bit_pv(%rcx)
@@ -222,6 +221,7 @@ test_guest_events:
/* %rbx: struct vcpu */
process_softirqs:
sti
+ SAVE_PRESERVED
call do_softirq
jmp test_all_events
@@ -275,8 +275,7 @@ sysenter_eflags_saved:
pushq $3 /* ring 3 null cs */
pushq $0 /* null rip */
pushq $0
- movl $TRAP_syscall,4(%rsp)
- SAVE_ALL
+ SAVE_VOLATILE TRAP_syscall
GET_CURRENT(%rbx)
cmpb $0,VCPU_sysenter_disables_events(%rbx)
movq VCPU_sysenter_addr(%rbx),%rax
@@ -286,6 +285,7 @@ sysenter_eflags_saved:
leal (,%rcx,TBF_INTERRUPT),%ecx
UNLIKELY_START(z, sysenter_gpf)
movq VCPU_trap_ctxt(%rbx),%rsi
+ SAVE_PRESERVED
movl $TRAP_gp_fault,UREGS_entry_vector(%rsp)
movl %eax,TRAPBOUNCE_error_code(%rdx)
movq TRAP_gp_fault * TRAPINFO_sizeof + TRAPINFO_eip(%rsi),%rax
@@ -302,7 +302,7 @@ UNLIKELY_END(sysenter_gpf)
ENTRY(int80_direct_trap)
pushq $0
- SAVE_ALL
+ SAVE_VOLATILE 0x80
cmpb $0,untrusted_msi(%rip)
UNLIKELY_START(ne, msi_check)
@@ -331,6 +331,7 @@ int80_slow_path:
* IDT entry with DPL==0.
*/
movl $((0x80 << 3) | 0x2),UREGS_error_code(%rsp)
+ SAVE_PRESERVED
movl $TRAP_gp_fault,UREGS_entry_vector(%rsp)
/* A GPF wouldn't have incremented the instruction pointer. */
subq $2,UREGS_rip(%rsp)
@@ -412,7 +413,7 @@ UNLIKELY_END(bounce_failsafe)
/* Rewrite our stack frame and return to guest-OS mode. */
/* IA32 Ref. Vol. 3: TF, VM, RF and NT flags are cleared on trap. */
/* Also clear AC: alignment checks shouldn't trigger in kernel mode. */
- movl $TRAP_syscall,UREGS_entry_vector+8(%rsp)
+ orl $TRAP_syscall,UREGS_entry_vector+8(%rsp)
andl $~(X86_EFLAGS_AC|X86_EFLAGS_VM|X86_EFLAGS_RF|\
X86_EFLAGS_NT|X86_EFLAGS_TF),UREGS_eflags+8(%rsp)
movq $FLAT_KERNEL_SS,UREGS_ss+8(%rsp)
@@ -477,7 +478,7 @@ handle_exception_saved:
jz exception_with_ints_disabled
sti
1: movq %rsp,%rdi
- movl UREGS_entry_vector(%rsp),%eax
+ movzbl UREGS_entry_vector(%rsp),%eax
leaq exception_table(%rip),%rdx
GET_CURRENT(%rbx)
PERFC_INCR(exceptions, %rax, %rbx)
@@ -518,7 +519,7 @@ exception_with_ints_disabled:
/* No special register assumptions. */
FATAL_exception_with_ints_disabled:
- movl UREGS_entry_vector(%rsp),%edi
+ movzbl UREGS_entry_vector(%rsp),%edi
movq %rsp,%rsi
call fatal_trap
ud2
@@ -624,7 +625,7 @@ handle_ist_exception:
movq %rdi,%rsp
rep movsq
1: movq %rsp,%rdi
- movl UREGS_entry_vector(%rsp),%eax
+ movzbl UREGS_entry_vector(%rsp),%eax
leaq exception_table(%rip),%rdx
callq *(%rdx,%rax,8)
jmp ret_from_intr
--- a/xen/arch/x86/x86_64/traps.c
+++ b/xen/arch/x86/x86_64/traps.c
@@ -67,10 +67,15 @@ static void _show_registers(
regs->rbp, regs->rsp, regs->r8);
printk("r9: %016lx r10: %016lx r11: %016lx\n",
regs->r9, regs->r10, regs->r11);
- printk("r12: %016lx r13: %016lx r14: %016lx\n",
- regs->r12, regs->r13, regs->r14);
- printk("r15: %016lx cr0: %016lx cr4: %016lx\n",
- regs->r15, crs[0], crs[4]);
+ if ( !(regs->entry_vector & TRAP_regs_partial) )
+ {
+ printk("r12: %016lx r13: %016lx r14: %016lx\n",
+ regs->r12, regs->r13, regs->r14);
+ printk("r15: %016lx cr0: %016lx cr4: %016lx\n",
+ regs->r15, crs[0], crs[4]);
+ }
+ else
+ printk("cr0: %016lx cr4: %016lx\n", crs[0], crs[4]);
printk("cr3: %016lx cr2: %016lx\n", crs[3], crs[2]);
printk("ds: %04x es: %04x fs: %04x gs: %04x "
"ss: %04x cs: %04x\n",
@@ -301,7 +306,7 @@ unsigned long do_iret(void)
if ( !(iret_saved.flags & VGCF_in_syscall) )
{
- regs->entry_vector = 0;
+ regs->entry_vector &= ~TRAP_syscall;
regs->r11 = iret_saved.r11;
regs->rcx = iret_saved.rcx;
}
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -1292,10 +1292,10 @@ decode_register(
case 9: p = &regs->r9; break;
case 10: p = &regs->r10; break;
case 11: p = &regs->r11; break;
- case 12: p = &regs->r12; break;
- case 13: p = &regs->r13; break;
- case 14: p = &regs->r14; break;
- case 15: p = &regs->r15; break;
+ case 12: mark_regs_dirty(regs); p = &regs->r12; break;
+ case 13: mark_regs_dirty(regs); p = &regs->r13; break;
+ case 14: mark_regs_dirty(regs); p = &regs->r14; break;
+ case 15: mark_regs_dirty(regs); p = &regs->r15; break;
#endif
default: BUG(); p = NULL; break;
}
--- a/xen/arch/x86/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate.c
@@ -10,6 +10,7 @@
*/
#include <asm/x86_emulate.h>
+#include <asm/asm_defns.h> /* mark_regs_dirty() */
#include <asm/processor.h> /* current_cpu_info */
#include <asm/amd.h> /* cpu_has_amd_erratum() */
--- a/xen/common/wait.c
+++ b/xen/common/wait.c
@@ -124,10 +124,12 @@ void wake_up_all(struct waitqueue_head *
static void __prepare_to_wait(struct waitqueue_vcpu *wqv)
{
- char *cpu_info = (char *)get_cpu_info();
+ struct cpu_info *cpu_info = get_cpu_info();
struct vcpu *curr = current;
unsigned long dummy;
+ u32 entry_vector = cpu_info->guest_cpu_user_regs.entry_vector;
+ cpu_info->guest_cpu_user_regs.entry_vector &= ~TRAP_regs_partial;
ASSERT(wqv->esp == 0);
/* Save current VCPU affinity; force wakeup on *this* CPU only. */
@@ -157,6 +159,8 @@ static void __prepare_to_wait(struct wai
gdprintk(XENLOG_ERR, "Stack too large in %s\n", __FUNCTION__);
domain_crash_synchronous();
}
+
+ cpu_info->guest_cpu_user_regs.entry_vector = entry_vector;
}
static void __finish_wait(struct waitqueue_vcpu *wqv)
--- a/xen/include/asm-x86/x86_64/asm_defns.h
+++ b/xen/include/asm-x86/x86_64/asm_defns.h
@@ -26,6 +26,17 @@
#define ASSERT_INTERRUPTS_ENABLED ASSERT_INTERRUPT_STATUS(nz)
#define ASSERT_INTERRUPTS_DISABLED ASSERT_INTERRUPT_STATUS(z)
+#define _TRAP_regs_partial 16
+#define _TRAP_regs_dirty 17
+#define TRAP_regs_partial (1 << _TRAP_regs_partial)
+#define TRAP_regs_dirty (1 << _TRAP_regs_dirty)
+
+#define mark_regs_dirty(r) ({ \
+ struct cpu_user_regs *r__ = (r); \
+ ASSERT(!((r__)->entry_vector & TRAP_regs_partial)); \
+ r__->entry_vector |= TRAP_regs_dirty; \
+})
+
#define SAVE_ALL \
addq $-(UREGS_error_code-UREGS_r15), %rsp; \
cld; \
@@ -47,11 +58,49 @@
movq %r15,UREGS_r15(%rsp); \
#ifdef __ASSEMBLY__
-.macro LOAD_C_CLOBBERED
+
+.macro SAVE_VOLATILE type compat=0
+.if \compat
+ movl $\type,UREGS_entry_vector-UREGS_error_code(%rsp)
+.else
+ movl $\type|TRAP_regs_partial,\
+ UREGS_entry_vector-UREGS_error_code(%rsp)
+.endif
+ addq $-(UREGS_error_code-UREGS_r15),%rsp
+ cld
+ movq %rdi,UREGS_rdi(%rsp)
+ movq %rsi,UREGS_rsi(%rsp)
+ movq %rdx,UREGS_rdx(%rsp)
+ movq %rcx,UREGS_rcx(%rsp)
+ movq %rax,UREGS_rax(%rsp)
+.if !\compat
+ movq %r8,UREGS_r8(%rsp)
+ movq %r9,UREGS_r9(%rsp)
+ movq %r10,UREGS_r10(%rsp)
+ movq %r11,UREGS_r11(%rsp)
+.endif
+ movq %rbx,UREGS_rbx(%rsp)
+ movq %rbp,UREGS_rbp(%rsp)
+ SETUP_EXCEPTION_FRAME_POINTER(UREGS_rbp)
+.endm
+
+.macro SAVE_PRESERVED
+ btrl $_TRAP_regs_partial,UREGS_entry_vector(%rsp)
+ jnc 987f
+ movq %r12,UREGS_r12(%rsp)
+ movq %r13,UREGS_r13(%rsp)
+ movq %r14,UREGS_r14(%rsp)
+ movq %r15,UREGS_r15(%rsp)
+987:
+.endm
+
+.macro LOAD_C_CLOBBERED compat=0
+.if !\compat
movq UREGS_r11(%rsp),%r11
movq UREGS_r10(%rsp),%r10
movq UREGS_r9(%rsp),%r9
movq UREGS_r8(%rsp),%r8
+.endif
movq UREGS_rax(%rsp),%rax
movq UREGS_rcx(%rsp),%rcx
movq UREGS_rdx(%rsp),%rdx
@@ -59,16 +108,38 @@
movq UREGS_rdi(%rsp),%rdi
.endm
-.macro RESTORE_ALL adj=0
+.macro RESTORE_ALL adj=0 compat=0
+.if !\compat
+ testl $TRAP_regs_dirty,UREGS_entry_vector(%rsp)
+.endif
+ LOAD_C_CLOBBERED \compat
+.if !\compat
+ jz 987f
movq UREGS_r15(%rsp),%r15
movq UREGS_r14(%rsp),%r14
movq UREGS_r13(%rsp),%r13
movq UREGS_r12(%rsp),%r12
- movq UREGS_rbp(%rsp),%rbp
+#ifndef NDEBUG
+ .subsection 1
+987: testl $TRAP_regs_partial,UREGS_entry_vector(%rsp)
+ jnz 987f
+ cmpq UREGS_r15(%rsp),%r15
+ jne 789f
+ cmpq UREGS_r14(%rsp),%r14
+ jne 789f
+ cmpq UREGS_r13(%rsp),%r13
+ jne 789f
+ cmpq UREGS_r12(%rsp),%r12
+ je 987f
+789: ud2
+ .subsection 0
+#endif
+.endif
+987: movq UREGS_rbp(%rsp),%rbp
movq UREGS_rbx(%rsp),%rbx
- LOAD_C_CLOBBERED
subq $-(UREGS_error_code-UREGS_r15+\adj), %rsp
.endm
+
#endif
#ifdef PERF_COUNTERS
[-- Attachment #3: Type: text/plain, Size: 126 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 1/3] x86: use MOV instead of PUSH/POP when saving/restoring register state
2012-10-02 15:26 ` [PATCH 1/3] x86: use MOV instead of PUSH/POP when saving/restoring register state Jan Beulich
@ 2012-10-02 17:01 ` Keir Fraser
2012-10-02 17:57 ` Konrad Rzeszutek Wilk
1 sibling, 0 replies; 19+ messages in thread
From: Keir Fraser @ 2012-10-02 17:01 UTC (permalink / raw)
To: Jan Beulich, xen-devel
On 02/10/2012 16:26, "Jan Beulich" <JBeulich@suse.com> wrote:
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Keir Fraser <keir@xen.org>
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 2/3] x86: consolidate frame state manipulation functions
2012-10-02 15:26 ` [PATCH 2/3] x86: consolidate frame state manipulation functions Jan Beulich
@ 2012-10-02 17:01 ` Keir Fraser
0 siblings, 0 replies; 19+ messages in thread
From: Keir Fraser @ 2012-10-02 17:01 UTC (permalink / raw)
To: Jan Beulich, xen-devel
On 02/10/2012 16:26, "Jan Beulich" <JBeulich@suse.com> wrote:
> Rather than doing this in multiple places, have a single central
> function (decode_register()) to be used by all other code.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Keir Fraser <keir@xen.org>
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 3/3] x86: save/restore only partial register state where possible
2012-10-02 15:27 ` [PATCH 3/3] x86: save/restore only partial register state where possible Jan Beulich
@ 2012-10-02 17:02 ` Keir Fraser
2012-10-03 13:05 ` Jan Beulich
0 siblings, 1 reply; 19+ messages in thread
From: Keir Fraser @ 2012-10-02 17:02 UTC (permalink / raw)
To: Jan Beulich, xen-devel
On 02/10/2012 16:27, "Jan Beulich" <JBeulich@suse.com> wrote:
> ... and make restore conditional not only upon having saved the state,
> but also upon whether saved state was actually modified (and register
> values are known to have been preserved).
>
> Note that RBP is unconditionally considered a volatile register (i.e.
> irrespective of CONFIG_FRAME_POINTER), since the RBP handling would
> become overly complicated due to the need to save/restore it on the
> compat mode hypercall path [6th argument].
>
> Note further that for compat mode code paths, saving/restoring R8...R15
> is entirely unnecessary - we don't allow those guests to enter 64-bit
> mode, and hence they have no way of seeing these registers' contents
> (and there consequently also is no information leak, except if the
> context saving domctl would be considered such).
>
> Finally, note that this may not properly deal with gdbstub's needs, yet
> (but if so, I can't really suggest adjustments, as I don't know that
> code).
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
Ugly. I'd prefer not to bother unless there really is a win we could care
about here.
-- Keir
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 1/3] x86: use MOV instead of PUSH/POP when saving/restoring register state
2012-10-02 15:26 ` [PATCH 1/3] x86: use MOV instead of PUSH/POP when saving/restoring register state Jan Beulich
2012-10-02 17:01 ` Keir Fraser
@ 2012-10-02 17:57 ` Konrad Rzeszutek Wilk
2012-10-03 13:01 ` Jan Beulich
1 sibling, 1 reply; 19+ messages in thread
From: Konrad Rzeszutek Wilk @ 2012-10-02 17:57 UTC (permalink / raw)
To: Jan Beulich; +Cc: xen-devel
On Tue, Oct 02, 2012 at 04:26:23PM +0100, Jan Beulich wrote:
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
What's the reasoning behind it? Just that mov's are faster than
pop/push?
>
> --- a/xen/arch/x86/x86_64/compat/entry.S
> +++ b/xen/arch/x86/x86_64/compat/entry.S
> @@ -21,8 +21,7 @@ ENTRY(compat_hypercall)
> UNLIKELY_START(ne, msi_check)
> movl $HYPERCALL_VECTOR,%edi
> call check_for_unexpected_msi
> - RESTORE_ALL
> - SAVE_ALL
> + LOAD_C_CLOBBERED
> UNLIKELY_END(msi_check)
>
> GET_CURRENT(%rbx)
> @@ -173,8 +172,7 @@ compat_bad_hypercall:
> /* %rbx: struct vcpu, interrupts disabled */
> compat_restore_all_guest:
> ASSERT_INTERRUPTS_DISABLED
> - RESTORE_ALL
> - addq $8,%rsp
> + RESTORE_ALL adj=8
> .Lft0: iretq
>
> .section .fixup,"ax"
> --- a/xen/arch/x86/x86_64/entry.S
> +++ b/xen/arch/x86/x86_64/entry.S
> @@ -47,12 +47,10 @@ restore_all_guest:
> cmpl $1,%ecx
> ja .Lforce_iret
>
> - addq $8,%rsp
> - popq %rcx # RIP
> - popq %r11 # CS
> - cmpw $FLAT_USER_CS32,%r11
> - popq %r11 # RFLAGS
> - popq %rsp # RSP
> + cmpw $FLAT_USER_CS32,16(%rsp)# CS
> + movq 8(%rsp),%rcx # RIP
> + movq 24(%rsp),%r11 # RFLAGS
> + movq 32(%rsp),%rsp # RSP
> je 1f
> sysretq
> 1: sysretl
> @@ -101,8 +99,7 @@ failsafe_callback:
> ALIGN
> /* No special register assumptions. */
> restore_all_xen:
> - RESTORE_ALL
> - addq $8,%rsp
> + RESTORE_ALL adj=8
> iretq
>
> /*
> @@ -311,8 +308,7 @@ ENTRY(int80_direct_trap)
> UNLIKELY_START(ne, msi_check)
> movl $0x80,%edi
> call check_for_unexpected_msi
> - RESTORE_ALL
> - SAVE_ALL
> + LOAD_C_CLOBBERED
> UNLIKELY_END(msi_check)
>
> GET_CURRENT(%rbx)
> --- a/xen/include/asm-x86/x86_64/asm_defns.h
> +++ b/xen/include/asm-x86/x86_64/asm_defns.h
> @@ -5,11 +5,11 @@
>
> #ifdef CONFIG_FRAME_POINTER
> /* Indicate special exception stack frame by inverting the frame pointer. */
> -#define SETUP_EXCEPTION_FRAME_POINTER \
> - movq %rsp,%rbp; \
> +#define SETUP_EXCEPTION_FRAME_POINTER(offs) \
> + leaq offs(%rsp),%rbp; \
> notq %rbp
> #else
> -#define SETUP_EXCEPTION_FRAME_POINTER
> +#define SETUP_EXCEPTION_FRAME_POINTER(off)
> #endif
>
> #ifndef NDEBUG
> @@ -27,40 +27,49 @@
> #define ASSERT_INTERRUPTS_DISABLED ASSERT_INTERRUPT_STATUS(z)
>
> #define SAVE_ALL \
> + addq $-(UREGS_error_code-UREGS_r15), %rsp; \
> cld; \
> - pushq %rdi; \
> - pushq %rsi; \
> - pushq %rdx; \
> - pushq %rcx; \
> - pushq %rax; \
> - pushq %r8; \
> - pushq %r9; \
> - pushq %r10; \
> - pushq %r11; \
> - pushq %rbx; \
> - pushq %rbp; \
> - SETUP_EXCEPTION_FRAME_POINTER; \
> - pushq %r12; \
> - pushq %r13; \
> - pushq %r14; \
> - pushq %r15;
> -
> -#define RESTORE_ALL \
> - popq %r15; \
> - popq %r14; \
> - popq %r13; \
> - popq %r12; \
> - popq %rbp; \
> - popq %rbx; \
> - popq %r11; \
> - popq %r10; \
> - popq %r9; \
> - popq %r8; \
> - popq %rax; \
> - popq %rcx; \
> - popq %rdx; \
> - popq %rsi; \
> - popq %rdi;
> + movq %rdi,UREGS_rdi(%rsp); \
> + movq %rsi,UREGS_rsi(%rsp); \
> + movq %rdx,UREGS_rdx(%rsp); \
> + movq %rcx,UREGS_rcx(%rsp); \
> + movq %rax,UREGS_rax(%rsp); \
> + movq %r8,UREGS_r8(%rsp); \
> + movq %r9,UREGS_r9(%rsp); \
> + movq %r10,UREGS_r10(%rsp); \
> + movq %r11,UREGS_r11(%rsp); \
> + movq %rbx,UREGS_rbx(%rsp); \
> + movq %rbp,UREGS_rbp(%rsp); \
> + SETUP_EXCEPTION_FRAME_POINTER(UREGS_rbp); \
> + movq %r12,UREGS_r12(%rsp); \
> + movq %r13,UREGS_r13(%rsp); \
> + movq %r14,UREGS_r14(%rsp); \
> + movq %r15,UREGS_r15(%rsp); \
> +
> +#ifdef __ASSEMBLY__
> +.macro LOAD_C_CLOBBERED
> + movq UREGS_r11(%rsp),%r11
> + movq UREGS_r10(%rsp),%r10
> + movq UREGS_r9(%rsp),%r9
> + movq UREGS_r8(%rsp),%r8
> + movq UREGS_rax(%rsp),%rax
> + movq UREGS_rcx(%rsp),%rcx
> + movq UREGS_rdx(%rsp),%rdx
> + movq UREGS_rsi(%rsp),%rsi
> + movq UREGS_rdi(%rsp),%rdi
> +.endm
> +
> +.macro RESTORE_ALL adj=0
> + movq UREGS_r15(%rsp),%r15
> + movq UREGS_r14(%rsp),%r14
> + movq UREGS_r13(%rsp),%r13
> + movq UREGS_r12(%rsp),%r12
> + movq UREGS_rbp(%rsp),%rbp
> + movq UREGS_rbx(%rsp),%rbx
> + LOAD_C_CLOBBERED
> + subq $-(UREGS_error_code-UREGS_r15+\adj), %rsp
> +.endm
> +#endif
>
> #ifdef PERF_COUNTERS
> #define PERFC_INCR(_name,_idx,_cur) \
> @@ -94,7 +103,7 @@
> __asm__( \
> "\n" __ALIGN_STR"\n" \
> "common_interrupt:\n\t" \
> - STR(SAVE_ALL) \
> + STR(SAVE_ALL) "\n\t" \
> "movq %rsp,%rdi\n\t" \
> "callq " STR(do_IRQ) "\n\t" \
> "jmp ret_from_intr\n");
>
>
> x86: use MOV instead of PUSH/POP when saving/restoring register state
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>
> --- a/xen/arch/x86/x86_64/compat/entry.S
> +++ b/xen/arch/x86/x86_64/compat/entry.S
> @@ -21,8 +21,7 @@ ENTRY(compat_hypercall)
> UNLIKELY_START(ne, msi_check)
> movl $HYPERCALL_VECTOR,%edi
> call check_for_unexpected_msi
> - RESTORE_ALL
> - SAVE_ALL
> + LOAD_C_CLOBBERED
> UNLIKELY_END(msi_check)
>
> GET_CURRENT(%rbx)
> @@ -173,8 +172,7 @@ compat_bad_hypercall:
> /* %rbx: struct vcpu, interrupts disabled */
> compat_restore_all_guest:
> ASSERT_INTERRUPTS_DISABLED
> - RESTORE_ALL
> - addq $8,%rsp
> + RESTORE_ALL adj=8
> .Lft0: iretq
>
> .section .fixup,"ax"
> --- a/xen/arch/x86/x86_64/entry.S
> +++ b/xen/arch/x86/x86_64/entry.S
> @@ -47,12 +47,10 @@ restore_all_guest:
> cmpl $1,%ecx
> ja .Lforce_iret
>
> - addq $8,%rsp
> - popq %rcx # RIP
> - popq %r11 # CS
> - cmpw $FLAT_USER_CS32,%r11
> - popq %r11 # RFLAGS
> - popq %rsp # RSP
> + cmpw $FLAT_USER_CS32,16(%rsp)# CS
> + movq 8(%rsp),%rcx # RIP
> + movq 24(%rsp),%r11 # RFLAGS
> + movq 32(%rsp),%rsp # RSP
> je 1f
> sysretq
> 1: sysretl
> @@ -101,8 +99,7 @@ failsafe_callback:
> ALIGN
> /* No special register assumptions. */
> restore_all_xen:
> - RESTORE_ALL
> - addq $8,%rsp
> + RESTORE_ALL adj=8
> iretq
>
> /*
> @@ -311,8 +308,7 @@ ENTRY(int80_direct_trap)
> UNLIKELY_START(ne, msi_check)
> movl $0x80,%edi
> call check_for_unexpected_msi
> - RESTORE_ALL
> - SAVE_ALL
> + LOAD_C_CLOBBERED
> UNLIKELY_END(msi_check)
>
> GET_CURRENT(%rbx)
> --- a/xen/include/asm-x86/x86_64/asm_defns.h
> +++ b/xen/include/asm-x86/x86_64/asm_defns.h
> @@ -5,11 +5,11 @@
>
> #ifdef CONFIG_FRAME_POINTER
> /* Indicate special exception stack frame by inverting the frame pointer. */
> -#define SETUP_EXCEPTION_FRAME_POINTER \
> - movq %rsp,%rbp; \
> +#define SETUP_EXCEPTION_FRAME_POINTER(offs) \
> + leaq offs(%rsp),%rbp; \
> notq %rbp
> #else
> -#define SETUP_EXCEPTION_FRAME_POINTER
> +#define SETUP_EXCEPTION_FRAME_POINTER(off)
> #endif
>
> #ifndef NDEBUG
> @@ -27,40 +27,49 @@
> #define ASSERT_INTERRUPTS_DISABLED ASSERT_INTERRUPT_STATUS(z)
>
> #define SAVE_ALL \
> + addq $-(UREGS_error_code-UREGS_r15), %rsp; \
> cld; \
> - pushq %rdi; \
> - pushq %rsi; \
> - pushq %rdx; \
> - pushq %rcx; \
> - pushq %rax; \
> - pushq %r8; \
> - pushq %r9; \
> - pushq %r10; \
> - pushq %r11; \
> - pushq %rbx; \
> - pushq %rbp; \
> - SETUP_EXCEPTION_FRAME_POINTER; \
> - pushq %r12; \
> - pushq %r13; \
> - pushq %r14; \
> - pushq %r15;
> -
> -#define RESTORE_ALL \
> - popq %r15; \
> - popq %r14; \
> - popq %r13; \
> - popq %r12; \
> - popq %rbp; \
> - popq %rbx; \
> - popq %r11; \
> - popq %r10; \
> - popq %r9; \
> - popq %r8; \
> - popq %rax; \
> - popq %rcx; \
> - popq %rdx; \
> - popq %rsi; \
> - popq %rdi;
> + movq %rdi,UREGS_rdi(%rsp); \
> + movq %rsi,UREGS_rsi(%rsp); \
> + movq %rdx,UREGS_rdx(%rsp); \
> + movq %rcx,UREGS_rcx(%rsp); \
> + movq %rax,UREGS_rax(%rsp); \
> + movq %r8,UREGS_r8(%rsp); \
> + movq %r9,UREGS_r9(%rsp); \
> + movq %r10,UREGS_r10(%rsp); \
> + movq %r11,UREGS_r11(%rsp); \
> + movq %rbx,UREGS_rbx(%rsp); \
> + movq %rbp,UREGS_rbp(%rsp); \
> + SETUP_EXCEPTION_FRAME_POINTER(UREGS_rbp); \
> + movq %r12,UREGS_r12(%rsp); \
> + movq %r13,UREGS_r13(%rsp); \
> + movq %r14,UREGS_r14(%rsp); \
> + movq %r15,UREGS_r15(%rsp); \
> +
> +#ifdef __ASSEMBLY__
> +.macro LOAD_C_CLOBBERED
> + movq UREGS_r11(%rsp),%r11
> + movq UREGS_r10(%rsp),%r10
> + movq UREGS_r9(%rsp),%r9
> + movq UREGS_r8(%rsp),%r8
> + movq UREGS_rax(%rsp),%rax
> + movq UREGS_rcx(%rsp),%rcx
> + movq UREGS_rdx(%rsp),%rdx
> + movq UREGS_rsi(%rsp),%rsi
> + movq UREGS_rdi(%rsp),%rdi
> +.endm
> +
> +.macro RESTORE_ALL adj=0
> + movq UREGS_r15(%rsp),%r15
> + movq UREGS_r14(%rsp),%r14
> + movq UREGS_r13(%rsp),%r13
> + movq UREGS_r12(%rsp),%r12
> + movq UREGS_rbp(%rsp),%rbp
> + movq UREGS_rbx(%rsp),%rbx
> + LOAD_C_CLOBBERED
> + subq $-(UREGS_error_code-UREGS_r15+\adj), %rsp
> +.endm
> +#endif
>
> #ifdef PERF_COUNTERS
> #define PERFC_INCR(_name,_idx,_cur) \
> @@ -94,7 +103,7 @@
> __asm__( \
> "\n" __ALIGN_STR"\n" \
> "common_interrupt:\n\t" \
> - STR(SAVE_ALL) \
> + STR(SAVE_ALL) "\n\t" \
> "movq %rsp,%rdi\n\t" \
> "callq " STR(do_IRQ) "\n\t" \
> "jmp ret_from_intr\n");
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 1/3] x86: use MOV instead of PUSH/POP when saving/restoring register state
2012-10-02 17:57 ` Konrad Rzeszutek Wilk
@ 2012-10-03 13:01 ` Jan Beulich
0 siblings, 0 replies; 19+ messages in thread
From: Jan Beulich @ 2012-10-03 13:01 UTC (permalink / raw)
To: konrad; +Cc: xen-devel
>>> Konrad Rzeszutek Wilk <konrad@kernel.org> 10/02/12 8:09 PM >>>
>On Tue, Oct 02, 2012 at 04:26:23PM +0100, Jan Beulich wrote:
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>What's the reasoning behind it? Just that mov's are faster than
>pop/push?
No (that's not actually true anymore these days). The main reason is that with
MOV, part of the frame generation and register restoration can be more easily
skipped than when using PUSH/POP (would require extra stack pointer
adjustments on the alternative paths) - see patch 3.
Jan
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 3/3] x86: save/restore only partial register state where possible
2012-10-02 17:02 ` Keir Fraser
@ 2012-10-03 13:05 ` Jan Beulich
2012-10-03 14:35 ` Keir Fraser
0 siblings, 1 reply; 19+ messages in thread
From: Jan Beulich @ 2012-10-03 13:05 UTC (permalink / raw)
To: keir; +Cc: xen-devel
>>> Keir Fraser <keir@xen.org> 10/02/12 7:02 PM >>>
>On 02/10/2012 16:27, "Jan Beulich" <JBeulich@suse.com> wrote:
>
>> ... and make restore conditional not only upon having saved the state,
>> but also upon whether saved state was actually modified (and register
>> values are known to have been preserved).
>>
>> Note that RBP is unconditionally considered a volatile register (i.e.
>> irrespective of CONFIG_FRAME_POINTER), since the RBP handling would
>> become overly complicated due to the need to save/restore it on the
>> compat mode hypercall path [6th argument].
>>
>> Note further that for compat mode code paths, saving/restoring R8...R15
>> is entirely unnecessary - we don't allow those guests to enter 64-bit
>> mode, and hence they have no way of seeing these registers' contents
>> (and there consequently also is no information leak, except if the
>> context saving domctl would be considered such).
>>
>> Finally, note that this may not properly deal with gdbstub's needs, yet
>> (but if so, I can't really suggest adjustments, as I don't know that
>> code).
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>
>Ugly. I'd prefer not to bother unless there really is a win we could care
>about here.
Without this patch, patch 1 doesn't make a lot of sense either (and patch 2 then
is merely cleanup).
Jan
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 3/3] x86: save/restore only partial register state where possible
2012-10-03 13:05 ` Jan Beulich
@ 2012-10-03 14:35 ` Keir Fraser
2012-10-30 14:26 ` Jan Beulich
0 siblings, 1 reply; 19+ messages in thread
From: Keir Fraser @ 2012-10-03 14:35 UTC (permalink / raw)
To: Jan Beulich; +Cc: xen-devel
On 03/10/2012 14:05, "Jan Beulich" <jbeulich@suse.com> wrote:
>>>> Keir Fraser <keir@xen.org> 10/02/12 7:02 PM >>>
>> On 02/10/2012 16:27, "Jan Beulich" <JBeulich@suse.com> wrote:
>>
>>> ... and make restore conditional not only upon having saved the state,
>>> but also upon whether saved state was actually modified (and register
>>> values are known to have been preserved).
>>>
>>> Note that RBP is unconditionally considered a volatile register (i.e.
>>> irrespective of CONFIG_FRAME_POINTER), since the RBP handling would
>>> become overly complicated due to the need to save/restore it on the
>>> compat mode hypercall path [6th argument].
>>>
>>> Note further that for compat mode code paths, saving/restoring R8...R15
>>> is entirely unnecessary - we don't allow those guests to enter 64-bit
>>> mode, and hence they have no way of seeing these registers' contents
>>> (and there consequently also is no information leak, except if the
>>> context saving domctl would be considered such).
>>>
>>> Finally, note that this may not properly deal with gdbstub's needs, yet
>>> (but if so, I can't really suggest adjustments, as I don't know that
>>> code).
>>>
>>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>>
>> Ugly. I'd prefer not to bother unless there really is a win we could care
>> about here.
>
> Without this patch, patch 1 doesn't make a lot of sense either (and patch 2
> then is merely cleanup).
Okay, can you re-submit with some comments around what the new TRAP flag
values mean and how they should be used? Maybe some comments for the
save/restore macros, and their new arguments, would be nice too. And are you
confident that this approach is maintainable/non-fragile (i.e., we're not
going to be continually finding rare cases where registers are being dirtied
but not saved/restored)?
-- Keir
> Jan
>
^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCH 0/2, v2] x86: adjust entry frame generation
2012-10-02 15:20 [PATCH 0/3] x86: adjust entry frame generation Jan Beulich
` (2 preceding siblings ...)
2012-10-02 15:27 ` [PATCH 3/3] x86: save/restore only partial register state where possible Jan Beulich
@ 2012-10-30 14:20 ` Jan Beulich
2012-10-30 14:27 ` [PATCH 1/2, v2] x86: use MOV instead of PUSH/POP when saving/restoring register state Jan Beulich
` (2 more replies)
3 siblings, 3 replies; 19+ messages in thread
From: Jan Beulich @ 2012-10-30 14:20 UTC (permalink / raw)
To: xen-devel
>>> On 02.10.12 at 17:20, "Jan Beulich" <JBeulich@suse.com> wrote:
This pair of patches converts the way frames gets created from
using PUSHes/POPs to using MOVes, thus allowing (in certain
cases) to avoid saving/restoring part of the register set.
While the exact place where the (small) win from this comes from
varies between CPUs, the net effect is a 1 to 2% reduction on a
combined interruption entry and exit when the full state save
can be avoided.
1: use MOV instead of PUSH/POP when saving/restoring register state
2: save only partial register state where possible
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
NB: Patch 1 didn't change from its v1 submission, and what was
originally patch 2 (of 3) did already get committed.
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 3/3] x86: save/restore only partial register state where possible
2012-10-03 14:35 ` Keir Fraser
@ 2012-10-30 14:26 ` Jan Beulich
0 siblings, 0 replies; 19+ messages in thread
From: Jan Beulich @ 2012-10-30 14:26 UTC (permalink / raw)
To: Keir Fraser; +Cc: xen-devel
>>> On 03.10.12 at 16:35, Keir Fraser <keir.xen@gmail.com> wrote:
> On 03/10/2012 14:05, "Jan Beulich" <jbeulich@suse.com> wrote:
>
>>>>> Keir Fraser <keir@xen.org> 10/02/12 7:02 PM >>>
>>> On 02/10/2012 16:27, "Jan Beulich" <JBeulich@suse.com> wrote:
>>>
>>>> ... and make restore conditional not only upon having saved the state,
>>>> but also upon whether saved state was actually modified (and register
>>>> values are known to have been preserved).
>>>>
>>>> Note that RBP is unconditionally considered a volatile register (i.e.
>>>> irrespective of CONFIG_FRAME_POINTER), since the RBP handling would
>>>> become overly complicated due to the need to save/restore it on the
>>>> compat mode hypercall path [6th argument].
>>>>
>>>> Note further that for compat mode code paths, saving/restoring R8...R15
>>>> is entirely unnecessary - we don't allow those guests to enter 64-bit
>>>> mode, and hence they have no way of seeing these registers' contents
>>>> (and there consequently also is no information leak, except if the
>>>> context saving domctl would be considered such).
>>>>
>>>> Finally, note that this may not properly deal with gdbstub's needs, yet
>>>> (but if so, I can't really suggest adjustments, as I don't know that
>>>> code).
>>>>
>>>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>>>
>>> Ugly. I'd prefer not to bother unless there really is a win we could care
>>> about here.
>>
>> Without this patch, patch 1 doesn't make a lot of sense either (and patch 2
>> then is merely cleanup).
>
> Okay, can you re-submit with some comments around what the new TRAP flag
> values mean and how they should be used? Maybe some comments for the
> save/restore macros, and their new arguments, would be nice too. And are you
> confident that this approach is maintainable/non-fragile (i.e., we're not
> going to be continually finding rare cases where registers are being dirtied
> but not saved/restored)?
To answer this last question of yours - I think we're safe, not the
least because no optimization is currently being done to the
(more fragile?) HVM entry points (and the TRAP_regs_* flags
are chosen so that by default any frame that gets created without
knowledge of these flags will be in proper state - "partial" and
"dirty" off (and "dirty" being set on exit is meaningless to code
restoring all registers anyway)).
But as always - should it turn out I'm wrong with that assessment,
we should favor reverting these two patches over trying to fix
them.
Jan
^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCH 1/2, v2] x86: use MOV instead of PUSH/POP when saving/restoring register state
2012-10-30 14:20 ` [PATCH 0/2, v2] x86: adjust entry frame generation Jan Beulich
@ 2012-10-30 14:27 ` Jan Beulich
2012-10-30 15:19 ` Mats Petersson
2012-10-30 14:29 ` [PATCH 2/2, v2] x86: save/restore only partial register state where possible Jan Beulich
2012-10-30 14:35 ` [PATCH 0/2, v2] x86: adjust entry frame generation Keir Fraser
2 siblings, 1 reply; 19+ messages in thread
From: Jan Beulich @ 2012-10-30 14:27 UTC (permalink / raw)
To: xen-devel
[-- Attachment #1: Type: text/plain, Size: 6418 bytes --]
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Keir Fraser <keir@xen.org>
---
This patch did not change from its v1 submission.
--- a/xen/arch/x86/x86_64/compat/entry.S
+++ b/xen/arch/x86/x86_64/compat/entry.S
@@ -21,8 +21,7 @@ ENTRY(compat_hypercall)
UNLIKELY_START(ne, msi_check)
movl $HYPERCALL_VECTOR,%edi
call check_for_unexpected_msi
- RESTORE_ALL
- SAVE_ALL
+ LOAD_C_CLOBBERED
UNLIKELY_END(msi_check)
GET_CURRENT(%rbx)
@@ -173,8 +172,7 @@ compat_bad_hypercall:
/* %rbx: struct vcpu, interrupts disabled */
compat_restore_all_guest:
ASSERT_INTERRUPTS_DISABLED
- RESTORE_ALL
- addq $8,%rsp
+ RESTORE_ALL adj=8
.Lft0: iretq
.section .fixup,"ax"
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -47,12 +47,10 @@ restore_all_guest:
cmpl $1,%ecx
ja .Lforce_iret
- addq $8,%rsp
- popq %rcx # RIP
- popq %r11 # CS
- cmpw $FLAT_USER_CS32,%r11
- popq %r11 # RFLAGS
- popq %rsp # RSP
+ cmpw $FLAT_USER_CS32,16(%rsp)# CS
+ movq 8(%rsp),%rcx # RIP
+ movq 24(%rsp),%r11 # RFLAGS
+ movq 32(%rsp),%rsp # RSP
je 1f
sysretq
1: sysretl
@@ -101,8 +99,7 @@ failsafe_callback:
ALIGN
/* No special register assumptions. */
restore_all_xen:
- RESTORE_ALL
- addq $8,%rsp
+ RESTORE_ALL adj=8
iretq
/*
@@ -311,8 +308,7 @@ ENTRY(int80_direct_trap)
UNLIKELY_START(ne, msi_check)
movl $0x80,%edi
call check_for_unexpected_msi
- RESTORE_ALL
- SAVE_ALL
+ LOAD_C_CLOBBERED
UNLIKELY_END(msi_check)
GET_CURRENT(%rbx)
--- a/xen/include/asm-x86/x86_64/asm_defns.h
+++ b/xen/include/asm-x86/x86_64/asm_defns.h
@@ -5,11 +5,11 @@
#ifdef CONFIG_FRAME_POINTER
/* Indicate special exception stack frame by inverting the frame pointer. */
-#define SETUP_EXCEPTION_FRAME_POINTER \
- movq %rsp,%rbp; \
+#define SETUP_EXCEPTION_FRAME_POINTER(offs) \
+ leaq offs(%rsp),%rbp; \
notq %rbp
#else
-#define SETUP_EXCEPTION_FRAME_POINTER
+#define SETUP_EXCEPTION_FRAME_POINTER(off)
#endif
#ifndef NDEBUG
@@ -27,40 +27,49 @@
#define ASSERT_INTERRUPTS_DISABLED ASSERT_INTERRUPT_STATUS(z)
#define SAVE_ALL \
+ addq $-(UREGS_error_code-UREGS_r15), %rsp; \
cld; \
- pushq %rdi; \
- pushq %rsi; \
- pushq %rdx; \
- pushq %rcx; \
- pushq %rax; \
- pushq %r8; \
- pushq %r9; \
- pushq %r10; \
- pushq %r11; \
- pushq %rbx; \
- pushq %rbp; \
- SETUP_EXCEPTION_FRAME_POINTER; \
- pushq %r12; \
- pushq %r13; \
- pushq %r14; \
- pushq %r15;
-
-#define RESTORE_ALL \
- popq %r15; \
- popq %r14; \
- popq %r13; \
- popq %r12; \
- popq %rbp; \
- popq %rbx; \
- popq %r11; \
- popq %r10; \
- popq %r9; \
- popq %r8; \
- popq %rax; \
- popq %rcx; \
- popq %rdx; \
- popq %rsi; \
- popq %rdi;
+ movq %rdi,UREGS_rdi(%rsp); \
+ movq %rsi,UREGS_rsi(%rsp); \
+ movq %rdx,UREGS_rdx(%rsp); \
+ movq %rcx,UREGS_rcx(%rsp); \
+ movq %rax,UREGS_rax(%rsp); \
+ movq %r8,UREGS_r8(%rsp); \
+ movq %r9,UREGS_r9(%rsp); \
+ movq %r10,UREGS_r10(%rsp); \
+ movq %r11,UREGS_r11(%rsp); \
+ movq %rbx,UREGS_rbx(%rsp); \
+ movq %rbp,UREGS_rbp(%rsp); \
+ SETUP_EXCEPTION_FRAME_POINTER(UREGS_rbp); \
+ movq %r12,UREGS_r12(%rsp); \
+ movq %r13,UREGS_r13(%rsp); \
+ movq %r14,UREGS_r14(%rsp); \
+ movq %r15,UREGS_r15(%rsp); \
+
+#ifdef __ASSEMBLY__
+.macro LOAD_C_CLOBBERED
+ movq UREGS_r11(%rsp),%r11
+ movq UREGS_r10(%rsp),%r10
+ movq UREGS_r9(%rsp),%r9
+ movq UREGS_r8(%rsp),%r8
+ movq UREGS_rax(%rsp),%rax
+ movq UREGS_rcx(%rsp),%rcx
+ movq UREGS_rdx(%rsp),%rdx
+ movq UREGS_rsi(%rsp),%rsi
+ movq UREGS_rdi(%rsp),%rdi
+.endm
+
+.macro RESTORE_ALL adj=0
+ movq UREGS_r15(%rsp),%r15
+ movq UREGS_r14(%rsp),%r14
+ movq UREGS_r13(%rsp),%r13
+ movq UREGS_r12(%rsp),%r12
+ movq UREGS_rbp(%rsp),%rbp
+ movq UREGS_rbx(%rsp),%rbx
+ LOAD_C_CLOBBERED
+ subq $-(UREGS_error_code-UREGS_r15+\adj), %rsp
+.endm
+#endif
#ifdef PERF_COUNTERS
#define PERFC_INCR(_name,_idx,_cur) \
@@ -94,7 +103,7 @@
__asm__( \
"\n" __ALIGN_STR"\n" \
"common_interrupt:\n\t" \
- STR(SAVE_ALL) \
+ STR(SAVE_ALL) "\n\t" \
"movq %rsp,%rdi\n\t" \
"callq " STR(do_IRQ) "\n\t" \
"jmp ret_from_intr\n");
[-- Attachment #2: x86-save-restore-all-mov.patch --]
[-- Type: text/plain, Size: 6487 bytes --]
x86: use MOV instead of PUSH/POP when saving/restoring register state
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Keir Fraser <keir@xen.org>
---
This patch did not change from its v1 submission.
--- a/xen/arch/x86/x86_64/compat/entry.S
+++ b/xen/arch/x86/x86_64/compat/entry.S
@@ -21,8 +21,7 @@ ENTRY(compat_hypercall)
UNLIKELY_START(ne, msi_check)
movl $HYPERCALL_VECTOR,%edi
call check_for_unexpected_msi
- RESTORE_ALL
- SAVE_ALL
+ LOAD_C_CLOBBERED
UNLIKELY_END(msi_check)
GET_CURRENT(%rbx)
@@ -173,8 +172,7 @@ compat_bad_hypercall:
/* %rbx: struct vcpu, interrupts disabled */
compat_restore_all_guest:
ASSERT_INTERRUPTS_DISABLED
- RESTORE_ALL
- addq $8,%rsp
+ RESTORE_ALL adj=8
.Lft0: iretq
.section .fixup,"ax"
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -47,12 +47,10 @@ restore_all_guest:
cmpl $1,%ecx
ja .Lforce_iret
- addq $8,%rsp
- popq %rcx # RIP
- popq %r11 # CS
- cmpw $FLAT_USER_CS32,%r11
- popq %r11 # RFLAGS
- popq %rsp # RSP
+ cmpw $FLAT_USER_CS32,16(%rsp)# CS
+ movq 8(%rsp),%rcx # RIP
+ movq 24(%rsp),%r11 # RFLAGS
+ movq 32(%rsp),%rsp # RSP
je 1f
sysretq
1: sysretl
@@ -101,8 +99,7 @@ failsafe_callback:
ALIGN
/* No special register assumptions. */
restore_all_xen:
- RESTORE_ALL
- addq $8,%rsp
+ RESTORE_ALL adj=8
iretq
/*
@@ -311,8 +308,7 @@ ENTRY(int80_direct_trap)
UNLIKELY_START(ne, msi_check)
movl $0x80,%edi
call check_for_unexpected_msi
- RESTORE_ALL
- SAVE_ALL
+ LOAD_C_CLOBBERED
UNLIKELY_END(msi_check)
GET_CURRENT(%rbx)
--- a/xen/include/asm-x86/x86_64/asm_defns.h
+++ b/xen/include/asm-x86/x86_64/asm_defns.h
@@ -5,11 +5,11 @@
#ifdef CONFIG_FRAME_POINTER
/* Indicate special exception stack frame by inverting the frame pointer. */
-#define SETUP_EXCEPTION_FRAME_POINTER \
- movq %rsp,%rbp; \
+#define SETUP_EXCEPTION_FRAME_POINTER(offs) \
+ leaq offs(%rsp),%rbp; \
notq %rbp
#else
-#define SETUP_EXCEPTION_FRAME_POINTER
+#define SETUP_EXCEPTION_FRAME_POINTER(off)
#endif
#ifndef NDEBUG
@@ -27,40 +27,49 @@
#define ASSERT_INTERRUPTS_DISABLED ASSERT_INTERRUPT_STATUS(z)
#define SAVE_ALL \
+ addq $-(UREGS_error_code-UREGS_r15), %rsp; \
cld; \
- pushq %rdi; \
- pushq %rsi; \
- pushq %rdx; \
- pushq %rcx; \
- pushq %rax; \
- pushq %r8; \
- pushq %r9; \
- pushq %r10; \
- pushq %r11; \
- pushq %rbx; \
- pushq %rbp; \
- SETUP_EXCEPTION_FRAME_POINTER; \
- pushq %r12; \
- pushq %r13; \
- pushq %r14; \
- pushq %r15;
-
-#define RESTORE_ALL \
- popq %r15; \
- popq %r14; \
- popq %r13; \
- popq %r12; \
- popq %rbp; \
- popq %rbx; \
- popq %r11; \
- popq %r10; \
- popq %r9; \
- popq %r8; \
- popq %rax; \
- popq %rcx; \
- popq %rdx; \
- popq %rsi; \
- popq %rdi;
+ movq %rdi,UREGS_rdi(%rsp); \
+ movq %rsi,UREGS_rsi(%rsp); \
+ movq %rdx,UREGS_rdx(%rsp); \
+ movq %rcx,UREGS_rcx(%rsp); \
+ movq %rax,UREGS_rax(%rsp); \
+ movq %r8,UREGS_r8(%rsp); \
+ movq %r9,UREGS_r9(%rsp); \
+ movq %r10,UREGS_r10(%rsp); \
+ movq %r11,UREGS_r11(%rsp); \
+ movq %rbx,UREGS_rbx(%rsp); \
+ movq %rbp,UREGS_rbp(%rsp); \
+ SETUP_EXCEPTION_FRAME_POINTER(UREGS_rbp); \
+ movq %r12,UREGS_r12(%rsp); \
+ movq %r13,UREGS_r13(%rsp); \
+ movq %r14,UREGS_r14(%rsp); \
+ movq %r15,UREGS_r15(%rsp); \
+
+#ifdef __ASSEMBLY__
+.macro LOAD_C_CLOBBERED
+ movq UREGS_r11(%rsp),%r11
+ movq UREGS_r10(%rsp),%r10
+ movq UREGS_r9(%rsp),%r9
+ movq UREGS_r8(%rsp),%r8
+ movq UREGS_rax(%rsp),%rax
+ movq UREGS_rcx(%rsp),%rcx
+ movq UREGS_rdx(%rsp),%rdx
+ movq UREGS_rsi(%rsp),%rsi
+ movq UREGS_rdi(%rsp),%rdi
+.endm
+
+.macro RESTORE_ALL adj=0
+ movq UREGS_r15(%rsp),%r15
+ movq UREGS_r14(%rsp),%r14
+ movq UREGS_r13(%rsp),%r13
+ movq UREGS_r12(%rsp),%r12
+ movq UREGS_rbp(%rsp),%rbp
+ movq UREGS_rbx(%rsp),%rbx
+ LOAD_C_CLOBBERED
+ subq $-(UREGS_error_code-UREGS_r15+\adj), %rsp
+.endm
+#endif
#ifdef PERF_COUNTERS
#define PERFC_INCR(_name,_idx,_cur) \
@@ -94,7 +103,7 @@
__asm__( \
"\n" __ALIGN_STR"\n" \
"common_interrupt:\n\t" \
- STR(SAVE_ALL) \
+ STR(SAVE_ALL) "\n\t" \
"movq %rsp,%rdi\n\t" \
"callq " STR(do_IRQ) "\n\t" \
"jmp ret_from_intr\n");
[-- Attachment #3: Type: text/plain, Size: 126 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCH 2/2, v2] x86: save/restore only partial register state where possible
2012-10-30 14:20 ` [PATCH 0/2, v2] x86: adjust entry frame generation Jan Beulich
2012-10-30 14:27 ` [PATCH 1/2, v2] x86: use MOV instead of PUSH/POP when saving/restoring register state Jan Beulich
@ 2012-10-30 14:29 ` Jan Beulich
2012-10-30 14:35 ` [PATCH 0/2, v2] x86: adjust entry frame generation Keir Fraser
2 siblings, 0 replies; 19+ messages in thread
From: Jan Beulich @ 2012-10-30 14:29 UTC (permalink / raw)
To: xen-devel
[-- Attachment #1: Type: text/plain, Size: 17234 bytes --]
... and make restore conditional not only upon having saved the state,
but also upon whether saved state was actually modified (and register
values are known to have been preserved).
Note that RBP is unconditionally considered a volatile register (i.e.
irrespective of CONFIG_FRAME_POINTER), since the RBP handling would
become overly complicated due to the need to save/restore it on the
compat mode hypercall path [6th argument].
Note further that for compat mode code paths, saving/restoring R8...R15
is entirely unnecessary - we don't allow those guests to enter 64-bit
mode, and hence they have no way of seeing these registers' contents
(and there consequently also is no information leak, except if the
context saving domctl would be considered such).
Finally, note that this may not properly deal with gdbstub's needs, yet
(but if so, I can't really suggest adjustments, as I don't know that
code).
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v2: Add a few comments.
This patch functionally depends on x86-drop-get_x86_gpr.patch.
--- a/tools/tests/x86_emulator/x86_emulate.c
+++ b/tools/tests/x86_emulator/x86_emulate.c
@@ -10,6 +10,7 @@ typedef bool bool_t;
#define BUG() abort()
#define cpu_has_amd_erratum(nr) 0
+#define mark_regs_dirty(r) ((void)(r))
#include "x86_emulate/x86_emulate.h"
#include "x86_emulate/x86_emulate.c"
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -143,6 +143,7 @@ static void continue_idle_domain(struct
static void continue_nonidle_domain(struct vcpu *v)
{
check_wakeup_from_wait();
+ mark_regs_dirty(guest_cpu_user_regs());
reset_stack_and_jump(ret_from_intr);
}
@@ -1312,7 +1313,7 @@ static void load_segments(struct vcpu *n
if ( test_bit(_VGCF_failsafe_disables_events, &n->arch.vgc_flags) )
vcpu_info(n, evtchn_upcall_mask) = 1;
- regs->entry_vector = TRAP_syscall;
+ regs->entry_vector |= TRAP_syscall;
regs->_eflags &= 0xFFFCBEFFUL;
regs->ss = FLAT_COMPAT_KERNEL_SS;
regs->_esp = (unsigned long)(esp-7);
@@ -1354,7 +1355,7 @@ static void load_segments(struct vcpu *n
if ( test_bit(_VGCF_failsafe_disables_events, &n->arch.vgc_flags) )
vcpu_info(n, evtchn_upcall_mask) = 1;
- regs->entry_vector = TRAP_syscall;
+ regs->entry_vector |= TRAP_syscall;
regs->rflags &= ~(X86_EFLAGS_AC|X86_EFLAGS_VM|X86_EFLAGS_RF|
X86_EFLAGS_NT|X86_EFLAGS_TF);
regs->ss = FLAT_KERNEL_SS;
--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -704,7 +704,7 @@ void irq_complete_move(struct irq_desc *
if (likely(!desc->arch.move_in_progress))
return;
- vector = get_irq_regs()->entry_vector;
+ vector = (u8)get_irq_regs()->entry_vector;
me = smp_processor_id();
if ( vector == desc->arch.vector &&
@@ -798,7 +798,7 @@ void do_IRQ(struct cpu_user_regs *regs)
struct irqaction *action;
uint32_t tsc_in;
struct irq_desc *desc;
- unsigned int vector = regs->entry_vector;
+ unsigned int vector = (u8)regs->entry_vector;
int irq = __get_cpu_var(vector_irq[vector]);
struct cpu_user_regs *old_regs = set_irq_regs(regs);
@@ -892,7 +892,7 @@ void do_IRQ(struct cpu_user_regs *regs)
out:
if ( desc->handler->end )
- desc->handler->end(desc, regs->entry_vector);
+ desc->handler->end(desc, vector);
out_no_end:
spin_unlock(&desc->lock);
out_no_unlock:
@@ -1113,7 +1113,7 @@ static void __do_IRQ_guest(int irq)
struct domain *d;
int i, sp;
struct pending_eoi *peoi = this_cpu(pending_eoi);
- int vector = get_irq_regs()->entry_vector;
+ unsigned int vector = (u8)get_irq_regs()->entry_vector;
if ( unlikely(action->nr_guests == 0) )
{
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -2082,6 +2082,7 @@ static int emulate_privileged_op(struct
goto fail;
if ( admin_io_okay(port, op_bytes, v, regs) )
{
+ mark_regs_dirty(regs);
io_emul(regs);
}
else
@@ -2111,6 +2112,7 @@ static int emulate_privileged_op(struct
goto fail;
if ( admin_io_okay(port, op_bytes, v, regs) )
{
+ mark_regs_dirty(regs);
io_emul(regs);
if ( (op_bytes == 1) && pv_post_outb_hook )
pv_post_outb_hook(port, regs->eax);
--- a/xen/arch/x86/x86_64/compat/entry.S
+++ b/xen/arch/x86/x86_64/compat/entry.S
@@ -14,8 +14,7 @@
ENTRY(compat_hypercall)
pushq $0
- movl $TRAP_syscall,4(%rsp)
- SAVE_ALL
+ SAVE_VOLATILE type=TRAP_syscall compat=1
cmpb $0,untrusted_msi(%rip)
UNLIKELY_START(ne, msi_check)
@@ -126,6 +125,7 @@ compat_test_guest_events:
/* %rbx: struct vcpu */
compat_process_softirqs:
sti
+ andl $~TRAP_regs_partial,UREGS_entry_vector(%rsp)
call do_softirq
jmp compat_test_all_events
@@ -172,7 +172,7 @@ compat_bad_hypercall:
/* %rbx: struct vcpu, interrupts disabled */
compat_restore_all_guest:
ASSERT_INTERRUPTS_DISABLED
- RESTORE_ALL adj=8
+ RESTORE_ALL adj=8 compat=1
.Lft0: iretq
.section .fixup,"ax"
@@ -243,7 +243,7 @@ UNLIKELY_END(compat_syscall_gpf)
ENTRY(compat_sysenter)
movq VCPU_trap_ctxt(%rbx),%rcx
- cmpl $TRAP_gp_fault,UREGS_entry_vector(%rsp)
+ cmpb $TRAP_gp_fault,UREGS_entry_vector(%rsp)
movzwl VCPU_sysenter_sel(%rbx),%eax
movzwl TRAP_gp_fault * TRAPINFO_sizeof + TRAPINFO_cs(%rcx),%ecx
cmovel %ecx,%eax
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -123,9 +123,8 @@ ENTRY(syscall_enter)
movl $FLAT_KERNEL_SS,24(%rsp)
pushq %rcx
pushq $0
- movl $TRAP_syscall,4(%rsp)
- movq 24(%rsp),%r11 /* Re-load user RFLAGS into %r11 before SAVE_ALL */
- SAVE_ALL
+ movq 24(%rsp),%r11 /* Re-load user RFLAGS into %r11 before saving */
+ SAVE_VOLATILE TRAP_syscall
GET_CURRENT(%rbx)
movq VCPU_domain(%rbx),%rcx
testb $1,DOMAIN_is_32bit_pv(%rcx)
@@ -222,6 +221,7 @@ test_guest_events:
/* %rbx: struct vcpu */
process_softirqs:
sti
+ SAVE_PRESERVED
call do_softirq
jmp test_all_events
@@ -275,8 +275,7 @@ sysenter_eflags_saved:
pushq $3 /* ring 3 null cs */
pushq $0 /* null rip */
pushq $0
- movl $TRAP_syscall,4(%rsp)
- SAVE_ALL
+ SAVE_VOLATILE TRAP_syscall
GET_CURRENT(%rbx)
cmpb $0,VCPU_sysenter_disables_events(%rbx)
movq VCPU_sysenter_addr(%rbx),%rax
@@ -286,6 +285,7 @@ sysenter_eflags_saved:
leal (,%rcx,TBF_INTERRUPT),%ecx
UNLIKELY_START(z, sysenter_gpf)
movq VCPU_trap_ctxt(%rbx),%rsi
+ SAVE_PRESERVED
movl $TRAP_gp_fault,UREGS_entry_vector(%rsp)
movl %eax,TRAPBOUNCE_error_code(%rdx)
movq TRAP_gp_fault * TRAPINFO_sizeof + TRAPINFO_eip(%rsi),%rax
@@ -302,7 +302,7 @@ UNLIKELY_END(sysenter_gpf)
ENTRY(int80_direct_trap)
pushq $0
- SAVE_ALL
+ SAVE_VOLATILE 0x80
cmpb $0,untrusted_msi(%rip)
UNLIKELY_START(ne, msi_check)
@@ -331,6 +331,7 @@ int80_slow_path:
* IDT entry with DPL==0.
*/
movl $((0x80 << 3) | 0x2),UREGS_error_code(%rsp)
+ SAVE_PRESERVED
movl $TRAP_gp_fault,UREGS_entry_vector(%rsp)
/* A GPF wouldn't have incremented the instruction pointer. */
subq $2,UREGS_rip(%rsp)
@@ -412,7 +413,7 @@ UNLIKELY_END(bounce_failsafe)
/* Rewrite our stack frame and return to guest-OS mode. */
/* IA32 Ref. Vol. 3: TF, VM, RF and NT flags are cleared on trap. */
/* Also clear AC: alignment checks shouldn't trigger in kernel mode. */
- movl $TRAP_syscall,UREGS_entry_vector+8(%rsp)
+ orl $TRAP_syscall,UREGS_entry_vector+8(%rsp)
andl $~(X86_EFLAGS_AC|X86_EFLAGS_VM|X86_EFLAGS_RF|\
X86_EFLAGS_NT|X86_EFLAGS_TF),UREGS_eflags+8(%rsp)
movq $FLAT_KERNEL_SS,UREGS_ss+8(%rsp)
@@ -477,7 +478,7 @@ handle_exception_saved:
jz exception_with_ints_disabled
sti
1: movq %rsp,%rdi
- movl UREGS_entry_vector(%rsp),%eax
+ movzbl UREGS_entry_vector(%rsp),%eax
leaq exception_table(%rip),%rdx
GET_CURRENT(%rbx)
PERFC_INCR(exceptions, %rax, %rbx)
@@ -518,7 +519,7 @@ exception_with_ints_disabled:
/* No special register assumptions. */
FATAL_exception_with_ints_disabled:
- movl UREGS_entry_vector(%rsp),%edi
+ movzbl UREGS_entry_vector(%rsp),%edi
movq %rsp,%rsi
call fatal_trap
ud2
@@ -624,7 +625,7 @@ handle_ist_exception:
movq %rdi,%rsp
rep movsq
1: movq %rsp,%rdi
- movl UREGS_entry_vector(%rsp),%eax
+ movzbl UREGS_entry_vector(%rsp),%eax
leaq exception_table(%rip),%rdx
callq *(%rdx,%rax,8)
jmp ret_from_intr
--- a/xen/arch/x86/x86_64/traps.c
+++ b/xen/arch/x86/x86_64/traps.c
@@ -67,10 +67,15 @@ static void _show_registers(
regs->rbp, regs->rsp, regs->r8);
printk("r9: %016lx r10: %016lx r11: %016lx\n",
regs->r9, regs->r10, regs->r11);
- printk("r12: %016lx r13: %016lx r14: %016lx\n",
- regs->r12, regs->r13, regs->r14);
- printk("r15: %016lx cr0: %016lx cr4: %016lx\n",
- regs->r15, crs[0], crs[4]);
+ if ( !(regs->entry_vector & TRAP_regs_partial) )
+ {
+ printk("r12: %016lx r13: %016lx r14: %016lx\n",
+ regs->r12, regs->r13, regs->r14);
+ printk("r15: %016lx cr0: %016lx cr4: %016lx\n",
+ regs->r15, crs[0], crs[4]);
+ }
+ else
+ printk("cr0: %016lx cr4: %016lx\n", crs[0], crs[4]);
printk("cr3: %016lx cr2: %016lx\n", crs[3], crs[2]);
printk("ds: %04x es: %04x fs: %04x gs: %04x "
"ss: %04x cs: %04x\n",
@@ -301,7 +306,7 @@ unsigned long do_iret(void)
if ( !(iret_saved.flags & VGCF_in_syscall) )
{
- regs->entry_vector = 0;
+ regs->entry_vector &= ~TRAP_syscall;
regs->r11 = iret_saved.r11;
regs->rcx = iret_saved.rcx;
}
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -1292,10 +1292,10 @@ decode_register(
case 9: p = &regs->r9; break;
case 10: p = &regs->r10; break;
case 11: p = &regs->r11; break;
- case 12: p = &regs->r12; break;
- case 13: p = &regs->r13; break;
- case 14: p = &regs->r14; break;
- case 15: p = &regs->r15; break;
+ case 12: mark_regs_dirty(regs); p = &regs->r12; break;
+ case 13: mark_regs_dirty(regs); p = &regs->r13; break;
+ case 14: mark_regs_dirty(regs); p = &regs->r14; break;
+ case 15: mark_regs_dirty(regs); p = &regs->r15; break;
#endif
default: BUG(); p = NULL; break;
}
--- a/xen/arch/x86/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate.c
@@ -10,6 +10,7 @@
*/
#include <asm/x86_emulate.h>
+#include <asm/asm_defns.h> /* mark_regs_dirty() */
#include <asm/processor.h> /* current_cpu_info */
#include <asm/amd.h> /* cpu_has_amd_erratum() */
--- a/xen/common/wait.c
+++ b/xen/common/wait.c
@@ -124,10 +124,12 @@ void wake_up_all(struct waitqueue_head *
static void __prepare_to_wait(struct waitqueue_vcpu *wqv)
{
- char *cpu_info = (char *)get_cpu_info();
+ struct cpu_info *cpu_info = get_cpu_info();
struct vcpu *curr = current;
unsigned long dummy;
+ u32 entry_vector = cpu_info->guest_cpu_user_regs.entry_vector;
+ cpu_info->guest_cpu_user_regs.entry_vector &= ~TRAP_regs_partial;
ASSERT(wqv->esp == 0);
/* Save current VCPU affinity; force wakeup on *this* CPU only. */
@@ -157,6 +159,8 @@ static void __prepare_to_wait(struct wai
gdprintk(XENLOG_ERR, "Stack too large in %s\n", __FUNCTION__);
domain_crash_synchronous();
}
+
+ cpu_info->guest_cpu_user_regs.entry_vector = entry_vector;
}
static void __finish_wait(struct waitqueue_vcpu *wqv)
--- a/xen/include/asm-x86/x86_64/asm_defns.h
+++ b/xen/include/asm-x86/x86_64/asm_defns.h
@@ -26,6 +26,31 @@
#define ASSERT_INTERRUPTS_ENABLED ASSERT_INTERRUPT_STATUS(nz)
#define ASSERT_INTERRUPTS_DISABLED ASSERT_INTERRUPT_STATUS(z)
+/*
+ * This flag is set in an exception frame when registers R12-R15 did not get
+ * saved.
+ */
+#define _TRAP_regs_partial 16
+#define TRAP_regs_partial (1 << _TRAP_regs_partial)
+/*
+ * This flag gets set in an exception frame when registers R12-R15 possibly
+ * get modified from their originally saved values and hence need to be
+ * restored even if the normal call flow would restore register values.
+ *
+ * The flag being set implies _TRAP_regs_partial to be unset. Restoring
+ * R12-R15 thus is
+ * - required when this flag is set,
+ * - safe when _TRAP_regs_partial is unset.
+ */
+#define _TRAP_regs_dirty 17
+#define TRAP_regs_dirty (1 << _TRAP_regs_dirty)
+
+#define mark_regs_dirty(r) ({ \
+ struct cpu_user_regs *r__ = (r); \
+ ASSERT(!((r__)->entry_vector & TRAP_regs_partial)); \
+ r__->entry_vector |= TRAP_regs_dirty; \
+})
+
#define SAVE_ALL \
addq $-(UREGS_error_code-UREGS_r15), %rsp; \
cld; \
@@ -47,11 +72,66 @@
movq %r15,UREGS_r15(%rsp); \
#ifdef __ASSEMBLY__
-.macro LOAD_C_CLOBBERED
+
+/*
+ * Save all registers not preserved by C code or used in entry/exit code. Mark
+ * the frame as partial.
+ *
+ * @type: exception type
+ * @compat: R8-R15 don't need saving, and the frame nevertheless is complete
+ */
+.macro SAVE_VOLATILE type compat=0
+.if \compat
+ movl $\type,UREGS_entry_vector-UREGS_error_code(%rsp)
+.else
+ movl $\type|TRAP_regs_partial,\
+ UREGS_entry_vector-UREGS_error_code(%rsp)
+.endif
+ addq $-(UREGS_error_code-UREGS_r15),%rsp
+ cld
+ movq %rdi,UREGS_rdi(%rsp)
+ movq %rsi,UREGS_rsi(%rsp)
+ movq %rdx,UREGS_rdx(%rsp)
+ movq %rcx,UREGS_rcx(%rsp)
+ movq %rax,UREGS_rax(%rsp)
+.if !\compat
+ movq %r8,UREGS_r8(%rsp)
+ movq %r9,UREGS_r9(%rsp)
+ movq %r10,UREGS_r10(%rsp)
+ movq %r11,UREGS_r11(%rsp)
+.endif
+ movq %rbx,UREGS_rbx(%rsp)
+ movq %rbp,UREGS_rbp(%rsp)
+ SETUP_EXCEPTION_FRAME_POINTER(UREGS_rbp)
+.endm
+
+/*
+ * Complete a frame potentially only partially saved.
+ */
+.macro SAVE_PRESERVED
+ btrl $_TRAP_regs_partial,UREGS_entry_vector(%rsp)
+ jnc 987f
+ movq %r12,UREGS_r12(%rsp)
+ movq %r13,UREGS_r13(%rsp)
+ movq %r14,UREGS_r14(%rsp)
+ movq %r15,UREGS_r15(%rsp)
+987:
+.endm
+
+/*
+ * Reload registers not preserved by C code from frame.
+ *
+ * @compat: R8-R11 don't need reloading
+ *
+ * For the way it is used in RESTORE_ALL, this macro must preserve EFLAGS.ZF.
+ */
+.macro LOAD_C_CLOBBERED compat=0
+.if !\compat
movq UREGS_r11(%rsp),%r11
movq UREGS_r10(%rsp),%r10
movq UREGS_r9(%rsp),%r9
movq UREGS_r8(%rsp),%r8
+.endif
movq UREGS_rax(%rsp),%rax
movq UREGS_rcx(%rsp),%rcx
movq UREGS_rdx(%rsp),%rdx
@@ -59,16 +139,45 @@
movq UREGS_rdi(%rsp),%rdi
.endm
-.macro RESTORE_ALL adj=0
+/*
+ * Restore all previously saved registers.
+ *
+ * @adj: extra stack pointer adjustment to be folded into the adjustment done
+ * anyway at the end of the macro
+ * @compat: R8-R15 don't need reloading
+ */
+.macro RESTORE_ALL adj=0 compat=0
+.if !\compat
+ testl $TRAP_regs_dirty,UREGS_entry_vector(%rsp)
+.endif
+ LOAD_C_CLOBBERED \compat
+.if !\compat
+ jz 987f
movq UREGS_r15(%rsp),%r15
movq UREGS_r14(%rsp),%r14
movq UREGS_r13(%rsp),%r13
movq UREGS_r12(%rsp),%r12
- movq UREGS_rbp(%rsp),%rbp
+#ifndef NDEBUG
+ .subsection 1
+987: testl $TRAP_regs_partial,UREGS_entry_vector(%rsp)
+ jnz 987f
+ cmpq UREGS_r15(%rsp),%r15
+ jne 789f
+ cmpq UREGS_r14(%rsp),%r14
+ jne 789f
+ cmpq UREGS_r13(%rsp),%r13
+ jne 789f
+ cmpq UREGS_r12(%rsp),%r12
+ je 987f
+789: ud2
+ .subsection 0
+#endif
+.endif
+987: movq UREGS_rbp(%rsp),%rbp
movq UREGS_rbx(%rsp),%rbx
- LOAD_C_CLOBBERED
subq $-(UREGS_error_code-UREGS_r15+\adj), %rsp
.endm
+
#endif
#ifdef PERF_COUNTERS
[-- Attachment #2: x86-save-restore-reduce.patch --]
[-- Type: text/plain, Size: 17294 bytes --]
x86: save/restore only partial register state where possible
... and make restore conditional not only upon having saved the state,
but also upon whether saved state was actually modified (and register
values are known to have been preserved).
Note that RBP is unconditionally considered a volatile register (i.e.
irrespective of CONFIG_FRAME_POINTER), since the RBP handling would
become overly complicated due to the need to save/restore it on the
compat mode hypercall path [6th argument].
Note further that for compat mode code paths, saving/restoring R8...R15
is entirely unnecessary - we don't allow those guests to enter 64-bit
mode, and hence they have no way of seeing these registers' contents
(and there consequently also is no information leak, except if the
context saving domctl would be considered such).
Finally, note that this may not properly deal with gdbstub's needs, yet
(but if so, I can't really suggest adjustments, as I don't know that
code).
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v2: Add a few comments.
This patch functionally depends on x86-drop-get_x86_gpr.patch.
--- a/tools/tests/x86_emulator/x86_emulate.c
+++ b/tools/tests/x86_emulator/x86_emulate.c
@@ -10,6 +10,7 @@ typedef bool bool_t;
#define BUG() abort()
#define cpu_has_amd_erratum(nr) 0
+#define mark_regs_dirty(r) ((void)(r))
#include "x86_emulate/x86_emulate.h"
#include "x86_emulate/x86_emulate.c"
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -143,6 +143,7 @@ static void continue_idle_domain(struct
static void continue_nonidle_domain(struct vcpu *v)
{
check_wakeup_from_wait();
+ mark_regs_dirty(guest_cpu_user_regs());
reset_stack_and_jump(ret_from_intr);
}
@@ -1312,7 +1313,7 @@ static void load_segments(struct vcpu *n
if ( test_bit(_VGCF_failsafe_disables_events, &n->arch.vgc_flags) )
vcpu_info(n, evtchn_upcall_mask) = 1;
- regs->entry_vector = TRAP_syscall;
+ regs->entry_vector |= TRAP_syscall;
regs->_eflags &= 0xFFFCBEFFUL;
regs->ss = FLAT_COMPAT_KERNEL_SS;
regs->_esp = (unsigned long)(esp-7);
@@ -1354,7 +1355,7 @@ static void load_segments(struct vcpu *n
if ( test_bit(_VGCF_failsafe_disables_events, &n->arch.vgc_flags) )
vcpu_info(n, evtchn_upcall_mask) = 1;
- regs->entry_vector = TRAP_syscall;
+ regs->entry_vector |= TRAP_syscall;
regs->rflags &= ~(X86_EFLAGS_AC|X86_EFLAGS_VM|X86_EFLAGS_RF|
X86_EFLAGS_NT|X86_EFLAGS_TF);
regs->ss = FLAT_KERNEL_SS;
--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -704,7 +704,7 @@ void irq_complete_move(struct irq_desc *
if (likely(!desc->arch.move_in_progress))
return;
- vector = get_irq_regs()->entry_vector;
+ vector = (u8)get_irq_regs()->entry_vector;
me = smp_processor_id();
if ( vector == desc->arch.vector &&
@@ -798,7 +798,7 @@ void do_IRQ(struct cpu_user_regs *regs)
struct irqaction *action;
uint32_t tsc_in;
struct irq_desc *desc;
- unsigned int vector = regs->entry_vector;
+ unsigned int vector = (u8)regs->entry_vector;
int irq = __get_cpu_var(vector_irq[vector]);
struct cpu_user_regs *old_regs = set_irq_regs(regs);
@@ -892,7 +892,7 @@ void do_IRQ(struct cpu_user_regs *regs)
out:
if ( desc->handler->end )
- desc->handler->end(desc, regs->entry_vector);
+ desc->handler->end(desc, vector);
out_no_end:
spin_unlock(&desc->lock);
out_no_unlock:
@@ -1113,7 +1113,7 @@ static void __do_IRQ_guest(int irq)
struct domain *d;
int i, sp;
struct pending_eoi *peoi = this_cpu(pending_eoi);
- int vector = get_irq_regs()->entry_vector;
+ unsigned int vector = (u8)get_irq_regs()->entry_vector;
if ( unlikely(action->nr_guests == 0) )
{
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -2082,6 +2082,7 @@ static int emulate_privileged_op(struct
goto fail;
if ( admin_io_okay(port, op_bytes, v, regs) )
{
+ mark_regs_dirty(regs);
io_emul(regs);
}
else
@@ -2111,6 +2112,7 @@ static int emulate_privileged_op(struct
goto fail;
if ( admin_io_okay(port, op_bytes, v, regs) )
{
+ mark_regs_dirty(regs);
io_emul(regs);
if ( (op_bytes == 1) && pv_post_outb_hook )
pv_post_outb_hook(port, regs->eax);
--- a/xen/arch/x86/x86_64/compat/entry.S
+++ b/xen/arch/x86/x86_64/compat/entry.S
@@ -14,8 +14,7 @@
ENTRY(compat_hypercall)
pushq $0
- movl $TRAP_syscall,4(%rsp)
- SAVE_ALL
+ SAVE_VOLATILE type=TRAP_syscall compat=1
cmpb $0,untrusted_msi(%rip)
UNLIKELY_START(ne, msi_check)
@@ -126,6 +125,7 @@ compat_test_guest_events:
/* %rbx: struct vcpu */
compat_process_softirqs:
sti
+ andl $~TRAP_regs_partial,UREGS_entry_vector(%rsp)
call do_softirq
jmp compat_test_all_events
@@ -172,7 +172,7 @@ compat_bad_hypercall:
/* %rbx: struct vcpu, interrupts disabled */
compat_restore_all_guest:
ASSERT_INTERRUPTS_DISABLED
- RESTORE_ALL adj=8
+ RESTORE_ALL adj=8 compat=1
.Lft0: iretq
.section .fixup,"ax"
@@ -243,7 +243,7 @@ UNLIKELY_END(compat_syscall_gpf)
ENTRY(compat_sysenter)
movq VCPU_trap_ctxt(%rbx),%rcx
- cmpl $TRAP_gp_fault,UREGS_entry_vector(%rsp)
+ cmpb $TRAP_gp_fault,UREGS_entry_vector(%rsp)
movzwl VCPU_sysenter_sel(%rbx),%eax
movzwl TRAP_gp_fault * TRAPINFO_sizeof + TRAPINFO_cs(%rcx),%ecx
cmovel %ecx,%eax
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -123,9 +123,8 @@ ENTRY(syscall_enter)
movl $FLAT_KERNEL_SS,24(%rsp)
pushq %rcx
pushq $0
- movl $TRAP_syscall,4(%rsp)
- movq 24(%rsp),%r11 /* Re-load user RFLAGS into %r11 before SAVE_ALL */
- SAVE_ALL
+ movq 24(%rsp),%r11 /* Re-load user RFLAGS into %r11 before saving */
+ SAVE_VOLATILE TRAP_syscall
GET_CURRENT(%rbx)
movq VCPU_domain(%rbx),%rcx
testb $1,DOMAIN_is_32bit_pv(%rcx)
@@ -222,6 +221,7 @@ test_guest_events:
/* %rbx: struct vcpu */
process_softirqs:
sti
+ SAVE_PRESERVED
call do_softirq
jmp test_all_events
@@ -275,8 +275,7 @@ sysenter_eflags_saved:
pushq $3 /* ring 3 null cs */
pushq $0 /* null rip */
pushq $0
- movl $TRAP_syscall,4(%rsp)
- SAVE_ALL
+ SAVE_VOLATILE TRAP_syscall
GET_CURRENT(%rbx)
cmpb $0,VCPU_sysenter_disables_events(%rbx)
movq VCPU_sysenter_addr(%rbx),%rax
@@ -286,6 +285,7 @@ sysenter_eflags_saved:
leal (,%rcx,TBF_INTERRUPT),%ecx
UNLIKELY_START(z, sysenter_gpf)
movq VCPU_trap_ctxt(%rbx),%rsi
+ SAVE_PRESERVED
movl $TRAP_gp_fault,UREGS_entry_vector(%rsp)
movl %eax,TRAPBOUNCE_error_code(%rdx)
movq TRAP_gp_fault * TRAPINFO_sizeof + TRAPINFO_eip(%rsi),%rax
@@ -302,7 +302,7 @@ UNLIKELY_END(sysenter_gpf)
ENTRY(int80_direct_trap)
pushq $0
- SAVE_ALL
+ SAVE_VOLATILE 0x80
cmpb $0,untrusted_msi(%rip)
UNLIKELY_START(ne, msi_check)
@@ -331,6 +331,7 @@ int80_slow_path:
* IDT entry with DPL==0.
*/
movl $((0x80 << 3) | 0x2),UREGS_error_code(%rsp)
+ SAVE_PRESERVED
movl $TRAP_gp_fault,UREGS_entry_vector(%rsp)
/* A GPF wouldn't have incremented the instruction pointer. */
subq $2,UREGS_rip(%rsp)
@@ -412,7 +413,7 @@ UNLIKELY_END(bounce_failsafe)
/* Rewrite our stack frame and return to guest-OS mode. */
/* IA32 Ref. Vol. 3: TF, VM, RF and NT flags are cleared on trap. */
/* Also clear AC: alignment checks shouldn't trigger in kernel mode. */
- movl $TRAP_syscall,UREGS_entry_vector+8(%rsp)
+ orl $TRAP_syscall,UREGS_entry_vector+8(%rsp)
andl $~(X86_EFLAGS_AC|X86_EFLAGS_VM|X86_EFLAGS_RF|\
X86_EFLAGS_NT|X86_EFLAGS_TF),UREGS_eflags+8(%rsp)
movq $FLAT_KERNEL_SS,UREGS_ss+8(%rsp)
@@ -477,7 +478,7 @@ handle_exception_saved:
jz exception_with_ints_disabled
sti
1: movq %rsp,%rdi
- movl UREGS_entry_vector(%rsp),%eax
+ movzbl UREGS_entry_vector(%rsp),%eax
leaq exception_table(%rip),%rdx
GET_CURRENT(%rbx)
PERFC_INCR(exceptions, %rax, %rbx)
@@ -518,7 +519,7 @@ exception_with_ints_disabled:
/* No special register assumptions. */
FATAL_exception_with_ints_disabled:
- movl UREGS_entry_vector(%rsp),%edi
+ movzbl UREGS_entry_vector(%rsp),%edi
movq %rsp,%rsi
call fatal_trap
ud2
@@ -624,7 +625,7 @@ handle_ist_exception:
movq %rdi,%rsp
rep movsq
1: movq %rsp,%rdi
- movl UREGS_entry_vector(%rsp),%eax
+ movzbl UREGS_entry_vector(%rsp),%eax
leaq exception_table(%rip),%rdx
callq *(%rdx,%rax,8)
jmp ret_from_intr
--- a/xen/arch/x86/x86_64/traps.c
+++ b/xen/arch/x86/x86_64/traps.c
@@ -67,10 +67,15 @@ static void _show_registers(
regs->rbp, regs->rsp, regs->r8);
printk("r9: %016lx r10: %016lx r11: %016lx\n",
regs->r9, regs->r10, regs->r11);
- printk("r12: %016lx r13: %016lx r14: %016lx\n",
- regs->r12, regs->r13, regs->r14);
- printk("r15: %016lx cr0: %016lx cr4: %016lx\n",
- regs->r15, crs[0], crs[4]);
+ if ( !(regs->entry_vector & TRAP_regs_partial) )
+ {
+ printk("r12: %016lx r13: %016lx r14: %016lx\n",
+ regs->r12, regs->r13, regs->r14);
+ printk("r15: %016lx cr0: %016lx cr4: %016lx\n",
+ regs->r15, crs[0], crs[4]);
+ }
+ else
+ printk("cr0: %016lx cr4: %016lx\n", crs[0], crs[4]);
printk("cr3: %016lx cr2: %016lx\n", crs[3], crs[2]);
printk("ds: %04x es: %04x fs: %04x gs: %04x "
"ss: %04x cs: %04x\n",
@@ -301,7 +306,7 @@ unsigned long do_iret(void)
if ( !(iret_saved.flags & VGCF_in_syscall) )
{
- regs->entry_vector = 0;
+ regs->entry_vector &= ~TRAP_syscall;
regs->r11 = iret_saved.r11;
regs->rcx = iret_saved.rcx;
}
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -1292,10 +1292,10 @@ decode_register(
case 9: p = ®s->r9; break;
case 10: p = ®s->r10; break;
case 11: p = ®s->r11; break;
- case 12: p = ®s->r12; break;
- case 13: p = ®s->r13; break;
- case 14: p = ®s->r14; break;
- case 15: p = ®s->r15; break;
+ case 12: mark_regs_dirty(regs); p = ®s->r12; break;
+ case 13: mark_regs_dirty(regs); p = ®s->r13; break;
+ case 14: mark_regs_dirty(regs); p = ®s->r14; break;
+ case 15: mark_regs_dirty(regs); p = ®s->r15; break;
#endif
default: BUG(); p = NULL; break;
}
--- a/xen/arch/x86/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate.c
@@ -10,6 +10,7 @@
*/
#include <asm/x86_emulate.h>
+#include <asm/asm_defns.h> /* mark_regs_dirty() */
#include <asm/processor.h> /* current_cpu_info */
#include <asm/amd.h> /* cpu_has_amd_erratum() */
--- a/xen/common/wait.c
+++ b/xen/common/wait.c
@@ -124,10 +124,12 @@ void wake_up_all(struct waitqueue_head *
static void __prepare_to_wait(struct waitqueue_vcpu *wqv)
{
- char *cpu_info = (char *)get_cpu_info();
+ struct cpu_info *cpu_info = get_cpu_info();
struct vcpu *curr = current;
unsigned long dummy;
+ u32 entry_vector = cpu_info->guest_cpu_user_regs.entry_vector;
+ cpu_info->guest_cpu_user_regs.entry_vector &= ~TRAP_regs_partial;
ASSERT(wqv->esp == 0);
/* Save current VCPU affinity; force wakeup on *this* CPU only. */
@@ -157,6 +159,8 @@ static void __prepare_to_wait(struct wai
gdprintk(XENLOG_ERR, "Stack too large in %s\n", __FUNCTION__);
domain_crash_synchronous();
}
+
+ cpu_info->guest_cpu_user_regs.entry_vector = entry_vector;
}
static void __finish_wait(struct waitqueue_vcpu *wqv)
--- a/xen/include/asm-x86/x86_64/asm_defns.h
+++ b/xen/include/asm-x86/x86_64/asm_defns.h
@@ -26,6 +26,31 @@
#define ASSERT_INTERRUPTS_ENABLED ASSERT_INTERRUPT_STATUS(nz)
#define ASSERT_INTERRUPTS_DISABLED ASSERT_INTERRUPT_STATUS(z)
+/*
+ * This flag is set in an exception frame when registers R12-R15 did not get
+ * saved.
+ */
+#define _TRAP_regs_partial 16
+#define TRAP_regs_partial (1 << _TRAP_regs_partial)
+/*
+ * This flag gets set in an exception frame when registers R12-R15 possibly
+ * get modified from their originally saved values and hence need to be
+ * restored even if the normal call flow would restore register values.
+ *
+ * The flag being set implies _TRAP_regs_partial to be unset. Restoring
+ * R12-R15 thus is
+ * - required when this flag is set,
+ * - safe when _TRAP_regs_partial is unset.
+ */
+#define _TRAP_regs_dirty 17
+#define TRAP_regs_dirty (1 << _TRAP_regs_dirty)
+
+#define mark_regs_dirty(r) ({ \
+ struct cpu_user_regs *r__ = (r); \
+ ASSERT(!((r__)->entry_vector & TRAP_regs_partial)); \
+ r__->entry_vector |= TRAP_regs_dirty; \
+})
+
#define SAVE_ALL \
addq $-(UREGS_error_code-UREGS_r15), %rsp; \
cld; \
@@ -47,11 +72,66 @@
movq %r15,UREGS_r15(%rsp); \
#ifdef __ASSEMBLY__
-.macro LOAD_C_CLOBBERED
+
+/*
+ * Save all registers not preserved by C code or used in entry/exit code. Mark
+ * the frame as partial.
+ *
+ * @type: exception type
+ * @compat: R8-R15 don't need saving, and the frame nevertheless is complete
+ */
+.macro SAVE_VOLATILE type compat=0
+.if \compat
+ movl $\type,UREGS_entry_vector-UREGS_error_code(%rsp)
+.else
+ movl $\type|TRAP_regs_partial,\
+ UREGS_entry_vector-UREGS_error_code(%rsp)
+.endif
+ addq $-(UREGS_error_code-UREGS_r15),%rsp
+ cld
+ movq %rdi,UREGS_rdi(%rsp)
+ movq %rsi,UREGS_rsi(%rsp)
+ movq %rdx,UREGS_rdx(%rsp)
+ movq %rcx,UREGS_rcx(%rsp)
+ movq %rax,UREGS_rax(%rsp)
+.if !\compat
+ movq %r8,UREGS_r8(%rsp)
+ movq %r9,UREGS_r9(%rsp)
+ movq %r10,UREGS_r10(%rsp)
+ movq %r11,UREGS_r11(%rsp)
+.endif
+ movq %rbx,UREGS_rbx(%rsp)
+ movq %rbp,UREGS_rbp(%rsp)
+ SETUP_EXCEPTION_FRAME_POINTER(UREGS_rbp)
+.endm
+
+/*
+ * Complete a frame potentially only partially saved.
+ */
+.macro SAVE_PRESERVED
+ btrl $_TRAP_regs_partial,UREGS_entry_vector(%rsp)
+ jnc 987f
+ movq %r12,UREGS_r12(%rsp)
+ movq %r13,UREGS_r13(%rsp)
+ movq %r14,UREGS_r14(%rsp)
+ movq %r15,UREGS_r15(%rsp)
+987:
+.endm
+
+/*
+ * Reload registers not preserved by C code from frame.
+ *
+ * @compat: R8-R11 don't need reloading
+ *
+ * For the way it is used in RESTORE_ALL, this macro must preserve EFLAGS.ZF.
+ */
+.macro LOAD_C_CLOBBERED compat=0
+.if !\compat
movq UREGS_r11(%rsp),%r11
movq UREGS_r10(%rsp),%r10
movq UREGS_r9(%rsp),%r9
movq UREGS_r8(%rsp),%r8
+.endif
movq UREGS_rax(%rsp),%rax
movq UREGS_rcx(%rsp),%rcx
movq UREGS_rdx(%rsp),%rdx
@@ -59,16 +139,45 @@
movq UREGS_rdi(%rsp),%rdi
.endm
-.macro RESTORE_ALL adj=0
+/*
+ * Restore all previously saved registers.
+ *
+ * @adj: extra stack pointer adjustment to be folded into the adjustment done
+ * anyway at the end of the macro
+ * @compat: R8-R15 don't need reloading
+ */
+.macro RESTORE_ALL adj=0 compat=0
+.if !\compat
+ testl $TRAP_regs_dirty,UREGS_entry_vector(%rsp)
+.endif
+ LOAD_C_CLOBBERED \compat
+.if !\compat
+ jz 987f
movq UREGS_r15(%rsp),%r15
movq UREGS_r14(%rsp),%r14
movq UREGS_r13(%rsp),%r13
movq UREGS_r12(%rsp),%r12
- movq UREGS_rbp(%rsp),%rbp
+#ifndef NDEBUG
+ .subsection 1
+987: testl $TRAP_regs_partial,UREGS_entry_vector(%rsp)
+ jnz 987f
+ cmpq UREGS_r15(%rsp),%r15
+ jne 789f
+ cmpq UREGS_r14(%rsp),%r14
+ jne 789f
+ cmpq UREGS_r13(%rsp),%r13
+ jne 789f
+ cmpq UREGS_r12(%rsp),%r12
+ je 987f
+789: ud2
+ .subsection 0
+#endif
+.endif
+987: movq UREGS_rbp(%rsp),%rbp
movq UREGS_rbx(%rsp),%rbx
- LOAD_C_CLOBBERED
subq $-(UREGS_error_code-UREGS_r15+\adj), %rsp
.endm
+
#endif
#ifdef PERF_COUNTERS
[-- Attachment #3: Type: text/plain, Size: 126 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 0/2, v2] x86: adjust entry frame generation
2012-10-30 14:20 ` [PATCH 0/2, v2] x86: adjust entry frame generation Jan Beulich
2012-10-30 14:27 ` [PATCH 1/2, v2] x86: use MOV instead of PUSH/POP when saving/restoring register state Jan Beulich
2012-10-30 14:29 ` [PATCH 2/2, v2] x86: save/restore only partial register state where possible Jan Beulich
@ 2012-10-30 14:35 ` Keir Fraser
2 siblings, 0 replies; 19+ messages in thread
From: Keir Fraser @ 2012-10-30 14:35 UTC (permalink / raw)
To: Jan Beulich, xen-devel
On 30/10/2012 15:20, "Jan Beulich" <JBeulich@suse.com> wrote:
>>>> On 02.10.12 at 17:20, "Jan Beulich" <JBeulich@suse.com> wrote:
> This pair of patches converts the way frames get created from
> using PUSHes/POPs to using MOVes, thus allowing (in certain
> cases) to avoid saving/restoring part of the register set.
>
> While the exact place where the (small) win from this comes from
> varies between CPUs, the net effect is a 1 to 2% reduction on a
> combined interruption entry and exit when the full state save
> can be avoided.
>
> 1: use MOV instead of PUSH/POP when saving/restoring register state
> 2: save only partial register state where possible
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Keir Fraser <keir@xen.org>
> ---
> NB: Patch 1 didn't change from its v1 submission, and what was
> originally patch 2 (of 3) did already get committed.
>
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 1/2, v2] x86: use MOV instead of PUSH/POP when saving/restoring register state
2012-10-30 15:19 ` Mats Petersson
@ 2012-10-30 14:36 ` Keir Fraser
2012-10-30 15:33 ` Jan Beulich
1 sibling, 0 replies; 19+ messages in thread
From: Keir Fraser @ 2012-10-30 14:36 UTC (permalink / raw)
To: Mats Petersson, xen-devel
On 30/10/2012 16:19, "Mats Petersson" <mats.petersson@citrix.com> wrote:
>> #ifdef CONFIG_FRAME_POINTER
>> /* Indicate special exception stack frame by inverting the frame pointer.
>> */
>> -#define SETUP_EXCEPTION_FRAME_POINTER \
>> - movq %rsp,%rbp; \
>> +#define SETUP_EXCEPTION_FRAME_POINTER(offs) \
>> + leaq offs(%rsp),%rbp; \
>> notq %rbp
>> #else
>> -#define SETUP_EXCEPTION_FRAME_POINTER
>> +#define SETUP_EXCEPTION_FRAME_POINTER(off)
> Is it too pedantic to say that "off" here should be "offs"?
> Just for consistency...
No, it's a fair point. :)
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 1/2, v2] x86: use MOV instead of PUSH/POP when saving/restoring register state
2012-10-30 14:27 ` [PATCH 1/2, v2] x86: use MOV instead of PUSH/POP when saving/restoring register state Jan Beulich
@ 2012-10-30 15:19 ` Mats Petersson
2012-10-30 14:36 ` Keir Fraser
2012-10-30 15:33 ` Jan Beulich
0 siblings, 2 replies; 19+ messages in thread
From: Mats Petersson @ 2012-10-30 15:19 UTC (permalink / raw)
To: xen-devel
On 30/10/12 14:27, Jan Beulich wrote:
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> Acked-by: Keir Fraser <keir@xen.org>
> ---
> This patch did not change from its v1 submission.
>
> --- a/xen/arch/x86/x86_64/compat/entry.S
> +++ b/xen/arch/x86/x86_64/compat/entry.S
> @@ -21,8 +21,7 @@ ENTRY(compat_hypercall)
> UNLIKELY_START(ne, msi_check)
> movl $HYPERCALL_VECTOR,%edi
> call check_for_unexpected_msi
> - RESTORE_ALL
> - SAVE_ALL
> + LOAD_C_CLOBBERED
> UNLIKELY_END(msi_check)
>
> GET_CURRENT(%rbx)
> @@ -173,8 +172,7 @@ compat_bad_hypercall:
> /* %rbx: struct vcpu, interrupts disabled */
> compat_restore_all_guest:
> ASSERT_INTERRUPTS_DISABLED
> - RESTORE_ALL
> - addq $8,%rsp
> + RESTORE_ALL adj=8
> .Lft0: iretq
>
> .section .fixup,"ax"
> --- a/xen/arch/x86/x86_64/entry.S
> +++ b/xen/arch/x86/x86_64/entry.S
> @@ -47,12 +47,10 @@ restore_all_guest:
> cmpl $1,%ecx
> ja .Lforce_iret
>
> - addq $8,%rsp
> - popq %rcx # RIP
> - popq %r11 # CS
> - cmpw $FLAT_USER_CS32,%r11
> - popq %r11 # RFLAGS
> - popq %rsp # RSP
> + cmpw $FLAT_USER_CS32,16(%rsp)# CS
> + movq 8(%rsp),%rcx # RIP
> + movq 24(%rsp),%r11 # RFLAGS
> + movq 32(%rsp),%rsp # RSP
> je 1f
> sysretq
> 1: sysretl
> @@ -101,8 +99,7 @@ failsafe_callback:
> ALIGN
> /* No special register assumptions. */
> restore_all_xen:
> - RESTORE_ALL
> - addq $8,%rsp
> + RESTORE_ALL adj=8
> iretq
>
> /*
> @@ -311,8 +308,7 @@ ENTRY(int80_direct_trap)
> UNLIKELY_START(ne, msi_check)
> movl $0x80,%edi
> call check_for_unexpected_msi
> - RESTORE_ALL
> - SAVE_ALL
> + LOAD_C_CLOBBERED
> UNLIKELY_END(msi_check)
>
> GET_CURRENT(%rbx)
> --- a/xen/include/asm-x86/x86_64/asm_defns.h
> +++ b/xen/include/asm-x86/x86_64/asm_defns.h
> @@ -5,11 +5,11 @@
>
> #ifdef CONFIG_FRAME_POINTER
> /* Indicate special exception stack frame by inverting the frame pointer. */
> -#define SETUP_EXCEPTION_FRAME_POINTER \
> - movq %rsp,%rbp; \
> +#define SETUP_EXCEPTION_FRAME_POINTER(offs) \
> + leaq offs(%rsp),%rbp; \
> notq %rbp
> #else
> -#define SETUP_EXCEPTION_FRAME_POINTER
> +#define SETUP_EXCEPTION_FRAME_POINTER(off)
Is it too pedantic to say that "off" here should be "offs"?
Just for consistency...
--
Mats
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 1/2, v2] x86: use MOV instead of PUSH/POP when saving/restoring register state
2012-10-30 15:19 ` Mats Petersson
2012-10-30 14:36 ` Keir Fraser
@ 2012-10-30 15:33 ` Jan Beulich
1 sibling, 0 replies; 19+ messages in thread
From: Jan Beulich @ 2012-10-30 15:33 UTC (permalink / raw)
To: Mats Petersson; +Cc: xen-devel
>>> On 30.10.12 at 16:19, Mats Petersson <mats.petersson@citrix.com> wrote:
> On 30/10/12 14:27, Jan Beulich wrote:
>> --- a/xen/include/asm-x86/x86_64/asm_defns.h
>> +++ b/xen/include/asm-x86/x86_64/asm_defns.h
>> @@ -5,11 +5,11 @@
>>
>> #ifdef CONFIG_FRAME_POINTER
>> /* Indicate special exception stack frame by inverting the frame pointer.
> */
>> -#define SETUP_EXCEPTION_FRAME_POINTER \
>> - movq %rsp,%rbp; \
>> +#define SETUP_EXCEPTION_FRAME_POINTER(offs) \
>> + leaq offs(%rsp),%rbp; \
>> notq %rbp
>> #else
>> -#define SETUP_EXCEPTION_FRAME_POINTER
>> +#define SETUP_EXCEPTION_FRAME_POINTER(off)
> Is it too pedantic to say that "off" here should be "offs"?
> Just for consistency...
It doesn't really matter, but I adjusted this in the copy that
may eventually get committed - for consistency, as you say.
Jan
^ permalink raw reply [flat|nested] 19+ messages in thread
end of thread, other threads:[~2012-10-30 15:33 UTC | newest]
Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-10-02 15:20 [PATCH 0/3] x86: adjust entry frame generation Jan Beulich
2012-10-02 15:26 ` [PATCH 1/3] x86: use MOV instead of PUSH/POP when saving/restoring register state Jan Beulich
2012-10-02 17:01 ` Keir Fraser
2012-10-02 17:57 ` Konrad Rzeszutek Wilk
2012-10-03 13:01 ` Jan Beulich
2012-10-02 15:26 ` [PATCH 2/3] x86: consolidate frame state manipulation functions Jan Beulich
2012-10-02 17:01 ` Keir Fraser
2012-10-02 15:27 ` [PATCH 3/3] x86: save/restore only partial register state where possible Jan Beulich
2012-10-02 17:02 ` Keir Fraser
2012-10-03 13:05 ` Jan Beulich
2012-10-03 14:35 ` Keir Fraser
2012-10-30 14:26 ` Jan Beulich
2012-10-30 14:20 ` [PATCH 0/2, v2] x86: adjust entry frame generation Jan Beulich
2012-10-30 14:27 ` [PATCH 1/2, v2] x86: use MOV instead of PUSH/POP when saving/restoring register state Jan Beulich
2012-10-30 15:19 ` Mats Petersson
2012-10-30 14:36 ` Keir Fraser
2012-10-30 15:33 ` Jan Beulich
2012-10-30 14:29 ` [PATCH 2/2, v2] x86: save/restore only partial register state where possible Jan Beulich
2012-10-30 14:35 ` [PATCH 0/2, v2] x86: adjust entry frame generation Keir Fraser
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.