From: "Jan Beulich" <JBeulich@suse.com>
To: xen-devel <xen-devel@lists.xenproject.org>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>,
Keir Fraser <keir@xen.org>, Feng Wu <feng.wu@intel.com>
Subject: [PATCH v3 5/4] x86: reduce code size of struct cpu_info member accesses
Date: Thu, 17 Mar 2016 10:14:22 -0600 [thread overview]
Message-ID: <56EAE5EE02000078000DDFB5@prv-mh.provo.novell.com> (raw)
In-Reply-To: <56EA6FDF02000078000DD8FB@prv-mh.provo.novell.com>
[-- Attachment #1: Type: text/plain, Size: 6993 bytes --]
Instead of addressing these fields via the base of the stack (which
uniformly requires 4-byte displacements), address them from the end
(which for everything other than guest_cpu_user_regs requires just
1-byte ones). This yields a code size reduction somewhere between 8k
and 12k in my builds.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
Note that, just like patch 4 of the series, this also isn't directly
related to the SMEP/SMAP issue, but is again just a result of things
realized while doing that work, and again depends on the earlier
patches to apply cleanly.
--- a/xen/arch/x86/hvm/svm/entry.S
+++ b/xen/arch/x86/hvm/svm/entry.S
@@ -31,7 +31,7 @@
#define CLGI .byte 0x0F,0x01,0xDD
ENTRY(svm_asm_do_resume)
- GET_CURRENT(%rbx)
+ GET_CURRENT(bx)
.Lsvm_do_resume:
call svm_intr_assist
mov %rsp,%rdi
@@ -97,7 +97,7 @@ UNLIKELY_END(svm_trace)
VMRUN
- GET_CURRENT(%rax)
+ GET_CURRENT(ax)
push %rdi
push %rsi
push %rdx
--- a/xen/arch/x86/hvm/vmx/entry.S
+++ b/xen/arch/x86/hvm/vmx/entry.S
@@ -40,7 +40,7 @@ ENTRY(vmx_asm_vmexit_handler)
push %r10
push %r11
push %rbx
- GET_CURRENT(%rbx)
+ GET_CURRENT(bx)
push %rbp
push %r12
push %r13
@@ -113,7 +113,7 @@ UNLIKELY_END(realmode)
BUG /* vmx_vmentry_failure() shouldn't return. */
ENTRY(vmx_asm_do_vmentry)
- GET_CURRENT(%rbx)
+ GET_CURRENT(bx)
jmp .Lvmx_do_vmentry
.Lvmx_goto_emulator:
--- a/xen/arch/x86/x86_64/compat/entry.S
+++ b/xen/arch/x86/x86_64/compat/entry.S
@@ -26,7 +26,7 @@ UNLIKELY_START(ne, msi_check)
UNLIKELY_END(msi_check)
movl UREGS_rax(%rsp),%eax
- GET_CURRENT(%rbx)
+ GET_CURRENT(bx)
cmpl $NR_hypercalls,%eax
jae compat_bad_hypercall
@@ -202,7 +202,7 @@ ENTRY(compat_restore_all_guest)
/* This mustn't modify registers other than %rax. */
ENTRY(cr4_pv32_restore)
push %rdx
- GET_CPUINFO_FIELD(cr4, %rdx)
+ GET_CPUINFO_FIELD(cr4, dx)
mov (%rdx), %rax
test $X86_CR4_SMEP|X86_CR4_SMAP,%eax
jnz 0f
@@ -245,7 +245,7 @@ ENTRY(cstar_enter)
pushq %rcx
pushq $0
SAVE_VOLATILE TRAP_syscall
- GET_CURRENT(%rbx)
+ GET_CURRENT(bx)
movq VCPU_domain(%rbx),%rcx
cmpb $0,DOMAIN_is_32bit_pv(%rcx)
je switch_to_kernel
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -97,7 +97,7 @@ ENTRY(lstar_enter)
pushq %rcx
pushq $0
SAVE_VOLATILE TRAP_syscall
- GET_CURRENT(%rbx)
+ GET_CURRENT(bx)
testb $TF_kernel_mode,VCPU_thread_flags(%rbx)
jz switch_to_kernel
@@ -246,7 +246,7 @@ GLOBAL(sysenter_eflags_saved)
pushq $0 /* null rip */
pushq $0
SAVE_VOLATILE TRAP_syscall
- GET_CURRENT(%rbx)
+ GET_CURRENT(bx)
cmpb $0,VCPU_sysenter_disables_events(%rbx)
movq VCPU_sysenter_addr(%rbx),%rax
setne %cl
@@ -288,7 +288,7 @@ UNLIKELY_START(ne, msi_check)
call check_for_unexpected_msi
UNLIKELY_END(msi_check)
- GET_CURRENT(%rbx)
+ GET_CURRENT(bx)
/* Check that the callback is non-null. */
leaq VCPU_int80_bounce(%rbx),%rdx
@@ -420,10 +420,10 @@ domain_crash_page_fault:
call show_page_walk
ENTRY(dom_crash_sync_extable)
# Get out of the guest-save area of the stack.
- GET_STACK_BASE(%rax)
+ GET_STACK_END(ax)
leaq STACK_CPUINFO_FIELD(guest_cpu_user_regs)(%rax),%rsp
# create_bounce_frame() temporarily clobbers CS.RPL. Fix up.
- __GET_CURRENT(%rax)
+ __GET_CURRENT(ax)
movq VCPU_domain(%rax),%rax
testb $1,DOMAIN_is_32bit_pv(%rax)
setz %al
@@ -441,7 +441,7 @@ ENTRY(common_interrupt)
/* No special register assumptions. */
ENTRY(ret_from_intr)
- GET_CURRENT(%rbx)
+ GET_CURRENT(bx)
testb $3,UREGS_cs(%rsp)
jz restore_all_xen
movq VCPU_domain(%rbx),%rax
@@ -455,7 +455,7 @@ ENTRY(page_fault)
GLOBAL(handle_exception)
SAVE_ALL CLAC
handle_exception_saved:
- GET_CURRENT(%rbx)
+ GET_CURRENT(bx)
testb $X86_EFLAGS_IF>>8,UREGS_eflags+1(%rsp)
jz exception_with_ints_disabled
@@ -649,7 +649,7 @@ handle_ist_exception:
testb $3,UREGS_cs(%rsp)
jz 1f
/* Interrupted guest context. Copy the context to stack bottom. */
- GET_CPUINFO_FIELD(guest_cpu_user_regs,%rdi)
+ GET_CPUINFO_FIELD(guest_cpu_user_regs,di)
movq %rsp,%rsi
movl $UREGS_kernel_sizeof/8,%ecx
movq %rdi,%rsp
@@ -664,7 +664,7 @@ handle_ist_exception:
/* We want to get straight to the IRET on the NMI exit path. */
testb $3,UREGS_cs(%rsp)
jz restore_all_xen
- GET_CURRENT(%rbx)
+ GET_CURRENT(bx)
/* Send an IPI to ourselves to cover for the lack of event checking. */
movl VCPU_processor(%rbx),%eax
shll $IRQSTAT_shift,%eax
--- a/xen/include/asm-x86/asm_defns.h
+++ b/xen/include/asm-x86/asm_defns.h
@@ -127,19 +127,19 @@ void ret_from_intr(void);
UNLIKELY_DONE(mp, tag); \
__UNLIKELY_END(tag)
-#define STACK_CPUINFO_FIELD(field) (STACK_SIZE-CPUINFO_sizeof+CPUINFO_##field)
-#define GET_STACK_BASE(reg) \
- movq $~(STACK_SIZE-1),reg; \
- andq %rsp,reg
+#define STACK_CPUINFO_FIELD(field) (1 - CPUINFO_sizeof + CPUINFO_##field)
+#define GET_STACK_END(reg) \
+ movl $STACK_SIZE-1, %e##reg; \
+ orq %rsp, %r##reg
#define GET_CPUINFO_FIELD(field, reg) \
- GET_STACK_BASE(reg); \
- addq $STACK_CPUINFO_FIELD(field),reg
+ GET_STACK_END(reg); \
+ addq $STACK_CPUINFO_FIELD(field), %r##reg
#define __GET_CURRENT(reg) \
- movq STACK_CPUINFO_FIELD(current_vcpu)(reg),reg
+ movq STACK_CPUINFO_FIELD(current_vcpu)(%r##reg), %r##reg
#define GET_CURRENT(reg) \
- GET_STACK_BASE(reg); \
+ GET_STACK_END(reg); \
__GET_CURRENT(reg)
#ifndef NDEBUG
--- a/xen/include/asm-x86/current.h
+++ b/xen/include/asm-x86/current.h
@@ -55,7 +55,7 @@ static inline struct cpu_info *get_cpu_i
register unsigned long sp asm("rsp");
#endif
- return (struct cpu_info *)((sp & ~(STACK_SIZE-1)) + STACK_SIZE) - 1;
+ return (struct cpu_info *)((sp | (STACK_SIZE - 1)) + 1) - 1;
}
#define get_current() (get_cpu_info()->current_vcpu)
[-- Attachment #2: x86-gci-use-or.patch --]
[-- Type: text/plain, Size: 7049 bytes --]
x86: reduce code size of struct cpu_info member accesses
Instead of addressing these fields via the base of the stack (which
uniformly requires 4-byte displacements), address them from the end
(which for everything other than guest_cpu_user_regs requires just
1-byte ones). This yields a code size reduction somewhere between 8k
and 12k in my builds.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
Note that, just like patch 4 of the series, this also isn't directly
related to the SMEP/SMAP issue, but is again just a result of things
realized while doing that work, and again depends on the earlier
patches to apply cleanly.
--- a/xen/arch/x86/hvm/svm/entry.S
+++ b/xen/arch/x86/hvm/svm/entry.S
@@ -31,7 +31,7 @@
#define CLGI .byte 0x0F,0x01,0xDD
ENTRY(svm_asm_do_resume)
- GET_CURRENT(%rbx)
+ GET_CURRENT(bx)
.Lsvm_do_resume:
call svm_intr_assist
mov %rsp,%rdi
@@ -97,7 +97,7 @@ UNLIKELY_END(svm_trace)
VMRUN
- GET_CURRENT(%rax)
+ GET_CURRENT(ax)
push %rdi
push %rsi
push %rdx
--- a/xen/arch/x86/hvm/vmx/entry.S
+++ b/xen/arch/x86/hvm/vmx/entry.S
@@ -40,7 +40,7 @@ ENTRY(vmx_asm_vmexit_handler)
push %r10
push %r11
push %rbx
- GET_CURRENT(%rbx)
+ GET_CURRENT(bx)
push %rbp
push %r12
push %r13
@@ -113,7 +113,7 @@ UNLIKELY_END(realmode)
BUG /* vmx_vmentry_failure() shouldn't return. */
ENTRY(vmx_asm_do_vmentry)
- GET_CURRENT(%rbx)
+ GET_CURRENT(bx)
jmp .Lvmx_do_vmentry
.Lvmx_goto_emulator:
--- a/xen/arch/x86/x86_64/compat/entry.S
+++ b/xen/arch/x86/x86_64/compat/entry.S
@@ -26,7 +26,7 @@ UNLIKELY_START(ne, msi_check)
UNLIKELY_END(msi_check)
movl UREGS_rax(%rsp),%eax
- GET_CURRENT(%rbx)
+ GET_CURRENT(bx)
cmpl $NR_hypercalls,%eax
jae compat_bad_hypercall
@@ -202,7 +202,7 @@ ENTRY(compat_restore_all_guest)
/* This mustn't modify registers other than %rax. */
ENTRY(cr4_pv32_restore)
push %rdx
- GET_CPUINFO_FIELD(cr4, %rdx)
+ GET_CPUINFO_FIELD(cr4, dx)
mov (%rdx), %rax
test $X86_CR4_SMEP|X86_CR4_SMAP,%eax
jnz 0f
@@ -245,7 +245,7 @@ ENTRY(cstar_enter)
pushq %rcx
pushq $0
SAVE_VOLATILE TRAP_syscall
- GET_CURRENT(%rbx)
+ GET_CURRENT(bx)
movq VCPU_domain(%rbx),%rcx
cmpb $0,DOMAIN_is_32bit_pv(%rcx)
je switch_to_kernel
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -97,7 +97,7 @@ ENTRY(lstar_enter)
pushq %rcx
pushq $0
SAVE_VOLATILE TRAP_syscall
- GET_CURRENT(%rbx)
+ GET_CURRENT(bx)
testb $TF_kernel_mode,VCPU_thread_flags(%rbx)
jz switch_to_kernel
@@ -246,7 +246,7 @@ GLOBAL(sysenter_eflags_saved)
pushq $0 /* null rip */
pushq $0
SAVE_VOLATILE TRAP_syscall
- GET_CURRENT(%rbx)
+ GET_CURRENT(bx)
cmpb $0,VCPU_sysenter_disables_events(%rbx)
movq VCPU_sysenter_addr(%rbx),%rax
setne %cl
@@ -288,7 +288,7 @@ UNLIKELY_START(ne, msi_check)
call check_for_unexpected_msi
UNLIKELY_END(msi_check)
- GET_CURRENT(%rbx)
+ GET_CURRENT(bx)
/* Check that the callback is non-null. */
leaq VCPU_int80_bounce(%rbx),%rdx
@@ -420,10 +420,10 @@ domain_crash_page_fault:
call show_page_walk
ENTRY(dom_crash_sync_extable)
# Get out of the guest-save area of the stack.
- GET_STACK_BASE(%rax)
+ GET_STACK_END(ax)
leaq STACK_CPUINFO_FIELD(guest_cpu_user_regs)(%rax),%rsp
# create_bounce_frame() temporarily clobbers CS.RPL. Fix up.
- __GET_CURRENT(%rax)
+ __GET_CURRENT(ax)
movq VCPU_domain(%rax),%rax
testb $1,DOMAIN_is_32bit_pv(%rax)
setz %al
@@ -441,7 +441,7 @@ ENTRY(common_interrupt)
/* No special register assumptions. */
ENTRY(ret_from_intr)
- GET_CURRENT(%rbx)
+ GET_CURRENT(bx)
testb $3,UREGS_cs(%rsp)
jz restore_all_xen
movq VCPU_domain(%rbx),%rax
@@ -455,7 +455,7 @@ ENTRY(page_fault)
GLOBAL(handle_exception)
SAVE_ALL CLAC
handle_exception_saved:
- GET_CURRENT(%rbx)
+ GET_CURRENT(bx)
testb $X86_EFLAGS_IF>>8,UREGS_eflags+1(%rsp)
jz exception_with_ints_disabled
@@ -649,7 +649,7 @@ handle_ist_exception:
testb $3,UREGS_cs(%rsp)
jz 1f
/* Interrupted guest context. Copy the context to stack bottom. */
- GET_CPUINFO_FIELD(guest_cpu_user_regs,%rdi)
+ GET_CPUINFO_FIELD(guest_cpu_user_regs,di)
movq %rsp,%rsi
movl $UREGS_kernel_sizeof/8,%ecx
movq %rdi,%rsp
@@ -664,7 +664,7 @@ handle_ist_exception:
/* We want to get straight to the IRET on the NMI exit path. */
testb $3,UREGS_cs(%rsp)
jz restore_all_xen
- GET_CURRENT(%rbx)
+ GET_CURRENT(bx)
/* Send an IPI to ourselves to cover for the lack of event checking. */
movl VCPU_processor(%rbx),%eax
shll $IRQSTAT_shift,%eax
--- a/xen/include/asm-x86/asm_defns.h
+++ b/xen/include/asm-x86/asm_defns.h
@@ -127,19 +127,19 @@ void ret_from_intr(void);
UNLIKELY_DONE(mp, tag); \
__UNLIKELY_END(tag)
-#define STACK_CPUINFO_FIELD(field) (STACK_SIZE-CPUINFO_sizeof+CPUINFO_##field)
-#define GET_STACK_BASE(reg) \
- movq $~(STACK_SIZE-1),reg; \
- andq %rsp,reg
+#define STACK_CPUINFO_FIELD(field) (1 - CPUINFO_sizeof + CPUINFO_##field)
+#define GET_STACK_END(reg) \
+ movl $STACK_SIZE-1, %e##reg; \
+ orq %rsp, %r##reg
#define GET_CPUINFO_FIELD(field, reg) \
- GET_STACK_BASE(reg); \
- addq $STACK_CPUINFO_FIELD(field),reg
+ GET_STACK_END(reg); \
+ addq $STACK_CPUINFO_FIELD(field), %r##reg
#define __GET_CURRENT(reg) \
- movq STACK_CPUINFO_FIELD(current_vcpu)(reg),reg
+ movq STACK_CPUINFO_FIELD(current_vcpu)(%r##reg), %r##reg
#define GET_CURRENT(reg) \
- GET_STACK_BASE(reg); \
+ GET_STACK_END(reg); \
__GET_CURRENT(reg)
#ifndef NDEBUG
--- a/xen/include/asm-x86/current.h
+++ b/xen/include/asm-x86/current.h
@@ -55,7 +55,7 @@ static inline struct cpu_info *get_cpu_i
register unsigned long sp asm("rsp");
#endif
- return (struct cpu_info *)((sp & ~(STACK_SIZE-1)) + STACK_SIZE) - 1;
+ return (struct cpu_info *)((sp | (STACK_SIZE - 1)) + 1) - 1;
}
#define get_current() (get_cpu_info()->current_vcpu)
[-- Attachment #3: Type: text/plain, Size: 126 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
next prev parent reply other threads:[~2016-03-17 16:14 UTC|newest]
Thread overview: 67+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-03-04 11:08 [PATCH 0/4] x86: accommodate 32-bit PV guests with SMAP/SMEP handling Jan Beulich
2016-03-04 11:27 ` [PATCH 1/4] x86/alternatives: correct near branch check Jan Beulich
2016-03-07 15:43 ` Andrew Cooper
2016-03-07 15:56 ` Jan Beulich
2016-03-07 16:11 ` Andrew Cooper
2016-03-07 16:21 ` Jan Beulich
2016-03-08 17:33 ` Andrew Cooper
2016-03-04 11:27 ` [PATCH 2/4] x86: suppress SMAP and SMEP while running 32-bit PV guest code Jan Beulich
2016-03-07 16:59 ` Andrew Cooper
2016-03-08 7:57 ` Jan Beulich
2016-03-09 8:09 ` Wu, Feng
2016-03-09 14:09 ` Jan Beulich
2016-03-09 11:19 ` Andrew Cooper
2016-03-09 14:28 ` Jan Beulich
2016-03-09 8:09 ` Wu, Feng
2016-03-09 10:45 ` Andrew Cooper
2016-03-09 12:27 ` Wu, Feng
2016-03-09 12:33 ` Andrew Cooper
2016-03-09 12:36 ` Jan Beulich
2016-03-09 12:54 ` Wu, Feng
2016-03-09 13:35 ` Wu, Feng
2016-03-09 13:42 ` Andrew Cooper
2016-03-09 14:03 ` Jan Beulich
2016-03-09 14:07 ` Jan Beulich
2016-03-04 11:28 ` [PATCH 3/4] x86: use optimal NOPs to fill the SMAP/SMEP placeholders Jan Beulich
2016-03-07 17:43 ` Andrew Cooper
2016-03-08 8:02 ` Jan Beulich
2016-03-04 11:29 ` [PATCH 4/4] x86: use 32-bit loads for 32-bit PV guest state reload Jan Beulich
2016-03-07 17:45 ` Andrew Cooper
2016-03-10 9:44 ` [PATCH v2 0/3] x86: accommodate 32-bit PV guests with SMEP/SMAP handling Jan Beulich
2016-03-10 9:53 ` [PATCH v2 1/3] x86: suppress SMEP and SMAP while running 32-bit PV guest code Jan Beulich
2016-05-13 15:48 ` Andrew Cooper
2016-03-10 9:54 ` [PATCH v2 2/3] x86: use optimal NOPs to fill the SMEP/SMAP placeholders Jan Beulich
2016-05-13 15:49 ` Andrew Cooper
2016-03-10 9:55 ` [PATCH v2 3/3] x86: use 32-bit loads for 32-bit PV guest state reload Jan Beulich
[not found] ` <56E9A0DB02000078000DD54C@prv-mh.provo.novell.com>
2016-03-17 7:50 ` [PATCH v3 0/4] x86: accommodate 32-bit PV guests with SMEP/SMAP handling Jan Beulich
2016-03-17 8:02 ` [PATCH v3 1/4] x86: move cached CR4 value to struct cpu_info Jan Beulich
2016-03-17 16:20 ` Andrew Cooper
2016-03-17 8:03 ` [PATCH v3 2/4] x86: suppress SMEP and SMAP while running 32-bit PV guest code Jan Beulich
2016-03-25 18:01 ` Konrad Rzeszutek Wilk
2016-03-29 6:55 ` Jan Beulich
2016-05-13 15:58 ` Andrew Cooper
2016-03-17 8:03 ` [PATCH v3 3/4] x86: use optimal NOPs to fill the SMEP/SMAP placeholders Jan Beulich
2016-05-13 15:57 ` Andrew Cooper
2016-05-13 16:06 ` Jan Beulich
2016-05-13 16:09 ` Andrew Cooper
2016-03-17 8:04 ` [PATCH v3 4/4] x86: use 32-bit loads for 32-bit PV guest state reload Jan Beulich
2016-03-25 18:02 ` Konrad Rzeszutek Wilk
2016-03-17 16:14 ` Jan Beulich [this message]
2016-03-25 18:47 ` [PATCH v3 5/4] x86: reduce code size of struct cpu_info member accesses Konrad Rzeszutek Wilk
2016-03-29 6:59 ` Jan Beulich
2016-03-30 14:28 ` Konrad Rzeszutek Wilk
2016-03-30 14:42 ` Jan Beulich
2016-05-13 16:11 ` Andrew Cooper
2016-05-03 13:58 ` Ping: [PATCH v3 2/4] x86: suppress SMEP and SMAP while running 32-bit PV guest code Jan Beulich
2016-05-03 14:10 ` Andrew Cooper
2016-05-03 14:25 ` Jan Beulich
2016-05-04 10:03 ` Andrew Cooper
2016-05-04 13:35 ` Jan Beulich
2016-05-04 3:07 ` Wu, Feng
2016-05-13 15:21 ` Wei Liu
2016-05-13 15:30 ` Jan Beulich
2016-05-13 15:33 ` Wei Liu
2016-05-13 17:02 ` [PATCH v3 0/4] x86: accommodate 32-bit PV guests with SMEP/SMAP handling Wei Liu
2016-05-13 17:21 ` Andrew Cooper
2016-06-21 6:19 ` Wu, Feng
2016-06-21 7:17 ` Jan Beulich
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=56EAE5EE02000078000DDFB5@prv-mh.provo.novell.com \
--to=jbeulich@suse.com \
--cc=andrew.cooper3@citrix.com \
--cc=feng.wu@intel.com \
--cc=keir@xen.org \
--cc=xen-devel@lists.xenproject.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).