From: "Jan Beulich"
Subject: [PATCH v3 5/4] x86: reduce code size of struct cpu_info member accesses
Date: Thu, 17 Mar 2016 10:14:22 -0600
Message-ID: <56EAE5EE02000078000DDFB5@prv-mh.provo.novell.com>
In-Reply-To: <56EA6FDF02000078000DD8FB@prv-mh.provo.novell.com>
References: <56D97AC102000078000D9537@prv-mh.provo.novell.com>
 <56E14FF502000078000DB232@prv-mh.provo.novell.com>
 <56E9A0DB02000078000DD54C@prv-mh.provo.novell.com>
 <56EA6FDF02000078000DD8FB@prv-mh.provo.novell.com>
To: xen-devel
Cc: Andrew Cooper, Keir Fraser, Feng Wu

Instead of addressing these fields via the base of the stack (which
uniformly requires 4-byte displacements), address them from the end
(which, for everything other than guest_cpu_user_regs, requires just
1-byte ones). This yields a code size reduction somewhere between 8k
and 12k in my builds.

Signed-off-by: Jan Beulich
---
Note that, just like patch 4 of the series, this also isn't directly
related to the SMEP/SMAP issue, but is again just a result of things
realized while doing that work, and again depends on the earlier
patches to apply cleanly.
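To see where the savings come from: an x86 ModRM displacement is
encoded in either 1 byte (signed, hence -128..127) or 4 bytes.
Relative to the stack base, every cpu_info field sits
STACK_SIZE - CPUINFO_sizeof + offset bytes up, which always needs the
4-byte form; relative to the stack's last byte, which GET_STACK_END
now yields, the displacement is 1 - CPUINFO_sizeof + offset, a small
negative number for every field except guest_cpu_user_regs at offset
zero. Here is a user-space sketch of the arithmetic (not Xen code; the
CPUINFO_* values are made-up stand-ins for the generated asm-offsets
constants, and STACK_SIZE is assumed to be 8 pages):

#include <stdio.h>

#define STACK_SIZE     (8 * 4096) /* assumption: 8 pages */
#define CPUINFO_sizeof 0x138      /* hypothetical struct cpu_info size */

/* Hypothetical field offsets standing in for the generated CPUINFO_*. */
static const struct { const char *name; long off; } fields[] = {
    { "guest_cpu_user_regs", 0x000 },
    { "current_vcpu",        0x118 },
    { "cr4",                 0x130 },
};

int main(void)
{
    for ( unsigned int i = 0; i < sizeof(fields) / sizeof(fields[0]); ++i )
    {
        long base = STACK_SIZE - CPUINFO_sizeof + fields[i].off; /* old */
        long end = 1 - CPUINFO_sizeof + fields[i].off;           /* new */

        printf("%-20s base:%+6ld (disp%d)  end:%+6ld (disp%d)\n",
               fields[i].name,
               base, base >= -128 && base <= 127 ? 8 : 32,
               end, end >= -128 && end <= 127 ? 8 : 32);
    }
    return 0;
}

The three bytes saved on each such access are what add up to the
reduction quoted above.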
--- a/xen/arch/x86/hvm/svm/entry.S
+++ b/xen/arch/x86/hvm/svm/entry.S
@@ -31,7 +31,7 @@
 #define CLGI .byte 0x0F,0x01,0xDD
 
 ENTRY(svm_asm_do_resume)
-        GET_CURRENT(%rbx)
+        GET_CURRENT(bx)
 .Lsvm_do_resume:
         call svm_intr_assist
         mov %rsp,%rdi
@@ -97,7 +97,7 @@ UNLIKELY_END(svm_trace)
 
         VMRUN
 
-        GET_CURRENT(%rax)
+        GET_CURRENT(ax)
         push %rdi
         push %rsi
         push %rdx
--- a/xen/arch/x86/hvm/vmx/entry.S
+++ b/xen/arch/x86/hvm/vmx/entry.S
@@ -40,7 +40,7 @@ ENTRY(vmx_asm_vmexit_handler)
         push %r10
         push %r11
         push %rbx
-        GET_CURRENT(%rbx)
+        GET_CURRENT(bx)
         push %rbp
         push %r12
         push %r13
@@ -113,7 +113,7 @@ UNLIKELY_END(realmode)
         BUG /* vmx_vmentry_failure() shouldn't return. */
 
 ENTRY(vmx_asm_do_vmentry)
-        GET_CURRENT(%rbx)
+        GET_CURRENT(bx)
         jmp .Lvmx_do_vmentry
 
 .Lvmx_goto_emulator:
--- a/xen/arch/x86/x86_64/compat/entry.S
+++ b/xen/arch/x86/x86_64/compat/entry.S
@@ -26,7 +26,7 @@ UNLIKELY_START(ne, msi_check)
 UNLIKELY_END(msi_check)
 
         movl UREGS_rax(%rsp),%eax
-        GET_CURRENT(%rbx)
+        GET_CURRENT(bx)
 
         cmpl $NR_hypercalls,%eax
         jae compat_bad_hypercall
@@ -202,7 +202,7 @@ ENTRY(compat_restore_all_guest)
 /* This mustn't modify registers other than %rax. */
 ENTRY(cr4_pv32_restore)
         push %rdx
-        GET_CPUINFO_FIELD(cr4, %rdx)
+        GET_CPUINFO_FIELD(cr4, dx)
         mov (%rdx), %rax
         test $X86_CR4_SMEP|X86_CR4_SMAP,%eax
         jnz 0f
@@ -245,7 +245,7 @@ ENTRY(cstar_enter)
         pushq %rcx
         pushq $0
         SAVE_VOLATILE TRAP_syscall
-        GET_CURRENT(%rbx)
+        GET_CURRENT(bx)
         movq VCPU_domain(%rbx),%rcx
         cmpb $0,DOMAIN_is_32bit_pv(%rcx)
         je switch_to_kernel
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -97,7 +97,7 @@ ENTRY(lstar_enter)
         pushq %rcx
         pushq $0
         SAVE_VOLATILE TRAP_syscall
-        GET_CURRENT(%rbx)
+        GET_CURRENT(bx)
         testb $TF_kernel_mode,VCPU_thread_flags(%rbx)
         jz switch_to_kernel
 
@@ -246,7 +246,7 @@ GLOBAL(sysenter_eflags_saved)
         pushq $0 /* null rip */
         pushq $0
         SAVE_VOLATILE TRAP_syscall
-        GET_CURRENT(%rbx)
+        GET_CURRENT(bx)
         cmpb $0,VCPU_sysenter_disables_events(%rbx)
         movq VCPU_sysenter_addr(%rbx),%rax
         setne %cl
@@ -288,7 +288,7 @@ UNLIKELY_START(ne, msi_check)
         call check_for_unexpected_msi
 UNLIKELY_END(msi_check)
 
-        GET_CURRENT(%rbx)
+        GET_CURRENT(bx)
 
         /* Check that the callback is non-null. */
         leaq VCPU_int80_bounce(%rbx),%rdx
@@ -420,10 +420,10 @@ domain_crash_page_fault:
         call show_page_walk
 ENTRY(dom_crash_sync_extable)
         # Get out of the guest-save area of the stack.
-        GET_STACK_BASE(%rax)
+        GET_STACK_END(ax)
         leaq STACK_CPUINFO_FIELD(guest_cpu_user_regs)(%rax),%rsp
         # create_bounce_frame() temporarily clobbers CS.RPL. Fix up.
-        __GET_CURRENT(%rax)
+        __GET_CURRENT(ax)
         movq VCPU_domain(%rax),%rax
         testb $1,DOMAIN_is_32bit_pv(%rax)
         setz %al
@@ -441,7 +441,7 @@ ENTRY(common_interrupt)
 
 /* No special register assumptions. */
 ENTRY(ret_from_intr)
-        GET_CURRENT(%rbx)
+        GET_CURRENT(bx)
         testb $3,UREGS_cs(%rsp)
         jz restore_all_xen
         movq VCPU_domain(%rbx),%rax
@@ -455,7 +455,7 @@ ENTRY(page_fault)
 GLOBAL(handle_exception)
         SAVE_ALL CLAC
 handle_exception_saved:
-        GET_CURRENT(%rbx)
+        GET_CURRENT(bx)
         testb $X86_EFLAGS_IF>>8,UREGS_eflags+1(%rsp)
         jz exception_with_ints_disabled
 
@@ -649,7 +649,7 @@ handle_ist_exception:
         testb $3,UREGS_cs(%rsp)
         jz 1f
         /* Interrupted guest context. Copy the context to stack bottom. */
-        GET_CPUINFO_FIELD(guest_cpu_user_regs,%rdi)
+        GET_CPUINFO_FIELD(guest_cpu_user_regs,di)
         movq %rsp,%rsi
         movl $UREGS_kernel_sizeof/8,%ecx
         movq %rdi,%rsp
@@ -664,7 +664,7 @@ handle_ist_exception:
         /* We want to get straight to the IRET on the NMI exit path. */
         testb $3,UREGS_cs(%rsp)
         jz restore_all_xen
-        GET_CURRENT(%rbx)
+        GET_CURRENT(bx)
         /* Send an IPI to ourselves to cover for the lack of event checking. */
         movl VCPU_processor(%rbx),%eax
         shll $IRQSTAT_shift,%eax
--- a/xen/include/asm-x86/asm_defns.h
+++ b/xen/include/asm-x86/asm_defns.h
@@ -127,19 +127,19 @@ void ret_from_intr(void);
             UNLIKELY_DONE(mp, tag);               \
         __UNLIKELY_END(tag)
 
-#define STACK_CPUINFO_FIELD(field) (STACK_SIZE-CPUINFO_sizeof+CPUINFO_##field)
-#define GET_STACK_BASE(reg)                       \
-        movq $~(STACK_SIZE-1),reg;                \
-        andq %rsp,reg
+#define STACK_CPUINFO_FIELD(field) (1 - CPUINFO_sizeof + CPUINFO_##field)
+#define GET_STACK_END(reg)                        \
+        movl $STACK_SIZE-1, %e##reg;              \
+        orq %rsp, %r##reg
 
 #define GET_CPUINFO_FIELD(field, reg)             \
-        GET_STACK_BASE(reg);                      \
-        addq $STACK_CPUINFO_FIELD(field),reg
+        GET_STACK_END(reg);                       \
+        addq $STACK_CPUINFO_FIELD(field), %r##reg
 
 #define __GET_CURRENT(reg)                        \
-        movq STACK_CPUINFO_FIELD(current_vcpu)(reg),reg
+        movq STACK_CPUINFO_FIELD(current_vcpu)(%r##reg), %r##reg
 #define GET_CURRENT(reg)                          \
-        GET_STACK_BASE(reg);                      \
+        GET_STACK_END(reg);                       \
         __GET_CURRENT(reg)
 
 #ifndef NDEBUG
--- a/xen/include/asm-x86/current.h
+++ b/xen/include/asm-x86/current.h
@@ -55,7 +55,7 @@ static inline struct cpu_info *get_cpu_i
     register unsigned long sp asm("rsp");
 #endif
 
-    return (struct cpu_info *)((sp & ~(STACK_SIZE-1)) + STACK_SIZE) - 1;
+    return (struct cpu_info *)((sp | (STACK_SIZE - 1)) + 1) - 1;
 }
 
 #define get_current() (get_cpu_info()->current_vcpu)
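P.S. That the new OR-based form in get_cpu_info() computes the same
pointer as the old AND-based one is easy to check mechanically: for a
power-of-two STACK_SIZE, sp | (STACK_SIZE - 1) points at the stack's
last byte, so adding 1 reaches the same boundary the old
round-down-and-add form did. A minimal sketch (again not Xen code;
the STACK_SIZE value is an assumption):

#include <assert.h>
#include <stdint.h>

#define STACK_SIZE (8UL * 4096) /* assumption: power of two */

int main(void)
{
    /* Compare the two forms over a few stacks' worth of rsp values. */
    for ( uintptr_t sp = 0; sp < 4 * STACK_SIZE; ++sp )
        assert(((sp & ~(STACK_SIZE - 1)) + STACK_SIZE) ==
               ((sp | (STACK_SIZE - 1)) + 1));
    return 0;
}

The OR form also lets GET_STACK_END load its mask with a
zero-extending movl instead of a 64-bit movq immediate, shaving a
couple more bytes per use.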