All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/2] x86/entry: xorq->xorl; idtentry size reduction
@ 2018-02-14 17:59 Dominik Brodowski
  2018-02-14 17:59 ` [PATCH 1/2] x86/entry: reduce static footprint of idtentry Dominik Brodowski
  2018-02-14 17:59 ` [PATCH 2/2] x86/entry/64: use xorl for register clearing Dominik Brodowski
  0 siblings, 2 replies; 8+ messages in thread
From: Dominik Brodowski @ 2018-02-14 17:59 UTC (permalink / raw)
  To: linux-kernel, mingo, x86, torvalds; +Cc: luto, ak, tglx, dan.j.williams

These two patches apply on top of tip/pti.

The first one cuts the idtentry macro as suggested by Linus.
I'm not sure whether this patch yet needs an explicit SOB by
him though. It was previously sent as RFC / testing only patch
"8/7" to the previous x86/entry-related series, cf.
http://lkml.kernel.org/r/20180211104949.12992-9-linux@dominikbrodowski.net

The other one replaces the xorq-based register cleaning with an
equivalent xorl-based version, which is supposed to be faster on
some architectures.

Thanks,
	Dominik

Dominik Brodowski (2):
  x86/entry: reduce static footprint of idtentry
  x86/entry/64: use xorl for register clearing

 arch/x86/entry/calling.h         | 27 +++++++++++++-------
 arch/x86/entry/entry_64.S        | 18 ++++++--------
 arch/x86/entry/entry_64_compat.S | 54 ++++++++++++++++++++--------------------
 3 files changed, 53 insertions(+), 46 deletions(-)

-- 
2.16.1

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 1/2] x86/entry: reduce static footprint of idtentry
  2018-02-14 17:59 [PATCH 0/2] x86/entry: xorq->xorl; idtentry size reduction Dominik Brodowski
@ 2018-02-14 17:59 ` Dominik Brodowski
  2018-02-17 11:40   ` [tip:x86/pti] x86/entry: Reduce the code footprint of the 'idtentry' macro tip-bot for Dominik Brodowski
  2018-02-14 17:59 ` [PATCH 2/2] x86/entry/64: use xorl for register clearing Dominik Brodowski
  1 sibling, 1 reply; 8+ messages in thread
From: Dominik Brodowski @ 2018-02-14 17:59 UTC (permalink / raw)
  To: linux-kernel, mingo, x86, torvalds; +Cc: luto, ak, tglx, dan.j.williams

Play a little trick in the generic PUSH_AND_CLEAR_REGS macro
to insert the GP registers "above" the original return address.
This allows us to (re-)insert the macro in error_entry() and
paranoid_entry() and to remove it from the idtentry macro. This
reduces the static footprint significantly:

   text	   data	    bss	    dec	    hex	filename
  24307	      0	      0	  24307	   5ef3	entry_64.o-orig
  20987	      0	      0	  20987	   51fb	entry_64.o

Co-developed-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
---
 arch/x86/entry/calling.h  | 11 ++++++++++-
 arch/x86/entry/entry_64.S | 18 ++++++++----------
 2 files changed, 18 insertions(+), 11 deletions(-)

diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index dce7092ab24a..79ead48e6fe1 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -97,7 +97,7 @@ For 32-bit we have the following conventions - kernel is built with
 
 #define SIZEOF_PTREGS	21*8
 
-.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax
+.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax save_ret=0
 	/*
 	 * Push registers and sanitize registers of values that a
 	 * speculation attack might otherwise want to exploit. The
@@ -105,8 +105,14 @@ For 32-bit we have the following conventions - kernel is built with
 	 * could be put to use in a speculative execution gadget.
 	 * Interleave XOR with PUSH for better uop scheduling:
 	 */
+	.if \save_ret
+	pushq	%rsi		/* pt_regs->si */
+	movq	8(%rsp), %rsi	/* temporarily store ret address in %rsi */
+	movq	%rdi, 8(%rsp)	/* pt_regs->di (overwriting original ret) */
+	.else
 	pushq   %rdi		/* pt_regs->di */
 	pushq   %rsi		/* pt_regs->si */
+	.endif
 	pushq	\rdx		/* pt_regs->dx */
 	pushq   %rcx		/* pt_regs->cx */
 	pushq   \rax		/* pt_regs->ax */
@@ -131,6 +137,9 @@ For 32-bit we have the following conventions - kernel is built with
 	pushq	%r15		/* pt_regs->r15 */
 	xorq    %r15, %r15	/* nospec   r15*/
 	UNWIND_HINT_REGS
+	.if \save_ret
+	pushq	%rsi		/* return address on top of stack */
+	.endif
 .endm
 
 .macro POP_REGS pop_rdi=1 skip_r11rcx=0
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 1c54204207d8..5d9cb0f037e4 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -871,12 +871,8 @@ ENTRY(\sym)
 	pushq	$-1				/* ORIG_RAX: no syscall to restart */
 	.endif
 
-	/* Save all registers in pt_regs */
-	PUSH_AND_CLEAR_REGS
-	ENCODE_FRAME_POINTER
-
 	.if \paranoid < 2
-	testb	$3, CS(%rsp)			/* If coming from userspace, switch stacks */
+	testb	$3, CS-ORIG_RAX(%rsp)		/* If coming from userspace, switch stacks */
 	jnz	.Lfrom_usermode_switch_stack_\@
 	.endif
 
@@ -1123,13 +1119,15 @@ idtentry machine_check		do_mce			has_error_code=0	paranoid=1
 #endif
 
 /*
- * Switch gs if needed.
+ * Save all registers in pt_regs, and switch gs if needed.
  * Use slow, but surefire "are we in kernel?" check.
  * Return: ebx=0: need swapgs on exit, ebx=1: otherwise
  */
 ENTRY(paranoid_entry)
 	UNWIND_HINT_FUNC
 	cld
+	PUSH_AND_CLEAR_REGS save_ret=1
+	ENCODE_FRAME_POINTER 8
 	movl	$1, %ebx
 	movl	$MSR_GS_BASE, %ecx
 	rdmsr
@@ -1173,12 +1171,14 @@ ENTRY(paranoid_exit)
 END(paranoid_exit)
 
 /*
- * Switch gs if needed.
+ * Save all registers in pt_regs, and switch gs if needed.
  * Return: EBX=0: came from user mode; EBX=1: otherwise
  */
 ENTRY(error_entry)
-	UNWIND_HINT_REGS offset=8
+	UNWIND_HINT_FUNC
 	cld
+	PUSH_AND_CLEAR_REGS save_ret=1
+	ENCODE_FRAME_POINTER 8
 	testb	$3, CS+8(%rsp)
 	jz	.Lerror_kernelspace
 
@@ -1569,8 +1569,6 @@ end_repeat_nmi:
 	 * frame to point back to repeat_nmi.
 	 */
 	pushq	$-1				/* ORIG_RAX: no syscall to restart */
-	PUSH_AND_CLEAR_REGS
-	ENCODE_FRAME_POINTER
 
 	/*
 	 * Use paranoid_entry to handle SWAPGS, but no need to use paranoid_exit
-- 
2.16.1

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 2/2] x86/entry/64: use xorl for register clearing
  2018-02-14 17:59 [PATCH 0/2] x86/entry: xorq->xorl; idtentry size reduction Dominik Brodowski
  2018-02-14 17:59 ` [PATCH 1/2] x86/entry: reduce static footprint of idtentry Dominik Brodowski
@ 2018-02-14 17:59 ` Dominik Brodowski
  2018-02-17 11:40   ` [tip:x86/pti] x86/entry/64: Use 'xorl' for faster " tip-bot for Dominik Brodowski
  1 sibling, 1 reply; 8+ messages in thread
From: Dominik Brodowski @ 2018-02-14 17:59 UTC (permalink / raw)
  To: linux-kernel, mingo, x86, torvalds; +Cc: luto, ak, tglx, dan.j.williams

Using xorq to clear general-purpose registers is slower than
xorl on some architectures. As xorl is sufficient to clear all
64bit of these registers,[*] switch the x86 64-bit entry code
to use xorl.

[*] According to Intel 64 and IA-32 Architecture Software Developer's
    Manual, section 3.4.1.1, the result of 32-bit operands are "zero-
    extended to a 64-bit result in the destination general-purpose
    register." The AMD64 Architecture Programmer’s Manual Volume 3,
    Appendix B.1, describes the same behaviour.

Suggested-by: Denys Vlasenko <dvlasenk@redhat.com>
Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
---
 arch/x86/entry/calling.h         | 16 ++++++------
 arch/x86/entry/entry_64_compat.S | 54 ++++++++++++++++++++--------------------
 2 files changed, 35 insertions(+), 35 deletions(-)

diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 79ead48e6fe1..adaf5fd9840d 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -117,25 +117,25 @@ For 32-bit we have the following conventions - kernel is built with
 	pushq   %rcx		/* pt_regs->cx */
 	pushq   \rax		/* pt_regs->ax */
 	pushq   %r8		/* pt_regs->r8 */
-	xorq    %r8, %r8	/* nospec   r8 */
+	xorl	%r8d, %r8d	/* nospec   r8 */
 	pushq   %r9		/* pt_regs->r9 */
-	xorq    %r9, %r9	/* nospec   r9 */
+	xorl	%r9d, %r9d	/* nospec   r9 */
 	pushq   %r10		/* pt_regs->r10 */
-	xorq    %r10, %r10	/* nospec   r10 */
+	xorl	%r10d, %r10d	/* nospec   r10 */
 	pushq   %r11		/* pt_regs->r11 */
-	xorq    %r11, %r11	/* nospec   r11*/
+	xorl	%r11d, %r11d	/* nospec   r11*/
 	pushq	%rbx		/* pt_regs->rbx */
 	xorl    %ebx, %ebx	/* nospec   rbx*/
 	pushq	%rbp		/* pt_regs->rbp */
 	xorl    %ebp, %ebp	/* nospec   rbp*/
 	pushq	%r12		/* pt_regs->r12 */
-	xorq    %r12, %r12	/* nospec   r12*/
+	xorl	%r12d, %r12d	/* nospec   r12*/
 	pushq	%r13		/* pt_regs->r13 */
-	xorq    %r13, %r13	/* nospec   r13*/
+	xorl	%r13d, %r13d	/* nospec   r13*/
 	pushq	%r14		/* pt_regs->r14 */
-	xorq    %r14, %r14	/* nospec   r14*/
+	xorl	%r14d, %r14d	/* nospec   r14*/
 	pushq	%r15		/* pt_regs->r15 */
-	xorq    %r15, %r15	/* nospec   r15*/
+	xorl	%r15d, %r15d	/* nospec   r15*/
 	UNWIND_HINT_REGS
 	.if \save_ret
 	pushq	%rsi		/* return address on top of stack */
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index fd65e016e413..364ea4a207be 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -85,25 +85,25 @@ ENTRY(entry_SYSENTER_compat)
 	pushq	%rcx			/* pt_regs->cx */
 	pushq	$-ENOSYS		/* pt_regs->ax */
 	pushq   $0			/* pt_regs->r8  = 0 */
-	xorq	%r8, %r8		/* nospec   r8 */
+	xorl	%r8d, %r8d		/* nospec   r8 */
 	pushq   $0			/* pt_regs->r9  = 0 */
-	xorq	%r9, %r9		/* nospec   r9 */
+	xorl	%r9d, %r9d		/* nospec   r9 */
 	pushq   $0			/* pt_regs->r10 = 0 */
-	xorq	%r10, %r10		/* nospec   r10 */
+	xorl	%r10d, %r10d		/* nospec   r10 */
 	pushq   $0			/* pt_regs->r11 = 0 */
-	xorq	%r11, %r11		/* nospec   r11 */
+	xorl	%r11d, %r11d		/* nospec   r11 */
 	pushq   %rbx                    /* pt_regs->rbx */
 	xorl	%ebx, %ebx		/* nospec   rbx */
 	pushq   %rbp                    /* pt_regs->rbp (will be overwritten) */
 	xorl	%ebp, %ebp		/* nospec   rbp */
 	pushq   $0			/* pt_regs->r12 = 0 */
-	xorq	%r12, %r12		/* nospec   r12 */
+	xorl	%r12d, %r12d		/* nospec   r12 */
 	pushq   $0			/* pt_regs->r13 = 0 */
-	xorq	%r13, %r13		/* nospec   r13 */
+	xorl	%r13d, %r13d		/* nospec   r13 */
 	pushq   $0			/* pt_regs->r14 = 0 */
-	xorq	%r14, %r14		/* nospec   r14 */
+	xorl	%r14d, %r14d		/* nospec   r14 */
 	pushq   $0			/* pt_regs->r15 = 0 */
-	xorq	%r15, %r15		/* nospec   r15 */
+	xorl	%r15d, %r15d		/* nospec   r15 */
 	cld
 
 	/*
@@ -224,25 +224,25 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
 	pushq	%rbp			/* pt_regs->cx (stashed in bp) */
 	pushq	$-ENOSYS		/* pt_regs->ax */
 	pushq   $0			/* pt_regs->r8  = 0 */
-	xorq	%r8, %r8		/* nospec   r8 */
+	xorl	%r8d, %r8d		/* nospec   r8 */
 	pushq   $0			/* pt_regs->r9  = 0 */
-	xorq	%r9, %r9		/* nospec   r9 */
+	xorl	%r9d, %r9d		/* nospec   r9 */
 	pushq   $0			/* pt_regs->r10 = 0 */
-	xorq	%r10, %r10		/* nospec   r10 */
+	xorl	%r10d, %r10d		/* nospec   r10 */
 	pushq   $0			/* pt_regs->r11 = 0 */
-	xorq	%r11, %r11		/* nospec   r11 */
+	xorl	%r11d, %r11d		/* nospec   r11 */
 	pushq   %rbx                    /* pt_regs->rbx */
 	xorl	%ebx, %ebx		/* nospec   rbx */
 	pushq   %rbp                    /* pt_regs->rbp (will be overwritten) */
 	xorl	%ebp, %ebp		/* nospec   rbp */
 	pushq   $0			/* pt_regs->r12 = 0 */
-	xorq	%r12, %r12		/* nospec   r12 */
+	xorl	%r12d, %r12d		/* nospec   r12 */
 	pushq   $0			/* pt_regs->r13 = 0 */
-	xorq	%r13, %r13		/* nospec   r13 */
+	xorl	%r13d, %r13d		/* nospec   r13 */
 	pushq   $0			/* pt_regs->r14 = 0 */
-	xorq	%r14, %r14		/* nospec   r14 */
+	xorl	%r14d, %r14d		/* nospec   r14 */
 	pushq   $0			/* pt_regs->r15 = 0 */
-	xorq	%r15, %r15		/* nospec   r15 */
+	xorl	%r15d, %r15d		/* nospec   r15 */
 
 	/*
 	 * User mode is traced as though IRQs are on, and SYSENTER
@@ -298,9 +298,9 @@ sysret32_from_system_call:
 	 */
 	SWITCH_TO_USER_CR3_NOSTACK scratch_reg=%r8 scratch_reg2=%r9
 
-	xorq	%r8, %r8
-	xorq	%r9, %r9
-	xorq	%r10, %r10
+	xorl	%r8d, %r8d
+	xorl	%r9d, %r9d
+	xorl	%r10d, %r10d
 	swapgs
 	sysretl
 END(entry_SYSCALL_compat)
@@ -358,25 +358,25 @@ ENTRY(entry_INT80_compat)
 	pushq	%rcx			/* pt_regs->cx */
 	pushq	$-ENOSYS		/* pt_regs->ax */
 	pushq   $0			/* pt_regs->r8  = 0 */
-	xorq	%r8, %r8		/* nospec   r8 */
+	xorl	%r8d, %r8d		/* nospec   r8 */
 	pushq   $0			/* pt_regs->r9  = 0 */
-	xorq	%r9, %r9		/* nospec   r9 */
+	xorl	%r9d, %r9d		/* nospec   r9 */
 	pushq   $0			/* pt_regs->r10 = 0 */
-	xorq	%r10, %r10		/* nospec   r10 */
+	xorl	%r10d, %r10d		/* nospec   r10 */
 	pushq   $0			/* pt_regs->r11 = 0 */
-	xorq	%r11, %r11		/* nospec   r11 */
+	xorl	%r11d, %r11d		/* nospec   r11 */
 	pushq   %rbx                    /* pt_regs->rbx */
 	xorl	%ebx, %ebx		/* nospec   rbx */
 	pushq   %rbp                    /* pt_regs->rbp */
 	xorl	%ebp, %ebp		/* nospec   rbp */
 	pushq   %r12                    /* pt_regs->r12 */
-	xorq	%r12, %r12		/* nospec   r12 */
+	xorl	%r12d, %r12d		/* nospec   r12 */
 	pushq   %r13                    /* pt_regs->r13 */
-	xorq	%r13, %r13		/* nospec   r13 */
+	xorl	%r13d, %r13d		/* nospec   r13 */
 	pushq   %r14                    /* pt_regs->r14 */
-	xorq	%r14, %r14		/* nospec   r14 */
+	xorl	%r14d, %r14d		/* nospec   r14 */
 	pushq   %r15                    /* pt_regs->r15 */
-	xorq	%r15, %r15		/* nospec   r15 */
+	xorl	%r15d, %r15d		/* nospec   r15 */
 	cld
 
 	/*
-- 
2.16.1

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [tip:x86/pti] x86/entry: Reduce the code footprint of the 'idtentry' macro
  2018-02-14 17:59 ` [PATCH 1/2] x86/entry: reduce static footprint of idtentry Dominik Brodowski
@ 2018-02-17 11:40   ` tip-bot for Dominik Brodowski
  2018-02-17 12:28     ` Dominik Brodowski
  0 siblings, 1 reply; 8+ messages in thread
From: tip-bot for Dominik Brodowski @ 2018-02-17 11:40 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: arjan, mingo, gregkh, dan.j.williams, bp, luto, linux, torvalds,
	dwmw2, jpoimboe, hpa, dave.hansen, linux-kernel, peterz, tglx

Commit-ID:  9e809d15d6b692fa061d74be7aaab1c79f6784b8
Gitweb:     https://git.kernel.org/tip/9e809d15d6b692fa061d74be7aaab1c79f6784b8
Author:     Dominik Brodowski <linux@dominikbrodowski.net>
AuthorDate: Wed, 14 Feb 2018 18:59:23 +0100
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Sat, 17 Feb 2018 11:14:33 +0100

x86/entry: Reduce the code footprint of the 'idtentry' macro

Play a little trick in the generic PUSH_AND_CLEAR_REGS macro
to insert the GP registers "above" the original return address.

This allows us to (re-)insert the macro in error_entry() and
paranoid_entry() and to remove it from the idtentry macro. This
reduces the static footprint significantly:

   text	   data	    bss	    dec	    hex	filename
  24307	      0	      0	  24307	   5ef3	entry_64.o-orig
  20987	      0	      0	  20987	   51fb	entry_64.o

Co-developed-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20180214175924.23065-2-linux@dominikbrodowski.net
[ Small tweaks to comments. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/entry/calling.h  | 11 ++++++++++-
 arch/x86/entry/entry_64.S | 18 ++++++++----------
 2 files changed, 18 insertions(+), 11 deletions(-)

diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index dce7092..196b610 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -97,7 +97,7 @@ For 32-bit we have the following conventions - kernel is built with
 
 #define SIZEOF_PTREGS	21*8
 
-.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax
+.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax save_ret=0
 	/*
 	 * Push registers and sanitize registers of values that a
 	 * speculation attack might otherwise want to exploit. The
@@ -105,8 +105,14 @@ For 32-bit we have the following conventions - kernel is built with
 	 * could be put to use in a speculative execution gadget.
 	 * Interleave XOR with PUSH for better uop scheduling:
 	 */
+	.if \save_ret
+	pushq	%rsi		/* pt_regs->si */
+	movq	8(%rsp), %rsi	/* temporarily store the return address in %rsi */
+	movq	%rdi, 8(%rsp)	/* pt_regs->di (overwriting original return address) */
+	.else
 	pushq   %rdi		/* pt_regs->di */
 	pushq   %rsi		/* pt_regs->si */
+	.endif
 	pushq	\rdx		/* pt_regs->dx */
 	pushq   %rcx		/* pt_regs->cx */
 	pushq   \rax		/* pt_regs->ax */
@@ -131,6 +137,9 @@ For 32-bit we have the following conventions - kernel is built with
 	pushq	%r15		/* pt_regs->r15 */
 	xorq    %r15, %r15	/* nospec   r15*/
 	UNWIND_HINT_REGS
+	.if \save_ret
+	pushq	%rsi		/* return address on top of stack */
+	.endif
 .endm
 
 .macro POP_REGS pop_rdi=1 skip_r11rcx=0
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 8971bd6..77edc23 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -875,12 +875,8 @@ ENTRY(\sym)
 	pushq	$-1				/* ORIG_RAX: no syscall to restart */
 	.endif
 
-	/* Save all registers in pt_regs */
-	PUSH_AND_CLEAR_REGS
-	ENCODE_FRAME_POINTER
-
 	.if \paranoid < 2
-	testb	$3, CS(%rsp)			/* If coming from userspace, switch stacks */
+	testb	$3, CS-ORIG_RAX(%rsp)		/* If coming from userspace, switch stacks */
 	jnz	.Lfrom_usermode_switch_stack_\@
 	.endif
 
@@ -1130,13 +1126,15 @@ idtentry machine_check		do_mce			has_error_code=0	paranoid=1
 #endif
 
 /*
- * Switch gs if needed.
+ * Save all registers in pt_regs, and switch gs if needed.
  * Use slow, but surefire "are we in kernel?" check.
  * Return: ebx=0: need swapgs on exit, ebx=1: otherwise
  */
 ENTRY(paranoid_entry)
 	UNWIND_HINT_FUNC
 	cld
+	PUSH_AND_CLEAR_REGS save_ret=1
+	ENCODE_FRAME_POINTER 8
 	movl	$1, %ebx
 	movl	$MSR_GS_BASE, %ecx
 	rdmsr
@@ -1181,12 +1179,14 @@ ENTRY(paranoid_exit)
 END(paranoid_exit)
 
 /*
- * Switch gs if needed.
+ * Save all registers in pt_regs, and switch GS if needed.
  * Return: EBX=0: came from user mode; EBX=1: otherwise
  */
 ENTRY(error_entry)
-	UNWIND_HINT_REGS offset=8
+	UNWIND_HINT_FUNC
 	cld
+	PUSH_AND_CLEAR_REGS save_ret=1
+	ENCODE_FRAME_POINTER 8
 	testb	$3, CS+8(%rsp)
 	jz	.Lerror_kernelspace
 
@@ -1577,8 +1577,6 @@ end_repeat_nmi:
 	 * frame to point back to repeat_nmi.
 	 */
 	pushq	$-1				/* ORIG_RAX: no syscall to restart */
-	PUSH_AND_CLEAR_REGS
-	ENCODE_FRAME_POINTER
 
 	/*
 	 * Use paranoid_entry to handle SWAPGS, but no need to use paranoid_exit

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [tip:x86/pti] x86/entry/64: Use 'xorl' for faster register clearing
  2018-02-14 17:59 ` [PATCH 2/2] x86/entry/64: use xorl for register clearing Dominik Brodowski
@ 2018-02-17 11:40   ` tip-bot for Dominik Brodowski
  0 siblings, 0 replies; 8+ messages in thread
From: tip-bot for Dominik Brodowski @ 2018-02-17 11:40 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: torvalds, dvlasenk, arjan, luto, bp, peterz, dave.hansen,
	dan.j.williams, mingo, jpoimboe, hpa, dwmw2, linux-kernel, linux,
	tglx, gregkh

Commit-ID:  ced5d0bf603fa0baee8ea889e1d70971fd210894
Gitweb:     https://git.kernel.org/tip/ced5d0bf603fa0baee8ea889e1d70971fd210894
Author:     Dominik Brodowski <linux@dominikbrodowski.net>
AuthorDate: Wed, 14 Feb 2018 18:59:24 +0100
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Sat, 17 Feb 2018 11:14:33 +0100

x86/entry/64: Use 'xorl' for faster register clearing

On some x86 CPU microarchitectures using 'xorq' to clear general-purpose
registers is slower than 'xorl'. As 'xorl' is sufficient to clear all
64 bits of these registers due to zero-extension [*], switch the x86
64-bit entry code to use 'xorl'.

No change in functionality and no change in code size.

[*] According to Intel 64 and IA-32 Architecture Software Developer's
    Manual, section 3.4.1.1, the result of 32-bit operands are "zero-
    extended to a 64-bit result in the destination general-purpose
    register." The AMD64 Architecture Programmer’s Manual Volume 3,
    Appendix B.1, describes the same behaviour.

Suggested-by: Denys Vlasenko <dvlasenk@redhat.com>
Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20180214175924.23065-3-linux@dominikbrodowski.net
[ Improved on the changelog a bit. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/entry/calling.h         | 16 ++++++------
 arch/x86/entry/entry_64_compat.S | 54 ++++++++++++++++++++--------------------
 2 files changed, 35 insertions(+), 35 deletions(-)

diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 196b610..5d10b7a 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -117,25 +117,25 @@ For 32-bit we have the following conventions - kernel is built with
 	pushq   %rcx		/* pt_regs->cx */
 	pushq   \rax		/* pt_regs->ax */
 	pushq   %r8		/* pt_regs->r8 */
-	xorq    %r8, %r8	/* nospec   r8 */
+	xorl	%r8d, %r8d	/* nospec   r8 */
 	pushq   %r9		/* pt_regs->r9 */
-	xorq    %r9, %r9	/* nospec   r9 */
+	xorl	%r9d, %r9d	/* nospec   r9 */
 	pushq   %r10		/* pt_regs->r10 */
-	xorq    %r10, %r10	/* nospec   r10 */
+	xorl	%r10d, %r10d	/* nospec   r10 */
 	pushq   %r11		/* pt_regs->r11 */
-	xorq    %r11, %r11	/* nospec   r11*/
+	xorl	%r11d, %r11d	/* nospec   r11*/
 	pushq	%rbx		/* pt_regs->rbx */
 	xorl    %ebx, %ebx	/* nospec   rbx*/
 	pushq	%rbp		/* pt_regs->rbp */
 	xorl    %ebp, %ebp	/* nospec   rbp*/
 	pushq	%r12		/* pt_regs->r12 */
-	xorq    %r12, %r12	/* nospec   r12*/
+	xorl	%r12d, %r12d	/* nospec   r12*/
 	pushq	%r13		/* pt_regs->r13 */
-	xorq    %r13, %r13	/* nospec   r13*/
+	xorl	%r13d, %r13d	/* nospec   r13*/
 	pushq	%r14		/* pt_regs->r14 */
-	xorq    %r14, %r14	/* nospec   r14*/
+	xorl	%r14d, %r14d	/* nospec   r14*/
 	pushq	%r15		/* pt_regs->r15 */
-	xorq    %r15, %r15	/* nospec   r15*/
+	xorl	%r15d, %r15d	/* nospec   r15*/
 	UNWIND_HINT_REGS
 	.if \save_ret
 	pushq	%rsi		/* return address on top of stack */
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index fd65e01..364ea4a 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -85,25 +85,25 @@ ENTRY(entry_SYSENTER_compat)
 	pushq	%rcx			/* pt_regs->cx */
 	pushq	$-ENOSYS		/* pt_regs->ax */
 	pushq   $0			/* pt_regs->r8  = 0 */
-	xorq	%r8, %r8		/* nospec   r8 */
+	xorl	%r8d, %r8d		/* nospec   r8 */
 	pushq   $0			/* pt_regs->r9  = 0 */
-	xorq	%r9, %r9		/* nospec   r9 */
+	xorl	%r9d, %r9d		/* nospec   r9 */
 	pushq   $0			/* pt_regs->r10 = 0 */
-	xorq	%r10, %r10		/* nospec   r10 */
+	xorl	%r10d, %r10d		/* nospec   r10 */
 	pushq   $0			/* pt_regs->r11 = 0 */
-	xorq	%r11, %r11		/* nospec   r11 */
+	xorl	%r11d, %r11d		/* nospec   r11 */
 	pushq   %rbx                    /* pt_regs->rbx */
 	xorl	%ebx, %ebx		/* nospec   rbx */
 	pushq   %rbp                    /* pt_regs->rbp (will be overwritten) */
 	xorl	%ebp, %ebp		/* nospec   rbp */
 	pushq   $0			/* pt_regs->r12 = 0 */
-	xorq	%r12, %r12		/* nospec   r12 */
+	xorl	%r12d, %r12d		/* nospec   r12 */
 	pushq   $0			/* pt_regs->r13 = 0 */
-	xorq	%r13, %r13		/* nospec   r13 */
+	xorl	%r13d, %r13d		/* nospec   r13 */
 	pushq   $0			/* pt_regs->r14 = 0 */
-	xorq	%r14, %r14		/* nospec   r14 */
+	xorl	%r14d, %r14d		/* nospec   r14 */
 	pushq   $0			/* pt_regs->r15 = 0 */
-	xorq	%r15, %r15		/* nospec   r15 */
+	xorl	%r15d, %r15d		/* nospec   r15 */
 	cld
 
 	/*
@@ -224,25 +224,25 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
 	pushq	%rbp			/* pt_regs->cx (stashed in bp) */
 	pushq	$-ENOSYS		/* pt_regs->ax */
 	pushq   $0			/* pt_regs->r8  = 0 */
-	xorq	%r8, %r8		/* nospec   r8 */
+	xorl	%r8d, %r8d		/* nospec   r8 */
 	pushq   $0			/* pt_regs->r9  = 0 */
-	xorq	%r9, %r9		/* nospec   r9 */
+	xorl	%r9d, %r9d		/* nospec   r9 */
 	pushq   $0			/* pt_regs->r10 = 0 */
-	xorq	%r10, %r10		/* nospec   r10 */
+	xorl	%r10d, %r10d		/* nospec   r10 */
 	pushq   $0			/* pt_regs->r11 = 0 */
-	xorq	%r11, %r11		/* nospec   r11 */
+	xorl	%r11d, %r11d		/* nospec   r11 */
 	pushq   %rbx                    /* pt_regs->rbx */
 	xorl	%ebx, %ebx		/* nospec   rbx */
 	pushq   %rbp                    /* pt_regs->rbp (will be overwritten) */
 	xorl	%ebp, %ebp		/* nospec   rbp */
 	pushq   $0			/* pt_regs->r12 = 0 */
-	xorq	%r12, %r12		/* nospec   r12 */
+	xorl	%r12d, %r12d		/* nospec   r12 */
 	pushq   $0			/* pt_regs->r13 = 0 */
-	xorq	%r13, %r13		/* nospec   r13 */
+	xorl	%r13d, %r13d		/* nospec   r13 */
 	pushq   $0			/* pt_regs->r14 = 0 */
-	xorq	%r14, %r14		/* nospec   r14 */
+	xorl	%r14d, %r14d		/* nospec   r14 */
 	pushq   $0			/* pt_regs->r15 = 0 */
-	xorq	%r15, %r15		/* nospec   r15 */
+	xorl	%r15d, %r15d		/* nospec   r15 */
 
 	/*
 	 * User mode is traced as though IRQs are on, and SYSENTER
@@ -298,9 +298,9 @@ sysret32_from_system_call:
 	 */
 	SWITCH_TO_USER_CR3_NOSTACK scratch_reg=%r8 scratch_reg2=%r9
 
-	xorq	%r8, %r8
-	xorq	%r9, %r9
-	xorq	%r10, %r10
+	xorl	%r8d, %r8d
+	xorl	%r9d, %r9d
+	xorl	%r10d, %r10d
 	swapgs
 	sysretl
 END(entry_SYSCALL_compat)
@@ -358,25 +358,25 @@ ENTRY(entry_INT80_compat)
 	pushq	%rcx			/* pt_regs->cx */
 	pushq	$-ENOSYS		/* pt_regs->ax */
 	pushq   $0			/* pt_regs->r8  = 0 */
-	xorq	%r8, %r8		/* nospec   r8 */
+	xorl	%r8d, %r8d		/* nospec   r8 */
 	pushq   $0			/* pt_regs->r9  = 0 */
-	xorq	%r9, %r9		/* nospec   r9 */
+	xorl	%r9d, %r9d		/* nospec   r9 */
 	pushq   $0			/* pt_regs->r10 = 0 */
-	xorq	%r10, %r10		/* nospec   r10 */
+	xorl	%r10d, %r10d		/* nospec   r10 */
 	pushq   $0			/* pt_regs->r11 = 0 */
-	xorq	%r11, %r11		/* nospec   r11 */
+	xorl	%r11d, %r11d		/* nospec   r11 */
 	pushq   %rbx                    /* pt_regs->rbx */
 	xorl	%ebx, %ebx		/* nospec   rbx */
 	pushq   %rbp                    /* pt_regs->rbp */
 	xorl	%ebp, %ebp		/* nospec   rbp */
 	pushq   %r12                    /* pt_regs->r12 */
-	xorq	%r12, %r12		/* nospec   r12 */
+	xorl	%r12d, %r12d		/* nospec   r12 */
 	pushq   %r13                    /* pt_regs->r13 */
-	xorq	%r13, %r13		/* nospec   r13 */
+	xorl	%r13d, %r13d		/* nospec   r13 */
 	pushq   %r14                    /* pt_regs->r14 */
-	xorq	%r14, %r14		/* nospec   r14 */
+	xorl	%r14d, %r14d		/* nospec   r14 */
 	pushq   %r15                    /* pt_regs->r15 */
-	xorq	%r15, %r15		/* nospec   r15 */
+	xorl	%r15d, %r15d		/* nospec   r15 */
 	cld
 
 	/*

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [tip:x86/pti] x86/entry: Reduce the code footprint of the 'idtentry' macro
  2018-02-17 11:40   ` [tip:x86/pti] x86/entry: Reduce the code footprint of the 'idtentry' macro tip-bot for Dominik Brodowski
@ 2018-02-17 12:28     ` Dominik Brodowski
  0 siblings, 0 replies; 8+ messages in thread
From: Dominik Brodowski @ 2018-02-17 12:28 UTC (permalink / raw)
  To: arjan, gregkh, mingo, dan.j.williams, luto, bp, torvalds, dwmw2,
	hpa, dave.hansen, jpoimboe, linux-kernel, peterz, tglx
  Cc: linux-tip-commits

On Sat, Feb 17, 2018 at 03:40:13AM -0800, tip-bot for Dominik Brodowski wrote:
> Commit-ID:  9e809d15d6b692fa061d74be7aaab1c79f6784b8
> Gitweb:     https://git.kernel.org/tip/9e809d15d6b692fa061d74be7aaab1c79f6784b8
> Author:     Dominik Brodowski <linux@dominikbrodowski.net>
> AuthorDate: Wed, 14 Feb 2018 18:59:23 +0100
> Committer:  Ingo Molnar <mingo@kernel.org>
> CommitDate: Sat, 17 Feb 2018 11:14:33 +0100
> 
> x86/entry: Reduce the code footprint of the 'idtentry' macro
> 
> Play a little trick in the generic PUSH_AND_CLEAR_REGS macro
> to insert the GP registers "above" the original return address.
> 
> This allows us to (re-)insert the macro in error_entry() and
> paranoid_entry() and to remove it from the idtentry macro. This
> reduces the static footprint significantly:
> 
>    text	   data	    bss	    dec	    hex	filename
>   24307	      0	      0	  24307	   5ef3	entry_64.o-orig
>   20987	      0	      0	  20987	   51fb	entry_64.o
> 
> Co-developed-by: Linus Torvalds <torvalds@linux-foundation.org>
> Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
> Cc: Andy Lutomirski <luto@kernel.org>
> Cc: Arjan van de Ven <arjan@linux.intel.com>
> Cc: Borislav Petkov <bp@alien8.de>
> Cc: Dan Williams <dan.j.williams@intel.com>
> Cc: Dave Hansen <dave.hansen@linux.intel.com>
> Cc: David Woodhouse <dwmw2@infradead.org>
> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
> Cc: Josh Poimboeuf <jpoimboe@redhat.com>
> Cc: Linus Torvalds <torvalds@linux-foundation.org>
> Cc: Peter Zijlstra <peterz@infradead.org>
> Cc: Thomas Gleixner <tglx@linutronix.de>
> Link: http://lkml.kernel.org/r/20180214175924.23065-2-linux@dominikbrodowski.net
> [ Small tweaks to comments. ]
> Signed-off-by: Ingo Molnar <mingo@kernel.org>

Thanks! Just a minor thing:

> - * Switch gs if needed.
> + * Save all registers in pt_regs, and switch gs if needed.

...

> - * Switch gs if needed.
> + * Save all registers in pt_regs, and switch GS if needed.

If we switch from gs to GS, we should do it in both places.

Thanks!

	Dominik

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] x86/entry/64: use xorl for register clearing
  2018-02-14 18:21 [PATCH 2/2] x86/entry/64: use xorl for " Alexey Dobriyan
@ 2018-02-14 18:27 ` Dominik Brodowski
  0 siblings, 0 replies; 8+ messages in thread
From: Dominik Brodowski @ 2018-02-14 18:27 UTC (permalink / raw)
  To: Alexey Dobriyan; +Cc: linux-kernel

On Wed, Feb 14, 2018 at 09:21:12PM +0300, Alexey Dobriyan wrote:
> > -	xorq    %r8, %r8	/* nospec   r8 */
> > +	xorl	%r8d, %r8d	/* nospec   r8 */
> 
> The suffix should be simply dropped as operand size is unambigious.
> It is just one more character than necessary on the screen.

No strong feelings about this issue, but I prefer it to be explicit. And
that's what seems to be the standard in arch/x86/entry/ .

Thanks,
	Dominik

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] x86/entry/64: use xorl for register clearing
@ 2018-02-14 18:21 Alexey Dobriyan
  2018-02-14 18:27 ` Dominik Brodowski
  0 siblings, 1 reply; 8+ messages in thread
From: Alexey Dobriyan @ 2018-02-14 18:21 UTC (permalink / raw)
  To: linux; +Cc: linux-kernel

> -	xorq    %r8, %r8	/* nospec   r8 */
> +	xorl	%r8d, %r8d	/* nospec   r8 */

The suffix should be simply dropped as operand size is unambigious.
It is just one more character than necessary on the screen.

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2018-02-17 12:28 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-02-14 17:59 [PATCH 0/2] x86/entry: xorq->xorl; idtentry size reduction Dominik Brodowski
2018-02-14 17:59 ` [PATCH 1/2] x86/entry: reduce static footprint of idtentry Dominik Brodowski
2018-02-17 11:40   ` [tip:x86/pti] x86/entry: Reduce the code footprint of the 'idtentry' macro tip-bot for Dominik Brodowski
2018-02-17 12:28     ` Dominik Brodowski
2018-02-14 17:59 ` [PATCH 2/2] x86/entry/64: use xorl for register clearing Dominik Brodowski
2018-02-17 11:40   ` [tip:x86/pti] x86/entry/64: Use 'xorl' for faster " tip-bot for Dominik Brodowski
2018-02-14 18:21 [PATCH 2/2] x86/entry/64: use xorl for " Alexey Dobriyan
2018-02-14 18:27 ` Dominik Brodowski

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.