All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/4] x86: entry.S cleanup
@ 2015-01-08 16:25 Denys Vlasenko
  2015-01-08 16:25 ` [PATCH 1/4] x86: entry_64.S: delete unused code Denys Vlasenko
                   ` (3 more replies)
  0 siblings, 4 replies; 29+ messages in thread
From: Denys Vlasenko @ 2015-01-08 16:25 UTC (permalink / raw)
  To: linux-kernel
  Cc: Denys Vlasenko, Linus Torvalds, Oleg Nesterov, H. Peter Anvin,
	Andy Lutomirski, Frederic Weisbecker, X86 ML, Alexei Starovoitov,
	Will Drewry, Kees Cook

This is the first part of a bigger patch set I posted last August,
then under title of "x86: entry.S optimizations".

I fully rebased that patch set to Linus tree, but currently
Linus and -next are diverging a bit in arch/x86/kernel/entry_64.S

To make it easier for mainteiners, I'm splitting the old patch set
into more digestible parts.

This is the first set. These four patches apply both to Linus and -next.
They do not contain any substantial code changes.
Patches 2 and 4 don't result in any actual code changes.

CC: Linus Torvalds <torvalds@linux-foundation.org>
CC: Oleg Nesterov <oleg@redhat.com>
CC: "H. Peter Anvin" <hpa@zytor.com>
CC: Andy Lutomirski <luto@amacapital.net>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: X86 ML <x86@kernel.org>
CC: Alexei Starovoitov <ast@plumgrid.com>
CC: Will Drewry <wad@chromium.org>
CC: Kees Cook <keescook@chromium.org>
CC: linux-kernel@vger.kernel.org

Denys Vlasenko (4):
  x86: entry_64.S: delete unused code
  x86: ia32entry.S: fix wrong symbolic constant usage: R11->ARGOFFSET
  x86: open-code register save/restore in trace_hardirqs thunks
  x86: entry_64.S: fold SAVE_ARGS_IRQ macro into its sole user

 arch/x86/ia32/ia32entry.S      |   4 +-
 arch/x86/include/asm/calling.h |   1 -
 arch/x86/kernel/entry_64.S     | 122 ++++++++++++++---------------------------
 arch/x86/lib/thunk_64.S        |  29 ++++++++--
 4 files changed, 68 insertions(+), 88 deletions(-)

-- 
1.8.1.4


^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH 1/4] x86: entry_64.S: delete unused code
  2015-01-08 16:25 [PATCH 0/4] x86: entry.S cleanup Denys Vlasenko
@ 2015-01-08 16:25 ` Denys Vlasenko
  2015-01-08 18:16   ` Borislav Petkov
  2015-01-08 16:25 ` [PATCH 2/4] x86: ia32entry.S: fix wrong symbolic constant usage: R11->ARGOFFSET Denys Vlasenko
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 29+ messages in thread
From: Denys Vlasenko @ 2015-01-08 16:25 UTC (permalink / raw)
  To: linux-kernel
  Cc: Denys Vlasenko, Linus Torvalds, Oleg Nesterov, H. Peter Anvin,
	Andy Lutomirski, Frederic Weisbecker, X86 ML, Alexei Starovoitov,
	Will Drewry, Kees Cook

A define, two macros and an unreferenced bit of assembly are gone.

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
CC: Linus Torvalds <torvalds@linux-foundation.org>
CC: Oleg Nesterov <oleg@redhat.com>
CC: "H. Peter Anvin" <hpa@zytor.com>
CC: Andy Lutomirski <luto@amacapital.net>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: X86 ML <x86@kernel.org>
CC: Alexei Starovoitov <ast@plumgrid.com>
CC: Will Drewry <wad@chromium.org>
CC: Kees Cook <keescook@chromium.org>
CC: linux-kernel@vger.kernel.org

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
---
 arch/x86/include/asm/calling.h |  1 -
 arch/x86/kernel/entry_64.S     | 34 ----------------------------------
 2 files changed, 35 deletions(-)

diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h
index 76659b6..1f1297b 100644
--- a/arch/x86/include/asm/calling.h
+++ b/arch/x86/include/asm/calling.h
@@ -83,7 +83,6 @@ For 32-bit we have the following conventions - kernel is built with
 #define SS		160
 
 #define ARGOFFSET	R11
-#define SWFRAME		ORIG_RAX
 
 	.macro SAVE_ARGS addskip=0, save_rcx=1, save_r891011=1, rax_enosys=0
 	subq  $9*8+\addskip, %rsp
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 931f32f..5ed4773 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -155,27 +155,6 @@ ENDPROC(native_usergs_sysret64)
 	movq \tmp,R11+\offset(%rsp)
 	.endm
 
-	.macro FAKE_STACK_FRAME child_rip
-	/* push in order ss, rsp, eflags, cs, rip */
-	xorl %eax, %eax
-	pushq_cfi $__KERNEL_DS /* ss */
-	/*CFI_REL_OFFSET	ss,0*/
-	pushq_cfi %rax /* rsp */
-	CFI_REL_OFFSET	rsp,0
-	pushq_cfi $(X86_EFLAGS_IF|X86_EFLAGS_FIXED) /* eflags - interrupts on */
-	/*CFI_REL_OFFSET	rflags,0*/
-	pushq_cfi $__KERNEL_CS /* cs */
-	/*CFI_REL_OFFSET	cs,0*/
-	pushq_cfi \child_rip /* rip */
-	CFI_REL_OFFSET	rip,0
-	pushq_cfi %rax /* orig rax */
-	.endm
-
-	.macro UNFAKE_STACK_FRAME
-	addq $8*6, %rsp
-	CFI_ADJUST_CFA_OFFSET	-(6*8)
-	.endm
-
 /*
  * initial frame state for interrupts (and exceptions without error code)
  */
@@ -626,19 +605,6 @@ END(\label)
 	FORK_LIKE  vfork
 	FIXED_FRAME stub_iopl, sys_iopl
 
-ENTRY(ptregscall_common)
-	DEFAULT_FRAME 1 8	/* offset 8: return address */
-	RESTORE_TOP_OF_STACK %r11, 8
-	movq_cfi_restore R15+8, r15
-	movq_cfi_restore R14+8, r14
-	movq_cfi_restore R13+8, r13
-	movq_cfi_restore R12+8, r12
-	movq_cfi_restore RBP+8, rbp
-	movq_cfi_restore RBX+8, rbx
-	ret $REST_SKIP		/* pop extended registers */
-	CFI_ENDPROC
-END(ptregscall_common)
-
 ENTRY(stub_execve)
 	CFI_STARTPROC
 	addq $8, %rsp
-- 
1.8.1.4


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH 2/4] x86: ia32entry.S: fix wrong symbolic constant usage: R11->ARGOFFSET
  2015-01-08 16:25 [PATCH 0/4] x86: entry.S cleanup Denys Vlasenko
  2015-01-08 16:25 ` [PATCH 1/4] x86: entry_64.S: delete unused code Denys Vlasenko
@ 2015-01-08 16:25 ` Denys Vlasenko
  2015-01-09 10:41   ` Borislav Petkov
  2015-01-08 16:25 ` [PATCH 3/4] x86: open-code register save/restore in trace_hardirqs thunks Denys Vlasenko
  2015-01-08 16:25 ` [PATCH 4/4] x86: entry_64.S: fold SAVE_ARGS_IRQ macro into its sole user Denys Vlasenko
  3 siblings, 1 reply; 29+ messages in thread
From: Denys Vlasenko @ 2015-01-08 16:25 UTC (permalink / raw)
  To: linux-kernel
  Cc: Denys Vlasenko, Linus Torvalds, Oleg Nesterov, H. Peter Anvin,
	Andy Lutomirski, Frederic Weisbecker, X86 ML, Alexei Starovoitov,
	Will Drewry, Kees Cook

The values of these two constants are the same, the meaning is different.

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
CC: Linus Torvalds <torvalds@linux-foundation.org>
CC: Oleg Nesterov <oleg@redhat.com>
CC: "H. Peter Anvin" <hpa@zytor.com>
CC: Andy Lutomirski <luto@amacapital.net>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: X86 ML <x86@kernel.org>
CC: Alexei Starovoitov <ast@plumgrid.com>
CC: Will Drewry <wad@chromium.org>
CC: Kees Cook <keescook@chromium.org>
CC: linux-kernel@vger.kernel.org
---
 arch/x86/ia32/ia32entry.S | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 82e8a1d..156ebca 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -179,8 +179,8 @@ sysenter_dispatch:
 sysexit_from_sys_call:
 	andl    $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	/* clear IF, that popfq doesn't enable interrupts early */
-	andl  $~0x200,EFLAGS-R11(%rsp) 
-	movl	RIP-R11(%rsp),%edx		/* User %eip */
+	andl	$~0x200,EFLAGS-ARGOFFSET(%rsp)
+	movl	RIP-ARGOFFSET(%rsp),%edx		/* User %eip */
 	CFI_REGISTER rip,rdx
 	RESTORE_ARGS 0,24,0,0,0,0
 	xorq	%r8,%r8
-- 
1.8.1.4


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH 3/4] x86: open-code register save/restore in trace_hardirqs thunks
  2015-01-08 16:25 [PATCH 0/4] x86: entry.S cleanup Denys Vlasenko
  2015-01-08 16:25 ` [PATCH 1/4] x86: entry_64.S: delete unused code Denys Vlasenko
  2015-01-08 16:25 ` [PATCH 2/4] x86: ia32entry.S: fix wrong symbolic constant usage: R11->ARGOFFSET Denys Vlasenko
@ 2015-01-08 16:25 ` Denys Vlasenko
  2015-01-09 10:55   ` Borislav Petkov
  2015-01-09 12:19   ` Borislav Petkov
  2015-01-08 16:25 ` [PATCH 4/4] x86: entry_64.S: fold SAVE_ARGS_IRQ macro into its sole user Denys Vlasenko
  3 siblings, 2 replies; 29+ messages in thread
From: Denys Vlasenko @ 2015-01-08 16:25 UTC (permalink / raw)
  To: linux-kernel
  Cc: Denys Vlasenko, Linus Torvalds, Oleg Nesterov, H. Peter Anvin,
	Andy Lutomirski, Frederic Weisbecker, X86 ML, Alexei Starovoitov,
	Will Drewry, Kees Cook

This is a preparatory patch for change in "struct pt_regs"
handling in entry_64.S.

trace_hardirqs thunks were (ab)using a part of pt_regs
handling code, namely SAVE_ARGS/RESTORE_ARGS macros,
to save/restore registers across C function calls.

Since SAVE_ARGS is going to be changed, open-code
register saving/restoring here.

Incidentally, this removes a bit of dead code:
one SAVE_ARGS was used just to emit a CFI annotation,
but it also generated unreachable assembly insns.

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
CC: Linus Torvalds <torvalds@linux-foundation.org>
CC: Oleg Nesterov <oleg@redhat.com>
CC: "H. Peter Anvin" <hpa@zytor.com>
CC: Andy Lutomirski <luto@amacapital.net>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: X86 ML <x86@kernel.org>
CC: Alexei Starovoitov <ast@plumgrid.com>
CC: Will Drewry <wad@chromium.org>
CC: Kees Cook <keescook@chromium.org>
CC: linux-kernel@vger.kernel.org
---
 arch/x86/lib/thunk_64.S | 29 ++++++++++++++++++++++++-----
 1 file changed, 24 insertions(+), 5 deletions(-)

diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S
index b30b5eb..03a3883 100644
--- a/arch/x86/lib/thunk_64.S
+++ b/arch/x86/lib/thunk_64.S
@@ -16,10 +16,20 @@
 \name:
 	CFI_STARTPROC
 
-	/* this one pushes 9 elems, the next one would be %rIP */
-	SAVE_ARGS
+	subq  $9*8, %rsp
+	CFI_ADJUST_CFA_OFFSET 9*8
+	movq_cfi r11, 0*8
+	movq_cfi r10, 1*8
+	movq_cfi r9,  2*8
+	movq_cfi r8,  3*8
+	movq_cfi rax, 4*8
+	movq_cfi rcx, 5*8
+	movq_cfi rdx, 6*8
+	movq_cfi rsi, 7*8
+	movq_cfi rdi, 8*8
 
 	.if \put_ret_addr_in_rdi
+	/* 9*8(%rsp) is return addr on stack */
 	movq_cfi_restore 9*8, rdi
 	.endif
 
@@ -45,11 +55,20 @@
 #endif
 #endif
 
-	/* SAVE_ARGS below is used only for the .cfi directives it contains. */
 	CFI_STARTPROC
-	SAVE_ARGS
+	CFI_ADJUST_CFA_OFFSET 9*8
 restore:
-	RESTORE_ARGS
+	movq_cfi_restore 0*8, r11
+	movq_cfi_restore 1*8, r10
+	movq_cfi_restore 2*8, r9
+	movq_cfi_restore 3*8, r8
+	movq_cfi_restore 4*8, rax
+	movq_cfi_restore 5*8, rcx
+	movq_cfi_restore 6*8, rdx
+	movq_cfi_restore 7*8, rsi
+	movq_cfi_restore 8*8, rdi
+	addq 9*8, %rsp
+	CFI_ADJUST_CFA_OFFSET -9*8
 	ret
 	CFI_ENDPROC
 	_ASM_NOKPROBE(restore)
-- 
1.8.1.4


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH 4/4] x86: entry_64.S: fold SAVE_ARGS_IRQ macro into its sole user
  2015-01-08 16:25 [PATCH 0/4] x86: entry.S cleanup Denys Vlasenko
                   ` (2 preceding siblings ...)
  2015-01-08 16:25 ` [PATCH 3/4] x86: open-code register save/restore in trace_hardirqs thunks Denys Vlasenko
@ 2015-01-08 16:25 ` Denys Vlasenko
  3 siblings, 0 replies; 29+ messages in thread
From: Denys Vlasenko @ 2015-01-08 16:25 UTC (permalink / raw)
  To: linux-kernel
  Cc: Denys Vlasenko, Linus Torvalds, Oleg Nesterov, H. Peter Anvin,
	Andy Lutomirski, Frederic Weisbecker, X86 ML, Alexei Starovoitov,
	Will Drewry, Kees Cook

No code changes.

This is a preparatory patch for change in "struct pt_regs" handling.

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
CC: Linus Torvalds <torvalds@linux-foundation.org>
CC: Oleg Nesterov <oleg@redhat.com>
CC: "H. Peter Anvin" <hpa@zytor.com>
CC: Andy Lutomirski <luto@amacapital.net>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: X86 ML <x86@kernel.org>
CC: Alexei Starovoitov <ast@plumgrid.com>
CC: Will Drewry <wad@chromium.org>
CC: Kees Cook <keescook@chromium.org>
CC: linux-kernel@vger.kernel.org
---
 arch/x86/kernel/entry_64.S | 88 ++++++++++++++++++++++------------------------
 1 file changed, 42 insertions(+), 46 deletions(-)

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 5ed4773..7d59df2 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -217,51 +217,6 @@ ENDPROC(native_usergs_sysret64)
 	CFI_REL_OFFSET r15, R15+\offset
 	.endm
 
-/* save partial stack frame */
-	.macro SAVE_ARGS_IRQ
-	cld
-	/* start from rbp in pt_regs and jump over */
-	movq_cfi rdi, (RDI-RBP)
-	movq_cfi rsi, (RSI-RBP)
-	movq_cfi rdx, (RDX-RBP)
-	movq_cfi rcx, (RCX-RBP)
-	movq_cfi rax, (RAX-RBP)
-	movq_cfi  r8,  (R8-RBP)
-	movq_cfi  r9,  (R9-RBP)
-	movq_cfi r10, (R10-RBP)
-	movq_cfi r11, (R11-RBP)
-
-	/* Save rbp so that we can unwind from get_irq_regs() */
-	movq_cfi rbp, 0
-
-	/* Save previous stack value */
-	movq %rsp, %rsi
-
-	leaq -RBP(%rsp),%rdi	/* arg1 for handler */
-	testl $3, CS-RBP(%rsi)
-	je 1f
-	SWAPGS
-	/*
-	 * irq_count is used to check if a CPU is already on an interrupt stack
-	 * or not. While this is essentially redundant with preempt_count it is
-	 * a little cheaper to use a separate counter in the PDA (short of
-	 * moving irq_enter into assembly, which would be too much work)
-	 */
-1:	incl PER_CPU_VAR(irq_count)
-	cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
-	CFI_DEF_CFA_REGISTER	rsi
-
-	/* Store previous stack value */
-	pushq %rsi
-	CFI_ESCAPE	0x0f /* DW_CFA_def_cfa_expression */, 6, \
-			0x77 /* DW_OP_breg7 */, 0, \
-			0x06 /* DW_OP_deref */, \
-			0x08 /* DW_OP_const1u */, SS+8-RBP, \
-			0x22 /* DW_OP_plus */
-	/* We entered an interrupt context - irqs are off: */
-	TRACE_IRQS_OFF
-	.endm
-
 ENTRY(save_paranoid)
 	XCPT_FRAME 1 RDI+8
 	cld
@@ -745,7 +700,48 @@ END(interrupt)
 	/* reserve pt_regs for scratch regs and rbp */
 	subq $ORIG_RAX-RBP, %rsp
 	CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP
-	SAVE_ARGS_IRQ
+	cld
+	/* start from rbp in pt_regs and jump over */
+	movq_cfi rdi, (RDI-RBP)
+	movq_cfi rsi, (RSI-RBP)
+	movq_cfi rdx, (RDX-RBP)
+	movq_cfi rcx, (RCX-RBP)
+	movq_cfi rax, (RAX-RBP)
+	movq_cfi  r8,  (R8-RBP)
+	movq_cfi  r9,  (R9-RBP)
+	movq_cfi r10, (R10-RBP)
+	movq_cfi r11, (R11-RBP)
+
+	/* Save rbp so that we can unwind from get_irq_regs() */
+	movq_cfi rbp, 0
+
+	/* Save previous stack value */
+	movq %rsp, %rsi
+
+	leaq -RBP(%rsp),%rdi	/* arg1 for handler */
+	testl $3, CS-RBP(%rsi)
+	je 1f
+	SWAPGS
+	/*
+	 * irq_count is used to check if a CPU is already on an interrupt stack
+	 * or not. While this is essentially redundant with preempt_count it is
+	 * a little cheaper to use a separate counter in the PDA (short of
+	 * moving irq_enter into assembly, which would be too much work)
+	 */
+1:	incl PER_CPU_VAR(irq_count)
+	cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
+	CFI_DEF_CFA_REGISTER	rsi
+
+	/* Store previous stack value */
+	pushq %rsi
+	CFI_ESCAPE	0x0f /* DW_CFA_def_cfa_expression */, 6, \
+			0x77 /* DW_OP_breg7 */, 0, \
+			0x06 /* DW_OP_deref */, \
+			0x08 /* DW_OP_const1u */, SS+8-RBP, \
+			0x22 /* DW_OP_plus */
+	/* We entered an interrupt context - irqs are off: */
+	TRACE_IRQS_OFF
+
 	call \func
 	.endm
 
-- 
1.8.1.4


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* Re: [PATCH 1/4] x86: entry_64.S: delete unused code
  2015-01-08 16:25 ` [PATCH 1/4] x86: entry_64.S: delete unused code Denys Vlasenko
@ 2015-01-08 18:16   ` Borislav Petkov
  2015-01-13 22:01     ` Andy Lutomirski
  0 siblings, 1 reply; 29+ messages in thread
From: Borislav Petkov @ 2015-01-08 18:16 UTC (permalink / raw)
  To: Denys Vlasenko
  Cc: linux-kernel, Linus Torvalds, Oleg Nesterov, H. Peter Anvin,
	Andy Lutomirski, Frederic Weisbecker, X86 ML, Alexei Starovoitov,
	Will Drewry, Kees Cook

On Thu, Jan 08, 2015 at 05:25:12PM +0100, Denys Vlasenko wrote:
> A define, two macros and an unreferenced bit of assembly are gone.
> 
> Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
> CC: Linus Torvalds <torvalds@linux-foundation.org>
> CC: Oleg Nesterov <oleg@redhat.com>
> CC: "H. Peter Anvin" <hpa@zytor.com>
> CC: Andy Lutomirski <luto@amacapital.net>
> CC: Frederic Weisbecker <fweisbec@gmail.com>
> CC: X86 ML <x86@kernel.org>
> CC: Alexei Starovoitov <ast@plumgrid.com>
> CC: Will Drewry <wad@chromium.org>
> CC: Kees Cook <keescook@chromium.org>
> CC: linux-kernel@vger.kernel.org
> 
> Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>

Acked-by: Borislav Petkov <bp@suse.de>

-- 
Regards/Gruss,
    Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 2/4] x86: ia32entry.S: fix wrong symbolic constant usage: R11->ARGOFFSET
  2015-01-08 16:25 ` [PATCH 2/4] x86: ia32entry.S: fix wrong symbolic constant usage: R11->ARGOFFSET Denys Vlasenko
@ 2015-01-09 10:41   ` Borislav Petkov
  0 siblings, 0 replies; 29+ messages in thread
From: Borislav Petkov @ 2015-01-09 10:41 UTC (permalink / raw)
  To: Denys Vlasenko
  Cc: linux-kernel, Linus Torvalds, Oleg Nesterov, H. Peter Anvin,
	Andy Lutomirski, Frederic Weisbecker, X86 ML, Alexei Starovoitov,
	Will Drewry, Kees Cook

On Thu, Jan 08, 2015 at 05:25:13PM +0100, Denys Vlasenko wrote:
> The values of these two constants are the same, the meaning is different.
> 
> Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
> CC: Linus Torvalds <torvalds@linux-foundation.org>
> CC: Oleg Nesterov <oleg@redhat.com>
> CC: "H. Peter Anvin" <hpa@zytor.com>
> CC: Andy Lutomirski <luto@amacapital.net>
> CC: Frederic Weisbecker <fweisbec@gmail.com>
> CC: X86 ML <x86@kernel.org>
> CC: Alexei Starovoitov <ast@plumgrid.com>
> CC: Will Drewry <wad@chromium.org>
> CC: Kees Cook <keescook@chromium.org>
> CC: linux-kernel@vger.kernel.org
> ---

Acked-by: Borislav Petkov <bp@suse.de>

-- 
Regards/Gruss,
    Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 3/4] x86: open-code register save/restore in trace_hardirqs thunks
  2015-01-08 16:25 ` [PATCH 3/4] x86: open-code register save/restore in trace_hardirqs thunks Denys Vlasenko
@ 2015-01-09 10:55   ` Borislav Petkov
  2015-01-09 20:29     ` Denys Vlasenko
  2015-01-09 12:19   ` Borislav Petkov
  1 sibling, 1 reply; 29+ messages in thread
From: Borislav Petkov @ 2015-01-09 10:55 UTC (permalink / raw)
  To: Denys Vlasenko
  Cc: linux-kernel, Linus Torvalds, Oleg Nesterov, H. Peter Anvin,
	Andy Lutomirski, Frederic Weisbecker, X86 ML, Alexei Starovoitov,
	Will Drewry, Kees Cook

On Thu, Jan 08, 2015 at 05:25:14PM +0100, Denys Vlasenko wrote:
> This is a preparatory patch for change in "struct pt_regs"
> handling in entry_64.S.
> 
> trace_hardirqs thunks were (ab)using a part of pt_regs
> handling code, namely SAVE_ARGS/RESTORE_ARGS macros,
> to save/restore registers across C function calls.
> 
> Since SAVE_ARGS is going to be changed, open-code
> register saving/restoring here.
> 
> Incidentally, this removes a bit of dead code:
> one SAVE_ARGS was used just to emit a CFI annotation,
> but it also generated unreachable assembly insns.
> 
> Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
> CC: Linus Torvalds <torvalds@linux-foundation.org>
> CC: Oleg Nesterov <oleg@redhat.com>
> CC: "H. Peter Anvin" <hpa@zytor.com>
> CC: Andy Lutomirski <luto@amacapital.net>
> CC: Frederic Weisbecker <fweisbec@gmail.com>
> CC: X86 ML <x86@kernel.org>
> CC: Alexei Starovoitov <ast@plumgrid.com>
> CC: Will Drewry <wad@chromium.org>
> CC: Kees Cook <keescook@chromium.org>
> CC: linux-kernel@vger.kernel.org
> ---
>  arch/x86/lib/thunk_64.S | 29 ++++++++++++++++++++++++-----
>  1 file changed, 24 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S
> index b30b5eb..03a3883 100644
> --- a/arch/x86/lib/thunk_64.S
> +++ b/arch/x86/lib/thunk_64.S
> @@ -16,10 +16,20 @@
>  \name:
>  	CFI_STARTPROC
>  
> -	/* this one pushes 9 elems, the next one would be %rIP */
> -	SAVE_ARGS
> +	subq  $9*8, %rsp
> +	CFI_ADJUST_CFA_OFFSET 9*8
> +	movq_cfi r11, 0*8
> +	movq_cfi r10, 1*8
> +	movq_cfi r9,  2*8
> +	movq_cfi r8,  3*8
> +	movq_cfi rax, 4*8
> +	movq_cfi rcx, 5*8
> +	movq_cfi rdx, 6*8
> +	movq_cfi rsi, 7*8
> +	movq_cfi rdi, 8*8
>  
>  	.if \put_ret_addr_in_rdi
> +	/* 9*8(%rsp) is return addr on stack */
>  	movq_cfi_restore 9*8, rdi
>  	.endif
>  
> @@ -45,11 +55,20 @@
>  #endif
>  #endif
>  
> -	/* SAVE_ARGS below is used only for the .cfi directives it contains. */
>  	CFI_STARTPROC
> -	SAVE_ARGS
> +	CFI_ADJUST_CFA_OFFSET 9*8
>  restore:
> -	RESTORE_ARGS
> +	movq_cfi_restore 0*8, r11
> +	movq_cfi_restore 1*8, r10
> +	movq_cfi_restore 2*8, r9
> +	movq_cfi_restore 3*8, r8
> +	movq_cfi_restore 4*8, rax
> +	movq_cfi_restore 5*8, rcx
> +	movq_cfi_restore 6*8, rdx
> +	movq_cfi_restore 7*8, rsi
> +	movq_cfi_restore 8*8, rdi
> +	addq 9*8, %rsp
> +	CFI_ADJUST_CFA_OFFSET -9*8

The only nitpick I'd have with this is can we keep the register
saving/restoring order in the code the same as in the SAVE_/RESTORE_ARGS
macros?

SAVE_ARGS starts with the highest offset 9*8, rdi and ends at 0*8 and
r11 and RESTORE_ARGS does that in reverse.

Also, can you post the struct pt_regs change too so that we know where
this is going?

Thanks.

-- 
Regards/Gruss,
    Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 3/4] x86: open-code register save/restore in trace_hardirqs thunks
  2015-01-08 16:25 ` [PATCH 3/4] x86: open-code register save/restore in trace_hardirqs thunks Denys Vlasenko
  2015-01-09 10:55   ` Borislav Petkov
@ 2015-01-09 12:19   ` Borislav Petkov
  2015-01-09 18:54     ` Denys Vlasenko
  1 sibling, 1 reply; 29+ messages in thread
From: Borislav Petkov @ 2015-01-09 12:19 UTC (permalink / raw)
  To: Denys Vlasenko
  Cc: linux-kernel, Linus Torvalds, Oleg Nesterov, H. Peter Anvin,
	Andy Lutomirski, Frederic Weisbecker, X86 ML, Alexei Starovoitov,
	Will Drewry, Kees Cook

On Thu, Jan 08, 2015 at 05:25:14PM +0100, Denys Vlasenko wrote:
> This is a preparatory patch for change in "struct pt_regs"
> handling in entry_64.S.
> 
> trace_hardirqs thunks were (ab)using a part of pt_regs
> handling code, namely SAVE_ARGS/RESTORE_ARGS macros,
> to save/restore registers across C function calls.
> 
> Since SAVE_ARGS is going to be changed, open-code
> register saving/restoring here.
> 
> Incidentally, this removes a bit of dead code:
> one SAVE_ARGS was used just to emit a CFI annotation,
> but it also generated unreachable assembly insns.
> 
> Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
> CC: Linus Torvalds <torvalds@linux-foundation.org>
> CC: Oleg Nesterov <oleg@redhat.com>
> CC: "H. Peter Anvin" <hpa@zytor.com>
> CC: Andy Lutomirski <luto@amacapital.net>
> CC: Frederic Weisbecker <fweisbec@gmail.com>
> CC: X86 ML <x86@kernel.org>
> CC: Alexei Starovoitov <ast@plumgrid.com>
> CC: Will Drewry <wad@chromium.org>
> CC: Kees Cook <keescook@chromium.org>
> CC: linux-kernel@vger.kernel.org
> ---
>  arch/x86/lib/thunk_64.S | 29 ++++++++++++++++++++++++-----
>  1 file changed, 24 insertions(+), 5 deletions(-)

Hmm, this patch breaks booting my kvm guest: it stops booting at some
point and restarts itself after a couple of seconds.

The monitor says rIP points to ffffffff8167ae30 which is this:

ffffffff8167ae30 <async_page_fault>:
ffffffff8167ae30:       ff 15 fa 62 31 00       callq  *0x3162fa(%rip)        # ffffffff81991130 <pv_irq_ops+0x30>
ffffffff8167ae36:       48 83 ec 78             sub    $0x78,%rsp
ffffffff8167ae3a:       e8 d1 01 00 00          callq  ffffffff8167b010 <error_entry>
ffffffff8167ae3f:       48 89 e7                mov    %rsp,%rdi
ffffffff8167ae42:       48 8b 74 24 78          mov    0x78(%rsp),%rsi
ffffffff8167ae47:       48 c7 44 24 78 ff ff    movq   $0xffffffffffffffff,0x78(%rsp)
ffffffff8167ae4e:       ff ff 
ffffffff8167ae50:       e8 9b 9e 9c ff          callq  ffffffff81044cf0 <do_async_page_fault>
ffffffff8167ae55:       e9 76 02 00 00          jmpq   ffffffff8167b0d0 <error_exit>
ffffffff8167ae5a:       66 0f 1f 44 00 00       nopw   0x0(%rax,%rax,1)

HTH.

-- 
Regards/Gruss,
    Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 3/4] x86: open-code register save/restore in trace_hardirqs thunks
  2015-01-09 12:19   ` Borislav Petkov
@ 2015-01-09 18:54     ` Denys Vlasenko
  2015-01-10 14:23       ` Borislav Petkov
  0 siblings, 1 reply; 29+ messages in thread
From: Denys Vlasenko @ 2015-01-09 18:54 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Denys Vlasenko, Linux Kernel Mailing List, Linus Torvalds,
	Oleg Nesterov, H. Peter Anvin, Andy Lutomirski,
	Frederic Weisbecker, X86 ML, Alexei Starovoitov, Will Drewry,
	Kees Cook

Hi Borislav, thank you for testing and finding it.

On Fri, Jan 9, 2015 at 1:19 PM, Borislav Petkov <bp@alien8.de> wrote:
> Hmm, this patch breaks booting my kvm guest: it stops booting at some
> point and restarts itself after a couple of seconds.
>
> The monitor says rIP points to ffffffff8167ae30 which is this:
>
> ffffffff8167ae30 <async_page_fault>:
> ffffffff8167ae30:       ff 15 fa 62 31 00       callq  *0x3162fa(%rip)        # ffffffff81991130 <pv_irq_ops+0x30>
> ffffffff8167ae36:       48 83 ec 78             sub    $0x78,%rsp
> ffffffff8167ae3a:       e8 d1 01 00 00          callq  ffffffff8167b010 <error_entry>
> ffffffff8167ae3f:       48 89 e7                mov    %rsp,%rdi
> ffffffff8167ae42:       48 8b 74 24 78          mov    0x78(%rsp),%rsi
> ffffffff8167ae47:       48 c7 44 24 78 ff ff    movq   $0xffffffffffffffff,0x78(%rsp)
> ffffffff8167ae4e:       ff ff
> ffffffff8167ae50:       e8 9b 9e 9c ff          callq  ffffffff81044cf0 <do_async_page_fault>
> ffffffff8167ae55:       e9 76 02 00 00          jmpq   ffffffff8167b0d0 <error_exit>
> ffffffff8167ae5a:       66 0f 1f 44 00 00       nopw   0x0(%rax,%rax,1)

I just looked at disassembly of thunk_64.o
before and after the patch. Here's what I see:

Before:

Disassembly of section .text:
0000000000000000 <restore-0x30>:
   0:   48 83 ec 48             sub    $0x48,%rsp
   4:   48 89 7c 24 40          mov    %rdi,0x40(%rsp)
   9:   48 89 74 24 38          mov    %rsi,0x38(%rsp)
   e:   48 89 54 24 30          mov    %rdx,0x30(%rsp)
  13:   48 89 4c 24 28          mov    %rcx,0x28(%rsp)
  18:   48 89 44 24 20          mov    %rax,0x20(%rsp)
  1d:   4c 89 44 24 18          mov    %r8,0x18(%rsp)
  22:   4c 89 4c 24 10          mov    %r9,0x10(%rsp)
  27:   4c 89 54 24 08          mov    %r10,0x8(%rsp)
  2c:   4c 89 1c 24             mov    %r11,(%rsp)
0000000000000030 <restore>:
  30:   4c 8b 1c 24             mov    (%rsp),%r11
  34:   4c 8b 54 24 08          mov    0x8(%rsp),%r10
  39:   4c 8b 4c 24 10          mov    0x10(%rsp),%r9
  3e:   4c 8b 44 24 18          mov    0x18(%rsp),%r8
  43:   48 8b 44 24 20          mov    0x20(%rsp),%rax
  48:   48 8b 4c 24 28          mov    0x28(%rsp),%rcx
  4d:   48 8b 54 24 30          mov    0x30(%rsp),%rdx
  52:   48 8b 74 24 38          mov    0x38(%rsp),%rsi
  57:   48 8b 7c 24 40          mov    0x40(%rsp),%rdi
  5c:   48 83 c4 48             add    $0x48,%rsp
  60:   c3                      retq

After:

Disassembly of section .text:
0000000000000000 <restore>:
   0:   4c 8b 1c 24             mov    (%rsp),%r11
   4:   4c 8b 54 24 08          mov    0x8(%rsp),%r10
   9:   4c 8b 4c 24 10          mov    0x10(%rsp),%r9
   e:   4c 8b 44 24 18          mov    0x18(%rsp),%r8
  13:   48 8b 44 24 20          mov    0x20(%rsp),%rax
  18:   48 8b 4c 24 28          mov    0x28(%rsp),%rcx
  1d:   48 8b 54 24 30          mov    0x30(%rsp),%rdx
  22:   48 8b 74 24 38          mov    0x38(%rsp),%rsi
  27:   48 8b 7c 24 40          mov    0x40(%rsp),%rdi
  2c:   48 03 24 25 48 00 00    add    0x48,%rsp
  33:   00
  34:   c3                      retq

IOW, my patch, on the level of generated assembly, results only in removal
of unreachable "SAVE_ARGS" thing.

I looked into git history all the way back to 2005. The part

+       /* SAVE_ARGS below is used only for the .cfi directives it contains. */
+       CFI_STARTPROC
+       SAVE_ARGS
+restore:

was there in the very first git commit.

I don't see how this SAVE_ARGS can affect anything. It *is* unreachable,
right?

Does kvm guest code really parse and use CFI data in its operation?
That's the only way the breakage can be explained.

In order to narrow it down, can you, instead of my patch, try
just deleting this one line, and see whether breakage appears?

Thanks!
-- 
vda

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 3/4] x86: open-code register save/restore in trace_hardirqs thunks
  2015-01-09 10:55   ` Borislav Petkov
@ 2015-01-09 20:29     ` Denys Vlasenko
  2015-01-10 13:52       ` Borislav Petkov
  0 siblings, 1 reply; 29+ messages in thread
From: Denys Vlasenko @ 2015-01-09 20:29 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Denys Vlasenko, Linux Kernel Mailing List, Linus Torvalds,
	Oleg Nesterov, H. Peter Anvin, Andy Lutomirski,
	Frederic Weisbecker, X86 ML, Alexei Starovoitov, Will Drewry,
	Kees Cook

On Fri, Jan 9, 2015 at 11:55 AM, Borislav Petkov <bp@alien8.de> wrote:
> Also, can you post the struct pt_regs change too so that we know where
> this is going?

I'm concerned that posting too many patches at once
actually lowers the chances of them being reviewed/accepted.

For example, this probably is the reason why they fell through last time.

Since you expressed interest in seeing them, I'm going to send
all remaining patches to you off-list. Thanks.
-- 
vda

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 3/4] x86: open-code register save/restore in trace_hardirqs thunks
  2015-01-09 20:29     ` Denys Vlasenko
@ 2015-01-10 13:52       ` Borislav Petkov
  0 siblings, 0 replies; 29+ messages in thread
From: Borislav Petkov @ 2015-01-10 13:52 UTC (permalink / raw)
  To: Denys Vlasenko
  Cc: Denys Vlasenko, Linux Kernel Mailing List, Linus Torvalds,
	Oleg Nesterov, H. Peter Anvin, Andy Lutomirski,
	Frederic Weisbecker, X86 ML, Alexei Starovoitov, Will Drewry,
	Kees Cook

On Fri, Jan 09, 2015 at 09:29:10PM +0100, Denys Vlasenko wrote:
> On Fri, Jan 9, 2015 at 11:55 AM, Borislav Petkov <bp@alien8.de> wrote:
> > Also, can you post the struct pt_regs change too so that we know where
> > this is going?
> 
> I'm concerned that posting too many patches at once
> actually lowers the chances of them being reviewed/accepted.

Yeah, especially if they touch entry_64.S :-)

> For example, this probably is the reason why they fell through last time.
> 
> Since you expressed interest in seeing them, I'm going to send
> all remaining patches to you off-list. Thanks.

Thanks, I'll take a look.

-- 
Regards/Gruss,
    Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 3/4] x86: open-code register save/restore in trace_hardirqs thunks
  2015-01-09 18:54     ` Denys Vlasenko
@ 2015-01-10 14:23       ` Borislav Petkov
  2015-01-10 20:14         ` Denys Vlasenko
  0 siblings, 1 reply; 29+ messages in thread
From: Borislav Petkov @ 2015-01-10 14:23 UTC (permalink / raw)
  To: Denys Vlasenko
  Cc: Denys Vlasenko, Linux Kernel Mailing List, Linus Torvalds,
	Oleg Nesterov, H. Peter Anvin, Andy Lutomirski,
	Frederic Weisbecker, X86 ML, Alexei Starovoitov, Will Drewry,
	Kees Cook

On Fri, Jan 09, 2015 at 07:54:08PM +0100, Denys Vlasenko wrote:
> In order to narrow it down, can you, instead of my patch, try
> just deleting this one line, and see whether breakage appears?

Yep, this works:

---
Index: b/arch/x86/lib/thunk_64.S
===================================================================
--- a/arch/x86/lib/thunk_64.S	2015-01-10 15:03:10.262716858 +0100
+++ b/arch/x86/lib/thunk_64.S	2015-01-10 15:12:53.502730396 +0100
@@ -47,7 +47,6 @@
 
 	/* SAVE_ARGS below is used only for the .cfi directives it contains. */
 	CFI_STARTPROC
-	SAVE_ARGS
 restore:
 	RESTORE_ARGS
 	ret
--

Bah, I see it. This nasty '$' gets forgotten a lot, maybe we should have
a check for that in some scripts :-)

Here's the fix:

---
Index: b/arch/x86/lib/thunk_64.S
===================================================================
--- a/arch/x86/lib/thunk_64.S	2015-01-10 15:18:04.418737613 +0100
+++ b/arch/x86/lib/thunk_64.S	2015-01-10 15:17:18.882736556 +0100
@@ -67,7 +67,7 @@ restore:
 	movq_cfi_restore 6*8, rdx
 	movq_cfi_restore 7*8, rsi
 	movq_cfi_restore 8*8, rdi
-	addq 9*8, %rsp
+	addq $9*8, %rsp
 	CFI_ADJUST_CFA_OFFSET -9*8
 	ret
 	CFI_ENDPROC
---

That boots!

:-D

-- 
Regards/Gruss,
    Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 3/4] x86: open-code register save/restore in trace_hardirqs thunks
  2015-01-10 14:23       ` Borislav Petkov
@ 2015-01-10 20:14         ` Denys Vlasenko
  2015-01-10 20:17           ` Andy Lutomirski
  2015-01-10 22:00           ` Borislav Petkov
  0 siblings, 2 replies; 29+ messages in thread
From: Denys Vlasenko @ 2015-01-10 20:14 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Denys Vlasenko, Linux Kernel Mailing List, Linus Torvalds,
	Oleg Nesterov, H. Peter Anvin, Andy Lutomirski,
	Frederic Weisbecker, X86 ML, Alexei Starovoitov, Will Drewry,
	Kees Cook

[-- Attachment #1: Type: text/plain, Size: 1000 bytes --]

On Sat, Jan 10, 2015 at 3:23 PM, Borislav Petkov <bp@alien8.de> wrote:
> Bah, I see it. This nasty '$' gets forgotten a lot, maybe we should have
> a check for that in some scripts :-)
>
> Here's the fix:
>
> ---
> Index: b/arch/x86/lib/thunk_64.S
> ===================================================================
> --- a/arch/x86/lib/thunk_64.S   2015-01-10 15:18:04.418737613 +0100
> +++ b/arch/x86/lib/thunk_64.S   2015-01-10 15:17:18.882736556 +0100
> @@ -67,7 +67,7 @@ restore:
>         movq_cfi_restore 6*8, rdx
>         movq_cfi_restore 7*8, rsi
>         movq_cfi_restore 8*8, rdi
> -       addq 9*8, %rsp
> +       addq $9*8, %rsp
>         CFI_ADJUST_CFA_OFFSET -9*8
>         ret

Thanks!

After I've seen the disassembly I myself posted, I can't help but wonder
why we use 5-byte instructions to store and load regs on stack when
pushes and pops are 1 or 2-byte long.

Especially that 32-bit code *does* use push/pops.

Can you test the attached patch with your kvm guest testcase?

[-- Attachment #2: 0003-x86-open-code-register-save-restore-in-trace_hardirq.patch --]
[-- Type: text/x-patch, Size: 3038 bytes --]

From 2f636e0a92db898f2bdb592027aa302fcb32a326 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <dvlasenk@redhat.com>
To: linux-kernel@vger.kernel.org
Subject: [PATCH 3/4] x86: open-code register save/restore in trace_hardirqs thunks

This is a preparatory patch for change in "struct pt_regs"
handling in entry_64.S.

trace_hardirqs thunks were (ab)using a part of pt_regs
handling code, namely SAVE_ARGS/RESTORE_ARGS macros,
to save/restore registers across C function calls.

Since SAVE_ARGS is going to be changed, open-code
register saving/restoring here. Take a page from thunk_32.S
and use push/pop insns instead of movq, they are far shorter:
1 or 2 bytes versus 5, and no need for insns to adjust %rsp:

   text	   data	    bss	    dec	    hex	filename
    333	     40	      0	    373	    175	thunk_64_movq.o
    104	     40	      0	    144	     90	thunk_64_push_pop.o

Incidentally, this removes a bit of dead code:
one SAVE_ARGS was used just to emit a CFI annotation,
but it also generated unreachable assembly insns.

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
CC: Linus Torvalds <torvalds@linux-foundation.org>
CC: Oleg Nesterov <oleg@redhat.com>
CC: "H. Peter Anvin" <hpa@zytor.com>
CC: Borislav Petkov <bp@alien8.de>
CC: Andy Lutomirski <luto@amacapital.net>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: X86 ML <x86@kernel.org>
CC: Alexei Starovoitov <ast@plumgrid.com>
CC: Will Drewry <wad@chromium.org>
CC: Kees Cook <keescook@chromium.org>
CC: linux-kernel@vger.kernel.org
---
 arch/x86/lib/thunk_64.S | 46 ++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 42 insertions(+), 4 deletions(-)

diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S
index b30b5eb..8ec443a 100644
--- a/arch/x86/lib/thunk_64.S
+++ b/arch/x86/lib/thunk_64.S
@@ -17,9 +17,27 @@
 	CFI_STARTPROC
 
 	/* this one pushes 9 elems, the next one would be %rIP */
-	SAVE_ARGS
+	pushq_cfi %rdi
+	CFI_REL_OFFSET rdi, 0
+	pushq_cfi %rsi
+	CFI_REL_OFFSET rsi, 0
+	pushq_cfi %rdx
+	CFI_REL_OFFSET rdx, 0
+	pushq_cfi %rcx
+	CFI_REL_OFFSET rcx, 0
+	pushq_cfi %rax
+	CFI_REL_OFFSET rax, 0
+	pushq_cfi %r8
+	CFI_REL_OFFSET r8, 0
+	pushq_cfi %r9
+	CFI_REL_OFFSET r9, 0
+	pushq_cfi %r10
+	CFI_REL_OFFSET r10, 0
+	pushq_cfi %r11
+	CFI_REL_OFFSET r11, 0
 
 	.if \put_ret_addr_in_rdi
+	/* 9*8(%rsp) is return addr on stack */
 	movq_cfi_restore 9*8, rdi
 	.endif
 
@@ -45,11 +63,31 @@
 #endif
 #endif
 
-	/* SAVE_ARGS below is used only for the .cfi directives it contains. */
+#if defined(CONFIG_TRACE_IRQFLAGS) \
+ || defined(CONFIG_DEBUG_LOCK_ALLOC) \
+ || defined(CONFIG_PREEMPT)
 	CFI_STARTPROC
-	SAVE_ARGS
+	CFI_ADJUST_CFA_OFFSET 9*8
 restore:
-	RESTORE_ARGS
+	popq_cfi %r11
+	CFI_RESTORE r11
+	popq_cfi %r10
+	CFI_RESTORE r10
+	popq_cfi %r9
+	CFI_RESTORE r9
+	popq_cfi %r8
+	CFI_RESTORE r8
+	popq_cfi %rax
+	CFI_RESTORE rax
+	popq_cfi %rcx
+	CFI_RESTORE rcx
+	popq_cfi %rdx
+	CFI_RESTORE rdx
+	popq_cfi %rsi
+	CFI_RESTORE rsi
+	popq_cfi %rdi
+	CFI_RESTORE rdi
 	ret
 	CFI_ENDPROC
 	_ASM_NOKPROBE(restore)
+#endif
-- 
1.8.1.4


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* Re: [PATCH 3/4] x86: open-code register save/restore in trace_hardirqs thunks
  2015-01-10 20:14         ` Denys Vlasenko
@ 2015-01-10 20:17           ` Andy Lutomirski
  2015-01-10 20:42             ` Borislav Petkov
                               ` (2 more replies)
  2015-01-10 22:00           ` Borislav Petkov
  1 sibling, 3 replies; 29+ messages in thread
From: Andy Lutomirski @ 2015-01-10 20:17 UTC (permalink / raw)
  To: Denys Vlasenko
  Cc: Borislav Petkov, Denys Vlasenko, Linux Kernel Mailing List,
	Linus Torvalds, Oleg Nesterov, H. Peter Anvin,
	Frederic Weisbecker, X86 ML, Alexei Starovoitov, Will Drewry,
	Kees Cook

On Sat, Jan 10, 2015 at 12:14 PM, Denys Vlasenko
<vda.linux@googlemail.com> wrote:
> On Sat, Jan 10, 2015 at 3:23 PM, Borislav Petkov <bp@alien8.de> wrote:
>> Bah, I see it. This nasty '$' gets forgotten a lot, maybe we should have
>> a check for that in some scripts :-)
>>
>> Here's the fix:
>>
>> ---
>> Index: b/arch/x86/lib/thunk_64.S
>> ===================================================================
>> --- a/arch/x86/lib/thunk_64.S   2015-01-10 15:18:04.418737613 +0100
>> +++ b/arch/x86/lib/thunk_64.S   2015-01-10 15:17:18.882736556 +0100
>> @@ -67,7 +67,7 @@ restore:
>>         movq_cfi_restore 6*8, rdx
>>         movq_cfi_restore 7*8, rsi
>>         movq_cfi_restore 8*8, rdi
>> -       addq 9*8, %rsp
>> +       addq $9*8, %rsp
>>         CFI_ADJUST_CFA_OFFSET -9*8
>>         ret
>
> Thanks!
>
> After I've seen the disassembly I myself posted, I can't help but wonder
> why we use 5-byte instructions to store and load regs on stack when
> pushes and pops are 1 or 2-byte long.
>

I asked this once, and someone told me that push/pop has lower
throughput.  I find this surprising.

--Andy

> Especially that 32-bit code *does* use push/pops.
>
> Can you test the attached patch with your kvm guest testcase?

Tt could be worth adding a macro along the lines of pushq_cfi_save
that does the pushq_cfi and the CFI_REL_OFFSET.

--Andy

-- 
Andy Lutomirski
AMA Capital Management, LLC

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 3/4] x86: open-code register save/restore in trace_hardirqs thunks
  2015-01-10 20:17           ` Andy Lutomirski
@ 2015-01-10 20:42             ` Borislav Petkov
  2015-01-10 21:02               ` Andy Lutomirski
  2015-01-10 20:43             ` Denys Vlasenko
  2015-01-10 21:08             ` Linus Torvalds
  2 siblings, 1 reply; 29+ messages in thread
From: Borislav Petkov @ 2015-01-10 20:42 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Denys Vlasenko, Denys Vlasenko, Linux Kernel Mailing List,
	Linus Torvalds, Oleg Nesterov, H. Peter Anvin,
	Frederic Weisbecker, X86 ML, Alexei Starovoitov, Will Drewry,
	Kees Cook

On Sat, Jan 10, 2015 at 12:17:13PM -0800, Andy Lutomirski wrote:
> I asked this once, and someone told me that push/pop has lower
> throughput.  I find this surprising.

Implicit dependency on %rsp probably. The MOVs allow you to start more
stuff out-of-order I'd guess...

> Tt could be worth adding a macro along the lines of pushq_cfi_save
> that does the pushq_cfi and the CFI_REL_OFFSET.

Yep, for balance.

-- 
Regards/Gruss,
    Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 3/4] x86: open-code register save/restore in trace_hardirqs thunks
  2015-01-10 20:17           ` Andy Lutomirski
  2015-01-10 20:42             ` Borislav Petkov
@ 2015-01-10 20:43             ` Denys Vlasenko
  2015-01-10 21:08             ` Linus Torvalds
  2 siblings, 0 replies; 29+ messages in thread
From: Denys Vlasenko @ 2015-01-10 20:43 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Borislav Petkov, Denys Vlasenko, Linux Kernel Mailing List,
	Linus Torvalds, Oleg Nesterov, H. Peter Anvin,
	Frederic Weisbecker, X86 ML, Alexei Starovoitov, Will Drewry,
	Kees Cook

On Sat, Jan 10, 2015 at 9:17 PM, Andy Lutomirski <luto@amacapital.net> wrote:
>> After I've seen the disassembly I myself posted, I can't help but wonder
>> why we use 5-byte instructions to store and load regs on stack when
>> pushes and pops are 1 or 2-byte long.
>
> I asked this once, and someone told me that push/pop has lower
> throughput.  I find this surprising.

Theoretically yes.
In practice, AMD K7 and K8 seem to be able to execute two movq's
in one cycle, but only one push.

For all other processors I looked at, they have the same throughput:
K10 can do two movq's in one cycle, but also two push'es.
Bulldozer...Steamroller: can do one insn per cycle.
Bobcat..Jaguar: can do one insn per cycle.
Core 2: can do one insn per cycle.
Nehalem: can do one insn per cycle.

The above was microbenchmarked with long sequences
of similar instructions, in which case store unit gets saturated
and becomes a bottleneck.

Here's the document.

http://www.agner.org/optimize/instruction_tables.pdf

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 3/4] x86: open-code register save/restore in trace_hardirqs thunks
  2015-01-10 20:42             ` Borislav Petkov
@ 2015-01-10 21:02               ` Andy Lutomirski
  2015-01-10 21:09                 ` Denys Vlasenko
  0 siblings, 1 reply; 29+ messages in thread
From: Andy Lutomirski @ 2015-01-10 21:02 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Denys Vlasenko, Denys Vlasenko, Linux Kernel Mailing List,
	Linus Torvalds, Oleg Nesterov, H. Peter Anvin,
	Frederic Weisbecker, X86 ML, Alexei Starovoitov, Will Drewry,
	Kees Cook

On Sat, Jan 10, 2015 at 12:42 PM, Borislav Petkov <bp@alien8.de> wrote:
> On Sat, Jan 10, 2015 at 12:17:13PM -0800, Andy Lutomirski wrote:
>> I asked this once, and someone told me that push/pop has lower
>> throughput.  I find this surprising.
>
> Implicit dependency on %rsp probably. The MOVs allow you to start more
> stuff out-of-order I'd guess...

AIUI modern CPUs have fancy stack engines that match call/ret pairs,
and presumably they can speculate rsp values across multiple pushes
and pops very quickly.

Also, don't compilers generally use push and pop to save and restore
callee-saved registers?  I think that function calls are common enough
that the CPU vendors would have made these sequences fast.

--Andy

>
>> Tt could be worth adding a macro along the lines of pushq_cfi_save
>> that does the pushq_cfi and the CFI_REL_OFFSET.
>
> Yep, for balance.
>
> --
> Regards/Gruss,
>     Boris.
>
> Sent from a fat crate under my desk. Formatting is fine.
> --



-- 
Andy Lutomirski
AMA Capital Management, LLC

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 3/4] x86: open-code register save/restore in trace_hardirqs thunks
  2015-01-10 20:17           ` Andy Lutomirski
  2015-01-10 20:42             ` Borislav Petkov
  2015-01-10 20:43             ` Denys Vlasenko
@ 2015-01-10 21:08             ` Linus Torvalds
  2015-01-10 21:26               ` Borislav Petkov
  2 siblings, 1 reply; 29+ messages in thread
From: Linus Torvalds @ 2015-01-10 21:08 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Denys Vlasenko, Borislav Petkov, Denys Vlasenko,
	Linux Kernel Mailing List, Oleg Nesterov, H. Peter Anvin,
	Frederic Weisbecker, X86 ML, Alexei Starovoitov, Will Drewry,
	Kees Cook

On Sat, Jan 10, 2015 at 12:17 PM, Andy Lutomirski <luto@amacapital.net> wrote:
>
> I asked this once, and someone told me that push/pop has lower
> throughput.  I find this surprising.

It was true for some AMD CPU's in particular. One insn/cycle vs two.

I personally would be very happy to go back to push/pop sequences.
Even without a fancy stack engine like Intel has done for a while,
even *simple* cores can generally pair pushes and pops. I think the
original Pentium already had a special magic pairing logic to pair
pushes and pops despite both instructions using %esp. It's a common
and fairly trivial special case, and the fact that a few AMD
microarchitectures didn't do it is likely not really a good reason to
avoid repeated push/pop instructions.

                   Linus

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 3/4] x86: open-code register save/restore in trace_hardirqs thunks
  2015-01-10 21:02               ` Andy Lutomirski
@ 2015-01-10 21:09                 ` Denys Vlasenko
  2015-01-10 21:27                   ` Linus Torvalds
  0 siblings, 1 reply; 29+ messages in thread
From: Denys Vlasenko @ 2015-01-10 21:09 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Borislav Petkov, Denys Vlasenko, Linux Kernel Mailing List,
	Linus Torvalds, Oleg Nesterov, H. Peter Anvin,
	Frederic Weisbecker, X86 ML, Alexei Starovoitov, Will Drewry,
	Kees Cook

On Sat, Jan 10, 2015 at 10:02 PM, Andy Lutomirski <luto@amacapital.net> wrote:
> On Sat, Jan 10, 2015 at 12:42 PM, Borislav Petkov <bp@alien8.de> wrote:
>> On Sat, Jan 10, 2015 at 12:17:13PM -0800, Andy Lutomirski wrote:
>>> I asked this once, and someone told me that push/pop has lower
>>> throughput.  I find this surprising.
>>
>> Implicit dependency on %rsp probably. The MOVs allow you to start more
>> stuff out-of-order I'd guess...
>
> AIUI modern CPUs have fancy stack engines that match call/ret pairs,
> and presumably they can speculate rsp values across multiple pushes
> and pops very quickly.

Yes, stack engine hangs off the pipeline right after decode stage.

> Also, don't compilers generally use push and pop to save and restore
> callee-saved registers?  I think that function calls are common enough
> that the CPU vendors would have made these sequences fast.

Compilers can't predict which functions are hottest.
Using mov's would bloat prologues by about factor of 5.

I think using push/pop is okay. In the very hottest code paths
you may want to prefer mov's.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 3/4] x86: open-code register save/restore in trace_hardirqs thunks
  2015-01-10 21:08             ` Linus Torvalds
@ 2015-01-10 21:26               ` Borislav Petkov
  0 siblings, 0 replies; 29+ messages in thread
From: Borislav Petkov @ 2015-01-10 21:26 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Andy Lutomirski, Denys Vlasenko, Denys Vlasenko,
	Linux Kernel Mailing List, Oleg Nesterov, H. Peter Anvin,
	Frederic Weisbecker, X86 ML, Alexei Starovoitov, Will Drewry,
	Kees Cook

On Sat, Jan 10, 2015 at 01:08:33PM -0800, Linus Torvalds wrote:
> It was true for some AMD CPU's in particular. One insn/cycle vs two.

Probably on K8: Agner Fog's insn tables show reciprocal throughput of
1/2 for MOV r64/m64 vs 1 for PUSH/POP.

> I personally would be very happy to go back to push/pop sequences.
> Even without a fancy stack engine like Intel has done for a while,
> even *simple* cores can generally pair pushes and pops. I think the

I think all the modern x86 machines have stack engines now :-)

> original Pentium already had a special magic pairing logic to pair
> pushes and pops despite both instructions using %esp. It's a common
> and fairly trivial special case, and the fact that a few AMD
> microarchitectures didn't do it is likely not really a good reason to
> avoid repeated push/pop instructions.

Well, according to the optimization manual, on F15h (Bulldozer and
later) PUSH/POP are faster than MOVs and on F16h (Jaguar and later) both
MOV and PUSH/POP have latency of 1, with MOV having a 1/2 throughput vs
PUSH/POP throughput of 1. So theoretically we can do 2 MOVs per cycle
there vs 1 PUSH/POP.

-- 
Regards/Gruss,
    Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 3/4] x86: open-code register save/restore in trace_hardirqs thunks
  2015-01-10 21:09                 ` Denys Vlasenko
@ 2015-01-10 21:27                   ` Linus Torvalds
  2015-01-10 21:57                     ` Denys Vlasenko
  0 siblings, 1 reply; 29+ messages in thread
From: Linus Torvalds @ 2015-01-10 21:27 UTC (permalink / raw)
  To: Denys Vlasenko
  Cc: Andy Lutomirski, Borislav Petkov, Denys Vlasenko,
	Linux Kernel Mailing List, Oleg Nesterov, H. Peter Anvin,
	Frederic Weisbecker, X86 ML, Alexei Starovoitov, Will Drewry,
	Kees Cook

On Sat, Jan 10, 2015 at 1:09 PM, Denys Vlasenko
<vda.linux@googlemail.com> wrote:
>
> I think using push/pop is okay. In the very hottest code paths
> you may want to prefer mov's.

For kernel entrypoints in particular, the code sequence is quite
possibly constrained by the decoder and instruction fetch rather than
the execution engine. Even if the entrypoint were to be in the L1 I$
(which is not generally the case except in microbenchmarks), I am
pretty sure that even Intel doesn't actually speculatively decode
across system call boundaries, so unlike normal nice code, you don't
have the front end running ahead of the execution engine.

Looking at the system call hotpath, for example, it looks like we
save/restore 8 registers. So 16 instructions or about 80 bytes of
code. I could easily imagine us avoiding one cacheline access by using
shorter 1- and 2-byte push/pop instructions (depending a bit on how
the cacheline alignment works out, of course).

Depending on how well it prefetches from L2 and/or exact decoder
details, that kind of issue *can* overshadow the actual execution
costs. Of course, on microbenchmarks (eg some system call benchmark
that does "getppid()" in a loop), even the kernel side stays in the
L1, so those might show possible execution issues more. And
macrobenchmarks probably won't show a cycle or two in the system call
or fault path anyway.

                    Linus

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 3/4] x86: open-code register save/restore in trace_hardirqs thunks
  2015-01-10 21:27                   ` Linus Torvalds
@ 2015-01-10 21:57                     ` Denys Vlasenko
  0 siblings, 0 replies; 29+ messages in thread
From: Denys Vlasenko @ 2015-01-10 21:57 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Andy Lutomirski, Borislav Petkov, Denys Vlasenko,
	Linux Kernel Mailing List, Oleg Nesterov, H. Peter Anvin,
	Frederic Weisbecker, X86 ML, Alexei Starovoitov, Will Drewry,
	Kees Cook

On Sat, Jan 10, 2015 at 10:27 PM, Linus Torvalds
<torvalds@linux-foundation.org> wrote:
> On Sat, Jan 10, 2015 at 1:09 PM, Denys Vlasenko
> <vda.linux@googlemail.com> wrote:
>>
>> I think using push/pop is okay. In the very hottest code paths
>> you may want to prefer mov's.
>
> For kernel entrypoints in particular, the code sequence is quite
> possibly constrained by the decoder and instruction fetch rather than
> the execution engine. Even if the entrypoint were to be in the L1 I$
> (which is not generally the case except in microbenchmarks), I am
> pretty sure that even Intel doesn't actually speculatively decode
> across system call boundaries, so unlike normal nice code, you don't
> have the front end running ahead of the execution engine.
>
> Looking at the system call hotpath, for example, it looks like we
> save/restore 8 registers. So 16 instructions or about 80 bytes of
> code. I could easily imagine us avoiding one cacheline access by using
> shorter 1- and 2-byte push/pop instructions (depending a bit on how
> the cacheline alignment works out, of course).
>
> Depending on how well it prefetches from L2 and/or exact decoder
> details, that kind of issue *can* overshadow the actual execution
> costs. Of course, on microbenchmarks (eg some system call benchmark
> that does "getppid()" in a loop), even the kernel side stays in the
> L1, so those might show possible execution issues more. And
> macrobenchmarks probably won't show a cycle or two in the system call
> or fault path anyway.
>
>                     Linus

Looks like consensus to me. I'm resending patches with patch #3 reworked.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 3/4] x86: open-code register save/restore in trace_hardirqs thunks
  2015-01-10 20:14         ` Denys Vlasenko
  2015-01-10 20:17           ` Andy Lutomirski
@ 2015-01-10 22:00           ` Borislav Petkov
  2015-01-10 22:03             ` Denys Vlasenko
  2015-01-10 22:04             ` Andy Lutomirski
  1 sibling, 2 replies; 29+ messages in thread
From: Borislav Petkov @ 2015-01-10 22:00 UTC (permalink / raw)
  To: Denys Vlasenko
  Cc: Denys Vlasenko, Linux Kernel Mailing List, Linus Torvalds,
	Oleg Nesterov, H. Peter Anvin, Andy Lutomirski,
	Frederic Weisbecker, X86 ML, Alexei Starovoitov, Will Drewry,
	Kees Cook

On Sat, Jan 10, 2015 at 09:14:03PM +0100, Denys Vlasenko wrote:
> From 2f636e0a92db898f2bdb592027aa302fcb32a326 Mon Sep 17 00:00:00 2001
> From: Denys Vlasenko <dvlasenk@redhat.com>
> To: linux-kernel@vger.kernel.org
> Subject: [PATCH 3/4] x86: open-code register save/restore in trace_hardirqs thunks
> 
> This is a preparatory patch for change in "struct pt_regs"
> handling in entry_64.S.
> 
> trace_hardirqs thunks were (ab)using a part of pt_regs
> handling code, namely SAVE_ARGS/RESTORE_ARGS macros,
> to save/restore registers across C function calls.
> 
> Since SAVE_ARGS is going to be changed, open-code
> register saving/restoring here. Take a page from thunk_32.S
> and use push/pop insns instead of movq, they are far shorter:
> 1 or 2 bytes versus 5, and no need for insns to adjust %rsp:
> 
>    text	   data	    bss	    dec	    hex	filename
>     333	     40	      0	    373	    175	thunk_64_movq.o
>     104	     40	      0	    144	     90	thunk_64_push_pop.o
> 
> Incidentally, this removes a bit of dead code:
> one SAVE_ARGS was used just to emit a CFI annotation,
> but it also generated unreachable assembly insns.
> 
> Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
> CC: Linus Torvalds <torvalds@linux-foundation.org>
> CC: Oleg Nesterov <oleg@redhat.com>
> CC: "H. Peter Anvin" <hpa@zytor.com>
> CC: Borislav Petkov <bp@alien8.de>
> CC: Andy Lutomirski <luto@amacapital.net>
> CC: Frederic Weisbecker <fweisbec@gmail.com>
> CC: X86 ML <x86@kernel.org>
> CC: Alexei Starovoitov <ast@plumgrid.com>
> CC: Will Drewry <wad@chromium.org>
> CC: Kees Cook <keescook@chromium.org>
> CC: linux-kernel@vger.kernel.org
> ---
>  arch/x86/lib/thunk_64.S | 46 ++++++++++++++++++++++++++++++++++++++++++----
>  1 file changed, 42 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S
> index b30b5eb..8ec443a 100644
> --- a/arch/x86/lib/thunk_64.S
> +++ b/arch/x86/lib/thunk_64.S
> @@ -17,9 +17,27 @@
>  	CFI_STARTPROC
>  
>  	/* this one pushes 9 elems, the next one would be %rIP */
> -	SAVE_ARGS
> +	pushq_cfi %rdi
> +	CFI_REL_OFFSET rdi, 0

Btw, why the second CFI annotation?

pushq_cfi does already CFI_ADJUST_CFA_OFFSET 8. Can't we use one and
hide it in the macro?

Btw, patch boots fine in the guest.

-- 
Regards/Gruss,
    Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 3/4] x86: open-code register save/restore in trace_hardirqs thunks
  2015-01-10 22:00           ` Borislav Petkov
@ 2015-01-10 22:03             ` Denys Vlasenko
  2015-01-10 22:04             ` Andy Lutomirski
  1 sibling, 0 replies; 29+ messages in thread
From: Denys Vlasenko @ 2015-01-10 22:03 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Denys Vlasenko, Linux Kernel Mailing List, Linus Torvalds,
	Oleg Nesterov, H. Peter Anvin, Andy Lutomirski,
	Frederic Weisbecker, X86 ML, Alexei Starovoitov, Will Drewry,
	Kees Cook

On Sat, Jan 10, 2015 at 11:00 PM, Borislav Petkov <bp@alien8.de> wrote:
>> --- a/arch/x86/lib/thunk_64.S
>> +++ b/arch/x86/lib/thunk_64.S
>> @@ -17,9 +17,27 @@
>>       CFI_STARTPROC
>>
>>       /* this one pushes 9 elems, the next one would be %rIP */
>> -     SAVE_ARGS
>> +     pushq_cfi %rdi
>> +     CFI_REL_OFFSET rdi, 0
>
> Btw, why the second CFI annotation?
>
> pushq_cfi does already CFI_ADJUST_CFA_OFFSET 8. Can't we use one and
> hide it in the macro?

We probably should.

Since thunk_32.S uses the very same construct, I think it should be fixed
in both files by one patch.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 3/4] x86: open-code register save/restore in trace_hardirqs thunks
  2015-01-10 22:00           ` Borislav Petkov
  2015-01-10 22:03             ` Denys Vlasenko
@ 2015-01-10 22:04             ` Andy Lutomirski
  1 sibling, 0 replies; 29+ messages in thread
From: Andy Lutomirski @ 2015-01-10 22:04 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Denys Vlasenko, Denys Vlasenko, Linux Kernel Mailing List,
	Linus Torvalds, Oleg Nesterov, H. Peter Anvin,
	Frederic Weisbecker, X86 ML, Alexei Starovoitov, Will Drewry,
	Kees Cook

On Sat, Jan 10, 2015 at 2:00 PM, Borislav Petkov <bp@alien8.de> wrote:
> On Sat, Jan 10, 2015 at 09:14:03PM +0100, Denys Vlasenko wrote:
>> From 2f636e0a92db898f2bdb592027aa302fcb32a326 Mon Sep 17 00:00:00 2001
>> From: Denys Vlasenko <dvlasenk@redhat.com>
>> To: linux-kernel@vger.kernel.org
>> Subject: [PATCH 3/4] x86: open-code register save/restore in trace_hardirqs thunks
>>
>> This is a preparatory patch for change in "struct pt_regs"
>> handling in entry_64.S.
>>
>> trace_hardirqs thunks were (ab)using a part of pt_regs
>> handling code, namely SAVE_ARGS/RESTORE_ARGS macros,
>> to save/restore registers across C function calls.
>>
>> Since SAVE_ARGS is going to be changed, open-code
>> register saving/restoring here. Take a page from thunk_32.S
>> and use push/pop insns instead of movq, they are far shorter:
>> 1 or 2 bytes versus 5, and no need for insns to adjust %rsp:
>>
>>    text          data     bss     dec     hex filename
>>     333            40       0     373     175 thunk_64_movq.o
>>     104            40       0     144      90 thunk_64_push_pop.o
>>
>> Incidentally, this removes a bit of dead code:
>> one SAVE_ARGS was used just to emit a CFI annotation,
>> but it also generated unreachable assembly insns.
>>
>> Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
>> CC: Linus Torvalds <torvalds@linux-foundation.org>
>> CC: Oleg Nesterov <oleg@redhat.com>
>> CC: "H. Peter Anvin" <hpa@zytor.com>
>> CC: Borislav Petkov <bp@alien8.de>
>> CC: Andy Lutomirski <luto@amacapital.net>
>> CC: Frederic Weisbecker <fweisbec@gmail.com>
>> CC: X86 ML <x86@kernel.org>
>> CC: Alexei Starovoitov <ast@plumgrid.com>
>> CC: Will Drewry <wad@chromium.org>
>> CC: Kees Cook <keescook@chromium.org>
>> CC: linux-kernel@vger.kernel.org
>> ---
>>  arch/x86/lib/thunk_64.S | 46 ++++++++++++++++++++++++++++++++++++++++++----
>>  1 file changed, 42 insertions(+), 4 deletions(-)
>>
>> diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S
>> index b30b5eb..8ec443a 100644
>> --- a/arch/x86/lib/thunk_64.S
>> +++ b/arch/x86/lib/thunk_64.S
>> @@ -17,9 +17,27 @@
>>       CFI_STARTPROC
>>
>>       /* this one pushes 9 elems, the next one would be %rIP */
>> -     SAVE_ARGS
>> +     pushq_cfi %rdi
>> +     CFI_REL_OFFSET rdi, 0
>
> Btw, why the second CFI annotation?
>
> pushq_cfi does already CFI_ADJUST_CFA_OFFSET 8. Can't we use one and
> hide it in the macro?

By my imperfect understanding of CFI:

CFI_ADJUST_CFA_OFFSET means that the offset between rsp and the
"canonical frame address" is increased by 8 (because we just
subtracted 8 from rsp) and CFI_REL_OFFSET reg, 0 means that the
unwinder can find reg at offset 0 + (cfa offset here) from the CFA.

IOW, one is to keep the stack frame tracking consistent and the other
is to tell the unwinder about the register we just saved.

--Andy

>
> Btw, patch boots fine in the guest.
>
> --
> Regards/Gruss,
>     Boris.
>
> Sent from a fat crate under my desk. Formatting is fine.
> --



-- 
Andy Lutomirski
AMA Capital Management, LLC

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 1/4] x86: entry_64.S: delete unused code
  2015-01-08 18:16   ` Borislav Petkov
@ 2015-01-13 22:01     ` Andy Lutomirski
  0 siblings, 0 replies; 29+ messages in thread
From: Andy Lutomirski @ 2015-01-13 22:01 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Denys Vlasenko, linux-kernel, Linus Torvalds, Oleg Nesterov,
	H. Peter Anvin, Frederic Weisbecker, X86 ML, Alexei Starovoitov,
	Will Drewry, Kees Cook

On Thu, Jan 8, 2015 at 10:16 AM, Borislav Petkov <bp@alien8.de> wrote:
> On Thu, Jan 08, 2015 at 05:25:12PM +0100, Denys Vlasenko wrote:
>> A define, two macros and an unreferenced bit of assembly are gone.
>>
>> Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
>> CC: Linus Torvalds <torvalds@linux-foundation.org>
>> CC: Oleg Nesterov <oleg@redhat.com>
>> CC: "H. Peter Anvin" <hpa@zytor.com>
>> CC: Andy Lutomirski <luto@amacapital.net>
>> CC: Frederic Weisbecker <fweisbec@gmail.com>
>> CC: X86 ML <x86@kernel.org>
>> CC: Alexei Starovoitov <ast@plumgrid.com>
>> CC: Will Drewry <wad@chromium.org>
>> CC: Kees Cook <keescook@chromium.org>
>> CC: linux-kernel@vger.kernel.org
>>
>> Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
>
> Acked-by: Borislav Petkov <bp@suse.de>
>

Applied to luto/x86/entry.  We'll see how this "maintenance" thing goes. :)

--Andy

> --
> Regards/Gruss,
>     Boris.
>
> Sent from a fat crate under my desk. Formatting is fine.
> --



-- 
Andy Lutomirski
AMA Capital Management, LLC

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 4/4] x86: entry_64.S: fold SAVE_ARGS_IRQ macro into its sole user
  2015-01-10 22:00 ` [PATCH 4/4] x86: entry_64.S: fold SAVE_ARGS_IRQ macro into its sole user Denys Vlasenko
@ 2015-01-13 22:26   ` Andy Lutomirski
  0 siblings, 0 replies; 29+ messages in thread
From: Andy Lutomirski @ 2015-01-13 22:26 UTC (permalink / raw)
  To: Denys Vlasenko
  Cc: linux-kernel, Linus Torvalds, Oleg Nesterov, H. Peter Anvin,
	Borislav Petkov, Frederic Weisbecker, X86 ML, Alexei Starovoitov,
	Will Drewry, Kees Cook

On Sat, Jan 10, 2015 at 2:00 PM, Denys Vlasenko <dvlasenk@redhat.com> wrote:
> No code changes.
>
> This is a preparatory patch for change in "struct pt_regs" handling.
>
> Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
> CC: Linus Torvalds <torvalds@linux-foundation.org>
> CC: Oleg Nesterov <oleg@redhat.com>
> CC: "H. Peter Anvin" <hpa@zytor.com>
> CC: Borislav Petkov <bp@alien8.de>
> CC: Andy Lutomirski <luto@amacapital.net>
> CC: Frederic Weisbecker <fweisbec@gmail.com>
> CC: X86 ML <x86@kernel.org>
> CC: Alexei Starovoitov <ast@plumgrid.com>
> CC: Will Drewry <wad@chromium.org>
> CC: Kees Cook <keescook@chromium.org>
> CC: linux-kernel@vger.kernel.org

Applied.

I have *not* applied patch 3 or the new restore patch, since I got
lost in the patch attachments and long discussion.  Can you post
incremental patches on top of:

https://git.kernel.org/cgit/linux/kernel/git/luto/linux.git/log/?h=x86/entry

Thanks,
Andy

^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH 4/4] x86: entry_64.S: fold SAVE_ARGS_IRQ macro into its sole user
  2015-01-10 22:00 [PATCH 0/4 v2] x86: entry.S cleanup Denys Vlasenko
@ 2015-01-10 22:00 ` Denys Vlasenko
  2015-01-13 22:26   ` Andy Lutomirski
  0 siblings, 1 reply; 29+ messages in thread
From: Denys Vlasenko @ 2015-01-10 22:00 UTC (permalink / raw)
  To: linux-kernel
  Cc: Denys Vlasenko, Linus Torvalds, Oleg Nesterov, H. Peter Anvin,
	Borislav Petkov, Andy Lutomirski, Frederic Weisbecker, X86 ML,
	Alexei Starovoitov, Will Drewry, Kees Cook

No code changes.

This is a preparatory patch for change in "struct pt_regs" handling.

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
CC: Linus Torvalds <torvalds@linux-foundation.org>
CC: Oleg Nesterov <oleg@redhat.com>
CC: "H. Peter Anvin" <hpa@zytor.com>
CC: Borislav Petkov <bp@alien8.de>
CC: Andy Lutomirski <luto@amacapital.net>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: X86 ML <x86@kernel.org>
CC: Alexei Starovoitov <ast@plumgrid.com>
CC: Will Drewry <wad@chromium.org>
CC: Kees Cook <keescook@chromium.org>
CC: linux-kernel@vger.kernel.org
---
 arch/x86/kernel/entry_64.S | 88 ++++++++++++++++++++++------------------------
 1 file changed, 42 insertions(+), 46 deletions(-)

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 5ed4773..7d59df2 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -217,51 +217,6 @@ ENDPROC(native_usergs_sysret64)
 	CFI_REL_OFFSET r15, R15+\offset
 	.endm
 
-/* save partial stack frame */
-	.macro SAVE_ARGS_IRQ
-	cld
-	/* start from rbp in pt_regs and jump over */
-	movq_cfi rdi, (RDI-RBP)
-	movq_cfi rsi, (RSI-RBP)
-	movq_cfi rdx, (RDX-RBP)
-	movq_cfi rcx, (RCX-RBP)
-	movq_cfi rax, (RAX-RBP)
-	movq_cfi  r8,  (R8-RBP)
-	movq_cfi  r9,  (R9-RBP)
-	movq_cfi r10, (R10-RBP)
-	movq_cfi r11, (R11-RBP)
-
-	/* Save rbp so that we can unwind from get_irq_regs() */
-	movq_cfi rbp, 0
-
-	/* Save previous stack value */
-	movq %rsp, %rsi
-
-	leaq -RBP(%rsp),%rdi	/* arg1 for handler */
-	testl $3, CS-RBP(%rsi)
-	je 1f
-	SWAPGS
-	/*
-	 * irq_count is used to check if a CPU is already on an interrupt stack
-	 * or not. While this is essentially redundant with preempt_count it is
-	 * a little cheaper to use a separate counter in the PDA (short of
-	 * moving irq_enter into assembly, which would be too much work)
-	 */
-1:	incl PER_CPU_VAR(irq_count)
-	cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
-	CFI_DEF_CFA_REGISTER	rsi
-
-	/* Store previous stack value */
-	pushq %rsi
-	CFI_ESCAPE	0x0f /* DW_CFA_def_cfa_expression */, 6, \
-			0x77 /* DW_OP_breg7 */, 0, \
-			0x06 /* DW_OP_deref */, \
-			0x08 /* DW_OP_const1u */, SS+8-RBP, \
-			0x22 /* DW_OP_plus */
-	/* We entered an interrupt context - irqs are off: */
-	TRACE_IRQS_OFF
-	.endm
-
 ENTRY(save_paranoid)
 	XCPT_FRAME 1 RDI+8
 	cld
@@ -745,7 +700,48 @@ END(interrupt)
 	/* reserve pt_regs for scratch regs and rbp */
 	subq $ORIG_RAX-RBP, %rsp
 	CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP
-	SAVE_ARGS_IRQ
+	cld
+	/* start from rbp in pt_regs and jump over */
+	movq_cfi rdi, (RDI-RBP)
+	movq_cfi rsi, (RSI-RBP)
+	movq_cfi rdx, (RDX-RBP)
+	movq_cfi rcx, (RCX-RBP)
+	movq_cfi rax, (RAX-RBP)
+	movq_cfi  r8,  (R8-RBP)
+	movq_cfi  r9,  (R9-RBP)
+	movq_cfi r10, (R10-RBP)
+	movq_cfi r11, (R11-RBP)
+
+	/* Save rbp so that we can unwind from get_irq_regs() */
+	movq_cfi rbp, 0
+
+	/* Save previous stack value */
+	movq %rsp, %rsi
+
+	leaq -RBP(%rsp),%rdi	/* arg1 for handler */
+	testl $3, CS-RBP(%rsi)
+	je 1f
+	SWAPGS
+	/*
+	 * irq_count is used to check if a CPU is already on an interrupt stack
+	 * or not. While this is essentially redundant with preempt_count it is
+	 * a little cheaper to use a separate counter in the PDA (short of
+	 * moving irq_enter into assembly, which would be too much work)
+	 */
+1:	incl PER_CPU_VAR(irq_count)
+	cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
+	CFI_DEF_CFA_REGISTER	rsi
+
+	/* Store previous stack value */
+	pushq %rsi
+	CFI_ESCAPE	0x0f /* DW_CFA_def_cfa_expression */, 6, \
+			0x77 /* DW_OP_breg7 */, 0, \
+			0x06 /* DW_OP_deref */, \
+			0x08 /* DW_OP_const1u */, SS+8-RBP, \
+			0x22 /* DW_OP_plus */
+	/* We entered an interrupt context - irqs are off: */
+	TRACE_IRQS_OFF
+
 	call \func
 	.endm
 
-- 
1.8.1.4


^ permalink raw reply related	[flat|nested] 29+ messages in thread

end of thread, other threads:[~2015-01-13 22:27 UTC | newest]

Thread overview: 29+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-01-08 16:25 [PATCH 0/4] x86: entry.S cleanup Denys Vlasenko
2015-01-08 16:25 ` [PATCH 1/4] x86: entry_64.S: delete unused code Denys Vlasenko
2015-01-08 18:16   ` Borislav Petkov
2015-01-13 22:01     ` Andy Lutomirski
2015-01-08 16:25 ` [PATCH 2/4] x86: ia32entry.S: fix wrong symbolic constant usage: R11->ARGOFFSET Denys Vlasenko
2015-01-09 10:41   ` Borislav Petkov
2015-01-08 16:25 ` [PATCH 3/4] x86: open-code register save/restore in trace_hardirqs thunks Denys Vlasenko
2015-01-09 10:55   ` Borislav Petkov
2015-01-09 20:29     ` Denys Vlasenko
2015-01-10 13:52       ` Borislav Petkov
2015-01-09 12:19   ` Borislav Petkov
2015-01-09 18:54     ` Denys Vlasenko
2015-01-10 14:23       ` Borislav Petkov
2015-01-10 20:14         ` Denys Vlasenko
2015-01-10 20:17           ` Andy Lutomirski
2015-01-10 20:42             ` Borislav Petkov
2015-01-10 21:02               ` Andy Lutomirski
2015-01-10 21:09                 ` Denys Vlasenko
2015-01-10 21:27                   ` Linus Torvalds
2015-01-10 21:57                     ` Denys Vlasenko
2015-01-10 20:43             ` Denys Vlasenko
2015-01-10 21:08             ` Linus Torvalds
2015-01-10 21:26               ` Borislav Petkov
2015-01-10 22:00           ` Borislav Petkov
2015-01-10 22:03             ` Denys Vlasenko
2015-01-10 22:04             ` Andy Lutomirski
2015-01-08 16:25 ` [PATCH 4/4] x86: entry_64.S: fold SAVE_ARGS_IRQ macro into its sole user Denys Vlasenko
2015-01-10 22:00 [PATCH 0/4 v2] x86: entry.S cleanup Denys Vlasenko
2015-01-10 22:00 ` [PATCH 4/4] x86: entry_64.S: fold SAVE_ARGS_IRQ macro into its sole user Denys Vlasenko
2015-01-13 22:26   ` Andy Lutomirski

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.