All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/4 v2] x86: entry.S cleanup
@ 2015-01-10 22:00 Denys Vlasenko
  2015-01-10 22:00 ` [PATCH 1/4] x86: entry_64.S: delete unused code Denys Vlasenko
                   ` (3 more replies)
  0 siblings, 4 replies; 130+ messages in thread
From: Denys Vlasenko @ 2015-01-10 22:00 UTC (permalink / raw)
  To: linux-kernel
  Cc: Denys Vlasenko, Linus Torvalds, Oleg Nesterov, H. Peter Anvin,
	Borislav Petkov, Andy Lutomirski, Frederic Weisbecker, X86 ML,
	Alexei Starovoitov, Will Drewry, Kees Cook

This is the first part of a bigger patch set I posted last August,
then under title of "x86: entry.S optimizations".

I fully rebased that patch set to Linus tree, but currently
Linus and -next are diverging a bit in arch/x86/kernel/entry_64.S

To make it easier for mainteiners, I'm splitting the old patch set
into more digestible parts.

This is the first set. These four patches apply both to Linus and -next.
They do not contain any substantial code changes.
Patches 2 and 4 don't result in any actual code changes.

Changes since v1:
Patch #3 had a bug.

CC: Linus Torvalds <torvalds@linux-foundation.org>
CC: Oleg Nesterov <oleg@redhat.com>
CC: "H. Peter Anvin" <hpa@zytor.com>
CC: Borislav Petkov <bp@alien8.de>
CC: Andy Lutomirski <luto@amacapital.net>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: X86 ML <x86@kernel.org>
CC: Alexei Starovoitov <ast@plumgrid.com>
CC: Will Drewry <wad@chromium.org>
CC: Kees Cook <keescook@chromium.org>
CC: linux-kernel@vger.kernel.org

Denys Vlasenko (4):
  x86: entry_64.S: delete unused code
  x86: ia32entry.S: fix wrong symbolic constant usage: R11->ARGOFFSET
  x86: open-code register save/restore in trace_hardirqs thunks
  x86: entry_64.S: fold SAVE_ARGS_IRQ macro into its sole user

 arch/x86/ia32/ia32entry.S      |   4 +-
 arch/x86/include/asm/calling.h |   1 -
 arch/x86/kernel/entry_64.S     | 122 ++++++++++++++---------------------------
 arch/x86/lib/thunk_64.S        |  29 ++++++++--
 4 files changed, 68 insertions(+), 88 deletions(-)

-- 
1.8.1.4


^ permalink raw reply	[flat|nested] 130+ messages in thread

* [PATCH 1/4] x86: entry_64.S: delete unused code
  2015-01-10 22:00 [PATCH 0/4 v2] x86: entry.S cleanup Denys Vlasenko
@ 2015-01-10 22:00 ` Denys Vlasenko
  2015-01-10 22:12   ` Andy Lutomirski
  2015-01-10 22:00 ` [PATCH 2/4] x86: ia32entry.S: fix wrong symbolic constant usage: R11->ARGOFFSET Denys Vlasenko
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 130+ messages in thread
From: Denys Vlasenko @ 2015-01-10 22:00 UTC (permalink / raw)
  To: linux-kernel
  Cc: Denys Vlasenko, Linus Torvalds, Oleg Nesterov, H. Peter Anvin,
	Borislav Petkov, Andy Lutomirski, Frederic Weisbecker, X86 ML,
	Alexei Starovoitov, Will Drewry, Kees Cook

A define, two macros and an unreferenced bit of assembly are gone.

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
Acked-by: Borislav Petkov <bp@suse.de>
CC: Linus Torvalds <torvalds@linux-foundation.org>
CC: Oleg Nesterov <oleg@redhat.com>
CC: "H. Peter Anvin" <hpa@zytor.com>
CC: Borislav Petkov <bp@alien8.de>
CC: Andy Lutomirski <luto@amacapital.net>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: X86 ML <x86@kernel.org>
CC: Alexei Starovoitov <ast@plumgrid.com>
CC: Will Drewry <wad@chromium.org>
CC: Kees Cook <keescook@chromium.org>
CC: linux-kernel@vger.kernel.org
---
 arch/x86/include/asm/calling.h |  1 -
 arch/x86/kernel/entry_64.S     | 34 ----------------------------------
 2 files changed, 35 deletions(-)

diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h
index 76659b6..1f1297b 100644
--- a/arch/x86/include/asm/calling.h
+++ b/arch/x86/include/asm/calling.h
@@ -83,7 +83,6 @@ For 32-bit we have the following conventions - kernel is built with
 #define SS		160
 
 #define ARGOFFSET	R11
-#define SWFRAME		ORIG_RAX
 
 	.macro SAVE_ARGS addskip=0, save_rcx=1, save_r891011=1, rax_enosys=0
 	subq  $9*8+\addskip, %rsp
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 931f32f..5ed4773 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -155,27 +155,6 @@ ENDPROC(native_usergs_sysret64)
 	movq \tmp,R11+\offset(%rsp)
 	.endm
 
-	.macro FAKE_STACK_FRAME child_rip
-	/* push in order ss, rsp, eflags, cs, rip */
-	xorl %eax, %eax
-	pushq_cfi $__KERNEL_DS /* ss */
-	/*CFI_REL_OFFSET	ss,0*/
-	pushq_cfi %rax /* rsp */
-	CFI_REL_OFFSET	rsp,0
-	pushq_cfi $(X86_EFLAGS_IF|X86_EFLAGS_FIXED) /* eflags - interrupts on */
-	/*CFI_REL_OFFSET	rflags,0*/
-	pushq_cfi $__KERNEL_CS /* cs */
-	/*CFI_REL_OFFSET	cs,0*/
-	pushq_cfi \child_rip /* rip */
-	CFI_REL_OFFSET	rip,0
-	pushq_cfi %rax /* orig rax */
-	.endm
-
-	.macro UNFAKE_STACK_FRAME
-	addq $8*6, %rsp
-	CFI_ADJUST_CFA_OFFSET	-(6*8)
-	.endm
-
 /*
  * initial frame state for interrupts (and exceptions without error code)
  */
@@ -626,19 +605,6 @@ END(\label)
 	FORK_LIKE  vfork
 	FIXED_FRAME stub_iopl, sys_iopl
 
-ENTRY(ptregscall_common)
-	DEFAULT_FRAME 1 8	/* offset 8: return address */
-	RESTORE_TOP_OF_STACK %r11, 8
-	movq_cfi_restore R15+8, r15
-	movq_cfi_restore R14+8, r14
-	movq_cfi_restore R13+8, r13
-	movq_cfi_restore R12+8, r12
-	movq_cfi_restore RBP+8, rbp
-	movq_cfi_restore RBX+8, rbx
-	ret $REST_SKIP		/* pop extended registers */
-	CFI_ENDPROC
-END(ptregscall_common)
-
 ENTRY(stub_execve)
 	CFI_STARTPROC
 	addq $8, %rsp
-- 
1.8.1.4


^ permalink raw reply	[flat|nested] 130+ messages in thread

* [PATCH 2/4] x86: ia32entry.S: fix wrong symbolic constant usage: R11->ARGOFFSET
  2015-01-10 22:00 [PATCH 0/4 v2] x86: entry.S cleanup Denys Vlasenko
  2015-01-10 22:00 ` [PATCH 1/4] x86: entry_64.S: delete unused code Denys Vlasenko
@ 2015-01-10 22:00 ` Denys Vlasenko
  2015-01-10 22:13   ` Andy Lutomirski
  2015-01-13 22:11   ` Andy Lutomirski
  2015-01-10 22:00 ` [PATCH 3/4] x86: open-code register save/restore in trace_hardirqs thunks Denys Vlasenko
  2015-01-10 22:00 ` [PATCH 4/4] x86: entry_64.S: fold SAVE_ARGS_IRQ macro into its sole user Denys Vlasenko
  3 siblings, 2 replies; 130+ messages in thread
From: Denys Vlasenko @ 2015-01-10 22:00 UTC (permalink / raw)
  To: linux-kernel
  Cc: Denys Vlasenko, Linus Torvalds, Oleg Nesterov, H. Peter Anvin,
	Borislav Petkov, Andy Lutomirski, Frederic Weisbecker, X86 ML,
	Alexei Starovoitov, Will Drewry, Kees Cook

The values of these two constants are the same, the meaning is different.

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
Acked-by: Borislav Petkov <bp@suse.de>
CC: Linus Torvalds <torvalds@linux-foundation.org>
CC: Oleg Nesterov <oleg@redhat.com>
CC: "H. Peter Anvin" <hpa@zytor.com>
CC: Borislav Petkov <bp@alien8.de>
CC: Andy Lutomirski <luto@amacapital.net>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: X86 ML <x86@kernel.org>
CC: Alexei Starovoitov <ast@plumgrid.com>
CC: Will Drewry <wad@chromium.org>
CC: Kees Cook <keescook@chromium.org>
CC: linux-kernel@vger.kernel.org
---
 arch/x86/ia32/ia32entry.S | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 82e8a1d..156ebca 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -179,8 +179,8 @@ sysenter_dispatch:
 sysexit_from_sys_call:
 	andl    $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	/* clear IF, that popfq doesn't enable interrupts early */
-	andl  $~0x200,EFLAGS-R11(%rsp) 
-	movl	RIP-R11(%rsp),%edx		/* User %eip */
+	andl	$~0x200,EFLAGS-ARGOFFSET(%rsp)
+	movl	RIP-ARGOFFSET(%rsp),%edx		/* User %eip */
 	CFI_REGISTER rip,rdx
 	RESTORE_ARGS 0,24,0,0,0,0
 	xorq	%r8,%r8
-- 
1.8.1.4


^ permalink raw reply	[flat|nested] 130+ messages in thread

* [PATCH 3/4] x86: open-code register save/restore in trace_hardirqs thunks
  2015-01-10 22:00 [PATCH 0/4 v2] x86: entry.S cleanup Denys Vlasenko
  2015-01-10 22:00 ` [PATCH 1/4] x86: entry_64.S: delete unused code Denys Vlasenko
  2015-01-10 22:00 ` [PATCH 2/4] x86: ia32entry.S: fix wrong symbolic constant usage: R11->ARGOFFSET Denys Vlasenko
@ 2015-01-10 22:00 ` Denys Vlasenko
  2015-01-10 22:07   ` Linus Torvalds
  2015-02-11  2:38   ` Andy Lutomirski
  2015-01-10 22:00 ` [PATCH 4/4] x86: entry_64.S: fold SAVE_ARGS_IRQ macro into its sole user Denys Vlasenko
  3 siblings, 2 replies; 130+ messages in thread
From: Denys Vlasenko @ 2015-01-10 22:00 UTC (permalink / raw)
  To: linux-kernel
  Cc: Denys Vlasenko, Linus Torvalds, Oleg Nesterov, H. Peter Anvin,
	Borislav Petkov, Andy Lutomirski, Frederic Weisbecker, X86 ML,
	Alexei Starovoitov, Will Drewry, Kees Cook

This is a preparatory patch for change in "struct pt_regs"
handling in entry_64.S.

trace_hardirqs thunks were (ab)using a part of pt_regs
handling code, namely SAVE_ARGS/RESTORE_ARGS macros,
to save/restore registers across C function calls.

Since SAVE_ARGS is going to be changed, open-code
register saving/restoring here.

Incidentally, this removes a bit of dead code:
one SAVE_ARGS was used just to emit a CFI annotation,
but it also generated unreachable assembly insns.

Take a page from thunk_32.S and use push/pop insns
instead of movq, they are far shorter:
1 or 2 bytes versus 5, and no need for insns to adjust %rsp:

   text	   data	    bss	    dec	    hex	filename
    333	     40	      0	    373	    175	thunk_64_movq.o
    104	     40	      0	    144	     90	thunk_64_push_pop.o

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
CC: Linus Torvalds <torvalds@linux-foundation.org>
CC: Oleg Nesterov <oleg@redhat.com>
CC: "H. Peter Anvin" <hpa@zytor.com>
CC: Borislav Petkov <bp@alien8.de>
CC: Andy Lutomirski <luto@amacapital.net>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: X86 ML <x86@kernel.org>
CC: Alexei Starovoitov <ast@plumgrid.com>
CC: Will Drewry <wad@chromium.org>
CC: Kees Cook <keescook@chromium.org>
CC: linux-kernel@vger.kernel.org
---
 arch/x86/lib/thunk_64.S | 46 ++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 42 insertions(+), 4 deletions(-)

diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S
index b30b5eb..8ec443a 100644
--- a/arch/x86/lib/thunk_64.S
+++ b/arch/x86/lib/thunk_64.S
@@ -17,9 +17,27 @@
 	CFI_STARTPROC
 
 	/* this one pushes 9 elems, the next one would be %rIP */
-	SAVE_ARGS
+	pushq_cfi %rdi
+	CFI_REL_OFFSET rdi, 0
+	pushq_cfi %rsi
+	CFI_REL_OFFSET rsi, 0
+	pushq_cfi %rdx
+	CFI_REL_OFFSET rdx, 0
+	pushq_cfi %rcx
+	CFI_REL_OFFSET rcx, 0
+	pushq_cfi %rax
+	CFI_REL_OFFSET rax, 0
+	pushq_cfi %r8
+	CFI_REL_OFFSET r8, 0
+	pushq_cfi %r9
+	CFI_REL_OFFSET r9, 0
+	pushq_cfi %r10
+	CFI_REL_OFFSET r10, 0
+	pushq_cfi %r11
+	CFI_REL_OFFSET r11, 0
 
 	.if \put_ret_addr_in_rdi
+	/* 9*8(%rsp) is return addr on stack */
 	movq_cfi_restore 9*8, rdi
 	.endif
 
@@ -45,11 +63,31 @@
 #endif
 #endif
 
-	/* SAVE_ARGS below is used only for the .cfi directives it contains. */
+#if defined(CONFIG_TRACE_IRQFLAGS) \
+ || defined(CONFIG_DEBUG_LOCK_ALLOC) \
+ || defined(CONFIG_PREEMPT)
 	CFI_STARTPROC
-	SAVE_ARGS
+	CFI_ADJUST_CFA_OFFSET 9*8
 restore:
-	RESTORE_ARGS
+	popq_cfi %r11
+	CFI_RESTORE r11
+	popq_cfi %r10
+	CFI_RESTORE r10
+	popq_cfi %r9
+	CFI_RESTORE r9
+	popq_cfi %r8
+	CFI_RESTORE r8
+	popq_cfi %rax
+	CFI_RESTORE rax
+	popq_cfi %rcx
+	CFI_RESTORE rcx
+	popq_cfi %rdx
+	CFI_RESTORE rdx
+	popq_cfi %rsi
+	CFI_RESTORE rsi
+	popq_cfi %rdi
+	CFI_RESTORE rdi
 	ret
 	CFI_ENDPROC
 	_ASM_NOKPROBE(restore)
+#endif
-- 
1.8.1.4


^ permalink raw reply	[flat|nested] 130+ messages in thread

* [PATCH 4/4] x86: entry_64.S: fold SAVE_ARGS_IRQ macro into its sole user
  2015-01-10 22:00 [PATCH 0/4 v2] x86: entry.S cleanup Denys Vlasenko
                   ` (2 preceding siblings ...)
  2015-01-10 22:00 ` [PATCH 3/4] x86: open-code register save/restore in trace_hardirqs thunks Denys Vlasenko
@ 2015-01-10 22:00 ` Denys Vlasenko
  2015-01-13 22:26   ` Andy Lutomirski
  3 siblings, 1 reply; 130+ messages in thread
From: Denys Vlasenko @ 2015-01-10 22:00 UTC (permalink / raw)
  To: linux-kernel
  Cc: Denys Vlasenko, Linus Torvalds, Oleg Nesterov, H. Peter Anvin,
	Borislav Petkov, Andy Lutomirski, Frederic Weisbecker, X86 ML,
	Alexei Starovoitov, Will Drewry, Kees Cook

No code changes.

This is a preparatory patch for change in "struct pt_regs" handling.

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
CC: Linus Torvalds <torvalds@linux-foundation.org>
CC: Oleg Nesterov <oleg@redhat.com>
CC: "H. Peter Anvin" <hpa@zytor.com>
CC: Borislav Petkov <bp@alien8.de>
CC: Andy Lutomirski <luto@amacapital.net>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: X86 ML <x86@kernel.org>
CC: Alexei Starovoitov <ast@plumgrid.com>
CC: Will Drewry <wad@chromium.org>
CC: Kees Cook <keescook@chromium.org>
CC: linux-kernel@vger.kernel.org
---
 arch/x86/kernel/entry_64.S | 88 ++++++++++++++++++++++------------------------
 1 file changed, 42 insertions(+), 46 deletions(-)

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 5ed4773..7d59df2 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -217,51 +217,6 @@ ENDPROC(native_usergs_sysret64)
 	CFI_REL_OFFSET r15, R15+\offset
 	.endm
 
-/* save partial stack frame */
-	.macro SAVE_ARGS_IRQ
-	cld
-	/* start from rbp in pt_regs and jump over */
-	movq_cfi rdi, (RDI-RBP)
-	movq_cfi rsi, (RSI-RBP)
-	movq_cfi rdx, (RDX-RBP)
-	movq_cfi rcx, (RCX-RBP)
-	movq_cfi rax, (RAX-RBP)
-	movq_cfi  r8,  (R8-RBP)
-	movq_cfi  r9,  (R9-RBP)
-	movq_cfi r10, (R10-RBP)
-	movq_cfi r11, (R11-RBP)
-
-	/* Save rbp so that we can unwind from get_irq_regs() */
-	movq_cfi rbp, 0
-
-	/* Save previous stack value */
-	movq %rsp, %rsi
-
-	leaq -RBP(%rsp),%rdi	/* arg1 for handler */
-	testl $3, CS-RBP(%rsi)
-	je 1f
-	SWAPGS
-	/*
-	 * irq_count is used to check if a CPU is already on an interrupt stack
-	 * or not. While this is essentially redundant with preempt_count it is
-	 * a little cheaper to use a separate counter in the PDA (short of
-	 * moving irq_enter into assembly, which would be too much work)
-	 */
-1:	incl PER_CPU_VAR(irq_count)
-	cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
-	CFI_DEF_CFA_REGISTER	rsi
-
-	/* Store previous stack value */
-	pushq %rsi
-	CFI_ESCAPE	0x0f /* DW_CFA_def_cfa_expression */, 6, \
-			0x77 /* DW_OP_breg7 */, 0, \
-			0x06 /* DW_OP_deref */, \
-			0x08 /* DW_OP_const1u */, SS+8-RBP, \
-			0x22 /* DW_OP_plus */
-	/* We entered an interrupt context - irqs are off: */
-	TRACE_IRQS_OFF
-	.endm
-
 ENTRY(save_paranoid)
 	XCPT_FRAME 1 RDI+8
 	cld
@@ -745,7 +700,48 @@ END(interrupt)
 	/* reserve pt_regs for scratch regs and rbp */
 	subq $ORIG_RAX-RBP, %rsp
 	CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP
-	SAVE_ARGS_IRQ
+	cld
+	/* start from rbp in pt_regs and jump over */
+	movq_cfi rdi, (RDI-RBP)
+	movq_cfi rsi, (RSI-RBP)
+	movq_cfi rdx, (RDX-RBP)
+	movq_cfi rcx, (RCX-RBP)
+	movq_cfi rax, (RAX-RBP)
+	movq_cfi  r8,  (R8-RBP)
+	movq_cfi  r9,  (R9-RBP)
+	movq_cfi r10, (R10-RBP)
+	movq_cfi r11, (R11-RBP)
+
+	/* Save rbp so that we can unwind from get_irq_regs() */
+	movq_cfi rbp, 0
+
+	/* Save previous stack value */
+	movq %rsp, %rsi
+
+	leaq -RBP(%rsp),%rdi	/* arg1 for handler */
+	testl $3, CS-RBP(%rsi)
+	je 1f
+	SWAPGS
+	/*
+	 * irq_count is used to check if a CPU is already on an interrupt stack
+	 * or not. While this is essentially redundant with preempt_count it is
+	 * a little cheaper to use a separate counter in the PDA (short of
+	 * moving irq_enter into assembly, which would be too much work)
+	 */
+1:	incl PER_CPU_VAR(irq_count)
+	cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
+	CFI_DEF_CFA_REGISTER	rsi
+
+	/* Store previous stack value */
+	pushq %rsi
+	CFI_ESCAPE	0x0f /* DW_CFA_def_cfa_expression */, 6, \
+			0x77 /* DW_OP_breg7 */, 0, \
+			0x06 /* DW_OP_deref */, \
+			0x08 /* DW_OP_const1u */, SS+8-RBP, \
+			0x22 /* DW_OP_plus */
+	/* We entered an interrupt context - irqs are off: */
+	TRACE_IRQS_OFF
+
 	call \func
 	.endm
 
-- 
1.8.1.4


^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH 3/4] x86: open-code register save/restore in trace_hardirqs thunks
  2015-01-10 22:00 ` [PATCH 3/4] x86: open-code register save/restore in trace_hardirqs thunks Denys Vlasenko
@ 2015-01-10 22:07   ` Linus Torvalds
  2015-01-10 22:35     ` Denys Vlasenko
  2015-02-11  2:38   ` Andy Lutomirski
  1 sibling, 1 reply; 130+ messages in thread
From: Linus Torvalds @ 2015-01-10 22:07 UTC (permalink / raw)
  To: Denys Vlasenko
  Cc: Linux Kernel Mailing List, Oleg Nesterov, H. Peter Anvin,
	Borislav Petkov, Andy Lutomirski, Frederic Weisbecker, X86 ML,
	Alexei Starovoitov, Will Drewry, Kees Cook

On Sat, Jan 10, 2015 at 2:00 PM, Denys Vlasenko <dvlasenk@redhat.com> wrote:
> +       pushq_cfi %rdi
> +       CFI_REL_OFFSET rdi, 0
> +       pushq_cfi %rsi
> +       CFI_REL_OFFSET rsi, 0
> +       pushq_cfi %rdx
> +       CFI_REL_OFFSET rdx, 0
..
Ugh.

This is too ugly, it needs to die.

Why doesn't pushq_cfi do that CFI_REL_OFFSET thing?

And if there is some good reason why, then please let's just make sure
that there is a sane way to write pushes and pops, without having that
insane CFI crap for every single damn case.

Because this is unreadable.

                       Linus

^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH 1/4] x86: entry_64.S: delete unused code
  2015-01-10 22:00 ` [PATCH 1/4] x86: entry_64.S: delete unused code Denys Vlasenko
@ 2015-01-10 22:12   ` Andy Lutomirski
  0 siblings, 0 replies; 130+ messages in thread
From: Andy Lutomirski @ 2015-01-10 22:12 UTC (permalink / raw)
  To: Denys Vlasenko
  Cc: linux-kernel, Linus Torvalds, Oleg Nesterov, H. Peter Anvin,
	Borislav Petkov, Frederic Weisbecker, X86 ML, Alexei Starovoitov,
	Will Drewry, Kees Cook

On Sat, Jan 10, 2015 at 2:00 PM, Denys Vlasenko <dvlasenk@redhat.com> wrote:
> A define, two macros and an unreferenced bit of assembly are gone.


Acked-by: Andy Lutomirski <luto@amacapital.net>

>
> Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
> Acked-by: Borislav Petkov <bp@suse.de>
> CC: Linus Torvalds <torvalds@linux-foundation.org>
> CC: Oleg Nesterov <oleg@redhat.com>
> CC: "H. Peter Anvin" <hpa@zytor.com>
> CC: Borislav Petkov <bp@alien8.de>
> CC: Andy Lutomirski <luto@amacapital.net>
> CC: Frederic Weisbecker <fweisbec@gmail.com>
> CC: X86 ML <x86@kernel.org>
> CC: Alexei Starovoitov <ast@plumgrid.com>
> CC: Will Drewry <wad@chromium.org>
> CC: Kees Cook <keescook@chromium.org>
> CC: linux-kernel@vger.kernel.org
> ---
>  arch/x86/include/asm/calling.h |  1 -
>  arch/x86/kernel/entry_64.S     | 34 ----------------------------------
>  2 files changed, 35 deletions(-)
>
> diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h
> index 76659b6..1f1297b 100644
> --- a/arch/x86/include/asm/calling.h
> +++ b/arch/x86/include/asm/calling.h
> @@ -83,7 +83,6 @@ For 32-bit we have the following conventions - kernel is built with
>  #define SS             160
>
>  #define ARGOFFSET      R11
> -#define SWFRAME                ORIG_RAX
>
>         .macro SAVE_ARGS addskip=0, save_rcx=1, save_r891011=1, rax_enosys=0
>         subq  $9*8+\addskip, %rsp
> diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
> index 931f32f..5ed4773 100644
> --- a/arch/x86/kernel/entry_64.S
> +++ b/arch/x86/kernel/entry_64.S
> @@ -155,27 +155,6 @@ ENDPROC(native_usergs_sysret64)
>         movq \tmp,R11+\offset(%rsp)
>         .endm
>
> -       .macro FAKE_STACK_FRAME child_rip
> -       /* push in order ss, rsp, eflags, cs, rip */
> -       xorl %eax, %eax
> -       pushq_cfi $__KERNEL_DS /* ss */
> -       /*CFI_REL_OFFSET        ss,0*/
> -       pushq_cfi %rax /* rsp */
> -       CFI_REL_OFFSET  rsp,0
> -       pushq_cfi $(X86_EFLAGS_IF|X86_EFLAGS_FIXED) /* eflags - interrupts on */
> -       /*CFI_REL_OFFSET        rflags,0*/
> -       pushq_cfi $__KERNEL_CS /* cs */
> -       /*CFI_REL_OFFSET        cs,0*/
> -       pushq_cfi \child_rip /* rip */
> -       CFI_REL_OFFSET  rip,0
> -       pushq_cfi %rax /* orig rax */
> -       .endm
> -
> -       .macro UNFAKE_STACK_FRAME
> -       addq $8*6, %rsp
> -       CFI_ADJUST_CFA_OFFSET   -(6*8)
> -       .endm
> -
>  /*
>   * initial frame state for interrupts (and exceptions without error code)
>   */
> @@ -626,19 +605,6 @@ END(\label)
>         FORK_LIKE  vfork
>         FIXED_FRAME stub_iopl, sys_iopl
>
> -ENTRY(ptregscall_common)
> -       DEFAULT_FRAME 1 8       /* offset 8: return address */
> -       RESTORE_TOP_OF_STACK %r11, 8
> -       movq_cfi_restore R15+8, r15
> -       movq_cfi_restore R14+8, r14
> -       movq_cfi_restore R13+8, r13
> -       movq_cfi_restore R12+8, r12
> -       movq_cfi_restore RBP+8, rbp
> -       movq_cfi_restore RBX+8, rbx
> -       ret $REST_SKIP          /* pop extended registers */
> -       CFI_ENDPROC
> -END(ptregscall_common)
> -
>  ENTRY(stub_execve)
>         CFI_STARTPROC
>         addq $8, %rsp
> --
> 1.8.1.4
>



-- 
Andy Lutomirski
AMA Capital Management, LLC

^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH 2/4] x86: ia32entry.S: fix wrong symbolic constant usage: R11->ARGOFFSET
  2015-01-10 22:00 ` [PATCH 2/4] x86: ia32entry.S: fix wrong symbolic constant usage: R11->ARGOFFSET Denys Vlasenko
@ 2015-01-10 22:13   ` Andy Lutomirski
  2015-01-10 22:27     ` Linus Torvalds
  2015-01-13 22:11   ` Andy Lutomirski
  1 sibling, 1 reply; 130+ messages in thread
From: Andy Lutomirski @ 2015-01-10 22:13 UTC (permalink / raw)
  To: Denys Vlasenko
  Cc: linux-kernel, Linus Torvalds, Oleg Nesterov, H. Peter Anvin,
	Borislav Petkov, Frederic Weisbecker, X86 ML, Alexei Starovoitov,
	Will Drewry, Kees Cook

On Sat, Jan 10, 2015 at 2:00 PM, Denys Vlasenko <dvlasenk@redhat.com> wrote:
> The values of these two constants are the same, the meaning is different.

Acked-by: Andy Lutomirski <luto@amacapital.net>

I'll make the same change to my pending entry work, and hopefully we
can avoid conflicts.

--Andy

>
> Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
> Acked-by: Borislav Petkov <bp@suse.de>
> CC: Linus Torvalds <torvalds@linux-foundation.org>
> CC: Oleg Nesterov <oleg@redhat.com>
> CC: "H. Peter Anvin" <hpa@zytor.com>
> CC: Borislav Petkov <bp@alien8.de>
> CC: Andy Lutomirski <luto@amacapital.net>
> CC: Frederic Weisbecker <fweisbec@gmail.com>
> CC: X86 ML <x86@kernel.org>
> CC: Alexei Starovoitov <ast@plumgrid.com>
> CC: Will Drewry <wad@chromium.org>
> CC: Kees Cook <keescook@chromium.org>
> CC: linux-kernel@vger.kernel.org
> ---
>  arch/x86/ia32/ia32entry.S | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
> index 82e8a1d..156ebca 100644
> --- a/arch/x86/ia32/ia32entry.S
> +++ b/arch/x86/ia32/ia32entry.S
> @@ -179,8 +179,8 @@ sysenter_dispatch:
>  sysexit_from_sys_call:
>         andl    $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
>         /* clear IF, that popfq doesn't enable interrupts early */
> -       andl  $~0x200,EFLAGS-R11(%rsp)
> -       movl    RIP-R11(%rsp),%edx              /* User %eip */
> +       andl    $~0x200,EFLAGS-ARGOFFSET(%rsp)
> +       movl    RIP-ARGOFFSET(%rsp),%edx                /* User %eip */
>         CFI_REGISTER rip,rdx
>         RESTORE_ARGS 0,24,0,0,0,0
>         xorq    %r8,%r8
> --
> 1.8.1.4
>



-- 
Andy Lutomirski
AMA Capital Management, LLC

^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH 2/4] x86: ia32entry.S: fix wrong symbolic constant usage: R11->ARGOFFSET
  2015-01-10 22:13   ` Andy Lutomirski
@ 2015-01-10 22:27     ` Linus Torvalds
  2015-01-10 22:35       ` Borislav Petkov
                         ` (2 more replies)
  0 siblings, 3 replies; 130+ messages in thread
From: Linus Torvalds @ 2015-01-10 22:27 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Denys Vlasenko, linux-kernel, Oleg Nesterov, H. Peter Anvin,
	Borislav Petkov, Frederic Weisbecker, X86 ML, Alexei Starovoitov,
	Will Drewry, Kees Cook

On Sat, Jan 10, 2015 at 2:13 PM, Andy Lutomirski <luto@amacapital.net> wrote:
>
> I'll make the same change to my pending entry work, and hopefully we
> can avoid conflicts.

That's not how conflicts work.

Either there is no overlap between the changes at all, in which case
it doesn't matter if you then also have Denys' changes in your tree.

Or you have other changes that change code around Denys' code, in
which case you'll get conflicts whether you have Denys' changes or not
(because two branches will be changing the same area differently, and
so there's a conflict that needs to resolve which side was right).

So the only way to avoid a conflict is to not touch the same code, or
to touch it *exactly* the same way in all respects.

Now, while the *conflict* is not something you can't avoid, some
conflicts are easier to resolve than others, and from a conflict
resolution standpoint it can make sense for your branch to include
Denys' changes.

Why? Because if whoever resolves the conflict sees that one branch is
a proper superset of the other branch, than the resolution is a much
more obvious "let's just take everything from one side" edit, rather
than having to pick-and-choose.

I I do actually agree with you taking the fixes (and maybe you should
*entirely* take ownership of all the entry_64.S changes, so that there
is no "other side" to conflict with at all!). I just wanted to point
out the actual effects from a conflict standpoint.

                          Linus

^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH 2/4] x86: ia32entry.S: fix wrong symbolic constant usage: R11->ARGOFFSET
  2015-01-10 22:27     ` Linus Torvalds
@ 2015-01-10 22:35       ` Borislav Petkov
  2015-01-10 22:41         ` Linus Torvalds
  2015-01-10 22:37       ` Linus Torvalds
  2015-01-10 23:27       ` Andy Lutomirski
  2 siblings, 1 reply; 130+ messages in thread
From: Borislav Petkov @ 2015-01-10 22:35 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Linus Torvalds, Denys Vlasenko, linux-kernel, Oleg Nesterov,
	H. Peter Anvin, Frederic Weisbecker, X86 ML, Alexei Starovoitov,
	Will Drewry, Kees Cook

On Sat, Jan 10, 2015 at 02:27:06PM -0800, Linus Torvalds wrote:
> I I do actually agree with you taking the fixes (and maybe you should
> *entirely* take ownership of all the entry_64.S changes, so that there
> is no "other side" to conflict with at all!). I just wanted to point
> out the actual effects from a conflict standpoint.

Haha, Linus just made you maintainer of entry_64.S :-)

-- 
Regards/Gruss,
    Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH 3/4] x86: open-code register save/restore in trace_hardirqs thunks
  2015-01-10 22:07   ` Linus Torvalds
@ 2015-01-10 22:35     ` Denys Vlasenko
  2015-01-10 22:41       ` Borislav Petkov
  0 siblings, 1 reply; 130+ messages in thread
From: Denys Vlasenko @ 2015-01-10 22:35 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Denys Vlasenko, Linux Kernel Mailing List, Oleg Nesterov,
	H. Peter Anvin, Borislav Petkov, Andy Lutomirski,
	Frederic Weisbecker, X86 ML, Alexei Starovoitov, Will Drewry,
	Kees Cook

On Sat, Jan 10, 2015 at 11:07 PM, Linus Torvalds
<torvalds@linux-foundation.org> wrote:
> On Sat, Jan 10, 2015 at 2:00 PM, Denys Vlasenko <dvlasenk@redhat.com> wrote:
>> +       pushq_cfi %rdi
>> +       CFI_REL_OFFSET rdi, 0
>> +       pushq_cfi %rsi
>> +       CFI_REL_OFFSET rsi, 0
>> +       pushq_cfi %rdx
>> +       CFI_REL_OFFSET rdx, 0
> ..
> Ugh.
>
> This is too ugly, it needs to die.
>
> Why doesn't pushq_cfi do that CFI_REL_OFFSET thing?

I don't know either... it seems to be used after push_cfi
*sometimes*, not always:

.macro op_safe_regs op
ENTRY(\op\()_safe_regs)
        CFI_STARTPROC
        pushl_cfi %ebx
        pushl_cfi %ebp
        pushl_cfi %esi
        pushl_cfi %edi

but

ENTRY(csum_partial)
        CFI_STARTPROC
        pushl_cfi %esi
        CFI_REL_OFFSET esi, 0
        pushl_cfi %ebx
        CFI_REL_OFFSET ebx, 0

We need an expert to enlighten us.

^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH 2/4] x86: ia32entry.S: fix wrong symbolic constant usage: R11->ARGOFFSET
  2015-01-10 22:27     ` Linus Torvalds
  2015-01-10 22:35       ` Borislav Petkov
@ 2015-01-10 22:37       ` Linus Torvalds
  2015-01-10 23:27       ` Andy Lutomirski
  2 siblings, 0 replies; 130+ messages in thread
From: Linus Torvalds @ 2015-01-10 22:37 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Denys Vlasenko, linux-kernel, Oleg Nesterov, H. Peter Anvin,
	Borislav Petkov, Frederic Weisbecker, X86 ML, Alexei Starovoitov,
	Will Drewry, Kees Cook

On Sat, Jan 10, 2015 at 2:27 PM, Linus Torvalds
<torvalds@linux-foundation.org> wrote:
>
> So the only way to avoid a conflict is to not touch the same code, or
> to touch it *exactly* the same way in all respects.

.. with the caveat that the exact definition of "overlap" can end up
being somewhat fuzzy with the whole conflict simplification, so I
guess it can actually end up helping. But I think it could go either
way. Just from experience, the more important part is seeing that one
branch is obviously a superset of the other, which tends to happen
particularly with the case of "small parts of the new branch were
already merged as bug-fixes during the last development cycle".

                       Linus

^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH 2/4] x86: ia32entry.S: fix wrong symbolic constant usage: R11->ARGOFFSET
  2015-01-10 22:35       ` Borislav Petkov
@ 2015-01-10 22:41         ` Linus Torvalds
  2015-01-10 22:45           ` Borislav Petkov
  0 siblings, 1 reply; 130+ messages in thread
From: Linus Torvalds @ 2015-01-10 22:41 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Andy Lutomirski, Denys Vlasenko, linux-kernel, Oleg Nesterov,
	H. Peter Anvin, Frederic Weisbecker, X86 ML, Alexei Starovoitov,
	Will Drewry, Kees Cook

On Sat, Jan 10, 2015 at 2:35 PM, Borislav Petkov <bp@alien8.de> wrote:
>
> Haha, Linus just made you maintainer of entry_64.S :-)

Just going by

    git shortlog --no-merges --since=6.months arch/x86/kernel/entry_64.S

and looking at the kind of changes are going on, I really think a
"tag, you're it" to Andy is a reasonable thing to do.

I just hope he's stupid^Wambitious enough to fall for it.

                        Linus

^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH 3/4] x86: open-code register save/restore in trace_hardirqs thunks
  2015-01-10 22:35     ` Denys Vlasenko
@ 2015-01-10 22:41       ` Borislav Petkov
  2015-01-11  3:33         ` Denys Vlasenko
  0 siblings, 1 reply; 130+ messages in thread
From: Borislav Petkov @ 2015-01-10 22:41 UTC (permalink / raw)
  To: Denys Vlasenko
  Cc: Linus Torvalds, Denys Vlasenko, Linux Kernel Mailing List,
	Oleg Nesterov, H. Peter Anvin, Andy Lutomirski,
	Frederic Weisbecker, X86 ML, Alexei Starovoitov, Will Drewry,
	Kees Cook

On Sat, Jan 10, 2015 at 11:35:58PM +0100, Denys Vlasenko wrote:
> I don't know either... it seems to be used after push_cfi
> *sometimes*, not always:
> 
> .macro op_safe_regs op
> ENTRY(\op\()_safe_regs)
>         CFI_STARTPROC
>         pushl_cfi %ebx
>         pushl_cfi %ebp
>         pushl_cfi %esi
>         pushl_cfi %edi
> 
> but
> 
> ENTRY(csum_partial)
>         CFI_STARTPROC
>         pushl_cfi %esi
>         CFI_REL_OFFSET esi, 0
>         pushl_cfi %ebx
>         CFI_REL_OFFSET ebx, 0
> 
> We need an expert to enlighten us.

I think Andy had it:

https://lkml.kernel.org/r/CALCETrUuJ8T%2BLp23uYezHvxAtF-L_20dOaD4aDxciE7xZqDK3w@mail.gmail.com

So we better hide that crap in the macro at least, like the rest of the
CFI gunk.

-- 
Regards/Gruss,
    Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH 2/4] x86: ia32entry.S: fix wrong symbolic constant usage: R11->ARGOFFSET
  2015-01-10 22:41         ` Linus Torvalds
@ 2015-01-10 22:45           ` Borislav Petkov
  0 siblings, 0 replies; 130+ messages in thread
From: Borislav Petkov @ 2015-01-10 22:45 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Andy Lutomirski, Denys Vlasenko, linux-kernel, Oleg Nesterov,
	H. Peter Anvin, Frederic Weisbecker, X86 ML, Alexei Starovoitov,
	Will Drewry, Kees Cook

On Sat, Jan 10, 2015 at 02:41:07PM -0800, Linus Torvalds wrote:
> On Sat, Jan 10, 2015 at 2:35 PM, Borislav Petkov <bp@alien8.de> wrote:
> >
> > Haha, Linus just made you maintainer of entry_64.S :-)
> 
> Just going by
> 
>     git shortlog --no-merges --since=6.months arch/x86/kernel/entry_64.S
> 
> and looking at the kind of changes are going on, I really think a
> "tag, you're it" to Andy is a reasonable thing to do.
> 
> I just hope he's stupid^Wambitious enough to fall for it.

I'm sure he will, he's doing nuts stuff anyway to that file but it seems
to work so far :-)

You should take a look at this other thing too, using SYSRET to return
from exceptions:

https://lkml.kernel.org/r/49394403b8b12486a6b9c9c70b72bd9f5dce7364.1415403984.git.luto@amacapital.net

-- 
Regards/Gruss,
    Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH 2/4] x86: ia32entry.S: fix wrong symbolic constant usage: R11->ARGOFFSET
  2015-01-10 22:27     ` Linus Torvalds
  2015-01-10 22:35       ` Borislav Petkov
  2015-01-10 22:37       ` Linus Torvalds
@ 2015-01-10 23:27       ` Andy Lutomirski
  2 siblings, 0 replies; 130+ messages in thread
From: Andy Lutomirski @ 2015-01-10 23:27 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Denys Vlasenko, linux-kernel, Oleg Nesterov, H. Peter Anvin,
	Borislav Petkov, Frederic Weisbecker, X86 ML, Alexei Starovoitov,
	Will Drewry, Kees Cook

On Sat, Jan 10, 2015 at 2:27 PM, Linus Torvalds
<torvalds@linux-foundation.org> wrote:
> On Sat, Jan 10, 2015 at 2:13 PM, Andy Lutomirski <luto@amacapital.net> wrote:
>>
>> I'll make the same change to my pending entry work, and hopefully we
>> can avoid conflicts.
>
> That's not how conflicts work.
>
> Either there is no overlap between the changes at all, in which case
> it doesn't matter if you then also have Denys' changes in your tree.
>
> Or you have other changes that change code around Denys' code, in
> which case you'll get conflicts whether you have Denys' changes or not
> (because two branches will be changing the same area differently, and
> so there's a conflict that needs to resolve which side was right).
>
> So the only way to avoid a conflict is to not touch the same code, or
> to touch it *exactly* the same way in all respects.
>
> Now, while the *conflict* is not something you can't avoid, some
> conflicts are easier to resolve than others, and from a conflict
> resolution standpoint it can make sense for your branch to include
> Denys' changes.

Hmm.  What I meant here about avoiding conflicts was more like "with
any luck, the timing will work out such that the code conflicts don't
end up happening."

>
> Why? Because if whoever resolves the conflict sees that one branch is
> a proper superset of the other branch, than the resolution is a much
> more obvious "let's just take everything from one side" edit, rather
> than having to pick-and-choose.
>
> I I do actually agree with you taking the fixes (and maybe you should
> *entirely* take ownership of all the entry_64.S changes, so that there
> is no "other side" to conflict with at all!). I just wanted to point
> out the actual effects from a conflict standpoint.
>

Egads.  As if the vdso isn't bad enough.

Mumble mumble maybe I'll agree to maintain this masterpiece of
well-engineered asm.

--Andy

>                           Linus



-- 
Andy Lutomirski
AMA Capital Management, LLC

^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH 3/4] x86: open-code register save/restore in trace_hardirqs thunks
  2015-01-10 22:41       ` Borislav Petkov
@ 2015-01-11  3:33         ` Denys Vlasenko
  2015-01-11 10:54           ` Borislav Petkov
  0 siblings, 1 reply; 130+ messages in thread
From: Denys Vlasenko @ 2015-01-11  3:33 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Linus Torvalds, Denys Vlasenko, Linux Kernel Mailing List,
	Oleg Nesterov, H. Peter Anvin, Andy Lutomirski,
	Frederic Weisbecker, X86 ML, Alexei Starovoitov, Will Drewry,
	Kees Cook

On Sat, Jan 10, 2015 at 11:41 PM, Borislav Petkov <bp@alien8.de> wrote:
> On Sat, Jan 10, 2015 at 11:35:58PM +0100, Denys Vlasenko wrote:
>> I don't know either... it seems to be used after push_cfi
>> *sometimes*, not always:
>>
>> .macro op_safe_regs op
>> ENTRY(\op\()_safe_regs)
>>         CFI_STARTPROC
>>         pushl_cfi %ebx
>>         pushl_cfi %ebp
>>         pushl_cfi %esi
>>         pushl_cfi %edi
>>
>> but
>>
>> ENTRY(csum_partial)
>>         CFI_STARTPROC
>>         pushl_cfi %esi
>>         CFI_REL_OFFSET esi, 0
>>         pushl_cfi %ebx
>>         CFI_REL_OFFSET ebx, 0
>>
>> We need an expert to enlighten us.
>
> I think Andy had it:
>
> https://lkml.kernel.org/r/CALCETrUuJ8T%2BLp23uYezHvxAtF-L_20dOaD4aDxciE7xZqDK3w@mail.gmail.com
>
> So we better hide that crap in the macro at least, like the rest of the
> CFI gunk.

I understand Andy's post.
My question is, what about places like this?

> .macro op_safe_regs op
> ENTRY(\op\()_safe_regs)
>         CFI_STARTPROC
>         pushl_cfi %ebx
>         pushl_cfi %ebp
>         pushl_cfi %esi
>         pushl_cfi %edi

Do we need to convert it to use macros which also do
"CFI_REL_OFFSET reg, 0" thingy, or not?
In either case: why?

^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH 3/4] x86: open-code register save/restore in trace_hardirqs thunks
  2015-01-11  3:33         ` Denys Vlasenko
@ 2015-01-11 10:54           ` Borislav Petkov
  2015-01-11 23:06             ` Denys Vlasenko
  0 siblings, 1 reply; 130+ messages in thread
From: Borislav Petkov @ 2015-01-11 10:54 UTC (permalink / raw)
  To: Denys Vlasenko
  Cc: Linus Torvalds, Denys Vlasenko, Linux Kernel Mailing List,
	Oleg Nesterov, H. Peter Anvin, Andy Lutomirski,
	Frederic Weisbecker, X86 ML, Alexei Starovoitov, Will Drewry,
	Kees Cook

On Sun, Jan 11, 2015 at 04:33:58AM +0100, Denys Vlasenko wrote:
> I understand Andy's post.
> My question is, what about places like this?
> 
> > .macro op_safe_regs op
> > ENTRY(\op\()_safe_regs)
> >         CFI_STARTPROC
> >         pushl_cfi %ebx
> >         pushl_cfi %ebp
> >         pushl_cfi %esi
> >         pushl_cfi %edi
> 
> Do we need to convert it to use macros which also do
> "CFI_REL_OFFSET reg, 0" thingy, or not?
> In either case: why?

What is different at those places to not use the CFI annotations?

Frankly speaking, I'm for dropping all that CFI ugliness completely.

-- 
Regards/Gruss,
    Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH 3/4] x86: open-code register save/restore in trace_hardirqs thunks
  2015-01-11 10:54           ` Borislav Petkov
@ 2015-01-11 23:06             ` Denys Vlasenko
  0 siblings, 0 replies; 130+ messages in thread
From: Denys Vlasenko @ 2015-01-11 23:06 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Linus Torvalds, Denys Vlasenko, Linux Kernel Mailing List,
	Oleg Nesterov, H. Peter Anvin, Andy Lutomirski,
	Frederic Weisbecker, X86 ML, Alexei Starovoitov, Will Drewry,
	Kees Cook

On Sun, Jan 11, 2015 at 11:54 AM, Borislav Petkov <bp@alien8.de> wrote:
> On Sun, Jan 11, 2015 at 04:33:58AM +0100, Denys Vlasenko wrote:
>> I understand Andy's post.
>> My question is, what about places like this?
>>
>> > .macro op_safe_regs op
>> > ENTRY(\op\()_safe_regs)
>> >         CFI_STARTPROC
>> >         pushl_cfi %ebx
>> >         pushl_cfi %ebp
>> >         pushl_cfi %esi
>> >         pushl_cfi %edi
>>
>> Do we need to convert it to use macros which also do
>> "CFI_REL_OFFSET reg, 0" thingy, or not?
>> In either case: why?
>
> What is different at those places to not use the CFI annotations?

Okay, sending a patch which _probably_ does the right thing...

^ permalink raw reply	[flat|nested] 130+ messages in thread

* [PATCH] x86: introduce push/pop macros which generate CFI_REL_OFFSET and CFI_RESTORE
@ 2015-01-11 23:07 Denys Vlasenko
  2015-01-12  0:38 ` Andy Lutomirski
                   ` (2 more replies)
  0 siblings, 3 replies; 130+ messages in thread
From: Denys Vlasenko @ 2015-01-11 23:07 UTC (permalink / raw)
  To: linux-kernel
  Cc: Denys Vlasenko, Linus Torvalds, Oleg Nesterov, H. Peter Anvin,
	Borislav Petkov, Andy Lutomirski, Frederic Weisbecker, X86 ML,
	Alexei Starovoitov, Will Drewry, Kees Cook

Sequences
        pushl_cfi %reg
        CFI_REL_OFFSET reg, 0
and
        popl_cfi %reg
        CFI_RESTORE reg
happen quite often. This patch adds macros which generate them.

No assembly changes (verified with objdump -dr vmlinux.o).

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
CC: Linus Torvalds <torvalds@linux-foundation.org>
CC: Oleg Nesterov <oleg@redhat.com>
CC: "H. Peter Anvin" <hpa@zytor.com>
CC: Borislav Petkov <bp@alien8.de>
CC: Andy Lutomirski <luto@amacapital.net>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: X86 ML <x86@kernel.org>
CC: Alexei Starovoitov <ast@plumgrid.com>
CC: Will Drewry <wad@chromium.org>
CC: Kees Cook <keescook@chromium.org>
CC: linux-kernel@vger.kernel.org
---
 arch/x86/include/asm/calling.h | 42 ++++++++++-------------------
 arch/x86/include/asm/dwarf2.h  | 24 +++++++++++++++++
 arch/x86/kernel/entry_32.S     | 21 +++++----------
 arch/x86/lib/atomic64_cx8_32.S | 50 ++++++++++++++---------------------
 arch/x86/lib/checksum_32.S     | 60 ++++++++++++++----------------------------
 arch/x86/lib/msr-reg.S         | 24 ++++++++---------
 arch/x86/lib/rwsem.S           | 44 ++++++++++++++-----------------
 arch/x86/lib/thunk_32.S        | 18 +++++--------
 arch/x86/lib/thunk_64.S        | 54 +++++++++++++------------------------
 9 files changed, 141 insertions(+), 196 deletions(-)

diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h
index 1f1297b..3c711f2a 100644
--- a/arch/x86/include/asm/calling.h
+++ b/arch/x86/include/asm/calling.h
@@ -210,37 +210,23 @@ For 32-bit we have the following conventions - kernel is built with
  */
 
 	.macro SAVE_ALL
-	pushl_cfi %eax
-	CFI_REL_OFFSET eax, 0
-	pushl_cfi %ebp
-	CFI_REL_OFFSET ebp, 0
-	pushl_cfi %edi
-	CFI_REL_OFFSET edi, 0
-	pushl_cfi %esi
-	CFI_REL_OFFSET esi, 0
-	pushl_cfi %edx
-	CFI_REL_OFFSET edx, 0
-	pushl_cfi %ecx
-	CFI_REL_OFFSET ecx, 0
-	pushl_cfi %ebx
-	CFI_REL_OFFSET ebx, 0
+	pushl_cfi_reg eax
+	pushl_cfi_reg ebp
+	pushl_cfi_reg edi
+	pushl_cfi_reg esi
+	pushl_cfi_reg edx
+	pushl_cfi_reg ecx
+	pushl_cfi_reg ebx
 	.endm
 
 	.macro RESTORE_ALL
-	popl_cfi %ebx
-	CFI_RESTORE ebx
-	popl_cfi %ecx
-	CFI_RESTORE ecx
-	popl_cfi %edx
-	CFI_RESTORE edx
-	popl_cfi %esi
-	CFI_RESTORE esi
-	popl_cfi %edi
-	CFI_RESTORE edi
-	popl_cfi %ebp
-	CFI_RESTORE ebp
-	popl_cfi %eax
-	CFI_RESTORE eax
+	popl_cfi_reg ebx
+	popl_cfi_reg ecx
+	popl_cfi_reg edx
+	popl_cfi_reg esi
+	popl_cfi_reg edi
+	popl_cfi_reg ebp
+	popl_cfi_reg eax
 	.endm
 
 #endif /* CONFIG_X86_64 */
diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h
index f6f1598..de1cdaf 100644
--- a/arch/x86/include/asm/dwarf2.h
+++ b/arch/x86/include/asm/dwarf2.h
@@ -86,11 +86,23 @@
 	CFI_ADJUST_CFA_OFFSET 8
 	.endm
 
+	.macro pushq_cfi_reg reg
+	pushq %\reg
+	CFI_ADJUST_CFA_OFFSET 8
+	CFI_REL_OFFSET \reg, 0
+	.endm
+
 	.macro popq_cfi reg
 	popq \reg
 	CFI_ADJUST_CFA_OFFSET -8
 	.endm
 
+	.macro popq_cfi_reg reg
+	popq %\reg
+	CFI_ADJUST_CFA_OFFSET -8
+	CFI_RESTORE \reg
+	.endm
+
 	.macro pushfq_cfi
 	pushfq
 	CFI_ADJUST_CFA_OFFSET 8
@@ -116,11 +128,23 @@
 	CFI_ADJUST_CFA_OFFSET 4
 	.endm
 
+	.macro pushl_cfi_reg reg
+	pushl %\reg
+	CFI_ADJUST_CFA_OFFSET 4
+	CFI_REL_OFFSET \reg, 0
+	.endm
+
 	.macro popl_cfi reg
 	popl \reg
 	CFI_ADJUST_CFA_OFFSET -4
 	.endm
 
+	.macro popl_cfi_reg reg
+	popl %\reg
+	CFI_ADJUST_CFA_OFFSET -4
+	CFI_RESTORE \reg
+	.endm
+
 	.macro pushfl_cfi
 	pushfl
 	CFI_ADJUST_CFA_OFFSET 4
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 000d419..a4a2eaa 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1237,20 +1237,13 @@ error_code:
 	/*CFI_REL_OFFSET es, 0*/
 	pushl_cfi %ds
 	/*CFI_REL_OFFSET ds, 0*/
-	pushl_cfi %eax
-	CFI_REL_OFFSET eax, 0
-	pushl_cfi %ebp
-	CFI_REL_OFFSET ebp, 0
-	pushl_cfi %edi
-	CFI_REL_OFFSET edi, 0
-	pushl_cfi %esi
-	CFI_REL_OFFSET esi, 0
-	pushl_cfi %edx
-	CFI_REL_OFFSET edx, 0
-	pushl_cfi %ecx
-	CFI_REL_OFFSET ecx, 0
-	pushl_cfi %ebx
-	CFI_REL_OFFSET ebx, 0
+	pushl_cfi_reg eax
+	pushl_cfi_reg ebp
+	pushl_cfi_reg edi
+	pushl_cfi_reg esi
+	pushl_cfi_reg edx
+	pushl_cfi_reg ecx
+	pushl_cfi_reg ebx
 	cld
 	movl $(__KERNEL_PERCPU), %ecx
 	movl %ecx, %fs
diff --git a/arch/x86/lib/atomic64_cx8_32.S b/arch/x86/lib/atomic64_cx8_32.S
index f5cc9eb..082a851 100644
--- a/arch/x86/lib/atomic64_cx8_32.S
+++ b/arch/x86/lib/atomic64_cx8_32.S
@@ -13,16 +13,6 @@
 #include <asm/alternative-asm.h>
 #include <asm/dwarf2.h>
 
-.macro SAVE reg
-	pushl_cfi %\reg
-	CFI_REL_OFFSET \reg, 0
-.endm
-
-.macro RESTORE reg
-	popl_cfi %\reg
-	CFI_RESTORE \reg
-.endm
-
 .macro read64 reg
 	movl %ebx, %eax
 	movl %ecx, %edx
@@ -67,10 +57,10 @@ ENDPROC(atomic64_xchg_cx8)
 .macro addsub_return func ins insc
 ENTRY(atomic64_\func\()_return_cx8)
 	CFI_STARTPROC
-	SAVE ebp
-	SAVE ebx
-	SAVE esi
-	SAVE edi
+	pushl_cfi_reg ebp
+	pushl_cfi_reg ebx
+	pushl_cfi_reg esi
+	pushl_cfi_reg edi
 
 	movl %eax, %esi
 	movl %edx, %edi
@@ -89,10 +79,10 @@ ENTRY(atomic64_\func\()_return_cx8)
 10:
 	movl %ebx, %eax
 	movl %ecx, %edx
-	RESTORE edi
-	RESTORE esi
-	RESTORE ebx
-	RESTORE ebp
+	popl_cfi_reg edi
+	popl_cfi_reg esi
+	popl_cfi_reg ebx
+	popl_cfi_reg ebp
 	ret
 	CFI_ENDPROC
 ENDPROC(atomic64_\func\()_return_cx8)
@@ -104,7 +94,7 @@ addsub_return sub sub sbb
 .macro incdec_return func ins insc
 ENTRY(atomic64_\func\()_return_cx8)
 	CFI_STARTPROC
-	SAVE ebx
+	pushl_cfi_reg ebx
 
 	read64 %esi
 1:
@@ -119,7 +109,7 @@ ENTRY(atomic64_\func\()_return_cx8)
 10:
 	movl %ebx, %eax
 	movl %ecx, %edx
-	RESTORE ebx
+	popl_cfi_reg ebx
 	ret
 	CFI_ENDPROC
 ENDPROC(atomic64_\func\()_return_cx8)
@@ -130,7 +120,7 @@ incdec_return dec sub sbb
 
 ENTRY(atomic64_dec_if_positive_cx8)
 	CFI_STARTPROC
-	SAVE ebx
+	pushl_cfi_reg ebx
 
 	read64 %esi
 1:
@@ -146,18 +136,18 @@ ENTRY(atomic64_dec_if_positive_cx8)
 2:
 	movl %ebx, %eax
 	movl %ecx, %edx
-	RESTORE ebx
+	popl_cfi_reg ebx
 	ret
 	CFI_ENDPROC
 ENDPROC(atomic64_dec_if_positive_cx8)
 
 ENTRY(atomic64_add_unless_cx8)
 	CFI_STARTPROC
-	SAVE ebp
-	SAVE ebx
+	pushl_cfi_reg ebp
+	pushl_cfi_reg ebx
 /* these just push these two parameters on the stack */
-	SAVE edi
-	SAVE ecx
+	pushl_cfi_reg edi
+	pushl_cfi_reg ecx
 
 	movl %eax, %ebp
 	movl %edx, %edi
@@ -179,8 +169,8 @@ ENTRY(atomic64_add_unless_cx8)
 3:
 	addl $8, %esp
 	CFI_ADJUST_CFA_OFFSET -8
-	RESTORE ebx
-	RESTORE ebp
+	popl_cfi_reg ebx
+	popl_cfi_reg ebp
 	ret
 4:
 	cmpl %edx, 4(%esp)
@@ -192,7 +182,7 @@ ENDPROC(atomic64_add_unless_cx8)
 
 ENTRY(atomic64_inc_not_zero_cx8)
 	CFI_STARTPROC
-	SAVE ebx
+	pushl_cfi_reg ebx
 
 	read64 %esi
 1:
@@ -209,7 +199,7 @@ ENTRY(atomic64_inc_not_zero_cx8)
 
 	movl $1, %eax
 3:
-	RESTORE ebx
+	popl_cfi_reg ebx
 	ret
 	CFI_ENDPROC
 ENDPROC(atomic64_inc_not_zero_cx8)
diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S
index e78b8ee..c3b9953 100644
--- a/arch/x86/lib/checksum_32.S
+++ b/arch/x86/lib/checksum_32.S
@@ -51,10 +51,8 @@ unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
 	   */		
 ENTRY(csum_partial)
 	CFI_STARTPROC
-	pushl_cfi %esi
-	CFI_REL_OFFSET esi, 0
-	pushl_cfi %ebx
-	CFI_REL_OFFSET ebx, 0
+	pushl_cfi_reg esi
+	pushl_cfi_reg ebx
 	movl 20(%esp),%eax	# Function arg: unsigned int sum
 	movl 16(%esp),%ecx	# Function arg: int len
 	movl 12(%esp),%esi	# Function arg: unsigned char *buff
@@ -131,10 +129,8 @@ ENTRY(csum_partial)
 	jz 8f
 	roll $8, %eax
 8:
-	popl_cfi %ebx
-	CFI_RESTORE ebx
-	popl_cfi %esi
-	CFI_RESTORE esi
+	popl_cfi_reg ebx
+	popl_cfi_reg esi
 	ret
 	CFI_ENDPROC
 ENDPROC(csum_partial)
@@ -145,10 +141,8 @@ ENDPROC(csum_partial)
 
 ENTRY(csum_partial)
 	CFI_STARTPROC
-	pushl_cfi %esi
-	CFI_REL_OFFSET esi, 0
-	pushl_cfi %ebx
-	CFI_REL_OFFSET ebx, 0
+	pushl_cfi_reg esi
+	pushl_cfi_reg ebx
 	movl 20(%esp),%eax	# Function arg: unsigned int sum
 	movl 16(%esp),%ecx	# Function arg: int len
 	movl 12(%esp),%esi	# Function arg:	const unsigned char *buf
@@ -255,10 +249,8 @@ ENTRY(csum_partial)
 	jz 90f
 	roll $8, %eax
 90: 
-	popl_cfi %ebx
-	CFI_RESTORE ebx
-	popl_cfi %esi
-	CFI_RESTORE esi
+	popl_cfi_reg ebx
+	popl_cfi_reg esi
 	ret
 	CFI_ENDPROC
 ENDPROC(csum_partial)
@@ -298,12 +290,9 @@ ENTRY(csum_partial_copy_generic)
 	CFI_STARTPROC
 	subl  $4,%esp	
 	CFI_ADJUST_CFA_OFFSET 4
-	pushl_cfi %edi
-	CFI_REL_OFFSET edi, 0
-	pushl_cfi %esi
-	CFI_REL_OFFSET esi, 0
-	pushl_cfi %ebx
-	CFI_REL_OFFSET ebx, 0
+	pushl_cfi_reg edi
+	pushl_cfi_reg esi
+	pushl_cfi_reg ebx
 	movl ARGBASE+16(%esp),%eax	# sum
 	movl ARGBASE+12(%esp),%ecx	# len
 	movl ARGBASE+4(%esp),%esi	# src
@@ -412,12 +401,9 @@ DST(	movb %cl, (%edi)	)
 
 .previous
 
-	popl_cfi %ebx
-	CFI_RESTORE ebx
-	popl_cfi %esi
-	CFI_RESTORE esi
-	popl_cfi %edi
-	CFI_RESTORE edi
+	popl_cfi_reg ebx
+	popl_cfi_reg esi
+	popl_cfi_reg edi
 	popl_cfi %ecx			# equivalent to addl $4,%esp
 	ret	
 	CFI_ENDPROC
@@ -441,12 +427,9 @@ ENDPROC(csum_partial_copy_generic)
 		
 ENTRY(csum_partial_copy_generic)
 	CFI_STARTPROC
-	pushl_cfi %ebx
-	CFI_REL_OFFSET ebx, 0
-	pushl_cfi %edi
-	CFI_REL_OFFSET edi, 0
-	pushl_cfi %esi
-	CFI_REL_OFFSET esi, 0
+	pushl_cfi_reg ebx
+	pushl_cfi_reg edi
+	pushl_cfi_reg esi
 	movl ARGBASE+4(%esp),%esi	#src
 	movl ARGBASE+8(%esp),%edi	#dst	
 	movl ARGBASE+12(%esp),%ecx	#len
@@ -506,12 +489,9 @@ DST(	movb %dl, (%edi)         )
 	jmp  7b			
 .previous				
 
-	popl_cfi %esi
-	CFI_RESTORE esi
-	popl_cfi %edi
-	CFI_RESTORE edi
-	popl_cfi %ebx
-	CFI_RESTORE ebx
+	popl_cfi_reg esi
+	popl_cfi_reg edi
+	popl_cfi_reg ebx
 	ret
 	CFI_ENDPROC
 ENDPROC(csum_partial_copy_generic)
diff --git a/arch/x86/lib/msr-reg.S b/arch/x86/lib/msr-reg.S
index f6d13ee..3ca5218 100644
--- a/arch/x86/lib/msr-reg.S
+++ b/arch/x86/lib/msr-reg.S
@@ -14,8 +14,8 @@
 .macro op_safe_regs op
 ENTRY(\op\()_safe_regs)
 	CFI_STARTPROC
-	pushq_cfi %rbx
-	pushq_cfi %rbp
+	pushq_cfi_reg rbx
+	pushq_cfi_reg rbp
 	movq	%rdi, %r10	/* Save pointer */
 	xorl	%r11d, %r11d	/* Return value */
 	movl    (%rdi), %eax
@@ -35,8 +35,8 @@ ENTRY(\op\()_safe_regs)
 	movl    %ebp, 20(%r10)
 	movl    %esi, 24(%r10)
 	movl    %edi, 28(%r10)
-	popq_cfi %rbp
-	popq_cfi %rbx
+	popq_cfi_reg rbp
+	popq_cfi_reg rbx
 	ret
 3:
 	CFI_RESTORE_STATE
@@ -53,10 +53,10 @@ ENDPROC(\op\()_safe_regs)
 .macro op_safe_regs op
 ENTRY(\op\()_safe_regs)
 	CFI_STARTPROC
-	pushl_cfi %ebx
-	pushl_cfi %ebp
-	pushl_cfi %esi
-	pushl_cfi %edi
+	pushl_cfi_reg ebx
+	pushl_cfi_reg ebp
+	pushl_cfi_reg esi
+	pushl_cfi_reg edi
 	pushl_cfi $0              /* Return value */
 	pushl_cfi %eax
 	movl    4(%eax), %ecx
@@ -80,10 +80,10 @@ ENTRY(\op\()_safe_regs)
 	movl    %esi, 24(%eax)
 	movl    %edi, 28(%eax)
 	popl_cfi %eax
-	popl_cfi %edi
-	popl_cfi %esi
-	popl_cfi %ebp
-	popl_cfi %ebx
+	popl_cfi_reg edi
+	popl_cfi_reg esi
+	popl_cfi_reg ebp
+	popl_cfi_reg ebx
 	ret
 3:
 	CFI_RESTORE_STATE
diff --git a/arch/x86/lib/rwsem.S b/arch/x86/lib/rwsem.S
index 5dff5f0..2322abe 100644
--- a/arch/x86/lib/rwsem.S
+++ b/arch/x86/lib/rwsem.S
@@ -34,10 +34,10 @@
  */
 
 #define save_common_regs \
-	pushl_cfi %ecx; CFI_REL_OFFSET ecx, 0
+	pushl_cfi_reg ecx
 
 #define restore_common_regs \
-	popl_cfi %ecx; CFI_RESTORE ecx
+	popl_cfi_reg ecx
 
 	/* Avoid uglifying the argument copying x86-64 needs to do. */
 	.macro movq src, dst
@@ -64,22 +64,22 @@
  */
 
 #define save_common_regs \
-	pushq_cfi %rdi; CFI_REL_OFFSET rdi, 0; \
-	pushq_cfi %rsi; CFI_REL_OFFSET rsi, 0; \
-	pushq_cfi %rcx; CFI_REL_OFFSET rcx, 0; \
-	pushq_cfi %r8;  CFI_REL_OFFSET r8,  0; \
-	pushq_cfi %r9;  CFI_REL_OFFSET r9,  0; \
-	pushq_cfi %r10; CFI_REL_OFFSET r10, 0; \
-	pushq_cfi %r11; CFI_REL_OFFSET r11, 0
+	pushq_cfi_reg rdi; \
+	pushq_cfi_reg rsi; \
+	pushq_cfi_reg rcx; \
+	pushq_cfi_reg r8;  \
+	pushq_cfi_reg r9;  \
+	pushq_cfi_reg r10; \
+	pushq_cfi_reg r11
 
 #define restore_common_regs \
-	popq_cfi %r11; CFI_RESTORE r11; \
-	popq_cfi %r10; CFI_RESTORE r10; \
-	popq_cfi %r9;  CFI_RESTORE r9; \
-	popq_cfi %r8;  CFI_RESTORE r8; \
-	popq_cfi %rcx; CFI_RESTORE rcx; \
-	popq_cfi %rsi; CFI_RESTORE rsi; \
-	popq_cfi %rdi; CFI_RESTORE rdi
+	popq_cfi_reg r11; \
+	popq_cfi_reg r10; \
+	popq_cfi_reg r9; \
+	popq_cfi_reg r8; \
+	popq_cfi_reg rcx; \
+	popq_cfi_reg rsi; \
+	popq_cfi_reg rdi
 
 #endif
 
@@ -87,12 +87,10 @@
 ENTRY(call_rwsem_down_read_failed)
 	CFI_STARTPROC
 	save_common_regs
-	__ASM_SIZE(push,_cfi) %__ASM_REG(dx)
-	CFI_REL_OFFSET __ASM_REG(dx), 0
+	__ASM_SIZE(push,_cfi_reg) __ASM_REG(dx)
 	movq %rax,%rdi
 	call rwsem_down_read_failed
-	__ASM_SIZE(pop,_cfi) %__ASM_REG(dx)
-	CFI_RESTORE __ASM_REG(dx)
+	__ASM_SIZE(pop,_cfi_reg) __ASM_REG(dx)
 	restore_common_regs
 	ret
 	CFI_ENDPROC
@@ -124,12 +122,10 @@ ENDPROC(call_rwsem_wake)
 ENTRY(call_rwsem_downgrade_wake)
 	CFI_STARTPROC
 	save_common_regs
-	__ASM_SIZE(push,_cfi) %__ASM_REG(dx)
-	CFI_REL_OFFSET __ASM_REG(dx), 0
+	__ASM_SIZE(push,_cfi_reg) __ASM_REG(dx)
 	movq %rax,%rdi
 	call rwsem_downgrade_wake
-	__ASM_SIZE(pop,_cfi) %__ASM_REG(dx)
-	CFI_RESTORE __ASM_REG(dx)
+	__ASM_SIZE(pop,_cfi_reg) __ASM_REG(dx)
 	restore_common_regs
 	ret
 	CFI_ENDPROC
diff --git a/arch/x86/lib/thunk_32.S b/arch/x86/lib/thunk_32.S
index e28cdaf..5eb7150 100644
--- a/arch/x86/lib/thunk_32.S
+++ b/arch/x86/lib/thunk_32.S
@@ -13,12 +13,9 @@
 	.globl \name
 \name:
 	CFI_STARTPROC
-	pushl_cfi %eax
-	CFI_REL_OFFSET eax, 0
-	pushl_cfi %ecx
-	CFI_REL_OFFSET ecx, 0
-	pushl_cfi %edx
-	CFI_REL_OFFSET edx, 0
+	pushl_cfi_reg eax
+	pushl_cfi_reg ecx
+	pushl_cfi_reg edx
 
 	.if \put_ret_addr_in_eax
 	/* Place EIP in the arg1 */
@@ -26,12 +23,9 @@
 	.endif
 
 	call \func
-	popl_cfi %edx
-	CFI_RESTORE edx
-	popl_cfi %ecx
-	CFI_RESTORE ecx
-	popl_cfi %eax
-	CFI_RESTORE eax
+	popl_cfi_reg edx
+	popl_cfi_reg ecx
+	popl_cfi_reg eax
 	ret
 	CFI_ENDPROC
 	_ASM_NOKPROBE(\name)
diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S
index 8ec443a..f89ba4e9 100644
--- a/arch/x86/lib/thunk_64.S
+++ b/arch/x86/lib/thunk_64.S
@@ -17,24 +17,15 @@
 	CFI_STARTPROC
 
 	/* this one pushes 9 elems, the next one would be %rIP */
-	pushq_cfi %rdi
-	CFI_REL_OFFSET rdi, 0
-	pushq_cfi %rsi
-	CFI_REL_OFFSET rsi, 0
-	pushq_cfi %rdx
-	CFI_REL_OFFSET rdx, 0
-	pushq_cfi %rcx
-	CFI_REL_OFFSET rcx, 0
-	pushq_cfi %rax
-	CFI_REL_OFFSET rax, 0
-	pushq_cfi %r8
-	CFI_REL_OFFSET r8, 0
-	pushq_cfi %r9
-	CFI_REL_OFFSET r9, 0
-	pushq_cfi %r10
-	CFI_REL_OFFSET r10, 0
-	pushq_cfi %r11
-	CFI_REL_OFFSET r11, 0
+	pushq_cfi_reg rdi
+	pushq_cfi_reg rsi
+	pushq_cfi_reg rdx
+	pushq_cfi_reg rcx
+	pushq_cfi_reg rax
+	pushq_cfi_reg r8
+	pushq_cfi_reg r9
+	pushq_cfi_reg r10
+	pushq_cfi_reg r11
 
 	.if \put_ret_addr_in_rdi
 	/* 9*8(%rsp) is return addr on stack */
@@ -69,24 +60,15 @@
 	CFI_STARTPROC
 	CFI_ADJUST_CFA_OFFSET 9*8
 restore:
-	popq_cfi %r11
-	CFI_RESTORE r11
-	popq_cfi %r10
-	CFI_RESTORE r10
-	popq_cfi %r9
-	CFI_RESTORE r9
-	popq_cfi %r8
-	CFI_RESTORE r8
-	popq_cfi %rax
-	CFI_RESTORE rax
-	popq_cfi %rcx
-	CFI_RESTORE rcx
-	popq_cfi %rdx
-	CFI_RESTORE rdx
-	popq_cfi %rsi
-	CFI_RESTORE rsi
-	popq_cfi %rdi
-	CFI_RESTORE rdi
+	popq_cfi_reg r11
+	popq_cfi_reg r10
+	popq_cfi_reg r9
+	popq_cfi_reg r8
+	popq_cfi_reg rax
+	popq_cfi_reg rcx
+	popq_cfi_reg rdx
+	popq_cfi_reg rsi
+	popq_cfi_reg rdi
 	ret
 	CFI_ENDPROC
 	_ASM_NOKPROBE(restore)
-- 
1.8.1.4


^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH] x86: introduce push/pop macros which generate CFI_REL_OFFSET and CFI_RESTORE
  2015-01-11 23:07 [PATCH] x86: introduce push/pop macros which generate CFI_REL_OFFSET and CFI_RESTORE Denys Vlasenko
@ 2015-01-12  0:38 ` Andy Lutomirski
  2015-01-12  6:23   ` Denys Vlasenko
  2015-01-12 19:23 ` Borislav Petkov
  2015-02-11 20:24 ` Andy Lutomirski
  2 siblings, 1 reply; 130+ messages in thread
From: Andy Lutomirski @ 2015-01-12  0:38 UTC (permalink / raw)
  To: Denys Vlasenko
  Cc: linux-kernel, Linus Torvalds, Oleg Nesterov, H. Peter Anvin,
	Borislav Petkov, Frederic Weisbecker, X86 ML, Alexei Starovoitov,
	Will Drewry, Kees Cook

On Sun, Jan 11, 2015 at 3:07 PM, Denys Vlasenko <dvlasenk@redhat.com> wrote:
> Sequences
>         pushl_cfi %reg
>         CFI_REL_OFFSET reg, 0
> and
>         popl_cfi %reg
>         CFI_RESTORE reg
> happen quite often. This patch adds macros which generate them.
>
> No assembly changes (verified with objdump -dr vmlinux.o).

Looks sane to me.  Where does this apply in relation to the rest of your series?

>
> Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
> CC: Linus Torvalds <torvalds@linux-foundation.org>
> CC: Oleg Nesterov <oleg@redhat.com>
> CC: "H. Peter Anvin" <hpa@zytor.com>
> CC: Borislav Petkov <bp@alien8.de>
> CC: Andy Lutomirski <luto@amacapital.net>
> CC: Frederic Weisbecker <fweisbec@gmail.com>
> CC: X86 ML <x86@kernel.org>
> CC: Alexei Starovoitov <ast@plumgrid.com>
> CC: Will Drewry <wad@chromium.org>
> CC: Kees Cook <keescook@chromium.org>
> CC: linux-kernel@vger.kernel.org
> ---
>  arch/x86/include/asm/calling.h | 42 ++++++++++-------------------
>  arch/x86/include/asm/dwarf2.h  | 24 +++++++++++++++++
>  arch/x86/kernel/entry_32.S     | 21 +++++----------
>  arch/x86/lib/atomic64_cx8_32.S | 50 ++++++++++++++---------------------
>  arch/x86/lib/checksum_32.S     | 60 ++++++++++++++----------------------------
>  arch/x86/lib/msr-reg.S         | 24 ++++++++---------
>  arch/x86/lib/rwsem.S           | 44 ++++++++++++++-----------------
>  arch/x86/lib/thunk_32.S        | 18 +++++--------
>  arch/x86/lib/thunk_64.S        | 54 +++++++++++++------------------------
>  9 files changed, 141 insertions(+), 196 deletions(-)
>
> diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h
> index 1f1297b..3c711f2a 100644
> --- a/arch/x86/include/asm/calling.h
> +++ b/arch/x86/include/asm/calling.h
> @@ -210,37 +210,23 @@ For 32-bit we have the following conventions - kernel is built with
>   */
>
>         .macro SAVE_ALL
> -       pushl_cfi %eax
> -       CFI_REL_OFFSET eax, 0
> -       pushl_cfi %ebp
> -       CFI_REL_OFFSET ebp, 0
> -       pushl_cfi %edi
> -       CFI_REL_OFFSET edi, 0
> -       pushl_cfi %esi
> -       CFI_REL_OFFSET esi, 0
> -       pushl_cfi %edx
> -       CFI_REL_OFFSET edx, 0
> -       pushl_cfi %ecx
> -       CFI_REL_OFFSET ecx, 0
> -       pushl_cfi %ebx
> -       CFI_REL_OFFSET ebx, 0
> +       pushl_cfi_reg eax
> +       pushl_cfi_reg ebp
> +       pushl_cfi_reg edi
> +       pushl_cfi_reg esi
> +       pushl_cfi_reg edx
> +       pushl_cfi_reg ecx
> +       pushl_cfi_reg ebx
>         .endm
>
>         .macro RESTORE_ALL
> -       popl_cfi %ebx
> -       CFI_RESTORE ebx
> -       popl_cfi %ecx
> -       CFI_RESTORE ecx
> -       popl_cfi %edx
> -       CFI_RESTORE edx
> -       popl_cfi %esi
> -       CFI_RESTORE esi
> -       popl_cfi %edi
> -       CFI_RESTORE edi
> -       popl_cfi %ebp
> -       CFI_RESTORE ebp
> -       popl_cfi %eax
> -       CFI_RESTORE eax
> +       popl_cfi_reg ebx
> +       popl_cfi_reg ecx
> +       popl_cfi_reg edx
> +       popl_cfi_reg esi
> +       popl_cfi_reg edi
> +       popl_cfi_reg ebp
> +       popl_cfi_reg eax
>         .endm
>
>  #endif /* CONFIG_X86_64 */
> diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h
> index f6f1598..de1cdaf 100644
> --- a/arch/x86/include/asm/dwarf2.h
> +++ b/arch/x86/include/asm/dwarf2.h
> @@ -86,11 +86,23 @@
>         CFI_ADJUST_CFA_OFFSET 8
>         .endm
>
> +       .macro pushq_cfi_reg reg
> +       pushq %\reg
> +       CFI_ADJUST_CFA_OFFSET 8
> +       CFI_REL_OFFSET \reg, 0
> +       .endm
> +
>         .macro popq_cfi reg
>         popq \reg
>         CFI_ADJUST_CFA_OFFSET -8
>         .endm
>
> +       .macro popq_cfi_reg reg
> +       popq %\reg
> +       CFI_ADJUST_CFA_OFFSET -8
> +       CFI_RESTORE \reg
> +       .endm
> +
>         .macro pushfq_cfi
>         pushfq
>         CFI_ADJUST_CFA_OFFSET 8
> @@ -116,11 +128,23 @@
>         CFI_ADJUST_CFA_OFFSET 4
>         .endm
>
> +       .macro pushl_cfi_reg reg
> +       pushl %\reg
> +       CFI_ADJUST_CFA_OFFSET 4
> +       CFI_REL_OFFSET \reg, 0
> +       .endm
> +
>         .macro popl_cfi reg
>         popl \reg
>         CFI_ADJUST_CFA_OFFSET -4
>         .endm
>
> +       .macro popl_cfi_reg reg
> +       popl %\reg
> +       CFI_ADJUST_CFA_OFFSET -4
> +       CFI_RESTORE \reg
> +       .endm
> +
>         .macro pushfl_cfi
>         pushfl
>         CFI_ADJUST_CFA_OFFSET 4
> diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
> index 000d419..a4a2eaa 100644
> --- a/arch/x86/kernel/entry_32.S
> +++ b/arch/x86/kernel/entry_32.S
> @@ -1237,20 +1237,13 @@ error_code:
>         /*CFI_REL_OFFSET es, 0*/
>         pushl_cfi %ds
>         /*CFI_REL_OFFSET ds, 0*/
> -       pushl_cfi %eax
> -       CFI_REL_OFFSET eax, 0
> -       pushl_cfi %ebp
> -       CFI_REL_OFFSET ebp, 0
> -       pushl_cfi %edi
> -       CFI_REL_OFFSET edi, 0
> -       pushl_cfi %esi
> -       CFI_REL_OFFSET esi, 0
> -       pushl_cfi %edx
> -       CFI_REL_OFFSET edx, 0
> -       pushl_cfi %ecx
> -       CFI_REL_OFFSET ecx, 0
> -       pushl_cfi %ebx
> -       CFI_REL_OFFSET ebx, 0
> +       pushl_cfi_reg eax
> +       pushl_cfi_reg ebp
> +       pushl_cfi_reg edi
> +       pushl_cfi_reg esi
> +       pushl_cfi_reg edx
> +       pushl_cfi_reg ecx
> +       pushl_cfi_reg ebx
>         cld
>         movl $(__KERNEL_PERCPU), %ecx
>         movl %ecx, %fs
> diff --git a/arch/x86/lib/atomic64_cx8_32.S b/arch/x86/lib/atomic64_cx8_32.S
> index f5cc9eb..082a851 100644
> --- a/arch/x86/lib/atomic64_cx8_32.S
> +++ b/arch/x86/lib/atomic64_cx8_32.S
> @@ -13,16 +13,6 @@
>  #include <asm/alternative-asm.h>
>  #include <asm/dwarf2.h>
>
> -.macro SAVE reg
> -       pushl_cfi %\reg
> -       CFI_REL_OFFSET \reg, 0
> -.endm
> -
> -.macro RESTORE reg
> -       popl_cfi %\reg
> -       CFI_RESTORE \reg
> -.endm
> -
>  .macro read64 reg
>         movl %ebx, %eax
>         movl %ecx, %edx
> @@ -67,10 +57,10 @@ ENDPROC(atomic64_xchg_cx8)
>  .macro addsub_return func ins insc
>  ENTRY(atomic64_\func\()_return_cx8)
>         CFI_STARTPROC
> -       SAVE ebp
> -       SAVE ebx
> -       SAVE esi
> -       SAVE edi
> +       pushl_cfi_reg ebp
> +       pushl_cfi_reg ebx
> +       pushl_cfi_reg esi
> +       pushl_cfi_reg edi
>
>         movl %eax, %esi
>         movl %edx, %edi
> @@ -89,10 +79,10 @@ ENTRY(atomic64_\func\()_return_cx8)
>  10:
>         movl %ebx, %eax
>         movl %ecx, %edx
> -       RESTORE edi
> -       RESTORE esi
> -       RESTORE ebx
> -       RESTORE ebp
> +       popl_cfi_reg edi
> +       popl_cfi_reg esi
> +       popl_cfi_reg ebx
> +       popl_cfi_reg ebp
>         ret
>         CFI_ENDPROC
>  ENDPROC(atomic64_\func\()_return_cx8)
> @@ -104,7 +94,7 @@ addsub_return sub sub sbb
>  .macro incdec_return func ins insc
>  ENTRY(atomic64_\func\()_return_cx8)
>         CFI_STARTPROC
> -       SAVE ebx
> +       pushl_cfi_reg ebx
>
>         read64 %esi
>  1:
> @@ -119,7 +109,7 @@ ENTRY(atomic64_\func\()_return_cx8)
>  10:
>         movl %ebx, %eax
>         movl %ecx, %edx
> -       RESTORE ebx
> +       popl_cfi_reg ebx
>         ret
>         CFI_ENDPROC
>  ENDPROC(atomic64_\func\()_return_cx8)
> @@ -130,7 +120,7 @@ incdec_return dec sub sbb
>
>  ENTRY(atomic64_dec_if_positive_cx8)
>         CFI_STARTPROC
> -       SAVE ebx
> +       pushl_cfi_reg ebx
>
>         read64 %esi
>  1:
> @@ -146,18 +136,18 @@ ENTRY(atomic64_dec_if_positive_cx8)
>  2:
>         movl %ebx, %eax
>         movl %ecx, %edx
> -       RESTORE ebx
> +       popl_cfi_reg ebx
>         ret
>         CFI_ENDPROC
>  ENDPROC(atomic64_dec_if_positive_cx8)
>
>  ENTRY(atomic64_add_unless_cx8)
>         CFI_STARTPROC
> -       SAVE ebp
> -       SAVE ebx
> +       pushl_cfi_reg ebp
> +       pushl_cfi_reg ebx
>  /* these just push these two parameters on the stack */
> -       SAVE edi
> -       SAVE ecx
> +       pushl_cfi_reg edi
> +       pushl_cfi_reg ecx
>
>         movl %eax, %ebp
>         movl %edx, %edi
> @@ -179,8 +169,8 @@ ENTRY(atomic64_add_unless_cx8)
>  3:
>         addl $8, %esp
>         CFI_ADJUST_CFA_OFFSET -8
> -       RESTORE ebx
> -       RESTORE ebp
> +       popl_cfi_reg ebx
> +       popl_cfi_reg ebp
>         ret
>  4:
>         cmpl %edx, 4(%esp)
> @@ -192,7 +182,7 @@ ENDPROC(atomic64_add_unless_cx8)
>
>  ENTRY(atomic64_inc_not_zero_cx8)
>         CFI_STARTPROC
> -       SAVE ebx
> +       pushl_cfi_reg ebx
>
>         read64 %esi
>  1:
> @@ -209,7 +199,7 @@ ENTRY(atomic64_inc_not_zero_cx8)
>
>         movl $1, %eax
>  3:
> -       RESTORE ebx
> +       popl_cfi_reg ebx
>         ret
>         CFI_ENDPROC
>  ENDPROC(atomic64_inc_not_zero_cx8)
> diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S
> index e78b8ee..c3b9953 100644
> --- a/arch/x86/lib/checksum_32.S
> +++ b/arch/x86/lib/checksum_32.S
> @@ -51,10 +51,8 @@ unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
>            */
>  ENTRY(csum_partial)
>         CFI_STARTPROC
> -       pushl_cfi %esi
> -       CFI_REL_OFFSET esi, 0
> -       pushl_cfi %ebx
> -       CFI_REL_OFFSET ebx, 0
> +       pushl_cfi_reg esi
> +       pushl_cfi_reg ebx
>         movl 20(%esp),%eax      # Function arg: unsigned int sum
>         movl 16(%esp),%ecx      # Function arg: int len
>         movl 12(%esp),%esi      # Function arg: unsigned char *buff
> @@ -131,10 +129,8 @@ ENTRY(csum_partial)
>         jz 8f
>         roll $8, %eax
>  8:
> -       popl_cfi %ebx
> -       CFI_RESTORE ebx
> -       popl_cfi %esi
> -       CFI_RESTORE esi
> +       popl_cfi_reg ebx
> +       popl_cfi_reg esi
>         ret
>         CFI_ENDPROC
>  ENDPROC(csum_partial)
> @@ -145,10 +141,8 @@ ENDPROC(csum_partial)
>
>  ENTRY(csum_partial)
>         CFI_STARTPROC
> -       pushl_cfi %esi
> -       CFI_REL_OFFSET esi, 0
> -       pushl_cfi %ebx
> -       CFI_REL_OFFSET ebx, 0
> +       pushl_cfi_reg esi
> +       pushl_cfi_reg ebx
>         movl 20(%esp),%eax      # Function arg: unsigned int sum
>         movl 16(%esp),%ecx      # Function arg: int len
>         movl 12(%esp),%esi      # Function arg: const unsigned char *buf
> @@ -255,10 +249,8 @@ ENTRY(csum_partial)
>         jz 90f
>         roll $8, %eax
>  90:
> -       popl_cfi %ebx
> -       CFI_RESTORE ebx
> -       popl_cfi %esi
> -       CFI_RESTORE esi
> +       popl_cfi_reg ebx
> +       popl_cfi_reg esi
>         ret
>         CFI_ENDPROC
>  ENDPROC(csum_partial)
> @@ -298,12 +290,9 @@ ENTRY(csum_partial_copy_generic)
>         CFI_STARTPROC
>         subl  $4,%esp
>         CFI_ADJUST_CFA_OFFSET 4
> -       pushl_cfi %edi
> -       CFI_REL_OFFSET edi, 0
> -       pushl_cfi %esi
> -       CFI_REL_OFFSET esi, 0
> -       pushl_cfi %ebx
> -       CFI_REL_OFFSET ebx, 0
> +       pushl_cfi_reg edi
> +       pushl_cfi_reg esi
> +       pushl_cfi_reg ebx
>         movl ARGBASE+16(%esp),%eax      # sum
>         movl ARGBASE+12(%esp),%ecx      # len
>         movl ARGBASE+4(%esp),%esi       # src
> @@ -412,12 +401,9 @@ DST(       movb %cl, (%edi)        )
>
>  .previous
>
> -       popl_cfi %ebx
> -       CFI_RESTORE ebx
> -       popl_cfi %esi
> -       CFI_RESTORE esi
> -       popl_cfi %edi
> -       CFI_RESTORE edi
> +       popl_cfi_reg ebx
> +       popl_cfi_reg esi
> +       popl_cfi_reg edi
>         popl_cfi %ecx                   # equivalent to addl $4,%esp
>         ret
>         CFI_ENDPROC
> @@ -441,12 +427,9 @@ ENDPROC(csum_partial_copy_generic)
>
>  ENTRY(csum_partial_copy_generic)
>         CFI_STARTPROC
> -       pushl_cfi %ebx
> -       CFI_REL_OFFSET ebx, 0
> -       pushl_cfi %edi
> -       CFI_REL_OFFSET edi, 0
> -       pushl_cfi %esi
> -       CFI_REL_OFFSET esi, 0
> +       pushl_cfi_reg ebx
> +       pushl_cfi_reg edi
> +       pushl_cfi_reg esi
>         movl ARGBASE+4(%esp),%esi       #src
>         movl ARGBASE+8(%esp),%edi       #dst
>         movl ARGBASE+12(%esp),%ecx      #len
> @@ -506,12 +489,9 @@ DST(       movb %dl, (%edi)         )
>         jmp  7b
>  .previous
>
> -       popl_cfi %esi
> -       CFI_RESTORE esi
> -       popl_cfi %edi
> -       CFI_RESTORE edi
> -       popl_cfi %ebx
> -       CFI_RESTORE ebx
> +       popl_cfi_reg esi
> +       popl_cfi_reg edi
> +       popl_cfi_reg ebx
>         ret
>         CFI_ENDPROC
>  ENDPROC(csum_partial_copy_generic)
> diff --git a/arch/x86/lib/msr-reg.S b/arch/x86/lib/msr-reg.S
> index f6d13ee..3ca5218 100644
> --- a/arch/x86/lib/msr-reg.S
> +++ b/arch/x86/lib/msr-reg.S
> @@ -14,8 +14,8 @@
>  .macro op_safe_regs op
>  ENTRY(\op\()_safe_regs)
>         CFI_STARTPROC
> -       pushq_cfi %rbx
> -       pushq_cfi %rbp
> +       pushq_cfi_reg rbx
> +       pushq_cfi_reg rbp
>         movq    %rdi, %r10      /* Save pointer */
>         xorl    %r11d, %r11d    /* Return value */
>         movl    (%rdi), %eax
> @@ -35,8 +35,8 @@ ENTRY(\op\()_safe_regs)
>         movl    %ebp, 20(%r10)
>         movl    %esi, 24(%r10)
>         movl    %edi, 28(%r10)
> -       popq_cfi %rbp
> -       popq_cfi %rbx
> +       popq_cfi_reg rbp
> +       popq_cfi_reg rbx
>         ret
>  3:
>         CFI_RESTORE_STATE
> @@ -53,10 +53,10 @@ ENDPROC(\op\()_safe_regs)
>  .macro op_safe_regs op
>  ENTRY(\op\()_safe_regs)
>         CFI_STARTPROC
> -       pushl_cfi %ebx
> -       pushl_cfi %ebp
> -       pushl_cfi %esi
> -       pushl_cfi %edi
> +       pushl_cfi_reg ebx
> +       pushl_cfi_reg ebp
> +       pushl_cfi_reg esi
> +       pushl_cfi_reg edi
>         pushl_cfi $0              /* Return value */
>         pushl_cfi %eax
>         movl    4(%eax), %ecx
> @@ -80,10 +80,10 @@ ENTRY(\op\()_safe_regs)
>         movl    %esi, 24(%eax)
>         movl    %edi, 28(%eax)
>         popl_cfi %eax
> -       popl_cfi %edi
> -       popl_cfi %esi
> -       popl_cfi %ebp
> -       popl_cfi %ebx
> +       popl_cfi_reg edi
> +       popl_cfi_reg esi
> +       popl_cfi_reg ebp
> +       popl_cfi_reg ebx
>         ret
>  3:
>         CFI_RESTORE_STATE
> diff --git a/arch/x86/lib/rwsem.S b/arch/x86/lib/rwsem.S
> index 5dff5f0..2322abe 100644
> --- a/arch/x86/lib/rwsem.S
> +++ b/arch/x86/lib/rwsem.S
> @@ -34,10 +34,10 @@
>   */
>
>  #define save_common_regs \
> -       pushl_cfi %ecx; CFI_REL_OFFSET ecx, 0
> +       pushl_cfi_reg ecx
>
>  #define restore_common_regs \
> -       popl_cfi %ecx; CFI_RESTORE ecx
> +       popl_cfi_reg ecx
>
>         /* Avoid uglifying the argument copying x86-64 needs to do. */
>         .macro movq src, dst
> @@ -64,22 +64,22 @@
>   */
>
>  #define save_common_regs \
> -       pushq_cfi %rdi; CFI_REL_OFFSET rdi, 0; \
> -       pushq_cfi %rsi; CFI_REL_OFFSET rsi, 0; \
> -       pushq_cfi %rcx; CFI_REL_OFFSET rcx, 0; \
> -       pushq_cfi %r8;  CFI_REL_OFFSET r8,  0; \
> -       pushq_cfi %r9;  CFI_REL_OFFSET r9,  0; \
> -       pushq_cfi %r10; CFI_REL_OFFSET r10, 0; \
> -       pushq_cfi %r11; CFI_REL_OFFSET r11, 0
> +       pushq_cfi_reg rdi; \
> +       pushq_cfi_reg rsi; \
> +       pushq_cfi_reg rcx; \
> +       pushq_cfi_reg r8;  \
> +       pushq_cfi_reg r9;  \
> +       pushq_cfi_reg r10; \
> +       pushq_cfi_reg r11
>
>  #define restore_common_regs \
> -       popq_cfi %r11; CFI_RESTORE r11; \
> -       popq_cfi %r10; CFI_RESTORE r10; \
> -       popq_cfi %r9;  CFI_RESTORE r9; \
> -       popq_cfi %r8;  CFI_RESTORE r8; \
> -       popq_cfi %rcx; CFI_RESTORE rcx; \
> -       popq_cfi %rsi; CFI_RESTORE rsi; \
> -       popq_cfi %rdi; CFI_RESTORE rdi
> +       popq_cfi_reg r11; \
> +       popq_cfi_reg r10; \
> +       popq_cfi_reg r9; \
> +       popq_cfi_reg r8; \
> +       popq_cfi_reg rcx; \
> +       popq_cfi_reg rsi; \
> +       popq_cfi_reg rdi
>
>  #endif
>
> @@ -87,12 +87,10 @@
>  ENTRY(call_rwsem_down_read_failed)
>         CFI_STARTPROC
>         save_common_regs
> -       __ASM_SIZE(push,_cfi) %__ASM_REG(dx)
> -       CFI_REL_OFFSET __ASM_REG(dx), 0
> +       __ASM_SIZE(push,_cfi_reg) __ASM_REG(dx)
>         movq %rax,%rdi
>         call rwsem_down_read_failed
> -       __ASM_SIZE(pop,_cfi) %__ASM_REG(dx)
> -       CFI_RESTORE __ASM_REG(dx)
> +       __ASM_SIZE(pop,_cfi_reg) __ASM_REG(dx)
>         restore_common_regs
>         ret
>         CFI_ENDPROC
> @@ -124,12 +122,10 @@ ENDPROC(call_rwsem_wake)
>  ENTRY(call_rwsem_downgrade_wake)
>         CFI_STARTPROC
>         save_common_regs
> -       __ASM_SIZE(push,_cfi) %__ASM_REG(dx)
> -       CFI_REL_OFFSET __ASM_REG(dx), 0
> +       __ASM_SIZE(push,_cfi_reg) __ASM_REG(dx)
>         movq %rax,%rdi
>         call rwsem_downgrade_wake
> -       __ASM_SIZE(pop,_cfi) %__ASM_REG(dx)
> -       CFI_RESTORE __ASM_REG(dx)
> +       __ASM_SIZE(pop,_cfi_reg) __ASM_REG(dx)
>         restore_common_regs
>         ret
>         CFI_ENDPROC
> diff --git a/arch/x86/lib/thunk_32.S b/arch/x86/lib/thunk_32.S
> index e28cdaf..5eb7150 100644
> --- a/arch/x86/lib/thunk_32.S
> +++ b/arch/x86/lib/thunk_32.S
> @@ -13,12 +13,9 @@
>         .globl \name
>  \name:
>         CFI_STARTPROC
> -       pushl_cfi %eax
> -       CFI_REL_OFFSET eax, 0
> -       pushl_cfi %ecx
> -       CFI_REL_OFFSET ecx, 0
> -       pushl_cfi %edx
> -       CFI_REL_OFFSET edx, 0
> +       pushl_cfi_reg eax
> +       pushl_cfi_reg ecx
> +       pushl_cfi_reg edx
>
>         .if \put_ret_addr_in_eax
>         /* Place EIP in the arg1 */
> @@ -26,12 +23,9 @@
>         .endif
>
>         call \func
> -       popl_cfi %edx
> -       CFI_RESTORE edx
> -       popl_cfi %ecx
> -       CFI_RESTORE ecx
> -       popl_cfi %eax
> -       CFI_RESTORE eax
> +       popl_cfi_reg edx
> +       popl_cfi_reg ecx
> +       popl_cfi_reg eax
>         ret
>         CFI_ENDPROC
>         _ASM_NOKPROBE(\name)
> diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S
> index 8ec443a..f89ba4e9 100644
> --- a/arch/x86/lib/thunk_64.S
> +++ b/arch/x86/lib/thunk_64.S
> @@ -17,24 +17,15 @@
>         CFI_STARTPROC
>
>         /* this one pushes 9 elems, the next one would be %rIP */
> -       pushq_cfi %rdi
> -       CFI_REL_OFFSET rdi, 0
> -       pushq_cfi %rsi
> -       CFI_REL_OFFSET rsi, 0
> -       pushq_cfi %rdx
> -       CFI_REL_OFFSET rdx, 0
> -       pushq_cfi %rcx
> -       CFI_REL_OFFSET rcx, 0
> -       pushq_cfi %rax
> -       CFI_REL_OFFSET rax, 0
> -       pushq_cfi %r8
> -       CFI_REL_OFFSET r8, 0
> -       pushq_cfi %r9
> -       CFI_REL_OFFSET r9, 0
> -       pushq_cfi %r10
> -       CFI_REL_OFFSET r10, 0
> -       pushq_cfi %r11
> -       CFI_REL_OFFSET r11, 0
> +       pushq_cfi_reg rdi
> +       pushq_cfi_reg rsi
> +       pushq_cfi_reg rdx
> +       pushq_cfi_reg rcx
> +       pushq_cfi_reg rax
> +       pushq_cfi_reg r8
> +       pushq_cfi_reg r9
> +       pushq_cfi_reg r10
> +       pushq_cfi_reg r11
>
>         .if \put_ret_addr_in_rdi
>         /* 9*8(%rsp) is return addr on stack */
> @@ -69,24 +60,15 @@
>         CFI_STARTPROC
>         CFI_ADJUST_CFA_OFFSET 9*8
>  restore:
> -       popq_cfi %r11
> -       CFI_RESTORE r11
> -       popq_cfi %r10
> -       CFI_RESTORE r10
> -       popq_cfi %r9
> -       CFI_RESTORE r9
> -       popq_cfi %r8
> -       CFI_RESTORE r8
> -       popq_cfi %rax
> -       CFI_RESTORE rax
> -       popq_cfi %rcx
> -       CFI_RESTORE rcx
> -       popq_cfi %rdx
> -       CFI_RESTORE rdx
> -       popq_cfi %rsi
> -       CFI_RESTORE rsi
> -       popq_cfi %rdi
> -       CFI_RESTORE rdi
> +       popq_cfi_reg r11
> +       popq_cfi_reg r10
> +       popq_cfi_reg r9
> +       popq_cfi_reg r8
> +       popq_cfi_reg rax
> +       popq_cfi_reg rcx
> +       popq_cfi_reg rdx
> +       popq_cfi_reg rsi
> +       popq_cfi_reg rdi
>         ret
>         CFI_ENDPROC
>         _ASM_NOKPROBE(restore)
> --
> 1.8.1.4
>



-- 
Andy Lutomirski
AMA Capital Management, LLC

^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH] x86: introduce push/pop macros which generate CFI_REL_OFFSET and CFI_RESTORE
  2015-01-12  0:38 ` Andy Lutomirski
@ 2015-01-12  6:23   ` Denys Vlasenko
  0 siblings, 0 replies; 130+ messages in thread
From: Denys Vlasenko @ 2015-01-12  6:23 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Denys Vlasenko, linux-kernel, Linus Torvalds, Oleg Nesterov,
	H. Peter Anvin, Borislav Petkov, Frederic Weisbecker, X86 ML,
	Alexei Starovoitov, Will Drewry, Kees Cook

On Mon, Jan 12, 2015 at 1:38 AM, Andy Lutomirski <luto@amacapital.net> wrote:
>> Sequences
>>         pushl_cfi %reg
>>         CFI_REL_OFFSET reg, 0
>> and
>>         popl_cfi %reg
>>         CFI_RESTORE reg
>> happen quite often. This patch adds macros which generate them.
>>
>> No assembly changes (verified with objdump -dr vmlinux.o).
>
> Looks sane to me.  Where does this apply in relation to the rest of your series?

On top of my latest four-patch set.

^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH] x86: introduce push/pop macros which generate CFI_REL_OFFSET and CFI_RESTORE
  2015-01-11 23:07 [PATCH] x86: introduce push/pop macros which generate CFI_REL_OFFSET and CFI_RESTORE Denys Vlasenko
  2015-01-12  0:38 ` Andy Lutomirski
@ 2015-01-12 19:23 ` Borislav Petkov
  2015-01-12 19:25   ` Andy Lutomirski
  2015-02-11 20:24 ` Andy Lutomirski
  2 siblings, 1 reply; 130+ messages in thread
From: Borislav Petkov @ 2015-01-12 19:23 UTC (permalink / raw)
  To: Denys Vlasenko
  Cc: linux-kernel, Linus Torvalds, Oleg Nesterov, H. Peter Anvin,
	Andy Lutomirski, Frederic Weisbecker, X86 ML, Alexei Starovoitov,
	Will Drewry, Kees Cook

On Mon, Jan 12, 2015 at 12:07:35AM +0100, Denys Vlasenko wrote:
> Sequences
>         pushl_cfi %reg
>         CFI_REL_OFFSET reg, 0
> and
>         popl_cfi %reg
>         CFI_RESTORE reg
> happen quite often. This patch adds macros which generate them.
> 
> No assembly changes (verified with objdump -dr vmlinux.o).

...

> diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h
> index f6f1598..de1cdaf 100644
> --- a/arch/x86/include/asm/dwarf2.h
> +++ b/arch/x86/include/asm/dwarf2.h
> @@ -86,11 +86,23 @@
>  	CFI_ADJUST_CFA_OFFSET 8
>  	.endm
>  
> +	.macro pushq_cfi_reg reg
> +	pushq %\reg
> +	CFI_ADJUST_CFA_OFFSET 8
> +	CFI_REL_OFFSET \reg, 0
> +	.endm

What's wrong with adding the CFI_REL_OFFSET to the pushl/popl_cfi macro
and not add two new _reg macros?

I.e., have the _cfi macros add all the CFI annotations needed.

-- 
Regards/Gruss,
    Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH] x86: introduce push/pop macros which generate CFI_REL_OFFSET and CFI_RESTORE
  2015-01-12 19:23 ` Borislav Petkov
@ 2015-01-12 19:25   ` Andy Lutomirski
  2015-01-12 19:37     ` Borislav Petkov
  0 siblings, 1 reply; 130+ messages in thread
From: Andy Lutomirski @ 2015-01-12 19:25 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Denys Vlasenko, linux-kernel, Linus Torvalds, Oleg Nesterov,
	H. Peter Anvin, Frederic Weisbecker, X86 ML, Alexei Starovoitov,
	Will Drewry, Kees Cook

On Mon, Jan 12, 2015 at 11:23 AM, Borislav Petkov <bp@alien8.de> wrote:
> On Mon, Jan 12, 2015 at 12:07:35AM +0100, Denys Vlasenko wrote:
>> Sequences
>>         pushl_cfi %reg
>>         CFI_REL_OFFSET reg, 0
>> and
>>         popl_cfi %reg
>>         CFI_RESTORE reg
>> happen quite often. This patch adds macros which generate them.
>>
>> No assembly changes (verified with objdump -dr vmlinux.o).
>
> ...
>
>> diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h
>> index f6f1598..de1cdaf 100644
>> --- a/arch/x86/include/asm/dwarf2.h
>> +++ b/arch/x86/include/asm/dwarf2.h
>> @@ -86,11 +86,23 @@
>>       CFI_ADJUST_CFA_OFFSET 8
>>       .endm
>>
>> +     .macro pushq_cfi_reg reg
>> +     pushq %\reg
>> +     CFI_ADJUST_CFA_OFFSET 8
>> +     CFI_REL_OFFSET \reg, 0
>> +     .endm
>
> What's wrong with adding the CFI_REL_OFFSET to the pushl/popl_cfi macro
> and not add two new _reg macros?
>
> I.e., have the _cfi macros add all the CFI annotations needed.
>

I think that some users don't want the CFI_REL_OFFSET.  Also, there's
that pesky % sign.

--Andy

> --
> Regards/Gruss,
>     Boris.
>
> Sent from a fat crate under my desk. Formatting is fine.
> --



-- 
Andy Lutomirski
AMA Capital Management, LLC

^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH] x86: introduce push/pop macros which generate CFI_REL_OFFSET and CFI_RESTORE
  2015-01-12 19:25   ` Andy Lutomirski
@ 2015-01-12 19:37     ` Borislav Petkov
  2015-01-12 19:46       ` Andy Lutomirski
  0 siblings, 1 reply; 130+ messages in thread
From: Borislav Petkov @ 2015-01-12 19:37 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Denys Vlasenko, linux-kernel, Linus Torvalds, Oleg Nesterov,
	H. Peter Anvin, Frederic Weisbecker, X86 ML, Alexei Starovoitov,
	Will Drewry, Kees Cook

Andy, please trim your replies.

On Mon, Jan 12, 2015 at 11:25:39AM -0800, Andy Lutomirski wrote:
> I think that some users don't want the CFI_REL_OFFSET.

Why? I thought those two annotations are independent? As you said:

"IOW, one is to keep the stack frame tracking consistent and the other
is to tell the unwinder about the register we just saved."

Sounds to me like we want both...

-- 
Regards/Gruss,
    Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH] x86: introduce push/pop macros which generate CFI_REL_OFFSET and CFI_RESTORE
  2015-01-12 19:37     ` Borislav Petkov
@ 2015-01-12 19:46       ` Andy Lutomirski
  2015-01-12 20:11         ` Borislav Petkov
  0 siblings, 1 reply; 130+ messages in thread
From: Andy Lutomirski @ 2015-01-12 19:46 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Denys Vlasenko, linux-kernel, Linus Torvalds, Oleg Nesterov,
	H. Peter Anvin, Frederic Weisbecker, X86 ML, Alexei Starovoitov,
	Will Drewry, Kees Cook

On Mon, Jan 12, 2015 at 11:37 AM, Borislav Petkov <bp@alien8.de> wrote:
> Andy, please trim your replies.
>
> On Mon, Jan 12, 2015 at 11:25:39AM -0800, Andy Lutomirski wrote:
>> I think that some users don't want the CFI_REL_OFFSET.
>
> Why? I thought those two annotations are independent? As you said:
>
> "IOW, one is to keep the stack frame tracking consistent and the other
> is to tell the unwinder about the register we just saved."
>
> Sounds to me like we want both...
>

Dumb example:

    pushq_cfi $__KERNEL_DS /* ss */

This doesn't save anything that the unwinder would care about.

Better example:

    pushq_cfi \child_rip /* rip */
    CFI_REL_OFFSET    rip,0

Doing this with a macro would need a fancier macro.

Then there's crap like:

    pushq_cfi %rdi
    SCHEDULE_USER
    popq_cfi %rdi

I would need to look a lot more carefully to figure out whether this
would need CFI_REL_OFFSET.

If we actually had a DWARF unwinder in the kernel, maybe we could have
real test cases :-/

--Andy

^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH] x86: introduce push/pop macros which generate CFI_REL_OFFSET and CFI_RESTORE
  2015-01-12 19:46       ` Andy Lutomirski
@ 2015-01-12 20:11         ` Borislav Petkov
  2015-01-12 20:14           ` Andy Lutomirski
  0 siblings, 1 reply; 130+ messages in thread
From: Borislav Petkov @ 2015-01-12 20:11 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Denys Vlasenko, linux-kernel, Linus Torvalds, Oleg Nesterov,
	H. Peter Anvin, Frederic Weisbecker, X86 ML, Alexei Starovoitov,
	Will Drewry, Kees Cook

On Mon, Jan 12, 2015 at 11:46:53AM -0800, Andy Lutomirski wrote:
> Dumb example:
> 
>     pushq_cfi $__KERNEL_DS /* ss */
> 
> This doesn't save anything that the unwinder would care about.

And? The unwinder or whatever looks at that info simply ignores stuff it
is not interested in, no?

> Better example:
> 
>     pushq_cfi \child_rip /* rip */
>     CFI_REL_OFFSET    rip,0
> 
> Doing this with a macro would need a fancier macro.

I'd ask first whether we really need this at all.

> Then there's crap like:
> 
>     pushq_cfi %rdi
>     SCHEDULE_USER
>     popq_cfi %rdi

I guess we can add a gas regname argument optional and if it is set, use
it and if not, use the reg itself... Or something like that in the best
effort type of approach.

> I would need to look a lot more carefully to figure out whether this
> would need CFI_REL_OFFSET.
> 
> If we actually had a DWARF unwinder in the kernel, maybe we could have
> real test cases :-/

I don't think that's ever going to happen.

I'd say we do the CFI annotation on a best effort basis but not
sacrifice readability in the process. If it can't be annotated, well,
tough luck.

-- 
Regards/Gruss,
    Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH] x86: introduce push/pop macros which generate CFI_REL_OFFSET and CFI_RESTORE
  2015-01-12 20:11         ` Borislav Petkov
@ 2015-01-12 20:14           ` Andy Lutomirski
  2015-01-12 20:22             ` H. Peter Anvin
  2015-01-12 20:32             ` Borislav Petkov
  0 siblings, 2 replies; 130+ messages in thread
From: Andy Lutomirski @ 2015-01-12 20:14 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Denys Vlasenko, linux-kernel, Linus Torvalds, Oleg Nesterov,
	H. Peter Anvin, Frederic Weisbecker, X86 ML, Alexei Starovoitov,
	Will Drewry, Kees Cook

On Mon, Jan 12, 2015 at 12:11 PM, Borislav Petkov <bp@alien8.de> wrote:
> On Mon, Jan 12, 2015 at 11:46:53AM -0800, Andy Lutomirski wrote:
>> Dumb example:
>>
>>     pushq_cfi $__KERNEL_DS /* ss */
>>
>> This doesn't save anything that the unwinder would care about.
>
> And? The unwinder or whatever looks at that info simply ignores stuff it
> is not interested in, no?

But CFI_REL_OFFSET $__KERNEL_DS, 0 probably isn't even well-formed and
won't build.

>
>> Better example:
>>
>>     pushq_cfi \child_rip /* rip */
>>     CFI_REL_OFFSET    rip,0
>>
>> Doing this with a macro would need a fancier macro.
>
> I'd ask first whether we really need this at all.
>
>> Then there's crap like:
>>
>>     pushq_cfi %rdi
>>     SCHEDULE_USER
>>     popq_cfi %rdi
>
> I guess we can add a gas regname argument optional and if it is set, use
> it and if not, use the reg itself... Or something like that in the best
> effort type of approach.
>
>> I would need to look a lot more carefully to figure out whether this
>> would need CFI_REL_OFFSET.
>>
>> If we actually had a DWARF unwinder in the kernel, maybe we could have
>> real test cases :-/
>
> I don't think that's ever going to happen.
>
> I'd say we do the CFI annotation on a best effort basis but not
> sacrifice readability in the process. If it can't be annotated, well,
> tough luck.
>

This stuff is at least useful (in theory) for debugging with gdb.  And
I wouldn't mind an optional DWARF unwinder to get higher quality
backtraces.  Obviously any such thing would need to be quite robust.
I think SuSE has one.

> --
> Regards/Gruss,
>     Boris.
>
> Sent from a fat crate under my desk. Formatting is fine.
> --



-- 
Andy Lutomirski
AMA Capital Management, LLC

^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH] x86: introduce push/pop macros which generate CFI_REL_OFFSET and CFI_RESTORE
  2015-01-12 20:14           ` Andy Lutomirski
@ 2015-01-12 20:22             ` H. Peter Anvin
  2015-01-12 20:26               ` Andy Lutomirski
  2015-01-12 20:32             ` Borislav Petkov
  1 sibling, 1 reply; 130+ messages in thread
From: H. Peter Anvin @ 2015-01-12 20:22 UTC (permalink / raw)
  To: Andy Lutomirski, Borislav Petkov
  Cc: Denys Vlasenko, linux-kernel, Linus Torvalds, Oleg Nesterov,
	Frederic Weisbecker, X86 ML, Alexei Starovoitov, Will Drewry,
	Kees Cook

On 01/12/2015 12:14 PM, Andy Lutomirski wrote:
> On Mon, Jan 12, 2015 at 12:11 PM, Borislav Petkov <bp@alien8.de> wrote:
>> On Mon, Jan 12, 2015 at 11:46:53AM -0800, Andy Lutomirski wrote:
>>> Dumb example:
>>>
>>>     pushq_cfi $__KERNEL_DS /* ss */
>>>
>>> This doesn't save anything that the unwinder would care about.
>>
>> And? The unwinder or whatever looks at that info simply ignores stuff it
>> is not interested in, no?
> 
> But CFI_REL_OFFSET $__KERNEL_DS, 0 probably isn't even well-formed and
> won't build.
> 

I think this is relatively easy to deal with at the expense of a large
.ifeq statement in the macro.

	-hpa



^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH] x86: introduce push/pop macros which generate CFI_REL_OFFSET and CFI_RESTORE
  2015-01-12 20:22             ` H. Peter Anvin
@ 2015-01-12 20:26               ` Andy Lutomirski
  2015-01-12 21:03                 ` Borislav Petkov
  2015-01-13 12:07                 ` Denys Vlasenko
  0 siblings, 2 replies; 130+ messages in thread
From: Andy Lutomirski @ 2015-01-12 20:26 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Borislav Petkov, Denys Vlasenko, linux-kernel, Linus Torvalds,
	Oleg Nesterov, Frederic Weisbecker, X86 ML, Alexei Starovoitov,
	Will Drewry, Kees Cook

On Mon, Jan 12, 2015 at 12:22 PM, H. Peter Anvin <hpa@zytor.com> wrote:
> On 01/12/2015 12:14 PM, Andy Lutomirski wrote:
>> On Mon, Jan 12, 2015 at 12:11 PM, Borislav Petkov <bp@alien8.de> wrote:
>>> On Mon, Jan 12, 2015 at 11:46:53AM -0800, Andy Lutomirski wrote:
>>>> Dumb example:
>>>>
>>>>     pushq_cfi $__KERNEL_DS /* ss */
>>>>
>>>> This doesn't save anything that the unwinder would care about.
>>>
>>> And? The unwinder or whatever looks at that info simply ignores stuff it
>>> is not interested in, no?
>>
>> But CFI_REL_OFFSET $__KERNEL_DS, 0 probably isn't even well-formed and
>> won't build.
>>
>
> I think this is relatively easy to deal with at the expense of a large
> .ifeq statement in the macro.
>
>

Is the usage you have in mind something like:

pushq_cfi %r11 /* saves r11 */
pushq_cfi $0 /* saves nothing */
pushq_cfi %r11, savereg=rip /* saves rip */

I think I prefer the somewhat less magical:

pushq_reg_cfi %r11 /* saves r11 */
pushq_reg_cfi %r11, savereg=rip /* saves rip */
pushq_nounwind_cfi $0

Or something like that.

--Andy

         -hpa
>
>



-- 
Andy Lutomirski
AMA Capital Management, LLC

^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH] x86: introduce push/pop macros which generate CFI_REL_OFFSET and CFI_RESTORE
  2015-01-12 20:14           ` Andy Lutomirski
  2015-01-12 20:22             ` H. Peter Anvin
@ 2015-01-12 20:32             ` Borislav Petkov
  1 sibling, 0 replies; 130+ messages in thread
From: Borislav Petkov @ 2015-01-12 20:32 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Denys Vlasenko, linux-kernel, Linus Torvalds, Oleg Nesterov,
	H. Peter Anvin, Frederic Weisbecker, X86 ML, Alexei Starovoitov,
	Will Drewry, Kees Cook

On Mon, Jan 12, 2015 at 12:14:32PM -0800, Andy Lutomirski wrote:
> This stuff is at least useful (in theory) for debugging with gdb.  And

Bah, debugging the kernel with gdb, who does that?!

Real men stare at oopses and rebuild stack with pen and paper!

:-P

-- 
Regards/Gruss,
    Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH] x86: introduce push/pop macros which generate CFI_REL_OFFSET and CFI_RESTORE
  2015-01-12 20:26               ` Andy Lutomirski
@ 2015-01-12 21:03                 ` Borislav Petkov
  2015-01-13 12:07                 ` Denys Vlasenko
  1 sibling, 0 replies; 130+ messages in thread
From: Borislav Petkov @ 2015-01-12 21:03 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: H. Peter Anvin, Denys Vlasenko, linux-kernel, Linus Torvalds,
	Oleg Nesterov, Frederic Weisbecker, X86 ML, Alexei Starovoitov,
	Will Drewry, Kees Cook

On Mon, Jan 12, 2015 at 12:26:38PM -0800, Andy Lutomirski wrote:
> Is the usage you have in mind something like:
> 
> pushq_cfi %r11 /* saves r11 */
> pushq_cfi $0 /* saves nothing */
> pushq_cfi %r11, savereg=rip /* saves rip */
> 
> I think I prefer the somewhat less magical:
> 
> pushq_reg_cfi %r11 /* saves r11 */
> pushq_reg_cfi %r11, savereg=rip /* saves rip */
> pushq_nounwind_cfi $0

But that's two macro names and the above one is one single which does
it all. And it's not like we're not looking at the definition of macros
when staring at the code anyway...

-- 
Regards/Gruss,
    Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH] x86: introduce push/pop macros which generate CFI_REL_OFFSET and CFI_RESTORE
  2015-01-12 20:26               ` Andy Lutomirski
  2015-01-12 21:03                 ` Borislav Petkov
@ 2015-01-13 12:07                 ` Denys Vlasenko
  1 sibling, 0 replies; 130+ messages in thread
From: Denys Vlasenko @ 2015-01-13 12:07 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: H. Peter Anvin, Borislav Petkov, Denys Vlasenko, linux-kernel,
	Linus Torvalds, Oleg Nesterov, Frederic Weisbecker, X86 ML,
	Alexei Starovoitov, Will Drewry, Kees Cook

On Mon, Jan 12, 2015 at 9:26 PM, Andy Lutomirski <luto@amacapital.net> wrote:
> I think I prefer the somewhat less magical:
>
> pushq_reg_cfi %r11 /* saves r11 */
> pushq_reg_cfi %r11, savereg=rip /* saves rip */
> pushq_nounwind_cfi $0
>
> Or something like that.

That's very close to my patch. With it:

pushq_cfi     - doesn't emit "register is saved" annotation, can be
used with non-register args.
pushq_cfi_reg - emits it

^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH 2/4] x86: ia32entry.S: fix wrong symbolic constant usage: R11->ARGOFFSET
  2015-01-10 22:00 ` [PATCH 2/4] x86: ia32entry.S: fix wrong symbolic constant usage: R11->ARGOFFSET Denys Vlasenko
  2015-01-10 22:13   ` Andy Lutomirski
@ 2015-01-13 22:11   ` Andy Lutomirski
  1 sibling, 0 replies; 130+ messages in thread
From: Andy Lutomirski @ 2015-01-13 22:11 UTC (permalink / raw)
  To: Denys Vlasenko
  Cc: linux-kernel, Linus Torvalds, Oleg Nesterov, H. Peter Anvin,
	Borislav Petkov, Frederic Weisbecker, X86 ML, Alexei Starovoitov,
	Will Drewry, Kees Cook

On Sat, Jan 10, 2015 at 2:00 PM, Denys Vlasenko <dvlasenk@redhat.com> wrote:
> The values of these two constants are the same, the meaning is different.
>
> Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
> Acked-by: Borislav Petkov <bp@suse.de>
> CC: Linus Torvalds <torvalds@linux-foundation.org>
> CC: Oleg Nesterov <oleg@redhat.com>
> CC: "H. Peter Anvin" <hpa@zytor.com>
> CC: Borislav Petkov <bp@alien8.de>
> CC: Andy Lutomirski <luto@amacapital.net>
> CC: Frederic Weisbecker <fweisbec@gmail.com>
> CC: X86 ML <x86@kernel.org>
> CC: Alexei Starovoitov <ast@plumgrid.com>
> CC: Will Drewry <wad@chromium.org>
> CC: Kees Cook <keescook@chromium.org>
> CC: linux-kernel@vger.kernel.org
> ---
>  arch/x86/ia32/ia32entry.S | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
> index 82e8a1d..156ebca 100644
> --- a/arch/x86/ia32/ia32entry.S
> +++ b/arch/x86/ia32/ia32entry.S
> @@ -179,8 +179,8 @@ sysenter_dispatch:
>  sysexit_from_sys_call:
>         andl    $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
>         /* clear IF, that popfq doesn't enable interrupts early */
> -       andl  $~0x200,EFLAGS-R11(%rsp)
> -       movl    RIP-R11(%rsp),%edx              /* User %eip */
> +       andl    $~0x200,EFLAGS-ARGOFFSET(%rsp)
> +       movl    RIP-ARGOFFSET(%rsp),%edx                /* User %eip */
>         CFI_REGISTER rip,rdx
>         RESTORE_ARGS 0,24,0,0,0,0
>         xorq    %r8,%r8
> --
> 1.8.1.4
>

Applied.

^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH 4/4] x86: entry_64.S: fold SAVE_ARGS_IRQ macro into its sole user
  2015-01-10 22:00 ` [PATCH 4/4] x86: entry_64.S: fold SAVE_ARGS_IRQ macro into its sole user Denys Vlasenko
@ 2015-01-13 22:26   ` Andy Lutomirski
  0 siblings, 0 replies; 130+ messages in thread
From: Andy Lutomirski @ 2015-01-13 22:26 UTC (permalink / raw)
  To: Denys Vlasenko
  Cc: linux-kernel, Linus Torvalds, Oleg Nesterov, H. Peter Anvin,
	Borislav Petkov, Frederic Weisbecker, X86 ML, Alexei Starovoitov,
	Will Drewry, Kees Cook

On Sat, Jan 10, 2015 at 2:00 PM, Denys Vlasenko <dvlasenk@redhat.com> wrote:
> No code changes.
>
> This is a preparatory patch for change in "struct pt_regs" handling.
>
> Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
> CC: Linus Torvalds <torvalds@linux-foundation.org>
> CC: Oleg Nesterov <oleg@redhat.com>
> CC: "H. Peter Anvin" <hpa@zytor.com>
> CC: Borislav Petkov <bp@alien8.de>
> CC: Andy Lutomirski <luto@amacapital.net>
> CC: Frederic Weisbecker <fweisbec@gmail.com>
> CC: X86 ML <x86@kernel.org>
> CC: Alexei Starovoitov <ast@plumgrid.com>
> CC: Will Drewry <wad@chromium.org>
> CC: Kees Cook <keescook@chromium.org>
> CC: linux-kernel@vger.kernel.org

Applied.

I have *not* applied patch 3 or the new restore patch, since I got
lost in the patch attachments and long discussion.  Can you post
incremental patches on top of:

https://git.kernel.org/cgit/linux/kernel/git/luto/linux.git/log/?h=x86/entry

Thanks,
Andy

^ permalink raw reply	[flat|nested] 130+ messages in thread

* [PATCH 01/11] x86: entry_64.S: always allocate complete "struct pt_regs"
@ 2015-01-14 21:48 Denys Vlasenko
  2015-01-14 21:48 ` [PATCH 02/11] x86: code shrink in paranoid_exit Denys Vlasenko
                   ` (11 more replies)
  0 siblings, 12 replies; 130+ messages in thread
From: Denys Vlasenko @ 2015-01-14 21:48 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Denys Vlasenko, Linus Torvalds, Oleg Nesterov, Borislav Petkov,
	H. Peter Anvin, Frederic Weisbecker, X86 ML, Alexei Starovoitov,
	Will Drewry, Kees Cook, linux-kernel

64-bit code was using six stack slots less by not saving/restoring
registers which are callee-preserved according to C ABI,
and not allocating space for them.
Only when syscall needed a complete "struct pt_regs",
the complete area was allocated and filled in.
As an additional twist, on interrupt entry a "slightly less truncated pt_regs"
trick is used, to make nested interrupt stacks easier to unwind.

This proved to be a source of significant obfuscation and subtle bugs.
For example, stub_fork had to pop the return address,
extend the struct, save registers, and push return address back. Ugly.
ia32_ptregs_common pops return address and "returns" via jmp insn,
throwing a wrench into CPU return stack cache.

This patch changes code to always allocate a complete "struct pt_regs".
The saving of registers is still done lazily.

"Partial pt_regs" trick on interrupt stack is retained, but with added comments
which explain what we are doing, and why. Existing comments are improved.

Macros which manipulate "struct pt_regs" on stack are reworked:
ALLOC_PT_GPREGS_ON_STACK allocates the structure.
SAVE_C_REGS saves to it those registers which are clobbered by C code.
SAVE_EXTRA_REGS saves to it all other registers.
Corresponding RESTORE_* and REMOVE_PT_GPREGS_FROM_STACK macros reverse it.

ia32_ptregs_common, stub_fork and friends lost their ugly dance with
return pointer.

LOAD_ARGS32 in ia32entry.S now uses symbolic stack offsets
instead of magic numbers.

error_entry and save_paranoid now use SAVE_C_REGS + SAVE_EXTRA_REGS
instead of having it open-coded yet again.

Misleading and slightly wrong comments in "struct pt_regs" are fixed
(four instances).

Patch was run-tested: 64-bit executables, 32-bit executables,
strace works.
Timing tests did not show measurable difference in 32-bit
and 64-bit syscalls.

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
CC: Linus Torvalds <torvalds@linux-foundation.org>
CC: Oleg Nesterov <oleg@redhat.com>
CC: Borislav Petkov <bp@alien8.de>
CC: "H. Peter Anvin" <hpa@zytor.com>
CC: Andy Lutomirski <luto@amacapital.net>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: X86 ML <x86@kernel.org>
CC: Alexei Starovoitov <ast@plumgrid.com>
CC: Will Drewry <wad@chromium.org>
CC: Kees Cook <keescook@chromium.org>
CC: linux-kernel@vger.kernel.org
---
 arch/x86/ia32/ia32entry.S              |  47 +++----
 arch/x86/include/asm/calling.h         | 222 ++++++++++++++++-----------------
 arch/x86/include/asm/irqflags.h        |   4 +-
 arch/x86/include/asm/ptrace.h          |  13 +-
 arch/x86/include/uapi/asm/ptrace-abi.h |  16 ++-
 arch/x86/include/uapi/asm/ptrace.h     |  13 +-
 arch/x86/kernel/entry_64.S             | 210 +++++++++++++------------------
 7 files changed, 250 insertions(+), 275 deletions(-)

diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 156ebca..f4bed49 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -62,12 +62,12 @@
 	 */
 	.macro LOAD_ARGS32 offset, _r9=0
 	.if \_r9
-	movl \offset+16(%rsp),%r9d
+	movl \offset+R9(%rsp),%r9d
 	.endif
-	movl \offset+40(%rsp),%ecx
-	movl \offset+48(%rsp),%edx
-	movl \offset+56(%rsp),%esi
-	movl \offset+64(%rsp),%edi
+	movl \offset+RCX(%rsp),%ecx
+	movl \offset+RDX(%rsp),%edx
+	movl \offset+RSI(%rsp),%esi
+	movl \offset+RDI(%rsp),%edi
 	movl %eax,%eax			/* zero extension */
 	.endm
 	
@@ -144,7 +144,8 @@ ENTRY(ia32_sysenter_target)
 	CFI_REL_OFFSET rip,0
 	pushq_cfi %rax
 	cld
-	SAVE_ARGS 0,1,0
+	ALLOC_PT_GPREGS_ON_STACK
+	SAVE_C_REGS_EXCEPT_R891011
  	/* no need to do an access_ok check here because rbp has been
  	   32bit zero extended */ 
 	ASM_STAC
@@ -182,7 +183,8 @@ sysexit_from_sys_call:
 	andl	$~0x200,EFLAGS-ARGOFFSET(%rsp)
 	movl	RIP-ARGOFFSET(%rsp),%edx		/* User %eip */
 	CFI_REGISTER rip,rdx
-	RESTORE_ARGS 0,24,0,0,0,0
+	RESTORE_RSI_RDI
+	REMOVE_PT_GPREGS_FROM_STACK 3*8
 	xorq	%r8,%r8
 	xorq	%r9,%r9
 	xorq	%r10,%r10
@@ -256,13 +258,13 @@ sysenter_tracesys:
 	testl	$(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	jz	sysenter_auditsys
 #endif
-	SAVE_REST
+	SAVE_EXTRA_REGS
 	CLEAR_RREGS
 	movq	$-ENOSYS,RAX(%rsp)/* ptrace can change this for a bad syscall */
 	movq	%rsp,%rdi        /* &pt_regs -> arg1 */
 	call	syscall_trace_enter
 	LOAD_ARGS32 ARGOFFSET  /* reload args from stack in case ptrace changed it */
-	RESTORE_REST
+	RESTORE_EXTRA_REGS
 	cmpq	$(IA32_NR_syscalls-1),%rax
 	ja	int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */
 	jmp	sysenter_do_call
@@ -304,7 +306,8 @@ ENTRY(ia32_cstar_target)
 	 * disabled irqs and here we enable it straight after entry:
 	 */
 	ENABLE_INTERRUPTS(CLBR_NONE)
-	SAVE_ARGS 8,0,0
+	ALLOC_PT_GPREGS_ON_STACK 8
+	SAVE_C_REGS_EXCEPT_RCX_R891011
 	movl 	%eax,%eax	/* zero extension */
 	movq	%rax,ORIG_RAX-ARGOFFSET(%rsp)
 	movq	%rcx,RIP-ARGOFFSET(%rsp)
@@ -341,7 +344,7 @@ cstar_dispatch:
 	jnz sysretl_audit
 sysretl_from_sys_call:
 	andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
-	RESTORE_ARGS 0,-ARG_SKIP,0,0,0
+	RESTORE_RSI_RDI_RDX
 	movl RIP-ARGOFFSET(%rsp),%ecx
 	CFI_REGISTER rip,rcx
 	movl EFLAGS-ARGOFFSET(%rsp),%r11d	
@@ -372,13 +375,13 @@ cstar_tracesys:
 	jz cstar_auditsys
 #endif
 	xchgl %r9d,%ebp
-	SAVE_REST
+	SAVE_EXTRA_REGS
 	CLEAR_RREGS 0, r9
 	movq $-ENOSYS,RAX(%rsp)	/* ptrace can change this for a bad syscall */
 	movq %rsp,%rdi        /* &pt_regs -> arg1 */
 	call syscall_trace_enter
 	LOAD_ARGS32 ARGOFFSET, 1  /* reload args from stack in case ptrace changed it */
-	RESTORE_REST
+	RESTORE_EXTRA_REGS
 	xchgl %ebp,%r9d
 	cmpq $(IA32_NR_syscalls-1),%rax
 	ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */
@@ -433,7 +436,8 @@ ENTRY(ia32_syscall)
 	cld
 	/* note the registers are not zero extended to the sf.
 	   this could be a problem. */
-	SAVE_ARGS 0,1,0
+	ALLOC_PT_GPREGS_ON_STACK
+	SAVE_C_REGS_EXCEPT_R891011
 	orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	jnz ia32_tracesys
@@ -446,16 +450,16 @@ ia32_sysret:
 	movq %rax,RAX-ARGOFFSET(%rsp)
 ia32_ret_from_sys_call:
 	CLEAR_RREGS -ARGOFFSET
-	jmp int_ret_from_sys_call 
+	jmp int_ret_from_sys_call
 
-ia32_tracesys:			 
-	SAVE_REST
+ia32_tracesys:
+	SAVE_EXTRA_REGS
 	CLEAR_RREGS
 	movq $-ENOSYS,RAX(%rsp)	/* ptrace can change this for a bad syscall */
 	movq %rsp,%rdi        /* &pt_regs -> arg1 */
 	call syscall_trace_enter
 	LOAD_ARGS32 ARGOFFSET  /* reload args from stack in case ptrace changed it */
-	RESTORE_REST
+	RESTORE_EXTRA_REGS
 	cmpq $(IA32_NR_syscalls-1),%rax
 	ja  int_ret_from_sys_call	/* ia32_tracesys has set RAX(%rsp) */
 	jmp ia32_do_call
@@ -492,7 +496,6 @@ GLOBAL(stub32_clone)
 
 	ALIGN
 ia32_ptregs_common:
-	popq %r11
 	CFI_ENDPROC
 	CFI_STARTPROC32	simple
 	CFI_SIGNAL_FRAME
@@ -507,9 +510,9 @@ ia32_ptregs_common:
 /*	CFI_REL_OFFSET	rflags,EFLAGS-ARGOFFSET*/
 	CFI_REL_OFFSET	rsp,RSP-ARGOFFSET
 /*	CFI_REL_OFFSET	ss,SS-ARGOFFSET*/
-	SAVE_REST
+	SAVE_EXTRA_REGS 8
 	call *%rax
-	RESTORE_REST
-	jmp  ia32_sysret	/* misbalances the return cache */
+	RESTORE_EXTRA_REGS 8
+	ret
 	CFI_ENDPROC
 END(ia32_ptregs_common)
diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h
index 1f1297b..a2ef97a 100644
--- a/arch/x86/include/asm/calling.h
+++ b/arch/x86/include/asm/calling.h
@@ -55,143 +55,137 @@ For 32-bit we have the following conventions - kernel is built with
  * for assembly code:
  */
 
-#define R15		  0
-#define R14		  8
-#define R13		 16
-#define R12		 24
-#define RBP		 32
-#define RBX		 40
-
-/* arguments: interrupts/non tracing syscalls only save up to here: */
-#define R11		 48
-#define R10		 56
-#define R9		 64
-#define R8		 72
-#define RAX		 80
-#define RCX		 88
-#define RDX		 96
-#define RSI		104
-#define RDI		112
-#define ORIG_RAX	120       /* + error_code */
-/* end of arguments */
-
-/* cpu exception frame or undefined in case of fast syscall: */
-#define RIP		128
-#define CS		136
-#define EFLAGS		144
-#define RSP		152
-#define SS		160
-
-#define ARGOFFSET	R11
-
-	.macro SAVE_ARGS addskip=0, save_rcx=1, save_r891011=1, rax_enosys=0
-	subq  $9*8+\addskip, %rsp
-	CFI_ADJUST_CFA_OFFSET	9*8+\addskip
-	movq_cfi rdi, 8*8
-	movq_cfi rsi, 7*8
-	movq_cfi rdx, 6*8
-
-	.if \save_rcx
-	movq_cfi rcx, 5*8
-	.endif
+/* The layout forms the "struct pt_regs" on the stack: */
+/*
+ * C ABI says these regs are callee-preserved. They aren't saved on kernel entry
+ * unless syscall needs a complete, fully filled "struct pt_regs".
+ */
+#define R15		0*8
+#define R14		1*8
+#define R13		2*8
+#define R12		3*8
+#define RBP		4*8
+#define RBX		5*8
+/* These regs are callee-clobbered. Always saved on kernel entry. */
+#define R11		6*8
+#define R10		7*8
+#define R9		8*8
+#define R8		9*8
+#define RAX		10*8
+#define RCX		11*8
+#define RDX		12*8
+#define RSI		13*8
+#define RDI		14*8
+/*
+ * On syscall entry, this is syscall#. On CPU exception, this is error code.
+ * On hw interrupt, it's IRQ number:
+ */
+#define ORIG_RAX	15*8
+/* Return frame for iretq */
+#define RIP		16*8
+#define CS		17*8
+#define EFLAGS		18*8
+#define RSP		19*8
+#define SS		20*8
+
+#define ARGOFFSET	0
+
+	.macro ALLOC_PT_GPREGS_ON_STACK addskip=0
+	subq	$15*8+\addskip, %rsp
+	CFI_ADJUST_CFA_OFFSET 15*8+\addskip
+	.endm
 
-	.if \rax_enosys
-	movq $-ENOSYS, 4*8(%rsp)
-	.else
-	movq_cfi rax, 4*8
+	.macro SAVE_C_REGS_HELPER offset=0 rax=1 rcx=1 r8plus=1
+	.if \r8plus
+	movq_cfi r11, 6*8+\offset
+	movq_cfi r10, 7*8+\offset
+	movq_cfi r9,  8*8+\offset
+	movq_cfi r8,  9*8+\offset
 	.endif
-
-	.if \save_r891011
-	movq_cfi r8,  3*8
-	movq_cfi r9,  2*8
-	movq_cfi r10, 1*8
-	movq_cfi r11, 0*8
+	.if \rax
+	movq_cfi rax, 10*8+\offset
+	.endif
+	.if \rcx
+	movq_cfi rcx, 11*8+\offset
 	.endif
+	movq_cfi rdx, 12*8+\offset
+	movq_cfi rsi, 13*8+\offset
+	movq_cfi rdi, 14*8+\offset
+	.endm
+	.macro SAVE_C_REGS offset=0
+	SAVE_C_REGS_HELPER \offset, 1, 1, 1
+	.endm
+	.macro SAVE_C_REGS_EXCEPT_RAX_RCX offset=0
+	SAVE_C_REGS_HELPER \offset, 0, 0, 1
+	.endm
+	.macro SAVE_C_REGS_EXCEPT_R891011
+	SAVE_C_REGS_HELPER 0, 1, 1, 0
+	.endm
+	.macro SAVE_C_REGS_EXCEPT_RCX_R891011
+	SAVE_C_REGS_HELPER 0, 1, 0, 0
+	.endm
 
+	.macro SAVE_EXTRA_REGS offset=0
+	movq_cfi r15, 0*8+\offset
+	movq_cfi r14, 1*8+\offset
+	movq_cfi r13, 2*8+\offset
+	movq_cfi r12, 3*8+\offset
+	movq_cfi rbp, 4*8+\offset
+	movq_cfi rbx, 5*8+\offset
+	.endm
+	.macro SAVE_EXTRA_REGS_RBP offset=0
+	movq_cfi rbp, 4*8+\offset
 	.endm
 
-#define ARG_SKIP	(9*8)
+	.macro RESTORE_EXTRA_REGS offset=0
+	movq_cfi_restore 0*8+\offset, r15
+	movq_cfi_restore 1*8+\offset, r14
+	movq_cfi_restore 2*8+\offset, r13
+	movq_cfi_restore 3*8+\offset, r12
+	movq_cfi_restore 4*8+\offset, rbp
+	movq_cfi_restore 5*8+\offset, rbx
+	.endm
 
-	.macro RESTORE_ARGS rstor_rax=1, addskip=0, rstor_rcx=1, rstor_r11=1, \
-			    rstor_r8910=1, rstor_rdx=1
+	.macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1
 	.if \rstor_r11
-	movq_cfi_restore 0*8, r11
+	movq_cfi_restore 6*8, r11
 	.endif
-
 	.if \rstor_r8910
-	movq_cfi_restore 1*8, r10
-	movq_cfi_restore 2*8, r9
-	movq_cfi_restore 3*8, r8
+	movq_cfi_restore 7*8, r10
+	movq_cfi_restore 8*8, r9
+	movq_cfi_restore 9*8, r8
 	.endif
-
 	.if \rstor_rax
-	movq_cfi_restore 4*8, rax
+	movq_cfi_restore 10*8, rax
 	.endif
-
 	.if \rstor_rcx
-	movq_cfi_restore 5*8, rcx
+	movq_cfi_restore 11*8, rcx
 	.endif
-
 	.if \rstor_rdx
-	movq_cfi_restore 6*8, rdx
-	.endif
-
-	movq_cfi_restore 7*8, rsi
-	movq_cfi_restore 8*8, rdi
-
-	.if ARG_SKIP+\addskip > 0
-	addq $ARG_SKIP+\addskip, %rsp
-	CFI_ADJUST_CFA_OFFSET	-(ARG_SKIP+\addskip)
+	movq_cfi_restore 12*8, rdx
 	.endif
+	movq_cfi_restore 13*8, rsi
+	movq_cfi_restore 14*8, rdi
 	.endm
-
-	.macro LOAD_ARGS offset, skiprax=0
-	movq \offset(%rsp),    %r11
-	movq \offset+8(%rsp),  %r10
-	movq \offset+16(%rsp), %r9
-	movq \offset+24(%rsp), %r8
-	movq \offset+40(%rsp), %rcx
-	movq \offset+48(%rsp), %rdx
-	movq \offset+56(%rsp), %rsi
-	movq \offset+64(%rsp), %rdi
-	.if \skiprax
-	.else
-	movq \offset+72(%rsp), %rax
-	.endif
+	.macro RESTORE_C_REGS
+	RESTORE_C_REGS_HELPER 1,1,1,1,1
 	.endm
-
-#define REST_SKIP	(6*8)
-
-	.macro SAVE_REST
-	subq $REST_SKIP, %rsp
-	CFI_ADJUST_CFA_OFFSET	REST_SKIP
-	movq_cfi rbx, 5*8
-	movq_cfi rbp, 4*8
-	movq_cfi r12, 3*8
-	movq_cfi r13, 2*8
-	movq_cfi r14, 1*8
-	movq_cfi r15, 0*8
+	.macro RESTORE_C_REGS_EXCEPT_RAX
+	RESTORE_C_REGS_HELPER 0,1,1,1,1
 	.endm
-
-	.macro RESTORE_REST
-	movq_cfi_restore 0*8, r15
-	movq_cfi_restore 1*8, r14
-	movq_cfi_restore 2*8, r13
-	movq_cfi_restore 3*8, r12
-	movq_cfi_restore 4*8, rbp
-	movq_cfi_restore 5*8, rbx
-	addq $REST_SKIP, %rsp
-	CFI_ADJUST_CFA_OFFSET	-(REST_SKIP)
+	.macro RESTORE_C_REGS_EXCEPT_RCX
+	RESTORE_C_REGS_HELPER 1,0,1,1,1
 	.endm
-
-	.macro SAVE_ALL
-	SAVE_ARGS
-	SAVE_REST
+	.macro RESTORE_RSI_RDI
+	RESTORE_C_REGS_HELPER 0,0,0,0,0
+	.endm
+	.macro RESTORE_RSI_RDI_RDX
+	RESTORE_C_REGS_HELPER 0,0,0,0,1
 	.endm
 
-	.macro RESTORE_ALL addskip=0
-	RESTORE_REST
-	RESTORE_ARGS 1, \addskip
+	.macro REMOVE_PT_GPREGS_FROM_STACK addskip=0
+	addq $15*8+\addskip, %rsp
+	CFI_ADJUST_CFA_OFFSET -(15*8+\addskip)
 	.endm
 
 	.macro icebp
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index 0a8b519..021bee9 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -171,9 +171,9 @@ static inline int arch_irqs_disabled(void)
 #define ARCH_LOCKDEP_SYS_EXIT_IRQ	\
 	TRACE_IRQS_ON; \
 	sti; \
-	SAVE_REST; \
+	SAVE_EXTRA_REGS; \
 	LOCKDEP_SYS_EXIT; \
-	RESTORE_REST; \
+	RESTORE_EXTRA_REGS; \
 	cli; \
 	TRACE_IRQS_OFF;
 
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 86fc2bb..4077d96 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -31,13 +31,17 @@ struct pt_regs {
 #else /* __i386__ */
 
 struct pt_regs {
+/*
+ * C ABI says these regs are callee-preserved. They aren't saved on kernel entry
+ * unless syscall needs a complete, fully filled "struct pt_regs".
+ */
 	unsigned long r15;
 	unsigned long r14;
 	unsigned long r13;
 	unsigned long r12;
 	unsigned long bp;
 	unsigned long bx;
-/* arguments: non interrupts/non tracing syscalls only save up to here*/
+/* These regs are callee-clobbered. Always saved on kernel entry. */
 	unsigned long r11;
 	unsigned long r10;
 	unsigned long r9;
@@ -47,9 +51,12 @@ struct pt_regs {
 	unsigned long dx;
 	unsigned long si;
 	unsigned long di;
+/*
+ * On syscall entry, this is syscall#. On CPU exception, this is error code.
+ * On hw interrupt, it's IRQ number:
+ */
 	unsigned long orig_ax;
-/* end of arguments */
-/* cpu exception frame or undefined */
+/* Return frame for iretq */
 	unsigned long ip;
 	unsigned long cs;
 	unsigned long flags;
diff --git a/arch/x86/include/uapi/asm/ptrace-abi.h b/arch/x86/include/uapi/asm/ptrace-abi.h
index 7b0a55a..580aee3 100644
--- a/arch/x86/include/uapi/asm/ptrace-abi.h
+++ b/arch/x86/include/uapi/asm/ptrace-abi.h
@@ -25,13 +25,17 @@
 #else /* __i386__ */
 
 #if defined(__ASSEMBLY__) || defined(__FRAME_OFFSETS)
+/*
+ * C ABI says these regs are callee-preserved. They aren't saved on kernel entry
+ * unless syscall needs a complete, fully filled "struct pt_regs".
+ */
 #define R15 0
 #define R14 8
 #define R13 16
 #define R12 24
 #define RBP 32
 #define RBX 40
-/* arguments: interrupts/non tracing syscalls only save up to here*/
+/* These regs are callee-clobbered. Always saved on kernel entry. */
 #define R11 48
 #define R10 56
 #define R9 64
@@ -41,15 +45,17 @@
 #define RDX 96
 #define RSI 104
 #define RDI 112
-#define ORIG_RAX 120       /* = ERROR */
-/* end of arguments */
-/* cpu exception frame or undefined in case of fast syscall. */
+/*
+ * On syscall entry, this is syscall#. On CPU exception, this is error code.
+ * On hw interrupt, it's IRQ number:
+ */
+#define ORIG_RAX 120
+/* Return frame for iretq */
 #define RIP 128
 #define CS 136
 #define EFLAGS 144
 #define RSP 152
 #define SS 160
-#define ARGOFFSET R11
 #endif /* __ASSEMBLY__ */
 
 /* top of stack page */
diff --git a/arch/x86/include/uapi/asm/ptrace.h b/arch/x86/include/uapi/asm/ptrace.h
index ac4b9aa..bc16115 100644
--- a/arch/x86/include/uapi/asm/ptrace.h
+++ b/arch/x86/include/uapi/asm/ptrace.h
@@ -41,13 +41,17 @@ struct pt_regs {
 #ifndef __KERNEL__
 
 struct pt_regs {
+/*
+ * C ABI says these regs are callee-preserved. They aren't saved on kernel entry
+ * unless syscall needs a complete, fully filled "struct pt_regs".
+ */
 	unsigned long r15;
 	unsigned long r14;
 	unsigned long r13;
 	unsigned long r12;
 	unsigned long rbp;
 	unsigned long rbx;
-/* arguments: non interrupts/non tracing syscalls only save up to here*/
+/* These regs are callee-clobbered. Always saved on kernel entry. */
 	unsigned long r11;
 	unsigned long r10;
 	unsigned long r9;
@@ -57,9 +61,12 @@ struct pt_regs {
 	unsigned long rdx;
 	unsigned long rsi;
 	unsigned long rdi;
+/*
+ * On syscall entry, this is syscall#. On CPU exception, this is error code.
+ * On hw interrupt, it's IRQ number:
+ */
 	unsigned long orig_rax;
-/* end of arguments */
-/* cpu exception frame or undefined */
+/* Return frame for iretq */
 	unsigned long rip;
 	unsigned long cs;
 	unsigned long eflags;
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 7d59df2..a21b5b3 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -14,9 +14,6 @@
  * NOTE: This code handles signal-recognition, which happens every time
  * after an interrupt and after each system call.
  *
- * Normal syscalls and interrupts don't save a full stack frame, this is
- * only done for syscall tracing, signals or fork/exec et.al.
- *
  * A note on terminology:
  * - top of stack: Architecture defined interrupt frame from SS to RIP
  * at the top of the kernel process stack.
@@ -26,12 +23,6 @@
  * Some macro usage:
  * - CFI macros are used to generate dwarf2 unwind information for better
  * backtraces. They don't change any code.
- * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
- * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
- * There are unfortunately lots of special cases where some registers
- * not touched. The macro is a big mess that should be cleaned up.
- * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
- * Gives a full stack frame.
  * - ENTRY/END Define functions in the symbol table.
  * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
  * frame that is otherwise undefined after a SYSCALL
@@ -156,7 +147,7 @@ ENDPROC(native_usergs_sysret64)
 	.endm
 
 /*
- * initial frame state for interrupts (and exceptions without error code)
+ * empty frame
  */
 	.macro EMPTY_FRAME start=1 offset=0
 	.if \start
@@ -189,9 +180,9 @@ ENDPROC(native_usergs_sysret64)
 	.endm
 
 /*
- * frame that enables calling into C.
+ * frame that enables passing a complete pt_regs to a C function.
  */
-	.macro PARTIAL_FRAME start=1 offset=0
+	.macro DEFAULT_FRAME start=1 offset=0
 	XCPT_FRAME \start, ORIG_RAX+\offset-ARGOFFSET
 	CFI_REL_OFFSET rdi, RDI+\offset-ARGOFFSET
 	CFI_REL_OFFSET rsi, RSI+\offset-ARGOFFSET
@@ -202,13 +193,6 @@ ENDPROC(native_usergs_sysret64)
 	CFI_REL_OFFSET r9, R9+\offset-ARGOFFSET
 	CFI_REL_OFFSET r10, R10+\offset-ARGOFFSET
 	CFI_REL_OFFSET r11, R11+\offset-ARGOFFSET
-	.endm
-
-/*
- * frame that enables passing a complete pt_regs to a C function.
- */
-	.macro DEFAULT_FRAME start=1 offset=0
-	PARTIAL_FRAME \start, R11+\offset-R15
 	CFI_REL_OFFSET rbx, RBX+\offset
 	CFI_REL_OFFSET rbp, RBP+\offset
 	CFI_REL_OFFSET r12, R12+\offset
@@ -220,21 +204,8 @@ ENDPROC(native_usergs_sysret64)
 ENTRY(save_paranoid)
 	XCPT_FRAME 1 RDI+8
 	cld
-	movq %rdi, RDI+8(%rsp)
-	movq %rsi, RSI+8(%rsp)
-	movq_cfi rdx, RDX+8
-	movq_cfi rcx, RCX+8
-	movq_cfi rax, RAX+8
-	movq %r8, R8+8(%rsp)
-	movq %r9, R9+8(%rsp)
-	movq %r10, R10+8(%rsp)
-	movq %r11, R11+8(%rsp)
-	movq_cfi rbx, RBX+8
-	movq %rbp, RBP+8(%rsp)
-	movq %r12, R12+8(%rsp)
-	movq %r13, R13+8(%rsp)
-	movq %r14, R14+8(%rsp)
-	movq %r15, R15+8(%rsp)
+	SAVE_C_REGS 8
+	SAVE_EXTRA_REGS 8
 	movl $1,%ebx
 	movl $MSR_GS_BASE,%ecx
 	rdmsr
@@ -263,7 +234,7 @@ ENTRY(ret_from_fork)
 
 	GET_THREAD_INFO(%rcx)
 
-	RESTORE_REST
+	RESTORE_EXTRA_REGS
 
 	testl $3, CS-ARGOFFSET(%rsp)		# from kernel_thread?
 	jz   1f
@@ -275,12 +246,10 @@ ENTRY(ret_from_fork)
 	jmp ret_from_sys_call			# go to the SYSRET fastpath
 
 1:
-	subq $REST_SKIP, %rsp	# leave space for volatiles
-	CFI_ADJUST_CFA_OFFSET	REST_SKIP
 	movq %rbp, %rdi
 	call *%rbx
 	movl $0, RAX(%rsp)
-	RESTORE_REST
+	RESTORE_EXTRA_REGS
 	jmp int_ret_from_sys_call
 	CFI_ENDPROC
 END(ret_from_fork)
@@ -338,9 +307,11 @@ GLOBAL(system_call_after_swapgs)
 	 * and short:
 	 */
 	ENABLE_INTERRUPTS(CLBR_NONE)
-	SAVE_ARGS 8, 0, rax_enosys=1
+	ALLOC_PT_GPREGS_ON_STACK 8
+	SAVE_C_REGS_EXCEPT_RAX_RCX
+	movq	$-ENOSYS,RAX-ARGOFFSET(%rsp)
 	movq_cfi rax,(ORIG_RAX-ARGOFFSET)
-	movq  %rcx,RIP-ARGOFFSET(%rsp)
+	movq	%rcx,RIP-ARGOFFSET(%rsp)
 	CFI_REL_OFFSET rip,RIP-ARGOFFSET
 	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	jnz tracesys
@@ -374,9 +345,9 @@ sysret_check:
 	 * sysretq will re-enable interrupts:
 	 */
 	TRACE_IRQS_ON
+	RESTORE_C_REGS_EXCEPT_RCX
 	movq RIP-ARGOFFSET(%rsp),%rcx
 	CFI_REGISTER	rip,rcx
-	RESTORE_ARGS 1,-ARG_SKIP,0
 	/*CFI_REGISTER	rflags,r11*/
 	movq	PER_CPU_VAR(old_rsp), %rsp
 	USERGS_SYSRET64
@@ -428,16 +399,16 @@ sysret_audit:
 
 	/* Do syscall tracing */
 tracesys:
-	leaq -REST_SKIP(%rsp), %rdi
+	movq %rsp, %rdi
 	movq $AUDIT_ARCH_X86_64, %rsi
 	call syscall_trace_enter_phase1
 	test %rax, %rax
 	jnz tracesys_phase2		/* if needed, run the slow path */
-	LOAD_ARGS 0			/* else restore clobbered regs */
+	RESTORE_C_REGS			/* else restore clobbered regs */
 	jmp system_call_fastpath	/*      and return to the fast path */
 
 tracesys_phase2:
-	SAVE_REST
+	SAVE_EXTRA_REGS
 	FIXUP_TOP_OF_STACK %rdi
 	movq %rsp, %rdi
 	movq $AUDIT_ARCH_X86_64, %rsi
@@ -445,12 +416,12 @@ tracesys_phase2:
 	call syscall_trace_enter_phase2
 
 	/*
-	 * Reload arg registers from stack in case ptrace changed them.
+	 * Reload registers from stack in case ptrace changed them.
 	 * We don't reload %rax because syscall_trace_entry_phase2() returned
 	 * the value it wants us to use in the table lookup.
 	 */
-	LOAD_ARGS ARGOFFSET, 1
-	RESTORE_REST
+	RESTORE_C_REGS_EXCEPT_RAX
+	RESTORE_EXTRA_REGS
 #if __SYSCALL_MASK == ~0
 	cmpq $__NR_syscall_max,%rax
 #else
@@ -501,7 +472,7 @@ int_very_careful:
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_NONE)
 int_check_syscall_exit_work:
-	SAVE_REST
+	SAVE_EXTRA_REGS
 	/* Check for syscall exit trace */
 	testl $_TIF_WORK_SYSCALL_EXIT,%edx
 	jz int_signal
@@ -520,7 +491,7 @@ int_signal:
 	call do_notify_resume
 1:	movl $_TIF_WORK_MASK,%edi
 int_restore_rest:
-	RESTORE_REST
+	RESTORE_EXTRA_REGS
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
 	jmp int_with_check
@@ -530,15 +501,12 @@ END(system_call)
 	.macro FORK_LIKE func
 ENTRY(stub_\func)
 	CFI_STARTPROC
-	popq	%r11			/* save return address */
-	PARTIAL_FRAME 0
-	SAVE_REST
-	pushq	%r11			/* put it back on stack */
+	DEFAULT_FRAME 0, 8		/* offset 8: return address */
+	SAVE_EXTRA_REGS 8
 	FIXUP_TOP_OF_STACK %r11, 8
-	DEFAULT_FRAME 0 8		/* offset 8: return address */
 	call sys_\func
 	RESTORE_TOP_OF_STACK %r11, 8
-	ret $REST_SKIP		/* pop extended registers */
+	ret
 	CFI_ENDPROC
 END(stub_\func)
 	.endm
@@ -546,7 +514,7 @@ END(stub_\func)
 	.macro FIXED_FRAME label,func
 ENTRY(\label)
 	CFI_STARTPROC
-	PARTIAL_FRAME 0 8		/* offset 8: return address */
+	DEFAULT_FRAME 0, 8		/* offset 8: return address */
 	FIXUP_TOP_OF_STACK %r11, 8-ARGOFFSET
 	call \func
 	RESTORE_TOP_OF_STACK %r11, 8-ARGOFFSET
@@ -563,12 +531,12 @@ END(\label)
 ENTRY(stub_execve)
 	CFI_STARTPROC
 	addq $8, %rsp
-	PARTIAL_FRAME 0
-	SAVE_REST
+	DEFAULT_FRAME 0
+	SAVE_EXTRA_REGS
 	FIXUP_TOP_OF_STACK %r11
 	call sys_execve
 	movq %rax,RAX(%rsp)
-	RESTORE_REST
+	RESTORE_EXTRA_REGS
 	jmp int_ret_from_sys_call
 	CFI_ENDPROC
 END(stub_execve)
@@ -576,13 +544,13 @@ END(stub_execve)
 ENTRY(stub_execveat)
 	CFI_STARTPROC
 	addq $8, %rsp
-	PARTIAL_FRAME 0
-	SAVE_REST
+	DEFAULT_FRAME 0
+	SAVE_EXTRA_REGS
 	FIXUP_TOP_OF_STACK %r11
 	call sys_execveat
 	RESTORE_TOP_OF_STACK %r11
 	movq %rax,RAX(%rsp)
-	RESTORE_REST
+	RESTORE_EXTRA_REGS
 	jmp int_ret_from_sys_call
 	CFI_ENDPROC
 END(stub_execveat)
@@ -594,12 +562,12 @@ END(stub_execveat)
 ENTRY(stub_rt_sigreturn)
 	CFI_STARTPROC
 	addq $8, %rsp
-	PARTIAL_FRAME 0
-	SAVE_REST
+	DEFAULT_FRAME 0
+	SAVE_EXTRA_REGS
 	FIXUP_TOP_OF_STACK %r11
 	call sys_rt_sigreturn
 	movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
-	RESTORE_REST
+	RESTORE_EXTRA_REGS
 	jmp int_ret_from_sys_call
 	CFI_ENDPROC
 END(stub_rt_sigreturn)
@@ -608,12 +576,12 @@ END(stub_rt_sigreturn)
 ENTRY(stub_x32_rt_sigreturn)
 	CFI_STARTPROC
 	addq $8, %rsp
-	PARTIAL_FRAME 0
-	SAVE_REST
+	DEFAULT_FRAME 0
+	SAVE_EXTRA_REGS
 	FIXUP_TOP_OF_STACK %r11
 	call sys32_x32_rt_sigreturn
 	movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
-	RESTORE_REST
+	RESTORE_EXTRA_REGS
 	jmp int_ret_from_sys_call
 	CFI_ENDPROC
 END(stub_x32_rt_sigreturn)
@@ -621,13 +589,13 @@ END(stub_x32_rt_sigreturn)
 ENTRY(stub_x32_execve)
 	CFI_STARTPROC
 	addq $8, %rsp
-	PARTIAL_FRAME 0
-	SAVE_REST
+	DEFAULT_FRAME 0
+	SAVE_EXTRA_REGS
 	FIXUP_TOP_OF_STACK %r11
 	call compat_sys_execve
 	RESTORE_TOP_OF_STACK %r11
 	movq %rax,RAX(%rsp)
-	RESTORE_REST
+	RESTORE_EXTRA_REGS
 	jmp int_ret_from_sys_call
 	CFI_ENDPROC
 END(stub_x32_execve)
@@ -635,13 +603,13 @@ END(stub_x32_execve)
 ENTRY(stub_x32_execveat)
 	CFI_STARTPROC
 	addq $8, %rsp
-	PARTIAL_FRAME 0
-	SAVE_REST
+	DEFAULT_FRAME 0
+	SAVE_EXTRA_REGS
 	FIXUP_TOP_OF_STACK %r11
 	call compat_sys_execveat
 	RESTORE_TOP_OF_STACK %r11
 	movq %rax,RAX(%rsp)
-	RESTORE_REST
+	RESTORE_EXTRA_REGS
 	jmp int_ret_from_sys_call
 	CFI_ENDPROC
 END(stub_x32_execveat)
@@ -697,42 +665,36 @@ END(interrupt)
 
 /* 0(%rsp): ~(interrupt number) */
 	.macro interrupt func
-	/* reserve pt_regs for scratch regs and rbp */
-	subq $ORIG_RAX-RBP, %rsp
-	CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP
 	cld
-	/* start from rbp in pt_regs and jump over */
-	movq_cfi rdi, (RDI-RBP)
-	movq_cfi rsi, (RSI-RBP)
-	movq_cfi rdx, (RDX-RBP)
-	movq_cfi rcx, (RCX-RBP)
-	movq_cfi rax, (RAX-RBP)
-	movq_cfi  r8,  (R8-RBP)
-	movq_cfi  r9,  (R9-RBP)
-	movq_cfi r10, (R10-RBP)
-	movq_cfi r11, (R11-RBP)
-
-	/* Save rbp so that we can unwind from get_irq_regs() */
-	movq_cfi rbp, 0
-
-	/* Save previous stack value */
-	movq %rsp, %rsi
+	/*
+	 * Since nothing in interrupt handling code touches r12...r15 members
+	 * of "struct pt_regs", and since interrupts can nest, we can save
+	 * four stack slots and simultaneously provide
+	 * an unwind-friendly stack layout by saving "truncated" pt_regs
+	 * exactly up to rbp slot, without these members.
+	 */
+	ALLOC_PT_GPREGS_ON_STACK -RBP
+	SAVE_C_REGS -RBP
+	/* this goes to 0(%rsp) for unwinder, not for saving the value: */
+	SAVE_EXTRA_REGS_RBP -RBP
 
-	leaq -RBP(%rsp),%rdi	/* arg1 for handler */
-	testl $3, CS-RBP(%rsi)
+	leaq -RBP(%rsp),%rdi	/* arg1 for \func (pointer to pt_regs) */
+
+	testl $3, CS-RBP(%rsp)
 	je 1f
 	SWAPGS
+1:
 	/*
+	 * Save previous stack pointer, optionally switch to interrupt stack.
 	 * irq_count is used to check if a CPU is already on an interrupt stack
 	 * or not. While this is essentially redundant with preempt_count it is
 	 * a little cheaper to use a separate counter in the PDA (short of
 	 * moving irq_enter into assembly, which would be too much work)
 	 */
-1:	incl PER_CPU_VAR(irq_count)
+	movq %rsp, %rsi
+	incl PER_CPU_VAR(irq_count)
 	cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
 	CFI_DEF_CFA_REGISTER	rsi
-
-	/* Store previous stack value */
 	pushq %rsi
 	CFI_ESCAPE	0x0f /* DW_CFA_def_cfa_expression */, 6, \
 			0x77 /* DW_OP_breg7 */, 0, \
@@ -764,6 +726,7 @@ ret_from_intr:
 	/* Restore saved previous stack */
 	popq %rsi
 	CFI_DEF_CFA rsi,SS+8-RBP	/* reg/off reset after def_cfa_expr */
+	/* return code expects complete pt_regs - adjust rsp accordingly: */
 	leaq ARGOFFSET-RBP(%rsi), %rsp
 	CFI_DEF_CFA_REGISTER	rsp
 	CFI_ADJUST_CFA_OFFSET	RBP-ARGOFFSET
@@ -775,7 +738,7 @@ exit_intr:
 
 	/* Interrupt came from user space */
 	/*
-	 * Has a correct top of stack, but a partial stack frame
+	 * Has a correct top of stack.
 	 * %rcx: thread info. Interrupts off.
 	 */
 retint_with_reschedule:
@@ -803,7 +766,8 @@ retint_restore_args:	/* return to kernel space */
 	 */
 	TRACE_IRQS_IRETQ
 restore_args:
-	RESTORE_ARGS 1,8,1
+	RESTORE_C_REGS
+	REMOVE_PT_GPREGS_FROM_STACK 8
 
 irq_return:
 	INTERRUPT_RETURN
@@ -874,12 +838,12 @@ retint_signal:
 	jz    retint_swapgs
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_NONE)
-	SAVE_REST
+	SAVE_EXTRA_REGS
 	movq $-1,ORIG_RAX(%rsp)
 	xorl %esi,%esi		# oldset
 	movq %rsp,%rdi		# &pt_regs
 	call do_notify_resume
-	RESTORE_REST
+	RESTORE_EXTRA_REGS
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
 	GET_THREAD_INFO(%rcx)
@@ -1006,8 +970,7 @@ ENTRY(\sym)
 	pushq_cfi $-1			/* ORIG_RAX: no syscall to restart */
 	.endif
 
-	subq $ORIG_RAX-R15, %rsp
-	CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
+	ALLOC_PT_GPREGS_ON_STACK
 
 	.if \paranoid
 	.if \paranoid == 1
@@ -1253,7 +1216,9 @@ ENTRY(xen_failsafe_callback)
 	addq $0x30,%rsp
 	CFI_ADJUST_CFA_OFFSET -0x30
 	pushq_cfi $-1 /* orig_ax = -1 => not a system call */
-	SAVE_ALL
+	ALLOC_PT_GPREGS_ON_STACK
+	SAVE_C_REGS
+	SAVE_EXTRA_REGS
 	jmp error_exit
 	CFI_ENDPROC
 END(xen_failsafe_callback)
@@ -1305,11 +1270,15 @@ ENTRY(paranoid_exit)
 	jnz paranoid_restore
 	TRACE_IRQS_IRETQ 0
 	SWAPGS_UNSAFE_STACK
-	RESTORE_ALL 8
+	RESTORE_EXTRA_REGS
+	RESTORE_C_REGS
+	REMOVE_PT_GPREGS_FROM_STACK 8
 	INTERRUPT_RETURN
 paranoid_restore:
 	TRACE_IRQS_IRETQ_DEBUG 0
-	RESTORE_ALL 8
+	RESTORE_EXTRA_REGS
+	RESTORE_C_REGS
+	REMOVE_PT_GPREGS_FROM_STACK 8
 	INTERRUPT_RETURN
 	CFI_ENDPROC
 END(paranoid_exit)
@@ -1323,21 +1292,8 @@ ENTRY(error_entry)
 	CFI_ADJUST_CFA_OFFSET 15*8
 	/* oldrax contains error code */
 	cld
-	movq %rdi, RDI+8(%rsp)
-	movq %rsi, RSI+8(%rsp)
-	movq %rdx, RDX+8(%rsp)
-	movq %rcx, RCX+8(%rsp)
-	movq %rax, RAX+8(%rsp)
-	movq  %r8,  R8+8(%rsp)
-	movq  %r9,  R9+8(%rsp)
-	movq %r10, R10+8(%rsp)
-	movq %r11, R11+8(%rsp)
-	movq_cfi rbx, RBX+8
-	movq %rbp, RBP+8(%rsp)
-	movq %r12, R12+8(%rsp)
-	movq %r13, R13+8(%rsp)
-	movq %r14, R14+8(%rsp)
-	movq %r15, R15+8(%rsp)
+	SAVE_C_REGS 8
+	SAVE_EXTRA_REGS 8
 	xorl %ebx,%ebx
 	testl $3,CS+8(%rsp)
 	je error_kernelspace
@@ -1386,7 +1342,7 @@ END(error_entry)
 ENTRY(error_exit)
 	DEFAULT_FRAME
 	movl %ebx,%eax
-	RESTORE_REST
+	RESTORE_EXTRA_REGS
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
 	GET_THREAD_INFO(%rcx)
@@ -1605,8 +1561,8 @@ end_repeat_nmi:
 	 * so that we repeat another NMI.
 	 */
 	pushq_cfi $-1		/* ORIG_RAX: no syscall to restart */
-	subq $ORIG_RAX-R15, %rsp
-	CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
+	ALLOC_PT_GPREGS_ON_STACK
+
 	/*
 	 * Use save_paranoid to handle SWAPGS, but no need to use paranoid_exit
 	 * as we should not be calling schedule in NMI context.
@@ -1645,8 +1601,10 @@ end_repeat_nmi:
 nmi_swapgs:
 	SWAPGS_UNSAFE_STACK
 nmi_restore:
+	RESTORE_EXTRA_REGS
+	RESTORE_C_REGS
 	/* Pop the extra iret frame at once */
-	RESTORE_ALL 6*8
+	REMOVE_PT_GPREGS_FROM_STACK 6*8
 
 	/* Clear the NMI executing stack variable */
 	movq $0, 5*8(%rsp)
-- 
1.8.1.4


^ permalink raw reply	[flat|nested] 130+ messages in thread

* [PATCH 02/11] x86: code shrink in paranoid_exit
  2015-01-14 21:48 [PATCH 01/11] x86: entry_64.S: always allocate complete "struct pt_regs" Denys Vlasenko
@ 2015-01-14 21:48 ` Denys Vlasenko
  2015-02-11 20:36   ` Andy Lutomirski
  2015-02-18 23:26   ` Andy Lutomirski
  2015-01-14 21:48 ` [PATCH 03/11] x86: mass removal of ARGOFFSET Denys Vlasenko
                   ` (10 subsequent siblings)
  11 siblings, 2 replies; 130+ messages in thread
From: Denys Vlasenko @ 2015-01-14 21:48 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Denys Vlasenko, Linus Torvalds, Oleg Nesterov, Borislav Petkov,
	H. Peter Anvin, Frederic Weisbecker, X86 ML, Alexei Starovoitov,
	Will Drewry, Kees Cook, linux-kernel

RESTORE_EXTRA_REGS + RESTORE_C_REGS looks small, but it's
a lot of instructions (fourteen). Let's reuse them.

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
CC: Linus Torvalds <torvalds@linux-foundation.org>
CC: Oleg Nesterov <oleg@redhat.com>
CC: Borislav Petkov <bp@alien8.de>
CC: "H. Peter Anvin" <hpa@zytor.com>
CC: Andy Lutomirski <luto@amacapital.net>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: X86 ML <x86@kernel.org>
CC: Alexei Starovoitov <ast@plumgrid.com>
CC: Will Drewry <wad@chromium.org>
CC: Kees Cook <keescook@chromium.org>
CC: linux-kernel@vger.kernel.org
---
 arch/x86/kernel/entry_64.S | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index a21b5b3..f5e815e 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1270,12 +1270,10 @@ ENTRY(paranoid_exit)
 	jnz paranoid_restore
 	TRACE_IRQS_IRETQ 0
 	SWAPGS_UNSAFE_STACK
-	RESTORE_EXTRA_REGS
-	RESTORE_C_REGS
-	REMOVE_PT_GPREGS_FROM_STACK 8
-	INTERRUPT_RETURN
+	jmp paranoid_restore1
 paranoid_restore:
 	TRACE_IRQS_IRETQ_DEBUG 0
+paranoid_restore1:
 	RESTORE_EXTRA_REGS
 	RESTORE_C_REGS
 	REMOVE_PT_GPREGS_FROM_STACK 8
-- 
1.8.1.4


^ permalink raw reply	[flat|nested] 130+ messages in thread

* [PATCH 03/11] x86: mass removal of ARGOFFSET
  2015-01-14 21:48 [PATCH 01/11] x86: entry_64.S: always allocate complete "struct pt_regs" Denys Vlasenko
  2015-01-14 21:48 ` [PATCH 02/11] x86: code shrink in paranoid_exit Denys Vlasenko
@ 2015-01-14 21:48 ` Denys Vlasenko
  2015-02-21  0:31   ` Andy Lutomirski
  2015-01-14 21:48 ` [PATCH 04/11] x86: rename some macros and labels, no code changes Denys Vlasenko
                   ` (9 subsequent siblings)
  11 siblings, 1 reply; 130+ messages in thread
From: Denys Vlasenko @ 2015-01-14 21:48 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Denys Vlasenko, Linus Torvalds, Oleg Nesterov, Borislav Petkov,
	H. Peter Anvin, Frederic Weisbecker, X86 ML, Alexei Starovoitov,
	Will Drewry, Kees Cook, linux-kernel

ARGOFFSET is zero now, removing it changes no code.
A few macros lost "offset" parameter, since it is always zero now too.

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
CC: Linus Torvalds <torvalds@linux-foundation.org>
CC: Oleg Nesterov <oleg@redhat.com>
CC: Borislav Petkov <bp@alien8.de>
CC: "H. Peter Anvin" <hpa@zytor.com>
CC: Andy Lutomirski <luto@amacapital.net>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: X86 ML <x86@kernel.org>
CC: Alexei Starovoitov <ast@plumgrid.com>
CC: Will Drewry <wad@chromium.org>
CC: Kees Cook <keescook@chromium.org>
CC: linux-kernel@vger.kernel.org
---
 arch/x86/ia32/ia32entry.S      | 142 ++++++++++++++++++++---------------------
 arch/x86/include/asm/calling.h |   2 -
 arch/x86/kernel/entry_64.S     |  64 +++++++++----------
 3 files changed, 103 insertions(+), 105 deletions(-)

diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index f4bed49..e99f8a5 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -41,13 +41,13 @@
 	movl	%edx,%edx	/* zero extension */
 	.endm 
 
-	/* clobbers %eax */	
-	.macro  CLEAR_RREGS offset=0, _r9=rax
+	/* clobbers %rax */
+	.macro  CLEAR_RREGS _r9=rax
 	xorl 	%eax,%eax
-	movq	%rax,\offset+R11(%rsp)
-	movq	%rax,\offset+R10(%rsp)
-	movq	%\_r9,\offset+R9(%rsp)
-	movq	%rax,\offset+R8(%rsp)
+	movq	%rax,R11(%rsp)
+	movq	%rax,R10(%rsp)
+	movq	%\_r9,R9(%rsp)
+	movq	%rax,R8(%rsp)
 	.endm
 
 	/*
@@ -60,14 +60,14 @@
 	 * If it's -1 to make us punt the syscall, then (u32)-1 is still
 	 * an appropriately invalid value.
 	 */
-	.macro LOAD_ARGS32 offset, _r9=0
+	.macro LOAD_ARGS32 _r9=0
 	.if \_r9
-	movl \offset+R9(%rsp),%r9d
+	movl R9(%rsp),%r9d
 	.endif
-	movl \offset+RCX(%rsp),%ecx
-	movl \offset+RDX(%rsp),%edx
-	movl \offset+RSI(%rsp),%esi
-	movl \offset+RDI(%rsp),%edi
+	movl RCX(%rsp),%ecx
+	movl RDX(%rsp),%edx
+	movl RSI(%rsp),%esi
+	movl RDI(%rsp),%edi
 	movl %eax,%eax			/* zero extension */
 	.endm
 	
@@ -158,12 +158,12 @@ ENTRY(ia32_sysenter_target)
 	 * ourselves.  To save a few cycles, we can check whether
 	 * NT was set instead of doing an unconditional popfq.
 	 */
-	testl $X86_EFLAGS_NT,EFLAGS-ARGOFFSET(%rsp)
+	testl $X86_EFLAGS_NT,EFLAGS(%rsp)
 	jnz sysenter_fix_flags
 sysenter_flags_fixed:
 
-	orl     $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
-	testl   $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	orl     $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP)
+	testl   $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP)
 	CFI_REMEMBER_STATE
 	jnz  sysenter_tracesys
 	cmpq	$(IA32_NR_syscalls-1),%rax
@@ -172,16 +172,16 @@ sysenter_do_call:
 	IA32_ARG_FIXUP
 sysenter_dispatch:
 	call	*ia32_sys_call_table(,%rax,8)
-	movq	%rax,RAX-ARGOFFSET(%rsp)
+	movq	%rax,RAX(%rsp)
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
-	testl	$_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	testl	$_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP)
 	jnz	sysexit_audit
 sysexit_from_sys_call:
-	andl    $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	andl    $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP)
 	/* clear IF, that popfq doesn't enable interrupts early */
-	andl	$~0x200,EFLAGS-ARGOFFSET(%rsp)
-	movl	RIP-ARGOFFSET(%rsp),%edx		/* User %eip */
+	andl	$~0x200,EFLAGS(%rsp)
+	movl	RIP(%rsp),%edx		/* User %eip */
 	CFI_REGISTER rip,rdx
 	RESTORE_RSI_RDI
 	REMOVE_PT_GPREGS_FROM_STACK 3*8
@@ -207,18 +207,18 @@ sysexit_from_sys_call:
 	movl %ebx,%esi			/* 2nd arg: 1st syscall arg */
 	movl %eax,%edi			/* 1st arg: syscall number */
 	call __audit_syscall_entry
-	movl RAX-ARGOFFSET(%rsp),%eax	/* reload syscall number */
+	movl RAX(%rsp),%eax	/* reload syscall number */
 	cmpq $(IA32_NR_syscalls-1),%rax
 	ja ia32_badsys
 	movl %ebx,%edi			/* reload 1st syscall arg */
-	movl RCX-ARGOFFSET(%rsp),%esi	/* reload 2nd syscall arg */
-	movl RDX-ARGOFFSET(%rsp),%edx	/* reload 3rd syscall arg */
-	movl RSI-ARGOFFSET(%rsp),%ecx	/* reload 4th syscall arg */
-	movl RDI-ARGOFFSET(%rsp),%r8d	/* reload 5th syscall arg */
+	movl RCX(%rsp),%esi	/* reload 2nd syscall arg */
+	movl RDX(%rsp),%edx	/* reload 3rd syscall arg */
+	movl RSI(%rsp),%ecx	/* reload 4th syscall arg */
+	movl RDI(%rsp),%r8d	/* reload 5th syscall arg */
 	.endm
 
 	.macro auditsys_exit exit
-	testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP)
 	jnz ia32_ret_from_sys_call
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_NONE)
@@ -229,13 +229,13 @@ sysexit_from_sys_call:
 1:	setbe %al		/* 1 if error, 0 if not */
 	movzbl %al,%edi		/* zero-extend that into %edi */
 	call __audit_syscall_exit
-	movq RAX-ARGOFFSET(%rsp),%rax	/* reload syscall return value */
+	movq RAX(%rsp),%rax	/* reload syscall return value */
 	movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
-	testl %edi,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	testl %edi,TI_flags+THREAD_INFO(%rsp,RIP)
 	jz \exit
-	CLEAR_RREGS -ARGOFFSET
+	CLEAR_RREGS
 	jmp int_with_check
 	.endm
 
@@ -255,7 +255,7 @@ sysenter_fix_flags:
 
 sysenter_tracesys:
 #ifdef CONFIG_AUDITSYSCALL
-	testl	$(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	testl	$(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP)
 	jz	sysenter_auditsys
 #endif
 	SAVE_EXTRA_REGS
@@ -263,7 +263,7 @@ sysenter_tracesys:
 	movq	$-ENOSYS,RAX(%rsp)/* ptrace can change this for a bad syscall */
 	movq	%rsp,%rdi        /* &pt_regs -> arg1 */
 	call	syscall_trace_enter
-	LOAD_ARGS32 ARGOFFSET  /* reload args from stack in case ptrace changed it */
+	LOAD_ARGS32  /* reload args from stack in case ptrace changed it */
 	RESTORE_EXTRA_REGS
 	cmpq	$(IA32_NR_syscalls-1),%rax
 	ja	int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */
@@ -309,17 +309,17 @@ ENTRY(ia32_cstar_target)
 	ALLOC_PT_GPREGS_ON_STACK 8
 	SAVE_C_REGS_EXCEPT_RCX_R891011
 	movl 	%eax,%eax	/* zero extension */
-	movq	%rax,ORIG_RAX-ARGOFFSET(%rsp)
-	movq	%rcx,RIP-ARGOFFSET(%rsp)
-	CFI_REL_OFFSET rip,RIP-ARGOFFSET
-	movq	%rbp,RCX-ARGOFFSET(%rsp) /* this lies slightly to ptrace */
+	movq	%rax,ORIG_RAX(%rsp)
+	movq	%rcx,RIP(%rsp)
+	CFI_REL_OFFSET rip,RIP
+	movq	%rbp,RCX(%rsp) /* this lies slightly to ptrace */
 	movl	%ebp,%ecx
-	movq	$__USER32_CS,CS-ARGOFFSET(%rsp)
-	movq	$__USER32_DS,SS-ARGOFFSET(%rsp)
-	movq	%r11,EFLAGS-ARGOFFSET(%rsp)
-	/*CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/
-	movq	%r8,RSP-ARGOFFSET(%rsp)	
-	CFI_REL_OFFSET rsp,RSP-ARGOFFSET
+	movq	$__USER32_CS,CS(%rsp)
+	movq	$__USER32_DS,SS(%rsp)
+	movq	%r11,EFLAGS(%rsp)
+	/*CFI_REL_OFFSET rflags,EFLAGS*/
+	movq	%r8,RSP(%rsp)
+	CFI_REL_OFFSET rsp,RSP
 	/* no need to do an access_ok check here because r8 has been
 	   32bit zero extended */ 
 	/* hardware stack frame is complete now */	
@@ -327,8 +327,8 @@ ENTRY(ia32_cstar_target)
 1:	movl	(%r8),%r9d
 	_ASM_EXTABLE(1b,ia32_badarg)
 	ASM_CLAC
-	orl     $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
-	testl   $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	orl     $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP)
+	testl   $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP)
 	CFI_REMEMBER_STATE
 	jnz   cstar_tracesys
 	cmpq $IA32_NR_syscalls-1,%rax
@@ -337,32 +337,32 @@ cstar_do_call:
 	IA32_ARG_FIXUP 1
 cstar_dispatch:
 	call *ia32_sys_call_table(,%rax,8)
-	movq %rax,RAX-ARGOFFSET(%rsp)
+	movq %rax,RAX(%rsp)
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
-	testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP)
 	jnz sysretl_audit
 sysretl_from_sys_call:
-	andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP)
 	RESTORE_RSI_RDI_RDX
-	movl RIP-ARGOFFSET(%rsp),%ecx
+	movl RIP(%rsp),%ecx
 	CFI_REGISTER rip,rcx
-	movl EFLAGS-ARGOFFSET(%rsp),%r11d	
+	movl EFLAGS(%rsp),%r11d
 	/*CFI_REGISTER rflags,r11*/
 	xorq	%r10,%r10
 	xorq	%r9,%r9
 	xorq	%r8,%r8
 	TRACE_IRQS_ON
-	movl RSP-ARGOFFSET(%rsp),%esp
+	movl RSP(%rsp),%esp
 	CFI_RESTORE rsp
 	USERGS_SYSRET32
 	
 #ifdef CONFIG_AUDITSYSCALL
 cstar_auditsys:
 	CFI_RESTORE_STATE
-	movl %r9d,R9-ARGOFFSET(%rsp)	/* register to be clobbered by call */
+	movl %r9d,R9(%rsp)	/* register to be clobbered by call */
 	auditsys_entry_common
-	movl R9-ARGOFFSET(%rsp),%r9d	/* reload 6th syscall arg */
+	movl R9(%rsp),%r9d	/* reload 6th syscall arg */
 	jmp cstar_dispatch
 
 sysretl_audit:
@@ -371,16 +371,16 @@ sysretl_audit:
 
 cstar_tracesys:
 #ifdef CONFIG_AUDITSYSCALL
-	testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP)
 	jz cstar_auditsys
 #endif
 	xchgl %r9d,%ebp
 	SAVE_EXTRA_REGS
-	CLEAR_RREGS 0, r9
+	CLEAR_RREGS r9
 	movq $-ENOSYS,RAX(%rsp)	/* ptrace can change this for a bad syscall */
 	movq %rsp,%rdi        /* &pt_regs -> arg1 */
 	call syscall_trace_enter
-	LOAD_ARGS32 ARGOFFSET, 1  /* reload args from stack in case ptrace changed it */
+	LOAD_ARGS32 1	/* reload args from stack in case ptrace changed it */
 	RESTORE_EXTRA_REGS
 	xchgl %ebp,%r9d
 	cmpq $(IA32_NR_syscalls-1),%rax
@@ -438,8 +438,8 @@ ENTRY(ia32_syscall)
 	   this could be a problem. */
 	ALLOC_PT_GPREGS_ON_STACK
 	SAVE_C_REGS_EXCEPT_R891011
-	orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
-	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP)
+	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP)
 	jnz ia32_tracesys
 	cmpq $(IA32_NR_syscalls-1),%rax
 	ja ia32_badsys
@@ -447,9 +447,9 @@ ia32_do_call:
 	IA32_ARG_FIXUP
 	call *ia32_sys_call_table(,%rax,8) # xxx: rip relative
 ia32_sysret:
-	movq %rax,RAX-ARGOFFSET(%rsp)
+	movq %rax,RAX(%rsp)
 ia32_ret_from_sys_call:
-	CLEAR_RREGS -ARGOFFSET
+	CLEAR_RREGS
 	jmp int_ret_from_sys_call
 
 ia32_tracesys:
@@ -458,7 +458,7 @@ ia32_tracesys:
 	movq $-ENOSYS,RAX(%rsp)	/* ptrace can change this for a bad syscall */
 	movq %rsp,%rdi        /* &pt_regs -> arg1 */
 	call syscall_trace_enter
-	LOAD_ARGS32 ARGOFFSET  /* reload args from stack in case ptrace changed it */
+	LOAD_ARGS32	/* reload args from stack in case ptrace changed it */
 	RESTORE_EXTRA_REGS
 	cmpq $(IA32_NR_syscalls-1),%rax
 	ja  int_ret_from_sys_call	/* ia32_tracesys has set RAX(%rsp) */
@@ -466,7 +466,7 @@ ia32_tracesys:
 END(ia32_syscall)
 
 ia32_badsys:
-	movq $0,ORIG_RAX-ARGOFFSET(%rsp)
+	movq $0,ORIG_RAX(%rsp)
 	movq $-ENOSYS,%rax
 	jmp ia32_sysret
 
@@ -499,17 +499,17 @@ ia32_ptregs_common:
 	CFI_ENDPROC
 	CFI_STARTPROC32	simple
 	CFI_SIGNAL_FRAME
-	CFI_DEF_CFA	rsp,SS+8-ARGOFFSET
-	CFI_REL_OFFSET	rax,RAX-ARGOFFSET
-	CFI_REL_OFFSET	rcx,RCX-ARGOFFSET
-	CFI_REL_OFFSET	rdx,RDX-ARGOFFSET
-	CFI_REL_OFFSET	rsi,RSI-ARGOFFSET
-	CFI_REL_OFFSET	rdi,RDI-ARGOFFSET
-	CFI_REL_OFFSET	rip,RIP-ARGOFFSET
-/*	CFI_REL_OFFSET	cs,CS-ARGOFFSET*/
-/*	CFI_REL_OFFSET	rflags,EFLAGS-ARGOFFSET*/
-	CFI_REL_OFFSET	rsp,RSP-ARGOFFSET
-/*	CFI_REL_OFFSET	ss,SS-ARGOFFSET*/
+	CFI_DEF_CFA	rsp,SS+8
+	CFI_REL_OFFSET	rax,RAX
+	CFI_REL_OFFSET	rcx,RCX
+	CFI_REL_OFFSET	rdx,RDX
+	CFI_REL_OFFSET	rsi,RSI
+	CFI_REL_OFFSET	rdi,RDI
+	CFI_REL_OFFSET	rip,RIP
+/*	CFI_REL_OFFSET	cs,CS*/
+/*	CFI_REL_OFFSET	rflags,EFLAGS*/
+	CFI_REL_OFFSET	rsp,RSP
+/*	CFI_REL_OFFSET	ss,SS*/
 	SAVE_EXTRA_REGS 8
 	call *%rax
 	RESTORE_EXTRA_REGS 8
diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h
index a2ef97a..3cd7e31 100644
--- a/arch/x86/include/asm/calling.h
+++ b/arch/x86/include/asm/calling.h
@@ -88,8 +88,6 @@ For 32-bit we have the following conventions - kernel is built with
 #define RSP		19*8
 #define SS		20*8
 
-#define ARGOFFSET	0
-
 	.macro ALLOC_PT_GPREGS_ON_STACK addskip=0
 	subq	$15*8+\addskip, %rsp
 	CFI_ADJUST_CFA_OFFSET 15*8+\addskip
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index f5e815e..a15bef6 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -73,7 +73,7 @@ ENDPROC(native_usergs_sysret64)
 #endif /* CONFIG_PARAVIRT */
 
 
-.macro TRACE_IRQS_IRETQ offset=ARGOFFSET
+.macro TRACE_IRQS_IRETQ offset=0
 #ifdef CONFIG_TRACE_IRQFLAGS
 	bt   $9,EFLAGS-\offset(%rsp)	/* interrupts off? */
 	jnc  1f
@@ -107,7 +107,7 @@ ENDPROC(native_usergs_sysret64)
 	call debug_stack_reset
 .endm
 
-.macro TRACE_IRQS_IRETQ_DEBUG offset=ARGOFFSET
+.macro TRACE_IRQS_IRETQ_DEBUG offset=0
 	bt   $9,EFLAGS-\offset(%rsp)	/* interrupts off? */
 	jnc  1f
 	TRACE_IRQS_ON_DEBUG
@@ -183,16 +183,16 @@ ENDPROC(native_usergs_sysret64)
  * frame that enables passing a complete pt_regs to a C function.
  */
 	.macro DEFAULT_FRAME start=1 offset=0
-	XCPT_FRAME \start, ORIG_RAX+\offset-ARGOFFSET
-	CFI_REL_OFFSET rdi, RDI+\offset-ARGOFFSET
-	CFI_REL_OFFSET rsi, RSI+\offset-ARGOFFSET
-	CFI_REL_OFFSET rdx, RDX+\offset-ARGOFFSET
-	CFI_REL_OFFSET rcx, RCX+\offset-ARGOFFSET
-	CFI_REL_OFFSET rax, RAX+\offset-ARGOFFSET
-	CFI_REL_OFFSET r8, R8+\offset-ARGOFFSET
-	CFI_REL_OFFSET r9, R9+\offset-ARGOFFSET
-	CFI_REL_OFFSET r10, R10+\offset-ARGOFFSET
-	CFI_REL_OFFSET r11, R11+\offset-ARGOFFSET
+	XCPT_FRAME \start, ORIG_RAX+\offset
+	CFI_REL_OFFSET rdi, RDI+\offset
+	CFI_REL_OFFSET rsi, RSI+\offset
+	CFI_REL_OFFSET rdx, RDX+\offset
+	CFI_REL_OFFSET rcx, RCX+\offset
+	CFI_REL_OFFSET rax, RAX+\offset
+	CFI_REL_OFFSET r8, R8+\offset
+	CFI_REL_OFFSET r9, R9+\offset
+	CFI_REL_OFFSET r10, R10+\offset
+	CFI_REL_OFFSET r11, R11+\offset
 	CFI_REL_OFFSET rbx, RBX+\offset
 	CFI_REL_OFFSET rbp, RBP+\offset
 	CFI_REL_OFFSET r12, R12+\offset
@@ -236,13 +236,13 @@ ENTRY(ret_from_fork)
 
 	RESTORE_EXTRA_REGS
 
-	testl $3, CS-ARGOFFSET(%rsp)		# from kernel_thread?
+	testl $3, CS(%rsp)			# from kernel_thread?
 	jz   1f
 
 	testl $_TIF_IA32, TI_flags(%rcx)	# 32-bit compat task needs IRET
 	jnz  int_ret_from_sys_call
 
-	RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET
+	RESTORE_TOP_OF_STACK %rdi
 	jmp ret_from_sys_call			# go to the SYSRET fastpath
 
 1:
@@ -309,11 +309,11 @@ GLOBAL(system_call_after_swapgs)
 	ENABLE_INTERRUPTS(CLBR_NONE)
 	ALLOC_PT_GPREGS_ON_STACK 8
 	SAVE_C_REGS_EXCEPT_RAX_RCX
-	movq	$-ENOSYS,RAX-ARGOFFSET(%rsp)
-	movq_cfi rax,(ORIG_RAX-ARGOFFSET)
-	movq	%rcx,RIP-ARGOFFSET(%rsp)
-	CFI_REL_OFFSET rip,RIP-ARGOFFSET
-	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	movq	$-ENOSYS,RAX(%rsp)
+	movq_cfi rax,(ORIG_RAX)
+	movq	%rcx,RIP(%rsp)
+	CFI_REL_OFFSET rip,RIP
+	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP)
 	jnz tracesys
 system_call_fastpath:
 #if __SYSCALL_MASK == ~0
@@ -325,7 +325,7 @@ system_call_fastpath:
 	ja ret_from_sys_call  /* and return regs->ax */
 	movq %r10,%rcx
 	call *sys_call_table(,%rax,8)  # XXX:	 rip relative
-	movq %rax,RAX-ARGOFFSET(%rsp)
+	movq %rax,RAX(%rsp)
 /*
  * Syscall return path ending with SYSRET (fast path)
  * Has incomplete stack frame and undefined top of stack.
@@ -337,7 +337,7 @@ sysret_check:
 	LOCKDEP_SYS_EXIT
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
-	movl TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET),%edx
+	movl TI_flags+THREAD_INFO(%rsp,RIP),%edx
 	andl %edi,%edx
 	jnz  sysret_careful
 	CFI_REMEMBER_STATE
@@ -346,7 +346,7 @@ sysret_check:
 	 */
 	TRACE_IRQS_ON
 	RESTORE_C_REGS_EXCEPT_RCX
-	movq RIP-ARGOFFSET(%rsp),%rcx
+	movq RIP(%rsp),%rcx
 	CFI_REGISTER	rip,rcx
 	/*CFI_REGISTER	rflags,r11*/
 	movq	PER_CPU_VAR(old_rsp), %rsp
@@ -378,7 +378,7 @@ sysret_signal:
 	 * These all wind up with the iret return path anyway,
 	 * so just join that path right now.
 	 */
-	FIXUP_TOP_OF_STACK %r11, -ARGOFFSET
+	FIXUP_TOP_OF_STACK %r11
 	jmp int_check_syscall_exit_work
 
 #ifdef CONFIG_AUDITSYSCALL
@@ -388,7 +388,7 @@ sysret_signal:
 	 * masked off.
 	 */
 sysret_audit:
-	movq RAX-ARGOFFSET(%rsp),%rsi	/* second arg, syscall return value */
+	movq RAX(%rsp),%rsi	/* second arg, syscall return value */
 	cmpq $-MAX_ERRNO,%rsi	/* is it < -MAX_ERRNO? */
 	setbe %al		/* 1 if so, 0 if not */
 	movzbl %al,%edi		/* zero-extend that into %edi */
@@ -431,7 +431,7 @@ tracesys_phase2:
 	ja   int_ret_from_sys_call	/* RAX(%rsp) is already set */
 	movq %r10,%rcx	/* fixup for C */
 	call *sys_call_table(,%rax,8)
-	movq %rax,RAX-ARGOFFSET(%rsp)
+	movq %rax,RAX(%rsp)
 	/* Use IRET because user could have changed frame */
 
 /*
@@ -515,9 +515,9 @@ END(stub_\func)
 ENTRY(\label)
 	CFI_STARTPROC
 	DEFAULT_FRAME 0, 8		/* offset 8: return address */
-	FIXUP_TOP_OF_STACK %r11, 8-ARGOFFSET
+	FIXUP_TOP_OF_STACK %r11, 8
 	call \func
-	RESTORE_TOP_OF_STACK %r11, 8-ARGOFFSET
+	RESTORE_TOP_OF_STACK %r11, 8
 	ret
 	CFI_ENDPROC
 END(\label)
@@ -717,7 +717,7 @@ common_interrupt:
 	ASM_CLAC
 	addq $-0x80,(%rsp)		/* Adjust vector to [-256,-1] range */
 	interrupt do_IRQ
-	/* 0(%rsp): old_rsp-ARGOFFSET */
+	/* 0(%rsp): old_rsp */
 ret_from_intr:
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
@@ -727,13 +727,13 @@ ret_from_intr:
 	popq %rsi
 	CFI_DEF_CFA rsi,SS+8-RBP	/* reg/off reset after def_cfa_expr */
 	/* return code expects complete pt_regs - adjust rsp accordingly: */
-	leaq ARGOFFSET-RBP(%rsi), %rsp
+	leaq -RBP(%rsi), %rsp
 	CFI_DEF_CFA_REGISTER	rsp
-	CFI_ADJUST_CFA_OFFSET	RBP-ARGOFFSET
+	CFI_ADJUST_CFA_OFFSET	RBP
 
 exit_intr:
 	GET_THREAD_INFO(%rcx)
-	testl $3,CS-ARGOFFSET(%rsp)
+	testl $3,CS(%rsp)
 	je retint_kernel
 
 	/* Interrupt came from user space */
@@ -855,7 +855,7 @@ retint_signal:
 ENTRY(retint_kernel)
 	cmpl $0,PER_CPU_VAR(__preempt_count)
 	jnz  retint_restore_args
-	bt   $9,EFLAGS-ARGOFFSET(%rsp)	/* interrupts off? */
+	bt   $9,EFLAGS(%rsp)	/* interrupts off? */
 	jnc  retint_restore_args
 	call preempt_schedule_irq
 	jmp exit_intr
-- 
1.8.1.4


^ permalink raw reply	[flat|nested] 130+ messages in thread

* [PATCH 04/11] x86: rename some macros and labels, no code changes
  2015-01-14 21:48 [PATCH 01/11] x86: entry_64.S: always allocate complete "struct pt_regs" Denys Vlasenko
  2015-01-14 21:48 ` [PATCH 02/11] x86: code shrink in paranoid_exit Denys Vlasenko
  2015-01-14 21:48 ` [PATCH 03/11] x86: mass removal of ARGOFFSET Denys Vlasenko
@ 2015-01-14 21:48 ` Denys Vlasenko
  2015-01-14 21:48 ` [PATCH 05/11] x86: add comments about various syscall instructions, " Denys Vlasenko
                   ` (8 subsequent siblings)
  11 siblings, 0 replies; 130+ messages in thread
From: Denys Vlasenko @ 2015-01-14 21:48 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Denys Vlasenko, Linus Torvalds, Oleg Nesterov, Borislav Petkov,
	H. Peter Anvin, Frederic Weisbecker, X86 ML, Alexei Starovoitov,
	Will Drewry, Kees Cook, linux-kernel

Rename LOAD_ARGS32 to RESTORE_REGS32 to match other RESTORE_* macros.
The "ARGS" part was misleading anyway - we are restoring registers,
not arguments.

Similarly, rename [retint_]restore_args to [retint_]restore_c_regs:
at these labels, we restore registers clobbered by C ABI;
rename int_restore_rest to int_restore_extra_regs.

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
CC: Linus Torvalds <torvalds@linux-foundation.org>
CC: Oleg Nesterov <oleg@redhat.com>
CC: Borislav Petkov <bp@alien8.de>
CC: "H. Peter Anvin" <hpa@zytor.com>
CC: Andy Lutomirski <luto@amacapital.net>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: X86 ML <x86@kernel.org>
CC: Alexei Starovoitov <ast@plumgrid.com>
CC: Will Drewry <wad@chromium.org>
CC: Kees Cook <keescook@chromium.org>
CC: linux-kernel@vger.kernel.org
---
 arch/x86/ia32/ia32entry.S  | 10 +++++-----
 arch/x86/kernel/entry_64.S | 16 ++++++++--------
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index e99f8a5..9f4c877 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -51,7 +51,7 @@
 	.endm
 
 	/*
-	 * Reload arg registers from stack in case ptrace changed them.
+	 * Reload registers from stack in case ptrace changed them.
 	 * We don't reload %eax because syscall_trace_enter() returned
 	 * the %rax value we should see.  Instead, we just truncate that
 	 * value to 32 bits again as we did on entry from user mode.
@@ -60,7 +60,7 @@
 	 * If it's -1 to make us punt the syscall, then (u32)-1 is still
 	 * an appropriately invalid value.
 	 */
-	.macro LOAD_ARGS32 _r9=0
+	.macro RESTORE_REGS32 _r9=0
 	.if \_r9
 	movl R9(%rsp),%r9d
 	.endif
@@ -263,7 +263,7 @@ sysenter_tracesys:
 	movq	$-ENOSYS,RAX(%rsp)/* ptrace can change this for a bad syscall */
 	movq	%rsp,%rdi        /* &pt_regs -> arg1 */
 	call	syscall_trace_enter
-	LOAD_ARGS32  /* reload args from stack in case ptrace changed it */
+	RESTORE_REGS32  /* reload regs from stack in case ptrace changed it */
 	RESTORE_EXTRA_REGS
 	cmpq	$(IA32_NR_syscalls-1),%rax
 	ja	int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */
@@ -380,7 +380,7 @@ cstar_tracesys:
 	movq $-ENOSYS,RAX(%rsp)	/* ptrace can change this for a bad syscall */
 	movq %rsp,%rdi        /* &pt_regs -> arg1 */
 	call syscall_trace_enter
-	LOAD_ARGS32 1	/* reload args from stack in case ptrace changed it */
+	RESTORE_REGS32 1	/* reload regs from stack in case ptrace changed it */
 	RESTORE_EXTRA_REGS
 	xchgl %ebp,%r9d
 	cmpq $(IA32_NR_syscalls-1),%rax
@@ -458,7 +458,7 @@ ia32_tracesys:
 	movq $-ENOSYS,RAX(%rsp)	/* ptrace can change this for a bad syscall */
 	movq %rsp,%rdi        /* &pt_regs -> arg1 */
 	call syscall_trace_enter
-	LOAD_ARGS32	/* reload args from stack in case ptrace changed it */
+	RESTORE_REGS32	/* reload args from stack in case ptrace changed it */
 	RESTORE_EXTRA_REGS
 	cmpq $(IA32_NR_syscalls-1),%rax
 	ja  int_ret_from_sys_call	/* ia32_tracesys has set RAX(%rsp) */
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index a15bef6..c9259f4 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -62,7 +62,7 @@
 
 
 #ifndef CONFIG_PREEMPT
-#define retint_kernel retint_restore_args
+#define retint_kernel retint_restore_c_regs
 #endif
 
 #ifdef CONFIG_PARAVIRT
@@ -481,7 +481,7 @@ int_check_syscall_exit_work:
 	call syscall_trace_leave
 	popq_cfi %rdi
 	andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi
-	jmp int_restore_rest
+	jmp int_restore_extra_regs
 
 int_signal:
 	testl $_TIF_DO_NOTIFY_MASK,%edx
@@ -490,7 +490,7 @@ int_signal:
 	xorl %esi,%esi		# oldset -> arg2
 	call do_notify_resume
 1:	movl $_TIF_WORK_MASK,%edi
-int_restore_rest:
+int_restore_extra_regs:
 	RESTORE_EXTRA_REGS
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
@@ -757,15 +757,15 @@ retint_swapgs:		/* return to user-space */
 	DISABLE_INTERRUPTS(CLBR_ANY)
 	TRACE_IRQS_IRETQ
 	SWAPGS
-	jmp restore_args
+	jmp restore_c_regs
 
-retint_restore_args:	/* return to kernel space */
+retint_restore_c_regs:	/* return to kernel space */
 	DISABLE_INTERRUPTS(CLBR_ANY)
 	/*
 	 * The iretq could re-enable interrupts:
 	 */
 	TRACE_IRQS_IRETQ
-restore_args:
+restore_c_regs:
 	RESTORE_C_REGS
 	REMOVE_PT_GPREGS_FROM_STACK 8
 
@@ -854,9 +854,9 @@ retint_signal:
 	/* rcx:	 threadinfo. interrupts off. */
 ENTRY(retint_kernel)
 	cmpl $0,PER_CPU_VAR(__preempt_count)
-	jnz  retint_restore_args
+	jnz  retint_restore_c_regs
 	bt   $9,EFLAGS(%rsp)	/* interrupts off? */
-	jnc  retint_restore_args
+	jnc  retint_restore_c_regs
 	call preempt_schedule_irq
 	jmp exit_intr
 #endif
-- 
1.8.1.4


^ permalink raw reply	[flat|nested] 130+ messages in thread

* [PATCH 05/11] x86: add comments about various syscall instructions, no code changes
  2015-01-14 21:48 [PATCH 01/11] x86: entry_64.S: always allocate complete "struct pt_regs" Denys Vlasenko
                   ` (2 preceding siblings ...)
  2015-01-14 21:48 ` [PATCH 04/11] x86: rename some macros and labels, no code changes Denys Vlasenko
@ 2015-01-14 21:48 ` Denys Vlasenko
  2015-01-14 21:48 ` [PATCH 06/11] x86: entry_64.S: move save_paranoid and ret_from_fork closer to their users Denys Vlasenko
                   ` (7 subsequent siblings)
  11 siblings, 0 replies; 130+ messages in thread
From: Denys Vlasenko @ 2015-01-14 21:48 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Denys Vlasenko, Linus Torvalds, Oleg Nesterov, Borislav Petkov,
	H. Peter Anvin, Frederic Weisbecker, X86 ML, Alexei Starovoitov,
	Will Drewry, Kees Cook, linux-kernel

SYSCALL/SYSRET and SYSENTER/SYSEXIT have weird semantics.
Moreover, they differ in 32- and 64-bit mode.
What is saved? What is not? Is rsp set? Are interrupts disabled?
People tend to not remember these details well enough.

This patch adds comments which explain in detail
what registers are modified by each of these instructions.
The comments are placed immediately before corresponding
entry and exit points.

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
CC: Linus Torvalds <torvalds@linux-foundation.org>
CC: Oleg Nesterov <oleg@redhat.com>
CC: Borislav Petkov <bp@alien8.de>
CC: "H. Peter Anvin" <hpa@zytor.com>
CC: Andy Lutomirski <luto@amacapital.net>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: X86 ML <x86@kernel.org>
CC: Alexei Starovoitov <ast@plumgrid.com>
CC: Will Drewry <wad@chromium.org>
CC: Kees Cook <keescook@chromium.org>
CC: linux-kernel@vger.kernel.org
---
 arch/x86/ia32/ia32entry.S  | 133 ++++++++++++++++++++++++++++-----------------
 arch/x86/kernel/entry_64.S |  32 ++++++-----
 2 files changed, 102 insertions(+), 63 deletions(-)

diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 9f4c877..366f633 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -99,22 +99,25 @@ ENDPROC(native_irq_enable_sysexit)
 /*
  * 32bit SYSENTER instruction entry.
  *
+ * SYSENTER loads ss, rsp, cs, and rip from previously programmed MSRs.
+ * IF and VM in rflags are cleared (IOW: interrupts are off).
+ * SYSENTER does not save anything on the stack,
+ * and does not save old rip (!!!) and rflags.
+ *
  * Arguments:
- * %eax	System call number.
- * %ebx Arg1
- * %ecx Arg2
- * %edx Arg3
- * %esi Arg4
- * %edi Arg5
- * %ebp user stack
- * 0(%ebp) Arg6	
- * 	
- * Interrupts off.
- *	
+ * eax  system call number
+ * ebx  arg1
+ * ecx  arg2
+ * edx  arg3
+ * esi  arg4
+ * edi  arg5
+ * ebp  user stack
+ * 0(%ebp) arg6
+ *
  * This is purely a fast path. For anything complicated we use the int 0x80
- * path below.	Set up a complete hardware stack frame to share code
+ * path below. We set up a complete hardware stack frame to share code
  * with the int 0x80 path.
- */ 	
+ */
 ENTRY(ia32_sysenter_target)
 	CFI_STARTPROC32	simple
 	CFI_SIGNAL_FRAME
@@ -128,6 +131,7 @@ ENTRY(ia32_sysenter_target)
 	 * disabled irqs, here we enable it straight after entry:
 	 */
 	ENABLE_INTERRUPTS(CLBR_NONE)
+	/* Construct iret frame (ss,rsp,rflags,cs,rip) */
  	movl	%ebp,%ebp		/* zero extension */
 	pushq_cfi $__USER32_DS
 	/*CFI_REL_OFFSET ss,0*/
@@ -140,14 +144,19 @@ ENTRY(ia32_sysenter_target)
 	pushq_cfi $__USER32_CS
 	/*CFI_REL_OFFSET cs,0*/
 	movl	%eax, %eax
+	/* Store thread_info->sysenter_return in rip stack slot */
 	pushq_cfi %r10
 	CFI_REL_OFFSET rip,0
+	/* Store orig_ax */
 	pushq_cfi %rax
+	/* Construct the rest of "struct pt_regs" */
 	cld
 	ALLOC_PT_GPREGS_ON_STACK
 	SAVE_C_REGS_EXCEPT_R891011
- 	/* no need to do an access_ok check here because rbp has been
- 	   32bit zero extended */ 
+	/*
+	 * no need to do an access_ok check here because rbp has been
+	 * 32bit zero extended
+	 */
 	ASM_STAC
 1:	movl	(%rbp),%ebp
 	_ASM_EXTABLE(1b,ia32_badarg)
@@ -184,6 +193,7 @@ sysexit_from_sys_call:
 	movl	RIP(%rsp),%edx		/* User %eip */
 	CFI_REGISTER rip,rdx
 	RESTORE_RSI_RDI
+	/* pop everything except ss,rsp,rflags slots */
 	REMOVE_PT_GPREGS_FROM_STACK 3*8
 	xorq	%r8,%r8
 	xorq	%r9,%r9
@@ -194,6 +204,10 @@ sysexit_from_sys_call:
 	popq_cfi %rcx				/* User %esp */
 	CFI_REGISTER rsp,rcx
 	TRACE_IRQS_ON
+	/*
+	 * 32bit SYSEXIT restores eip from edx, esp from ecx.
+	 * cs and ss are loaded from MSRs.
+	 */
 	ENABLE_INTERRUPTS_SYSEXIT32
 
 	CFI_RESTORE_STATE
@@ -274,23 +288,33 @@ ENDPROC(ia32_sysenter_target)
 /*
  * 32bit SYSCALL instruction entry.
  *
+ * 32bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11,
+ * then loads new ss, cs, and rip from previously programmed MSRs.
+ * rflags gets masked by a value from another MSR (so CLD and CLAC
+ * are not needed). SYSCALL does not save anything on the stack
+ * and does not change rsp.
+ *
+ * Note: rflags saving+masking-with-MSR happens only in Long mode
+ * (in legacy 32bit mode, IF, RF and VM bits are cleared and that's it).
+ * Don't get confused: rflags saving+masking depends on Long Mode Active bit
+ * (EFER.LMA=1), NOT on bitness of userspace where SYSCALL executes
+ * or target CS descriptor's L bit (SYSCALL does not read segment descriptors).
+ *
  * Arguments:
- * %eax	System call number.
- * %ebx Arg1
- * %ecx return EIP 
- * %edx Arg3
- * %esi Arg4
- * %edi Arg5
- * %ebp Arg2    [note: not saved in the stack frame, should not be touched]
- * %esp user stack 
- * 0(%esp) Arg6
- * 	
- * Interrupts off.
- *	
+ * eax  system call number
+ * ecx  return address
+ * ebx  arg1
+ * ebp  arg2	(note: not saved in the stack frame, should not be touched)
+ * edx  arg3
+ * esi  arg4
+ * edi  arg5
+ * esp  user stack
+ * 0(%esp) arg6
+ *
  * This is purely a fast path. For anything complicated we use the int 0x80
- * path below.	Set up a complete hardware stack frame to share code
- * with the int 0x80 path.	
- */ 	
+ * path below. We set up a complete hardware stack frame to share code
+ * with the int 0x80 path.
+ */
 ENTRY(ia32_cstar_target)
 	CFI_STARTPROC32	simple
 	CFI_SIGNAL_FRAME
@@ -306,7 +330,7 @@ ENTRY(ia32_cstar_target)
 	 * disabled irqs and here we enable it straight after entry:
 	 */
 	ENABLE_INTERRUPTS(CLBR_NONE)
-	ALLOC_PT_GPREGS_ON_STACK 8
+	ALLOC_PT_GPREGS_ON_STACK 8	/* +8: space for orig_ax */
 	SAVE_C_REGS_EXCEPT_RCX_R891011
 	movl 	%eax,%eax	/* zero extension */
 	movq	%rax,ORIG_RAX(%rsp)
@@ -320,9 +344,11 @@ ENTRY(ia32_cstar_target)
 	/*CFI_REL_OFFSET rflags,EFLAGS*/
 	movq	%r8,RSP(%rsp)
 	CFI_REL_OFFSET rsp,RSP
-	/* no need to do an access_ok check here because r8 has been
-	   32bit zero extended */ 
-	/* hardware stack frame is complete now */	
+	/* iret stack frame is complete now */
+	/*
+	 * no need to do an access_ok check here because r8 has been
+	 * 32bit zero extended
+	 */
 	ASM_STAC
 1:	movl	(%r8),%r9d
 	_ASM_EXTABLE(1b,ia32_badarg)
@@ -355,8 +381,15 @@ sysretl_from_sys_call:
 	TRACE_IRQS_ON
 	movl RSP(%rsp),%esp
 	CFI_RESTORE rsp
+	/*
+	 * 64bit->32bit SYSRET restores eip from ecx,
+	 * eflags from r11 (but RF and VM bits are forced to 0),
+	 * cs and ss are loaded from MSRs.
+	 * (Note: 32bit->32bit SYSRET is different: since r11
+	 * does not exist, it merely sets eflags.IF=1).
+	 */
 	USERGS_SYSRET32
-	
+
 #ifdef CONFIG_AUDITSYSCALL
 cstar_auditsys:
 	CFI_RESTORE_STATE
@@ -394,26 +427,26 @@ ia32_badarg:
 	jmp ia32_sysret
 	CFI_ENDPROC
 
-/* 
- * Emulated IA32 system calls via int 0x80. 
+/*
+ * Emulated IA32 system calls via int 0x80.
  *
- * Arguments:	 
- * %eax	System call number.
- * %ebx Arg1
- * %ecx Arg2
- * %edx Arg3
- * %esi Arg4
- * %edi Arg5
- * %ebp Arg6    [note: not saved in the stack frame, should not be touched]
+ * Arguments:
+ * eax  system call number
+ * ebx  arg1
+ * ecx  arg2
+ * edx  arg3
+ * esi  arg4
+ * edi  arg5
+ * ebp  arg6	(note: not saved in the stack frame, should not be touched)
  *
  * Notes:
- * Uses the same stack frame as the x86-64 version.	
- * All registers except %eax must be saved (but ptrace may violate that)
+ * Uses the same stack frame as the x86-64 version.
+ * All registers except eax must be saved (but ptrace may violate that).
  * Arguments are zero extended. For system calls that want sign extension and
  * take long arguments a wrapper is needed. Most calls can just be called
  * directly.
- * Assumes it is only called from user space and entered with interrupts off.	
- */ 				
+ * Assumes it is only called from user space and entered with interrupts off.
+ */
 
 ENTRY(ia32_syscall)
 	CFI_STARTPROC32	simple
@@ -432,7 +465,7 @@ ENTRY(ia32_syscall)
 	 */
 	ENABLE_INTERRUPTS(CLBR_NONE)
 	movl %eax,%eax
-	pushq_cfi %rax
+	pushq_cfi %rax		/* store orig_ax */
 	cld
 	/* note the registers are not zero extended to the sf.
 	   this could be a problem. */
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index c9259f4..3498274 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -255,25 +255,25 @@ ENTRY(ret_from_fork)
 END(ret_from_fork)
 
 /*
- * System call entry. Up to 6 arguments in registers are supported.
+ * 64bit SYSCALL instruction entry. Up to 6 arguments in registers.
  *
- * SYSCALL does not save anything on the stack and does not change the
- * stack pointer.  However, it does mask the flags register for us, so
- * CLD and CLAC are not needed.
- */
-
-/*
- * Register setup:
+ * 64bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11,
+ * then loads new ss, cs, and rip from previously programmed MSRs.
+ * rflags gets masked by a value from another MSR (so CLD and CLAC
+ * are not needed). SYSCALL does not save anything on the stack
+ * and does not change rsp.
+ *
+ * Registers on entry:
  * rax  system call number
+ * rcx  return address
+ * r11  saved rflags (note: r11 is callee-clobbered register in C ABI)
  * rdi  arg0
- * rcx  return address for syscall/sysret, C arg3
  * rsi  arg1
  * rdx  arg2
- * r10  arg3 	(--> moved to rcx for C)
+ * r10  arg3 (needs to be moved to rcx to conform to C ABI)
  * r8   arg4
  * r9   arg5
- * r11  eflags for syscall/sysret, temporary for C
- * r12-r15,rbp,rbx saved by C code, not touched.
+ * (note: r12-r15,rbp,rbx are callee-preserved in C ABI)
  *
  * Interrupts are off on entry.
  * Only called from user space.
@@ -301,13 +301,14 @@ ENTRY(system_call)
 GLOBAL(system_call_after_swapgs)
 
 	movq	%rsp,PER_CPU_VAR(old_rsp)
+	/* kernel_stack is set so that 5 slots (iret frame) are preallocated */
 	movq	PER_CPU_VAR(kernel_stack),%rsp
 	/*
 	 * No need to follow this irqs off/on section - it's straight
 	 * and short:
 	 */
 	ENABLE_INTERRUPTS(CLBR_NONE)
-	ALLOC_PT_GPREGS_ON_STACK 8
+	ALLOC_PT_GPREGS_ON_STACK 8		/* +8: space for orig_ax */
 	SAVE_C_REGS_EXCEPT_RAX_RCX
 	movq	$-ENOSYS,RAX(%rsp)
 	movq_cfi rax,(ORIG_RAX)
@@ -350,6 +351,11 @@ sysret_check:
 	CFI_REGISTER	rip,rcx
 	/*CFI_REGISTER	rflags,r11*/
 	movq	PER_CPU_VAR(old_rsp), %rsp
+	/*
+	 * 64bit SYSRET restores rip from rcx,
+	 * rflags from r11 (but RF and VM bits are forced to 0),
+	 * cs and ss are loaded from MSRs.
+	 */
 	USERGS_SYSRET64
 
 	CFI_RESTORE_STATE
-- 
1.8.1.4


^ permalink raw reply	[flat|nested] 130+ messages in thread

* [PATCH 06/11] x86: entry_64.S: move save_paranoid and ret_from_fork closer to their users
  2015-01-14 21:48 [PATCH 01/11] x86: entry_64.S: always allocate complete "struct pt_regs" Denys Vlasenko
                   ` (3 preceding siblings ...)
  2015-01-14 21:48 ` [PATCH 05/11] x86: add comments about various syscall instructions, " Denys Vlasenko
@ 2015-01-14 21:48 ` Denys Vlasenko
  2015-01-14 21:48 ` [PATCH 07/11] x86: entry_64.S: rename save_paranoid to paranoid_entry, no code changes Denys Vlasenko
                   ` (6 subsequent siblings)
  11 siblings, 0 replies; 130+ messages in thread
From: Denys Vlasenko @ 2015-01-14 21:48 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Denys Vlasenko, Linus Torvalds, Oleg Nesterov, Borislav Petkov,
	H. Peter Anvin, Frederic Weisbecker, X86 ML, Alexei Starovoitov,
	Will Drewry, Kees Cook, linux-kernel

For some odd reason, these two functions are at the very top of the file.
save_paranoid's caller is approximately in the middle of it, move it there.
Move ret_from_fork to be right after fork/exec helpers.

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
CC: Linus Torvalds <torvalds@linux-foundation.org>
CC: Oleg Nesterov <oleg@redhat.com>
CC: Borislav Petkov <bp@alien8.de>
CC: "H. Peter Anvin" <hpa@zytor.com>
CC: Andy Lutomirski <luto@amacapital.net>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: X86 ML <x86@kernel.org>
CC: Alexei Starovoitov <ast@plumgrid.com>
CC: Will Drewry <wad@chromium.org>
CC: Kees Cook <keescook@chromium.org>
CC: linux-kernel@vger.kernel.org
---
 arch/x86/kernel/entry_64.S | 106 ++++++++++++++++++++++-----------------------
 1 file changed, 53 insertions(+), 53 deletions(-)

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 3498274..d72e46e 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -201,59 +201,6 @@ ENDPROC(native_usergs_sysret64)
 	CFI_REL_OFFSET r15, R15+\offset
 	.endm
 
-ENTRY(save_paranoid)
-	XCPT_FRAME 1 RDI+8
-	cld
-	SAVE_C_REGS 8
-	SAVE_EXTRA_REGS 8
-	movl $1,%ebx
-	movl $MSR_GS_BASE,%ecx
-	rdmsr
-	testl %edx,%edx
-	js 1f	/* negative -> in kernel */
-	SWAPGS
-	xorl %ebx,%ebx
-1:	ret
-	CFI_ENDPROC
-END(save_paranoid)
-
-/*
- * A newly forked process directly context switches into this address.
- *
- * rdi: prev task we switched from
- */
-ENTRY(ret_from_fork)
-	DEFAULT_FRAME
-
-	LOCK ; btr $TIF_FORK,TI_flags(%r8)
-
-	pushq_cfi $0x0002
-	popfq_cfi				# reset kernel eflags
-
-	call schedule_tail			# rdi: 'prev' task parameter
-
-	GET_THREAD_INFO(%rcx)
-
-	RESTORE_EXTRA_REGS
-
-	testl $3, CS(%rsp)			# from kernel_thread?
-	jz   1f
-
-	testl $_TIF_IA32, TI_flags(%rcx)	# 32-bit compat task needs IRET
-	jnz  int_ret_from_sys_call
-
-	RESTORE_TOP_OF_STACK %rdi
-	jmp ret_from_sys_call			# go to the SYSRET fastpath
-
-1:
-	movq %rbp, %rdi
-	call *%rbx
-	movl $0, RAX(%rsp)
-	RESTORE_EXTRA_REGS
-	jmp int_ret_from_sys_call
-	CFI_ENDPROC
-END(ret_from_fork)
-
 /*
  * 64bit SYSCALL instruction entry. Up to 6 arguments in registers.
  *
@@ -623,6 +570,43 @@ END(stub_x32_execveat)
 #endif
 
 /*
+ * A newly forked process directly context switches into this address.
+ *
+ * rdi: prev task we switched from
+ */
+ENTRY(ret_from_fork)
+	DEFAULT_FRAME
+
+	LOCK ; btr $TIF_FORK,TI_flags(%r8)
+
+	pushq_cfi $0x0002
+	popfq_cfi				# reset kernel eflags
+
+	call schedule_tail			# rdi: 'prev' task parameter
+
+	GET_THREAD_INFO(%rcx)
+
+	RESTORE_EXTRA_REGS
+
+	testl $3, CS(%rsp)			# from kernel_thread?
+	jz   1f
+
+	testl $_TIF_IA32, TI_flags(%rcx)	# 32-bit compat task needs IRET
+	jnz  int_ret_from_sys_call
+
+	RESTORE_TOP_OF_STACK %rdi
+	jmp ret_from_sys_call			# go to the SYSRET fastpath
+
+1:
+	movq %rbp, %rdi
+	call *%rbx
+	movl $0, RAX(%rsp)
+	RESTORE_EXTRA_REGS
+	jmp int_ret_from_sys_call
+	CFI_ENDPROC
+END(ret_from_fork)
+
+/*
  * Build the entry stubs and pointer table with some assembler magic.
  * We pack 7 stubs into a single 32-byte chunk, which will fit in a
  * single cache line on all modern x86 implementations.
@@ -1256,6 +1240,22 @@ idtentry async_page_fault do_async_page_fault has_error_code=1
 idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vector(%rip)
 #endif
 
+ENTRY(save_paranoid)
+	XCPT_FRAME 1 RDI+8
+	cld
+	SAVE_C_REGS 8
+	SAVE_EXTRA_REGS 8
+	movl $1,%ebx
+	movl $MSR_GS_BASE,%ecx
+	rdmsr
+	testl %edx,%edx
+	js 1f	/* negative -> in kernel */
+	SWAPGS
+	xorl %ebx,%ebx
+1:	ret
+	CFI_ENDPROC
+END(save_paranoid)
+
 	/*
 	 * "Paranoid" exit path from exception stack.  This is invoked
 	 * only on return from non-NMI IST interrupts that came
-- 
1.8.1.4


^ permalink raw reply	[flat|nested] 130+ messages in thread

* [PATCH 07/11] x86: entry_64.S: rename save_paranoid to paranoid_entry, no code changes
  2015-01-14 21:48 [PATCH 01/11] x86: entry_64.S: always allocate complete "struct pt_regs" Denys Vlasenko
                   ` (4 preceding siblings ...)
  2015-01-14 21:48 ` [PATCH 06/11] x86: entry_64.S: move save_paranoid and ret_from_fork closer to their users Denys Vlasenko
@ 2015-01-14 21:48 ` Denys Vlasenko
  2015-02-11 20:39   ` Andy Lutomirski
  2015-01-14 21:48 ` [PATCH 08/11] x86: entry_64.S: fold test_in_nmi macro into its only user Denys Vlasenko
                   ` (5 subsequent siblings)
  11 siblings, 1 reply; 130+ messages in thread
From: Denys Vlasenko @ 2015-01-14 21:48 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Denys Vlasenko, Linus Torvalds, Oleg Nesterov, Borislav Petkov,
	H. Peter Anvin, Frederic Weisbecker, X86 ML, Alexei Starovoitov,
	Will Drewry, Kees Cook, linux-kernel

This patch does a lot of cleanup in comments and formatting,
but it does not change any code.

Rename save_paranoid to paranoid_entry: this makes naming
similar to its "non-paranoid" sibling, error_entry,
and to its counterpart, paranoid_exit.

Use the same CFI annotation atop paranoid_entry and error_entry.

Fix irregular indentation of assembler operands.

Add/fix comments on top of paranoid_entry and error_entry.
Remove stale comment about "oldrax".
Make comments about "no swapgs" flag in ebx more prominent.
Deindent wrongly indented top-level comment atop paranoid_exit.
Indent wrongly deindented comment inside error_entry.

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
CC: Linus Torvalds <torvalds@linux-foundation.org>
CC: Oleg Nesterov <oleg@redhat.com>
CC: Borislav Petkov <bp@alien8.de>
CC: "H. Peter Anvin" <hpa@zytor.com>
CC: Andy Lutomirski <luto@amacapital.net>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: X86 ML <x86@kernel.org>
CC: Alexei Starovoitov <ast@plumgrid.com>
CC: Will Drewry <wad@chromium.org>
CC: Kees Cook <keescook@chromium.org>
CC: linux-kernel@vger.kernel.org
---
 arch/x86/kernel/entry_64.S | 104 +++++++++++++++++++++++----------------------
 1 file changed, 54 insertions(+), 50 deletions(-)

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index d72e46e..aacfa2c 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -968,10 +968,11 @@ ENTRY(\sym)
 	testl $3, CS(%rsp)		/* If coming from userspace, switch */
 	jnz 1f				/* stacks. */
 	.endif
-	call save_paranoid
+	call paranoid_entry
 	.else
 	call error_entry
 	.endif
+	/* returned flag: ebx=0: need swapgs on exit, ebx=1: don't need it */
 
 	DEFAULT_FRAME 0
 
@@ -1002,10 +1003,11 @@ ENTRY(\sym)
 	addq $EXCEPTION_STKSZ, INIT_TSS_IST(\shift_ist)
 	.endif
 
+	/* these procedures expect "no swapgs" flag in ebx */
 	.if \paranoid
-	jmp paranoid_exit		/* %ebx: no swapgs flag */
+	jmp paranoid_exit
 	.else
-	jmp error_exit			/* %ebx: no swapgs flag */
+	jmp error_exit
 	.endif
 
 	.if \paranoid == 1
@@ -1240,8 +1242,13 @@ idtentry async_page_fault do_async_page_fault has_error_code=1
 idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vector(%rip)
 #endif
 
-ENTRY(save_paranoid)
-	XCPT_FRAME 1 RDI+8
+/*
+ * Save all registers in pt_regs, and switch gs if needed.
+ * Use slow, but surefire "are we in kernel?" check.
+ * Return: ebx=0: need swapgs on exit, ebx=1: otherwise
+ */
+ENTRY(paranoid_entry)
+	XCPT_FRAME 1 15*8
 	cld
 	SAVE_C_REGS 8
 	SAVE_EXTRA_REGS 8
@@ -1254,20 +1261,19 @@ ENTRY(save_paranoid)
 	xorl %ebx,%ebx
 1:	ret
 	CFI_ENDPROC
-END(save_paranoid)
+END(paranoid_entry)
 
-	/*
-	 * "Paranoid" exit path from exception stack.  This is invoked
-	 * only on return from non-NMI IST interrupts that came
-	 * from kernel space.
-	 *
-	 * We may be returning to very strange contexts (e.g. very early
-	 * in syscall entry), so checking for preemption here would
-	 * be complicated.  Fortunately, we there's no good reason
-	 * to try to handle preemption here.
-	 */
-
-	/* ebx:	no swapgs flag */
+/*
+ * "Paranoid" exit path from exception stack.  This is invoked
+ * only on return from non-NMI IST interrupts that came
+ * from kernel space.
+ *
+ * We may be returning to very strange contexts (e.g. very early
+ * in syscall entry), so checking for preemption here would
+ * be complicated.  Fortunately, we there's no good reason
+ * to try to handle preemption here.
+ */
+/* On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it) */
 ENTRY(paranoid_exit)
 	DEFAULT_FRAME
 	DISABLE_INTERRUPTS(CLBR_NONE)
@@ -1288,18 +1294,16 @@ paranoid_restore1:
 END(paranoid_exit)
 
 /*
- * Exception entry point. This expects an error code/orig_rax on the stack.
- * returns in "no swapgs flag" in %ebx.
+ * Save all registers in pt_regs, and switch gs if needed.
+ * Return: ebx=0: need swapgs on exit, ebx=1: otherwise
  */
 ENTRY(error_entry)
-	XCPT_FRAME
-	CFI_ADJUST_CFA_OFFSET 15*8
-	/* oldrax contains error code */
+	XCPT_FRAME 1 15*8
 	cld
 	SAVE_C_REGS 8
 	SAVE_EXTRA_REGS 8
-	xorl %ebx,%ebx
-	testl $3,CS+8(%rsp)
+	xorl	%ebx,%ebx
+	testl	$3,CS+8(%rsp)
 	je error_kernelspace
 error_swapgs:
 	SWAPGS
@@ -1307,42 +1311,42 @@ error_sti:
 	TRACE_IRQS_OFF
 	ret
 
-/*
- * There are two places in the kernel that can potentially fault with
- * usergs. Handle them here.  B stepping K8s sometimes report a
- * truncated RIP for IRET exceptions returning to compat mode. Check
- * for these here too.
- */
+	/*
+	 * There are two places in the kernel that can potentially fault with
+	 * usergs. Handle them here.  B stepping K8s sometimes report a
+	 * truncated RIP for IRET exceptions returning to compat mode. Check
+	 * for these here too.
+	 */
 error_kernelspace:
 	CFI_REL_OFFSET rcx, RCX+8
-	incl %ebx
-	leaq native_irq_return_iret(%rip),%rcx
-	cmpq %rcx,RIP+8(%rsp)
-	je error_bad_iret
-	movl %ecx,%eax	/* zero extend */
-	cmpq %rax,RIP+8(%rsp)
-	je bstep_iret
-	cmpq $gs_change,RIP+8(%rsp)
-	je error_swapgs
-	jmp error_sti
+	incl	%ebx
+	leaq	native_irq_return_iret(%rip),%rcx
+	cmpq	%rcx,RIP+8(%rsp)
+	je	error_bad_iret
+	movl	%ecx,%eax	/* zero extend */
+	cmpq	%rax,RIP+8(%rsp)
+	je	bstep_iret
+	cmpq	$gs_change,RIP+8(%rsp)
+	je	error_swapgs
+	jmp	error_sti
 
 bstep_iret:
 	/* Fix truncated RIP */
-	movq %rcx,RIP+8(%rsp)
+	movq	%rcx,RIP+8(%rsp)
 	/* fall through */
 
 error_bad_iret:
 	SWAPGS
-	mov %rsp,%rdi
-	call fixup_bad_iret
-	mov %rax,%rsp
-	decl %ebx	/* Return to usergs */
-	jmp error_sti
+	mov	%rsp,%rdi
+	call	fixup_bad_iret
+	mov	%rax,%rsp
+	decl	%ebx	/* Return to usergs */
+	jmp	error_sti
 	CFI_ENDPROC
 END(error_entry)
 
 
-/* ebx:	no swapgs flag (1: don't need swapgs, 0: need it) */
+/* On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it) */
 ENTRY(error_exit)
 	DEFAULT_FRAME
 	movl %ebx,%eax
@@ -1568,13 +1572,13 @@ end_repeat_nmi:
 	ALLOC_PT_GPREGS_ON_STACK
 
 	/*
-	 * Use save_paranoid to handle SWAPGS, but no need to use paranoid_exit
+	 * Use paranoid_entry to handle SWAPGS, but no need to use paranoid_exit
 	 * as we should not be calling schedule in NMI context.
 	 * Even with normal interrupts enabled. An NMI should not be
 	 * setting NEED_RESCHED or anything that normal interrupts and
 	 * exceptions might do.
 	 */
-	call save_paranoid
+	call paranoid_entry
 	DEFAULT_FRAME 0
 
 	/*
-- 
1.8.1.4


^ permalink raw reply	[flat|nested] 130+ messages in thread

* [PATCH 08/11] x86: entry_64.S: fold test_in_nmi macro into its only user
  2015-01-14 21:48 [PATCH 01/11] x86: entry_64.S: always allocate complete "struct pt_regs" Denys Vlasenko
                   ` (5 preceding siblings ...)
  2015-01-14 21:48 ` [PATCH 07/11] x86: entry_64.S: rename save_paranoid to paranoid_entry, no code changes Denys Vlasenko
@ 2015-01-14 21:48 ` Denys Vlasenko
  2015-02-11 20:40   ` Andy Lutomirski
  2015-01-14 21:48 ` [PATCH 09/11] x86: get rid of KERNEL_STACK_OFFSET Denys Vlasenko
                   ` (4 subsequent siblings)
  11 siblings, 1 reply; 130+ messages in thread
From: Denys Vlasenko @ 2015-01-14 21:48 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Denys Vlasenko, Linus Torvalds, Oleg Nesterov, Borislav Petkov,
	H. Peter Anvin, Frederic Weisbecker, X86 ML, Alexei Starovoitov,
	Will Drewry, Kees Cook, linux-kernel

No code changes.

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
CC: Linus Torvalds <torvalds@linux-foundation.org>
CC: Oleg Nesterov <oleg@redhat.com>
CC: Borislav Petkov <bp@alien8.de>
CC: "H. Peter Anvin" <hpa@zytor.com>
CC: Andy Lutomirski <luto@amacapital.net>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: X86 ML <x86@kernel.org>
CC: Alexei Starovoitov <ast@plumgrid.com>
CC: Will Drewry <wad@chromium.org>
CC: Kees Cook <keescook@chromium.org>
CC: linux-kernel@vger.kernel.org
---
 arch/x86/kernel/entry_64.S | 24 +++++++++---------------
 1 file changed, 9 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index aacfa2c..71036fe 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1365,19 +1365,7 @@ ENTRY(error_exit)
 	CFI_ENDPROC
 END(error_exit)
 
-/*
- * Test if a given stack is an NMI stack or not.
- */
-	.macro test_in_nmi reg stack nmi_ret normal_ret
-	cmpq %\reg, \stack
-	ja \normal_ret
-	subq $EXCEPTION_STKSZ, %\reg
-	cmpq %\reg, \stack
-	jb \normal_ret
-	jmp \nmi_ret
-	.endm
-
-	/* runs on exception stack */
+/* Runs on exception stack */
 ENTRY(nmi)
 	INTR_FRAME
 	PARAVIRT_ADJUST_EXCEPTION_FRAME
@@ -1438,8 +1426,14 @@ ENTRY(nmi)
 	 * We check the variable because the first NMI could be in a
 	 * breakpoint routine using a breakpoint stack.
 	 */
-	lea 6*8(%rsp), %rdx
-	test_in_nmi rdx, 4*8(%rsp), nested_nmi, first_nmi
+	lea	6*8(%rsp), %rdx
+	cmpq	%rdx, 4*8(%rsp)
+	ja	first_nmi
+	subq	$EXCEPTION_STKSZ, %rdx
+	cmpq	%rdx, 4*8(%rsp)
+	jb	first_nmi
+	jmp	nested_nmi
+
 	CFI_REMEMBER_STATE
 
 nested_nmi:
-- 
1.8.1.4


^ permalink raw reply	[flat|nested] 130+ messages in thread

* [PATCH 09/11] x86: get rid of KERNEL_STACK_OFFSET
  2015-01-14 21:48 [PATCH 01/11] x86: entry_64.S: always allocate complete "struct pt_regs" Denys Vlasenko
                   ` (6 preceding siblings ...)
  2015-01-14 21:48 ` [PATCH 08/11] x86: entry_64.S: fold test_in_nmi macro into its only user Denys Vlasenko
@ 2015-01-14 21:48 ` Denys Vlasenko
  2015-01-14 21:48 ` [PATCH 10/11] x86: ia32entry.S: fold IA32_ARG_FIXUP macro into its callers Denys Vlasenko
                   ` (3 subsequent siblings)
  11 siblings, 0 replies; 130+ messages in thread
From: Denys Vlasenko @ 2015-01-14 21:48 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Denys Vlasenko, Linus Torvalds, Oleg Nesterov, Borislav Petkov,
	H. Peter Anvin, Frederic Weisbecker, X86 ML, Alexei Starovoitov,
	Will Drewry, Kees Cook, linux-kernel

PER_CPU_VAR(kernel_stack) was set up in a way where it points
five stack slots below the top of stack.

Presumably, it was done to avoid one "sub $5*8,%rsp"
in syscall/sysenter code paths, where iret frame needs to be
created by hand.

Ironically, none of them benefit from this optimization,
since all of them need to allocate additional data on stack
(struct pt_regs), so they still have to perform subtraction.
And ia32_sysenter_target even needs to *undo* this optimization:
it constructs iret stack with pushes instead of movs,
so it needs to start right at the top.

This patch eliminates KERNEL_STACK_OFFSET.
PER_CPU_VAR(kernel_stack) now points directly to top of stack.
pt_regs allocations are adjusted to allocate iret frame as well.

Semi-mysterious expressions THREAD_INFO(%rsp,RIP) - "why RIP??"
are now replaced by more logical THREAD_INFO(%rsp,SIZEOF_PTREGS) -
"rsp is SIZEOF_PTREGS bytes below the top, calculate
thread_info's address using that information"

Net result in code is that one instruction is eliminated,
and constants in several others have changed.

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
CC: Linus Torvalds <torvalds@linux-foundation.org>
CC: Oleg Nesterov <oleg@redhat.com>
CC: Borislav Petkov <bp@alien8.de>
CC: "H. Peter Anvin" <hpa@zytor.com>
CC: Andy Lutomirski <luto@amacapital.net>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: X86 ML <x86@kernel.org>
CC: Alexei Starovoitov <ast@plumgrid.com>
CC: Will Drewry <wad@chromium.org>
CC: Kees Cook <keescook@chromium.org>
CC: linux-kernel@vger.kernel.org
---
 arch/x86/ia32/ia32entry.S          | 35 +++++++++++++++++------------------
 arch/x86/include/asm/calling.h     |  2 ++
 arch/x86/include/asm/thread_info.h |  8 +++-----
 arch/x86/kernel/cpu/common.c       |  2 +-
 arch/x86/kernel/entry_64.S         |  9 ++++-----
 arch/x86/kernel/process_32.c       |  3 +--
 arch/x86/kernel/process_64.c       |  3 +--
 arch/x86/kernel/smpboot.c          |  3 +--
 arch/x86/xen/smp.c                 |  3 +--
 9 files changed, 31 insertions(+), 37 deletions(-)

diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 366f633..ed58fe1 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -125,7 +125,6 @@ ENTRY(ia32_sysenter_target)
 	CFI_REGISTER	rsp,rbp
 	SWAPGS_UNSAFE_STACK
 	movq	PER_CPU_VAR(kernel_stack), %rsp
-	addq	$(KERNEL_STACK_OFFSET),%rsp
 	/*
 	 * No need to follow this irqs on/off section: the syscall
 	 * disabled irqs, here we enable it straight after entry:
@@ -139,7 +138,7 @@ ENTRY(ia32_sysenter_target)
 	CFI_REL_OFFSET rsp,0
 	pushfq_cfi
 	/*CFI_REL_OFFSET rflags,0*/
-	movl	TI_sysenter_return+THREAD_INFO(%rsp,3*8-KERNEL_STACK_OFFSET),%r10d
+	movl	TI_sysenter_return+THREAD_INFO(%rsp,3*8),%r10d
 	CFI_REGISTER rip,r10
 	pushq_cfi $__USER32_CS
 	/*CFI_REL_OFFSET cs,0*/
@@ -171,8 +170,8 @@ ENTRY(ia32_sysenter_target)
 	jnz sysenter_fix_flags
 sysenter_flags_fixed:
 
-	orl     $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP)
-	testl   $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP)
+	orl     $TS_COMPAT,TI_status+THREAD_INFO(%rsp,SIZEOF_PTREGS)
+	testl   $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,SIZEOF_PTREGS)
 	CFI_REMEMBER_STATE
 	jnz  sysenter_tracesys
 	cmpq	$(IA32_NR_syscalls-1),%rax
@@ -184,10 +183,10 @@ sysenter_dispatch:
 	movq	%rax,RAX(%rsp)
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
-	testl	$_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP)
+	testl	$_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,SIZEOF_PTREGS)
 	jnz	sysexit_audit
 sysexit_from_sys_call:
-	andl    $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP)
+	andl    $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,SIZEOF_PTREGS)
 	/* clear IF, that popfq doesn't enable interrupts early */
 	andl	$~0x200,EFLAGS(%rsp)
 	movl	RIP(%rsp),%edx		/* User %eip */
@@ -232,7 +231,7 @@ sysexit_from_sys_call:
 	.endm
 
 	.macro auditsys_exit exit
-	testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP)
+	testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,SIZEOF_PTREGS)
 	jnz ia32_ret_from_sys_call
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_NONE)
@@ -247,7 +246,7 @@ sysexit_from_sys_call:
 	movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
-	testl %edi,TI_flags+THREAD_INFO(%rsp,RIP)
+	testl %edi,TI_flags+THREAD_INFO(%rsp,SIZEOF_PTREGS)
 	jz \exit
 	CLEAR_RREGS
 	jmp int_with_check
@@ -269,7 +268,7 @@ sysenter_fix_flags:
 
 sysenter_tracesys:
 #ifdef CONFIG_AUDITSYSCALL
-	testl	$(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP)
+	testl	$(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,SIZEOF_PTREGS)
 	jz	sysenter_auditsys
 #endif
 	SAVE_EXTRA_REGS
@@ -318,7 +317,7 @@ ENDPROC(ia32_sysenter_target)
 ENTRY(ia32_cstar_target)
 	CFI_STARTPROC32	simple
 	CFI_SIGNAL_FRAME
-	CFI_DEF_CFA	rsp,KERNEL_STACK_OFFSET
+	CFI_DEF_CFA	rsp,0
 	CFI_REGISTER	rip,rcx
 	/*CFI_REGISTER	rflags,r11*/
 	SWAPGS_UNSAFE_STACK
@@ -330,7 +329,7 @@ ENTRY(ia32_cstar_target)
 	 * disabled irqs and here we enable it straight after entry:
 	 */
 	ENABLE_INTERRUPTS(CLBR_NONE)
-	ALLOC_PT_GPREGS_ON_STACK 8	/* +8: space for orig_ax */
+	ALLOC_PT_GPREGS_ON_STACK 6*8 /* 6*8: space for orig_ax and iret frame */
 	SAVE_C_REGS_EXCEPT_RCX_R891011
 	movl 	%eax,%eax	/* zero extension */
 	movq	%rax,ORIG_RAX(%rsp)
@@ -353,8 +352,8 @@ ENTRY(ia32_cstar_target)
 1:	movl	(%r8),%r9d
 	_ASM_EXTABLE(1b,ia32_badarg)
 	ASM_CLAC
-	orl     $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP)
-	testl   $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP)
+	orl     $TS_COMPAT,TI_status+THREAD_INFO(%rsp,SIZEOF_PTREGS)
+	testl   $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,SIZEOF_PTREGS)
 	CFI_REMEMBER_STATE
 	jnz   cstar_tracesys
 	cmpq $IA32_NR_syscalls-1,%rax
@@ -366,10 +365,10 @@ cstar_dispatch:
 	movq %rax,RAX(%rsp)
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
-	testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP)
+	testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,SIZEOF_PTREGS)
 	jnz sysretl_audit
 sysretl_from_sys_call:
-	andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP)
+	andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,SIZEOF_PTREGS)
 	RESTORE_RSI_RDI_RDX
 	movl RIP(%rsp),%ecx
 	CFI_REGISTER rip,rcx
@@ -404,7 +403,7 @@ sysretl_audit:
 
 cstar_tracesys:
 #ifdef CONFIG_AUDITSYSCALL
-	testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP)
+	testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,SIZEOF_PTREGS)
 	jz cstar_auditsys
 #endif
 	xchgl %r9d,%ebp
@@ -471,8 +470,8 @@ ENTRY(ia32_syscall)
 	   this could be a problem. */
 	ALLOC_PT_GPREGS_ON_STACK
 	SAVE_C_REGS_EXCEPT_R891011
-	orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP)
-	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP)
+	orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,SIZEOF_PTREGS)
+	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,SIZEOF_PTREGS)
 	jnz ia32_tracesys
 	cmpq $(IA32_NR_syscalls-1),%rax
 	ja ia32_badsys
diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h
index 3cd7e31..9f0de92 100644
--- a/arch/x86/include/asm/calling.h
+++ b/arch/x86/include/asm/calling.h
@@ -88,6 +88,8 @@ For 32-bit we have the following conventions - kernel is built with
 #define RSP		19*8
 #define SS		20*8
 
+#define SIZEOF_PTREGS	21*8
+
 	.macro ALLOC_PT_GPREGS_ON_STACK addskip=0
 	subq	$15*8+\addskip, %rsp
 	CFI_ADJUST_CFA_OFFSET 15*8+\addskip
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index e82e95a..3eb9d05 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -149,7 +149,6 @@ struct thread_info {
 #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
 
 #define STACK_WARN		(THREAD_SIZE/8)
-#define KERNEL_STACK_OFFSET	(5*(BITS_PER_LONG/8))
 
 /*
  * macros/functions for gaining access to the thread information structure
@@ -163,8 +162,7 @@ DECLARE_PER_CPU(unsigned long, kernel_stack);
 static inline struct thread_info *current_thread_info(void)
 {
 	struct thread_info *ti;
-	ti = (void *)(this_cpu_read_stable(kernel_stack) +
-		      KERNEL_STACK_OFFSET - THREAD_SIZE);
+	ti = (void *)(this_cpu_read_stable(kernel_stack) - THREAD_SIZE);
 	return ti;
 }
 
@@ -184,13 +182,13 @@ static inline unsigned long current_stack_pointer(void)
 /* how to get the thread information struct from ASM */
 #define GET_THREAD_INFO(reg) \
 	_ASM_MOV PER_CPU_VAR(kernel_stack),reg ; \
-	_ASM_SUB $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg ;
+	_ASM_SUB $(THREAD_SIZE),reg ;
 
 /*
  * Same if PER_CPU_VAR(kernel_stack) is, perhaps with some offset, already in
  * a certain register (to be used in assembler memory operands).
  */
-#define THREAD_INFO(reg, off) KERNEL_STACK_OFFSET+(off)-THREAD_SIZE(reg)
+#define THREAD_INFO(reg, off) (off)-THREAD_SIZE(reg)
 
 #endif
 
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index c604965..aa76c69 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1116,7 +1116,7 @@ static __init int setup_disablecpuid(char *arg)
 __setup("clearcpuid=", setup_disablecpuid);
 
 DEFINE_PER_CPU(unsigned long, kernel_stack) =
-	(unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE;
+	(unsigned long)&init_thread_union + THREAD_SIZE;
 EXPORT_PER_CPU_SYMBOL(kernel_stack);
 
 #ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 71036fe..279af0e 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -236,7 +236,7 @@ ENDPROC(native_usergs_sysret64)
 ENTRY(system_call)
 	CFI_STARTPROC	simple
 	CFI_SIGNAL_FRAME
-	CFI_DEF_CFA	rsp,KERNEL_STACK_OFFSET
+	CFI_DEF_CFA	rsp,0
 	CFI_REGISTER	rip,rcx
 	/*CFI_REGISTER	rflags,r11*/
 	SWAPGS_UNSAFE_STACK
@@ -248,20 +248,19 @@ ENTRY(system_call)
 GLOBAL(system_call_after_swapgs)
 
 	movq	%rsp,PER_CPU_VAR(old_rsp)
-	/* kernel_stack is set so that 5 slots (iret frame) are preallocated */
 	movq	PER_CPU_VAR(kernel_stack),%rsp
 	/*
 	 * No need to follow this irqs off/on section - it's straight
 	 * and short:
 	 */
 	ENABLE_INTERRUPTS(CLBR_NONE)
-	ALLOC_PT_GPREGS_ON_STACK 8		/* +8: space for orig_ax */
+	ALLOC_PT_GPREGS_ON_STACK 6*8 /* 6*8: space for orig_ax and iret frame */
 	SAVE_C_REGS_EXCEPT_RAX_RCX
 	movq	$-ENOSYS,RAX(%rsp)
 	movq_cfi rax,(ORIG_RAX)
 	movq	%rcx,RIP(%rsp)
 	CFI_REL_OFFSET rip,RIP
-	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP)
+	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,SIZEOF_PTREGS)
 	jnz tracesys
 system_call_fastpath:
 #if __SYSCALL_MASK == ~0
@@ -285,7 +284,7 @@ sysret_check:
 	LOCKDEP_SYS_EXIT
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
-	movl TI_flags+THREAD_INFO(%rsp,RIP),%edx
+	movl TI_flags+THREAD_INFO(%rsp,SIZEOF_PTREGS),%edx
 	andl %edi,%edx
 	jnz  sysret_careful
 	CFI_REMEMBER_STATE
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 8f3ebfe..ecda2bd 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -311,8 +311,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	arch_end_context_switch(next_p);
 
 	this_cpu_write(kernel_stack,
-		  (unsigned long)task_stack_page(next_p) +
-		  THREAD_SIZE - KERNEL_STACK_OFFSET);
+		(unsigned long)task_stack_page(next_p) + THREAD_SIZE);
 
 	/*
 	 * Restore %gs if needed (which is common)
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 5a2c029..975d342 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -414,8 +414,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	this_cpu_write(__preempt_count, task_thread_info(next_p)->saved_preempt_count);
 
 	this_cpu_write(kernel_stack,
-		  (unsigned long)task_stack_page(next_p) +
-		  THREAD_SIZE - KERNEL_STACK_OFFSET);
+		(unsigned long)task_stack_page(next_p) + THREAD_SIZE);
 
 	/*
 	 * Now maybe reload the debug registers and handle I/O bitmaps
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 6d7022c..66fd449 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -776,8 +776,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
 	initial_gs = per_cpu_offset(cpu);
 #endif
 	per_cpu(kernel_stack, cpu) =
-		(unsigned long)task_stack_page(idle) -
-		KERNEL_STACK_OFFSET + THREAD_SIZE;
+		(unsigned long)task_stack_page(idle) + THREAD_SIZE;
 	early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
 	initial_code = (unsigned long)start_secondary;
 	stack_start  = idle->thread.sp;
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 4c071ae..4e71a3b 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -452,8 +452,7 @@ static int xen_cpu_up(unsigned int cpu, struct task_struct *idle)
 	clear_tsk_thread_flag(idle, TIF_FORK);
 #endif
 	per_cpu(kernel_stack, cpu) =
-		(unsigned long)task_stack_page(idle) -
-		KERNEL_STACK_OFFSET + THREAD_SIZE;
+		(unsigned long)task_stack_page(idle) + THREAD_SIZE;
 
 	xen_setup_runstate_info(cpu);
 	xen_setup_timer(cpu);
-- 
1.8.1.4


^ permalink raw reply	[flat|nested] 130+ messages in thread

* [PATCH 10/11] x86: ia32entry.S: fold IA32_ARG_FIXUP macro into its callers
  2015-01-14 21:48 [PATCH 01/11] x86: entry_64.S: always allocate complete "struct pt_regs" Denys Vlasenko
                   ` (7 preceding siblings ...)
  2015-01-14 21:48 ` [PATCH 09/11] x86: get rid of KERNEL_STACK_OFFSET Denys Vlasenko
@ 2015-01-14 21:48 ` Denys Vlasenko
  2015-01-14 21:48 ` [PATCH 11/11] x86: entry_64.S: use more understandable constants Denys Vlasenko
                   ` (2 subsequent siblings)
  11 siblings, 0 replies; 130+ messages in thread
From: Denys Vlasenko @ 2015-01-14 21:48 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Denys Vlasenko, Linus Torvalds, Oleg Nesterov, Borislav Petkov,
	H. Peter Anvin, Frederic Weisbecker, X86 ML, Alexei Starovoitov,
	Will Drewry, Kees Cook, linux-kernel

Use of a small macro - one with conditional expansion - does more harm
than good. It obfuscates code, with minimal code reuse.

For example, because of obfuscation it's not obvious that
in ia32_sysenter_target, we can optimize loading of r9 -
currently it is loaded with a detour through ebp.

This patch folds IA32_ARG_FIXUP macro into its callers.

No code changes.

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
CC: Linus Torvalds <torvalds@linux-foundation.org>
CC: Oleg Nesterov <oleg@redhat.com>
CC: Borislav Petkov <bp@alien8.de>
CC: "H. Peter Anvin" <hpa@zytor.com>
CC: Andy Lutomirski <luto@amacapital.net>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: X86 ML <x86@kernel.org>
CC: Alexei Starovoitov <ast@plumgrid.com>
CC: Will Drewry <wad@chromium.org>
CC: Kees Cook <keescook@chromium.org>
CC: linux-kernel@vger.kernel.org
---
 arch/x86/ia32/ia32entry.S | 32 ++++++++++++++++++--------------
 1 file changed, 18 insertions(+), 14 deletions(-)

diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index ed58fe1..55b785c 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -30,17 +30,6 @@
 
 	.section .entry.text, "ax"
 
-	.macro IA32_ARG_FIXUP noebp=0
-	movl	%edi,%r8d
-	.if \noebp
-	.else
-	movl	%ebp,%r9d
-	.endif
-	xchg	%ecx,%esi
-	movl	%ebx,%edi
-	movl	%edx,%edx	/* zero extension */
-	.endm 
-
 	/* clobbers %rax */
 	.macro  CLEAR_RREGS _r9=rax
 	xorl 	%eax,%eax
@@ -177,7 +166,12 @@ sysenter_flags_fixed:
 	cmpq	$(IA32_NR_syscalls-1),%rax
 	ja	ia32_badsys
 sysenter_do_call:
-	IA32_ARG_FIXUP
+	/* 32bit syscall -> 64bit C ABI argument conversion */
+	movl	%edi,%r8d	/* arg5 */
+	movl	%ebp,%r9d	/* arg6 */
+	xchg	%ecx,%esi	/* rsi:arg2, rcx:arg4 */
+	movl	%ebx,%edi	/* arg1 */
+	movl	%edx,%edx	/* arg3 (zero extension) */
 sysenter_dispatch:
 	call	*ia32_sys_call_table(,%rax,8)
 	movq	%rax,RAX(%rsp)
@@ -359,7 +353,12 @@ ENTRY(ia32_cstar_target)
 	cmpq $IA32_NR_syscalls-1,%rax
 	ja  ia32_badsys
 cstar_do_call:
-	IA32_ARG_FIXUP 1
+	/* 32bit syscall -> 64bit C ABI argument conversion */
+	movl	%edi,%r8d	/* arg5 */
+	/* r9 already loaded */	/* arg6 */
+	xchg	%ecx,%esi	/* rsi:arg2, rcx:arg4 */
+	movl	%ebx,%edi	/* arg1 */
+	movl	%edx,%edx	/* arg3 (zero extension) */
 cstar_dispatch:
 	call *ia32_sys_call_table(,%rax,8)
 	movq %rax,RAX(%rsp)
@@ -476,7 +475,12 @@ ENTRY(ia32_syscall)
 	cmpq $(IA32_NR_syscalls-1),%rax
 	ja ia32_badsys
 ia32_do_call:
-	IA32_ARG_FIXUP
+	/* 32bit syscall -> 64bit C ABI argument conversion */
+	movl %edi,%r8d	/* arg5 */
+	movl %ebp,%r9d	/* arg6 */
+	xchg %ecx,%esi	/* rsi:arg2, rcx:arg4 */
+	movl %ebx,%edi	/* arg1 */
+	movl %edx,%edx	/* arg3 (zero extension) */
 	call *ia32_sys_call_table(,%rax,8) # xxx: rip relative
 ia32_sysret:
 	movq %rax,RAX(%rsp)
-- 
1.8.1.4


^ permalink raw reply	[flat|nested] 130+ messages in thread

* [PATCH 11/11] x86: entry_64.S: use more understandable constants
  2015-01-14 21:48 [PATCH 01/11] x86: entry_64.S: always allocate complete "struct pt_regs" Denys Vlasenko
                   ` (8 preceding siblings ...)
  2015-01-14 21:48 ` [PATCH 10/11] x86: ia32entry.S: fold IA32_ARG_FIXUP macro into its callers Denys Vlasenko
@ 2015-01-14 21:48 ` Denys Vlasenko
  2015-01-14 22:17 ` [PATCH 01/11] x86: entry_64.S: always allocate complete "struct pt_regs" Denys Vlasenko
  2015-02-11 20:30 ` Andy Lutomirski
  11 siblings, 0 replies; 130+ messages in thread
From: Denys Vlasenko @ 2015-01-14 21:48 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Denys Vlasenko, Linus Torvalds, Oleg Nesterov, Borislav Petkov,
	H. Peter Anvin, Frederic Weisbecker, X86 ML, Alexei Starovoitov,
	Will Drewry, Kees Cook, linux-kernel

Constants such as SS+8 or SS+8-RIP are mysterious.
In most cases, SS+8 is just meant to be SIZEOF_PTREGS,
SS+8-RIP is RIP's offset in iret frame.

This patch changes some of these constants to be less mysterious.
No code changes (verified with objdump).

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
CC: Linus Torvalds <torvalds@linux-foundation.org>
CC: Oleg Nesterov <oleg@redhat.com>
CC: Borislav Petkov <bp@alien8.de>
CC: "H. Peter Anvin" <hpa@zytor.com>
CC: Andy Lutomirski <luto@amacapital.net>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: X86 ML <x86@kernel.org>
CC: Alexei Starovoitov <ast@plumgrid.com>
CC: Will Drewry <wad@chromium.org>
CC: Kees Cook <keescook@chromium.org>
CC: linux-kernel@vger.kernel.org
---
 arch/x86/kernel/entry_64.S | 28 ++++++++++++++++------------
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 279af0e..d84c7c7 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -163,12 +163,12 @@ ENDPROC(native_usergs_sysret64)
  * initial frame state for interrupts (and exceptions without error code)
  */
 	.macro INTR_FRAME start=1 offset=0
-	EMPTY_FRAME \start, SS+8+\offset-RIP
-	/*CFI_REL_OFFSET ss, SS+\offset-RIP*/
-	CFI_REL_OFFSET rsp, RSP+\offset-RIP
-	/*CFI_REL_OFFSET rflags, EFLAGS+\offset-RIP*/
-	/*CFI_REL_OFFSET cs, CS+\offset-RIP*/
-	CFI_REL_OFFSET rip, RIP+\offset-RIP
+	EMPTY_FRAME \start, 5*8+\offset
+	/*CFI_REL_OFFSET ss, 4*8+\offset*/
+	CFI_REL_OFFSET rsp, 3*8+\offset
+	/*CFI_REL_OFFSET rflags, 2*8+\offset*/
+	/*CFI_REL_OFFSET cs, 1*8+\offset*/
+	CFI_REL_OFFSET rip, 0*8+\offset
 	.endm
 
 /*
@@ -176,7 +176,7 @@ ENDPROC(native_usergs_sysret64)
  * with vector already pushed)
  */
 	.macro XCPT_FRAME start=1 offset=0
-	INTR_FRAME \start, RIP+\offset-ORIG_RAX
+	INTR_FRAME \start, 1*8+\offset
 	.endm
 
 /*
@@ -685,10 +685,14 @@ END(interrupt)
 	cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
 	CFI_DEF_CFA_REGISTER	rsi
 	pushq %rsi
+	/*
+	 * For debugger:
+	 * "CFA (Current Frame Address) is the value on stack + offset"
+	 */
 	CFI_ESCAPE	0x0f /* DW_CFA_def_cfa_expression */, 6, \
-			0x77 /* DW_OP_breg7 */, 0, \
+			0x77 /* DW_OP_breg7 (rsp) */, 0, \
 			0x06 /* DW_OP_deref */, \
-			0x08 /* DW_OP_const1u */, SS+8-RBP, \
+			0x08 /* DW_OP_const1u */, SIZEOF_PTREGS-RBP, \
 			0x22 /* DW_OP_plus */
 	/* We entered an interrupt context - irqs are off: */
 	TRACE_IRQS_OFF
@@ -714,7 +718,7 @@ ret_from_intr:
 
 	/* Restore saved previous stack */
 	popq %rsi
-	CFI_DEF_CFA rsi,SS+8-RBP	/* reg/off reset after def_cfa_expr */
+	CFI_DEF_CFA rsi,SIZEOF_PTREGS-RBP /* reg/off reset after def_cfa_expr */
 	/* return code expects complete pt_regs - adjust rsp accordingly: */
 	leaq -RBP(%rsi), %rsp
 	CFI_DEF_CFA_REGISTER	rsp
@@ -1525,7 +1529,7 @@ first_nmi:
 	.rept 5
 	pushq_cfi 11*8(%rsp)
 	.endr
-	CFI_DEF_CFA_OFFSET SS+8-RIP
+	CFI_DEF_CFA_OFFSET 5*8
 
 	/* Everything up to here is safe from nested NMIs */
 
@@ -1553,7 +1557,7 @@ repeat_nmi:
 	pushq_cfi -6*8(%rsp)
 	.endr
 	subq $(5*8), %rsp
-	CFI_DEF_CFA_OFFSET SS+8-RIP
+	CFI_DEF_CFA_OFFSET 5*8
 end_repeat_nmi:
 
 	/*
-- 
1.8.1.4


^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH 01/11] x86: entry_64.S: always allocate complete "struct pt_regs"
  2015-01-14 21:48 [PATCH 01/11] x86: entry_64.S: always allocate complete "struct pt_regs" Denys Vlasenko
                   ` (9 preceding siblings ...)
  2015-01-14 21:48 ` [PATCH 11/11] x86: entry_64.S: use more understandable constants Denys Vlasenko
@ 2015-01-14 22:17 ` Denys Vlasenko
  2015-01-14 22:29   ` Andy Lutomirski
  2015-02-11 20:30 ` Andy Lutomirski
  11 siblings, 1 reply; 130+ messages in thread
From: Denys Vlasenko @ 2015-01-14 22:17 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Linus Torvalds, Oleg Nesterov, Borislav Petkov, H. Peter Anvin,
	Frederic Weisbecker, X86 ML, Will Drewry, Kees Cook,
	Linux Kernel Mailing List

On Wed, Jan 14, 2015 at 10:48 PM, Denys Vlasenko <dvlasenk@redhat.com> wrote:
> Macros which manipulate "struct pt_regs" on stack are reworked:
> ALLOC_PT_GPREGS_ON_STACK allocates the structure.
> SAVE_C_REGS saves to it those registers which are clobbered by C code.
> SAVE_EXTRA_REGS saves to it all other registers.
> Corresponding RESTORE_* and REMOVE_PT_GPREGS_FROM_STACK macros reverse it.

Andy,

Please note that this patch set also needs the previously posted patch:

[PATCH 3/4] x86: open-code register save/restore in trace_hardirqs thunks

which is not present in your tree - otherwise arch/x86/lib/thunk_64.S
will not compile (no SAVE_ARGS anymore).

-- 
vda

^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH 01/11] x86: entry_64.S: always allocate complete "struct pt_regs"
  2015-01-14 22:17 ` [PATCH 01/11] x86: entry_64.S: always allocate complete "struct pt_regs" Denys Vlasenko
@ 2015-01-14 22:29   ` Andy Lutomirski
  2015-01-14 22:41     ` Borislav Petkov
  0 siblings, 1 reply; 130+ messages in thread
From: Andy Lutomirski @ 2015-01-14 22:29 UTC (permalink / raw)
  To: Denys Vlasenko
  Cc: Linus Torvalds, Oleg Nesterov, Borislav Petkov, H. Peter Anvin,
	Frederic Weisbecker, X86 ML, Will Drewry, Kees Cook,
	Linux Kernel Mailing List

On Wed, Jan 14, 2015 at 2:17 PM, Denys Vlasenko
<vda.linux@googlemail.com> wrote:
> On Wed, Jan 14, 2015 at 10:48 PM, Denys Vlasenko <dvlasenk@redhat.com> wrote:
>> Macros which manipulate "struct pt_regs" on stack are reworked:
>> ALLOC_PT_GPREGS_ON_STACK allocates the structure.
>> SAVE_C_REGS saves to it those registers which are clobbered by C code.
>> SAVE_EXTRA_REGS saves to it all other registers.
>> Corresponding RESTORE_* and REMOVE_PT_GPREGS_FROM_STACK macros reverse it.
>
> Andy,
>
> Please note that this patch set also needs the previously posted patch:
>
> [PATCH 3/4] x86: open-code register save/restore in trace_hardirqs thunks
>
> which is not present in your tree - otherwise arch/x86/lib/thunk_64.S
> will not compile (no SAVE_ARGS anymore).

I need to go through Linus' and everyone else's comments on that to
figure out what to do first.

--Andy

>
> --
> vda



-- 
Andy Lutomirski
AMA Capital Management, LLC

^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH 01/11] x86: entry_64.S: always allocate complete "struct pt_regs"
  2015-01-14 22:29   ` Andy Lutomirski
@ 2015-01-14 22:41     ` Borislav Petkov
  2015-01-14 22:50       ` Denys Vlasenko
  0 siblings, 1 reply; 130+ messages in thread
From: Borislav Petkov @ 2015-01-14 22:41 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Denys Vlasenko, Linus Torvalds, Oleg Nesterov, H. Peter Anvin,
	Frederic Weisbecker, X86 ML, Will Drewry, Kees Cook,
	Linux Kernel Mailing List

On Wed, Jan 14, 2015 at 02:29:39PM -0800, Andy Lutomirski wrote:
> I need to go through Linus' and everyone else's comments on that to
> figure out what to do first.

Right, we want to take it slow and be conservative here - entry_XX.S is
not a joke.

-- 
Regards/Gruss,
    Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH 01/11] x86: entry_64.S: always allocate complete "struct pt_regs"
  2015-01-14 22:41     ` Borislav Petkov
@ 2015-01-14 22:50       ` Denys Vlasenko
  0 siblings, 0 replies; 130+ messages in thread
From: Denys Vlasenko @ 2015-01-14 22:50 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Andy Lutomirski, Linus Torvalds, Oleg Nesterov, H. Peter Anvin,
	Frederic Weisbecker, X86 ML, Will Drewry, Kees Cook,
	Linux Kernel Mailing List

On Wed, Jan 14, 2015 at 11:41 PM, Borislav Petkov <bp@alien8.de> wrote:
> On Wed, Jan 14, 2015 at 02:29:39PM -0800, Andy Lutomirski wrote:
>> I need to go through Linus' and everyone else's comments on that to
>> figure out what to do first.
>
> Right, we want to take it slow and be conservative here - entry_XX.S is
> not a joke.

No problem - I just wanted you to know what I have in the pipeline.

There are four more optimization patches on top of this patchset,
I'm not posting those because I see they will definitely collide
with some of Andy's patches.

^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH 3/4] x86: open-code register save/restore in trace_hardirqs thunks
  2015-01-10 22:00 ` [PATCH 3/4] x86: open-code register save/restore in trace_hardirqs thunks Denys Vlasenko
  2015-01-10 22:07   ` Linus Torvalds
@ 2015-02-11  2:38   ` Andy Lutomirski
  1 sibling, 0 replies; 130+ messages in thread
From: Andy Lutomirski @ 2015-02-11  2:38 UTC (permalink / raw)
  To: Denys Vlasenko
  Cc: linux-kernel, Linus Torvalds, Oleg Nesterov, H. Peter Anvin,
	Borislav Petkov, Frederic Weisbecker, X86 ML, Alexei Starovoitov,
	Will Drewry, Kees Cook

On Sat, Jan 10, 2015 at 2:00 PM, Denys Vlasenko <dvlasenk@redhat.com> wrote:
> This is a preparatory patch for change in "struct pt_regs"
> handling in entry_64.S.
>
> trace_hardirqs thunks were (ab)using a part of pt_regs
> handling code, namely SAVE_ARGS/RESTORE_ARGS macros,
> to save/restore registers across C function calls.
>
> Since SAVE_ARGS is going to be changed, open-code
> register saving/restoring here.
>
> Incidentally, this removes a bit of dead code:
> one SAVE_ARGS was used just to emit a CFI annotation,
> but it also generated unreachable assembly insns.
>
> Take a page from thunk_32.S and use push/pop insns
> instead of movq, they are far shorter:
> 1 or 2 bytes versus 5, and no need for insns to adjust %rsp:
>
>    text    data     bss     dec     hex filename
>     333      40       0     373     175 thunk_64_movq.o
>     104      40       0     144      90 thunk_64_push_pop.o
>
> Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
> CC: Linus Torvalds <torvalds@linux-foundation.org>
> CC: Oleg Nesterov <oleg@redhat.com>
> CC: "H. Peter Anvin" <hpa@zytor.com>
> CC: Borislav Petkov <bp@alien8.de>
> CC: Andy Lutomirski <luto@amacapital.net>
> CC: Frederic Weisbecker <fweisbec@gmail.com>
> CC: X86 ML <x86@kernel.org>
> CC: Alexei Starovoitov <ast@plumgrid.com>
> CC: Will Drewry <wad@chromium.org>
> CC: Kees Cook <keescook@chromium.org>
> CC: linux-kernel@vger.kernel.org
> ---
>  arch/x86/lib/thunk_64.S | 46 ++++++++++++++++++++++++++++++++++++++++++----
>  1 file changed, 42 insertions(+), 4 deletions(-)
>
> diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S
> index b30b5eb..8ec443a 100644
> --- a/arch/x86/lib/thunk_64.S
> +++ b/arch/x86/lib/thunk_64.S
> @@ -17,9 +17,27 @@
>         CFI_STARTPROC
>
>         /* this one pushes 9 elems, the next one would be %rIP */
> -       SAVE_ARGS
> +       pushq_cfi %rdi
> +       CFI_REL_OFFSET rdi, 0
> +       pushq_cfi %rsi
> +       CFI_REL_OFFSET rsi, 0
> +       pushq_cfi %rdx
> +       CFI_REL_OFFSET rdx, 0
> +       pushq_cfi %rcx
> +       CFI_REL_OFFSET rcx, 0
> +       pushq_cfi %rax
> +       CFI_REL_OFFSET rax, 0
> +       pushq_cfi %r8
> +       CFI_REL_OFFSET r8, 0
> +       pushq_cfi %r9
> +       CFI_REL_OFFSET r9, 0
> +       pushq_cfi %r10
> +       CFI_REL_OFFSET r10, 0
> +       pushq_cfi %r11
> +       CFI_REL_OFFSET r11, 0
>
>         .if \put_ret_addr_in_rdi
> +       /* 9*8(%rsp) is return addr on stack */
>         movq_cfi_restore 9*8, rdi
>         .endif
>
> @@ -45,11 +63,31 @@
>  #endif
>  #endif
>
> -       /* SAVE_ARGS below is used only for the .cfi directives it contains. */
> +#if defined(CONFIG_TRACE_IRQFLAGS) \
> + || defined(CONFIG_DEBUG_LOCK_ALLOC) \
> + || defined(CONFIG_PREEMPT)
>         CFI_STARTPROC
> -       SAVE_ARGS
> +       CFI_ADJUST_CFA_OFFSET 9*8
>  restore:
> -       RESTORE_ARGS
> +       popq_cfi %r11
> +       CFI_RESTORE r11
> +       popq_cfi %r10
> +       CFI_RESTORE r10
> +       popq_cfi %r9
> +       CFI_RESTORE r9
> +       popq_cfi %r8
> +       CFI_RESTORE r8
> +       popq_cfi %rax
> +       CFI_RESTORE rax
> +       popq_cfi %rcx
> +       CFI_RESTORE rcx
> +       popq_cfi %rdx
> +       CFI_RESTORE rdx
> +       popq_cfi %rsi
> +       CFI_RESTORE rsi
> +       popq_cfi %rdi
> +       CFI_RESTORE rdi
>         ret
>         CFI_ENDPROC
>         _ASM_NOKPROBE(restore)
> +#endif


Applied.  I'll deal with the cleanup as a followup tomorrow.

^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH] x86: introduce push/pop macros which generate CFI_REL_OFFSET and CFI_RESTORE
  2015-01-11 23:07 [PATCH] x86: introduce push/pop macros which generate CFI_REL_OFFSET and CFI_RESTORE Denys Vlasenko
  2015-01-12  0:38 ` Andy Lutomirski
  2015-01-12 19:23 ` Borislav Petkov
@ 2015-02-11 20:24 ` Andy Lutomirski
  2 siblings, 0 replies; 130+ messages in thread
From: Andy Lutomirski @ 2015-02-11 20:24 UTC (permalink / raw)
  To: Denys Vlasenko
  Cc: linux-kernel, Linus Torvalds, Oleg Nesterov, H. Peter Anvin,
	Borislav Petkov, Frederic Weisbecker, X86 ML, Alexei Starovoitov,
	Will Drewry, Kees Cook

On Sun, Jan 11, 2015 at 3:07 PM, Denys Vlasenko <dvlasenk@redhat.com> wrote:
> Sequences
>         pushl_cfi %reg
>         CFI_REL_OFFSET reg, 0
> and
>         popl_cfi %reg
>         CFI_RESTORE reg
> happen quite often. This patch adds macros which generate them.
>
> No assembly changes (verified with objdump -dr vmlinux.o).

Applied as-is.  I'm sure that there's an even better way to do this,
but I think that this patch is a clear improvement.

--Andy

^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH 01/11] x86: entry_64.S: always allocate complete "struct pt_regs"
  2015-01-14 21:48 [PATCH 01/11] x86: entry_64.S: always allocate complete "struct pt_regs" Denys Vlasenko
                   ` (10 preceding siblings ...)
  2015-01-14 22:17 ` [PATCH 01/11] x86: entry_64.S: always allocate complete "struct pt_regs" Denys Vlasenko
@ 2015-02-11 20:30 ` Andy Lutomirski
  2015-02-11 21:55   ` Denys Vlasenko
  11 siblings, 1 reply; 130+ messages in thread
From: Andy Lutomirski @ 2015-02-11 20:30 UTC (permalink / raw)
  To: Denys Vlasenko
  Cc: Linus Torvalds, Oleg Nesterov, Borislav Petkov, H. Peter Anvin,
	Frederic Weisbecker, X86 ML, Alexei Starovoitov, Will Drewry,
	Kees Cook, linux-kernel

On Wed, Jan 14, 2015 at 1:48 PM, Denys Vlasenko <dvlasenk@redhat.com> wrote:
> 64-bit code was using six stack slots less by not saving/restoring
> registers which are callee-preserved according to C ABI,
> and not allocating space for them.
> Only when syscall needed a complete "struct pt_regs",
> the complete area was allocated and filled in.
> As an additional twist, on interrupt entry a "slightly less truncated pt_regs"
> trick is used, to make nested interrupt stacks easier to unwind.
>
> This proved to be a source of significant obfuscation and subtle bugs.
> For example, stub_fork had to pop the return address,
> extend the struct, save registers, and push return address back. Ugly.
> ia32_ptregs_common pops return address and "returns" via jmp insn,
> throwing a wrench into CPU return stack cache.
>
> This patch changes code to always allocate a complete "struct pt_regs".
> The saving of registers is still done lazily.
>
> "Partial pt_regs" trick on interrupt stack is retained, but with added comments
> which explain what we are doing, and why. Existing comments are improved.
>
> Macros which manipulate "struct pt_regs" on stack are reworked:
> ALLOC_PT_GPREGS_ON_STACK allocates the structure.
> SAVE_C_REGS saves to it those registers which are clobbered by C code.
> SAVE_EXTRA_REGS saves to it all other registers.
> Corresponding RESTORE_* and REMOVE_PT_GPREGS_FROM_STACK macros reverse it.
>
> ia32_ptregs_common, stub_fork and friends lost their ugly dance with
> return pointer.
>
> LOAD_ARGS32 in ia32entry.S now uses symbolic stack offsets
> instead of magic numbers.
>
> error_entry and save_paranoid now use SAVE_C_REGS + SAVE_EXTRA_REGS
> instead of having it open-coded yet again.
>
> Misleading and slightly wrong comments in "struct pt_regs" are fixed
> (four instances).
>
> Patch was run-tested: 64-bit executables, 32-bit executables,
> strace works.
> Timing tests did not show measurable difference in 32-bit
> and 64-bit syscalls.

I like this.  However, can you split it?  I think it would be easier
to review and more bisect-friendly if there were first a patch to do
all the macro and comment cleanup without any layout changes followed
by bite-sized layout changes.  As it stands, I can't just go through
this to make sure that nothing is changing, because things are
changing.

--Andy

^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH 02/11] x86: code shrink in paranoid_exit
  2015-01-14 21:48 ` [PATCH 02/11] x86: code shrink in paranoid_exit Denys Vlasenko
@ 2015-02-11 20:36   ` Andy Lutomirski
  2015-02-11 21:01     ` H. Peter Anvin
  2015-02-11 21:13     ` Denys Vlasenko
  2015-02-18 23:26   ` Andy Lutomirski
  1 sibling, 2 replies; 130+ messages in thread
From: Andy Lutomirski @ 2015-02-11 20:36 UTC (permalink / raw)
  To: Denys Vlasenko
  Cc: Linus Torvalds, Oleg Nesterov, Borislav Petkov, H. Peter Anvin,
	Frederic Weisbecker, X86 ML, Alexei Starovoitov, Will Drewry,
	Kees Cook, linux-kernel

On Wed, Jan 14, 2015 at 1:48 PM, Denys Vlasenko <dvlasenk@redhat.com> wrote:
> RESTORE_EXTRA_REGS + RESTORE_C_REGS looks small, but it's
> a lot of instructions (fourteen). Let's reuse them.
>
> Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
> CC: Linus Torvalds <torvalds@linux-foundation.org>
> CC: Oleg Nesterov <oleg@redhat.com>
> CC: Borislav Petkov <bp@alien8.de>
> CC: "H. Peter Anvin" <hpa@zytor.com>
> CC: Andy Lutomirski <luto@amacapital.net>
> CC: Frederic Weisbecker <fweisbec@gmail.com>
> CC: X86 ML <x86@kernel.org>
> CC: Alexei Starovoitov <ast@plumgrid.com>
> CC: Will Drewry <wad@chromium.org>
> CC: Kees Cook <keescook@chromium.org>
> CC: linux-kernel@vger.kernel.org
> ---
>  arch/x86/kernel/entry_64.S | 6 ++----
>  1 file changed, 2 insertions(+), 4 deletions(-)
>
> diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
> index a21b5b3..f5e815e 100644
> --- a/arch/x86/kernel/entry_64.S
> +++ b/arch/x86/kernel/entry_64.S
> @@ -1270,12 +1270,10 @@ ENTRY(paranoid_exit)
>         jnz paranoid_restore
>         TRACE_IRQS_IRETQ 0
>         SWAPGS_UNSAFE_STACK
> -       RESTORE_EXTRA_REGS
> -       RESTORE_C_REGS
> -       REMOVE_PT_GPREGS_FROM_STACK 8
> -       INTERRUPT_RETURN
> +       jmp paranoid_restore1
>  paranoid_restore:
>         TRACE_IRQS_IRETQ_DEBUG 0
> +paranoid_restore1:
>         RESTORE_EXTRA_REGS
>         RESTORE_C_REGS
>         REMOVE_PT_GPREGS_FROM_STACK 8

This is sort of a reply to the wrong thread, but wouldn't it be nicer
if we could pop a bunch of regs instead of using mov followed by add?
(Yes, this could be a followup, but it could be easier to spot now by
changing macros like RESTORE_XYZ.)

--Andy

^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH 07/11] x86: entry_64.S: rename save_paranoid to paranoid_entry, no code changes
  2015-01-14 21:48 ` [PATCH 07/11] x86: entry_64.S: rename save_paranoid to paranoid_entry, no code changes Denys Vlasenko
@ 2015-02-11 20:39   ` Andy Lutomirski
  0 siblings, 0 replies; 130+ messages in thread
From: Andy Lutomirski @ 2015-02-11 20:39 UTC (permalink / raw)
  To: Denys Vlasenko
  Cc: Linus Torvalds, Oleg Nesterov, Borislav Petkov, H. Peter Anvin,
	Frederic Weisbecker, X86 ML, Alexei Starovoitov, Will Drewry,
	Kees Cook, linux-kernel

On Wed, Jan 14, 2015 at 1:48 PM, Denys Vlasenko <dvlasenk@redhat.com> wrote:
> This patch does a lot of cleanup in comments and formatting,
> but it does not change any code.
>
> Rename save_paranoid to paranoid_entry: this makes naming
> similar to its "non-paranoid" sibling, error_entry,
> and to its counterpart, paranoid_exit.
>
> Use the same CFI annotation atop paranoid_entry and error_entry.
>
> Fix irregular indentation of assembler operands.
>
> Add/fix comments on top of paranoid_entry and error_entry.
> Remove stale comment about "oldrax".
> Make comments about "no swapgs" flag in ebx more prominent.
> Deindent wrongly indented top-level comment atop paranoid_exit.
> Indent wrongly deindented comment inside error_entry.
>
> Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>

Looks good.  Will apply once the rest of the series is ready.

--Andy

^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH 08/11] x86: entry_64.S: fold test_in_nmi macro into its only user
  2015-01-14 21:48 ` [PATCH 08/11] x86: entry_64.S: fold test_in_nmi macro into its only user Denys Vlasenko
@ 2015-02-11 20:40   ` Andy Lutomirski
  2015-02-12  2:17     ` Steven Rostedt
  0 siblings, 1 reply; 130+ messages in thread
From: Andy Lutomirski @ 2015-02-11 20:40 UTC (permalink / raw)
  To: Denys Vlasenko, Steven Rostedt
  Cc: Linus Torvalds, Oleg Nesterov, Borislav Petkov, H. Peter Anvin,
	Frederic Weisbecker, X86 ML, Alexei Starovoitov, Will Drewry,
	Kees Cook, linux-kernel

On Wed, Jan 14, 2015 at 1:48 PM, Denys Vlasenko <dvlasenk@redhat.com> wrote:
> No code changes.

Steven, is this okay with you?

--Andy

>
> Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
> CC: Linus Torvalds <torvalds@linux-foundation.org>
> CC: Oleg Nesterov <oleg@redhat.com>
> CC: Borislav Petkov <bp@alien8.de>
> CC: "H. Peter Anvin" <hpa@zytor.com>
> CC: Andy Lutomirski <luto@amacapital.net>
> CC: Frederic Weisbecker <fweisbec@gmail.com>
> CC: X86 ML <x86@kernel.org>
> CC: Alexei Starovoitov <ast@plumgrid.com>
> CC: Will Drewry <wad@chromium.org>
> CC: Kees Cook <keescook@chromium.org>
> CC: linux-kernel@vger.kernel.org
> ---
>  arch/x86/kernel/entry_64.S | 24 +++++++++---------------
>  1 file changed, 9 insertions(+), 15 deletions(-)
>
> diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
> index aacfa2c..71036fe 100644
> --- a/arch/x86/kernel/entry_64.S
> +++ b/arch/x86/kernel/entry_64.S
> @@ -1365,19 +1365,7 @@ ENTRY(error_exit)
>         CFI_ENDPROC
>  END(error_exit)
>
> -/*
> - * Test if a given stack is an NMI stack or not.
> - */
> -       .macro test_in_nmi reg stack nmi_ret normal_ret
> -       cmpq %\reg, \stack
> -       ja \normal_ret
> -       subq $EXCEPTION_STKSZ, %\reg
> -       cmpq %\reg, \stack
> -       jb \normal_ret
> -       jmp \nmi_ret
> -       .endm
> -
> -       /* runs on exception stack */
> +/* Runs on exception stack */
>  ENTRY(nmi)
>         INTR_FRAME
>         PARAVIRT_ADJUST_EXCEPTION_FRAME
> @@ -1438,8 +1426,14 @@ ENTRY(nmi)
>          * We check the variable because the first NMI could be in a
>          * breakpoint routine using a breakpoint stack.
>          */
> -       lea 6*8(%rsp), %rdx
> -       test_in_nmi rdx, 4*8(%rsp), nested_nmi, first_nmi
> +       lea     6*8(%rsp), %rdx
> +       cmpq    %rdx, 4*8(%rsp)
> +       ja      first_nmi
> +       subq    $EXCEPTION_STKSZ, %rdx
> +       cmpq    %rdx, 4*8(%rsp)
> +       jb      first_nmi
> +       jmp     nested_nmi
> +
>         CFI_REMEMBER_STATE
>
>  nested_nmi:
> --
> 1.8.1.4
>



-- 
Andy Lutomirski
AMA Capital Management, LLC

^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH 02/11] x86: code shrink in paranoid_exit
  2015-02-11 20:36   ` Andy Lutomirski
@ 2015-02-11 21:01     ` H. Peter Anvin
  2015-02-11 21:13     ` Denys Vlasenko
  1 sibling, 0 replies; 130+ messages in thread
From: H. Peter Anvin @ 2015-02-11 21:01 UTC (permalink / raw)
  To: Andy Lutomirski, Denys Vlasenko
  Cc: Linus Torvalds, Oleg Nesterov, Borislav Petkov,
	Frederic Weisbecker, X86 ML, Alexei Starovoitov, Will Drewry,
	Kees Cook, linux-kernel

On 02/11/2015 12:36 PM, Andy Lutomirski wrote:
> 
> This is sort of a reply to the wrong thread, but wouldn't it be nicer
> if we could pop a bunch of regs instead of using mov followed by add?
> (Yes, this could be a followup, but it could be easier to spot now by
> changing macros like RESTORE_XYZ.)
> 

As far as I recall, it was an optimization for the Pentium 4 and
possibly K8.  At this point, most CPUs have stack engines so push/pop
may be faster or at least the same speed.  It would be nice to
microbenchmark.

	-hpa



^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH 02/11] x86: code shrink in paranoid_exit
  2015-02-11 20:36   ` Andy Lutomirski
  2015-02-11 21:01     ` H. Peter Anvin
@ 2015-02-11 21:13     ` Denys Vlasenko
  2015-02-11 22:09       ` Andy Lutomirski
  1 sibling, 1 reply; 130+ messages in thread
From: Denys Vlasenko @ 2015-02-11 21:13 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Linus Torvalds, Oleg Nesterov, Borislav Petkov, H. Peter Anvin,
	Frederic Weisbecker, X86 ML, Alexei Starovoitov, Will Drewry,
	Kees Cook, linux-kernel

On 02/11/2015 09:36 PM, Andy Lutomirski wrote:
> On Wed, Jan 14, 2015 at 1:48 PM, Denys Vlasenko <dvlasenk@redhat.com> wrote:
>> RESTORE_EXTRA_REGS + RESTORE_C_REGS looks small, but it's
>> a lot of instructions (fourteen). Let's reuse them.
>>
>> Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
>> CC: Linus Torvalds <torvalds@linux-foundation.org>
>> CC: Oleg Nesterov <oleg@redhat.com>
>> CC: Borislav Petkov <bp@alien8.de>
>> CC: "H. Peter Anvin" <hpa@zytor.com>
>> CC: Andy Lutomirski <luto@amacapital.net>
>> CC: Frederic Weisbecker <fweisbec@gmail.com>
>> CC: X86 ML <x86@kernel.org>
>> CC: Alexei Starovoitov <ast@plumgrid.com>
>> CC: Will Drewry <wad@chromium.org>
>> CC: Kees Cook <keescook@chromium.org>
>> CC: linux-kernel@vger.kernel.org
>> ---
>>  arch/x86/kernel/entry_64.S | 6 ++----
>>  1 file changed, 2 insertions(+), 4 deletions(-)
>>
>> diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
>> index a21b5b3..f5e815e 100644
>> --- a/arch/x86/kernel/entry_64.S
>> +++ b/arch/x86/kernel/entry_64.S
>> @@ -1270,12 +1270,10 @@ ENTRY(paranoid_exit)
>>         jnz paranoid_restore
>>         TRACE_IRQS_IRETQ 0
>>         SWAPGS_UNSAFE_STACK
>> -       RESTORE_EXTRA_REGS
>> -       RESTORE_C_REGS
>> -       REMOVE_PT_GPREGS_FROM_STACK 8
>> -       INTERRUPT_RETURN
>> +       jmp paranoid_restore1
>>  paranoid_restore:
>>         TRACE_IRQS_IRETQ_DEBUG 0
>> +paranoid_restore1:
>>         RESTORE_EXTRA_REGS
>>         RESTORE_C_REGS
>>         REMOVE_PT_GPREGS_FROM_STACK 8
> 
> This is sort of a reply to the wrong thread, but wouldn't it be nicer
> if we could pop a bunch of regs instead of using mov followed by add?

I like this (pop instead of mov) too.

> (Yes, this could be a followup, but it could be easier to spot now by
> changing macros like RESTORE_XYZ.)

I don't understand what this means.
"Let's switch to pops now, in this patch"? Or something else?

^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH 01/11] x86: entry_64.S: always allocate complete "struct pt_regs"
  2015-02-11 20:30 ` Andy Lutomirski
@ 2015-02-11 21:55   ` Denys Vlasenko
  2015-02-11 22:03     ` Andy Lutomirski
  0 siblings, 1 reply; 130+ messages in thread
From: Denys Vlasenko @ 2015-02-11 21:55 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Linus Torvalds, Oleg Nesterov, Borislav Petkov, H. Peter Anvin,
	Frederic Weisbecker, X86 ML, Alexei Starovoitov, Will Drewry,
	Kees Cook, linux-kernel

On 02/11/2015 09:30 PM, Andy Lutomirski wrote:
> On Wed, Jan 14, 2015 at 1:48 PM, Denys Vlasenko <dvlasenk@redhat.com> wrote:
>> 64-bit code was using six stack slots less by not saving/restoring
>> registers which are callee-preserved according to C ABI,
>> and not allocating space for them.
>> Only when syscall needed a complete "struct pt_regs",
>> the complete area was allocated and filled in.
>> As an additional twist, on interrupt entry a "slightly less truncated pt_regs"
>> trick is used, to make nested interrupt stacks easier to unwind.
>>
>> This proved to be a source of significant obfuscation and subtle bugs.
>> For example, stub_fork had to pop the return address,
>> extend the struct, save registers, and push return address back. Ugly.
>> ia32_ptregs_common pops return address and "returns" via jmp insn,
>> throwing a wrench into CPU return stack cache.
>>
>> This patch changes code to always allocate a complete "struct pt_regs".
>> The saving of registers is still done lazily.
>>
>> "Partial pt_regs" trick on interrupt stack is retained, but with added comments
>> which explain what we are doing, and why. Existing comments are improved.
>>
>> Macros which manipulate "struct pt_regs" on stack are reworked:
>> ALLOC_PT_GPREGS_ON_STACK allocates the structure.
>> SAVE_C_REGS saves to it those registers which are clobbered by C code.
>> SAVE_EXTRA_REGS saves to it all other registers.
>> Corresponding RESTORE_* and REMOVE_PT_GPREGS_FROM_STACK macros reverse it.
>>
>> ia32_ptregs_common, stub_fork and friends lost their ugly dance with
>> return pointer.
>>
>> LOAD_ARGS32 in ia32entry.S now uses symbolic stack offsets
>> instead of magic numbers.
>>
>> error_entry and save_paranoid now use SAVE_C_REGS + SAVE_EXTRA_REGS
>> instead of having it open-coded yet again.
>>
>> Misleading and slightly wrong comments in "struct pt_regs" are fixed
>> (four instances).
>>
>> Patch was run-tested: 64-bit executables, 32-bit executables,
>> strace works.
>> Timing tests did not show measurable difference in 32-bit
>> and 64-bit syscalls.
> 
> I like this.  However, can you split it?  I think it would be easier
> to review and more bisect-friendly if there were first a patch to do
> all the macro and comment cleanup without any layout changes followed
> by bite-sized layout changes.

Changes in macro names are intentionally made in a single patch,
together with significant changes to their meanings.
Renaming macros makes sure I reviewed all uses - otherwise
(if I missed a macro use somewhere) it would just not compile.

LOAD_ARGS32 change to use R9 etc instead of magic numbers like 16
also can't be split up. The point is, 16 is not equal to R9 (which is 64),
it only becomes equal because LOAD_ARGS32 is invoked with offset=ARGOFFSET,
and ARGOFFSET+16 = R9. The patch sets ARGOFSSET to 0, thus only after this
patch, I can replace magic number with R9. But if I'd do that,
then _this_ patch would still need to replace one magic number (16) -
with another magic number, 64, to not break stuff.
It makes sense, then, to not do the change there twice, in two patches,
and instead just bite the bullet and replace 16 with R9 in this patch.

In a few minutes I'll send you two patches which resulted from splitting
comment changes into a separate patch.

-- 
vda


^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH 01/11] x86: entry_64.S: always allocate complete "struct pt_regs"
  2015-02-11 21:55   ` Denys Vlasenko
@ 2015-02-11 22:03     ` Andy Lutomirski
  0 siblings, 0 replies; 130+ messages in thread
From: Andy Lutomirski @ 2015-02-11 22:03 UTC (permalink / raw)
  To: Denys Vlasenko
  Cc: Linus Torvalds, Oleg Nesterov, Borislav Petkov, H. Peter Anvin,
	Frederic Weisbecker, X86 ML, Alexei Starovoitov, Will Drewry,
	Kees Cook, linux-kernel

On Wed, Feb 11, 2015 at 1:55 PM, Denys Vlasenko <dvlasenk@redhat.com> wrote:
> On 02/11/2015 09:30 PM, Andy Lutomirski wrote:
>> On Wed, Jan 14, 2015 at 1:48 PM, Denys Vlasenko <dvlasenk@redhat.com> wrote:
>>> 64-bit code was using six stack slots less by not saving/restoring
>>> registers which are callee-preserved according to C ABI,
>>> and not allocating space for them.
>>> Only when syscall needed a complete "struct pt_regs",
>>> the complete area was allocated and filled in.
>>> As an additional twist, on interrupt entry a "slightly less truncated pt_regs"
>>> trick is used, to make nested interrupt stacks easier to unwind.
>>>
>>> This proved to be a source of significant obfuscation and subtle bugs.
>>> For example, stub_fork had to pop the return address,
>>> extend the struct, save registers, and push return address back. Ugly.
>>> ia32_ptregs_common pops return address and "returns" via jmp insn,
>>> throwing a wrench into CPU return stack cache.
>>>
>>> This patch changes code to always allocate a complete "struct pt_regs".
>>> The saving of registers is still done lazily.
>>>
>>> "Partial pt_regs" trick on interrupt stack is retained, but with added comments
>>> which explain what we are doing, and why. Existing comments are improved.
>>>
>>> Macros which manipulate "struct pt_regs" on stack are reworked:
>>> ALLOC_PT_GPREGS_ON_STACK allocates the structure.
>>> SAVE_C_REGS saves to it those registers which are clobbered by C code.
>>> SAVE_EXTRA_REGS saves to it all other registers.
>>> Corresponding RESTORE_* and REMOVE_PT_GPREGS_FROM_STACK macros reverse it.
>>>
>>> ia32_ptregs_common, stub_fork and friends lost their ugly dance with
>>> return pointer.
>>>
>>> LOAD_ARGS32 in ia32entry.S now uses symbolic stack offsets
>>> instead of magic numbers.
>>>
>>> error_entry and save_paranoid now use SAVE_C_REGS + SAVE_EXTRA_REGS
>>> instead of having it open-coded yet again.
>>>
>>> Misleading and slightly wrong comments in "struct pt_regs" are fixed
>>> (four instances).
>>>
>>> Patch was run-tested: 64-bit executables, 32-bit executables,
>>> strace works.
>>> Timing tests did not show measurable difference in 32-bit
>>> and 64-bit syscalls.
>>
>> I like this.  However, can you split it?  I think it would be easier
>> to review and more bisect-friendly if there were first a patch to do
>> all the macro and comment cleanup without any layout changes followed
>> by bite-sized layout changes.
>
> Changes in macro names are intentionally made in a single patch,
> together with significant changes to their meanings.
> Renaming macros makes sure I reviewed all uses - otherwise
> (if I missed a macro use somewhere) it would just not compile.
>
> LOAD_ARGS32 change to use R9 etc instead of magic numbers like 16
> also can't be split up. The point is, 16 is not equal to R9 (which is 64),
> it only becomes equal because LOAD_ARGS32 is invoked with offset=ARGOFFSET,
> and ARGOFFSET+16 = R9. The patch sets ARGOFSSET to 0, thus only after this
> patch, I can replace magic number with R9. But if I'd do that,
> then _this_ patch would still need to replace one magic number (16) -
> with another magic number, 64, to not break stuff.
> It makes sense, then, to not do the change there twice, in two patches,
> and instead just bite the bullet and replace 16 with R9 in this patch.

Fair enough.  I'll have lots of fun reviewing, then.  This may take a
couple days.

--Andy

^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH 02/11] x86: code shrink in paranoid_exit
  2015-02-11 21:13     ` Denys Vlasenko
@ 2015-02-11 22:09       ` Andy Lutomirski
  0 siblings, 0 replies; 130+ messages in thread
From: Andy Lutomirski @ 2015-02-11 22:09 UTC (permalink / raw)
  To: Denys Vlasenko
  Cc: Linus Torvalds, Oleg Nesterov, Borislav Petkov, H. Peter Anvin,
	Frederic Weisbecker, X86 ML, Alexei Starovoitov, Will Drewry,
	Kees Cook, linux-kernel

On Wed, Feb 11, 2015 at 1:13 PM, Denys Vlasenko <dvlasenk@redhat.com> wrote:
> On 02/11/2015 09:36 PM, Andy Lutomirski wrote:
>> On Wed, Jan 14, 2015 at 1:48 PM, Denys Vlasenko <dvlasenk@redhat.com> wrote:
>>> RESTORE_EXTRA_REGS + RESTORE_C_REGS looks small, but it's
>>> a lot of instructions (fourteen). Let's reuse them.
>>>
>>> Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
>>> CC: Linus Torvalds <torvalds@linux-foundation.org>
>>> CC: Oleg Nesterov <oleg@redhat.com>
>>> CC: Borislav Petkov <bp@alien8.de>
>>> CC: "H. Peter Anvin" <hpa@zytor.com>
>>> CC: Andy Lutomirski <luto@amacapital.net>
>>> CC: Frederic Weisbecker <fweisbec@gmail.com>
>>> CC: X86 ML <x86@kernel.org>
>>> CC: Alexei Starovoitov <ast@plumgrid.com>
>>> CC: Will Drewry <wad@chromium.org>
>>> CC: Kees Cook <keescook@chromium.org>
>>> CC: linux-kernel@vger.kernel.org
>>> ---
>>>  arch/x86/kernel/entry_64.S | 6 ++----
>>>  1 file changed, 2 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
>>> index a21b5b3..f5e815e 100644
>>> --- a/arch/x86/kernel/entry_64.S
>>> +++ b/arch/x86/kernel/entry_64.S
>>> @@ -1270,12 +1270,10 @@ ENTRY(paranoid_exit)
>>>         jnz paranoid_restore
>>>         TRACE_IRQS_IRETQ 0
>>>         SWAPGS_UNSAFE_STACK
>>> -       RESTORE_EXTRA_REGS
>>> -       RESTORE_C_REGS
>>> -       REMOVE_PT_GPREGS_FROM_STACK 8
>>> -       INTERRUPT_RETURN
>>> +       jmp paranoid_restore1
>>>  paranoid_restore:
>>>         TRACE_IRQS_IRETQ_DEBUG 0
>>> +paranoid_restore1:
>>>         RESTORE_EXTRA_REGS
>>>         RESTORE_C_REGS
>>>         REMOVE_PT_GPREGS_FROM_STACK 8
>>
>> This is sort of a reply to the wrong thread, but wouldn't it be nicer
>> if we could pop a bunch of regs instead of using mov followed by add?
>
> I like this (pop instead of mov) too.
>
>> (Yes, this could be a followup, but it could be easier to spot now by
>> changing macros like RESTORE_XYZ.)
>
> I don't understand what this means.
> "Let's switch to pops now, in this patch"? Or something else?

You're replacing things like SAVE_XYZ with sequences like
ALLOC_SPACE_FOR_XYZ, RESTORE_XYZ, and replacing them with POP_XYZ
could be done in a single step.

That being said, let's not overcomplicate this.  I'll review the patch
as is, and we can switch to push/pop (or not) later on.

--Andy

^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH 08/11] x86: entry_64.S: fold test_in_nmi macro into its only user
  2015-02-11 20:40   ` Andy Lutomirski
@ 2015-02-12  2:17     ` Steven Rostedt
  0 siblings, 0 replies; 130+ messages in thread
From: Steven Rostedt @ 2015-02-12  2:17 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Denys Vlasenko, Linus Torvalds, Oleg Nesterov, Borislav Petkov,
	H. Peter Anvin, Frederic Weisbecker, X86 ML, Alexei Starovoitov,
	Will Drewry, Kees Cook, linux-kernel

On Wed, 11 Feb 2015 12:40:36 -0800
Andy Lutomirski <luto@amacapital.net> wrote:

> On Wed, Jan 14, 2015 at 1:48 PM, Denys Vlasenko <dvlasenk@redhat.com> wrote:
> > No code changes.
> 
> Steven, is this okay with you?

Not as is. The reason I created the macro in the first place was to
give names to values and labels. That is:

-       .macro test_in_nmi reg stack nmi_ret normal_ret
-       cmpq %\reg, \stack
-       ja \normal_ret
-       subq $EXCEPTION_STKSZ, %\reg
-       cmpq %\reg, \stack
-       jb \normal_ret
-       jmp \nmi_ret

Shows you that we are comparing the reg to a given stack. And it also
shows us where to jump as a result (normal_ret vs nmi_ret).


We lose that with:

+       cmpq    %rdx, 4*8(%rsp)
+       ja      first_nmi
+       subq    $EXCEPTION_STKSZ, %rdx
+       cmpq    %rdx, 4*8(%rsp)
+       jb      first_nmi
+       jmp     nested_nmi

I'm not against the change. I would like to see some descriptive
comments along with it. Like adding:

        /* Compare the NMI stack (rdx) with the stack we came from (4*8(%rsp)) */
	cmpq    %rdx, 4*8(%rsp)
        /* If the stack pointer is above the NMI stack, this is a normal NMI */
      	ja      first_nmi
       	subq    $EXCEPTION_STKSZ, %rdx
       	cmpq    %rdx, 4*8(%rsp)
        /* If it is below the NMI stack, it is a normal NMI */
       	jb      first_nmi
        /* Ah, it is within the NMI stack, treat it as nested */
       	jmp     nested_nmi

-- Steve

^ permalink raw reply	[flat|nested] 130+ messages in thread

* [PATCH 1/3 v3] x86: entry_64.S: fix wrong symbolic constant usage: R11->ARGOFFSET
@ 2015-02-12 21:54 Denys Vlasenko
  2015-02-12 21:54 ` [PATCH 2/3 v3] x86: entry_64.S: always allocate complete "struct pt_regs" Denys Vlasenko
                   ` (2 more replies)
  0 siblings, 3 replies; 130+ messages in thread
From: Denys Vlasenko @ 2015-02-12 21:54 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Denys Vlasenko, Linus Torvalds, Oleg Nesterov, Borislav Petkov,
	H. Peter Anvin, Frederic Weisbecker, X86 ML, Alexei Starovoitov,
	Will Drewry, Kees Cook, linux-kernel

Since the last fix of this nature, few more instances have crept in.
Fix them up. No object code changes (constants have the same value).

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
CC: Linus Torvalds <torvalds@linux-foundation.org>
CC: Oleg Nesterov <oleg@redhat.com>
CC: Borislav Petkov <bp@alien8.de>
CC: "H. Peter Anvin" <hpa@zytor.com>
CC: Andy Lutomirski <luto@amacapital.net>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: X86 ML <x86@kernel.org>
CC: Alexei Starovoitov <ast@plumgrid.com>
CC: Will Drewry <wad@chromium.org>
CC: Kees Cook <keescook@chromium.org>
CC: linux-kernel@vger.kernel.org
---
 arch/x86/kernel/entry_64.S | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index db13655..ac542ac 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -757,8 +757,8 @@ retint_swapgs:		/* return to user-space */
 	 * Try to use SYSRET instead of IRET if we're returning to
 	 * a completely clean 64-bit userspace context.
 	 */
-	movq (RCX-R11)(%rsp), %rcx
-	cmpq %rcx,(RIP-R11)(%rsp)		/* RCX == RIP */
+	movq (RCX-ARGOFFSET)(%rsp), %rcx
+	cmpq %rcx,(RIP-ARGOFFSET)(%rsp)		/* RCX == RIP */
 	jne opportunistic_sysret_failed
 
 	/*
@@ -779,7 +779,7 @@ retint_swapgs:		/* return to user-space */
 	shr $__VIRTUAL_MASK_SHIFT, %rcx
 	jnz opportunistic_sysret_failed
 
-	cmpq $__USER_CS,(CS-R11)(%rsp)		/* CS must match SYSRET */
+	cmpq $__USER_CS,(CS-ARGOFFSET)(%rsp)	/* CS must match SYSRET */
 	jne opportunistic_sysret_failed
 
 	movq (R11-ARGOFFSET)(%rsp), %r11
-- 
1.8.1.4


^ permalink raw reply	[flat|nested] 130+ messages in thread

* [PATCH 2/3 v3] x86: entry_64.S: always allocate complete "struct pt_regs"
  2015-02-12 21:54 [PATCH 1/3 v3] x86: entry_64.S: fix wrong symbolic constant usage: R11->ARGOFFSET Denys Vlasenko
@ 2015-02-12 21:54 ` Denys Vlasenko
  2015-02-18 20:22   ` Andy Lutomirski
  2015-02-25 12:37   ` Andrey Wagin
  2015-02-12 21:54 ` [PATCH 3/3 v3] x86: entry_64.S: fix comments. No code changes Denys Vlasenko
  2015-02-18 20:00 ` [PATCH 1/3 v3] x86: entry_64.S: fix wrong symbolic constant usage: R11->ARGOFFSET Andy Lutomirski
  2 siblings, 2 replies; 130+ messages in thread
From: Denys Vlasenko @ 2015-02-12 21:54 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Denys Vlasenko, Linus Torvalds, Oleg Nesterov, Borislav Petkov,
	H. Peter Anvin, Frederic Weisbecker, X86 ML, Alexei Starovoitov,
	Will Drewry, Kees Cook, linux-kernel

64-bit code was using six stack slots less by not saving/restoring
registers which are callee-preserved according to C ABI,
and not allocating space for them.
Only when syscall needed a complete "struct pt_regs",
the complete area was allocated and filled in.
As an additional twist, on interrupt entry a "slightly less truncated pt_regs"
trick is used, to make nested interrupt stacks easier to unwind.

This proved to be a source of significant obfuscation and subtle bugs.
For example, stub_fork had to pop the return address,
extend the struct, save registers, and push return address back. Ugly.
ia32_ptregs_common pops return address and "returns" via jmp insn,
throwing a wrench into CPU return stack cache.

This patch changes code to always allocate a complete "struct pt_regs".
The saving of registers is still done lazily.

"Partial pt_regs" trick on interrupt stack is retained.

Macros which manipulate "struct pt_regs" on stack are reworked:
ALLOC_PT_GPREGS_ON_STACK allocates the structure.
SAVE_C_REGS saves to it those registers which are clobbered by C code.
SAVE_EXTRA_REGS saves to it all other registers.
Corresponding RESTORE_* and REMOVE_PT_GPREGS_FROM_STACK macros reverse it.

ia32_ptregs_common, stub_fork and friends lost their ugly dance with
return pointer.

LOAD_ARGS32 in ia32entry.S now uses symbolic stack offsets
instead of magic numbers.

error_entry and save_paranoid now use SAVE_C_REGS + SAVE_EXTRA_REGS
instead of having it open-coded yet again.

Patch was run-tested: 64-bit executables, 32-bit executables,
strace works.
Timing tests did not show measurable difference in 32-bit
and 64-bit syscalls.

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
CC: Linus Torvalds <torvalds@linux-foundation.org>
CC: Oleg Nesterov <oleg@redhat.com>
CC: Borislav Petkov <bp@alien8.de>
CC: "H. Peter Anvin" <hpa@zytor.com>
CC: Andy Lutomirski <luto@amacapital.net>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: X86 ML <x86@kernel.org>
CC: Alexei Starovoitov <ast@plumgrid.com>
CC: Will Drewry <wad@chromium.org>
CC: Kees Cook <keescook@chromium.org>
CC: linux-kernel@vger.kernel.org
---
 arch/x86/ia32/ia32entry.S              |  47 +++----
 arch/x86/include/asm/calling.h         | 222 ++++++++++++++++-----------------
 arch/x86/include/asm/irqflags.h        |   4 +-
 arch/x86/include/uapi/asm/ptrace-abi.h |   1 -
 arch/x86/kernel/entry_64.S             | 195 +++++++++++------------------
 5 files changed, 209 insertions(+), 260 deletions(-)

diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 156ebca..f4bed49 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -62,12 +62,12 @@
 	 */
 	.macro LOAD_ARGS32 offset, _r9=0
 	.if \_r9
-	movl \offset+16(%rsp),%r9d
+	movl \offset+R9(%rsp),%r9d
 	.endif
-	movl \offset+40(%rsp),%ecx
-	movl \offset+48(%rsp),%edx
-	movl \offset+56(%rsp),%esi
-	movl \offset+64(%rsp),%edi
+	movl \offset+RCX(%rsp),%ecx
+	movl \offset+RDX(%rsp),%edx
+	movl \offset+RSI(%rsp),%esi
+	movl \offset+RDI(%rsp),%edi
 	movl %eax,%eax			/* zero extension */
 	.endm
 	
@@ -144,7 +144,8 @@ ENTRY(ia32_sysenter_target)
 	CFI_REL_OFFSET rip,0
 	pushq_cfi %rax
 	cld
-	SAVE_ARGS 0,1,0
+	ALLOC_PT_GPREGS_ON_STACK
+	SAVE_C_REGS_EXCEPT_R891011
  	/* no need to do an access_ok check here because rbp has been
  	   32bit zero extended */ 
 	ASM_STAC
@@ -182,7 +183,8 @@ sysexit_from_sys_call:
 	andl	$~0x200,EFLAGS-ARGOFFSET(%rsp)
 	movl	RIP-ARGOFFSET(%rsp),%edx		/* User %eip */
 	CFI_REGISTER rip,rdx
-	RESTORE_ARGS 0,24,0,0,0,0
+	RESTORE_RSI_RDI
+	REMOVE_PT_GPREGS_FROM_STACK 3*8
 	xorq	%r8,%r8
 	xorq	%r9,%r9
 	xorq	%r10,%r10
@@ -256,13 +258,13 @@ sysenter_tracesys:
 	testl	$(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	jz	sysenter_auditsys
 #endif
-	SAVE_REST
+	SAVE_EXTRA_REGS
 	CLEAR_RREGS
 	movq	$-ENOSYS,RAX(%rsp)/* ptrace can change this for a bad syscall */
 	movq	%rsp,%rdi        /* &pt_regs -> arg1 */
 	call	syscall_trace_enter
 	LOAD_ARGS32 ARGOFFSET  /* reload args from stack in case ptrace changed it */
-	RESTORE_REST
+	RESTORE_EXTRA_REGS
 	cmpq	$(IA32_NR_syscalls-1),%rax
 	ja	int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */
 	jmp	sysenter_do_call
@@ -304,7 +306,8 @@ ENTRY(ia32_cstar_target)
 	 * disabled irqs and here we enable it straight after entry:
 	 */
 	ENABLE_INTERRUPTS(CLBR_NONE)
-	SAVE_ARGS 8,0,0
+	ALLOC_PT_GPREGS_ON_STACK 8
+	SAVE_C_REGS_EXCEPT_RCX_R891011
 	movl 	%eax,%eax	/* zero extension */
 	movq	%rax,ORIG_RAX-ARGOFFSET(%rsp)
 	movq	%rcx,RIP-ARGOFFSET(%rsp)
@@ -341,7 +344,7 @@ cstar_dispatch:
 	jnz sysretl_audit
 sysretl_from_sys_call:
 	andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
-	RESTORE_ARGS 0,-ARG_SKIP,0,0,0
+	RESTORE_RSI_RDI_RDX
 	movl RIP-ARGOFFSET(%rsp),%ecx
 	CFI_REGISTER rip,rcx
 	movl EFLAGS-ARGOFFSET(%rsp),%r11d	
@@ -372,13 +375,13 @@ cstar_tracesys:
 	jz cstar_auditsys
 #endif
 	xchgl %r9d,%ebp
-	SAVE_REST
+	SAVE_EXTRA_REGS
 	CLEAR_RREGS 0, r9
 	movq $-ENOSYS,RAX(%rsp)	/* ptrace can change this for a bad syscall */
 	movq %rsp,%rdi        /* &pt_regs -> arg1 */
 	call syscall_trace_enter
 	LOAD_ARGS32 ARGOFFSET, 1  /* reload args from stack in case ptrace changed it */
-	RESTORE_REST
+	RESTORE_EXTRA_REGS
 	xchgl %ebp,%r9d
 	cmpq $(IA32_NR_syscalls-1),%rax
 	ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */
@@ -433,7 +436,8 @@ ENTRY(ia32_syscall)
 	cld
 	/* note the registers are not zero extended to the sf.
 	   this could be a problem. */
-	SAVE_ARGS 0,1,0
+	ALLOC_PT_GPREGS_ON_STACK
+	SAVE_C_REGS_EXCEPT_R891011
 	orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	jnz ia32_tracesys
@@ -446,16 +450,16 @@ ia32_sysret:
 	movq %rax,RAX-ARGOFFSET(%rsp)
 ia32_ret_from_sys_call:
 	CLEAR_RREGS -ARGOFFSET
-	jmp int_ret_from_sys_call 
+	jmp int_ret_from_sys_call
 
-ia32_tracesys:			 
-	SAVE_REST
+ia32_tracesys:
+	SAVE_EXTRA_REGS
 	CLEAR_RREGS
 	movq $-ENOSYS,RAX(%rsp)	/* ptrace can change this for a bad syscall */
 	movq %rsp,%rdi        /* &pt_regs -> arg1 */
 	call syscall_trace_enter
 	LOAD_ARGS32 ARGOFFSET  /* reload args from stack in case ptrace changed it */
-	RESTORE_REST
+	RESTORE_EXTRA_REGS
 	cmpq $(IA32_NR_syscalls-1),%rax
 	ja  int_ret_from_sys_call	/* ia32_tracesys has set RAX(%rsp) */
 	jmp ia32_do_call
@@ -492,7 +496,6 @@ GLOBAL(stub32_clone)
 
 	ALIGN
 ia32_ptregs_common:
-	popq %r11
 	CFI_ENDPROC
 	CFI_STARTPROC32	simple
 	CFI_SIGNAL_FRAME
@@ -507,9 +510,9 @@ ia32_ptregs_common:
 /*	CFI_REL_OFFSET	rflags,EFLAGS-ARGOFFSET*/
 	CFI_REL_OFFSET	rsp,RSP-ARGOFFSET
 /*	CFI_REL_OFFSET	ss,SS-ARGOFFSET*/
-	SAVE_REST
+	SAVE_EXTRA_REGS 8
 	call *%rax
-	RESTORE_REST
-	jmp  ia32_sysret	/* misbalances the return cache */
+	RESTORE_EXTRA_REGS 8
+	ret
 	CFI_ENDPROC
 END(ia32_ptregs_common)
diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h
index 3c711f2a..3835647 100644
--- a/arch/x86/include/asm/calling.h
+++ b/arch/x86/include/asm/calling.h
@@ -55,143 +55,137 @@ For 32-bit we have the following conventions - kernel is built with
  * for assembly code:
  */
 
-#define R15		  0
-#define R14		  8
-#define R13		 16
-#define R12		 24
-#define RBP		 32
-#define RBX		 40
-
-/* arguments: interrupts/non tracing syscalls only save up to here: */
-#define R11		 48
-#define R10		 56
-#define R9		 64
-#define R8		 72
-#define RAX		 80
-#define RCX		 88
-#define RDX		 96
-#define RSI		104
-#define RDI		112
-#define ORIG_RAX	120       /* + error_code */
-/* end of arguments */
-
-/* cpu exception frame or undefined in case of fast syscall: */
-#define RIP		128
-#define CS		136
-#define EFLAGS		144
-#define RSP		152
-#define SS		160
-
-#define ARGOFFSET	R11
-
-	.macro SAVE_ARGS addskip=0, save_rcx=1, save_r891011=1, rax_enosys=0
-	subq  $9*8+\addskip, %rsp
-	CFI_ADJUST_CFA_OFFSET	9*8+\addskip
-	movq_cfi rdi, 8*8
-	movq_cfi rsi, 7*8
-	movq_cfi rdx, 6*8
-
-	.if \save_rcx
-	movq_cfi rcx, 5*8
-	.endif
+/* The layout forms the "struct pt_regs" on the stack: */
+/*
+ * C ABI says these regs are callee-preserved. They aren't saved on kernel entry
+ * unless syscall needs a complete, fully filled "struct pt_regs".
+ */
+#define R15		0*8
+#define R14		1*8
+#define R13		2*8
+#define R12		3*8
+#define RBP		4*8
+#define RBX		5*8
+/* These regs are callee-clobbered. Always saved on kernel entry. */
+#define R11		6*8
+#define R10		7*8
+#define R9		8*8
+#define R8		9*8
+#define RAX		10*8
+#define RCX		11*8
+#define RDX		12*8
+#define RSI		13*8
+#define RDI		14*8
+/*
+ * On syscall entry, this is syscall#. On CPU exception, this is error code.
+ * On hw interrupt, it's IRQ number:
+ */
+#define ORIG_RAX	15*8
+/* Return frame for iretq */
+#define RIP		16*8
+#define CS		17*8
+#define EFLAGS		18*8
+#define RSP		19*8
+#define SS		20*8
+
+#define ARGOFFSET	0
+
+	.macro ALLOC_PT_GPREGS_ON_STACK addskip=0
+	subq	$15*8+\addskip, %rsp
+	CFI_ADJUST_CFA_OFFSET 15*8+\addskip
+	.endm
 
-	.if \rax_enosys
-	movq $-ENOSYS, 4*8(%rsp)
-	.else
-	movq_cfi rax, 4*8
+	.macro SAVE_C_REGS_HELPER offset=0 rax=1 rcx=1 r8plus=1
+	.if \r8plus
+	movq_cfi r11, 6*8+\offset
+	movq_cfi r10, 7*8+\offset
+	movq_cfi r9,  8*8+\offset
+	movq_cfi r8,  9*8+\offset
 	.endif
-
-	.if \save_r891011
-	movq_cfi r8,  3*8
-	movq_cfi r9,  2*8
-	movq_cfi r10, 1*8
-	movq_cfi r11, 0*8
+	.if \rax
+	movq_cfi rax, 10*8+\offset
+	.endif
+	.if \rcx
+	movq_cfi rcx, 11*8+\offset
 	.endif
+	movq_cfi rdx, 12*8+\offset
+	movq_cfi rsi, 13*8+\offset
+	movq_cfi rdi, 14*8+\offset
+	.endm
+	.macro SAVE_C_REGS offset=0
+	SAVE_C_REGS_HELPER \offset, 1, 1, 1
+	.endm
+	.macro SAVE_C_REGS_EXCEPT_RAX_RCX offset=0
+	SAVE_C_REGS_HELPER \offset, 0, 0, 1
+	.endm
+	.macro SAVE_C_REGS_EXCEPT_R891011
+	SAVE_C_REGS_HELPER 0, 1, 1, 0
+	.endm
+	.macro SAVE_C_REGS_EXCEPT_RCX_R891011
+	SAVE_C_REGS_HELPER 0, 1, 0, 0
+	.endm
 
+	.macro SAVE_EXTRA_REGS offset=0
+	movq_cfi r15, 0*8+\offset
+	movq_cfi r14, 1*8+\offset
+	movq_cfi r13, 2*8+\offset
+	movq_cfi r12, 3*8+\offset
+	movq_cfi rbp, 4*8+\offset
+	movq_cfi rbx, 5*8+\offset
+	.endm
+	.macro SAVE_EXTRA_REGS_RBP offset=0
+	movq_cfi rbp, 4*8+\offset
 	.endm
 
-#define ARG_SKIP	(9*8)
+	.macro RESTORE_EXTRA_REGS offset=0
+	movq_cfi_restore 0*8+\offset, r15
+	movq_cfi_restore 1*8+\offset, r14
+	movq_cfi_restore 2*8+\offset, r13
+	movq_cfi_restore 3*8+\offset, r12
+	movq_cfi_restore 4*8+\offset, rbp
+	movq_cfi_restore 5*8+\offset, rbx
+	.endm
 
-	.macro RESTORE_ARGS rstor_rax=1, addskip=0, rstor_rcx=1, rstor_r11=1, \
-			    rstor_r8910=1, rstor_rdx=1
+	.macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1
 	.if \rstor_r11
-	movq_cfi_restore 0*8, r11
+	movq_cfi_restore 6*8, r11
 	.endif
-
 	.if \rstor_r8910
-	movq_cfi_restore 1*8, r10
-	movq_cfi_restore 2*8, r9
-	movq_cfi_restore 3*8, r8
+	movq_cfi_restore 7*8, r10
+	movq_cfi_restore 8*8, r9
+	movq_cfi_restore 9*8, r8
 	.endif
-
 	.if \rstor_rax
-	movq_cfi_restore 4*8, rax
+	movq_cfi_restore 10*8, rax
 	.endif
-
 	.if \rstor_rcx
-	movq_cfi_restore 5*8, rcx
+	movq_cfi_restore 11*8, rcx
 	.endif
-
 	.if \rstor_rdx
-	movq_cfi_restore 6*8, rdx
-	.endif
-
-	movq_cfi_restore 7*8, rsi
-	movq_cfi_restore 8*8, rdi
-
-	.if ARG_SKIP+\addskip > 0
-	addq $ARG_SKIP+\addskip, %rsp
-	CFI_ADJUST_CFA_OFFSET	-(ARG_SKIP+\addskip)
+	movq_cfi_restore 12*8, rdx
 	.endif
+	movq_cfi_restore 13*8, rsi
+	movq_cfi_restore 14*8, rdi
 	.endm
-
-	.macro LOAD_ARGS offset, skiprax=0
-	movq \offset(%rsp),    %r11
-	movq \offset+8(%rsp),  %r10
-	movq \offset+16(%rsp), %r9
-	movq \offset+24(%rsp), %r8
-	movq \offset+40(%rsp), %rcx
-	movq \offset+48(%rsp), %rdx
-	movq \offset+56(%rsp), %rsi
-	movq \offset+64(%rsp), %rdi
-	.if \skiprax
-	.else
-	movq \offset+72(%rsp), %rax
-	.endif
+	.macro RESTORE_C_REGS
+	RESTORE_C_REGS_HELPER 1,1,1,1,1
 	.endm
-
-#define REST_SKIP	(6*8)
-
-	.macro SAVE_REST
-	subq $REST_SKIP, %rsp
-	CFI_ADJUST_CFA_OFFSET	REST_SKIP
-	movq_cfi rbx, 5*8
-	movq_cfi rbp, 4*8
-	movq_cfi r12, 3*8
-	movq_cfi r13, 2*8
-	movq_cfi r14, 1*8
-	movq_cfi r15, 0*8
+	.macro RESTORE_C_REGS_EXCEPT_RAX
+	RESTORE_C_REGS_HELPER 0,1,1,1,1
 	.endm
-
-	.macro RESTORE_REST
-	movq_cfi_restore 0*8, r15
-	movq_cfi_restore 1*8, r14
-	movq_cfi_restore 2*8, r13
-	movq_cfi_restore 3*8, r12
-	movq_cfi_restore 4*8, rbp
-	movq_cfi_restore 5*8, rbx
-	addq $REST_SKIP, %rsp
-	CFI_ADJUST_CFA_OFFSET	-(REST_SKIP)
+	.macro RESTORE_C_REGS_EXCEPT_RCX
+	RESTORE_C_REGS_HELPER 1,0,1,1,1
 	.endm
-
-	.macro SAVE_ALL
-	SAVE_ARGS
-	SAVE_REST
+	.macro RESTORE_RSI_RDI
+	RESTORE_C_REGS_HELPER 0,0,0,0,0
+	.endm
+	.macro RESTORE_RSI_RDI_RDX
+	RESTORE_C_REGS_HELPER 0,0,0,0,1
 	.endm
 
-	.macro RESTORE_ALL addskip=0
-	RESTORE_REST
-	RESTORE_ARGS 1, \addskip
+	.macro REMOVE_PT_GPREGS_FROM_STACK addskip=0
+	addq $15*8+\addskip, %rsp
+	CFI_ADJUST_CFA_OFFSET -(15*8+\addskip)
 	.endm
 
 	.macro icebp
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index 0a8b519..021bee9 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -171,9 +171,9 @@ static inline int arch_irqs_disabled(void)
 #define ARCH_LOCKDEP_SYS_EXIT_IRQ	\
 	TRACE_IRQS_ON; \
 	sti; \
-	SAVE_REST; \
+	SAVE_EXTRA_REGS; \
 	LOCKDEP_SYS_EXIT; \
-	RESTORE_REST; \
+	RESTORE_EXTRA_REGS; \
 	cli; \
 	TRACE_IRQS_OFF;
 
diff --git a/arch/x86/include/uapi/asm/ptrace-abi.h b/arch/x86/include/uapi/asm/ptrace-abi.h
index 7b0a55a..ad115bf 100644
--- a/arch/x86/include/uapi/asm/ptrace-abi.h
+++ b/arch/x86/include/uapi/asm/ptrace-abi.h
@@ -49,7 +49,6 @@
 #define EFLAGS 144
 #define RSP 152
 #define SS 160
-#define ARGOFFSET R11
 #endif /* __ASSEMBLY__ */
 
 /* top of stack page */
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index ac542ac..45bdd26 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -26,12 +26,6 @@
  * Some macro usage:
  * - CFI macros are used to generate dwarf2 unwind information for better
  * backtraces. They don't change any code.
- * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
- * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
- * There are unfortunately lots of special cases where some registers
- * not touched. The macro is a big mess that should be cleaned up.
- * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
- * Gives a full stack frame.
  * - ENTRY/END Define functions in the symbol table.
  * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
  * frame that is otherwise undefined after a SYSCALL
@@ -190,9 +184,9 @@ ENDPROC(native_usergs_sysret64)
 	.endm
 
 /*
- * frame that enables calling into C.
+ * frame that enables passing a complete pt_regs to a C function.
  */
-	.macro PARTIAL_FRAME start=1 offset=0
+	.macro DEFAULT_FRAME start=1 offset=0
 	XCPT_FRAME \start, ORIG_RAX+\offset-ARGOFFSET
 	CFI_REL_OFFSET rdi, RDI+\offset-ARGOFFSET
 	CFI_REL_OFFSET rsi, RSI+\offset-ARGOFFSET
@@ -203,13 +197,6 @@ ENDPROC(native_usergs_sysret64)
 	CFI_REL_OFFSET r9, R9+\offset-ARGOFFSET
 	CFI_REL_OFFSET r10, R10+\offset-ARGOFFSET
 	CFI_REL_OFFSET r11, R11+\offset-ARGOFFSET
-	.endm
-
-/*
- * frame that enables passing a complete pt_regs to a C function.
- */
-	.macro DEFAULT_FRAME start=1 offset=0
-	PARTIAL_FRAME \start, R11+\offset-R15
 	CFI_REL_OFFSET rbx, RBX+\offset
 	CFI_REL_OFFSET rbp, RBP+\offset
 	CFI_REL_OFFSET r12, R12+\offset
@@ -221,21 +208,8 @@ ENDPROC(native_usergs_sysret64)
 ENTRY(save_paranoid)
 	XCPT_FRAME 1 RDI+8
 	cld
-	movq %rdi, RDI+8(%rsp)
-	movq %rsi, RSI+8(%rsp)
-	movq_cfi rdx, RDX+8
-	movq_cfi rcx, RCX+8
-	movq_cfi rax, RAX+8
-	movq %r8, R8+8(%rsp)
-	movq %r9, R9+8(%rsp)
-	movq %r10, R10+8(%rsp)
-	movq %r11, R11+8(%rsp)
-	movq_cfi rbx, RBX+8
-	movq %rbp, RBP+8(%rsp)
-	movq %r12, R12+8(%rsp)
-	movq %r13, R13+8(%rsp)
-	movq %r14, R14+8(%rsp)
-	movq %r15, R15+8(%rsp)
+	SAVE_C_REGS 8
+	SAVE_EXTRA_REGS 8
 	movl $1,%ebx
 	movl $MSR_GS_BASE,%ecx
 	rdmsr
@@ -264,7 +238,7 @@ ENTRY(ret_from_fork)
 
 	GET_THREAD_INFO(%rcx)
 
-	RESTORE_REST
+	RESTORE_EXTRA_REGS
 
 	testl $3, CS-ARGOFFSET(%rsp)		# from kernel_thread?
 	jz   1f
@@ -276,12 +250,10 @@ ENTRY(ret_from_fork)
 	jmp ret_from_sys_call			# go to the SYSRET fastpath
 
 1:
-	subq $REST_SKIP, %rsp	# leave space for volatiles
-	CFI_ADJUST_CFA_OFFSET	REST_SKIP
 	movq %rbp, %rdi
 	call *%rbx
 	movl $0, RAX(%rsp)
-	RESTORE_REST
+	RESTORE_EXTRA_REGS
 	jmp int_ret_from_sys_call
 	CFI_ENDPROC
 END(ret_from_fork)
@@ -339,9 +311,11 @@ GLOBAL(system_call_after_swapgs)
 	 * and short:
 	 */
 	ENABLE_INTERRUPTS(CLBR_NONE)
-	SAVE_ARGS 8, 0, rax_enosys=1
+	ALLOC_PT_GPREGS_ON_STACK 8
+	SAVE_C_REGS_EXCEPT_RAX_RCX
+	movq	$-ENOSYS,RAX-ARGOFFSET(%rsp)
 	movq_cfi rax,(ORIG_RAX-ARGOFFSET)
-	movq  %rcx,RIP-ARGOFFSET(%rsp)
+	movq	%rcx,RIP-ARGOFFSET(%rsp)
 	CFI_REL_OFFSET rip,RIP-ARGOFFSET
 	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	jnz tracesys
@@ -372,9 +346,9 @@ ret_from_sys_call:
 	 * sysretq will re-enable interrupts:
 	 */
 	TRACE_IRQS_ON
+	RESTORE_C_REGS_EXCEPT_RCX
 	movq RIP-ARGOFFSET(%rsp),%rcx
 	CFI_REGISTER	rip,rcx
-	RESTORE_ARGS 1,-ARG_SKIP,0
 	/*CFI_REGISTER	rflags,r11*/
 	movq	PER_CPU_VAR(old_rsp), %rsp
 	USERGS_SYSRET64
@@ -387,16 +361,16 @@ int_ret_from_sys_call_fixup:
 
 	/* Do syscall tracing */
 tracesys:
-	leaq -REST_SKIP(%rsp), %rdi
+	movq %rsp, %rdi
 	movq $AUDIT_ARCH_X86_64, %rsi
 	call syscall_trace_enter_phase1
 	test %rax, %rax
 	jnz tracesys_phase2		/* if needed, run the slow path */
-	LOAD_ARGS 0			/* else restore clobbered regs */
+	RESTORE_C_REGS			/* else restore clobbered regs */
 	jmp system_call_fastpath	/*      and return to the fast path */
 
 tracesys_phase2:
-	SAVE_REST
+	SAVE_EXTRA_REGS
 	FIXUP_TOP_OF_STACK %rdi
 	movq %rsp, %rdi
 	movq $AUDIT_ARCH_X86_64, %rsi
@@ -408,8 +382,8 @@ tracesys_phase2:
 	 * We don't reload %rax because syscall_trace_entry_phase2() returned
 	 * the value it wants us to use in the table lookup.
 	 */
-	LOAD_ARGS ARGOFFSET, 1
-	RESTORE_REST
+	RESTORE_C_REGS_EXCEPT_RAX
+	RESTORE_EXTRA_REGS
 #if __SYSCALL_MASK == ~0
 	cmpq $__NR_syscall_max,%rax
 #else
@@ -460,7 +434,7 @@ int_very_careful:
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_NONE)
 int_check_syscall_exit_work:
-	SAVE_REST
+	SAVE_EXTRA_REGS
 	/* Check for syscall exit trace */
 	testl $_TIF_WORK_SYSCALL_EXIT,%edx
 	jz int_signal
@@ -479,7 +453,7 @@ int_signal:
 	call do_notify_resume
 1:	movl $_TIF_WORK_MASK,%edi
 int_restore_rest:
-	RESTORE_REST
+	RESTORE_EXTRA_REGS
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
 	jmp int_with_check
@@ -489,15 +463,12 @@ END(system_call)
 	.macro FORK_LIKE func
 ENTRY(stub_\func)
 	CFI_STARTPROC
-	popq	%r11			/* save return address */
-	PARTIAL_FRAME 0
-	SAVE_REST
-	pushq	%r11			/* put it back on stack */
+	DEFAULT_FRAME 0, 8		/* offset 8: return address */
+	SAVE_EXTRA_REGS 8
 	FIXUP_TOP_OF_STACK %r11, 8
-	DEFAULT_FRAME 0 8		/* offset 8: return address */
 	call sys_\func
 	RESTORE_TOP_OF_STACK %r11, 8
-	ret $REST_SKIP		/* pop extended registers */
+	ret
 	CFI_ENDPROC
 END(stub_\func)
 	.endm
@@ -505,7 +476,7 @@ END(stub_\func)
 	.macro FIXED_FRAME label,func
 ENTRY(\label)
 	CFI_STARTPROC
-	PARTIAL_FRAME 0 8		/* offset 8: return address */
+	DEFAULT_FRAME 0, 8		/* offset 8: return address */
 	FIXUP_TOP_OF_STACK %r11, 8-ARGOFFSET
 	call \func
 	RESTORE_TOP_OF_STACK %r11, 8-ARGOFFSET
@@ -522,12 +493,12 @@ END(\label)
 ENTRY(stub_execve)
 	CFI_STARTPROC
 	addq $8, %rsp
-	PARTIAL_FRAME 0
-	SAVE_REST
+	DEFAULT_FRAME 0
+	SAVE_EXTRA_REGS
 	FIXUP_TOP_OF_STACK %r11
 	call sys_execve
 	movq %rax,RAX(%rsp)
-	RESTORE_REST
+	RESTORE_EXTRA_REGS
 	jmp int_ret_from_sys_call
 	CFI_ENDPROC
 END(stub_execve)
@@ -535,13 +506,13 @@ END(stub_execve)
 ENTRY(stub_execveat)
 	CFI_STARTPROC
 	addq $8, %rsp
-	PARTIAL_FRAME 0
-	SAVE_REST
+	DEFAULT_FRAME 0
+	SAVE_EXTRA_REGS
 	FIXUP_TOP_OF_STACK %r11
 	call sys_execveat
 	RESTORE_TOP_OF_STACK %r11
 	movq %rax,RAX(%rsp)
-	RESTORE_REST
+	RESTORE_EXTRA_REGS
 	jmp int_ret_from_sys_call
 	CFI_ENDPROC
 END(stub_execveat)
@@ -553,12 +524,12 @@ END(stub_execveat)
 ENTRY(stub_rt_sigreturn)
 	CFI_STARTPROC
 	addq $8, %rsp
-	PARTIAL_FRAME 0
-	SAVE_REST
+	DEFAULT_FRAME 0
+	SAVE_EXTRA_REGS
 	FIXUP_TOP_OF_STACK %r11
 	call sys_rt_sigreturn
 	movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
-	RESTORE_REST
+	RESTORE_EXTRA_REGS
 	jmp int_ret_from_sys_call
 	CFI_ENDPROC
 END(stub_rt_sigreturn)
@@ -567,12 +538,12 @@ END(stub_rt_sigreturn)
 ENTRY(stub_x32_rt_sigreturn)
 	CFI_STARTPROC
 	addq $8, %rsp
-	PARTIAL_FRAME 0
-	SAVE_REST
+	DEFAULT_FRAME 0
+	SAVE_EXTRA_REGS
 	FIXUP_TOP_OF_STACK %r11
 	call sys32_x32_rt_sigreturn
 	movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
-	RESTORE_REST
+	RESTORE_EXTRA_REGS
 	jmp int_ret_from_sys_call
 	CFI_ENDPROC
 END(stub_x32_rt_sigreturn)
@@ -580,13 +551,13 @@ END(stub_x32_rt_sigreturn)
 ENTRY(stub_x32_execve)
 	CFI_STARTPROC
 	addq $8, %rsp
-	PARTIAL_FRAME 0
-	SAVE_REST
+	DEFAULT_FRAME 0
+	SAVE_EXTRA_REGS
 	FIXUP_TOP_OF_STACK %r11
 	call compat_sys_execve
 	RESTORE_TOP_OF_STACK %r11
 	movq %rax,RAX(%rsp)
-	RESTORE_REST
+	RESTORE_EXTRA_REGS
 	jmp int_ret_from_sys_call
 	CFI_ENDPROC
 END(stub_x32_execve)
@@ -594,13 +565,13 @@ END(stub_x32_execve)
 ENTRY(stub_x32_execveat)
 	CFI_STARTPROC
 	addq $8, %rsp
-	PARTIAL_FRAME 0
-	SAVE_REST
+	DEFAULT_FRAME 0
+	SAVE_EXTRA_REGS
 	FIXUP_TOP_OF_STACK %r11
 	call compat_sys_execveat
 	RESTORE_TOP_OF_STACK %r11
 	movq %rax,RAX(%rsp)
-	RESTORE_REST
+	RESTORE_EXTRA_REGS
 	jmp int_ret_from_sys_call
 	CFI_ENDPROC
 END(stub_x32_execveat)
@@ -656,42 +627,28 @@ END(interrupt)
 
 /* 0(%rsp): ~(interrupt number) */
 	.macro interrupt func
-	/* reserve pt_regs for scratch regs and rbp */
-	subq $ORIG_RAX-RBP, %rsp
-	CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP
 	cld
-	/* start from rbp in pt_regs and jump over */
-	movq_cfi rdi, (RDI-RBP)
-	movq_cfi rsi, (RSI-RBP)
-	movq_cfi rdx, (RDX-RBP)
-	movq_cfi rcx, (RCX-RBP)
-	movq_cfi rax, (RAX-RBP)
-	movq_cfi  r8,  (R8-RBP)
-	movq_cfi  r9,  (R9-RBP)
-	movq_cfi r10, (R10-RBP)
-	movq_cfi r11, (R11-RBP)
-
-	/* Save rbp so that we can unwind from get_irq_regs() */
-	movq_cfi rbp, 0
-
-	/* Save previous stack value */
-	movq %rsp, %rsi
+	ALLOC_PT_GPREGS_ON_STACK -RBP
+	SAVE_C_REGS -RBP
+	/* this goes to 0(%rsp) for unwinder, not for saving the value: */
+	SAVE_EXTRA_REGS_RBP -RBP
+
+	leaq -RBP(%rsp),%rdi	/* arg1 for \func (pointer to pt_regs) */
 
-	leaq -RBP(%rsp),%rdi	/* arg1 for handler */
-	testl $3, CS-RBP(%rsi)
+	testl $3, CS-RBP(%rsp)
 	je 1f
 	SWAPGS
+1:
 	/*
 	 * irq_count is used to check if a CPU is already on an interrupt stack
 	 * or not. While this is essentially redundant with preempt_count it is
 	 * a little cheaper to use a separate counter in the PDA (short of
 	 * moving irq_enter into assembly, which would be too much work)
 	 */
-1:	incl PER_CPU_VAR(irq_count)
+	movq %rsp, %rsi
+	incl PER_CPU_VAR(irq_count)
 	cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
 	CFI_DEF_CFA_REGISTER	rsi
-
-	/* Store previous stack value */
 	pushq %rsi
 	CFI_ESCAPE	0x0f /* DW_CFA_def_cfa_expression */, 6, \
 			0x77 /* DW_OP_breg7 */, 0, \
@@ -800,7 +757,8 @@ retint_swapgs:		/* return to user-space */
 	 */
 irq_return_via_sysret:
 	CFI_REMEMBER_STATE
-	RESTORE_ARGS 1,8,1
+	RESTORE_C_REGS
+	REMOVE_PT_GPREGS_FROM_STACK 8
 	movq (RSP-RIP)(%rsp),%rsp
 	USERGS_SYSRET64
 	CFI_RESTORE_STATE
@@ -816,7 +774,8 @@ retint_restore_args:	/* return to kernel space */
 	 */
 	TRACE_IRQS_IRETQ
 restore_args:
-	RESTORE_ARGS 1,8,1
+	RESTORE_C_REGS
+	REMOVE_PT_GPREGS_FROM_STACK 8
 
 irq_return:
 	INTERRUPT_RETURN
@@ -887,12 +846,12 @@ retint_signal:
 	jz    retint_swapgs
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_NONE)
-	SAVE_REST
+	SAVE_EXTRA_REGS
 	movq $-1,ORIG_RAX(%rsp)
 	xorl %esi,%esi		# oldset
 	movq %rsp,%rdi		# &pt_regs
 	call do_notify_resume
-	RESTORE_REST
+	RESTORE_EXTRA_REGS
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
 	GET_THREAD_INFO(%rcx)
@@ -1019,8 +978,7 @@ ENTRY(\sym)
 	pushq_cfi $-1			/* ORIG_RAX: no syscall to restart */
 	.endif
 
-	subq $ORIG_RAX-R15, %rsp
-	CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
+	ALLOC_PT_GPREGS_ON_STACK
 
 	.if \paranoid
 	.if \paranoid == 1
@@ -1266,7 +1224,9 @@ ENTRY(xen_failsafe_callback)
 	addq $0x30,%rsp
 	CFI_ADJUST_CFA_OFFSET -0x30
 	pushq_cfi $-1 /* orig_ax = -1 => not a system call */
-	SAVE_ALL
+	ALLOC_PT_GPREGS_ON_STACK
+	SAVE_C_REGS
+	SAVE_EXTRA_REGS
 	jmp error_exit
 	CFI_ENDPROC
 END(xen_failsafe_callback)
@@ -1318,11 +1278,15 @@ ENTRY(paranoid_exit)
 	jnz paranoid_restore
 	TRACE_IRQS_IRETQ 0
 	SWAPGS_UNSAFE_STACK
-	RESTORE_ALL 8
+	RESTORE_EXTRA_REGS
+	RESTORE_C_REGS
+	REMOVE_PT_GPREGS_FROM_STACK 8
 	INTERRUPT_RETURN
 paranoid_restore:
 	TRACE_IRQS_IRETQ_DEBUG 0
-	RESTORE_ALL 8
+	RESTORE_EXTRA_REGS
+	RESTORE_C_REGS
+	REMOVE_PT_GPREGS_FROM_STACK 8
 	INTERRUPT_RETURN
 	CFI_ENDPROC
 END(paranoid_exit)
@@ -1336,21 +1300,8 @@ ENTRY(error_entry)
 	CFI_ADJUST_CFA_OFFSET 15*8
 	/* oldrax contains error code */
 	cld
-	movq %rdi, RDI+8(%rsp)
-	movq %rsi, RSI+8(%rsp)
-	movq %rdx, RDX+8(%rsp)
-	movq %rcx, RCX+8(%rsp)
-	movq %rax, RAX+8(%rsp)
-	movq  %r8,  R8+8(%rsp)
-	movq  %r9,  R9+8(%rsp)
-	movq %r10, R10+8(%rsp)
-	movq %r11, R11+8(%rsp)
-	movq_cfi rbx, RBX+8
-	movq %rbp, RBP+8(%rsp)
-	movq %r12, R12+8(%rsp)
-	movq %r13, R13+8(%rsp)
-	movq %r14, R14+8(%rsp)
-	movq %r15, R15+8(%rsp)
+	SAVE_C_REGS 8
+	SAVE_EXTRA_REGS 8
 	xorl %ebx,%ebx
 	testl $3,CS+8(%rsp)
 	je error_kernelspace
@@ -1399,7 +1350,7 @@ END(error_entry)
 ENTRY(error_exit)
 	DEFAULT_FRAME
 	movl %ebx,%eax
-	RESTORE_REST
+	RESTORE_EXTRA_REGS
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
 	GET_THREAD_INFO(%rcx)
@@ -1618,8 +1569,8 @@ end_repeat_nmi:
 	 * so that we repeat another NMI.
 	 */
 	pushq_cfi $-1		/* ORIG_RAX: no syscall to restart */
-	subq $ORIG_RAX-R15, %rsp
-	CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
+	ALLOC_PT_GPREGS_ON_STACK
+
 	/*
 	 * Use save_paranoid to handle SWAPGS, but no need to use paranoid_exit
 	 * as we should not be calling schedule in NMI context.
@@ -1658,8 +1609,10 @@ end_repeat_nmi:
 nmi_swapgs:
 	SWAPGS_UNSAFE_STACK
 nmi_restore:
+	RESTORE_EXTRA_REGS
+	RESTORE_C_REGS
 	/* Pop the extra iret frame at once */
-	RESTORE_ALL 6*8
+	REMOVE_PT_GPREGS_FROM_STACK 6*8
 
 	/* Clear the NMI executing stack variable */
 	movq $0, 5*8(%rsp)
-- 
1.8.1.4


^ permalink raw reply	[flat|nested] 130+ messages in thread

* [PATCH 3/3 v3] x86: entry_64.S: fix comments. No code changes
  2015-02-12 21:54 [PATCH 1/3 v3] x86: entry_64.S: fix wrong symbolic constant usage: R11->ARGOFFSET Denys Vlasenko
  2015-02-12 21:54 ` [PATCH 2/3 v3] x86: entry_64.S: always allocate complete "struct pt_regs" Denys Vlasenko
@ 2015-02-12 21:54 ` Denys Vlasenko
  2015-02-18 20:25   ` Andy Lutomirski
  2015-02-18 20:00 ` [PATCH 1/3 v3] x86: entry_64.S: fix wrong symbolic constant usage: R11->ARGOFFSET Andy Lutomirski
  2 siblings, 1 reply; 130+ messages in thread
From: Denys Vlasenko @ 2015-02-12 21:54 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Denys Vlasenko, Linus Torvalds, Oleg Nesterov, Borislav Petkov,
	H. Peter Anvin, Frederic Weisbecker, X86 ML, Alexei Starovoitov,
	Will Drewry, Kees Cook, linux-kernel

Misleading and slightly wrong comments in "struct pt_regs" are fixed
(four instances).

Fix wrong comment atop EMPTY_FRAME macro.

Explain in more details what we do with stack layout during hw interrupt.

Correct comments about "partial stack frame" which are no longer true.

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
CC: Linus Torvalds <torvalds@linux-foundation.org>
CC: Oleg Nesterov <oleg@redhat.com>
CC: Borislav Petkov <bp@alien8.de>
CC: "H. Peter Anvin" <hpa@zytor.com>
CC: Andy Lutomirski <luto@amacapital.net>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: X86 ML <x86@kernel.org>
CC: Alexei Starovoitov <ast@plumgrid.com>
CC: Will Drewry <wad@chromium.org>
CC: Kees Cook <keescook@chromium.org>
CC: linux-kernel@vger.kernel.org
---
 arch/x86/include/asm/ptrace.h          | 13 ++++++++++---
 arch/x86/include/uapi/asm/ptrace-abi.h | 15 +++++++++++----
 arch/x86/include/uapi/asm/ptrace.h     | 13 ++++++++++---
 arch/x86/kernel/entry_64.S             | 18 ++++++++++++------
 4 files changed, 43 insertions(+), 16 deletions(-)

diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 86fc2bb..4077d96 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -31,13 +31,17 @@ struct pt_regs {
 #else /* __i386__ */
 
 struct pt_regs {
+/*
+ * C ABI says these regs are callee-preserved. They aren't saved on kernel entry
+ * unless syscall needs a complete, fully filled "struct pt_regs".
+ */
 	unsigned long r15;
 	unsigned long r14;
 	unsigned long r13;
 	unsigned long r12;
 	unsigned long bp;
 	unsigned long bx;
-/* arguments: non interrupts/non tracing syscalls only save up to here*/
+/* These regs are callee-clobbered. Always saved on kernel entry. */
 	unsigned long r11;
 	unsigned long r10;
 	unsigned long r9;
@@ -47,9 +51,12 @@ struct pt_regs {
 	unsigned long dx;
 	unsigned long si;
 	unsigned long di;
+/*
+ * On syscall entry, this is syscall#. On CPU exception, this is error code.
+ * On hw interrupt, it's IRQ number:
+ */
 	unsigned long orig_ax;
-/* end of arguments */
-/* cpu exception frame or undefined */
+/* Return frame for iretq */
 	unsigned long ip;
 	unsigned long cs;
 	unsigned long flags;
diff --git a/arch/x86/include/uapi/asm/ptrace-abi.h b/arch/x86/include/uapi/asm/ptrace-abi.h
index ad115bf..580aee3 100644
--- a/arch/x86/include/uapi/asm/ptrace-abi.h
+++ b/arch/x86/include/uapi/asm/ptrace-abi.h
@@ -25,13 +25,17 @@
 #else /* __i386__ */
 
 #if defined(__ASSEMBLY__) || defined(__FRAME_OFFSETS)
+/*
+ * C ABI says these regs are callee-preserved. They aren't saved on kernel entry
+ * unless syscall needs a complete, fully filled "struct pt_regs".
+ */
 #define R15 0
 #define R14 8
 #define R13 16
 #define R12 24
 #define RBP 32
 #define RBX 40
-/* arguments: interrupts/non tracing syscalls only save up to here*/
+/* These regs are callee-clobbered. Always saved on kernel entry. */
 #define R11 48
 #define R10 56
 #define R9 64
@@ -41,9 +45,12 @@
 #define RDX 96
 #define RSI 104
 #define RDI 112
-#define ORIG_RAX 120       /* = ERROR */
-/* end of arguments */
-/* cpu exception frame or undefined in case of fast syscall. */
+/*
+ * On syscall entry, this is syscall#. On CPU exception, this is error code.
+ * On hw interrupt, it's IRQ number:
+ */
+#define ORIG_RAX 120
+/* Return frame for iretq */
 #define RIP 128
 #define CS 136
 #define EFLAGS 144
diff --git a/arch/x86/include/uapi/asm/ptrace.h b/arch/x86/include/uapi/asm/ptrace.h
index ac4b9aa..bc16115 100644
--- a/arch/x86/include/uapi/asm/ptrace.h
+++ b/arch/x86/include/uapi/asm/ptrace.h
@@ -41,13 +41,17 @@ struct pt_regs {
 #ifndef __KERNEL__
 
 struct pt_regs {
+/*
+ * C ABI says these regs are callee-preserved. They aren't saved on kernel entry
+ * unless syscall needs a complete, fully filled "struct pt_regs".
+ */
 	unsigned long r15;
 	unsigned long r14;
 	unsigned long r13;
 	unsigned long r12;
 	unsigned long rbp;
 	unsigned long rbx;
-/* arguments: non interrupts/non tracing syscalls only save up to here*/
+/* These regs are callee-clobbered. Always saved on kernel entry. */
 	unsigned long r11;
 	unsigned long r10;
 	unsigned long r9;
@@ -57,9 +61,12 @@ struct pt_regs {
 	unsigned long rdx;
 	unsigned long rsi;
 	unsigned long rdi;
+/*
+ * On syscall entry, this is syscall#. On CPU exception, this is error code.
+ * On hw interrupt, it's IRQ number:
+ */
 	unsigned long orig_rax;
-/* end of arguments */
-/* cpu exception frame or undefined */
+/* Return frame for iretq */
 	unsigned long rip;
 	unsigned long cs;
 	unsigned long eflags;
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 45bdd26..79cf059 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -14,9 +14,6 @@
  * NOTE: This code handles signal-recognition, which happens every time
  * after an interrupt and after each system call.
  *
- * Normal syscalls and interrupts don't save a full stack frame, this is
- * only done for syscall tracing, signals or fork/exec et.al.
- *
  * A note on terminology:
  * - top of stack: Architecture defined interrupt frame from SS to RIP
  * at the top of the kernel process stack.
@@ -151,7 +148,7 @@ ENDPROC(native_usergs_sysret64)
 	.endm
 
 /*
- * initial frame state for interrupts (and exceptions without error code)
+ * empty frame
  */
 	.macro EMPTY_FRAME start=1 offset=0
 	.if \start
@@ -378,7 +375,7 @@ tracesys_phase2:
 	call syscall_trace_enter_phase2
 
 	/*
-	 * Reload arg registers from stack in case ptrace changed them.
+	 * Reload registers from stack in case ptrace changed them.
 	 * We don't reload %rax because syscall_trace_entry_phase2() returned
 	 * the value it wants us to use in the table lookup.
 	 */
@@ -628,6 +625,13 @@ END(interrupt)
 /* 0(%rsp): ~(interrupt number) */
 	.macro interrupt func
 	cld
+	/*
+	 * Since nothing in interrupt handling code touches r12...r15 members
+	 * of "struct pt_regs", and since interrupts can nest, we can save
+	 * four stack slots and simultaneously provide
+	 * an unwind-friendly stack layout by saving "truncated" pt_regs
+	 * exactly up to rbp slot, without these members.
+	 */
 	ALLOC_PT_GPREGS_ON_STACK -RBP
 	SAVE_C_REGS -RBP
 	/* this goes to 0(%rsp) for unwinder, not for saving the value: */
@@ -640,6 +644,7 @@ END(interrupt)
 	SWAPGS
 1:
 	/*
+	 * Save previous stack pointer, optionally switch to interrupt stack.
 	 * irq_count is used to check if a CPU is already on an interrupt stack
 	 * or not. While this is essentially redundant with preempt_count it is
 	 * a little cheaper to use a separate counter in the PDA (short of
@@ -680,6 +685,7 @@ ret_from_intr:
 	/* Restore saved previous stack */
 	popq %rsi
 	CFI_DEF_CFA rsi,SS+8-RBP	/* reg/off reset after def_cfa_expr */
+	/* return code expects complete pt_regs - adjust rsp accordingly: */
 	leaq ARGOFFSET-RBP(%rsi), %rsp
 	CFI_DEF_CFA_REGISTER	rsp
 	CFI_ADJUST_CFA_OFFSET	RBP-ARGOFFSET
@@ -691,7 +697,7 @@ exit_intr:
 
 	/* Interrupt came from user space */
 	/*
-	 * Has a correct top of stack, but a partial stack frame
+	 * Has a correct top of stack.
 	 * %rcx: thread info. Interrupts off.
 	 */
 retint_with_reschedule:
-- 
1.8.1.4


^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH 1/3 v3] x86: entry_64.S: fix wrong symbolic constant usage: R11->ARGOFFSET
  2015-02-12 21:54 [PATCH 1/3 v3] x86: entry_64.S: fix wrong symbolic constant usage: R11->ARGOFFSET Denys Vlasenko
  2015-02-12 21:54 ` [PATCH 2/3 v3] x86: entry_64.S: always allocate complete "struct pt_regs" Denys Vlasenko
  2015-02-12 21:54 ` [PATCH 3/3 v3] x86: entry_64.S: fix comments. No code changes Denys Vlasenko
@ 2015-02-18 20:00 ` Andy Lutomirski
  2 siblings, 0 replies; 130+ messages in thread
From: Andy Lutomirski @ 2015-02-18 20:00 UTC (permalink / raw)
  To: Denys Vlasenko
  Cc: Linus Torvalds, Oleg Nesterov, Borislav Petkov, H. Peter Anvin,
	Frederic Weisbecker, X86 ML, Alexei Starovoitov, Will Drewry,
	Kees Cook, linux-kernel

On Thu, Feb 12, 2015 at 1:54 PM, Denys Vlasenko <dvlasenk@redhat.com> wrote:
> Since the last fix of this nature, few more instances have crept in.
> Fix them up. No object code changes (constants have the same value).

Applied.

--Andy

^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH 2/3 v3] x86: entry_64.S: always allocate complete "struct pt_regs"
  2015-02-12 21:54 ` [PATCH 2/3 v3] x86: entry_64.S: always allocate complete "struct pt_regs" Denys Vlasenko
@ 2015-02-18 20:22   ` Andy Lutomirski
  2015-02-25 12:37   ` Andrey Wagin
  1 sibling, 0 replies; 130+ messages in thread
From: Andy Lutomirski @ 2015-02-18 20:22 UTC (permalink / raw)
  To: Denys Vlasenko
  Cc: Linus Torvalds, Oleg Nesterov, Borislav Petkov, H. Peter Anvin,
	Frederic Weisbecker, X86 ML, Alexei Starovoitov, Will Drewry,
	Kees Cook, linux-kernel

On Thu, Feb 12, 2015 at 1:54 PM, Denys Vlasenko <dvlasenk@redhat.com> wrote:
> 64-bit code was using six stack slots less by not saving/restoring
> registers which are callee-preserved according to C ABI,
> and not allocating space for them.
> Only when syscall needed a complete "struct pt_regs",
> the complete area was allocated and filled in.
> As an additional twist, on interrupt entry a "slightly less truncated pt_regs"
> trick is used, to make nested interrupt stacks easier to unwind.
>
> This proved to be a source of significant obfuscation and subtle bugs.
> For example, stub_fork had to pop the return address,
> extend the struct, save registers, and push return address back. Ugly.
> ia32_ptregs_common pops return address and "returns" via jmp insn,
> throwing a wrench into CPU return stack cache.
>
> This patch changes code to always allocate a complete "struct pt_regs".
> The saving of registers is still done lazily.
>
> "Partial pt_regs" trick on interrupt stack is retained.
>
> Macros which manipulate "struct pt_regs" on stack are reworked:
> ALLOC_PT_GPREGS_ON_STACK allocates the structure.
> SAVE_C_REGS saves to it those registers which are clobbered by C code.
> SAVE_EXTRA_REGS saves to it all other registers.
> Corresponding RESTORE_* and REMOVE_PT_GPREGS_FROM_STACK macros reverse it.
>
> ia32_ptregs_common, stub_fork and friends lost their ugly dance with
> return pointer.
>
> LOAD_ARGS32 in ia32entry.S now uses symbolic stack offsets
> instead of magic numbers.
>
> error_entry and save_paranoid now use SAVE_C_REGS + SAVE_EXTRA_REGS
> instead of having it open-coded yet again.
>
> Patch was run-tested: 64-bit executables, 32-bit executables,
> strace works.
> Timing tests did not show measurable difference in 32-bit
> and 64-bit syscalls.

This patch scares me, because it changes a lot of hairy code.  That
being said, I don't see anything wrong with it, and the end result is
much nicer than the status quo.  So I applied it, and I'll let the
kbuild bot have fun with it.  I confirmed that I can boot a 64-bit and
a 32-bit system with it, at least in my configuration.

Further reviews are encouraged :)

--Andy

>
> Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
> CC: Linus Torvalds <torvalds@linux-foundation.org>
> CC: Oleg Nesterov <oleg@redhat.com>
> CC: Borislav Petkov <bp@alien8.de>
> CC: "H. Peter Anvin" <hpa@zytor.com>
> CC: Andy Lutomirski <luto@amacapital.net>
> CC: Frederic Weisbecker <fweisbec@gmail.com>
> CC: X86 ML <x86@kernel.org>
> CC: Alexei Starovoitov <ast@plumgrid.com>
> CC: Will Drewry <wad@chromium.org>
> CC: Kees Cook <keescook@chromium.org>
> CC: linux-kernel@vger.kernel.org
> ---
>  arch/x86/ia32/ia32entry.S              |  47 +++----
>  arch/x86/include/asm/calling.h         | 222 ++++++++++++++++-----------------
>  arch/x86/include/asm/irqflags.h        |   4 +-
>  arch/x86/include/uapi/asm/ptrace-abi.h |   1 -
>  arch/x86/kernel/entry_64.S             | 195 +++++++++++------------------
>  5 files changed, 209 insertions(+), 260 deletions(-)
>
> diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
> index 156ebca..f4bed49 100644
> --- a/arch/x86/ia32/ia32entry.S
> +++ b/arch/x86/ia32/ia32entry.S
> @@ -62,12 +62,12 @@
>          */
>         .macro LOAD_ARGS32 offset, _r9=0
>         .if \_r9
> -       movl \offset+16(%rsp),%r9d
> +       movl \offset+R9(%rsp),%r9d
>         .endif
> -       movl \offset+40(%rsp),%ecx
> -       movl \offset+48(%rsp),%edx
> -       movl \offset+56(%rsp),%esi
> -       movl \offset+64(%rsp),%edi
> +       movl \offset+RCX(%rsp),%ecx
> +       movl \offset+RDX(%rsp),%edx
> +       movl \offset+RSI(%rsp),%esi
> +       movl \offset+RDI(%rsp),%edi
>         movl %eax,%eax                  /* zero extension */
>         .endm
>
> @@ -144,7 +144,8 @@ ENTRY(ia32_sysenter_target)
>         CFI_REL_OFFSET rip,0
>         pushq_cfi %rax
>         cld
> -       SAVE_ARGS 0,1,0
> +       ALLOC_PT_GPREGS_ON_STACK
> +       SAVE_C_REGS_EXCEPT_R891011
>         /* no need to do an access_ok check here because rbp has been
>            32bit zero extended */
>         ASM_STAC
> @@ -182,7 +183,8 @@ sysexit_from_sys_call:
>         andl    $~0x200,EFLAGS-ARGOFFSET(%rsp)
>         movl    RIP-ARGOFFSET(%rsp),%edx                /* User %eip */
>         CFI_REGISTER rip,rdx
> -       RESTORE_ARGS 0,24,0,0,0,0
> +       RESTORE_RSI_RDI
> +       REMOVE_PT_GPREGS_FROM_STACK 3*8
>         xorq    %r8,%r8
>         xorq    %r9,%r9
>         xorq    %r10,%r10
> @@ -256,13 +258,13 @@ sysenter_tracesys:
>         testl   $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
>         jz      sysenter_auditsys
>  #endif
> -       SAVE_REST
> +       SAVE_EXTRA_REGS
>         CLEAR_RREGS
>         movq    $-ENOSYS,RAX(%rsp)/* ptrace can change this for a bad syscall */
>         movq    %rsp,%rdi        /* &pt_regs -> arg1 */
>         call    syscall_trace_enter
>         LOAD_ARGS32 ARGOFFSET  /* reload args from stack in case ptrace changed it */
> -       RESTORE_REST
> +       RESTORE_EXTRA_REGS
>         cmpq    $(IA32_NR_syscalls-1),%rax
>         ja      int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */
>         jmp     sysenter_do_call
> @@ -304,7 +306,8 @@ ENTRY(ia32_cstar_target)
>          * disabled irqs and here we enable it straight after entry:
>          */
>         ENABLE_INTERRUPTS(CLBR_NONE)
> -       SAVE_ARGS 8,0,0
> +       ALLOC_PT_GPREGS_ON_STACK 8
> +       SAVE_C_REGS_EXCEPT_RCX_R891011
>         movl    %eax,%eax       /* zero extension */
>         movq    %rax,ORIG_RAX-ARGOFFSET(%rsp)
>         movq    %rcx,RIP-ARGOFFSET(%rsp)
> @@ -341,7 +344,7 @@ cstar_dispatch:
>         jnz sysretl_audit
>  sysretl_from_sys_call:
>         andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
> -       RESTORE_ARGS 0,-ARG_SKIP,0,0,0
> +       RESTORE_RSI_RDI_RDX
>         movl RIP-ARGOFFSET(%rsp),%ecx
>         CFI_REGISTER rip,rcx
>         movl EFLAGS-ARGOFFSET(%rsp),%r11d
> @@ -372,13 +375,13 @@ cstar_tracesys:
>         jz cstar_auditsys
>  #endif
>         xchgl %r9d,%ebp
> -       SAVE_REST
> +       SAVE_EXTRA_REGS
>         CLEAR_RREGS 0, r9
>         movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
>         movq %rsp,%rdi        /* &pt_regs -> arg1 */
>         call syscall_trace_enter
>         LOAD_ARGS32 ARGOFFSET, 1  /* reload args from stack in case ptrace changed it */
> -       RESTORE_REST
> +       RESTORE_EXTRA_REGS
>         xchgl %ebp,%r9d
>         cmpq $(IA32_NR_syscalls-1),%rax
>         ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */
> @@ -433,7 +436,8 @@ ENTRY(ia32_syscall)
>         cld
>         /* note the registers are not zero extended to the sf.
>            this could be a problem. */
> -       SAVE_ARGS 0,1,0
> +       ALLOC_PT_GPREGS_ON_STACK
> +       SAVE_C_REGS_EXCEPT_R891011
>         orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
>         testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
>         jnz ia32_tracesys
> @@ -446,16 +450,16 @@ ia32_sysret:
>         movq %rax,RAX-ARGOFFSET(%rsp)
>  ia32_ret_from_sys_call:
>         CLEAR_RREGS -ARGOFFSET
> -       jmp int_ret_from_sys_call
> +       jmp int_ret_from_sys_call
>
> -ia32_tracesys:
> -       SAVE_REST
> +ia32_tracesys:
> +       SAVE_EXTRA_REGS
>         CLEAR_RREGS
>         movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
>         movq %rsp,%rdi        /* &pt_regs -> arg1 */
>         call syscall_trace_enter
>         LOAD_ARGS32 ARGOFFSET  /* reload args from stack in case ptrace changed it */
> -       RESTORE_REST
> +       RESTORE_EXTRA_REGS
>         cmpq $(IA32_NR_syscalls-1),%rax
>         ja  int_ret_from_sys_call       /* ia32_tracesys has set RAX(%rsp) */
>         jmp ia32_do_call
> @@ -492,7 +496,6 @@ GLOBAL(stub32_clone)
>
>         ALIGN
>  ia32_ptregs_common:
> -       popq %r11
>         CFI_ENDPROC
>         CFI_STARTPROC32 simple
>         CFI_SIGNAL_FRAME
> @@ -507,9 +510,9 @@ ia32_ptregs_common:
>  /*     CFI_REL_OFFSET  rflags,EFLAGS-ARGOFFSET*/
>         CFI_REL_OFFSET  rsp,RSP-ARGOFFSET
>  /*     CFI_REL_OFFSET  ss,SS-ARGOFFSET*/
> -       SAVE_REST
> +       SAVE_EXTRA_REGS 8
>         call *%rax
> -       RESTORE_REST
> -       jmp  ia32_sysret        /* misbalances the return cache */
> +       RESTORE_EXTRA_REGS 8
> +       ret
>         CFI_ENDPROC
>  END(ia32_ptregs_common)
> diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h
> index 3c711f2a..3835647 100644
> --- a/arch/x86/include/asm/calling.h
> +++ b/arch/x86/include/asm/calling.h
> @@ -55,143 +55,137 @@ For 32-bit we have the following conventions - kernel is built with
>   * for assembly code:
>   */
>
> -#define R15              0
> -#define R14              8
> -#define R13             16
> -#define R12             24
> -#define RBP             32
> -#define RBX             40
> -
> -/* arguments: interrupts/non tracing syscalls only save up to here: */
> -#define R11             48
> -#define R10             56
> -#define R9              64
> -#define R8              72
> -#define RAX             80
> -#define RCX             88
> -#define RDX             96
> -#define RSI            104
> -#define RDI            112
> -#define ORIG_RAX       120       /* + error_code */
> -/* end of arguments */
> -
> -/* cpu exception frame or undefined in case of fast syscall: */
> -#define RIP            128
> -#define CS             136
> -#define EFLAGS         144
> -#define RSP            152
> -#define SS             160
> -
> -#define ARGOFFSET      R11
> -
> -       .macro SAVE_ARGS addskip=0, save_rcx=1, save_r891011=1, rax_enosys=0
> -       subq  $9*8+\addskip, %rsp
> -       CFI_ADJUST_CFA_OFFSET   9*8+\addskip
> -       movq_cfi rdi, 8*8
> -       movq_cfi rsi, 7*8
> -       movq_cfi rdx, 6*8
> -
> -       .if \save_rcx
> -       movq_cfi rcx, 5*8
> -       .endif
> +/* The layout forms the "struct pt_regs" on the stack: */
> +/*
> + * C ABI says these regs are callee-preserved. They aren't saved on kernel entry
> + * unless syscall needs a complete, fully filled "struct pt_regs".
> + */
> +#define R15            0*8
> +#define R14            1*8
> +#define R13            2*8
> +#define R12            3*8
> +#define RBP            4*8
> +#define RBX            5*8
> +/* These regs are callee-clobbered. Always saved on kernel entry. */
> +#define R11            6*8
> +#define R10            7*8
> +#define R9             8*8
> +#define R8             9*8
> +#define RAX            10*8
> +#define RCX            11*8
> +#define RDX            12*8
> +#define RSI            13*8
> +#define RDI            14*8
> +/*
> + * On syscall entry, this is syscall#. On CPU exception, this is error code.
> + * On hw interrupt, it's IRQ number:
> + */
> +#define ORIG_RAX       15*8
> +/* Return frame for iretq */
> +#define RIP            16*8
> +#define CS             17*8
> +#define EFLAGS         18*8
> +#define RSP            19*8
> +#define SS             20*8
> +
> +#define ARGOFFSET      0
> +
> +       .macro ALLOC_PT_GPREGS_ON_STACK addskip=0
> +       subq    $15*8+\addskip, %rsp
> +       CFI_ADJUST_CFA_OFFSET 15*8+\addskip
> +       .endm
>
> -       .if \rax_enosys
> -       movq $-ENOSYS, 4*8(%rsp)
> -       .else
> -       movq_cfi rax, 4*8
> +       .macro SAVE_C_REGS_HELPER offset=0 rax=1 rcx=1 r8plus=1
> +       .if \r8plus
> +       movq_cfi r11, 6*8+\offset
> +       movq_cfi r10, 7*8+\offset
> +       movq_cfi r9,  8*8+\offset
> +       movq_cfi r8,  9*8+\offset
>         .endif
> -
> -       .if \save_r891011
> -       movq_cfi r8,  3*8
> -       movq_cfi r9,  2*8
> -       movq_cfi r10, 1*8
> -       movq_cfi r11, 0*8
> +       .if \rax
> +       movq_cfi rax, 10*8+\offset
> +       .endif
> +       .if \rcx
> +       movq_cfi rcx, 11*8+\offset
>         .endif
> +       movq_cfi rdx, 12*8+\offset
> +       movq_cfi rsi, 13*8+\offset
> +       movq_cfi rdi, 14*8+\offset
> +       .endm
> +       .macro SAVE_C_REGS offset=0
> +       SAVE_C_REGS_HELPER \offset, 1, 1, 1
> +       .endm
> +       .macro SAVE_C_REGS_EXCEPT_RAX_RCX offset=0
> +       SAVE_C_REGS_HELPER \offset, 0, 0, 1
> +       .endm
> +       .macro SAVE_C_REGS_EXCEPT_R891011
> +       SAVE_C_REGS_HELPER 0, 1, 1, 0
> +       .endm
> +       .macro SAVE_C_REGS_EXCEPT_RCX_R891011
> +       SAVE_C_REGS_HELPER 0, 1, 0, 0
> +       .endm
>
> +       .macro SAVE_EXTRA_REGS offset=0
> +       movq_cfi r15, 0*8+\offset
> +       movq_cfi r14, 1*8+\offset
> +       movq_cfi r13, 2*8+\offset
> +       movq_cfi r12, 3*8+\offset
> +       movq_cfi rbp, 4*8+\offset
> +       movq_cfi rbx, 5*8+\offset
> +       .endm
> +       .macro SAVE_EXTRA_REGS_RBP offset=0
> +       movq_cfi rbp, 4*8+\offset
>         .endm
>
> -#define ARG_SKIP       (9*8)
> +       .macro RESTORE_EXTRA_REGS offset=0
> +       movq_cfi_restore 0*8+\offset, r15
> +       movq_cfi_restore 1*8+\offset, r14
> +       movq_cfi_restore 2*8+\offset, r13
> +       movq_cfi_restore 3*8+\offset, r12
> +       movq_cfi_restore 4*8+\offset, rbp
> +       movq_cfi_restore 5*8+\offset, rbx
> +       .endm
>
> -       .macro RESTORE_ARGS rstor_rax=1, addskip=0, rstor_rcx=1, rstor_r11=1, \
> -                           rstor_r8910=1, rstor_rdx=1
> +       .macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1
>         .if \rstor_r11
> -       movq_cfi_restore 0*8, r11
> +       movq_cfi_restore 6*8, r11
>         .endif
> -
>         .if \rstor_r8910
> -       movq_cfi_restore 1*8, r10
> -       movq_cfi_restore 2*8, r9
> -       movq_cfi_restore 3*8, r8
> +       movq_cfi_restore 7*8, r10
> +       movq_cfi_restore 8*8, r9
> +       movq_cfi_restore 9*8, r8
>         .endif
> -
>         .if \rstor_rax
> -       movq_cfi_restore 4*8, rax
> +       movq_cfi_restore 10*8, rax
>         .endif
> -
>         .if \rstor_rcx
> -       movq_cfi_restore 5*8, rcx
> +       movq_cfi_restore 11*8, rcx
>         .endif
> -
>         .if \rstor_rdx
> -       movq_cfi_restore 6*8, rdx
> -       .endif
> -
> -       movq_cfi_restore 7*8, rsi
> -       movq_cfi_restore 8*8, rdi
> -
> -       .if ARG_SKIP+\addskip > 0
> -       addq $ARG_SKIP+\addskip, %rsp
> -       CFI_ADJUST_CFA_OFFSET   -(ARG_SKIP+\addskip)
> +       movq_cfi_restore 12*8, rdx
>         .endif
> +       movq_cfi_restore 13*8, rsi
> +       movq_cfi_restore 14*8, rdi
>         .endm
> -
> -       .macro LOAD_ARGS offset, skiprax=0
> -       movq \offset(%rsp),    %r11
> -       movq \offset+8(%rsp),  %r10
> -       movq \offset+16(%rsp), %r9
> -       movq \offset+24(%rsp), %r8
> -       movq \offset+40(%rsp), %rcx
> -       movq \offset+48(%rsp), %rdx
> -       movq \offset+56(%rsp), %rsi
> -       movq \offset+64(%rsp), %rdi
> -       .if \skiprax
> -       .else
> -       movq \offset+72(%rsp), %rax
> -       .endif
> +       .macro RESTORE_C_REGS
> +       RESTORE_C_REGS_HELPER 1,1,1,1,1
>         .endm
> -
> -#define REST_SKIP      (6*8)
> -
> -       .macro SAVE_REST
> -       subq $REST_SKIP, %rsp
> -       CFI_ADJUST_CFA_OFFSET   REST_SKIP
> -       movq_cfi rbx, 5*8
> -       movq_cfi rbp, 4*8
> -       movq_cfi r12, 3*8
> -       movq_cfi r13, 2*8
> -       movq_cfi r14, 1*8
> -       movq_cfi r15, 0*8
> +       .macro RESTORE_C_REGS_EXCEPT_RAX
> +       RESTORE_C_REGS_HELPER 0,1,1,1,1
>         .endm
> -
> -       .macro RESTORE_REST
> -       movq_cfi_restore 0*8, r15
> -       movq_cfi_restore 1*8, r14
> -       movq_cfi_restore 2*8, r13
> -       movq_cfi_restore 3*8, r12
> -       movq_cfi_restore 4*8, rbp
> -       movq_cfi_restore 5*8, rbx
> -       addq $REST_SKIP, %rsp
> -       CFI_ADJUST_CFA_OFFSET   -(REST_SKIP)
> +       .macro RESTORE_C_REGS_EXCEPT_RCX
> +       RESTORE_C_REGS_HELPER 1,0,1,1,1
>         .endm
> -
> -       .macro SAVE_ALL
> -       SAVE_ARGS
> -       SAVE_REST
> +       .macro RESTORE_RSI_RDI
> +       RESTORE_C_REGS_HELPER 0,0,0,0,0
> +       .endm
> +       .macro RESTORE_RSI_RDI_RDX
> +       RESTORE_C_REGS_HELPER 0,0,0,0,1
>         .endm
>
> -       .macro RESTORE_ALL addskip=0
> -       RESTORE_REST
> -       RESTORE_ARGS 1, \addskip
> +       .macro REMOVE_PT_GPREGS_FROM_STACK addskip=0
> +       addq $15*8+\addskip, %rsp
> +       CFI_ADJUST_CFA_OFFSET -(15*8+\addskip)
>         .endm
>
>         .macro icebp
> diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
> index 0a8b519..021bee9 100644
> --- a/arch/x86/include/asm/irqflags.h
> +++ b/arch/x86/include/asm/irqflags.h
> @@ -171,9 +171,9 @@ static inline int arch_irqs_disabled(void)
>  #define ARCH_LOCKDEP_SYS_EXIT_IRQ      \
>         TRACE_IRQS_ON; \
>         sti; \
> -       SAVE_REST; \
> +       SAVE_EXTRA_REGS; \
>         LOCKDEP_SYS_EXIT; \
> -       RESTORE_REST; \
> +       RESTORE_EXTRA_REGS; \
>         cli; \
>         TRACE_IRQS_OFF;
>
> diff --git a/arch/x86/include/uapi/asm/ptrace-abi.h b/arch/x86/include/uapi/asm/ptrace-abi.h
> index 7b0a55a..ad115bf 100644
> --- a/arch/x86/include/uapi/asm/ptrace-abi.h
> +++ b/arch/x86/include/uapi/asm/ptrace-abi.h
> @@ -49,7 +49,6 @@
>  #define EFLAGS 144
>  #define RSP 152
>  #define SS 160
> -#define ARGOFFSET R11
>  #endif /* __ASSEMBLY__ */
>
>  /* top of stack page */
> diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
> index ac542ac..45bdd26 100644
> --- a/arch/x86/kernel/entry_64.S
> +++ b/arch/x86/kernel/entry_64.S
> @@ -26,12 +26,6 @@
>   * Some macro usage:
>   * - CFI macros are used to generate dwarf2 unwind information for better
>   * backtraces. They don't change any code.
> - * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
> - * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
> - * There are unfortunately lots of special cases where some registers
> - * not touched. The macro is a big mess that should be cleaned up.
> - * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
> - * Gives a full stack frame.
>   * - ENTRY/END Define functions in the symbol table.
>   * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
>   * frame that is otherwise undefined after a SYSCALL
> @@ -190,9 +184,9 @@ ENDPROC(native_usergs_sysret64)
>         .endm
>
>  /*
> - * frame that enables calling into C.
> + * frame that enables passing a complete pt_regs to a C function.
>   */
> -       .macro PARTIAL_FRAME start=1 offset=0
> +       .macro DEFAULT_FRAME start=1 offset=0
>         XCPT_FRAME \start, ORIG_RAX+\offset-ARGOFFSET
>         CFI_REL_OFFSET rdi, RDI+\offset-ARGOFFSET
>         CFI_REL_OFFSET rsi, RSI+\offset-ARGOFFSET
> @@ -203,13 +197,6 @@ ENDPROC(native_usergs_sysret64)
>         CFI_REL_OFFSET r9, R9+\offset-ARGOFFSET
>         CFI_REL_OFFSET r10, R10+\offset-ARGOFFSET
>         CFI_REL_OFFSET r11, R11+\offset-ARGOFFSET
> -       .endm
> -
> -/*
> - * frame that enables passing a complete pt_regs to a C function.
> - */
> -       .macro DEFAULT_FRAME start=1 offset=0
> -       PARTIAL_FRAME \start, R11+\offset-R15
>         CFI_REL_OFFSET rbx, RBX+\offset
>         CFI_REL_OFFSET rbp, RBP+\offset
>         CFI_REL_OFFSET r12, R12+\offset
> @@ -221,21 +208,8 @@ ENDPROC(native_usergs_sysret64)
>  ENTRY(save_paranoid)
>         XCPT_FRAME 1 RDI+8
>         cld
> -       movq %rdi, RDI+8(%rsp)
> -       movq %rsi, RSI+8(%rsp)
> -       movq_cfi rdx, RDX+8
> -       movq_cfi rcx, RCX+8
> -       movq_cfi rax, RAX+8
> -       movq %r8, R8+8(%rsp)
> -       movq %r9, R9+8(%rsp)
> -       movq %r10, R10+8(%rsp)
> -       movq %r11, R11+8(%rsp)
> -       movq_cfi rbx, RBX+8
> -       movq %rbp, RBP+8(%rsp)
> -       movq %r12, R12+8(%rsp)
> -       movq %r13, R13+8(%rsp)
> -       movq %r14, R14+8(%rsp)
> -       movq %r15, R15+8(%rsp)
> +       SAVE_C_REGS 8
> +       SAVE_EXTRA_REGS 8
>         movl $1,%ebx
>         movl $MSR_GS_BASE,%ecx
>         rdmsr
> @@ -264,7 +238,7 @@ ENTRY(ret_from_fork)
>
>         GET_THREAD_INFO(%rcx)
>
> -       RESTORE_REST
> +       RESTORE_EXTRA_REGS
>
>         testl $3, CS-ARGOFFSET(%rsp)            # from kernel_thread?
>         jz   1f
> @@ -276,12 +250,10 @@ ENTRY(ret_from_fork)
>         jmp ret_from_sys_call                   # go to the SYSRET fastpath
>
>  1:
> -       subq $REST_SKIP, %rsp   # leave space for volatiles
> -       CFI_ADJUST_CFA_OFFSET   REST_SKIP
>         movq %rbp, %rdi
>         call *%rbx
>         movl $0, RAX(%rsp)
> -       RESTORE_REST
> +       RESTORE_EXTRA_REGS
>         jmp int_ret_from_sys_call
>         CFI_ENDPROC
>  END(ret_from_fork)
> @@ -339,9 +311,11 @@ GLOBAL(system_call_after_swapgs)
>          * and short:
>          */
>         ENABLE_INTERRUPTS(CLBR_NONE)
> -       SAVE_ARGS 8, 0, rax_enosys=1
> +       ALLOC_PT_GPREGS_ON_STACK 8
> +       SAVE_C_REGS_EXCEPT_RAX_RCX
> +       movq    $-ENOSYS,RAX-ARGOFFSET(%rsp)
>         movq_cfi rax,(ORIG_RAX-ARGOFFSET)
> -       movq  %rcx,RIP-ARGOFFSET(%rsp)
> +       movq    %rcx,RIP-ARGOFFSET(%rsp)
>         CFI_REL_OFFSET rip,RIP-ARGOFFSET
>         testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
>         jnz tracesys
> @@ -372,9 +346,9 @@ ret_from_sys_call:
>          * sysretq will re-enable interrupts:
>          */
>         TRACE_IRQS_ON
> +       RESTORE_C_REGS_EXCEPT_RCX
>         movq RIP-ARGOFFSET(%rsp),%rcx
>         CFI_REGISTER    rip,rcx
> -       RESTORE_ARGS 1,-ARG_SKIP,0
>         /*CFI_REGISTER  rflags,r11*/
>         movq    PER_CPU_VAR(old_rsp), %rsp
>         USERGS_SYSRET64
> @@ -387,16 +361,16 @@ int_ret_from_sys_call_fixup:
>
>         /* Do syscall tracing */
>  tracesys:
> -       leaq -REST_SKIP(%rsp), %rdi
> +       movq %rsp, %rdi
>         movq $AUDIT_ARCH_X86_64, %rsi
>         call syscall_trace_enter_phase1
>         test %rax, %rax
>         jnz tracesys_phase2             /* if needed, run the slow path */
> -       LOAD_ARGS 0                     /* else restore clobbered regs */
> +       RESTORE_C_REGS                  /* else restore clobbered regs */
>         jmp system_call_fastpath        /*      and return to the fast path */
>
>  tracesys_phase2:
> -       SAVE_REST
> +       SAVE_EXTRA_REGS
>         FIXUP_TOP_OF_STACK %rdi
>         movq %rsp, %rdi
>         movq $AUDIT_ARCH_X86_64, %rsi
> @@ -408,8 +382,8 @@ tracesys_phase2:
>          * We don't reload %rax because syscall_trace_entry_phase2() returned
>          * the value it wants us to use in the table lookup.
>          */
> -       LOAD_ARGS ARGOFFSET, 1
> -       RESTORE_REST
> +       RESTORE_C_REGS_EXCEPT_RAX
> +       RESTORE_EXTRA_REGS
>  #if __SYSCALL_MASK == ~0
>         cmpq $__NR_syscall_max,%rax
>  #else
> @@ -460,7 +434,7 @@ int_very_careful:
>         TRACE_IRQS_ON
>         ENABLE_INTERRUPTS(CLBR_NONE)
>  int_check_syscall_exit_work:
> -       SAVE_REST
> +       SAVE_EXTRA_REGS
>         /* Check for syscall exit trace */
>         testl $_TIF_WORK_SYSCALL_EXIT,%edx
>         jz int_signal
> @@ -479,7 +453,7 @@ int_signal:
>         call do_notify_resume
>  1:     movl $_TIF_WORK_MASK,%edi
>  int_restore_rest:
> -       RESTORE_REST
> +       RESTORE_EXTRA_REGS
>         DISABLE_INTERRUPTS(CLBR_NONE)
>         TRACE_IRQS_OFF
>         jmp int_with_check
> @@ -489,15 +463,12 @@ END(system_call)
>         .macro FORK_LIKE func
>  ENTRY(stub_\func)
>         CFI_STARTPROC
> -       popq    %r11                    /* save return address */
> -       PARTIAL_FRAME 0
> -       SAVE_REST
> -       pushq   %r11                    /* put it back on stack */
> +       DEFAULT_FRAME 0, 8              /* offset 8: return address */
> +       SAVE_EXTRA_REGS 8
>         FIXUP_TOP_OF_STACK %r11, 8
> -       DEFAULT_FRAME 0 8               /* offset 8: return address */
>         call sys_\func
>         RESTORE_TOP_OF_STACK %r11, 8
> -       ret $REST_SKIP          /* pop extended registers */
> +       ret
>         CFI_ENDPROC
>  END(stub_\func)
>         .endm
> @@ -505,7 +476,7 @@ END(stub_\func)
>         .macro FIXED_FRAME label,func
>  ENTRY(\label)
>         CFI_STARTPROC
> -       PARTIAL_FRAME 0 8               /* offset 8: return address */
> +       DEFAULT_FRAME 0, 8              /* offset 8: return address */
>         FIXUP_TOP_OF_STACK %r11, 8-ARGOFFSET
>         call \func
>         RESTORE_TOP_OF_STACK %r11, 8-ARGOFFSET
> @@ -522,12 +493,12 @@ END(\label)
>  ENTRY(stub_execve)
>         CFI_STARTPROC
>         addq $8, %rsp
> -       PARTIAL_FRAME 0
> -       SAVE_REST
> +       DEFAULT_FRAME 0
> +       SAVE_EXTRA_REGS
>         FIXUP_TOP_OF_STACK %r11
>         call sys_execve
>         movq %rax,RAX(%rsp)
> -       RESTORE_REST
> +       RESTORE_EXTRA_REGS
>         jmp int_ret_from_sys_call
>         CFI_ENDPROC
>  END(stub_execve)
> @@ -535,13 +506,13 @@ END(stub_execve)
>  ENTRY(stub_execveat)
>         CFI_STARTPROC
>         addq $8, %rsp
> -       PARTIAL_FRAME 0
> -       SAVE_REST
> +       DEFAULT_FRAME 0
> +       SAVE_EXTRA_REGS
>         FIXUP_TOP_OF_STACK %r11
>         call sys_execveat
>         RESTORE_TOP_OF_STACK %r11
>         movq %rax,RAX(%rsp)
> -       RESTORE_REST
> +       RESTORE_EXTRA_REGS
>         jmp int_ret_from_sys_call
>         CFI_ENDPROC
>  END(stub_execveat)
> @@ -553,12 +524,12 @@ END(stub_execveat)
>  ENTRY(stub_rt_sigreturn)
>         CFI_STARTPROC
>         addq $8, %rsp
> -       PARTIAL_FRAME 0
> -       SAVE_REST
> +       DEFAULT_FRAME 0
> +       SAVE_EXTRA_REGS
>         FIXUP_TOP_OF_STACK %r11
>         call sys_rt_sigreturn
>         movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
> -       RESTORE_REST
> +       RESTORE_EXTRA_REGS
>         jmp int_ret_from_sys_call
>         CFI_ENDPROC
>  END(stub_rt_sigreturn)
> @@ -567,12 +538,12 @@ END(stub_rt_sigreturn)
>  ENTRY(stub_x32_rt_sigreturn)
>         CFI_STARTPROC
>         addq $8, %rsp
> -       PARTIAL_FRAME 0
> -       SAVE_REST
> +       DEFAULT_FRAME 0
> +       SAVE_EXTRA_REGS
>         FIXUP_TOP_OF_STACK %r11
>         call sys32_x32_rt_sigreturn
>         movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
> -       RESTORE_REST
> +       RESTORE_EXTRA_REGS
>         jmp int_ret_from_sys_call
>         CFI_ENDPROC
>  END(stub_x32_rt_sigreturn)
> @@ -580,13 +551,13 @@ END(stub_x32_rt_sigreturn)
>  ENTRY(stub_x32_execve)
>         CFI_STARTPROC
>         addq $8, %rsp
> -       PARTIAL_FRAME 0
> -       SAVE_REST
> +       DEFAULT_FRAME 0
> +       SAVE_EXTRA_REGS
>         FIXUP_TOP_OF_STACK %r11
>         call compat_sys_execve
>         RESTORE_TOP_OF_STACK %r11
>         movq %rax,RAX(%rsp)
> -       RESTORE_REST
> +       RESTORE_EXTRA_REGS
>         jmp int_ret_from_sys_call
>         CFI_ENDPROC
>  END(stub_x32_execve)
> @@ -594,13 +565,13 @@ END(stub_x32_execve)
>  ENTRY(stub_x32_execveat)
>         CFI_STARTPROC
>         addq $8, %rsp
> -       PARTIAL_FRAME 0
> -       SAVE_REST
> +       DEFAULT_FRAME 0
> +       SAVE_EXTRA_REGS
>         FIXUP_TOP_OF_STACK %r11
>         call compat_sys_execveat
>         RESTORE_TOP_OF_STACK %r11
>         movq %rax,RAX(%rsp)
> -       RESTORE_REST
> +       RESTORE_EXTRA_REGS
>         jmp int_ret_from_sys_call
>         CFI_ENDPROC
>  END(stub_x32_execveat)
> @@ -656,42 +627,28 @@ END(interrupt)
>
>  /* 0(%rsp): ~(interrupt number) */
>         .macro interrupt func
> -       /* reserve pt_regs for scratch regs and rbp */
> -       subq $ORIG_RAX-RBP, %rsp
> -       CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP
>         cld
> -       /* start from rbp in pt_regs and jump over */
> -       movq_cfi rdi, (RDI-RBP)
> -       movq_cfi rsi, (RSI-RBP)
> -       movq_cfi rdx, (RDX-RBP)
> -       movq_cfi rcx, (RCX-RBP)
> -       movq_cfi rax, (RAX-RBP)
> -       movq_cfi  r8,  (R8-RBP)
> -       movq_cfi  r9,  (R9-RBP)
> -       movq_cfi r10, (R10-RBP)
> -       movq_cfi r11, (R11-RBP)
> -
> -       /* Save rbp so that we can unwind from get_irq_regs() */
> -       movq_cfi rbp, 0
> -
> -       /* Save previous stack value */
> -       movq %rsp, %rsi
> +       ALLOC_PT_GPREGS_ON_STACK -RBP
> +       SAVE_C_REGS -RBP
> +       /* this goes to 0(%rsp) for unwinder, not for saving the value: */
> +       SAVE_EXTRA_REGS_RBP -RBP
> +
> +       leaq -RBP(%rsp),%rdi    /* arg1 for \func (pointer to pt_regs) */
>
> -       leaq -RBP(%rsp),%rdi    /* arg1 for handler */
> -       testl $3, CS-RBP(%rsi)
> +       testl $3, CS-RBP(%rsp)
>         je 1f
>         SWAPGS
> +1:
>         /*
>          * irq_count is used to check if a CPU is already on an interrupt stack
>          * or not. While this is essentially redundant with preempt_count it is
>          * a little cheaper to use a separate counter in the PDA (short of
>          * moving irq_enter into assembly, which would be too much work)
>          */
> -1:     incl PER_CPU_VAR(irq_count)
> +       movq %rsp, %rsi
> +       incl PER_CPU_VAR(irq_count)
>         cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
>         CFI_DEF_CFA_REGISTER    rsi
> -
> -       /* Store previous stack value */
>         pushq %rsi
>         CFI_ESCAPE      0x0f /* DW_CFA_def_cfa_expression */, 6, \
>                         0x77 /* DW_OP_breg7 */, 0, \
> @@ -800,7 +757,8 @@ retint_swapgs:              /* return to user-space */
>          */
>  irq_return_via_sysret:
>         CFI_REMEMBER_STATE
> -       RESTORE_ARGS 1,8,1
> +       RESTORE_C_REGS
> +       REMOVE_PT_GPREGS_FROM_STACK 8
>         movq (RSP-RIP)(%rsp),%rsp
>         USERGS_SYSRET64
>         CFI_RESTORE_STATE
> @@ -816,7 +774,8 @@ retint_restore_args:        /* return to kernel space */
>          */
>         TRACE_IRQS_IRETQ
>  restore_args:
> -       RESTORE_ARGS 1,8,1
> +       RESTORE_C_REGS
> +       REMOVE_PT_GPREGS_FROM_STACK 8
>
>  irq_return:
>         INTERRUPT_RETURN
> @@ -887,12 +846,12 @@ retint_signal:
>         jz    retint_swapgs
>         TRACE_IRQS_ON
>         ENABLE_INTERRUPTS(CLBR_NONE)
> -       SAVE_REST
> +       SAVE_EXTRA_REGS
>         movq $-1,ORIG_RAX(%rsp)
>         xorl %esi,%esi          # oldset
>         movq %rsp,%rdi          # &pt_regs
>         call do_notify_resume
> -       RESTORE_REST
> +       RESTORE_EXTRA_REGS
>         DISABLE_INTERRUPTS(CLBR_NONE)
>         TRACE_IRQS_OFF
>         GET_THREAD_INFO(%rcx)
> @@ -1019,8 +978,7 @@ ENTRY(\sym)
>         pushq_cfi $-1                   /* ORIG_RAX: no syscall to restart */
>         .endif
>
> -       subq $ORIG_RAX-R15, %rsp
> -       CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
> +       ALLOC_PT_GPREGS_ON_STACK
>
>         .if \paranoid
>         .if \paranoid == 1
> @@ -1266,7 +1224,9 @@ ENTRY(xen_failsafe_callback)
>         addq $0x30,%rsp
>         CFI_ADJUST_CFA_OFFSET -0x30
>         pushq_cfi $-1 /* orig_ax = -1 => not a system call */
> -       SAVE_ALL
> +       ALLOC_PT_GPREGS_ON_STACK
> +       SAVE_C_REGS
> +       SAVE_EXTRA_REGS
>         jmp error_exit
>         CFI_ENDPROC
>  END(xen_failsafe_callback)
> @@ -1318,11 +1278,15 @@ ENTRY(paranoid_exit)
>         jnz paranoid_restore
>         TRACE_IRQS_IRETQ 0
>         SWAPGS_UNSAFE_STACK
> -       RESTORE_ALL 8
> +       RESTORE_EXTRA_REGS
> +       RESTORE_C_REGS
> +       REMOVE_PT_GPREGS_FROM_STACK 8
>         INTERRUPT_RETURN
>  paranoid_restore:
>         TRACE_IRQS_IRETQ_DEBUG 0
> -       RESTORE_ALL 8
> +       RESTORE_EXTRA_REGS
> +       RESTORE_C_REGS
> +       REMOVE_PT_GPREGS_FROM_STACK 8
>         INTERRUPT_RETURN
>         CFI_ENDPROC
>  END(paranoid_exit)
> @@ -1336,21 +1300,8 @@ ENTRY(error_entry)
>         CFI_ADJUST_CFA_OFFSET 15*8
>         /* oldrax contains error code */
>         cld
> -       movq %rdi, RDI+8(%rsp)
> -       movq %rsi, RSI+8(%rsp)
> -       movq %rdx, RDX+8(%rsp)
> -       movq %rcx, RCX+8(%rsp)
> -       movq %rax, RAX+8(%rsp)
> -       movq  %r8,  R8+8(%rsp)
> -       movq  %r9,  R9+8(%rsp)
> -       movq %r10, R10+8(%rsp)
> -       movq %r11, R11+8(%rsp)
> -       movq_cfi rbx, RBX+8
> -       movq %rbp, RBP+8(%rsp)
> -       movq %r12, R12+8(%rsp)
> -       movq %r13, R13+8(%rsp)
> -       movq %r14, R14+8(%rsp)
> -       movq %r15, R15+8(%rsp)
> +       SAVE_C_REGS 8
> +       SAVE_EXTRA_REGS 8
>         xorl %ebx,%ebx
>         testl $3,CS+8(%rsp)
>         je error_kernelspace
> @@ -1399,7 +1350,7 @@ END(error_entry)
>  ENTRY(error_exit)
>         DEFAULT_FRAME
>         movl %ebx,%eax
> -       RESTORE_REST
> +       RESTORE_EXTRA_REGS
>         DISABLE_INTERRUPTS(CLBR_NONE)
>         TRACE_IRQS_OFF
>         GET_THREAD_INFO(%rcx)
> @@ -1618,8 +1569,8 @@ end_repeat_nmi:
>          * so that we repeat another NMI.
>          */
>         pushq_cfi $-1           /* ORIG_RAX: no syscall to restart */
> -       subq $ORIG_RAX-R15, %rsp
> -       CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
> +       ALLOC_PT_GPREGS_ON_STACK
> +
>         /*
>          * Use save_paranoid to handle SWAPGS, but no need to use paranoid_exit
>          * as we should not be calling schedule in NMI context.
> @@ -1658,8 +1609,10 @@ end_repeat_nmi:
>  nmi_swapgs:
>         SWAPGS_UNSAFE_STACK
>  nmi_restore:
> +       RESTORE_EXTRA_REGS
> +       RESTORE_C_REGS
>         /* Pop the extra iret frame at once */
> -       RESTORE_ALL 6*8
> +       REMOVE_PT_GPREGS_FROM_STACK 6*8
>
>         /* Clear the NMI executing stack variable */
>         movq $0, 5*8(%rsp)
> --
> 1.8.1.4
>



-- 
Andy Lutomirski
AMA Capital Management, LLC

^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH 3/3 v3] x86: entry_64.S: fix comments. No code changes
  2015-02-12 21:54 ` [PATCH 3/3 v3] x86: entry_64.S: fix comments. No code changes Denys Vlasenko
@ 2015-02-18 20:25   ` Andy Lutomirski
  0 siblings, 0 replies; 130+ messages in thread
From: Andy Lutomirski @ 2015-02-18 20:25 UTC (permalink / raw)
  To: Denys Vlasenko
  Cc: Linus Torvalds, Oleg Nesterov, Borislav Petkov, H. Peter Anvin,
	Frederic Weisbecker, X86 ML, Alexei Starovoitov, Will Drewry,
	Kees Cook, linux-kernel

On Thu, Feb 12, 2015 at 1:54 PM, Denys Vlasenko <dvlasenk@redhat.com> wrote:
> Misleading and slightly wrong comments in "struct pt_regs" are fixed
> (four instances).
>
> Fix wrong comment atop EMPTY_FRAME macro.
>
> Explain in more details what we do with stack layout during hw interrupt.
>
> Correct comments about "partial stack frame" which are no longer true.

Applied.

--Andy

^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH 02/11] x86: code shrink in paranoid_exit
  2015-01-14 21:48 ` [PATCH 02/11] x86: code shrink in paranoid_exit Denys Vlasenko
  2015-02-11 20:36   ` Andy Lutomirski
@ 2015-02-18 23:26   ` Andy Lutomirski
  1 sibling, 0 replies; 130+ messages in thread
From: Andy Lutomirski @ 2015-02-18 23:26 UTC (permalink / raw)
  To: Denys Vlasenko
  Cc: Linus Torvalds, Oleg Nesterov, Borislav Petkov, H. Peter Anvin,
	Frederic Weisbecker, X86 ML, Alexei Starovoitov, Will Drewry,
	Kees Cook, linux-kernel

On Wed, Jan 14, 2015 at 1:48 PM, Denys Vlasenko <dvlasenk@redhat.com> wrote:
> RESTORE_EXTRA_REGS + RESTORE_C_REGS looks small, but it's
> a lot of instructions (fourteen). Let's reuse them.
>
> Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
> CC: Linus Torvalds <torvalds@linux-foundation.org>
> CC: Oleg Nesterov <oleg@redhat.com>
> CC: Borislav Petkov <bp@alien8.de>
> CC: "H. Peter Anvin" <hpa@zytor.com>
> CC: Andy Lutomirski <luto@amacapital.net>
> CC: Frederic Weisbecker <fweisbec@gmail.com>
> CC: X86 ML <x86@kernel.org>
> CC: Alexei Starovoitov <ast@plumgrid.com>
> CC: Will Drewry <wad@chromium.org>
> CC: Kees Cook <keescook@chromium.org>
> CC: linux-kernel@vger.kernel.org
> ---
>  arch/x86/kernel/entry_64.S | 6 ++----
>  1 file changed, 2 insertions(+), 4 deletions(-)
>
> diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
> index a21b5b3..f5e815e 100644
> --- a/arch/x86/kernel/entry_64.S
> +++ b/arch/x86/kernel/entry_64.S
> @@ -1270,12 +1270,10 @@ ENTRY(paranoid_exit)
>         jnz paranoid_restore
>         TRACE_IRQS_IRETQ 0
>         SWAPGS_UNSAFE_STACK
> -       RESTORE_EXTRA_REGS
> -       RESTORE_C_REGS
> -       REMOVE_PT_GPREGS_FROM_STACK 8
> -       INTERRUPT_RETURN
> +       jmp paranoid_restore1
>  paranoid_restore:
>         TRACE_IRQS_IRETQ_DEBUG 0
> +paranoid_restore1:
>         RESTORE_EXTRA_REGS
>         RESTORE_C_REGS
>         REMOVE_PT_GPREGS_FROM_STACK 8
> --
> 1.8.1.4
>

Applied, but I changed the label names.

-- 
Andy Lutomirski
AMA Capital Management, LLC

^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH 03/11] x86: mass removal of ARGOFFSET
  2015-01-14 21:48 ` [PATCH 03/11] x86: mass removal of ARGOFFSET Denys Vlasenko
@ 2015-02-21  0:31   ` Andy Lutomirski
  2015-02-23 14:31     ` Denys Vlasenko
  0 siblings, 1 reply; 130+ messages in thread
From: Andy Lutomirski @ 2015-02-21  0:31 UTC (permalink / raw)
  To: Denys Vlasenko
  Cc: Linus Torvalds, Oleg Nesterov, Borislav Petkov, H. Peter Anvin,
	Frederic Weisbecker, X86 ML, Alexei Starovoitov, Will Drewry,
	Kees Cook, linux-kernel

On Wed, Jan 14, 2015 at 1:48 PM, Denys Vlasenko <dvlasenk@redhat.com> wrote:
> ARGOFFSET is zero now, removing it changes no code.
> A few macros lost "offset" parameter, since it is always zero now too.

Can you rebase this?  It doesn't apply because sysret_check is gone,
but, even if I remove the offending hunk, it's still missing things.

--Andy

^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH 03/11] x86: mass removal of ARGOFFSET
  2015-02-21  0:31   ` Andy Lutomirski
@ 2015-02-23 14:31     ` Denys Vlasenko
  2015-02-23 16:06       ` Andy Lutomirski
  0 siblings, 1 reply; 130+ messages in thread
From: Denys Vlasenko @ 2015-02-23 14:31 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Linus Torvalds, Oleg Nesterov, Borislav Petkov, H. Peter Anvin,
	Frederic Weisbecker, X86 ML, Alexei Starovoitov, Will Drewry,
	Kees Cook, linux-kernel

On 02/21/2015 01:31 AM, Andy Lutomirski wrote:
> On Wed, Jan 14, 2015 at 1:48 PM, Denys Vlasenko <dvlasenk@redhat.com> wrote:
>> ARGOFFSET is zero now, removing it changes no code.
>> A few macros lost "offset" parameter, since it is always zero now too.
> 
> Can you rebase this?  It doesn't apply because sysret_check is gone,
> but, even if I remove the offending hunk, it's still missing things.

Sure thing.

I must be on a wrong branch? I'm on "x86/entry", and patch does apply
to the current tip of that branch:

$ git am 0001-x86-mass-removal-of-ARGOFFSET.patch
Applying: x86: mass removal of ARGOFFSET
$ git branch -a
  master
* x86/entry
  x86/entry-devel
  remotes/origin/HEAD -> origin/master
  remotes/origin/auditsc-lite
  remotes/origin/checkpatch

Should I base patches on x86/entry-devel ?

^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH 03/11] x86: mass removal of ARGOFFSET
  2015-02-23 14:31     ` Denys Vlasenko
@ 2015-02-23 16:06       ` Andy Lutomirski
  0 siblings, 0 replies; 130+ messages in thread
From: Andy Lutomirski @ 2015-02-23 16:06 UTC (permalink / raw)
  To: Denys Vlasenko
  Cc: Linus Torvalds, Oleg Nesterov, Borislav Petkov, H. Peter Anvin,
	Frederic Weisbecker, X86 ML, Alexei Starovoitov, Will Drewry,
	Kees Cook, linux-kernel

On Mon, Feb 23, 2015 at 6:31 AM, Denys Vlasenko <dvlasenk@redhat.com> wrote:
> On 02/21/2015 01:31 AM, Andy Lutomirski wrote:
>> On Wed, Jan 14, 2015 at 1:48 PM, Denys Vlasenko <dvlasenk@redhat.com> wrote:
>>> ARGOFFSET is zero now, removing it changes no code.
>>> A few macros lost "offset" parameter, since it is always zero now too.
>>
>> Can you rebase this?  It doesn't apply because sysret_check is gone,
>> but, even if I remove the offending hunk, it's still missing things.
>
> Sure thing.
>
> I must be on a wrong branch? I'm on "x86/entry", and patch does apply
> to the current tip of that branch:
>
> $ git am 0001-x86-mass-removal-of-ARGOFFSET.patch
> Applying: x86: mass removal of ARGOFFSET
> $ git branch -a
>   master
> * x86/entry
>   x86/entry-devel
>   remotes/origin/HEAD -> origin/master
>   remotes/origin/auditsc-lite
>   remotes/origin/checkpatch
>
> Should I base patches on x86/entry-devel ?

No, "entry" is correct.  entry-devel doesn't exist any more.  Had you
done 'git remote update'?

In any case, your rebased version is fine.

--Andy

^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH 2/3 v3] x86: entry_64.S: always allocate complete "struct pt_regs"
  2015-02-12 21:54 ` [PATCH 2/3 v3] x86: entry_64.S: always allocate complete "struct pt_regs" Denys Vlasenko
  2015-02-18 20:22   ` Andy Lutomirski
@ 2015-02-25 12:37   ` Andrey Wagin
  2015-02-25 13:55     ` Denys Vlasenko
                       ` (2 more replies)
  1 sibling, 3 replies; 130+ messages in thread
From: Andrey Wagin @ 2015-02-25 12:37 UTC (permalink / raw)
  To: Denys Vlasenko
  Cc: Andy Lutomirski, Linus Torvalds, Oleg Nesterov, Borislav Petkov,
	H. Peter Anvin, Frederic Weisbecker, X86 ML, Alexei Starovoitov,
	Will Drewry, Kees Cook, LKML

[-- Attachment #1: Type: text/plain, Size: 6278 bytes --]

2015-02-13 0:54 GMT+03:00 Denys Vlasenko <dvlasenk@redhat.com>:
> 64-bit code was using six stack slots less by not saving/restoring
> registers which are callee-preserved according to C ABI,
> and not allocating space for them.
> Only when syscall needed a complete "struct pt_regs",
> the complete area was allocated and filled in.
> As an additional twist, on interrupt entry a "slightly less truncated pt_regs"
> trick is used, to make nested interrupt stacks easier to unwind.
>
> This proved to be a source of significant obfuscation and subtle bugs.
> For example, stub_fork had to pop the return address,
> extend the struct, save registers, and push return address back. Ugly.
> ia32_ptregs_common pops return address and "returns" via jmp insn,
> throwing a wrench into CPU return stack cache.
>
> This patch changes code to always allocate a complete "struct pt_regs".
> The saving of registers is still done lazily.
>
> "Partial pt_regs" trick on interrupt stack is retained.
>
> Macros which manipulate "struct pt_regs" on stack are reworked:
> ALLOC_PT_GPREGS_ON_STACK allocates the structure.
> SAVE_C_REGS saves to it those registers which are clobbered by C code.
> SAVE_EXTRA_REGS saves to it all other registers.
> Corresponding RESTORE_* and REMOVE_PT_GPREGS_FROM_STACK macros reverse it.
>
> ia32_ptregs_common, stub_fork and friends lost their ugly dance with
> return pointer.
>
> LOAD_ARGS32 in ia32entry.S now uses symbolic stack offsets
> instead of magic numbers.
>
> error_entry and save_paranoid now use SAVE_C_REGS + SAVE_EXTRA_REGS
> instead of having it open-coded yet again.
>
> Patch was run-tested: 64-bit executables, 32-bit executables,
> strace works.
> Timing tests did not show measurable difference in 32-bit
> and 64-bit syscalls.

Hello Denys,

My test vm doesn't boot with this patch. Could you help to investigate
this issue?

I have attached a kernel config and console log.

[    2.428124] systemd-journald[284]: Received request to flush
runtime journal from PID 1
[    2.508252] traps: systemd-cgroups[380] general protection
ip:7f68ad096028 sp:7fffba298af8 error:0 in
ld-2.18.so[7f68ad07e000+20000][  OK
[    2.600179] traps: systemd-cgroups[384] general protection
ip:7f11b9a9c028 sp:7fff4420f978 error:0 in
ld-2.18.so[7f11b9a84000+20000]
[    2.743790] traps: systemd-cgroups[392] general protection
ip:7f7f40a44028 sp:7fffe1c1b8b8 error:0 in
ld-2.18.so[7f7f40a2c000+20000]
[    2.754576] traps: systemd-cgroups[393] general protection
ip:7fd1314bd028 sp:7ffff76ecc88 error:0 in
ld-2.18.so[7fd1314a5000+20000]
[    2.765343] traps: systemd-cgroups[396] general protection
ip:7ff4537b7028 sp:7fff05902378 error:0 in
ld-2.18.so[7ff45379f000+20000]
[    2.798782] traps: systemd-cgroups[399] general protection
ip:7f4d5bc9c028 sp:7fff35cb3a48 error:0 in
ld-2.18.so[7f4d5bc84000+20000]
[    3.376298] Kernel panic - not syncing: Attempted to kill init!
exitcode=0x0000000b
[    3.376298]
[    3.377199] CPU: 2 PID: 1 Comm: systemd Not tainted 3.19.0+ #169
[    3.377199] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
[    3.377199]  0000000000000000 00000000302f3f16 ffff88007c88bc48
ffffffff817b2f1a
[    3.377199]  0000000000000000 ffffffff81a348f8 ffff88007c88bcc8
ffffffff817b14d8
[    3.377199]  ffff880000000010 ffff88007c88bcd8 ffff88007c88bc78
00000000302f3f16
[    3.377199] Call Trace:
[    3.377199]  [<ffffffff817b2f1a>] dump_stack+0x45/0x57
[    3.377199]  [<ffffffff817b14d8>] panic+0xd5/0x20e
[    3.377199]  [<ffffffff8109c855>] do_exit+0xb15/0xb20
[    3.377199]  [<ffffffff8109c8fe>] do_group_exit+0x4e/0xc0
[    3.377199]  [<ffffffff810aa751>] get_signal+0x271/0x860
[    3.377199]  [<ffffffff81015547>] do_signal+0x37/0x760
[    3.377199]  [<ffffffff810cc850>] ? wake_up_state+0x20/0x20
[    3.377199]  [<ffffffff817bbb2c>] ? int_very_careful+0x5/0xd
[    3.377199]  [<ffffffff810ee08d>] ? trace_hardirqs_on_caller+0x13d/0x1e0
[    3.377199]  [<ffffffff81015cd0>] do_notify_resume+0x60/0x70
[    3.377199]  [<ffffffff817bbb7f>] int_signal+0x12/0x17
[    3.377199] Kernel Offset: 0x0 from 0xffffffff81000000 (relocation
range: 0xffffffff80000000-0xffffffff9fffffff)
[    3.377199] ---[ end Kernel panic - not syncing: Attempted to kill
init! exitcode=0x0000000b
[    3.377199]


[avagin@localhost linux-2.6]$ git bisect log
# bad: [549c45cea13c1b1d4557dec2e5e3f256615682f6] Add linux-next
specific files for 20150224
# good: [c517d838eb7d07bbe9507871fab3931deccff539] Linux 4.0-rc1
git bisect start 'next-20150224' 'v4.0-rc1'
# good: [bb37267e803b5d88eca99db1f501cb0410de60ff] Merge
remote-tracking branch 'integrity/next'
git bisect good bb37267e803b5d88eca99db1f501cb0410de60ff
# good: [e6dad2c669e2029ee71bd05e858f69e12906dbfc] ia64: use %*pb[l]
to print bitmaps including cpumasks and nodemasks
git bisect good e6dad2c669e2029ee71bd05e858f69e12906dbfc
# good: [e1a0636f6f12948e8e64afe107bda7cb189ef938] Merge
remote-tracking branch 'kselftest/next'
git bisect good e1a0636f6f12948e8e64afe107bda7cb189ef938
# good: [e39b37bdf3aeda8fed17aa7dff42a6fecfc4f262] fs/ufs/super.c: fix
potential race condition
git bisect good e39b37bdf3aeda8fed17aa7dff42a6fecfc4f262
# good: [97dc3f62c8f37795cdee3001e86c346dd7f7a879] scripts/gdb: add
internal helper and convenience function for per-cpu lookup
git bisect good 97dc3f62c8f37795cdee3001e86c346dd7f7a879
# bad: [a2dc0f333a3dd8eba791afc848623c0a708ea2e4] Merge
remote-tracking branch 'livepatching/for-next'
git bisect bad a2dc0f333a3dd8eba791afc848623c0a708ea2e4
# bad: [82bbadb13ef4b3e2217a2fe297be768caf473314] x86, entry: Remove
int_check_syscall_exit_work
git bisect bad 82bbadb13ef4b3e2217a2fe297be768caf473314
# bad: [0bc5dd63915de8bac63ef63f6e75c3fecd0838d2] x86: entry_64.S:
always allocate complete "struct pt_regs"
git bisect bad 0bc5dd63915de8bac63ef63f6e75c3fecd0838d2
# good: [2202eb90f175cf45d1b2d1c64dbb5676a8ad07ad] x86: introduce
push/pop macros which generate CFI_REL_OFFSET and CFI_RESTORE
git bisect good 2202eb90f175cf45d1b2d1c64dbb5676a8ad07ad
# good: [f5e1c4084319a42e5f14d41e2d638949ce66bc08] x86: entry_64.S:
fix wrong symbolic constant usage: R11->ARGOFFSET
git bisect good f5e1c4084319a42e5f14d41e2d638949ce66bc08
# first bad commit: [0bc5dd63915de8bac63ef63f6e75c3fecd0838d2] x86:
entry_64.S: always allocate complete "struct pt_regs"

[-- Attachment #2: .config --]
[-- Type: application/octet-stream, Size: 102677 bytes --]

#
# Automatically generated file; DO NOT EDIT.
# Linux/x86 3.19.0 Kernel Configuration
#
CONFIG_64BIT=y
CONFIG_X86_64=y
CONFIG_X86=y
CONFIG_INSTRUCTION_DECODER=y
CONFIG_PERF_EVENTS_INTEL_UNCORE=y
CONFIG_OUTPUT_FORMAT="elf64-x86-64"
CONFIG_ARCH_DEFCONFIG="arch/x86/configs/x86_64_defconfig"
CONFIG_LOCKDEP_SUPPORT=y
CONFIG_STACKTRACE_SUPPORT=y
CONFIG_HAVE_LATENCYTOP_SUPPORT=y
CONFIG_MMU=y
CONFIG_NEED_DMA_MAP_STATE=y
CONFIG_NEED_SG_DMA_LENGTH=y
CONFIG_GENERIC_ISA_DMA=y
CONFIG_GENERIC_BUG=y
CONFIG_GENERIC_BUG_RELATIVE_POINTERS=y
CONFIG_GENERIC_HWEIGHT=y
CONFIG_ARCH_MAY_HAVE_PC_FDC=y
CONFIG_RWSEM_XCHGADD_ALGORITHM=y
CONFIG_GENERIC_CALIBRATE_DELAY=y
CONFIG_ARCH_HAS_CPU_RELAX=y
CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y
CONFIG_HAVE_SETUP_PER_CPU_AREA=y
CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK=y
CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK=y
CONFIG_ARCH_HIBERNATION_POSSIBLE=y
CONFIG_ARCH_SUSPEND_POSSIBLE=y
CONFIG_ARCH_WANT_HUGE_PMD_SHARE=y
CONFIG_ARCH_WANT_GENERAL_HUGETLB=y
CONFIG_ZONE_DMA32=y
CONFIG_AUDIT_ARCH=y
CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING=y
CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y
CONFIG_HAVE_INTEL_TXT=y
CONFIG_X86_64_SMP=y
CONFIG_X86_HT=y
CONFIG_ARCH_HWEIGHT_CFLAGS="-fcall-saved-rdi -fcall-saved-rsi -fcall-saved-rdx -fcall-saved-rcx -fcall-saved-r8 -fcall-saved-r9 -fcall-saved-r10 -fcall-saved-r11"
CONFIG_ARCH_SUPPORTS_UPROBES=y
CONFIG_FIX_EARLYCON_MEM=y
CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
CONFIG_IRQ_WORK=y
CONFIG_BUILDTIME_EXTABLE_SORT=y

#
# General setup
#
CONFIG_INIT_ENV_ARG_LIMIT=32
CONFIG_CROSS_COMPILE=""
# CONFIG_COMPILE_TEST is not set
CONFIG_LOCALVERSION=""
# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_HAVE_KERNEL_GZIP=y
CONFIG_HAVE_KERNEL_BZIP2=y
CONFIG_HAVE_KERNEL_LZMA=y
CONFIG_HAVE_KERNEL_XZ=y
CONFIG_HAVE_KERNEL_LZO=y
CONFIG_HAVE_KERNEL_LZ4=y
CONFIG_KERNEL_GZIP=y
# CONFIG_KERNEL_BZIP2 is not set
# CONFIG_KERNEL_LZMA is not set
# CONFIG_KERNEL_XZ is not set
# CONFIG_KERNEL_LZO is not set
# CONFIG_KERNEL_LZ4 is not set
CONFIG_DEFAULT_HOSTNAME="(none)"
CONFIG_SWAP=y
CONFIG_SYSVIPC=y
CONFIG_SYSVIPC_SYSCTL=y
CONFIG_POSIX_MQUEUE=y
CONFIG_POSIX_MQUEUE_SYSCTL=y
CONFIG_CROSS_MEMORY_ATTACH=y
CONFIG_FHANDLE=y
# CONFIG_USELIB is not set
CONFIG_AUDIT=y
CONFIG_HAVE_ARCH_AUDITSYSCALL=y
CONFIG_AUDITSYSCALL=y
CONFIG_AUDIT_WATCH=y
CONFIG_AUDIT_TREE=y

#
# IRQ subsystem
#
CONFIG_GENERIC_IRQ_PROBE=y
CONFIG_GENERIC_IRQ_SHOW=y
CONFIG_GENERIC_IRQ_LEGACY_ALLOC_HWIRQ=y
CONFIG_GENERIC_PENDING_IRQ=y
CONFIG_IRQ_DOMAIN=y
CONFIG_GENERIC_MSI_IRQ=y
# CONFIG_IRQ_DOMAIN_DEBUG is not set
CONFIG_IRQ_FORCED_THREADING=y
CONFIG_SPARSE_IRQ=y
CONFIG_CLOCKSOURCE_WATCHDOG=y
CONFIG_ARCH_CLOCKSOURCE_DATA=y
CONFIG_CLOCKSOURCE_VALIDATE_LAST_CYCLE=y
CONFIG_GENERIC_TIME_VSYSCALL=y
CONFIG_GENERIC_CLOCKEVENTS=y
CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y
CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST=y
CONFIG_GENERIC_CMOS_UPDATE=y

#
# Timers subsystem
#
CONFIG_TICK_ONESHOT=y
CONFIG_NO_HZ_COMMON=y
# CONFIG_HZ_PERIODIC is not set
CONFIG_NO_HZ_IDLE=y
# CONFIG_NO_HZ_FULL is not set
CONFIG_NO_HZ=y
CONFIG_HIGH_RES_TIMERS=y

#
# CPU/Task time and stats accounting
#
# CONFIG_TICK_CPU_ACCOUNTING is not set
# CONFIG_VIRT_CPU_ACCOUNTING_GEN is not set
CONFIG_IRQ_TIME_ACCOUNTING=y
CONFIG_BSD_PROCESS_ACCT=y
CONFIG_BSD_PROCESS_ACCT_V3=y
CONFIG_TASKSTATS=y
CONFIG_TASK_DELAY_ACCT=y
CONFIG_TASK_XACCT=y
CONFIG_TASK_IO_ACCOUNTING=y

#
# RCU Subsystem
#
CONFIG_TREE_RCU=y
CONFIG_SRCU=y
# CONFIG_TASKS_RCU is not set
CONFIG_RCU_STALL_COMMON=y
# CONFIG_RCU_USER_QS is not set
CONFIG_RCU_FANOUT=64
CONFIG_RCU_FANOUT_LEAF=16
# CONFIG_RCU_FANOUT_EXACT is not set
# CONFIG_RCU_FAST_NO_HZ is not set
# CONFIG_TREE_RCU_TRACE is not set
CONFIG_RCU_KTHREAD_PRIO=0
CONFIG_RCU_NOCB_CPU=y
# CONFIG_RCU_NOCB_CPU_NONE is not set
# CONFIG_RCU_NOCB_CPU_ZERO is not set
CONFIG_RCU_NOCB_CPU_ALL=y
CONFIG_BUILD_BIN2C=y
CONFIG_IKCONFIG=m
# CONFIG_IKCONFIG_PROC is not set
CONFIG_LOG_BUF_SHIFT=18
CONFIG_LOG_CPU_MAX_BUF_SHIFT=12
CONFIG_HAVE_UNSTABLE_SCHED_CLOCK=y
CONFIG_ARCH_SUPPORTS_NUMA_BALANCING=y
CONFIG_ARCH_SUPPORTS_INT128=y
CONFIG_NUMA_BALANCING=y
CONFIG_NUMA_BALANCING_DEFAULT_ENABLED=y
CONFIG_CGROUPS=y
# CONFIG_CGROUP_DEBUG is not set
CONFIG_CGROUP_FREEZER=y
CONFIG_CGROUP_DEVICE=y
CONFIG_CPUSETS=y
CONFIG_PROC_PID_CPUSET=y
CONFIG_CGROUP_CPUACCT=y
CONFIG_PAGE_COUNTER=y
CONFIG_MEMCG=y
CONFIG_MEMCG_SWAP=y
CONFIG_MEMCG_SWAP_ENABLED=y
CONFIG_MEMCG_KMEM=y
CONFIG_CGROUP_HUGETLB=y
CONFIG_CGROUP_PERF=y
CONFIG_CGROUP_SCHED=y
CONFIG_FAIR_GROUP_SCHED=y
CONFIG_CFS_BANDWIDTH=y
CONFIG_RT_GROUP_SCHED=y
CONFIG_BLK_CGROUP=y
# CONFIG_DEBUG_BLK_CGROUP is not set
CONFIG_CHECKPOINT_RESTORE=y
CONFIG_NAMESPACES=y
CONFIG_UTS_NS=y
CONFIG_IPC_NS=y
CONFIG_USER_NS=y
CONFIG_PID_NS=y
CONFIG_NET_NS=y
CONFIG_SCHED_AUTOGROUP=y
# CONFIG_SYSFS_DEPRECATED is not set
CONFIG_RELAY=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_INITRAMFS_SOURCE=""
CONFIG_RD_GZIP=y
CONFIG_RD_BZIP2=y
CONFIG_RD_LZMA=y
CONFIG_RD_XZ=y
CONFIG_RD_LZO=y
CONFIG_RD_LZ4=y
CONFIG_INIT_FALLBACK=y
# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_SYSCTL=y
CONFIG_ANON_INODES=y
CONFIG_HAVE_UID16=y
CONFIG_SYSCTL_EXCEPTION_TRACE=y
CONFIG_HAVE_PCSPKR_PLATFORM=y
CONFIG_BPF=y
CONFIG_EXPERT=y
CONFIG_UID16=y
CONFIG_SGETMASK_SYSCALL=y
CONFIG_SYSFS_SYSCALL=y
# CONFIG_SYSCTL_SYSCALL is not set
CONFIG_KALLSYMS=y
CONFIG_KALLSYMS_ALL=y
CONFIG_PRINTK=y
CONFIG_BUG=y
CONFIG_ELF_CORE=y
CONFIG_PCSPKR_PLATFORM=y
CONFIG_BASE_FULL=y
CONFIG_FUTEX=y
CONFIG_EPOLL=y
CONFIG_SIGNALFD=y
CONFIG_TIMERFD=y
CONFIG_EVENTFD=y
# CONFIG_BPF_SYSCALL is not set
CONFIG_SHMEM=y
CONFIG_AIO=y
CONFIG_ADVISE_SYSCALLS=y
CONFIG_PCI_QUIRKS=y
# CONFIG_EMBEDDED is not set
CONFIG_HAVE_PERF_EVENTS=y

#
# Kernel Performance Events And Counters
#
CONFIG_PERF_EVENTS=y
# CONFIG_DEBUG_PERF_USE_VMALLOC is not set
CONFIG_VM_EVENT_COUNTERS=y
CONFIG_SLUB_DEBUG=y
# CONFIG_COMPAT_BRK is not set
# CONFIG_SLAB is not set
CONFIG_SLUB=y
# CONFIG_SLOB is not set
CONFIG_SLUB_CPU_PARTIAL=y
CONFIG_SYSTEM_TRUSTED_KEYRING=y
CONFIG_PROFILING=y
CONFIG_TRACEPOINTS=y
# CONFIG_OPROFILE is not set
CONFIG_HAVE_OPROFILE=y
CONFIG_OPROFILE_NMI_TIMER=y
CONFIG_KPROBES=y
CONFIG_JUMP_LABEL=y
CONFIG_OPTPROBES=y
CONFIG_KPROBES_ON_FTRACE=y
CONFIG_UPROBES=y
# CONFIG_HAVE_64BIT_ALIGNED_ACCESS is not set
CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y
CONFIG_ARCH_USE_BUILTIN_BSWAP=y
CONFIG_KRETPROBES=y
CONFIG_HAVE_IOREMAP_PROT=y
CONFIG_HAVE_KPROBES=y
CONFIG_HAVE_KRETPROBES=y
CONFIG_HAVE_OPTPROBES=y
CONFIG_HAVE_KPROBES_ON_FTRACE=y
CONFIG_HAVE_ARCH_TRACEHOOK=y
CONFIG_HAVE_DMA_ATTRS=y
CONFIG_HAVE_DMA_CONTIGUOUS=y
CONFIG_GENERIC_SMP_IDLE_THREAD=y
CONFIG_HAVE_REGS_AND_STACK_ACCESS_API=y
CONFIG_HAVE_CLK=y
CONFIG_HAVE_DMA_API_DEBUG=y
CONFIG_HAVE_HW_BREAKPOINT=y
CONFIG_HAVE_MIXED_BREAKPOINTS_REGS=y
CONFIG_HAVE_USER_RETURN_NOTIFIER=y
CONFIG_HAVE_PERF_EVENTS_NMI=y
CONFIG_HAVE_PERF_REGS=y
CONFIG_HAVE_PERF_USER_STACK_DUMP=y
CONFIG_HAVE_ARCH_JUMP_LABEL=y
CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG=y
CONFIG_HAVE_ALIGNED_STRUCT_PAGE=y
CONFIG_HAVE_CMPXCHG_LOCAL=y
CONFIG_HAVE_CMPXCHG_DOUBLE=y
CONFIG_ARCH_WANT_COMPAT_IPC_PARSE_VERSION=y
CONFIG_ARCH_WANT_OLD_COMPAT_IPC=y
CONFIG_HAVE_ARCH_SECCOMP_FILTER=y
CONFIG_SECCOMP_FILTER=y
CONFIG_HAVE_CC_STACKPROTECTOR=y
CONFIG_CC_STACKPROTECTOR=y
# CONFIG_CC_STACKPROTECTOR_NONE is not set
# CONFIG_CC_STACKPROTECTOR_REGULAR is not set
CONFIG_CC_STACKPROTECTOR_STRONG=y
CONFIG_HAVE_CONTEXT_TRACKING=y
CONFIG_HAVE_VIRT_CPU_ACCOUNTING_GEN=y
CONFIG_HAVE_IRQ_TIME_ACCOUNTING=y
CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE=y
CONFIG_HAVE_ARCH_SOFT_DIRTY=y
CONFIG_MODULES_USE_ELF_RELA=y
CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK=y
CONFIG_OLD_SIGSUSPEND3=y
CONFIG_COMPAT_OLD_SIGACTION=y

#
# GCOV-based kernel profiling
#
# CONFIG_GCOV_KERNEL is not set
CONFIG_ARCH_HAS_GCOV_PROFILE_ALL=y
# CONFIG_HAVE_GENERIC_DMA_COHERENT is not set
CONFIG_SLABINFO=y
CONFIG_RT_MUTEXES=y
CONFIG_BASE_SMALL=0
CONFIG_MODULES=y
# CONFIG_MODULE_FORCE_LOAD is not set
CONFIG_MODULE_UNLOAD=y
# CONFIG_MODULE_FORCE_UNLOAD is not set
# CONFIG_MODVERSIONS is not set
# CONFIG_MODULE_SRCVERSION_ALL is not set
CONFIG_MODULE_SIG=y
# CONFIG_MODULE_SIG_FORCE is not set
CONFIG_MODULE_SIG_ALL=y
# CONFIG_MODULE_SIG_SHA1 is not set
# CONFIG_MODULE_SIG_SHA224 is not set
CONFIG_MODULE_SIG_SHA256=y
# CONFIG_MODULE_SIG_SHA384 is not set
# CONFIG_MODULE_SIG_SHA512 is not set
CONFIG_MODULE_SIG_HASH="sha256"
# CONFIG_MODULE_COMPRESS is not set
CONFIG_STOP_MACHINE=y
CONFIG_BLOCK=y
CONFIG_BLK_DEV_BSG=y
CONFIG_BLK_DEV_BSGLIB=y
CONFIG_BLK_DEV_INTEGRITY=y
CONFIG_BLK_DEV_THROTTLING=y
# CONFIG_BLK_CMDLINE_PARSER is not set

#
# Partition Types
#
CONFIG_PARTITION_ADVANCED=y
# CONFIG_ACORN_PARTITION is not set
CONFIG_AIX_PARTITION=y
CONFIG_OSF_PARTITION=y
CONFIG_AMIGA_PARTITION=y
# CONFIG_ATARI_PARTITION is not set
CONFIG_MAC_PARTITION=y
CONFIG_MSDOS_PARTITION=y
CONFIG_BSD_DISKLABEL=y
CONFIG_MINIX_SUBPARTITION=y
CONFIG_SOLARIS_X86_PARTITION=y
CONFIG_UNIXWARE_DISKLABEL=y
CONFIG_LDM_PARTITION=y
# CONFIG_LDM_DEBUG is not set
CONFIG_SGI_PARTITION=y
# CONFIG_ULTRIX_PARTITION is not set
CONFIG_SUN_PARTITION=y
CONFIG_KARMA_PARTITION=y
CONFIG_EFI_PARTITION=y
# CONFIG_SYSV68_PARTITION is not set
# CONFIG_CMDLINE_PARTITION is not set
CONFIG_BLOCK_COMPAT=y

#
# IO Schedulers
#
CONFIG_IOSCHED_NOOP=y
CONFIG_IOSCHED_DEADLINE=y
CONFIG_IOSCHED_CFQ=y
CONFIG_CFQ_GROUP_IOSCHED=y
# CONFIG_DEFAULT_DEADLINE is not set
CONFIG_DEFAULT_CFQ=y
# CONFIG_DEFAULT_NOOP is not set
CONFIG_DEFAULT_IOSCHED="cfq"
CONFIG_ASN1=y
CONFIG_UNINLINE_SPIN_UNLOCK=y
CONFIG_ARCH_SUPPORTS_ATOMIC_RMW=y
CONFIG_RWSEM_SPIN_ON_OWNER=y
CONFIG_LOCK_SPIN_ON_OWNER=y
CONFIG_ARCH_USE_QUEUE_RWLOCK=y
CONFIG_QUEUE_RWLOCK=y
CONFIG_FREEZER=y

#
# Processor type and features
#
CONFIG_ZONE_DMA=y
CONFIG_SMP=y
CONFIG_X86_FEATURE_NAMES=y
CONFIG_X86_X2APIC=y
CONFIG_X86_MPPARSE=y
CONFIG_X86_EXTENDED_PLATFORM=y
# CONFIG_X86_NUMACHIP is not set
# CONFIG_X86_VSMP is not set
CONFIG_X86_UV=y
# CONFIG_X86_GOLDFISH is not set
CONFIG_X86_INTEL_LPSS=y
CONFIG_IOSF_MBI=m
# CONFIG_IOSF_MBI_DEBUG is not set
CONFIG_X86_SUPPORTS_MEMORY_FAILURE=y
CONFIG_SCHED_OMIT_FRAME_POINTER=y
CONFIG_HYPERVISOR_GUEST=y
CONFIG_PARAVIRT=y
# CONFIG_PARAVIRT_DEBUG is not set
# CONFIG_PARAVIRT_SPINLOCKS is not set
CONFIG_XEN=y
CONFIG_XEN_DOM0=y
CONFIG_XEN_PVHVM=y
CONFIG_XEN_MAX_DOMAIN_MEMORY=500
CONFIG_XEN_SAVE_RESTORE=y
CONFIG_XEN_DEBUG_FS=y
# CONFIG_XEN_PVH is not set
CONFIG_KVM_GUEST=y
# CONFIG_KVM_DEBUG_FS is not set
CONFIG_PARAVIRT_TIME_ACCOUNTING=y
CONFIG_PARAVIRT_CLOCK=y
CONFIG_NO_BOOTMEM=y
# CONFIG_MEMTEST is not set
# CONFIG_MK8 is not set
# CONFIG_MPSC is not set
# CONFIG_MCORE2 is not set
# CONFIG_MATOM is not set
CONFIG_GENERIC_CPU=y
CONFIG_X86_INTERNODE_CACHE_SHIFT=6
CONFIG_X86_L1_CACHE_SHIFT=6
CONFIG_X86_TSC=y
CONFIG_X86_CMPXCHG64=y
CONFIG_X86_CMOV=y
CONFIG_X86_MINIMUM_CPU_FAMILY=64
CONFIG_X86_DEBUGCTLMSR=y
# CONFIG_PROCESSOR_SELECT is not set
CONFIG_CPU_SUP_INTEL=y
CONFIG_CPU_SUP_AMD=y
CONFIG_CPU_SUP_CENTAUR=y
CONFIG_HPET_TIMER=y
CONFIG_HPET_EMULATE_RTC=y
CONFIG_DMI=y
# CONFIG_GART_IOMMU is not set
# CONFIG_CALGARY_IOMMU is not set
CONFIG_SWIOTLB=y
CONFIG_IOMMU_HELPER=y
# CONFIG_MAXSMP is not set
CONFIG_NR_CPUS=8
CONFIG_SCHED_SMT=y
CONFIG_SCHED_MC=y
# CONFIG_PREEMPT_NONE is not set
CONFIG_PREEMPT_VOLUNTARY=y
# CONFIG_PREEMPT is not set
CONFIG_X86_UP_APIC_MSI=y
CONFIG_X86_LOCAL_APIC=y
CONFIG_X86_IO_APIC=y
CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y
CONFIG_X86_MCE=y
CONFIG_X86_MCE_INTEL=y
CONFIG_X86_MCE_AMD=y
CONFIG_X86_MCE_THRESHOLD=y
# CONFIG_X86_MCE_INJECT is not set
CONFIG_X86_THERMAL_VECTOR=y
CONFIG_X86_16BIT=y
CONFIG_X86_ESPFIX64=y
CONFIG_X86_VSYSCALL_EMULATION=y
# CONFIG_I8K is not set
CONFIG_MICROCODE=m
CONFIG_MICROCODE_INTEL=y
CONFIG_MICROCODE_AMD=y
CONFIG_MICROCODE_OLD_INTERFACE=y
# CONFIG_MICROCODE_INTEL_EARLY is not set
# CONFIG_MICROCODE_AMD_EARLY is not set
CONFIG_X86_MSR=y
CONFIG_X86_CPUID=y
CONFIG_ARCH_PHYS_ADDR_T_64BIT=y
CONFIG_ARCH_DMA_ADDR_T_64BIT=y
CONFIG_DIRECT_GBPAGES=y
CONFIG_NUMA=y
CONFIG_AMD_NUMA=y
CONFIG_X86_64_ACPI_NUMA=y
CONFIG_NODES_SPAN_OTHER_NODES=y
# CONFIG_NUMA_EMU is not set
CONFIG_NODES_SHIFT=9
CONFIG_ARCH_SPARSEMEM_ENABLE=y
CONFIG_ARCH_SPARSEMEM_DEFAULT=y
CONFIG_ARCH_SELECT_MEMORY_MODEL=y
# CONFIG_ARCH_MEMORY_PROBE is not set
CONFIG_ARCH_PROC_KCORE_TEXT=y
CONFIG_ILLEGAL_POINTER_VALUE=0xdead000000000000
CONFIG_SELECT_MEMORY_MODEL=y
CONFIG_SPARSEMEM_MANUAL=y
CONFIG_SPARSEMEM=y
CONFIG_NEED_MULTIPLE_NODES=y
CONFIG_HAVE_MEMORY_PRESENT=y
CONFIG_SPARSEMEM_EXTREME=y
CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y
CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER=y
CONFIG_SPARSEMEM_VMEMMAP=y
CONFIG_HAVE_MEMBLOCK=y
CONFIG_HAVE_MEMBLOCK_NODE_MAP=y
CONFIG_ARCH_DISCARD_MEMBLOCK=y
CONFIG_MEMORY_ISOLATION=y
# CONFIG_MOVABLE_NODE is not set
# CONFIG_HAVE_BOOTMEM_INFO_NODE is not set
CONFIG_MEMORY_HOTPLUG=y
CONFIG_MEMORY_HOTPLUG_SPARSE=y
# CONFIG_MEMORY_HOTREMOVE is not set
CONFIG_PAGEFLAGS_EXTENDED=y
CONFIG_SPLIT_PTLOCK_CPUS=4
CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK=y
CONFIG_MEMORY_BALLOON=y
CONFIG_BALLOON_COMPACTION=y
CONFIG_COMPACTION=y
CONFIG_MIGRATION=y
CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION=y
CONFIG_PHYS_ADDR_T_64BIT=y
CONFIG_ZONE_DMA_FLAG=1
CONFIG_BOUNCE=y
CONFIG_VIRT_TO_BUS=y
CONFIG_KSM=y
CONFIG_DEFAULT_MMAP_MIN_ADDR=65536
CONFIG_ARCH_SUPPORTS_MEMORY_FAILURE=y
CONFIG_MEMORY_FAILURE=y
# CONFIG_HWPOISON_INJECT is not set
CONFIG_TRANSPARENT_HUGEPAGE=y
# CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS is not set
CONFIG_TRANSPARENT_HUGEPAGE_MADVISE=y
CONFIG_CLEANCACHE=y
CONFIG_FRONTSWAP=y
# CONFIG_CMA is not set
CONFIG_MEM_SOFT_DIRTY=y
CONFIG_ZSWAP=y
CONFIG_ZPOOL=y
CONFIG_ZBUD=y
CONFIG_ZSMALLOC=y
# CONFIG_PGTABLE_MAPPING is not set
CONFIG_GENERIC_EARLY_IOREMAP=y
CONFIG_X86_CHECK_BIOS_CORRUPTION=y
# CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK is not set
CONFIG_X86_RESERVE_LOW=64
CONFIG_MTRR=y
CONFIG_MTRR_SANITIZER=y
CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT=0
CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT=1
CONFIG_X86_PAT=y
CONFIG_ARCH_USES_PG_UNCACHED=y
CONFIG_ARCH_RANDOM=y
CONFIG_X86_SMAP=y
CONFIG_X86_INTEL_MPX=y
CONFIG_EFI=y
CONFIG_EFI_STUB=y
# CONFIG_EFI_MIXED is not set
CONFIG_SECCOMP=y
# CONFIG_HZ_100 is not set
# CONFIG_HZ_250 is not set
# CONFIG_HZ_300 is not set
CONFIG_HZ_1000=y
CONFIG_HZ=1000
CONFIG_SCHED_HRTICK=y
CONFIG_KEXEC=y
# CONFIG_KEXEC_FILE is not set
CONFIG_CRASH_DUMP=y
CONFIG_KEXEC_JUMP=y
CONFIG_PHYSICAL_START=0x1000000
CONFIG_RELOCATABLE=y
# CONFIG_RANDOMIZE_BASE is not set
CONFIG_PHYSICAL_ALIGN=0x1000000
CONFIG_HOTPLUG_CPU=y
# CONFIG_BOOTPARAM_HOTPLUG_CPU0 is not set
# CONFIG_DEBUG_HOTPLUG_CPU0 is not set
# CONFIG_COMPAT_VDSO is not set
# CONFIG_CMDLINE_BOOL is not set
CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y
CONFIG_ARCH_ENABLE_MEMORY_HOTREMOVE=y
CONFIG_USE_PERCPU_NUMA_NODE_ID=y

#
# Power management and ACPI options
#
CONFIG_ARCH_HIBERNATION_HEADER=y
CONFIG_SUSPEND=y
CONFIG_SUSPEND_FREEZER=y
CONFIG_HIBERNATE_CALLBACKS=y
CONFIG_HIBERNATION=y
CONFIG_PM_STD_PARTITION=""
CONFIG_PM_SLEEP=y
CONFIG_PM_SLEEP_SMP=y
# CONFIG_PM_AUTOSLEEP is not set
# CONFIG_PM_WAKELOCKS is not set
CONFIG_PM=y
CONFIG_PM_DEBUG=y
CONFIG_PM_ADVANCED_DEBUG=y
# CONFIG_PM_TEST_SUSPEND is not set
CONFIG_PM_SLEEP_DEBUG=y
# CONFIG_DPM_WATCHDOG is not set
CONFIG_PM_TRACE=y
CONFIG_PM_TRACE_RTC=y
CONFIG_PM_CLK=y
# CONFIG_WQ_POWER_EFFICIENT_DEFAULT is not set
CONFIG_ACPI=y
CONFIG_ACPI_LEGACY_TABLES_LOOKUP=y
CONFIG_ARCH_MIGHT_HAVE_ACPI_PDC=y
CONFIG_ACPI_SLEEP=y
# CONFIG_ACPI_PROCFS_POWER is not set
# CONFIG_ACPI_EC_DEBUGFS is not set
CONFIG_ACPI_AC=y
CONFIG_ACPI_BATTERY=y
CONFIG_ACPI_BUTTON=y
# CONFIG_ACPI_VIDEO is not set
CONFIG_ACPI_FAN=y
CONFIG_ACPI_DOCK=y
CONFIG_ACPI_PROCESSOR=y
CONFIG_ACPI_HOTPLUG_CPU=y
# CONFIG_ACPI_PROCESSOR_AGGREGATOR is not set
CONFIG_ACPI_THERMAL=y
CONFIG_ACPI_NUMA=y
# CONFIG_ACPI_CUSTOM_DSDT is not set
CONFIG_ACPI_INITRD_TABLE_OVERRIDE=y
# CONFIG_ACPI_DEBUG is not set
CONFIG_ACPI_PCI_SLOT=y
CONFIG_X86_PM_TIMER=y
CONFIG_ACPI_CONTAINER=y
CONFIG_ACPI_HOTPLUG_MEMORY=y
# CONFIG_ACPI_SBS is not set
CONFIG_ACPI_HED=y
# CONFIG_ACPI_CUSTOM_METHOD is not set
CONFIG_ACPI_BGRT=y
# CONFIG_ACPI_REDUCED_HARDWARE_ONLY is not set
CONFIG_HAVE_ACPI_APEI=y
CONFIG_HAVE_ACPI_APEI_NMI=y
CONFIG_ACPI_APEI=y
CONFIG_ACPI_APEI_GHES=y
CONFIG_ACPI_APEI_PCIEAER=y
CONFIG_ACPI_APEI_MEMORY_FAILURE=y
# CONFIG_ACPI_APEI_EINJ is not set
# CONFIG_ACPI_APEI_ERST_DEBUG is not set
# CONFIG_ACPI_EXTLOG is not set
# CONFIG_PMIC_OPREGION is not set
CONFIG_SFI=y

#
# CPU Frequency scaling
#
CONFIG_CPU_FREQ=y
CONFIG_CPU_FREQ_GOV_COMMON=y
# CONFIG_CPU_FREQ_STAT is not set
# CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE is not set
# CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE is not set
# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set
CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
# CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE is not set
CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
CONFIG_CPU_FREQ_GOV_POWERSAVE=y
CONFIG_CPU_FREQ_GOV_USERSPACE=y
CONFIG_CPU_FREQ_GOV_ONDEMAND=y
CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y

#
# CPU frequency scaling drivers
#
CONFIG_X86_INTEL_PSTATE=y
# CONFIG_X86_PCC_CPUFREQ is not set
# CONFIG_X86_ACPI_CPUFREQ is not set
# CONFIG_X86_SPEEDSTEP_CENTRINO is not set
# CONFIG_X86_P4_CLOCKMOD is not set

#
# shared options
#
# CONFIG_X86_SPEEDSTEP_LIB is not set

#
# CPU Idle
#
CONFIG_CPU_IDLE=y
# CONFIG_CPU_IDLE_GOV_LADDER is not set
CONFIG_CPU_IDLE_GOV_MENU=y
# CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED is not set
CONFIG_INTEL_IDLE=y

#
# Memory power savings
#
# CONFIG_I7300_IDLE is not set

#
# Bus options (PCI etc.)
#
CONFIG_PCI=y
CONFIG_PCI_DIRECT=y
CONFIG_PCI_MMCONFIG=y
CONFIG_PCI_XEN=y
CONFIG_PCI_DOMAINS=y
# CONFIG_PCI_CNB20LE_QUIRK is not set
CONFIG_PCIEPORTBUS=y
CONFIG_HOTPLUG_PCI_PCIE=y
CONFIG_PCIEAER=y
CONFIG_PCIE_ECRC=y
# CONFIG_PCIEAER_INJECT is not set
CONFIG_PCIEASPM=y
# CONFIG_PCIEASPM_DEBUG is not set
CONFIG_PCIEASPM_DEFAULT=y
# CONFIG_PCIEASPM_POWERSAVE is not set
# CONFIG_PCIEASPM_PERFORMANCE is not set
CONFIG_PCIE_PME=y
CONFIG_PCI_MSI=y
# CONFIG_PCI_DEBUG is not set
# CONFIG_PCI_REALLOC_ENABLE_AUTO is not set
CONFIG_PCI_STUB=y
# CONFIG_XEN_PCIDEV_FRONTEND is not set
CONFIG_HT_IRQ=y
CONFIG_PCI_ATS=y
CONFIG_PCI_IOV=y
CONFIG_PCI_PRI=y
CONFIG_PCI_PASID=y
CONFIG_PCI_LABEL=y

#
# PCI host controller drivers
#
CONFIG_ISA_DMA_API=y
CONFIG_AMD_NB=y
CONFIG_PCCARD=y
CONFIG_PCMCIA=y
CONFIG_PCMCIA_LOAD_CIS=y
CONFIG_CARDBUS=y

#
# PC-card bridges
#
# CONFIG_YENTA is not set
# CONFIG_PD6729 is not set
# CONFIG_I82092 is not set
CONFIG_HOTPLUG_PCI=y
CONFIG_HOTPLUG_PCI_ACPI=y
# CONFIG_HOTPLUG_PCI_ACPI_IBM is not set
# CONFIG_HOTPLUG_PCI_CPCI is not set
# CONFIG_HOTPLUG_PCI_SHPC is not set
# CONFIG_RAPIDIO is not set
# CONFIG_X86_SYSFB is not set

#
# Executable file formats / Emulations
#
CONFIG_BINFMT_ELF=y
CONFIG_COMPAT_BINFMT_ELF=y
CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE=y
CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y
CONFIG_BINFMT_SCRIPT=y
# CONFIG_HAVE_AOUT is not set
CONFIG_BINFMT_MISC=m
CONFIG_COREDUMP=y
CONFIG_IA32_EMULATION=y
# CONFIG_IA32_AOUT is not set
# CONFIG_X86_X32 is not set
CONFIG_COMPAT=y
CONFIG_COMPAT_FOR_U64_ALIGNMENT=y
CONFIG_SYSVIPC_COMPAT=y
CONFIG_KEYS_COMPAT=y
CONFIG_X86_DEV_DMA_OPS=y
CONFIG_PMC_ATOM=y
CONFIG_NET=y

#
# Networking options
#
CONFIG_PACKET=y
CONFIG_PACKET_DIAG=m
CONFIG_UNIX=y
CONFIG_UNIX_DIAG=m
CONFIG_XFRM=y
CONFIG_XFRM_ALGO=y
CONFIG_XFRM_USER=y
CONFIG_XFRM_SUB_POLICY=y
CONFIG_XFRM_MIGRATE=y
CONFIG_XFRM_STATISTICS=y
# CONFIG_NET_KEY is not set
CONFIG_INET=y
CONFIG_IP_MULTICAST=y
CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_IP_FIB_TRIE_STATS=y
CONFIG_IP_MULTIPLE_TABLES=y
CONFIG_IP_ROUTE_MULTIPATH=y
CONFIG_IP_ROUTE_VERBOSE=y
# CONFIG_IP_PNP is not set
# CONFIG_NET_IPIP is not set
# CONFIG_NET_IPGRE_DEMUX is not set
# CONFIG_NET_IP_TUNNEL is not set
CONFIG_IP_MROUTE=y
CONFIG_IP_MROUTE_MULTIPLE_TABLES=y
CONFIG_IP_PIMSM_V1=y
CONFIG_IP_PIMSM_V2=y
CONFIG_SYN_COOKIES=y
# CONFIG_NET_UDP_TUNNEL is not set
# CONFIG_NET_FOU is not set
# CONFIG_GENEVE is not set
# CONFIG_INET_AH is not set
# CONFIG_INET_ESP is not set
# CONFIG_INET_IPCOMP is not set
# CONFIG_INET_XFRM_TUNNEL is not set
# CONFIG_INET_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
CONFIG_INET_LRO=y
CONFIG_INET_DIAG=m
CONFIG_INET_TCP_DIAG=m
CONFIG_INET_UDP_DIAG=m
CONFIG_TCP_CONG_ADVANCED=y
# CONFIG_TCP_CONG_BIC is not set
CONFIG_TCP_CONG_CUBIC=y
# CONFIG_TCP_CONG_WESTWOOD is not set
# CONFIG_TCP_CONG_HTCP is not set
# CONFIG_TCP_CONG_HSTCP is not set
# CONFIG_TCP_CONG_HYBLA is not set
# CONFIG_TCP_CONG_VEGAS is not set
# CONFIG_TCP_CONG_SCALABLE is not set
# CONFIG_TCP_CONG_LP is not set
# CONFIG_TCP_CONG_VENO is not set
# CONFIG_TCP_CONG_YEAH is not set
# CONFIG_TCP_CONG_ILLINOIS is not set
# CONFIG_TCP_CONG_DCTCP is not set
CONFIG_DEFAULT_CUBIC=y
# CONFIG_DEFAULT_RENO is not set
CONFIG_DEFAULT_TCP_CONG="cubic"
CONFIG_TCP_MD5SIG=y
CONFIG_IPV6=y
CONFIG_IPV6_ROUTER_PREF=y
CONFIG_IPV6_ROUTE_INFO=y
CONFIG_IPV6_OPTIMISTIC_DAD=y
# CONFIG_INET6_AH is not set
# CONFIG_INET6_ESP is not set
# CONFIG_INET6_IPCOMP is not set
CONFIG_IPV6_MIP6=y
# CONFIG_INET6_XFRM_TUNNEL is not set
# CONFIG_INET6_TUNNEL is not set
# CONFIG_INET6_XFRM_MODE_TRANSPORT is not set
# CONFIG_INET6_XFRM_MODE_TUNNEL is not set
# CONFIG_INET6_XFRM_MODE_BEET is not set
# CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set
# CONFIG_IPV6_SIT is not set
# CONFIG_IPV6_TUNNEL is not set
# CONFIG_IPV6_GRE is not set
CONFIG_IPV6_MULTIPLE_TABLES=y
CONFIG_IPV6_SUBTREES=y
CONFIG_IPV6_MROUTE=y
CONFIG_IPV6_MROUTE_MULTIPLE_TABLES=y
CONFIG_IPV6_PIMSM_V2=y
CONFIG_NETLABEL=y
CONFIG_NETWORK_SECMARK=y
CONFIG_NET_PTP_CLASSIFY=y
CONFIG_NETWORK_PHY_TIMESTAMPING=y
CONFIG_NETFILTER=y
# CONFIG_NETFILTER_DEBUG is not set
CONFIG_NETFILTER_ADVANCED=y
CONFIG_BRIDGE_NETFILTER=m

#
# Core Netfilter Configuration
#
# CONFIG_NETFILTER_NETLINK_ACCT is not set
# CONFIG_NETFILTER_NETLINK_QUEUE is not set
# CONFIG_NETFILTER_NETLINK_LOG is not set
CONFIG_NF_CONNTRACK=m
CONFIG_NF_CONNTRACK_MARK=y
CONFIG_NF_CONNTRACK_SECMARK=y
CONFIG_NF_CONNTRACK_PROCFS=y
CONFIG_NF_CONNTRACK_EVENTS=y
# CONFIG_NF_CONNTRACK_TIMEOUT is not set
CONFIG_NF_CONNTRACK_TIMESTAMP=y
# CONFIG_NF_CT_PROTO_DCCP is not set
# CONFIG_NF_CT_PROTO_SCTP is not set
# CONFIG_NF_CT_PROTO_UDPLITE is not set
# CONFIG_NF_CONNTRACK_AMANDA is not set
# CONFIG_NF_CONNTRACK_FTP is not set
# CONFIG_NF_CONNTRACK_H323 is not set
# CONFIG_NF_CONNTRACK_IRC is not set
# CONFIG_NF_CONNTRACK_NETBIOS_NS is not set
# CONFIG_NF_CONNTRACK_SNMP is not set
# CONFIG_NF_CONNTRACK_PPTP is not set
# CONFIG_NF_CONNTRACK_SANE is not set
# CONFIG_NF_CONNTRACK_SIP is not set
# CONFIG_NF_CONNTRACK_TFTP is not set
# CONFIG_NF_CT_NETLINK is not set
# CONFIG_NF_CT_NETLINK_TIMEOUT is not set
CONFIG_NF_NAT=m
CONFIG_NF_NAT_NEEDED=y
# CONFIG_NF_NAT_AMANDA is not set
# CONFIG_NF_NAT_FTP is not set
# CONFIG_NF_NAT_IRC is not set
# CONFIG_NF_NAT_SIP is not set
# CONFIG_NF_NAT_TFTP is not set
# CONFIG_NF_NAT_REDIRECT is not set
# CONFIG_NF_TABLES is not set
CONFIG_NETFILTER_XTABLES=y

#
# Xtables combined modules
#
# CONFIG_NETFILTER_XT_MARK is not set
# CONFIG_NETFILTER_XT_CONNMARK is not set

#
# Xtables targets
#
# CONFIG_NETFILTER_XT_TARGET_AUDIT is not set
# CONFIG_NETFILTER_XT_TARGET_CHECKSUM is not set
# CONFIG_NETFILTER_XT_TARGET_CLASSIFY is not set
# CONFIG_NETFILTER_XT_TARGET_CONNMARK is not set
# CONFIG_NETFILTER_XT_TARGET_CONNSECMARK is not set
# CONFIG_NETFILTER_XT_TARGET_CT is not set
# CONFIG_NETFILTER_XT_TARGET_DSCP is not set
# CONFIG_NETFILTER_XT_TARGET_HL is not set
# CONFIG_NETFILTER_XT_TARGET_HMARK is not set
# CONFIG_NETFILTER_XT_TARGET_IDLETIMER is not set
# CONFIG_NETFILTER_XT_TARGET_LED is not set
# CONFIG_NETFILTER_XT_TARGET_LOG is not set
# CONFIG_NETFILTER_XT_TARGET_MARK is not set
# CONFIG_NETFILTER_XT_NAT is not set
# CONFIG_NETFILTER_XT_TARGET_NETMAP is not set
# CONFIG_NETFILTER_XT_TARGET_NFLOG is not set
# CONFIG_NETFILTER_XT_TARGET_NFQUEUE is not set
# CONFIG_NETFILTER_XT_TARGET_NOTRACK is not set
# CONFIG_NETFILTER_XT_TARGET_RATEEST is not set
# CONFIG_NETFILTER_XT_TARGET_REDIRECT is not set
# CONFIG_NETFILTER_XT_TARGET_TEE is not set
# CONFIG_NETFILTER_XT_TARGET_TPROXY is not set
# CONFIG_NETFILTER_XT_TARGET_TRACE is not set
# CONFIG_NETFILTER_XT_TARGET_SECMARK is not set
# CONFIG_NETFILTER_XT_TARGET_TCPMSS is not set
# CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP is not set

#
# Xtables matches
#
# CONFIG_NETFILTER_XT_MATCH_ADDRTYPE is not set
# CONFIG_NETFILTER_XT_MATCH_BPF is not set
# CONFIG_NETFILTER_XT_MATCH_CGROUP is not set
# CONFIG_NETFILTER_XT_MATCH_CLUSTER is not set
# CONFIG_NETFILTER_XT_MATCH_COMMENT is not set
# CONFIG_NETFILTER_XT_MATCH_CONNBYTES is not set
# CONFIG_NETFILTER_XT_MATCH_CONNLABEL is not set
# CONFIG_NETFILTER_XT_MATCH_CONNLIMIT is not set
# CONFIG_NETFILTER_XT_MATCH_CONNMARK is not set
CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
# CONFIG_NETFILTER_XT_MATCH_CPU is not set
# CONFIG_NETFILTER_XT_MATCH_DCCP is not set
# CONFIG_NETFILTER_XT_MATCH_DEVGROUP is not set
# CONFIG_NETFILTER_XT_MATCH_DSCP is not set
# CONFIG_NETFILTER_XT_MATCH_ECN is not set
# CONFIG_NETFILTER_XT_MATCH_ESP is not set
# CONFIG_NETFILTER_XT_MATCH_HASHLIMIT is not set
# CONFIG_NETFILTER_XT_MATCH_HELPER is not set
# CONFIG_NETFILTER_XT_MATCH_HL is not set
# CONFIG_NETFILTER_XT_MATCH_IPCOMP is not set
# CONFIG_NETFILTER_XT_MATCH_IPRANGE is not set
# CONFIG_NETFILTER_XT_MATCH_L2TP is not set
# CONFIG_NETFILTER_XT_MATCH_LENGTH is not set
# CONFIG_NETFILTER_XT_MATCH_LIMIT is not set
# CONFIG_NETFILTER_XT_MATCH_MAC is not set
# CONFIG_NETFILTER_XT_MATCH_MARK is not set
# CONFIG_NETFILTER_XT_MATCH_MULTIPORT is not set
# CONFIG_NETFILTER_XT_MATCH_NFACCT is not set
# CONFIG_NETFILTER_XT_MATCH_OWNER is not set
# CONFIG_NETFILTER_XT_MATCH_POLICY is not set
# CONFIG_NETFILTER_XT_MATCH_PHYSDEV is not set
# CONFIG_NETFILTER_XT_MATCH_PKTTYPE is not set
# CONFIG_NETFILTER_XT_MATCH_QUOTA is not set
# CONFIG_NETFILTER_XT_MATCH_RATEEST is not set
# CONFIG_NETFILTER_XT_MATCH_REALM is not set
# CONFIG_NETFILTER_XT_MATCH_RECENT is not set
# CONFIG_NETFILTER_XT_MATCH_SCTP is not set
# CONFIG_NETFILTER_XT_MATCH_SOCKET is not set
# CONFIG_NETFILTER_XT_MATCH_STATE is not set
# CONFIG_NETFILTER_XT_MATCH_STATISTIC is not set
# CONFIG_NETFILTER_XT_MATCH_STRING is not set
# CONFIG_NETFILTER_XT_MATCH_TCPMSS is not set
# CONFIG_NETFILTER_XT_MATCH_TIME is not set
# CONFIG_NETFILTER_XT_MATCH_U32 is not set
# CONFIG_IP_SET is not set
# CONFIG_IP_VS is not set

#
# IP: Netfilter Configuration
#
CONFIG_NF_DEFRAG_IPV4=m
CONFIG_NF_CONNTRACK_IPV4=m
# CONFIG_NF_CONNTRACK_PROC_COMPAT is not set
# CONFIG_NF_LOG_ARP is not set
# CONFIG_NF_LOG_IPV4 is not set
CONFIG_NF_REJECT_IPV4=y
CONFIG_NF_NAT_IPV4=m
# CONFIG_NF_NAT_MASQUERADE_IPV4 is not set
# CONFIG_NF_NAT_PPTP is not set
# CONFIG_NF_NAT_H323 is not set
CONFIG_IP_NF_IPTABLES=y
# CONFIG_IP_NF_MATCH_AH is not set
# CONFIG_IP_NF_MATCH_ECN is not set
# CONFIG_IP_NF_MATCH_RPFILTER is not set
# CONFIG_IP_NF_MATCH_TTL is not set
CONFIG_IP_NF_FILTER=y
CONFIG_IP_NF_TARGET_REJECT=y
# CONFIG_IP_NF_TARGET_SYNPROXY is not set
# CONFIG_IP_NF_NAT is not set
CONFIG_IP_NF_MANGLE=m
# CONFIG_IP_NF_TARGET_CLUSTERIP is not set
# CONFIG_IP_NF_TARGET_ECN is not set
# CONFIG_IP_NF_TARGET_TTL is not set
CONFIG_IP_NF_RAW=m
CONFIG_IP_NF_SECURITY=m
# CONFIG_IP_NF_ARPTABLES is not set

#
# IPv6: Netfilter Configuration
#
CONFIG_NF_DEFRAG_IPV6=m
CONFIG_NF_CONNTRACK_IPV6=m
CONFIG_NF_REJECT_IPV6=m
# CONFIG_NF_LOG_IPV6 is not set
CONFIG_NF_NAT_IPV6=m
# CONFIG_NF_NAT_MASQUERADE_IPV6 is not set
CONFIG_IP6_NF_IPTABLES=m
# CONFIG_IP6_NF_MATCH_AH is not set
# CONFIG_IP6_NF_MATCH_EUI64 is not set
# CONFIG_IP6_NF_MATCH_FRAG is not set
# CONFIG_IP6_NF_MATCH_OPTS is not set
# CONFIG_IP6_NF_MATCH_HL is not set
# CONFIG_IP6_NF_MATCH_IPV6HEADER is not set
# CONFIG_IP6_NF_MATCH_MH is not set
CONFIG_IP6_NF_MATCH_RPFILTER=m
# CONFIG_IP6_NF_MATCH_RT is not set
# CONFIG_IP6_NF_TARGET_HL is not set
CONFIG_IP6_NF_FILTER=m
CONFIG_IP6_NF_TARGET_REJECT=m
# CONFIG_IP6_NF_TARGET_SYNPROXY is not set
CONFIG_IP6_NF_MANGLE=m
CONFIG_IP6_NF_RAW=m
CONFIG_IP6_NF_SECURITY=m
# CONFIG_IP6_NF_NAT is not set
CONFIG_BRIDGE_NF_EBTABLES=m
CONFIG_BRIDGE_EBT_BROUTE=m
CONFIG_BRIDGE_EBT_T_FILTER=m
CONFIG_BRIDGE_EBT_T_NAT=m
# CONFIG_BRIDGE_EBT_802_3 is not set
# CONFIG_BRIDGE_EBT_AMONG is not set
# CONFIG_BRIDGE_EBT_ARP is not set
# CONFIG_BRIDGE_EBT_IP is not set
# CONFIG_BRIDGE_EBT_IP6 is not set
# CONFIG_BRIDGE_EBT_LIMIT is not set
# CONFIG_BRIDGE_EBT_MARK is not set
# CONFIG_BRIDGE_EBT_PKTTYPE is not set
# CONFIG_BRIDGE_EBT_STP is not set
# CONFIG_BRIDGE_EBT_VLAN is not set
# CONFIG_BRIDGE_EBT_ARPREPLY is not set
# CONFIG_BRIDGE_EBT_DNAT is not set
# CONFIG_BRIDGE_EBT_MARK_T is not set
# CONFIG_BRIDGE_EBT_REDIRECT is not set
# CONFIG_BRIDGE_EBT_SNAT is not set
# CONFIG_BRIDGE_EBT_LOG is not set
# CONFIG_BRIDGE_EBT_NFLOG is not set
# CONFIG_IP_DCCP is not set
# CONFIG_IP_SCTP is not set
# CONFIG_RDS is not set
# CONFIG_TIPC is not set
# CONFIG_ATM is not set
# CONFIG_L2TP is not set
CONFIG_STP=m
CONFIG_BRIDGE=m
CONFIG_BRIDGE_IGMP_SNOOPING=y
CONFIG_HAVE_NET_DSA=y
# CONFIG_VLAN_8021Q is not set
# CONFIG_DECNET is not set
CONFIG_LLC=m
# CONFIG_LLC2 is not set
# CONFIG_IPX is not set
# CONFIG_ATALK is not set
# CONFIG_X25 is not set
# CONFIG_LAPB is not set
# CONFIG_PHONET is not set
# CONFIG_6LOWPAN is not set
# CONFIG_IEEE802154 is not set
CONFIG_NET_SCHED=y

#
# Queueing/Scheduling
#
# CONFIG_NET_SCH_CBQ is not set
# CONFIG_NET_SCH_HTB is not set
# CONFIG_NET_SCH_HFSC is not set
# CONFIG_NET_SCH_PRIO is not set
# CONFIG_NET_SCH_MULTIQ is not set
# CONFIG_NET_SCH_RED is not set
# CONFIG_NET_SCH_SFB is not set
# CONFIG_NET_SCH_SFQ is not set
# CONFIG_NET_SCH_TEQL is not set
# CONFIG_NET_SCH_TBF is not set
# CONFIG_NET_SCH_GRED is not set
# CONFIG_NET_SCH_DSMARK is not set
# CONFIG_NET_SCH_NETEM is not set
# CONFIG_NET_SCH_DRR is not set
# CONFIG_NET_SCH_MQPRIO is not set
# CONFIG_NET_SCH_CHOKE is not set
# CONFIG_NET_SCH_QFQ is not set
# CONFIG_NET_SCH_CODEL is not set
# CONFIG_NET_SCH_FQ_CODEL is not set
# CONFIG_NET_SCH_FQ is not set
# CONFIG_NET_SCH_HHF is not set
# CONFIG_NET_SCH_PIE is not set
# CONFIG_NET_SCH_INGRESS is not set
# CONFIG_NET_SCH_PLUG is not set

#
# Classification
#
CONFIG_NET_CLS=y
# CONFIG_NET_CLS_BASIC is not set
# CONFIG_NET_CLS_TCINDEX is not set
# CONFIG_NET_CLS_ROUTE4 is not set
# CONFIG_NET_CLS_FW is not set
# CONFIG_NET_CLS_U32 is not set
# CONFIG_NET_CLS_RSVP is not set
# CONFIG_NET_CLS_RSVP6 is not set
# CONFIG_NET_CLS_FLOW is not set
CONFIG_NET_CLS_CGROUP=y
# CONFIG_NET_CLS_BPF is not set
CONFIG_NET_EMATCH=y
CONFIG_NET_EMATCH_STACK=32
# CONFIG_NET_EMATCH_CMP is not set
# CONFIG_NET_EMATCH_NBYTE is not set
# CONFIG_NET_EMATCH_U32 is not set
# CONFIG_NET_EMATCH_META is not set
# CONFIG_NET_EMATCH_TEXT is not set
CONFIG_NET_CLS_ACT=y
# CONFIG_NET_ACT_POLICE is not set
# CONFIG_NET_ACT_GACT is not set
# CONFIG_NET_ACT_MIRRED is not set
# CONFIG_NET_ACT_IPT is not set
# CONFIG_NET_ACT_NAT is not set
# CONFIG_NET_ACT_PEDIT is not set
# CONFIG_NET_ACT_SIMP is not set
# CONFIG_NET_ACT_SKBEDIT is not set
# CONFIG_NET_ACT_CSUM is not set
# CONFIG_NET_ACT_VLAN is not set
CONFIG_NET_SCH_FIFO=y
CONFIG_DCB=y
# CONFIG_DNS_RESOLVER is not set
# CONFIG_BATMAN_ADV is not set
# CONFIG_OPENVSWITCH is not set
# CONFIG_VSOCKETS is not set
CONFIG_NETLINK_MMAP=y
CONFIG_NETLINK_DIAG=m
# CONFIG_NET_MPLS_GSO is not set
# CONFIG_HSR is not set
# CONFIG_NET_SWITCHDEV is not set
CONFIG_RPS=y
CONFIG_RFS_ACCEL=y
CONFIG_XPS=y
CONFIG_CGROUP_NET_PRIO=y
CONFIG_CGROUP_NET_CLASSID=y
CONFIG_NET_RX_BUSY_POLL=y
CONFIG_BQL=y
CONFIG_BPF_JIT=y
CONFIG_NET_FLOW_LIMIT=y

#
# Network testing
#
# CONFIG_NET_PKTGEN is not set
# CONFIG_NET_TCPPROBE is not set
CONFIG_NET_DROP_MONITOR=y
CONFIG_HAMRADIO=y

#
# Packet Radio protocols
#
# CONFIG_AX25 is not set
# CONFIG_CAN is not set
# CONFIG_IRDA is not set
# CONFIG_BT is not set
# CONFIG_AF_RXRPC is not set
CONFIG_FIB_RULES=y
# CONFIG_WIRELESS is not set
# CONFIG_WIMAX is not set
CONFIG_RFKILL=m
CONFIG_RFKILL_LEDS=y
CONFIG_RFKILL_INPUT=y
# CONFIG_RFKILL_GPIO is not set
# CONFIG_NET_9P is not set
# CONFIG_CAIF is not set
# CONFIG_CEPH_LIB is not set
# CONFIG_NFC is not set
CONFIG_HAVE_BPF_JIT=y

#
# Device Drivers
#

#
# Generic Driver Options
#
# CONFIG_UEVENT_HELPER is not set
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_STANDALONE=y
CONFIG_PREVENT_FIRMWARE_BUILD=y
CONFIG_FW_LOADER=y
# CONFIG_FIRMWARE_IN_KERNEL is not set
CONFIG_EXTRA_FIRMWARE=""
# CONFIG_FW_LOADER_USER_HELPER_FALLBACK is not set
CONFIG_ALLOW_DEV_COREDUMP=y
# CONFIG_DEBUG_DRIVER is not set
CONFIG_DEBUG_DEVRES=y
CONFIG_SYS_HYPERVISOR=y
# CONFIG_GENERIC_CPU_DEVICES is not set
CONFIG_GENERIC_CPU_AUTOPROBE=y
# CONFIG_DMA_SHARED_BUFFER is not set

#
# Bus devices
#
CONFIG_CONNECTOR=y
CONFIG_PROC_EVENTS=y
# CONFIG_MTD is not set
CONFIG_ARCH_MIGHT_HAVE_PC_PARPORT=y
# CONFIG_PARPORT is not set
CONFIG_PNP=y
# CONFIG_PNP_DEBUG_MESSAGES is not set

#
# Protocols
#
CONFIG_PNPACPI=y
CONFIG_BLK_DEV=y
# CONFIG_BLK_DEV_NULL_BLK is not set
# CONFIG_BLK_DEV_FD is not set
# CONFIG_BLK_DEV_PCIESSD_MTIP32XX is not set
# CONFIG_ZRAM is not set
# CONFIG_BLK_CPQ_CISS_DA is not set
# CONFIG_BLK_DEV_DAC960 is not set
# CONFIG_BLK_DEV_UMEM is not set
# CONFIG_BLK_DEV_COW_COMMON is not set
# CONFIG_BLK_DEV_LOOP is not set
# CONFIG_BLK_DEV_DRBD is not set
# CONFIG_BLK_DEV_NBD is not set
# CONFIG_BLK_DEV_NVME is not set
# CONFIG_BLK_DEV_SKD is not set
# CONFIG_BLK_DEV_SX8 is not set
# CONFIG_BLK_DEV_RAM is not set
# CONFIG_CDROM_PKTCDVD is not set
# CONFIG_ATA_OVER_ETH is not set
# CONFIG_XEN_BLKDEV_FRONTEND is not set
# CONFIG_XEN_BLKDEV_BACKEND is not set
CONFIG_VIRTIO_BLK=y
# CONFIG_BLK_DEV_HD is not set
# CONFIG_BLK_DEV_RBD is not set
# CONFIG_BLK_DEV_RSXX is not set

#
# Misc devices
#
# CONFIG_SENSORS_LIS3LV02D is not set
# CONFIG_AD525X_DPOT is not set
# CONFIG_DUMMY_IRQ is not set
# CONFIG_IBM_ASM is not set
# CONFIG_PHANTOM is not set
# CONFIG_SGI_IOC4 is not set
# CONFIG_TIFM_CORE is not set
# CONFIG_ICS932S401 is not set
# CONFIG_ENCLOSURE_SERVICES is not set
# CONFIG_SGI_XP is not set
# CONFIG_HP_ILO is not set
# CONFIG_SGI_GRU is not set
# CONFIG_APDS9802ALS is not set
# CONFIG_ISL29003 is not set
# CONFIG_ISL29020 is not set
# CONFIG_SENSORS_TSL2550 is not set
# CONFIG_SENSORS_BH1780 is not set
# CONFIG_SENSORS_BH1770 is not set
# CONFIG_SENSORS_APDS990X is not set
# CONFIG_HMC6352 is not set
# CONFIG_DS1682 is not set
# CONFIG_VMWARE_BALLOON is not set
# CONFIG_BMP085_I2C is not set
# CONFIG_USB_SWITCH_FSA9480 is not set
# CONFIG_SRAM is not set
# CONFIG_C2PORT is not set

#
# EEPROM support
#
# CONFIG_EEPROM_AT24 is not set
# CONFIG_EEPROM_LEGACY is not set
# CONFIG_EEPROM_MAX6875 is not set
# CONFIG_EEPROM_93CX6 is not set
# CONFIG_CB710_CORE is not set

#
# Texas Instruments shared transport line discipline
#
# CONFIG_TI_ST is not set
# CONFIG_SENSORS_LIS3_I2C is not set

#
# Altera FPGA firmware download module
#
# CONFIG_ALTERA_STAPL is not set
# CONFIG_INTEL_MEI is not set
# CONFIG_INTEL_MEI_ME is not set
# CONFIG_INTEL_MEI_TXE is not set
# CONFIG_VMWARE_VMCI is not set

#
# Intel MIC Bus Driver
#
# CONFIG_INTEL_MIC_BUS is not set

#
# Intel MIC Host Driver
#

#
# Intel MIC Card Driver
#
# CONFIG_GENWQE is not set
# CONFIG_ECHO is not set
# CONFIG_CXL_BASE is not set
CONFIG_HAVE_IDE=y
# CONFIG_IDE is not set

#
# SCSI device support
#
CONFIG_SCSI_MOD=y
# CONFIG_RAID_ATTRS is not set
CONFIG_SCSI=y
CONFIG_SCSI_DMA=y
# CONFIG_SCSI_NETLINK is not set
# CONFIG_SCSI_MQ_DEFAULT is not set
CONFIG_SCSI_PROC_FS=y

#
# SCSI support type (disk, tape, CD-ROM)
#
CONFIG_BLK_DEV_SD=y
# CONFIG_CHR_DEV_ST is not set
# CONFIG_CHR_DEV_OSST is not set
CONFIG_BLK_DEV_SR=y
CONFIG_BLK_DEV_SR_VENDOR=y
CONFIG_CHR_DEV_SG=y
# CONFIG_CHR_DEV_SCH is not set
CONFIG_SCSI_CONSTANTS=y
CONFIG_SCSI_LOGGING=y
CONFIG_SCSI_SCAN_ASYNC=y

#
# SCSI Transports
#
# CONFIG_SCSI_SPI_ATTRS is not set
# CONFIG_SCSI_FC_ATTRS is not set
# CONFIG_SCSI_ISCSI_ATTRS is not set
# CONFIG_SCSI_SAS_ATTRS is not set
# CONFIG_SCSI_SAS_LIBSAS is not set
# CONFIG_SCSI_SRP_ATTRS is not set
CONFIG_SCSI_LOWLEVEL=y
# CONFIG_ISCSI_TCP is not set
# CONFIG_ISCSI_BOOT_SYSFS is not set
# CONFIG_SCSI_CXGB3_ISCSI is not set
# CONFIG_SCSI_CXGB4_ISCSI is not set
# CONFIG_SCSI_BNX2_ISCSI is not set
# CONFIG_BE2ISCSI is not set
# CONFIG_BLK_DEV_3W_XXXX_RAID is not set
# CONFIG_SCSI_HPSA is not set
# CONFIG_SCSI_3W_9XXX is not set
# CONFIG_SCSI_3W_SAS is not set
# CONFIG_SCSI_ACARD is not set
# CONFIG_SCSI_AACRAID is not set
# CONFIG_SCSI_AIC7XXX is not set
# CONFIG_SCSI_AIC79XX is not set
# CONFIG_SCSI_AIC94XX is not set
# CONFIG_SCSI_MVSAS is not set
# CONFIG_SCSI_MVUMI is not set
# CONFIG_SCSI_DPT_I2O is not set
# CONFIG_SCSI_ADVANSYS is not set
# CONFIG_SCSI_ARCMSR is not set
# CONFIG_SCSI_ESAS2R is not set
CONFIG_MEGARAID_NEWGEN=y
# CONFIG_MEGARAID_MM is not set
# CONFIG_MEGARAID_LEGACY is not set
# CONFIG_MEGARAID_SAS is not set
# CONFIG_SCSI_MPT2SAS is not set
# CONFIG_SCSI_MPT3SAS is not set
# CONFIG_SCSI_UFSHCD is not set
# CONFIG_SCSI_HPTIOP is not set
# CONFIG_SCSI_BUSLOGIC is not set
# CONFIG_VMWARE_PVSCSI is not set
# CONFIG_XEN_SCSI_FRONTEND is not set
# CONFIG_SCSI_DMX3191D is not set
# CONFIG_SCSI_EATA is not set
# CONFIG_SCSI_FUTURE_DOMAIN is not set
# CONFIG_SCSI_GDTH is not set
# CONFIG_SCSI_ISCI is not set
# CONFIG_SCSI_IPS is not set
# CONFIG_SCSI_INITIO is not set
# CONFIG_SCSI_INIA100 is not set
# CONFIG_SCSI_STEX is not set
# CONFIG_SCSI_SYM53C8XX_2 is not set
# CONFIG_SCSI_IPR is not set
# CONFIG_SCSI_QLOGIC_1280 is not set
# CONFIG_SCSI_QLA_ISCSI is not set
# CONFIG_SCSI_DC395x is not set
# CONFIG_SCSI_AM53C974 is not set
# CONFIG_SCSI_WD719X is not set
# CONFIG_SCSI_DEBUG is not set
# CONFIG_SCSI_PMCRAID is not set
# CONFIG_SCSI_PM8001 is not set
# CONFIG_SCSI_VIRTIO is not set
# CONFIG_SCSI_LOWLEVEL_PCMCIA is not set
CONFIG_SCSI_DH=y
# CONFIG_SCSI_DH_RDAC is not set
# CONFIG_SCSI_DH_HP_SW is not set
# CONFIG_SCSI_DH_EMC is not set
# CONFIG_SCSI_DH_ALUA is not set
# CONFIG_SCSI_OSD_INITIATOR is not set
CONFIG_ATA=y
# CONFIG_ATA_NONSTANDARD is not set
CONFIG_ATA_VERBOSE_ERROR=y
CONFIG_ATA_ACPI=y
# CONFIG_SATA_ZPODD is not set
CONFIG_SATA_PMP=y

#
# Controllers with non-SFF native interface
#
CONFIG_SATA_AHCI=y
# CONFIG_SATA_AHCI_PLATFORM is not set
# CONFIG_SATA_INIC162X is not set
# CONFIG_SATA_ACARD_AHCI is not set
# CONFIG_SATA_SIL24 is not set
CONFIG_ATA_SFF=y

#
# SFF controllers with custom DMA interface
#
# CONFIG_PDC_ADMA is not set
# CONFIG_SATA_QSTOR is not set
# CONFIG_SATA_SX4 is not set
CONFIG_ATA_BMDMA=y

#
# SATA SFF controllers with BMDMA
#
CONFIG_ATA_PIIX=y
# CONFIG_SATA_MV is not set
# CONFIG_SATA_NV is not set
# CONFIG_SATA_PROMISE is not set
# CONFIG_SATA_SIL is not set
# CONFIG_SATA_SIS is not set
# CONFIG_SATA_SVW is not set
# CONFIG_SATA_ULI is not set
# CONFIG_SATA_VIA is not set
# CONFIG_SATA_VITESSE is not set

#
# PATA SFF controllers with BMDMA
#
# CONFIG_PATA_ALI is not set
# CONFIG_PATA_AMD is not set
# CONFIG_PATA_ARTOP is not set
# CONFIG_PATA_ATIIXP is not set
# CONFIG_PATA_ATP867X is not set
# CONFIG_PATA_CMD64X is not set
# CONFIG_PATA_CYPRESS is not set
# CONFIG_PATA_EFAR is not set
# CONFIG_PATA_HPT366 is not set
# CONFIG_PATA_HPT37X is not set
# CONFIG_PATA_HPT3X2N is not set
# CONFIG_PATA_HPT3X3 is not set
# CONFIG_PATA_IT8213 is not set
# CONFIG_PATA_IT821X is not set
# CONFIG_PATA_JMICRON is not set
# CONFIG_PATA_MARVELL is not set
# CONFIG_PATA_NETCELL is not set
# CONFIG_PATA_NINJA32 is not set
# CONFIG_PATA_NS87415 is not set
# CONFIG_PATA_OLDPIIX is not set
# CONFIG_PATA_OPTIDMA is not set
# CONFIG_PATA_PDC2027X is not set
# CONFIG_PATA_PDC_OLD is not set
# CONFIG_PATA_RADISYS is not set
# CONFIG_PATA_RDC is not set
# CONFIG_PATA_SCH is not set
# CONFIG_PATA_SERVERWORKS is not set
# CONFIG_PATA_SIL680 is not set
# CONFIG_PATA_SIS is not set
# CONFIG_PATA_TOSHIBA is not set
# CONFIG_PATA_TRIFLEX is not set
# CONFIG_PATA_VIA is not set
# CONFIG_PATA_WINBOND is not set

#
# PIO-only SFF controllers
#
# CONFIG_PATA_CMD640_PCI is not set
# CONFIG_PATA_MPIIX is not set
# CONFIG_PATA_NS87410 is not set
# CONFIG_PATA_OPTI is not set
# CONFIG_PATA_PCMCIA is not set
# CONFIG_PATA_PLATFORM is not set
# CONFIG_PATA_RZ1000 is not set

#
# Generic fallback / legacy drivers
#
CONFIG_PATA_ACPI=m
CONFIG_ATA_GENERIC=m
# CONFIG_PATA_LEGACY is not set
CONFIG_MD=y
CONFIG_BLK_DEV_MD=y
CONFIG_MD_AUTODETECT=y
# CONFIG_MD_LINEAR is not set
# CONFIG_MD_RAID0 is not set
# CONFIG_MD_RAID1 is not set
# CONFIG_MD_RAID10 is not set
# CONFIG_MD_RAID456 is not set
# CONFIG_MD_MULTIPATH is not set
# CONFIG_MD_FAULTY is not set
# CONFIG_BCACHE is not set
CONFIG_BLK_DEV_DM_BUILTIN=y
CONFIG_BLK_DEV_DM=y
CONFIG_DM_DEBUG=y
CONFIG_DM_BUFIO=y
# CONFIG_DM_CRYPT is not set
CONFIG_DM_SNAPSHOT=y
# CONFIG_DM_THIN_PROVISIONING is not set
# CONFIG_DM_CACHE is not set
# CONFIG_DM_ERA is not set
CONFIG_DM_MIRROR=y
# CONFIG_DM_LOG_USERSPACE is not set
# CONFIG_DM_RAID is not set
CONFIG_DM_ZERO=y
# CONFIG_DM_MULTIPATH is not set
# CONFIG_DM_DELAY is not set
CONFIG_DM_UEVENT=y
# CONFIG_DM_FLAKEY is not set
# CONFIG_DM_VERITY is not set
# CONFIG_DM_SWITCH is not set
# CONFIG_TARGET_CORE is not set
CONFIG_FUSION=y
# CONFIG_FUSION_SPI is not set
# CONFIG_FUSION_SAS is not set
CONFIG_FUSION_MAX_SGE=40
CONFIG_FUSION_LOGGING=y

#
# IEEE 1394 (FireWire) support
#
# CONFIG_FIREWIRE is not set
# CONFIG_FIREWIRE_NOSY is not set
# CONFIG_I2O is not set
CONFIG_MACINTOSH_DRIVERS=y
CONFIG_MAC_EMUMOUSEBTN=y
CONFIG_NETDEVICES=y
CONFIG_NET_CORE=y
# CONFIG_BONDING is not set
# CONFIG_DUMMY is not set
# CONFIG_EQUALIZER is not set
CONFIG_NET_FC=y
# CONFIG_IFB is not set
# CONFIG_NET_TEAM is not set
# CONFIG_MACVLAN is not set
# CONFIG_IPVLAN is not set
# CONFIG_VXLAN is not set
# CONFIG_NETCONSOLE is not set
# CONFIG_NETPOLL is not set
# CONFIG_NET_POLL_CONTROLLER is not set
CONFIG_TUN=m
CONFIG_VETH=m
CONFIG_VIRTIO_NET=m
# CONFIG_NLMON is not set
# CONFIG_ARCNET is not set

#
# CAIF transport drivers
#
# CONFIG_VHOST_NET is not set

#
# Distributed Switch Architecture drivers
#
# CONFIG_NET_DSA_MV88E6XXX is not set
# CONFIG_NET_DSA_MV88E6060 is not set
# CONFIG_NET_DSA_MV88E6XXX_NEED_PPU is not set
# CONFIG_NET_DSA_MV88E6131 is not set
# CONFIG_NET_DSA_MV88E6123_61_65 is not set
# CONFIG_NET_DSA_MV88E6171 is not set
# CONFIG_NET_DSA_MV88E6352 is not set
# CONFIG_NET_DSA_BCM_SF2 is not set
CONFIG_ETHERNET=y
CONFIG_NET_VENDOR_3COM=y
# CONFIG_PCMCIA_3C574 is not set
# CONFIG_PCMCIA_3C589 is not set
# CONFIG_VORTEX is not set
# CONFIG_TYPHOON is not set
CONFIG_NET_VENDOR_ADAPTEC=y
# CONFIG_ADAPTEC_STARFIRE is not set
CONFIG_NET_VENDOR_AGERE=y
# CONFIG_ET131X is not set
CONFIG_NET_VENDOR_ALTEON=y
# CONFIG_ACENIC is not set
# CONFIG_ALTERA_TSE is not set
CONFIG_NET_VENDOR_AMD=y
# CONFIG_AMD8111_ETH is not set
# CONFIG_PCNET32 is not set
# CONFIG_PCMCIA_NMCLAN is not set
# CONFIG_NET_XGENE is not set
CONFIG_NET_VENDOR_ARC=y
CONFIG_NET_VENDOR_ATHEROS=y
# CONFIG_ATL2 is not set
# CONFIG_ATL1 is not set
# CONFIG_ATL1E is not set
# CONFIG_ATL1C is not set
# CONFIG_ALX is not set
CONFIG_NET_VENDOR_BROADCOM=y
# CONFIG_B44 is not set
# CONFIG_BCMGENET is not set
# CONFIG_BNX2 is not set
# CONFIG_CNIC is not set
# CONFIG_TIGON3 is not set
# CONFIG_BNX2X is not set
CONFIG_NET_VENDOR_BROCADE=y
# CONFIG_BNA is not set
CONFIG_NET_VENDOR_CHELSIO=y
# CONFIG_CHELSIO_T1 is not set
# CONFIG_CHELSIO_T3 is not set
# CONFIG_CHELSIO_T4 is not set
# CONFIG_CHELSIO_T4VF is not set
CONFIG_NET_VENDOR_CISCO=y
# CONFIG_ENIC is not set
# CONFIG_CX_ECAT is not set
# CONFIG_DNET is not set
CONFIG_NET_VENDOR_DEC=y
CONFIG_NET_TULIP=y
# CONFIG_DE2104X is not set
# CONFIG_TULIP is not set
# CONFIG_DE4X5 is not set
# CONFIG_WINBOND_840 is not set
# CONFIG_DM9102 is not set
# CONFIG_ULI526X is not set
# CONFIG_PCMCIA_XIRCOM is not set
CONFIG_NET_VENDOR_DLINK=y
# CONFIG_DL2K is not set
# CONFIG_SUNDANCE is not set
CONFIG_NET_VENDOR_EMULEX=y
# CONFIG_BE2NET is not set
CONFIG_NET_VENDOR_EXAR=y
# CONFIG_S2IO is not set
# CONFIG_VXGE is not set
# CONFIG_NET_VENDOR_FUJITSU is not set
# CONFIG_NET_VENDOR_HP is not set
CONFIG_NET_VENDOR_INTEL=y
# CONFIG_E100 is not set
# CONFIG_E1000 is not set
# CONFIG_E1000E is not set
# CONFIG_IGB is not set
# CONFIG_IGBVF is not set
# CONFIG_IXGB is not set
# CONFIG_IXGBE is not set
# CONFIG_IXGBEVF is not set
# CONFIG_I40E is not set
# CONFIG_I40EVF is not set
# CONFIG_FM10K is not set
# CONFIG_NET_VENDOR_I825XX is not set
# CONFIG_IP1000 is not set
# CONFIG_JME is not set
CONFIG_NET_VENDOR_MARVELL=y
# CONFIG_MVMDIO is not set
# CONFIG_SKGE is not set
# CONFIG_SKY2 is not set
CONFIG_NET_VENDOR_MELLANOX=y
# CONFIG_MLX4_EN is not set
# CONFIG_MLX4_CORE is not set
# CONFIG_MLX5_CORE is not set
CONFIG_NET_VENDOR_MICREL=y
# CONFIG_KS8851_MLL is not set
# CONFIG_KSZ884X_PCI is not set
CONFIG_NET_VENDOR_MYRI=y
# CONFIG_MYRI10GE is not set
# CONFIG_FEALNX is not set
CONFIG_NET_VENDOR_NATSEMI=y
# CONFIG_NATSEMI is not set
# CONFIG_NS83820 is not set
CONFIG_NET_VENDOR_8390=y
# CONFIG_PCMCIA_AXNET is not set
# CONFIG_NE2K_PCI is not set
# CONFIG_PCMCIA_PCNET is not set
CONFIG_NET_VENDOR_NVIDIA=y
# CONFIG_FORCEDETH is not set
CONFIG_NET_VENDOR_OKI=y
# CONFIG_ETHOC is not set
CONFIG_NET_PACKET_ENGINE=y
# CONFIG_HAMACHI is not set
# CONFIG_YELLOWFIN is not set
CONFIG_NET_VENDOR_QLOGIC=y
# CONFIG_QLA3XXX is not set
# CONFIG_QLCNIC is not set
# CONFIG_QLGE is not set
# CONFIG_NETXEN_NIC is not set
CONFIG_NET_VENDOR_QUALCOMM=y
CONFIG_NET_VENDOR_REALTEK=y
# CONFIG_8139CP is not set
# CONFIG_8139TOO is not set
# CONFIG_R8169 is not set
CONFIG_NET_VENDOR_RDC=y
# CONFIG_R6040 is not set
CONFIG_NET_VENDOR_ROCKER=y
# CONFIG_NET_VENDOR_SAMSUNG is not set
# CONFIG_NET_VENDOR_SEEQ is not set
CONFIG_NET_VENDOR_SILAN=y
# CONFIG_SC92031 is not set
CONFIG_NET_VENDOR_SIS=y
# CONFIG_SIS900 is not set
# CONFIG_SIS190 is not set
# CONFIG_SFC is not set
CONFIG_NET_VENDOR_SMSC=y
# CONFIG_PCMCIA_SMC91C92 is not set
# CONFIG_EPIC100 is not set
# CONFIG_SMSC911X is not set
# CONFIG_SMSC9420 is not set
CONFIG_NET_VENDOR_STMICRO=y
# CONFIG_STMMAC_ETH is not set
CONFIG_NET_VENDOR_SUN=y
# CONFIG_HAPPYMEAL is not set
# CONFIG_SUNGEM is not set
# CONFIG_CASSINI is not set
# CONFIG_NIU is not set
CONFIG_NET_VENDOR_TEHUTI=y
# CONFIG_TEHUTI is not set
CONFIG_NET_VENDOR_TI=y
# CONFIG_TLAN is not set
CONFIG_NET_VENDOR_VIA=y
# CONFIG_VIA_RHINE is not set
# CONFIG_VIA_VELOCITY is not set
CONFIG_NET_VENDOR_WIZNET=y
# CONFIG_WIZNET_W5100 is not set
# CONFIG_WIZNET_W5300 is not set
CONFIG_NET_VENDOR_XIRCOM=y
# CONFIG_PCMCIA_XIRC2PS is not set
# CONFIG_FDDI is not set
# CONFIG_HIPPI is not set
# CONFIG_NET_SB1000 is not set
CONFIG_PHYLIB=y

#
# MII PHY device drivers
#
# CONFIG_AT803X_PHY is not set
# CONFIG_AMD_PHY is not set
# CONFIG_MARVELL_PHY is not set
# CONFIG_DAVICOM_PHY is not set
# CONFIG_QSEMI_PHY is not set
# CONFIG_LXT_PHY is not set
# CONFIG_CICADA_PHY is not set
# CONFIG_VITESSE_PHY is not set
# CONFIG_SMSC_PHY is not set
# CONFIG_BROADCOM_PHY is not set
# CONFIG_BCM7XXX_PHY is not set
# CONFIG_BCM87XX_PHY is not set
# CONFIG_ICPLUS_PHY is not set
# CONFIG_REALTEK_PHY is not set
# CONFIG_NATIONAL_PHY is not set
# CONFIG_STE10XP is not set
# CONFIG_LSI_ET1011C_PHY is not set
# CONFIG_MICREL_PHY is not set
CONFIG_FIXED_PHY=y
# CONFIG_MDIO_BITBANG is not set
# CONFIG_MDIO_BCM_UNIMAC is not set
# CONFIG_PPP is not set
# CONFIG_SLIP is not set
CONFIG_USB_NET_DRIVERS=y
# CONFIG_USB_CATC is not set
# CONFIG_USB_KAWETH is not set
# CONFIG_USB_PEGASUS is not set
# CONFIG_USB_RTL8150 is not set
# CONFIG_USB_RTL8152 is not set
# CONFIG_USB_USBNET is not set
# CONFIG_USB_HSO is not set
# CONFIG_USB_IPHETH is not set
# CONFIG_WLAN is not set

#
# Enable WiMAX (Networking options) to see the WiMAX drivers
#
# CONFIG_WAN is not set
# CONFIG_XEN_NETDEV_FRONTEND is not set
# CONFIG_XEN_NETDEV_BACKEND is not set
# CONFIG_VMXNET3 is not set
CONFIG_ISDN=y
# CONFIG_ISDN_I4L is not set
# CONFIG_ISDN_CAPI is not set
# CONFIG_ISDN_DRV_GIGASET is not set
# CONFIG_HYSDN is not set
# CONFIG_MISDN is not set

#
# Input device support
#
CONFIG_INPUT=y
CONFIG_INPUT_FF_MEMLESS=y
# CONFIG_INPUT_POLLDEV is not set
# CONFIG_INPUT_SPARSEKMAP is not set
# CONFIG_INPUT_MATRIXKMAP is not set

#
# Userland interfaces
#
CONFIG_INPUT_MOUSEDEV=y
# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
# CONFIG_INPUT_JOYDEV is not set
CONFIG_INPUT_EVDEV=y
# CONFIG_INPUT_EVBUG is not set

#
# Input Device Drivers
#
CONFIG_INPUT_KEYBOARD=y
# CONFIG_KEYBOARD_ADP5588 is not set
# CONFIG_KEYBOARD_ADP5589 is not set
CONFIG_KEYBOARD_ATKBD=y
# CONFIG_KEYBOARD_QT1070 is not set
# CONFIG_KEYBOARD_QT2160 is not set
# CONFIG_KEYBOARD_LKKBD is not set
# CONFIG_KEYBOARD_GPIO is not set
# CONFIG_KEYBOARD_GPIO_POLLED is not set
# CONFIG_KEYBOARD_TCA6416 is not set
# CONFIG_KEYBOARD_TCA8418 is not set
# CONFIG_KEYBOARD_MATRIX is not set
# CONFIG_KEYBOARD_LM8323 is not set
# CONFIG_KEYBOARD_LM8333 is not set
# CONFIG_KEYBOARD_MAX7359 is not set
# CONFIG_KEYBOARD_MCS is not set
# CONFIG_KEYBOARD_MPR121 is not set
# CONFIG_KEYBOARD_NEWTON is not set
# CONFIG_KEYBOARD_OPENCORES is not set
# CONFIG_KEYBOARD_SAMSUNG is not set
# CONFIG_KEYBOARD_STOWAWAY is not set
# CONFIG_KEYBOARD_SUNKBD is not set
# CONFIG_KEYBOARD_XTKBD is not set
CONFIG_INPUT_MOUSE=y
CONFIG_MOUSE_PS2=y
CONFIG_MOUSE_PS2_ALPS=y
CONFIG_MOUSE_PS2_LOGIPS2PP=y
CONFIG_MOUSE_PS2_SYNAPTICS=y
CONFIG_MOUSE_PS2_CYPRESS=y
CONFIG_MOUSE_PS2_LIFEBOOK=y
CONFIG_MOUSE_PS2_TRACKPOINT=y
CONFIG_MOUSE_PS2_ELANTECH=y
CONFIG_MOUSE_PS2_SENTELIC=y
# CONFIG_MOUSE_PS2_TOUCHKIT is not set
# CONFIG_MOUSE_SERIAL is not set
# CONFIG_MOUSE_APPLETOUCH is not set
# CONFIG_MOUSE_BCM5974 is not set
# CONFIG_MOUSE_CYAPA is not set
# CONFIG_MOUSE_ELAN_I2C is not set
# CONFIG_MOUSE_VSXXXAA is not set
# CONFIG_MOUSE_GPIO is not set
# CONFIG_MOUSE_SYNAPTICS_I2C is not set
# CONFIG_MOUSE_SYNAPTICS_USB is not set
CONFIG_INPUT_JOYSTICK=y
# CONFIG_JOYSTICK_ANALOG is not set
# CONFIG_JOYSTICK_A3D is not set
# CONFIG_JOYSTICK_ADI is not set
# CONFIG_JOYSTICK_COBRA is not set
# CONFIG_JOYSTICK_GF2K is not set
# CONFIG_JOYSTICK_GRIP is not set
# CONFIG_JOYSTICK_GRIP_MP is not set
# CONFIG_JOYSTICK_GUILLEMOT is not set
# CONFIG_JOYSTICK_INTERACT is not set
# CONFIG_JOYSTICK_SIDEWINDER is not set
# CONFIG_JOYSTICK_TMDC is not set
# CONFIG_JOYSTICK_IFORCE is not set
# CONFIG_JOYSTICK_WARRIOR is not set
# CONFIG_JOYSTICK_MAGELLAN is not set
# CONFIG_JOYSTICK_SPACEORB is not set
# CONFIG_JOYSTICK_SPACEBALL is not set
# CONFIG_JOYSTICK_STINGER is not set
# CONFIG_JOYSTICK_TWIDJOY is not set
# CONFIG_JOYSTICK_ZHENHUA is not set
# CONFIG_JOYSTICK_AS5011 is not set
# CONFIG_JOYSTICK_JOYDUMP is not set
# CONFIG_JOYSTICK_XPAD is not set
CONFIG_INPUT_TABLET=y
# CONFIG_TABLET_USB_ACECAD is not set
# CONFIG_TABLET_USB_AIPTEK is not set
# CONFIG_TABLET_USB_GTCO is not set
# CONFIG_TABLET_USB_HANWANG is not set
# CONFIG_TABLET_USB_KBTAB is not set
# CONFIG_TABLET_SERIAL_WACOM4 is not set
CONFIG_INPUT_TOUCHSCREEN=y
# CONFIG_TOUCHSCREEN_AD7879 is not set
# CONFIG_TOUCHSCREEN_ATMEL_MXT is not set
# CONFIG_TOUCHSCREEN_AUO_PIXCIR is not set
# CONFIG_TOUCHSCREEN_BU21013 is not set
# CONFIG_TOUCHSCREEN_CY8CTMG110 is not set
# CONFIG_TOUCHSCREEN_CYTTSP_CORE is not set
# CONFIG_TOUCHSCREEN_CYTTSP4_CORE is not set
# CONFIG_TOUCHSCREEN_DYNAPRO is not set
# CONFIG_TOUCHSCREEN_HAMPSHIRE is not set
# CONFIG_TOUCHSCREEN_EETI is not set
# CONFIG_TOUCHSCREEN_FUJITSU is not set
# CONFIG_TOUCHSCREEN_GOODIX is not set
# CONFIG_TOUCHSCREEN_ILI210X is not set
# CONFIG_TOUCHSCREEN_GUNZE is not set
# CONFIG_TOUCHSCREEN_ELAN is not set
# CONFIG_TOUCHSCREEN_ELO is not set
# CONFIG_TOUCHSCREEN_WACOM_W8001 is not set
# CONFIG_TOUCHSCREEN_WACOM_I2C is not set
# CONFIG_TOUCHSCREEN_MAX11801 is not set
# CONFIG_TOUCHSCREEN_MCS5000 is not set
# CONFIG_TOUCHSCREEN_MMS114 is not set
# CONFIG_TOUCHSCREEN_MTOUCH is not set
# CONFIG_TOUCHSCREEN_INEXIO is not set
# CONFIG_TOUCHSCREEN_MK712 is not set
# CONFIG_TOUCHSCREEN_PENMOUNT is not set
# CONFIG_TOUCHSCREEN_EDT_FT5X06 is not set
# CONFIG_TOUCHSCREEN_TOUCHRIGHT is not set
# CONFIG_TOUCHSCREEN_TOUCHWIN is not set
# CONFIG_TOUCHSCREEN_PIXCIR is not set
# CONFIG_TOUCHSCREEN_USB_COMPOSITE is not set
# CONFIG_TOUCHSCREEN_TOUCHIT213 is not set
# CONFIG_TOUCHSCREEN_TSC_SERIO is not set
# CONFIG_TOUCHSCREEN_TSC2007 is not set
# CONFIG_TOUCHSCREEN_ST1232 is not set
# CONFIG_TOUCHSCREEN_SUR40 is not set
# CONFIG_TOUCHSCREEN_TPS6507X is not set
# CONFIG_TOUCHSCREEN_ZFORCE is not set
CONFIG_INPUT_MISC=y
# CONFIG_INPUT_AD714X is not set
# CONFIG_INPUT_BMA150 is not set
# CONFIG_INPUT_PCSPKR is not set
# CONFIG_INPUT_MMA8450 is not set
# CONFIG_INPUT_MPU3050 is not set
# CONFIG_INPUT_APANEL is not set
# CONFIG_INPUT_GP2A is not set
# CONFIG_INPUT_GPIO_BEEPER is not set
# CONFIG_INPUT_GPIO_TILT_POLLED is not set
# CONFIG_INPUT_ATLAS_BTNS is not set
# CONFIG_INPUT_ATI_REMOTE2 is not set
# CONFIG_INPUT_KEYSPAN_REMOTE is not set
# CONFIG_INPUT_KXTJ9 is not set
# CONFIG_INPUT_POWERMATE is not set
# CONFIG_INPUT_YEALINK is not set
# CONFIG_INPUT_CM109 is not set
# CONFIG_INPUT_UINPUT is not set
# CONFIG_INPUT_PCF8574 is not set
# CONFIG_INPUT_GPIO_ROTARY_ENCODER is not set
# CONFIG_INPUT_ADXL34X is not set
# CONFIG_INPUT_IMS_PCU is not set
# CONFIG_INPUT_CMA3000 is not set
CONFIG_INPUT_XEN_KBDDEV_FRONTEND=y
# CONFIG_INPUT_IDEAPAD_SLIDEBAR is not set
# CONFIG_INPUT_DRV260X_HAPTICS is not set
# CONFIG_INPUT_DRV2667_HAPTICS is not set

#
# Hardware I/O ports
#
CONFIG_SERIO=y
CONFIG_ARCH_MIGHT_HAVE_PC_SERIO=y
CONFIG_SERIO_I8042=y
CONFIG_SERIO_SERPORT=y
# CONFIG_SERIO_CT82C710 is not set
# CONFIG_SERIO_PCIPS2 is not set
CONFIG_SERIO_LIBPS2=y
CONFIG_SERIO_RAW=m
# CONFIG_SERIO_ALTERA_PS2 is not set
# CONFIG_SERIO_PS2MULT is not set
# CONFIG_SERIO_ARC_PS2 is not set
# CONFIG_GAMEPORT is not set

#
# Character devices
#
CONFIG_TTY=y
CONFIG_VT=y
CONFIG_CONSOLE_TRANSLATIONS=y
CONFIG_VT_CONSOLE=y
CONFIG_VT_CONSOLE_SLEEP=y
CONFIG_HW_CONSOLE=y
CONFIG_VT_HW_CONSOLE_BINDING=y
CONFIG_UNIX98_PTYS=y
CONFIG_DEVPTS_MULTIPLE_INSTANCES=y
# CONFIG_LEGACY_PTYS is not set
CONFIG_SERIAL_NONSTANDARD=y
# CONFIG_ROCKETPORT is not set
# CONFIG_CYCLADES is not set
# CONFIG_MOXA_INTELLIO is not set
# CONFIG_MOXA_SMARTIO is not set
# CONFIG_SYNCLINK is not set
# CONFIG_SYNCLINKMP is not set
# CONFIG_SYNCLINK_GT is not set
# CONFIG_NOZOMI is not set
# CONFIG_ISI is not set
# CONFIG_N_HDLC is not set
# CONFIG_N_GSM is not set
# CONFIG_TRACE_SINK is not set
# CONFIG_DEVKMEM is not set

#
# Serial drivers
#
CONFIG_SERIAL_EARLYCON=y
CONFIG_SERIAL_8250=y
# CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set
CONFIG_SERIAL_8250_PNP=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_SERIAL_8250_DMA=y
CONFIG_SERIAL_8250_PCI=y
# CONFIG_SERIAL_8250_CS is not set
CONFIG_SERIAL_8250_NR_UARTS=32
CONFIG_SERIAL_8250_RUNTIME_UARTS=4
CONFIG_SERIAL_8250_EXTENDED=y
CONFIG_SERIAL_8250_MANY_PORTS=y
CONFIG_SERIAL_8250_SHARE_IRQ=y
# CONFIG_SERIAL_8250_DETECT_IRQ is not set
CONFIG_SERIAL_8250_RSA=y
# CONFIG_SERIAL_8250_DW is not set
# CONFIG_SERIAL_8250_FINTEK is not set

#
# Non-8250 serial port support
#
# CONFIG_SERIAL_MFD_HSU is not set
CONFIG_SERIAL_CORE=y
CONFIG_SERIAL_CORE_CONSOLE=y
# CONFIG_SERIAL_JSM is not set
# CONFIG_SERIAL_SCCNXP is not set
# CONFIG_SERIAL_SC16IS7XX is not set
# CONFIG_SERIAL_ALTERA_JTAGUART is not set
# CONFIG_SERIAL_ALTERA_UART is not set
# CONFIG_SERIAL_ARC is not set
# CONFIG_SERIAL_RP2 is not set
# CONFIG_SERIAL_FSL_LPUART is not set
# CONFIG_TTY_PRINTK is not set
CONFIG_HVC_DRIVER=y
CONFIG_HVC_IRQ=y
CONFIG_HVC_XEN=y
CONFIG_HVC_XEN_FRONTEND=y
CONFIG_VIRTIO_CONSOLE=y
# CONFIG_IPMI_HANDLER is not set
CONFIG_HW_RANDOM=y
# CONFIG_HW_RANDOM_TIMERIOMEM is not set
# CONFIG_HW_RANDOM_INTEL is not set
# CONFIG_HW_RANDOM_AMD is not set
# CONFIG_HW_RANDOM_VIA is not set
# CONFIG_HW_RANDOM_VIRTIO is not set
CONFIG_NVRAM=y
# CONFIG_R3964 is not set
# CONFIG_APPLICOM is not set

#
# PCMCIA character devices
#
# CONFIG_SYNCLINK_CS is not set
# CONFIG_CARDMAN_4000 is not set
# CONFIG_CARDMAN_4040 is not set
# CONFIG_IPWIRELESS is not set
# CONFIG_MWAVE is not set
CONFIG_RAW_DRIVER=y
CONFIG_MAX_RAW_DEVS=8192
CONFIG_HPET=y
# CONFIG_HPET_MMAP is not set
# CONFIG_HANGCHECK_TIMER is not set
# CONFIG_UV_MMTIMER is not set
# CONFIG_TCG_TPM is not set
# CONFIG_TELCLOCK is not set
CONFIG_DEVPORT=y
# CONFIG_XILLYBUS is not set

#
# I2C support
#
CONFIG_I2C=m
CONFIG_I2C_BOARDINFO=y
CONFIG_I2C_COMPAT=y
# CONFIG_I2C_CHARDEV is not set
# CONFIG_I2C_MUX is not set
CONFIG_I2C_HELPER_AUTO=y

#
# I2C Hardware Bus support
#

#
# PC SMBus host controller drivers
#
# CONFIG_I2C_ALI1535 is not set
# CONFIG_I2C_ALI1563 is not set
# CONFIG_I2C_ALI15X3 is not set
# CONFIG_I2C_AMD756 is not set
# CONFIG_I2C_AMD8111 is not set
# CONFIG_I2C_I801 is not set
# CONFIG_I2C_ISCH is not set
# CONFIG_I2C_ISMT is not set
CONFIG_I2C_PIIX4=m
# CONFIG_I2C_NFORCE2 is not set
# CONFIG_I2C_SIS5595 is not set
# CONFIG_I2C_SIS630 is not set
# CONFIG_I2C_SIS96X is not set
# CONFIG_I2C_VIA is not set
# CONFIG_I2C_VIAPRO is not set

#
# ACPI drivers
#
# CONFIG_I2C_SCMI is not set

#
# I2C system bus drivers (mostly embedded / system-on-chip)
#
# CONFIG_I2C_CBUS_GPIO is not set
# CONFIG_I2C_DESIGNWARE_PLATFORM is not set
# CONFIG_I2C_DESIGNWARE_PCI is not set
# CONFIG_I2C_GPIO is not set
# CONFIG_I2C_OCORES is not set
# CONFIG_I2C_PCA_PLATFORM is not set
# CONFIG_I2C_PXA_PCI is not set
# CONFIG_I2C_SIMTEC is not set
# CONFIG_I2C_XILINX is not set

#
# External I2C/SMBus adapter drivers
#
# CONFIG_I2C_DIOLAN_U2C is not set
# CONFIG_I2C_PARPORT_LIGHT is not set
# CONFIG_I2C_ROBOTFUZZ_OSIF is not set
# CONFIG_I2C_TAOS_EVM is not set
# CONFIG_I2C_TINY_USB is not set

#
# Other I2C/SMBus bus drivers
#
# CONFIG_I2C_STUB is not set
# CONFIG_I2C_SLAVE is not set
# CONFIG_I2C_DEBUG_CORE is not set
# CONFIG_I2C_DEBUG_ALGO is not set
# CONFIG_I2C_DEBUG_BUS is not set
# CONFIG_SPI is not set
# CONFIG_SPMI is not set
# CONFIG_HSI is not set

#
# PPS support
#
# CONFIG_PPS is not set

#
# PPS generators support
#

#
# PTP clock support
#
# CONFIG_PTP_1588_CLOCK is not set
# CONFIG_DP83640_PHY is not set
CONFIG_PINCTRL=y

#
# Pin controllers
#
# CONFIG_DEBUG_PINCTRL is not set
CONFIG_PINCTRL_BAYTRAIL=y
# CONFIG_PINCTRL_CHERRYVIEW is not set
CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y
CONFIG_GPIOLIB=y
CONFIG_GPIO_DEVRES=y
CONFIG_GPIO_ACPI=y
CONFIG_GPIOLIB_IRQCHIP=y
# CONFIG_DEBUG_GPIO is not set
CONFIG_GPIO_SYSFS=y

#
# Memory mapped GPIO drivers:
#
# CONFIG_GPIO_GENERIC_PLATFORM is not set
# CONFIG_GPIO_IT8761E is not set
# CONFIG_GPIO_F7188X is not set
# CONFIG_GPIO_SCH311X is not set
# CONFIG_GPIO_SCH is not set
# CONFIG_GPIO_ICH is not set
# CONFIG_GPIO_VX855 is not set
# CONFIG_GPIO_LYNXPOINT is not set

#
# I2C GPIO expanders:
#
# CONFIG_GPIO_MAX7300 is not set
# CONFIG_GPIO_MAX732X is not set
# CONFIG_GPIO_PCA953X is not set
# CONFIG_GPIO_PCF857X is not set
# CONFIG_GPIO_ADP5588 is not set

#
# PCI GPIO expanders:
#
# CONFIG_GPIO_BT8XX is not set
# CONFIG_GPIO_AMD8111 is not set
# CONFIG_GPIO_INTEL_MID is not set
# CONFIG_GPIO_ML_IOH is not set
# CONFIG_GPIO_RDC321X is not set

#
# SPI GPIO expanders:
#
# CONFIG_GPIO_MCP23S08 is not set

#
# AC97 GPIO expanders:
#

#
# LPC GPIO expanders:
#

#
# MODULbus GPIO expanders:
#

#
# USB GPIO expanders:
#
# CONFIG_W1 is not set
CONFIG_POWER_SUPPLY=y
# CONFIG_POWER_SUPPLY_DEBUG is not set
# CONFIG_PDA_POWER is not set
# CONFIG_TEST_POWER is not set
# CONFIG_BATTERY_DS2780 is not set
# CONFIG_BATTERY_DS2781 is not set
# CONFIG_BATTERY_DS2782 is not set
# CONFIG_BATTERY_SBS is not set
# CONFIG_BATTERY_BQ27x00 is not set
# CONFIG_BATTERY_MAX17040 is not set
# CONFIG_BATTERY_MAX17042 is not set
# CONFIG_CHARGER_MAX8903 is not set
# CONFIG_CHARGER_LP8727 is not set
# CONFIG_CHARGER_GPIO is not set
# CONFIG_CHARGER_BQ2415X is not set
# CONFIG_CHARGER_BQ24190 is not set
# CONFIG_CHARGER_BQ24735 is not set
# CONFIG_CHARGER_SMB347 is not set
CONFIG_POWER_RESET=y
# CONFIG_POWER_AVS is not set
CONFIG_HWMON=y
# CONFIG_HWMON_VID is not set
# CONFIG_HWMON_DEBUG_CHIP is not set

#
# Native drivers
#
# CONFIG_SENSORS_ABITUGURU is not set
# CONFIG_SENSORS_ABITUGURU3 is not set
# CONFIG_SENSORS_AD7414 is not set
# CONFIG_SENSORS_AD7418 is not set
# CONFIG_SENSORS_ADM1021 is not set
# CONFIG_SENSORS_ADM1025 is not set
# CONFIG_SENSORS_ADM1026 is not set
# CONFIG_SENSORS_ADM1029 is not set
# CONFIG_SENSORS_ADM1031 is not set
# CONFIG_SENSORS_ADM9240 is not set
# CONFIG_SENSORS_ADT7410 is not set
# CONFIG_SENSORS_ADT7411 is not set
# CONFIG_SENSORS_ADT7462 is not set
# CONFIG_SENSORS_ADT7470 is not set
# CONFIG_SENSORS_ADT7475 is not set
# CONFIG_SENSORS_ASC7621 is not set
# CONFIG_SENSORS_K8TEMP is not set
# CONFIG_SENSORS_K10TEMP is not set
# CONFIG_SENSORS_FAM15H_POWER is not set
# CONFIG_SENSORS_APPLESMC is not set
# CONFIG_SENSORS_ASB100 is not set
# CONFIG_SENSORS_ATXP1 is not set
# CONFIG_SENSORS_DS620 is not set
# CONFIG_SENSORS_DS1621 is not set
# CONFIG_SENSORS_I5K_AMB is not set
# CONFIG_SENSORS_F71805F is not set
# CONFIG_SENSORS_F71882FG is not set
# CONFIG_SENSORS_F75375S is not set
# CONFIG_SENSORS_FSCHMD is not set
# CONFIG_SENSORS_GL518SM is not set
# CONFIG_SENSORS_GL520SM is not set
# CONFIG_SENSORS_G760A is not set
# CONFIG_SENSORS_G762 is not set
# CONFIG_SENSORS_GPIO_FAN is not set
# CONFIG_SENSORS_HIH6130 is not set
# CONFIG_SENSORS_I5500 is not set
# CONFIG_SENSORS_CORETEMP is not set
# CONFIG_SENSORS_IT87 is not set
# CONFIG_SENSORS_JC42 is not set
# CONFIG_SENSORS_POWR1220 is not set
# CONFIG_SENSORS_LINEAGE is not set
# CONFIG_SENSORS_LTC2945 is not set
# CONFIG_SENSORS_LTC4151 is not set
# CONFIG_SENSORS_LTC4215 is not set
# CONFIG_SENSORS_LTC4222 is not set
# CONFIG_SENSORS_LTC4245 is not set
# CONFIG_SENSORS_LTC4260 is not set
# CONFIG_SENSORS_LTC4261 is not set
# CONFIG_SENSORS_MAX16065 is not set
# CONFIG_SENSORS_MAX1619 is not set
# CONFIG_SENSORS_MAX1668 is not set
# CONFIG_SENSORS_MAX197 is not set
# CONFIG_SENSORS_MAX6639 is not set
# CONFIG_SENSORS_MAX6642 is not set
# CONFIG_SENSORS_MAX6650 is not set
# CONFIG_SENSORS_MAX6697 is not set
# CONFIG_SENSORS_HTU21 is not set
# CONFIG_SENSORS_MCP3021 is not set
# CONFIG_SENSORS_LM63 is not set
# CONFIG_SENSORS_LM73 is not set
# CONFIG_SENSORS_LM75 is not set
# CONFIG_SENSORS_LM77 is not set
# CONFIG_SENSORS_LM78 is not set
# CONFIG_SENSORS_LM80 is not set
# CONFIG_SENSORS_LM83 is not set
# CONFIG_SENSORS_LM85 is not set
# CONFIG_SENSORS_LM87 is not set
# CONFIG_SENSORS_LM90 is not set
# CONFIG_SENSORS_LM92 is not set
# CONFIG_SENSORS_LM93 is not set
# CONFIG_SENSORS_LM95234 is not set
# CONFIG_SENSORS_LM95241 is not set
# CONFIG_SENSORS_LM95245 is not set
# CONFIG_SENSORS_PC87360 is not set
# CONFIG_SENSORS_PC87427 is not set
# CONFIG_SENSORS_NTC_THERMISTOR is not set
# CONFIG_SENSORS_NCT6683 is not set
# CONFIG_SENSORS_NCT6775 is not set
# CONFIG_SENSORS_NCT7802 is not set
# CONFIG_SENSORS_PCF8591 is not set
# CONFIG_PMBUS is not set
# CONFIG_SENSORS_SHT15 is not set
# CONFIG_SENSORS_SHT21 is not set
# CONFIG_SENSORS_SHTC1 is not set
# CONFIG_SENSORS_SIS5595 is not set
# CONFIG_SENSORS_DME1737 is not set
# CONFIG_SENSORS_EMC1403 is not set
# CONFIG_SENSORS_EMC2103 is not set
# CONFIG_SENSORS_EMC6W201 is not set
# CONFIG_SENSORS_SMSC47M1 is not set
# CONFIG_SENSORS_SMSC47M192 is not set
# CONFIG_SENSORS_SMSC47B397 is not set
# CONFIG_SENSORS_SCH56XX_COMMON is not set
# CONFIG_SENSORS_SCH5627 is not set
# CONFIG_SENSORS_SCH5636 is not set
# CONFIG_SENSORS_SMM665 is not set
# CONFIG_SENSORS_ADC128D818 is not set
# CONFIG_SENSORS_ADS1015 is not set
# CONFIG_SENSORS_ADS7828 is not set
# CONFIG_SENSORS_AMC6821 is not set
# CONFIG_SENSORS_INA209 is not set
# CONFIG_SENSORS_INA2XX is not set
# CONFIG_SENSORS_THMC50 is not set
# CONFIG_SENSORS_TMP102 is not set
# CONFIG_SENSORS_TMP103 is not set
# CONFIG_SENSORS_TMP401 is not set
# CONFIG_SENSORS_TMP421 is not set
# CONFIG_SENSORS_VIA_CPUTEMP is not set
# CONFIG_SENSORS_VIA686A is not set
# CONFIG_SENSORS_VT1211 is not set
# CONFIG_SENSORS_VT8231 is not set
# CONFIG_SENSORS_W83781D is not set
# CONFIG_SENSORS_W83791D is not set
# CONFIG_SENSORS_W83792D is not set
# CONFIG_SENSORS_W83793 is not set
# CONFIG_SENSORS_W83795 is not set
# CONFIG_SENSORS_W83L785TS is not set
# CONFIG_SENSORS_W83L786NG is not set
# CONFIG_SENSORS_W83627HF is not set
# CONFIG_SENSORS_W83627EHF is not set

#
# ACPI drivers
#
# CONFIG_SENSORS_ACPI_POWER is not set
# CONFIG_SENSORS_ATK0110 is not set
CONFIG_THERMAL=y
CONFIG_THERMAL_HWMON=y
CONFIG_THERMAL_DEFAULT_GOV_STEP_WISE=y
# CONFIG_THERMAL_DEFAULT_GOV_FAIR_SHARE is not set
# CONFIG_THERMAL_DEFAULT_GOV_USER_SPACE is not set
CONFIG_THERMAL_GOV_FAIR_SHARE=y
CONFIG_THERMAL_GOV_STEP_WISE=y
# CONFIG_THERMAL_GOV_BANG_BANG is not set
CONFIG_THERMAL_GOV_USER_SPACE=y
# CONFIG_THERMAL_EMULATION is not set
# CONFIG_INTEL_POWERCLAMP is not set
# CONFIG_X86_PKG_TEMP_THERMAL is not set
# CONFIG_INTEL_SOC_DTS_THERMAL is not set
# CONFIG_INT340X_THERMAL is not set

#
# Texas Instruments thermal drivers
#
CONFIG_WATCHDOG=y
CONFIG_WATCHDOG_CORE=y
# CONFIG_WATCHDOG_NOWAYOUT is not set

#
# Watchdog Device Drivers
#
# CONFIG_SOFT_WATCHDOG is not set
# CONFIG_XILINX_WATCHDOG is not set
# CONFIG_DW_WATCHDOG is not set
# CONFIG_ACQUIRE_WDT is not set
# CONFIG_ADVANTECH_WDT is not set
# CONFIG_ALIM1535_WDT is not set
# CONFIG_ALIM7101_WDT is not set
# CONFIG_F71808E_WDT is not set
# CONFIG_SP5100_TCO is not set
# CONFIG_SBC_FITPC2_WATCHDOG is not set
# CONFIG_EUROTECH_WDT is not set
# CONFIG_IB700_WDT is not set
# CONFIG_IBMASR is not set
# CONFIG_WAFER_WDT is not set
# CONFIG_I6300ESB_WDT is not set
# CONFIG_IE6XX_WDT is not set
# CONFIG_ITCO_WDT is not set
# CONFIG_IT8712F_WDT is not set
# CONFIG_IT87_WDT is not set
# CONFIG_HP_WATCHDOG is not set
# CONFIG_SC1200_WDT is not set
# CONFIG_PC87413_WDT is not set
# CONFIG_NV_TCO is not set
# CONFIG_60XX_WDT is not set
# CONFIG_CPU5_WDT is not set
# CONFIG_SMSC_SCH311X_WDT is not set
# CONFIG_SMSC37B787_WDT is not set
# CONFIG_VIA_WDT is not set
# CONFIG_W83627HF_WDT is not set
# CONFIG_W83877F_WDT is not set
# CONFIG_W83977F_WDT is not set
# CONFIG_MACHZ_WDT is not set
# CONFIG_SBC_EPX_C3_WATCHDOG is not set
# CONFIG_MEN_A21_WDT is not set
# CONFIG_XEN_WDT is not set

#
# PCI-based Watchdog Cards
#
# CONFIG_PCIPCWATCHDOG is not set
# CONFIG_WDTPCI is not set

#
# USB-based Watchdog Cards
#
# CONFIG_USBPCWATCHDOG is not set
CONFIG_SSB_POSSIBLE=y

#
# Sonics Silicon Backplane
#
# CONFIG_SSB is not set
CONFIG_BCMA_POSSIBLE=y

#
# Broadcom specific AMBA
#
# CONFIG_BCMA is not set

#
# Multifunction device drivers
#
# CONFIG_MFD_CORE is not set
# CONFIG_MFD_BCM590XX is not set
# CONFIG_MFD_CROS_EC is not set
# CONFIG_MFD_DLN2 is not set
# CONFIG_MFD_MC13XXX_I2C is not set
# CONFIG_HTC_PASIC3 is not set
# CONFIG_LPC_ICH is not set
# CONFIG_LPC_SCH is not set
# CONFIG_MFD_JANZ_CMODIO is not set
# CONFIG_MFD_KEMPLD is not set
# CONFIG_MFD_MENF21BMC is not set
# CONFIG_MFD_VIPERBOARD is not set
# CONFIG_MFD_RETU is not set
# CONFIG_MFD_PCF50633 is not set
# CONFIG_MFD_RDC321X is not set
# CONFIG_MFD_RTSX_PCI is not set
# CONFIG_MFD_RTSX_USB is not set
# CONFIG_MFD_RN5T618 is not set
# CONFIG_MFD_SI476X_CORE is not set
# CONFIG_MFD_SM501 is not set
# CONFIG_ABX500_CORE is not set
# CONFIG_MFD_SYSCON is not set
# CONFIG_MFD_TI_AM335X_TSCADC is not set
# CONFIG_MFD_LP3943 is not set
# CONFIG_TPS6105X is not set
# CONFIG_TPS65010 is not set
# CONFIG_TPS6507X is not set
# CONFIG_MFD_TPS65217 is not set
# CONFIG_MFD_TPS65218 is not set
# CONFIG_MFD_TPS65912 is not set
# CONFIG_MFD_WL1273_CORE is not set
# CONFIG_MFD_LM3533 is not set
# CONFIG_MFD_TMIO is not set
# CONFIG_MFD_VX855 is not set
# CONFIG_MFD_ARIZONA_I2C is not set
# CONFIG_MFD_WM8994 is not set
# CONFIG_REGULATOR is not set
# CONFIG_MEDIA_SUPPORT is not set

#
# Graphics support
#
# CONFIG_AGP is not set
CONFIG_VGA_ARB=y
CONFIG_VGA_ARB_MAX_GPUS=16
CONFIG_VGA_SWITCHEROO=y

#
# Direct Rendering Manager
#
# CONFIG_DRM is not set

#
# Frame buffer Devices
#
CONFIG_FB=y
# CONFIG_FIRMWARE_EDID is not set
CONFIG_FB_CMDLINE=y
# CONFIG_FB_DDC is not set
CONFIG_FB_BOOT_VESA_SUPPORT=y
CONFIG_FB_CFB_FILLRECT=y
CONFIG_FB_CFB_COPYAREA=y
CONFIG_FB_CFB_IMAGEBLIT=y
# CONFIG_FB_CFB_REV_PIXELS_IN_BYTE is not set
CONFIG_FB_SYS_FILLRECT=y
CONFIG_FB_SYS_COPYAREA=y
CONFIG_FB_SYS_IMAGEBLIT=y
# CONFIG_FB_FOREIGN_ENDIAN is not set
CONFIG_FB_SYS_FOPS=y
CONFIG_FB_DEFERRED_IO=y
# CONFIG_FB_SVGALIB is not set
# CONFIG_FB_MACMODES is not set
# CONFIG_FB_BACKLIGHT is not set
# CONFIG_FB_MODE_HELPERS is not set
CONFIG_FB_TILEBLITTING=y

#
# Frame buffer hardware drivers
#
# CONFIG_FB_CIRRUS is not set
# CONFIG_FB_PM2 is not set
# CONFIG_FB_CYBER2000 is not set
# CONFIG_FB_ARC is not set
# CONFIG_FB_ASILIANT is not set
# CONFIG_FB_IMSTT is not set
# CONFIG_FB_VGA16 is not set
# CONFIG_FB_UVESA is not set
CONFIG_FB_VESA=y
CONFIG_FB_EFI=y
# CONFIG_FB_N411 is not set
# CONFIG_FB_HGA is not set
# CONFIG_FB_OPENCORES is not set
# CONFIG_FB_S1D13XXX is not set
# CONFIG_FB_NVIDIA is not set
# CONFIG_FB_RIVA is not set
# CONFIG_FB_I740 is not set
# CONFIG_FB_LE80578 is not set
# CONFIG_FB_MATROX is not set
# CONFIG_FB_RADEON is not set
# CONFIG_FB_ATY128 is not set
# CONFIG_FB_ATY is not set
# CONFIG_FB_S3 is not set
# CONFIG_FB_SAVAGE is not set
# CONFIG_FB_SIS is not set
# CONFIG_FB_VIA is not set
# CONFIG_FB_NEOMAGIC is not set
# CONFIG_FB_KYRO is not set
# CONFIG_FB_3DFX is not set
# CONFIG_FB_VOODOO1 is not set
# CONFIG_FB_VT8623 is not set
# CONFIG_FB_TRIDENT is not set
# CONFIG_FB_ARK is not set
# CONFIG_FB_PM3 is not set
# CONFIG_FB_CARMINE is not set
# CONFIG_FB_SMSCUFX is not set
# CONFIG_FB_UDL is not set
# CONFIG_FB_VIRTUAL is not set
CONFIG_XEN_FBDEV_FRONTEND=y
# CONFIG_FB_METRONOME is not set
# CONFIG_FB_MB862XX is not set
# CONFIG_FB_BROADSHEET is not set
# CONFIG_FB_AUO_K190X is not set
# CONFIG_FB_SIMPLE is not set
CONFIG_BACKLIGHT_LCD_SUPPORT=y
# CONFIG_LCD_CLASS_DEVICE is not set
CONFIG_BACKLIGHT_CLASS_DEVICE=y
# CONFIG_BACKLIGHT_GENERIC is not set
# CONFIG_BACKLIGHT_APPLE is not set
# CONFIG_BACKLIGHT_SAHARA is not set
# CONFIG_BACKLIGHT_ADP8860 is not set
# CONFIG_BACKLIGHT_ADP8870 is not set
# CONFIG_BACKLIGHT_LM3639 is not set
# CONFIG_BACKLIGHT_GPIO is not set
# CONFIG_BACKLIGHT_LV5207LP is not set
# CONFIG_BACKLIGHT_BD6107 is not set
# CONFIG_VGASTATE is not set

#
# Console display driver support
#
CONFIG_VGA_CONSOLE=y
CONFIG_VGACON_SOFT_SCROLLBACK=y
CONFIG_VGACON_SOFT_SCROLLBACK_SIZE=64
CONFIG_DUMMY_CONSOLE=y
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y
CONFIG_LOGO=y
# CONFIG_LOGO_LINUX_MONO is not set
# CONFIG_LOGO_LINUX_VGA16 is not set
CONFIG_LOGO_LINUX_CLUT224=y
# CONFIG_SOUND is not set

#
# HID support
#
CONFIG_HID=y
CONFIG_HID_BATTERY_STRENGTH=y
CONFIG_HIDRAW=y
# CONFIG_UHID is not set
CONFIG_HID_GENERIC=y

#
# Special HID drivers
#
CONFIG_HID_A4TECH=y
# CONFIG_HID_ACRUX is not set
CONFIG_HID_APPLE=y
# CONFIG_HID_APPLEIR is not set
# CONFIG_HID_AUREAL is not set
CONFIG_HID_BELKIN=y
CONFIG_HID_CHERRY=y
CONFIG_HID_CHICONY=y
# CONFIG_HID_CP2112 is not set
CONFIG_HID_CYPRESS=y
# CONFIG_HID_DRAGONRISE is not set
# CONFIG_HID_EMS_FF is not set
# CONFIG_HID_ELECOM is not set
# CONFIG_HID_ELO is not set
CONFIG_HID_EZKEY=y
# CONFIG_HID_HOLTEK is not set
# CONFIG_HID_GT683R is not set
# CONFIG_HID_HUION is not set
# CONFIG_HID_KEYTOUCH is not set
# CONFIG_HID_KYE is not set
# CONFIG_HID_UCLOGIC is not set
# CONFIG_HID_WALTOP is not set
# CONFIG_HID_GYRATION is not set
# CONFIG_HID_ICADE is not set
# CONFIG_HID_TWINHAN is not set
CONFIG_HID_KENSINGTON=y
# CONFIG_HID_LCPOWER is not set
# CONFIG_HID_LENOVO is not set
CONFIG_HID_LOGITECH=y
# CONFIG_HID_LOGITECH_DJ is not set
# CONFIG_HID_LOGITECH_HIDPP is not set
CONFIG_LOGITECH_FF=y
CONFIG_LOGIRUMBLEPAD2_FF=y
CONFIG_LOGIG940_FF=y
CONFIG_LOGIWHEELS_FF=y
CONFIG_HID_MAGICMOUSE=y
CONFIG_HID_MICROSOFT=y
CONFIG_HID_MONTEREY=y
# CONFIG_HID_MULTITOUCH is not set
CONFIG_HID_NTRIG=y
# CONFIG_HID_ORTEK is not set
# CONFIG_HID_PANTHERLORD is not set
# CONFIG_HID_PENMOUNT is not set
# CONFIG_HID_PETALYNX is not set
# CONFIG_HID_PICOLCD is not set
# CONFIG_HID_PLANTRONICS is not set
# CONFIG_HID_PRIMAX is not set
# CONFIG_HID_ROCCAT is not set
# CONFIG_HID_SAITEK is not set
# CONFIG_HID_SAMSUNG is not set
# CONFIG_HID_SONY is not set
# CONFIG_HID_SPEEDLINK is not set
# CONFIG_HID_STEELSERIES is not set
# CONFIG_HID_SUNPLUS is not set
# CONFIG_HID_RMI is not set
# CONFIG_HID_GREENASIA is not set
# CONFIG_HID_SMARTJOYPLUS is not set
# CONFIG_HID_TIVO is not set
# CONFIG_HID_TOPSEED is not set
# CONFIG_HID_THINGM is not set
# CONFIG_HID_THRUSTMASTER is not set
# CONFIG_HID_WACOM is not set
# CONFIG_HID_WIIMOTE is not set
# CONFIG_HID_XINMO is not set
# CONFIG_HID_ZEROPLUS is not set
# CONFIG_HID_ZYDACRON is not set
# CONFIG_HID_SENSOR_HUB is not set

#
# USB HID support
#
CONFIG_USB_HID=y
CONFIG_HID_PID=y
CONFIG_USB_HIDDEV=y

#
# I2C HID support
#
# CONFIG_I2C_HID is not set
CONFIG_USB_OHCI_LITTLE_ENDIAN=y
CONFIG_USB_SUPPORT=y
CONFIG_USB_COMMON=y
CONFIG_USB_ARCH_HAS_HCD=y
CONFIG_USB=y
CONFIG_USB_ANNOUNCE_NEW_DEVICES=y

#
# Miscellaneous USB options
#
CONFIG_USB_DEFAULT_PERSIST=y
# CONFIG_USB_DYNAMIC_MINORS is not set
# CONFIG_USB_OTG is not set
# CONFIG_USB_OTG_WHITELIST is not set
# CONFIG_USB_OTG_BLACKLIST_HUB is not set
# CONFIG_USB_OTG_FSM is not set
CONFIG_USB_MON=y
# CONFIG_USB_WUSB_CBAF is not set

#
# USB Host Controller Drivers
#
# CONFIG_USB_C67X00_HCD is not set
CONFIG_USB_XHCI_HCD=y
CONFIG_USB_XHCI_PCI=y
CONFIG_USB_EHCI_HCD=y
CONFIG_USB_EHCI_ROOT_HUB_TT=y
CONFIG_USB_EHCI_TT_NEWSCHED=y
CONFIG_USB_EHCI_PCI=y
# CONFIG_USB_EHCI_HCD_PLATFORM is not set
# CONFIG_USB_OXU210HP_HCD is not set
# CONFIG_USB_ISP116X_HCD is not set
# CONFIG_USB_ISP1760_HCD is not set
# CONFIG_USB_ISP1362_HCD is not set
# CONFIG_USB_FUSBH200_HCD is not set
# CONFIG_USB_FOTG210_HCD is not set
CONFIG_USB_OHCI_HCD=y
CONFIG_USB_OHCI_HCD_PCI=y
# CONFIG_USB_OHCI_HCD_PLATFORM is not set
CONFIG_USB_UHCI_HCD=y
# CONFIG_USB_SL811_HCD is not set
# CONFIG_USB_R8A66597_HCD is not set
# CONFIG_USB_HCD_TEST_MODE is not set

#
# USB Device Class drivers
#
# CONFIG_USB_ACM is not set
# CONFIG_USB_PRINTER is not set
# CONFIG_USB_WDM is not set
# CONFIG_USB_TMC is not set

#
# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may
#

#
# also be needed; see USB_STORAGE Help for more info
#
# CONFIG_USB_STORAGE is not set

#
# USB Imaging devices
#
# CONFIG_USB_MDC800 is not set
# CONFIG_USB_MICROTEK is not set
# CONFIG_USBIP_CORE is not set
# CONFIG_USB_MUSB_HDRC is not set
# CONFIG_USB_DWC3 is not set
# CONFIG_USB_DWC2 is not set
# CONFIG_USB_CHIPIDEA is not set

#
# USB port drivers
#
CONFIG_USB_SERIAL=y
CONFIG_USB_SERIAL_CONSOLE=y
CONFIG_USB_SERIAL_GENERIC=y
# CONFIG_USB_SERIAL_SIMPLE is not set
# CONFIG_USB_SERIAL_AIRCABLE is not set
# CONFIG_USB_SERIAL_ARK3116 is not set
# CONFIG_USB_SERIAL_BELKIN is not set
# CONFIG_USB_SERIAL_CH341 is not set
# CONFIG_USB_SERIAL_WHITEHEAT is not set
# CONFIG_USB_SERIAL_DIGI_ACCELEPORT is not set
# CONFIG_USB_SERIAL_CP210X is not set
# CONFIG_USB_SERIAL_CYPRESS_M8 is not set
# CONFIG_USB_SERIAL_EMPEG is not set
# CONFIG_USB_SERIAL_FTDI_SIO is not set
# CONFIG_USB_SERIAL_VISOR is not set
# CONFIG_USB_SERIAL_IPAQ is not set
# CONFIG_USB_SERIAL_IR is not set
# CONFIG_USB_SERIAL_EDGEPORT is not set
# CONFIG_USB_SERIAL_EDGEPORT_TI is not set
# CONFIG_USB_SERIAL_F81232 is not set
# CONFIG_USB_SERIAL_GARMIN is not set
# CONFIG_USB_SERIAL_IPW is not set
# CONFIG_USB_SERIAL_IUU is not set
# CONFIG_USB_SERIAL_KEYSPAN_PDA is not set
# CONFIG_USB_SERIAL_KEYSPAN is not set
# CONFIG_USB_SERIAL_KLSI is not set
# CONFIG_USB_SERIAL_KOBIL_SCT is not set
# CONFIG_USB_SERIAL_MCT_U232 is not set
# CONFIG_USB_SERIAL_METRO is not set
# CONFIG_USB_SERIAL_MOS7720 is not set
# CONFIG_USB_SERIAL_MOS7840 is not set
# CONFIG_USB_SERIAL_MXUPORT is not set
# CONFIG_USB_SERIAL_NAVMAN is not set
# CONFIG_USB_SERIAL_PL2303 is not set
# CONFIG_USB_SERIAL_OTI6858 is not set
# CONFIG_USB_SERIAL_QCAUX is not set
# CONFIG_USB_SERIAL_QUALCOMM is not set
# CONFIG_USB_SERIAL_SPCP8X5 is not set
# CONFIG_USB_SERIAL_SAFE is not set
# CONFIG_USB_SERIAL_SIERRAWIRELESS is not set
# CONFIG_USB_SERIAL_SYMBOL is not set
# CONFIG_USB_SERIAL_TI is not set
# CONFIG_USB_SERIAL_CYBERJACK is not set
# CONFIG_USB_SERIAL_XIRCOM is not set
# CONFIG_USB_SERIAL_OPTION is not set
# CONFIG_USB_SERIAL_OMNINET is not set
# CONFIG_USB_SERIAL_OPTICON is not set
# CONFIG_USB_SERIAL_XSENS_MT is not set
# CONFIG_USB_SERIAL_WISHBONE is not set
# CONFIG_USB_SERIAL_SSU100 is not set
# CONFIG_USB_SERIAL_QT2 is not set
# CONFIG_USB_SERIAL_DEBUG is not set

#
# USB Miscellaneous drivers
#
# CONFIG_USB_EMI62 is not set
# CONFIG_USB_EMI26 is not set
# CONFIG_USB_ADUTUX is not set
# CONFIG_USB_SEVSEG is not set
# CONFIG_USB_RIO500 is not set
# CONFIG_USB_LEGOTOWER is not set
# CONFIG_USB_LCD is not set
# CONFIG_USB_LED is not set
# CONFIG_USB_CYPRESS_CY7C63 is not set
# CONFIG_USB_CYTHERM is not set
# CONFIG_USB_IDMOUSE is not set
# CONFIG_USB_FTDI_ELAN is not set
# CONFIG_USB_APPLEDISPLAY is not set
# CONFIG_USB_SISUSBVGA is not set
# CONFIG_USB_LD is not set
# CONFIG_USB_TRANCEVIBRATOR is not set
# CONFIG_USB_IOWARRIOR is not set
# CONFIG_USB_TEST is not set
# CONFIG_USB_EHSET_TEST_FIXTURE is not set
# CONFIG_USB_ISIGHTFW is not set
# CONFIG_USB_YUREX is not set
# CONFIG_USB_EZUSB_FX2 is not set
# CONFIG_USB_HSIC_USB3503 is not set
# CONFIG_USB_LINK_LAYER_TEST is not set

#
# USB Physical Layer drivers
#
# CONFIG_USB_PHY is not set
# CONFIG_NOP_USB_XCEIV is not set
# CONFIG_USB_GPIO_VBUS is not set
# CONFIG_USB_ISP1301 is not set
# CONFIG_USB_GADGET is not set
# CONFIG_USB_LED_TRIG is not set
# CONFIG_UWB is not set
# CONFIG_MMC is not set
# CONFIG_MEMSTICK is not set
CONFIG_NEW_LEDS=y
CONFIG_LEDS_CLASS=y

#
# LED drivers
#
# CONFIG_LEDS_LM3530 is not set
# CONFIG_LEDS_LM3642 is not set
# CONFIG_LEDS_PCA9532 is not set
# CONFIG_LEDS_GPIO is not set
# CONFIG_LEDS_LP3944 is not set
# CONFIG_LEDS_LP5521 is not set
# CONFIG_LEDS_LP5523 is not set
# CONFIG_LEDS_LP5562 is not set
# CONFIG_LEDS_LP8501 is not set
# CONFIG_LEDS_LP8860 is not set
# CONFIG_LEDS_CLEVO_MAIL is not set
# CONFIG_LEDS_PCA955X is not set
# CONFIG_LEDS_PCA963X is not set
# CONFIG_LEDS_BD2802 is not set
# CONFIG_LEDS_INTEL_SS4200 is not set
# CONFIG_LEDS_LT3593 is not set
# CONFIG_LEDS_TCA6507 is not set
# CONFIG_LEDS_LM355x is not set

#
# LED driver for blink(1) USB RGB LED is under Special HID drivers (HID_THINGM)
#
# CONFIG_LEDS_BLINKM is not set

#
# LED Triggers
#
CONFIG_LEDS_TRIGGERS=y
# CONFIG_LEDS_TRIGGER_TIMER is not set
# CONFIG_LEDS_TRIGGER_ONESHOT is not set
# CONFIG_LEDS_TRIGGER_HEARTBEAT is not set
# CONFIG_LEDS_TRIGGER_BACKLIGHT is not set
# CONFIG_LEDS_TRIGGER_CPU is not set
# CONFIG_LEDS_TRIGGER_GPIO is not set
# CONFIG_LEDS_TRIGGER_DEFAULT_ON is not set

#
# iptables trigger is under Netfilter config (LED target)
#
# CONFIG_LEDS_TRIGGER_TRANSIENT is not set
# CONFIG_LEDS_TRIGGER_CAMERA is not set
CONFIG_ACCESSIBILITY=y
CONFIG_A11Y_BRAILLE_CONSOLE=y
# CONFIG_INFINIBAND is not set
CONFIG_EDAC=y
CONFIG_EDAC_LEGACY_SYSFS=y
# CONFIG_EDAC_DEBUG is not set
# CONFIG_EDAC_DECODE_MCE is not set
# CONFIG_EDAC_MM_EDAC is not set
CONFIG_RTC_LIB=y
CONFIG_RTC_CLASS=y
CONFIG_RTC_HCTOSYS=y
# CONFIG_RTC_SYSTOHC is not set
CONFIG_RTC_HCTOSYS_DEVICE="rtc0"
# CONFIG_RTC_DEBUG is not set

#
# RTC interfaces
#
CONFIG_RTC_INTF_SYSFS=y
CONFIG_RTC_INTF_PROC=y
CONFIG_RTC_INTF_DEV=y
# CONFIG_RTC_INTF_DEV_UIE_EMUL is not set
# CONFIG_RTC_DRV_TEST is not set

#
# I2C RTC drivers
#
# CONFIG_RTC_DRV_DS1307 is not set
# CONFIG_RTC_DRV_DS1374 is not set
# CONFIG_RTC_DRV_DS1672 is not set
# CONFIG_RTC_DRV_DS3232 is not set
# CONFIG_RTC_DRV_MAX6900 is not set
# CONFIG_RTC_DRV_RS5C372 is not set
# CONFIG_RTC_DRV_ISL1208 is not set
# CONFIG_RTC_DRV_ISL12022 is not set
# CONFIG_RTC_DRV_ISL12057 is not set
# CONFIG_RTC_DRV_X1205 is not set
# CONFIG_RTC_DRV_PCF2127 is not set
# CONFIG_RTC_DRV_PCF8523 is not set
# CONFIG_RTC_DRV_PCF8563 is not set
# CONFIG_RTC_DRV_PCF85063 is not set
# CONFIG_RTC_DRV_PCF8583 is not set
# CONFIG_RTC_DRV_M41T80 is not set
# CONFIG_RTC_DRV_BQ32K is not set
# CONFIG_RTC_DRV_S35390A is not set
# CONFIG_RTC_DRV_FM3130 is not set
# CONFIG_RTC_DRV_RX8581 is not set
# CONFIG_RTC_DRV_RX8025 is not set
# CONFIG_RTC_DRV_EM3027 is not set
# CONFIG_RTC_DRV_RV3029C2 is not set

#
# SPI RTC drivers
#

#
# Platform RTC drivers
#
CONFIG_RTC_DRV_CMOS=y
# CONFIG_RTC_DRV_DS1286 is not set
# CONFIG_RTC_DRV_DS1511 is not set
# CONFIG_RTC_DRV_DS1553 is not set
# CONFIG_RTC_DRV_DS1742 is not set
# CONFIG_RTC_DRV_DS2404 is not set
# CONFIG_RTC_DRV_STK17TA8 is not set
# CONFIG_RTC_DRV_M48T86 is not set
# CONFIG_RTC_DRV_M48T35 is not set
# CONFIG_RTC_DRV_M48T59 is not set
# CONFIG_RTC_DRV_MSM6242 is not set
# CONFIG_RTC_DRV_BQ4802 is not set
# CONFIG_RTC_DRV_RP5C01 is not set
# CONFIG_RTC_DRV_V3020 is not set

#
# on-CPU RTC drivers
#
# CONFIG_RTC_DRV_XGENE is not set

#
# HID Sensor RTC drivers
#
# CONFIG_RTC_DRV_HID_SENSOR_TIME is not set
CONFIG_DMADEVICES=y
# CONFIG_DMADEVICES_DEBUG is not set

#
# DMA Devices
#
# CONFIG_INTEL_MID_DMAC is not set
# CONFIG_INTEL_IOATDMA is not set
# CONFIG_DW_DMAC_CORE is not set
# CONFIG_DW_DMAC is not set
# CONFIG_DW_DMAC_PCI is not set
CONFIG_DMA_ACPI=y
CONFIG_AUXDISPLAY=y
# CONFIG_UIO is not set
# CONFIG_VFIO is not set
# CONFIG_VIRT_DRIVERS is not set
CONFIG_VIRTIO=y

#
# Virtio drivers
#
CONFIG_VIRTIO_PCI=y
CONFIG_VIRTIO_BALLOON=m
# CONFIG_VIRTIO_MMIO is not set

#
# Microsoft Hyper-V guest support
#
# CONFIG_HYPERV is not set

#
# Xen driver support
#
CONFIG_XEN_BALLOON=y
CONFIG_XEN_SELFBALLOONING=y
# CONFIG_XEN_BALLOON_MEMORY_HOTPLUG is not set
CONFIG_XEN_SCRUB_PAGES=y
# CONFIG_XEN_DEV_EVTCHN is not set
CONFIG_XEN_BACKEND=y
# CONFIG_XENFS is not set
CONFIG_XEN_SYS_HYPERVISOR=y
CONFIG_XEN_XENBUS_FRONTEND=y
# CONFIG_XEN_GNTDEV is not set
# CONFIG_XEN_GRANT_DEV_ALLOC is not set
CONFIG_SWIOTLB_XEN=y
CONFIG_XEN_TMEM=m
# CONFIG_XEN_PCIDEV_BACKEND is not set
CONFIG_XEN_PRIVCMD=m
# CONFIG_XEN_ACPI_PROCESSOR is not set
# CONFIG_XEN_MCE_LOG is not set
CONFIG_XEN_HAVE_PVMMU=y
CONFIG_XEN_EFI=y
CONFIG_STAGING=y
# CONFIG_SLICOSS is not set
# CONFIG_COMEDI is not set
# CONFIG_RTS5208 is not set
# CONFIG_FB_XGI is not set
# CONFIG_FT1000 is not set

#
# Speakup console speech
#
# CONFIG_SPEAKUP is not set
# CONFIG_TOUCHSCREEN_CLEARPAD_TM1217 is not set
# CONFIG_TOUCHSCREEN_SYNAPTICS_I2C_RMI4 is not set
CONFIG_STAGING_MEDIA=y

#
# Android
#
# CONFIG_USB_WPAN_HCD is not set
# CONFIG_WIMAX_GDM72XX is not set
# CONFIG_LTE_GDM724X is not set
# CONFIG_LUSTRE_FS is not set
# CONFIG_DGNC is not set
# CONFIG_DGAP is not set
# CONFIG_GS_FPGABOOT is not set
# CONFIG_CRYPTO_SKEIN is not set
# CONFIG_UNISYSSPAR is not set
CONFIG_X86_PLATFORM_DEVICES=y
# CONFIG_ACERHDF is not set
# CONFIG_ASUS_LAPTOP is not set
# CONFIG_DELL_SMO8800 is not set
# CONFIG_FUJITSU_LAPTOP is not set
# CONFIG_FUJITSU_TABLET is not set
# CONFIG_AMILO_RFKILL is not set
# CONFIG_HP_ACCEL is not set
# CONFIG_HP_WIRELESS is not set
# CONFIG_MSI_LAPTOP is not set
# CONFIG_PANASONIC_LAPTOP is not set
# CONFIG_COMPAL_LAPTOP is not set
# CONFIG_SONY_LAPTOP is not set
# CONFIG_IDEAPAD_LAPTOP is not set
# CONFIG_THINKPAD_ACPI is not set
# CONFIG_SENSORS_HDAPS is not set
# CONFIG_INTEL_MENLOW is not set
# CONFIG_EEEPC_LAPTOP is not set
# CONFIG_ACPI_WMI is not set
# CONFIG_TOPSTAR_LAPTOP is not set
# CONFIG_TOSHIBA_BT_RFKILL is not set
# CONFIG_TOSHIBA_HAPS is not set
# CONFIG_ACPI_CMPC is not set
# CONFIG_INTEL_IPS is not set
# CONFIG_IBM_RTL is not set
# CONFIG_SAMSUNG_LAPTOP is not set
# CONFIG_INTEL_OAKTRAIL is not set
# CONFIG_SAMSUNG_Q10 is not set
# CONFIG_APPLE_GMUX is not set
# CONFIG_INTEL_RST is not set
CONFIG_INTEL_SMARTCONNECT=y
# CONFIG_PVPANIC is not set
CONFIG_CHROME_PLATFORMS=y
# CONFIG_CHROMEOS_LAPTOP is not set
# CONFIG_CHROMEOS_PSTORE is not set
CONFIG_CLKDEV_LOOKUP=y
CONFIG_HAVE_CLK_PREPARE=y
CONFIG_COMMON_CLK=y

#
# Common Clock Framework
#
# CONFIG_COMMON_CLK_SI5351 is not set
# CONFIG_COMMON_CLK_PXA is not set

#
# Hardware Spinlock drivers
#

#
# Clock Source drivers
#
CONFIG_CLKEVT_I8253=y
CONFIG_I8253_LOCK=y
CONFIG_CLKBLD_I8253=y
# CONFIG_ATMEL_PIT is not set
# CONFIG_SH_TIMER_CMT is not set
# CONFIG_SH_TIMER_MTU2 is not set
# CONFIG_SH_TIMER_TMU is not set
# CONFIG_EM_TIMER_STI is not set
# CONFIG_MAILBOX is not set
CONFIG_IOMMU_API=y
CONFIG_IOMMU_SUPPORT=y
CONFIG_AMD_IOMMU=y
CONFIG_AMD_IOMMU_STATS=y
# CONFIG_AMD_IOMMU_V2 is not set
CONFIG_DMAR_TABLE=y
CONFIG_INTEL_IOMMU=y
# CONFIG_INTEL_IOMMU_DEFAULT_ON is not set
CONFIG_INTEL_IOMMU_FLOPPY_WA=y
CONFIG_IRQ_REMAP=y

#
# Remoteproc drivers
#
# CONFIG_STE_MODEM_RPROC is not set

#
# Rpmsg drivers
#

#
# SOC (System On Chip) specific Drivers
#
# CONFIG_SOC_TI is not set
# CONFIG_PM_DEVFREQ is not set
# CONFIG_EXTCON is not set
# CONFIG_MEMORY is not set
# CONFIG_IIO is not set
# CONFIG_NTB is not set
# CONFIG_VME_BUS is not set
# CONFIG_PWM is not set
# CONFIG_IPACK_BUS is not set
CONFIG_RESET_CONTROLLER=y
# CONFIG_FMC is not set

#
# PHY Subsystem
#
CONFIG_GENERIC_PHY=y
# CONFIG_BCM_KONA_USB2_PHY is not set
# CONFIG_POWERCAP is not set
# CONFIG_MCB is not set
CONFIG_RAS=y
# CONFIG_THUNDERBOLT is not set

#
# Android
#
# CONFIG_ANDROID is not set

#
# Firmware Drivers
#
# CONFIG_EDD is not set
CONFIG_FIRMWARE_MEMMAP=y
# CONFIG_DELL_RBU is not set
# CONFIG_DCDBAS is not set
CONFIG_DMIID=y
CONFIG_DMI_SYSFS=y
CONFIG_DMI_SCAN_MACHINE_NON_EFI_FALLBACK=y
CONFIG_ISCSI_IBFT_FIND=y
# CONFIG_ISCSI_IBFT is not set
# CONFIG_GOOGLE_FIRMWARE is not set

#
# EFI (Extensible Firmware Interface) Support
#
CONFIG_EFI_VARS=y
CONFIG_EFI_VARS_PSTORE=y
CONFIG_EFI_VARS_PSTORE_DEFAULT_DISABLE=y
CONFIG_EFI_RUNTIME_MAP=y
CONFIG_EFI_RUNTIME_WRAPPERS=y
CONFIG_UEFI_CPER=y

#
# File systems
#
CONFIG_DCACHE_WORD_ACCESS=y
# CONFIG_EXT2_FS is not set
# CONFIG_EXT3_FS is not set
CONFIG_EXT4_FS=y
CONFIG_EXT4_USE_FOR_EXT23=y
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_EXT4_FS_SECURITY=y
# CONFIG_EXT4_DEBUG is not set
CONFIG_JBD2=y
# CONFIG_JBD2_DEBUG is not set
CONFIG_FS_MBCACHE=y
# CONFIG_REISERFS_FS is not set
# CONFIG_JFS_FS is not set
# CONFIG_XFS_FS is not set
# CONFIG_GFS2_FS is not set
# CONFIG_OCFS2_FS is not set
# CONFIG_BTRFS_FS is not set
# CONFIG_NILFS2_FS is not set
CONFIG_FS_POSIX_ACL=y
CONFIG_EXPORTFS=y
CONFIG_FILE_LOCKING=y
CONFIG_FSNOTIFY=y
CONFIG_DNOTIFY=y
CONFIG_INOTIFY_USER=y
CONFIG_FANOTIFY=y
CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
CONFIG_QUOTA=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
# CONFIG_PRINT_QUOTA_WARNING is not set
# CONFIG_QUOTA_DEBUG is not set
CONFIG_QUOTA_TREE=y
# CONFIG_QFMT_V1 is not set
CONFIG_QFMT_V2=y
CONFIG_QUOTACTL=y
CONFIG_QUOTACTL_COMPAT=y
CONFIG_AUTOFS4_FS=y
# CONFIG_FUSE_FS is not set
# CONFIG_OVERLAY_FS is not set

#
# Caches
#
# CONFIG_FSCACHE is not set

#
# CD-ROM/DVD Filesystems
#
# CONFIG_ISO9660_FS is not set
# CONFIG_UDF_FS is not set

#
# DOS/FAT/NT Filesystems
#
# CONFIG_MSDOS_FS is not set
# CONFIG_VFAT_FS is not set
# CONFIG_NTFS_FS is not set

#
# Pseudo filesystems
#
CONFIG_PROC_FS=y
CONFIG_PROC_KCORE=y
CONFIG_PROC_VMCORE=y
CONFIG_PROC_SYSCTL=y
CONFIG_PROC_PAGE_MONITOR=y
CONFIG_KERNFS=y
CONFIG_SYSFS=y
CONFIG_TMPFS=y
CONFIG_TMPFS_POSIX_ACL=y
CONFIG_TMPFS_XATTR=y
CONFIG_HUGETLBFS=y
CONFIG_HUGETLB_PAGE=y
CONFIG_CONFIGFS_FS=y
CONFIG_MISC_FILESYSTEMS=y
# CONFIG_ADFS_FS is not set
# CONFIG_AFFS_FS is not set
# CONFIG_ECRYPT_FS is not set
# CONFIG_HFS_FS is not set
# CONFIG_HFSPLUS_FS is not set
# CONFIG_BEFS_FS is not set
# CONFIG_BFS_FS is not set
# CONFIG_EFS_FS is not set
# CONFIG_LOGFS is not set
# CONFIG_CRAMFS is not set
# CONFIG_SQUASHFS is not set
# CONFIG_VXFS_FS is not set
# CONFIG_MINIX_FS is not set
# CONFIG_OMFS_FS is not set
# CONFIG_HPFS_FS is not set
# CONFIG_QNX4FS_FS is not set
# CONFIG_QNX6FS_FS is not set
# CONFIG_ROMFS_FS is not set
CONFIG_PSTORE=y
# CONFIG_PSTORE_CONSOLE is not set
# CONFIG_PSTORE_FTRACE is not set
# CONFIG_PSTORE_RAM is not set
# CONFIG_SYSV_FS is not set
# CONFIG_UFS_FS is not set
# CONFIG_F2FS_FS is not set
CONFIG_EFIVAR_FS=y
CONFIG_NETWORK_FILESYSTEMS=y
# CONFIG_NFS_FS is not set
# CONFIG_NFSD is not set
# CONFIG_CEPH_FS is not set
# CONFIG_CIFS is not set
# CONFIG_NCP_FS is not set
# CONFIG_CODA_FS is not set
# CONFIG_AFS_FS is not set
CONFIG_NLS=y
CONFIG_NLS_DEFAULT="utf8"
CONFIG_NLS_CODEPAGE_437=y
# CONFIG_NLS_CODEPAGE_737 is not set
# CONFIG_NLS_CODEPAGE_775 is not set
# CONFIG_NLS_CODEPAGE_850 is not set
# CONFIG_NLS_CODEPAGE_852 is not set
# CONFIG_NLS_CODEPAGE_855 is not set
# CONFIG_NLS_CODEPAGE_857 is not set
# CONFIG_NLS_CODEPAGE_860 is not set
# CONFIG_NLS_CODEPAGE_861 is not set
# CONFIG_NLS_CODEPAGE_862 is not set
# CONFIG_NLS_CODEPAGE_863 is not set
# CONFIG_NLS_CODEPAGE_864 is not set
# CONFIG_NLS_CODEPAGE_865 is not set
# CONFIG_NLS_CODEPAGE_866 is not set
# CONFIG_NLS_CODEPAGE_869 is not set
# CONFIG_NLS_CODEPAGE_936 is not set
# CONFIG_NLS_CODEPAGE_950 is not set
# CONFIG_NLS_CODEPAGE_932 is not set
# CONFIG_NLS_CODEPAGE_949 is not set
# CONFIG_NLS_CODEPAGE_874 is not set
# CONFIG_NLS_ISO8859_8 is not set
# CONFIG_NLS_CODEPAGE_1250 is not set
# CONFIG_NLS_CODEPAGE_1251 is not set
CONFIG_NLS_ASCII=y
# CONFIG_NLS_ISO8859_1 is not set
# CONFIG_NLS_ISO8859_2 is not set
# CONFIG_NLS_ISO8859_3 is not set
# CONFIG_NLS_ISO8859_4 is not set
# CONFIG_NLS_ISO8859_5 is not set
# CONFIG_NLS_ISO8859_6 is not set
# CONFIG_NLS_ISO8859_7 is not set
# CONFIG_NLS_ISO8859_9 is not set
# CONFIG_NLS_ISO8859_13 is not set
# CONFIG_NLS_ISO8859_14 is not set
# CONFIG_NLS_ISO8859_15 is not set
# CONFIG_NLS_KOI8_R is not set
# CONFIG_NLS_KOI8_U is not set
# CONFIG_NLS_MAC_ROMAN is not set
# CONFIG_NLS_MAC_CELTIC is not set
# CONFIG_NLS_MAC_CENTEURO is not set
# CONFIG_NLS_MAC_CROATIAN is not set
# CONFIG_NLS_MAC_CYRILLIC is not set
# CONFIG_NLS_MAC_GAELIC is not set
# CONFIG_NLS_MAC_GREEK is not set
# CONFIG_NLS_MAC_ICELAND is not set
# CONFIG_NLS_MAC_INUIT is not set
# CONFIG_NLS_MAC_ROMANIAN is not set
# CONFIG_NLS_MAC_TURKISH is not set
# CONFIG_NLS_UTF8 is not set
# CONFIG_DLM is not set

#
# Kernel hacking
#
CONFIG_TRACE_IRQFLAGS_SUPPORT=y

#
# printk and dmesg options
#
CONFIG_PRINTK_TIME=y
CONFIG_MESSAGE_LOGLEVEL_DEFAULT=4
CONFIG_BOOT_PRINTK_DELAY=y
CONFIG_DYNAMIC_DEBUG=y

#
# Compile-time checks and compiler options
#
CONFIG_DEBUG_INFO=y
# CONFIG_DEBUG_INFO_REDUCED is not set
# CONFIG_DEBUG_INFO_SPLIT is not set
# CONFIG_DEBUG_INFO_DWARF4 is not set
# CONFIG_ENABLE_WARN_DEPRECATED is not set
CONFIG_ENABLE_MUST_CHECK=y
CONFIG_FRAME_WARN=2048
CONFIG_STRIP_ASM_SYMS=y
# CONFIG_READABLE_ASM is not set
CONFIG_UNUSED_SYMBOLS=y
# CONFIG_PAGE_OWNER is not set
CONFIG_DEBUG_FS=y
CONFIG_HEADERS_CHECK=y
# CONFIG_DEBUG_SECTION_MISMATCH is not set
CONFIG_ARCH_WANT_FRAME_POINTERS=y
CONFIG_FRAME_POINTER=y
# CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set
CONFIG_MAGIC_SYSRQ=y
CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE=0x0
CONFIG_DEBUG_KERNEL=y

#
# Memory Debugging
#
# CONFIG_PAGE_EXTENSION is not set
# CONFIG_DEBUG_PAGEALLOC is not set
# CONFIG_DEBUG_OBJECTS is not set
# CONFIG_SLUB_DEBUG_ON is not set
# CONFIG_SLUB_STATS is not set
CONFIG_HAVE_DEBUG_KMEMLEAK=y
# CONFIG_DEBUG_KMEMLEAK is not set
# CONFIG_DEBUG_STACK_USAGE is not set
CONFIG_DEBUG_VM=y
# CONFIG_DEBUG_VM_VMACACHE is not set
# CONFIG_DEBUG_VM_RB is not set
# CONFIG_DEBUG_VIRTUAL is not set
CONFIG_DEBUG_MEMORY_INIT=y
# CONFIG_DEBUG_PER_CPU_MAPS is not set
CONFIG_HAVE_DEBUG_STACKOVERFLOW=y
CONFIG_DEBUG_STACKOVERFLOW=y
CONFIG_HAVE_ARCH_KMEMCHECK=y
CONFIG_DEBUG_SHIRQ=y

#
# Debug Lockups and Hangs
#
CONFIG_LOCKUP_DETECTOR=y
CONFIG_HARDLOCKUP_DETECTOR=y
# CONFIG_BOOTPARAM_HARDLOCKUP_PANIC is not set
CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE=0
# CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
# CONFIG_DETECT_HUNG_TASK is not set
# CONFIG_PANIC_ON_OOPS is not set
CONFIG_PANIC_ON_OOPS_VALUE=0
CONFIG_PANIC_TIMEOUT=0
CONFIG_SCHED_DEBUG=y
# CONFIG_SCHEDSTATS is not set
# CONFIG_SCHED_STACK_END_CHECK is not set
CONFIG_TIMER_STATS=y

#
# Lock Debugging (spinlocks, mutexes, etc...)
#
# CONFIG_DEBUG_RT_MUTEXES is not set
CONFIG_DEBUG_SPINLOCK=y
CONFIG_DEBUG_MUTEXES=y
# CONFIG_DEBUG_WW_MUTEX_SLOWPATH is not set
CONFIG_DEBUG_LOCK_ALLOC=y
CONFIG_PROVE_LOCKING=y
CONFIG_LOCKDEP=y
# CONFIG_LOCK_STAT is not set
# CONFIG_DEBUG_LOCKDEP is not set
# CONFIG_DEBUG_ATOMIC_SLEEP is not set
# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
# CONFIG_LOCK_TORTURE_TEST is not set
CONFIG_TRACE_IRQFLAGS=y
CONFIG_STACKTRACE=y
# CONFIG_DEBUG_KOBJECT is not set
CONFIG_DEBUG_BUGVERBOSE=y
CONFIG_DEBUG_LIST=y
# CONFIG_DEBUG_PI_LIST is not set
# CONFIG_DEBUG_SG is not set
# CONFIG_DEBUG_NOTIFIERS is not set
# CONFIG_DEBUG_CREDENTIALS is not set

#
# RCU Debugging
#
CONFIG_PROVE_RCU=y
# CONFIG_PROVE_RCU_REPEATEDLY is not set
CONFIG_SPARSE_RCU_POINTER=y
# CONFIG_TORTURE_TEST is not set
# CONFIG_RCU_TORTURE_TEST is not set
CONFIG_RCU_CPU_STALL_TIMEOUT=60
# CONFIG_RCU_CPU_STALL_INFO is not set
# CONFIG_RCU_TRACE is not set
# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
# CONFIG_NOTIFIER_ERROR_INJECTION is not set
# CONFIG_FAULT_INJECTION is not set
# CONFIG_LATENCYTOP is not set
CONFIG_ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS=y
# CONFIG_DEBUG_STRICT_USER_COPY_CHECKS is not set
CONFIG_USER_STACKTRACE_SUPPORT=y
CONFIG_NOP_TRACER=y
CONFIG_HAVE_FUNCTION_TRACER=y
CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y
CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST=y
CONFIG_HAVE_DYNAMIC_FTRACE=y
CONFIG_HAVE_DYNAMIC_FTRACE_WITH_REGS=y
CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
CONFIG_HAVE_SYSCALL_TRACEPOINTS=y
CONFIG_HAVE_FENTRY=y
CONFIG_HAVE_C_RECORDMCOUNT=y
CONFIG_TRACER_MAX_TRACE=y
CONFIG_TRACE_CLOCK=y
CONFIG_RING_BUFFER=y
CONFIG_EVENT_TRACING=y
CONFIG_CONTEXT_SWITCH_TRACER=y
CONFIG_TRACING=y
CONFIG_GENERIC_TRACER=y
CONFIG_TRACING_SUPPORT=y
CONFIG_FTRACE=y
CONFIG_FUNCTION_TRACER=y
CONFIG_FUNCTION_GRAPH_TRACER=y
# CONFIG_IRQSOFF_TRACER is not set
CONFIG_SCHED_TRACER=y
CONFIG_FTRACE_SYSCALLS=y
CONFIG_TRACER_SNAPSHOT=y
# CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP is not set
CONFIG_BRANCH_PROFILE_NONE=y
# CONFIG_PROFILE_ANNOTATED_BRANCHES is not set
# CONFIG_PROFILE_ALL_BRANCHES is not set
CONFIG_STACK_TRACER=y
CONFIG_BLK_DEV_IO_TRACE=y
CONFIG_KPROBE_EVENT=y
CONFIG_UPROBE_EVENT=y
CONFIG_PROBE_EVENTS=y
CONFIG_DYNAMIC_FTRACE=y
CONFIG_DYNAMIC_FTRACE_WITH_REGS=y
CONFIG_FUNCTION_PROFILER=y
CONFIG_FTRACE_MCOUNT_RECORD=y
# CONFIG_FTRACE_STARTUP_TEST is not set
# CONFIG_MMIOTRACE is not set
# CONFIG_TRACEPOINT_BENCHMARK is not set
# CONFIG_RING_BUFFER_BENCHMARK is not set
# CONFIG_RING_BUFFER_STARTUP_TEST is not set

#
# Runtime Testing
#
# CONFIG_LKDTM is not set
# CONFIG_TEST_LIST_SORT is not set
# CONFIG_KPROBES_SANITY_TEST is not set
# CONFIG_BACKTRACE_SELF_TEST is not set
# CONFIG_RBTREE_TEST is not set
# CONFIG_INTERVAL_TREE_TEST is not set
# CONFIG_PERCPU_TEST is not set
CONFIG_ATOMIC64_SELFTEST=y
# CONFIG_TEST_STRING_HELPERS is not set
CONFIG_TEST_KSTRTOX=y
# CONFIG_TEST_RHASHTABLE is not set
CONFIG_PROVIDE_OHCI1394_DMA_INIT=y
CONFIG_BUILD_DOCSRC=y
# CONFIG_DMA_API_DEBUG is not set
# CONFIG_TEST_LKM is not set
# CONFIG_TEST_USER_COPY is not set
# CONFIG_TEST_BPF is not set
# CONFIG_TEST_FIRMWARE is not set
# CONFIG_TEST_UDELAY is not set
# CONFIG_SAMPLES is not set
CONFIG_HAVE_ARCH_KGDB=y
# CONFIG_KGDB is not set
CONFIG_STRICT_DEVMEM=y
# CONFIG_X86_VERBOSE_BOOTUP is not set
CONFIG_EARLY_PRINTK=y
CONFIG_EARLY_PRINTK_DBGP=y
CONFIG_EARLY_PRINTK_EFI=y
# CONFIG_X86_PTDUMP is not set
CONFIG_DEBUG_RODATA=y
CONFIG_DEBUG_RODATA_TEST=y
CONFIG_DEBUG_SET_MODULE_RONX=y
# CONFIG_DEBUG_NX_TEST is not set
CONFIG_DOUBLEFAULT=y
# CONFIG_DEBUG_TLBFLUSH is not set
# CONFIG_IOMMU_STRESS is not set
CONFIG_HAVE_MMIOTRACE_SUPPORT=y
CONFIG_X86_DECODER_SELFTEST=y
CONFIG_IO_DELAY_TYPE_0X80=0
CONFIG_IO_DELAY_TYPE_0XED=1
CONFIG_IO_DELAY_TYPE_UDELAY=2
CONFIG_IO_DELAY_TYPE_NONE=3
CONFIG_IO_DELAY_0X80=y
# CONFIG_IO_DELAY_0XED is not set
# CONFIG_IO_DELAY_UDELAY is not set
# CONFIG_IO_DELAY_NONE is not set
CONFIG_DEFAULT_IO_DELAY_TYPE=0
CONFIG_DEBUG_BOOT_PARAMS=y
# CONFIG_CPA_DEBUG is not set
CONFIG_OPTIMIZE_INLINING=y
# CONFIG_DEBUG_NMI_SELFTEST is not set
# CONFIG_X86_DEBUG_STATIC_CPU_HAS is not set

#
# Security options
#
CONFIG_KEYS=y
CONFIG_PERSISTENT_KEYRINGS=y
CONFIG_BIG_KEYS=y
# CONFIG_ENCRYPTED_KEYS is not set
# CONFIG_KEYS_DEBUG_PROC_KEYS is not set
# CONFIG_SECURITY_DMESG_RESTRICT is not set
CONFIG_SECURITY=y
CONFIG_SECURITYFS=y
CONFIG_SECURITY_NETWORK=y
CONFIG_SECURITY_NETWORK_XFRM=y
# CONFIG_SECURITY_PATH is not set
CONFIG_INTEL_TXT=y
CONFIG_LSM_MMAP_MIN_ADDR=65536
CONFIG_SECURITY_SELINUX=y
CONFIG_SECURITY_SELINUX_BOOTPARAM=y
CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=1
CONFIG_SECURITY_SELINUX_DISABLE=y
CONFIG_SECURITY_SELINUX_DEVELOP=y
CONFIG_SECURITY_SELINUX_AVC_STATS=y
CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1
# CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX is not set
# CONFIG_SECURITY_SMACK is not set
# CONFIG_SECURITY_TOMOYO is not set
# CONFIG_SECURITY_APPARMOR is not set
# CONFIG_SECURITY_YAMA is not set
CONFIG_INTEGRITY=y
# CONFIG_INTEGRITY_SIGNATURE is not set
CONFIG_INTEGRITY_AUDIT=y
# CONFIG_IMA is not set
# CONFIG_EVM is not set
CONFIG_DEFAULT_SECURITY_SELINUX=y
# CONFIG_DEFAULT_SECURITY_DAC is not set
CONFIG_DEFAULT_SECURITY="selinux"
CONFIG_CRYPTO=y

#
# Crypto core or helper
#
CONFIG_CRYPTO_ALGAPI=y
CONFIG_CRYPTO_ALGAPI2=y
CONFIG_CRYPTO_AEAD=y
CONFIG_CRYPTO_AEAD2=y
CONFIG_CRYPTO_BLKCIPHER=y
CONFIG_CRYPTO_BLKCIPHER2=y
CONFIG_CRYPTO_HASH=y
CONFIG_CRYPTO_HASH2=y
CONFIG_CRYPTO_RNG=y
CONFIG_CRYPTO_RNG2=y
CONFIG_CRYPTO_PCOMP2=y
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_MANAGER2=y
# CONFIG_CRYPTO_USER is not set
# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
CONFIG_CRYPTO_GF128MUL=y
# CONFIG_CRYPTO_NULL is not set
# CONFIG_CRYPTO_PCRYPT is not set
CONFIG_CRYPTO_WORKQUEUE=y
CONFIG_CRYPTO_CRYPTD=y
# CONFIG_CRYPTO_MCRYPTD is not set
# CONFIG_CRYPTO_AUTHENC is not set
# CONFIG_CRYPTO_TEST is not set
CONFIG_CRYPTO_ABLK_HELPER=y
CONFIG_CRYPTO_GLUE_HELPER_X86=y

#
# Authenticated Encryption with Associated Data
#
# CONFIG_CRYPTO_CCM is not set
# CONFIG_CRYPTO_GCM is not set
CONFIG_CRYPTO_SEQIV=y

#
# Block modes
#
CONFIG_CRYPTO_CBC=y
CONFIG_CRYPTO_CTR=y
# CONFIG_CRYPTO_CTS is not set
CONFIG_CRYPTO_ECB=y
CONFIG_CRYPTO_LRW=y
# CONFIG_CRYPTO_PCBC is not set
CONFIG_CRYPTO_XTS=y

#
# Hash modes
#
# CONFIG_CRYPTO_CMAC is not set
CONFIG_CRYPTO_HMAC=y
# CONFIG_CRYPTO_XCBC is not set
# CONFIG_CRYPTO_VMAC is not set

#
# Digest
#
CONFIG_CRYPTO_CRC32C=y
CONFIG_CRYPTO_CRC32C_INTEL=m
# CONFIG_CRYPTO_CRC32 is not set
CONFIG_CRYPTO_CRC32_PCLMUL=m
CONFIG_CRYPTO_CRCT10DIF=y
CONFIG_CRYPTO_CRCT10DIF_PCLMUL=m
# CONFIG_CRYPTO_GHASH is not set
# CONFIG_CRYPTO_MD4 is not set
CONFIG_CRYPTO_MD5=y
# CONFIG_CRYPTO_MICHAEL_MIC is not set
# CONFIG_CRYPTO_RMD128 is not set
# CONFIG_CRYPTO_RMD160 is not set
# CONFIG_CRYPTO_RMD256 is not set
# CONFIG_CRYPTO_RMD320 is not set
CONFIG_CRYPTO_SHA1=y
# CONFIG_CRYPTO_SHA1_SSSE3 is not set
# CONFIG_CRYPTO_SHA256_SSSE3 is not set
# CONFIG_CRYPTO_SHA512_SSSE3 is not set
# CONFIG_CRYPTO_SHA1_MB is not set
CONFIG_CRYPTO_SHA256=y
# CONFIG_CRYPTO_SHA512 is not set
# CONFIG_CRYPTO_TGR192 is not set
# CONFIG_CRYPTO_WP512 is not set
CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL=m

#
# Ciphers
#
CONFIG_CRYPTO_AES=y
CONFIG_CRYPTO_AES_X86_64=y
CONFIG_CRYPTO_AES_NI_INTEL=y
# CONFIG_CRYPTO_ANUBIS is not set
# CONFIG_CRYPTO_ARC4 is not set
# CONFIG_CRYPTO_BLOWFISH is not set
# CONFIG_CRYPTO_BLOWFISH_X86_64 is not set
# CONFIG_CRYPTO_CAMELLIA is not set
# CONFIG_CRYPTO_CAMELLIA_X86_64 is not set
# CONFIG_CRYPTO_CAMELLIA_AESNI_AVX_X86_64 is not set
# CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64 is not set
# CONFIG_CRYPTO_CAST5 is not set
# CONFIG_CRYPTO_CAST5_AVX_X86_64 is not set
# CONFIG_CRYPTO_CAST6 is not set
# CONFIG_CRYPTO_CAST6_AVX_X86_64 is not set
# CONFIG_CRYPTO_DES is not set
# CONFIG_CRYPTO_DES3_EDE_X86_64 is not set
# CONFIG_CRYPTO_FCRYPT is not set
# CONFIG_CRYPTO_KHAZAD is not set
# CONFIG_CRYPTO_SALSA20 is not set
# CONFIG_CRYPTO_SALSA20_X86_64 is not set
# CONFIG_CRYPTO_SEED is not set
# CONFIG_CRYPTO_SERPENT is not set
# CONFIG_CRYPTO_SERPENT_SSE2_X86_64 is not set
# CONFIG_CRYPTO_SERPENT_AVX_X86_64 is not set
# CONFIG_CRYPTO_SERPENT_AVX2_X86_64 is not set
# CONFIG_CRYPTO_TEA is not set
# CONFIG_CRYPTO_TWOFISH is not set
# CONFIG_CRYPTO_TWOFISH_X86_64 is not set
# CONFIG_CRYPTO_TWOFISH_X86_64_3WAY is not set
# CONFIG_CRYPTO_TWOFISH_AVX_X86_64 is not set

#
# Compression
#
# CONFIG_CRYPTO_DEFLATE is not set
# CONFIG_CRYPTO_ZLIB is not set
CONFIG_CRYPTO_LZO=y
# CONFIG_CRYPTO_LZ4 is not set
# CONFIG_CRYPTO_LZ4HC is not set

#
# Random Number Generation
#
# CONFIG_CRYPTO_ANSI_CPRNG is not set
# CONFIG_CRYPTO_DRBG_MENU is not set
CONFIG_CRYPTO_USER_API=y
CONFIG_CRYPTO_USER_API_HASH=y
CONFIG_CRYPTO_USER_API_SKCIPHER=y
CONFIG_CRYPTO_HASH_INFO=y
CONFIG_CRYPTO_HW=y
# CONFIG_CRYPTO_DEV_PADLOCK is not set
CONFIG_CRYPTO_DEV_CCP=y
# CONFIG_CRYPTO_DEV_CCP_DD is not set
# CONFIG_CRYPTO_DEV_QAT_DH895xCC is not set
CONFIG_ASYMMETRIC_KEY_TYPE=y
CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=y
CONFIG_PUBLIC_KEY_ALGO_RSA=y
CONFIG_X509_CERTIFICATE_PARSER=y
# CONFIG_PKCS7_MESSAGE_PARSER is not set
CONFIG_HAVE_KVM=y
CONFIG_VIRTUALIZATION=y
# CONFIG_KVM is not set
CONFIG_BINARY_PRINTF=y

#
# Library routines
#
CONFIG_BITREVERSE=y
CONFIG_GENERIC_STRNCPY_FROM_USER=y
CONFIG_GENERIC_STRNLEN_USER=y
CONFIG_GENERIC_NET_UTILS=y
CONFIG_GENERIC_FIND_FIRST_BIT=y
CONFIG_GENERIC_PCI_IOMAP=y
CONFIG_GENERIC_IOMAP=y
CONFIG_GENERIC_IO=y
CONFIG_PERCPU_RWSEM=y
CONFIG_ARCH_USE_CMPXCHG_LOCKREF=y
CONFIG_ARCH_HAS_FAST_MULTIPLIER=y
# CONFIG_CRC_CCITT is not set
CONFIG_CRC16=y
CONFIG_CRC_T10DIF=y
# CONFIG_CRC_ITU_T is not set
CONFIG_CRC32=y
# CONFIG_CRC32_SELFTEST is not set
CONFIG_CRC32_SLICEBY8=y
# CONFIG_CRC32_SLICEBY4 is not set
# CONFIG_CRC32_SARWATE is not set
# CONFIG_CRC32_BIT is not set
# CONFIG_CRC7 is not set
# CONFIG_LIBCRC32C is not set
# CONFIG_CRC8 is not set
# CONFIG_AUDIT_ARCH_COMPAT_GENERIC is not set
# CONFIG_RANDOM32_SELFTEST is not set
CONFIG_ZLIB_INFLATE=y
CONFIG_ZLIB_DEFLATE=y
CONFIG_LZO_COMPRESS=y
CONFIG_LZO_DECOMPRESS=y
CONFIG_LZ4_DECOMPRESS=y
CONFIG_XZ_DEC=y
CONFIG_XZ_DEC_X86=y
CONFIG_XZ_DEC_POWERPC=y
CONFIG_XZ_DEC_IA64=y
CONFIG_XZ_DEC_ARM=y
CONFIG_XZ_DEC_ARMTHUMB=y
CONFIG_XZ_DEC_SPARC=y
CONFIG_XZ_DEC_BCJ=y
# CONFIG_XZ_DEC_TEST is not set
CONFIG_DECOMPRESS_GZIP=y
CONFIG_DECOMPRESS_BZIP2=y
CONFIG_DECOMPRESS_LZMA=y
CONFIG_DECOMPRESS_XZ=y
CONFIG_DECOMPRESS_LZO=y
CONFIG_DECOMPRESS_LZ4=y
CONFIG_GENERIC_ALLOCATOR=y
CONFIG_ASSOCIATIVE_ARRAY=y
CONFIG_HAS_IOMEM=y
CONFIG_HAS_IOPORT_MAP=y
CONFIG_HAS_DMA=y
CONFIG_CPU_RMAP=y
CONFIG_DQL=y
CONFIG_GLOB=y
# CONFIG_GLOB_SELFTEST is not set
CONFIG_NLATTR=y
CONFIG_ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE=y
CONFIG_AVERAGE=y
CONFIG_CLZ_TAB=y
# CONFIG_CORDIC is not set
# CONFIG_DDR is not set
CONFIG_MPILIB=y
CONFIG_OID_REGISTRY=y
CONFIG_UCS2_STRING=y
CONFIG_FONT_SUPPORT=y
# CONFIG_FONTS is not set
CONFIG_FONT_8x8=y
CONFIG_FONT_8x16=y
CONFIG_ARCH_HAS_SG_CHAIN=y

[-- Attachment #3: console.log --]
[-- Type: text/x-log, Size: 27280 bytes --]

[    0.000000] Initializing cgroup subsys cpu
[    0.000000] Initializing cgroup subsys cpuacct
[    0.000000] Linux version 3.19.0+ (avagin@localhost.localdomain) (gcc version 4.9.2 20150212 (Red Hat 4.9.2-6) (GCC) ) #169 SMP Wed Feb 25 15:20:44 MSK 2015
[    0.000000] Command line: BOOT_IMAGE=/vmlinuz-3.19.0+ root=UUID=92dcf04a-50ae-458c-851c-6aa51a139db7 ro rd.md=0 rd.lvm=0 rd.dm=0 rd.luks=0 vconsole.keymap=us console=ttyS0,115200 LANG=en_US.UTF-8
[    0.000000] e820: BIOS-provided physical RAM map:
[    0.000000] BIOS-e820: [mem 0x0000000000000000-0x000000000009fbff] usable
[    0.000000] BIOS-e820: [mem 0x000000000009fc00-0x000000000009ffff] reserved
[    0.000000] BIOS-e820: [mem 0x00000000000f0000-0x00000000000fffff] reserved
[    0.000000] BIOS-e820: [mem 0x0000000000100000-0x000000007fffdfff] usable
[    0.000000] BIOS-e820: [mem 0x000000007fffe000-0x000000007fffffff] reserved
[    0.000000] BIOS-e820: [mem 0x00000000feffc000-0x00000000feffffff] reserved
[    0.000000] BIOS-e820: [mem 0x00000000fffc0000-0x00000000ffffffff] reserved
[    0.000000] NX (Execute Disable) protection: active
[    0.000000] SMBIOS 2.4 present.
[    0.000000] Hypervisor detected: KVM
[    0.000000] e820: last_pfn = 0x7fffe max_arch_pfn = 0x400000000
[    0.000000] PAT not supported by CPU.
[    0.000000] found SMP MP-table at [mem 0x000f1e20-0x000f1e2f] mapped at [ffff8800000f1e20]
[    0.000000] init_memory_mapping: [mem 0x00000000-0x000fffff]
[    0.000000] init_memory_mapping: [mem 0x7fc00000-0x7fdfffff]
[    0.000000] init_memory_mapping: [mem 0x60000000-0x7fbfffff]
[    0.000000] init_memory_mapping: [mem 0x40000000-0x5fffffff]
[    0.000000] init_memory_mapping: [mem 0x00100000-0x3fffffff]
[    0.000000] init_memory_mapping: [mem 0x7fe00000-0x7fffdfff]
[    0.000000] RAMDISK: [mem 0x36d62000-0x376a8fff]
[    0.000000] ACPI: Early table checksum verification disabled
[    0.000000] ACPI: RSDP 0x00000000000F1C50 000014 (v00 BOCHS )
[    0.000000] ACPI: RSDT 0x000000007FFFE340 000034 (v01 BOCHS  BXPCRSDT 00000001 BXPC 00000001)
[    0.000000] ACPI: FACP 0x000000007FFFFF80 000074 (v01 BOCHS  BXPCFACP 00000001 BXPC 00000001)
[    0.000000] ACPI: DSDT 0x000000007FFFE380 001135 (v01 BOCHS  BXPCDSDT 00000001 BXPC 00000001)
[    0.000000] ACPI: FACS 0x000000007FFFFF40 000040
[    0.000000] ACPI: SSDT 0x000000007FFFF600 00093C (v01 BOCHS  BXPCSSDT 00000001 BXPC 00000001)
[    0.000000] ACPI: APIC 0x000000007FFFF500 000090 (v01 BOCHS  BXPCAPIC 00000001 BXPC 00000001)
[    0.000000] ACPI: HPET 0x000000007FFFF4C0 000038 (v01 BOCHS  BXPCHPET 00000001 BXPC 00000001)
[    0.000000] No NUMA configuration found
[    0.000000] Faking a node at [mem 0x0000000000000000-0x000000007fffdfff]
[    0.000000] NODE_DATA(0) allocated [mem 0x7ffe9000-0x7fffdfff]
[    0.000000] kvm-clock: Using msrs 4b564d01 and 4b564d00
[    0.000000] kvm-clock: cpu 0, msr 0:7ffe8001, primary cpu clock
[    0.000000] Zone ranges:
[    0.000000]   DMA      [mem 0x00001000-0x00ffffff]
[    0.000000]   DMA32    [mem 0x01000000-0x7fffdfff]
[    0.000000]   Normal   empty
[    0.000000] Movable zone start for each node
[    0.000000] Early memory node ranges
[    0.000000]   node   0: [mem 0x00001000-0x0009efff]
[    0.000000]   node   0: [mem 0x00100000-0x7fffdfff]
[    0.000000] Initmem setup node 0 [mem 0x00001000-0x7fffdfff]
[    0.000000] ACPI: PM-Timer IO Port: 0xb008
[    0.000000] ACPI: LAPIC (acpi_id[0x00] lapic_id[0x00] enabled)
[    0.000000] ACPI: LAPIC (acpi_id[0x01] lapic_id[0x01] enabled)
[    0.000000] ACPI: LAPIC (acpi_id[0x02] lapic_id[0x02] enabled)
[    0.000000] ACPI: LAPIC (acpi_id[0x03] lapic_id[0x03] enabled)
[    0.000000] ACPI: LAPIC_NMI (acpi_id[0xff] dfl dfl lint[0x1])
[    0.000000] ACPI: IOAPIC (id[0x00] address[0xfec00000] gsi_base[0])
[    0.000000] IOAPIC[0]: apic_id 0, version 17, address 0xfec00000, GSI 0-23
[    0.000000] ACPI: INT_SRC_OVR (bus 0 bus_irq 0 global_irq 2 dfl dfl)
[    0.000000] ACPI: INT_SRC_OVR (bus 0 bus_irq 5 global_irq 5 high level)
[    0.000000] ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 9 high level)
[    0.000000] ACPI: INT_SRC_OVR (bus 0 bus_irq 10 global_irq 10 high level)
[    0.000000] ACPI: INT_SRC_OVR (bus 0 bus_irq 11 global_irq 11 high level)
[    0.000000] Using ACPI (MADT) for SMP configuration information
[    0.000000] ACPI: HPET id: 0x8086a201 base: 0xfed00000
[    0.000000] smpboot: Allowing 4 CPUs, 0 hotplug CPUs
[    0.000000] PM: Registered nosave memory: [mem 0x00000000-0x00000fff]
[    0.000000] PM: Registered nosave memory: [mem 0x0009f000-0x0009ffff]
[    0.000000] PM: Registered nosave memory: [mem 0x000a0000-0x000effff]
[    0.000000] PM: Registered nosave memory: [mem 0x000f0000-0x000fffff]
[    0.000000] e820: [mem 0x80000000-0xfeffbfff] available for PCI devices
[    0.000000] Booting paravirtualized kernel on KVM
[    0.000000] setup_percpu: NR_CPUS:8 nr_cpumask_bits:8 nr_cpu_ids:4 nr_node_ids:1
[    0.000000] PERCPU: Embedded 32 pages/cpu @ffff88007fc00000 s91840 r8192 d31040 u524288
[    0.000000] KVM setup async PF for cpu 0
[    0.000000] kvm-stealtime: cpu 0, msr 7fc0e440
[    0.000000] Built 1 zonelists in Node order, mobility grouping on.  Total pages: 515975
[    0.000000] Policy zone: DMA32
[    0.000000] Kernel command line: BOOT_IMAGE=/vmlinuz-3.19.0+ root=UUID=92dcf04a-50ae-458c-851c-6aa51a139db7 ro rd.md=0 rd.lvm=0 rd.dm=0 rd.luks=0 vconsole.keymap=us console=ttyS0,115200 LANG=en_US.UTF-8
[    0.000000] PID hash table entries: 4096 (order: 3, 32768 bytes)
[    0.000000] Memory: 2023852K/2096752K available (7939K kernel code, 1305K rwdata, 3240K rodata, 1468K init, 14188K bss, 72900K reserved, 0K cma-reserved)
[    0.000000] SLUB: HWalign=64, Order=0-3, MinObjects=0, CPUs=4, Nodes=1
[    0.000000] Hierarchical RCU implementation.
[    0.000000] 	RCU lockdep checking is enabled.
[    0.000000] 	RCU restricting CPUs from NR_CPUS=8 to nr_cpu_ids=4.
[    0.000000] RCU: Adjusting geometry for rcu_fanout_leaf=16, nr_cpu_ids=4
[    0.000000] Running RCU self tests
[    0.000000] NR_IRQS:4352 nr_irqs:456 16
[    0.000000] 	Offload RCU callbacks from all CPUs
[    0.000000] 	Offload RCU callbacks from CPUs: 0-3.
[    0.000000] Console: colour VGA+ 80x25
[    0.000000] console [ttyS0] enabled
[    0.000000] Lock dependency validator: Copyright (c) 2006 Red Hat, Inc., Ingo Molnar
[    0.000000] ... MAX_LOCKDEP_SUBCLASSES:  8
[    0.000000] ... MAX_LOCK_DEPTH:          48
[    0.000000] ... MAX_LOCKDEP_KEYS:        8191
[    0.000000] ... CLASSHASH_SIZE:          4096
[    0.000000] ... MAX_LOCKDEP_ENTRIES:     32768
[    0.000000] ... MAX_LOCKDEP_CHAINS:      65536
[    0.000000] ... CHAINHASH_SIZE:          32768
[    0.000000]  memory used by lock dependency info: 8159 kB
[    0.000000]  per task-struct memory footprint: 1920 bytes
[    0.000000] tsc: Detected 2491.904 MHz processor
[    0.002000] Calibrating delay loop (skipped) preset value.. 4983.80 BogoMIPS (lpj=2491904)
[    0.002271] pid_max: default: 32768 minimum: 301
[    0.002706] ACPI: Core revision 20141107
[    0.005049] ACPI: All ACPI Tables successfully acquired
[    0.005662] Security Framework initialized
[    0.006009] SELinux:  Initializing.
[    0.007012] Dentry cache hash table entries: 262144 (order: 9, 2097152 bytes)
[    0.008332] Inode-cache hash table entries: 131072 (order: 8, 1048576 bytes)
[    0.009164] Mount-cache hash table entries: 4096 (order: 3, 32768 bytes)
[    0.009787] Mountpoint-cache hash table entries: 4096 (order: 3, 32768 bytes)
[    0.011831] Initializing cgroup subsys memory
[    0.012014] Initializing cgroup subsys devices
[    0.012445] Initializing cgroup subsys freezer
[    0.013024] Initializing cgroup subsys net_cls
[    0.013440] Initializing cgroup subsys blkio
[    0.014007] Initializing cgroup subsys perf_event
[    0.014561] Initializing cgroup subsys net_prio
[    0.015008] Initializing cgroup subsys hugetlb
[    0.015534] mce: CPU supports 10 MCE banks
[    0.016040] Last level iTLB entries: 4KB 0, 2MB 0, 4MB 0
[    0.016040] Last level dTLB entries: 4KB 0, 2MB 0, 4MB 0, 1GB 0
[    0.017348] Freeing SMP alternatives memory: 24K (ffffffff81eb7000 - ffffffff81ebd000)
[    0.021651] ftrace: allocating 26766 entries in 105 pages
[    0.029668] ..TIMER: vector=0x30 apic1=0 pin1=2 apic2=-1 pin2=-1
[    0.031000] smpboot: CPU0: Intel QEMU Virtual CPU version 2.1.3 (fam: 06, model: 02, stepping: 03)
[    0.031234] Performance Events: unsupported p6 CPU model 2 no PMU driver, software events only.
[    0.033701] NMI watchdog: disabled (cpu0): hardware events not enabled
[    0.034374] x86: Booting SMP configuration:
[    0.035003] .... node  #0, CPUs:      #1
[    0.002000] kvm-clock: cpu 1, msr 0:7ffe8041, secondary cpu clock
[    0.049090] KVM setup async PF for cpu 1
[    0.049820]  #2
[    0.049820] kvm-stealtime: cpu 1, msr 7fc8e440
[    0.002000] kvm-clock: cpu 2, msr 0:7ffe8081, secondary cpu clock
[    0.062051] KVM setup async PF for cpu 2
[    0.062789]  #3
[    0.062789] kvm-stealtime: cpu 2, msr 7fd0e440
[    0.002000] kvm-clock: cpu 3, msr 0:7ffe80c1, secondary cpu clock
[    0.076069] x86: Booted up 1 node, 4 CPUs
[    0.076047] KVM setup async PF for cpu 3
[    0.076051] kvm-stealtime: cpu 3, msr 7fd8e440
[    0.077004] smpboot: Total of 4 processors activated (19935.23 BogoMIPS)
[    0.079568] devtmpfs: initialized
[    0.083153] atomic64_test: passed for x86-64 platform with CX8 and with SSE
[    0.083761] pinctrl core: initialized pinctrl subsystem
[    0.084267] RTC time: 12:24:47, date: 02/25/15
[    0.085317] NET: Registered protocol family 16
[    0.089113] cpuidle: using governor menu
[    0.089686] ACPI: bus type PCI registered
[    0.090003] acpiphp: ACPI Hot Plug PCI Controller Driver version: 0.5
[    0.091062] PCI: Using configuration type 1 for base access
[    0.099193] ACPI: Added _OSI(Module Device)
[    0.099642] ACPI: Added _OSI(Processor Device)
[    0.100004] ACPI: Added _OSI(3.0 _SCP Extensions)
[    0.100469] ACPI: Added _OSI(Processor Aggregator Device)
[    0.105832] ACPI: Interpreter enabled
[    0.106011] ACPI Exception: AE_NOT_FOUND, While evaluating Sleep State [\_S1_] (20141107/hwxface-580)
[    0.107224] ACPI Exception: AE_NOT_FOUND, While evaluating Sleep State [\_S2_] (20141107/hwxface-580)
[    0.108260] ACPI: (supports S0 S3 S4 S5)
[    0.108650] ACPI: Using IOAPIC for interrupt routing
[    0.109034] PCI: Using host bridge windows from ACPI; if necessary, use "pci=nocrs" and report a bug
[    0.116197] ACPI: PCI Root Bridge [PCI0] (domain 0000 [bus 00-ff])
[    0.116797] acpi PNP0A03:00: _OSC: OS supports [ASPM ClockPM Segments MSI]
[    0.117051] acpi PNP0A03:00: _OSC failed (AE_NOT_FOUND); disabling ASPM
[    0.118170] acpi PNP0A03:00: fail to add MMCONFIG information, can't access extended PCI configuration space under this bridge.
[    0.120664] acpiphp: Slot [3] registered
[    0.121051] acpiphp: Slot [4] registered
[    0.121459] acpiphp: Slot [5] registered
[    0.121885] acpiphp: Slot [6] registered
[    0.122043] acpiphp: Slot [7] registered
[    0.122475] acpiphp: Slot [8] registered
[    0.123036] acpiphp: Slot [9] registered
[    0.123450] acpiphp: Slot [10] registered
[    0.124041] acpiphp: Slot [11] registered
[    0.124464] acpiphp: Slot [12] registered
[    0.124897] acpiphp: Slot [13] registered
[    0.125035] acpiphp: Slot [14] registered
[    0.125467] acpiphp: Slot [15] registered
[    0.126035] acpiphp: Slot [16] registered
[    0.126462] acpiphp: Slot [17] registered
[    0.127061] acpiphp: Slot [18] registered
[    0.127482] acpiphp: Slot [19] registered
[    0.128017] acpiphp: Slot [20] registered
[    0.128442] acpiphp: Slot [21] registered
[    0.128878] acpiphp: Slot [22] registered
[    0.129036] acpiphp: Slot [23] registered
[    0.129467] acpiphp: Slot [24] registered
[    0.130035] acpiphp: Slot [25] registered
[    0.130446] acpiphp: Slot [26] registered
[    0.131024] acpiphp: Slot [27] registered
[    0.131450] acpiphp: Slot [28] registered
[    0.131884] acpiphp: Slot [29] registered
[    0.132056] acpiphp: Slot [30] registered
[    0.132490] acpiphp: Slot [31] registered
[    0.133012] PCI host bridge to bus 0000:00
[    0.133407] pci_bus 0000:00: root bus resource [bus 00-ff]
[    0.134004] pci_bus 0000:00: root bus resource [io  0x0000-0x0cf7]
[    0.134601] pci_bus 0000:00: root bus resource [io  0x0d00-0xffff]
[    0.135003] pci_bus 0000:00: root bus resource [mem 0x000a0000-0x000bffff]
[    0.135668] pci_bus 0000:00: root bus resource [mem 0x80000000-0xfebfffff]
[    0.144027] pci 0000:00:01.1: legacy IDE quirk: reg 0x10: [io  0x01f0-0x01f7]
[    0.145003] pci 0000:00:01.1: legacy IDE quirk: reg 0x14: [io  0x03f6]
[    0.145626] pci 0000:00:01.1: legacy IDE quirk: reg 0x18: [io  0x0170-0x0177]
[    0.146003] pci 0000:00:01.1: legacy IDE quirk: reg 0x1c: [io  0x0376]
[    0.154250] pci 0000:00:01.3: quirk: [io  0xb000-0xb03f] claimed by PIIX4 ACPI
[    0.154936] pci 0000:00:01.3: quirk: [io  0xb100-0xb10f] claimed by PIIX4 SMB
[    0.215311] ACPI: PCI Interrupt Link [LNKA] (IRQs 5 *10 11)
[    0.216079] ACPI: PCI Interrupt Link [LNKB] (IRQs 5 *10 11)
[    0.216817] ACPI: PCI Interrupt Link [LNKC] (IRQs 5 10 *11)
[    0.217281] ACPI: PCI Interrupt Link [LNKD] (IRQs 5 10 *11)
[    0.218235] ACPI: PCI Interrupt Link [LNKS] (IRQs *9)
[    0.219513] ACPI: Enabled 16 GPEs in block 00 to 0F
[    0.220297] vgaarb: setting as boot device: PCI:0000:00:02.0
[    0.220632] vgaarb: device added: PCI:0000:00:02.0,decodes=io+mem,owns=io+mem,locks=none
[    0.221006] vgaarb: loaded
[    0.222003] vgaarb: bridge control possible 0000:00:02.0
[    0.222641] SCSI subsystem initialized
[    0.223124] ACPI: bus type USB registered
[    0.223573] usbcore: registered new interface driver usbfs
[    0.224024] usbcore: registered new interface driver hub
[    0.225046] usbcore: registered new device driver usb
[    0.225673] PCI: Using ACPI for IRQ routing
[    0.226658] NetLabel: Initializing
[    0.227003] NetLabel:  domain hash size = 128
[    0.227417] NetLabel:  protocols = UNLABELED CIPSOv4
[    0.228050] NetLabel:  unlabeled traffic allowed by default
[    0.228686] HPET: 3 timers in total, 0 timers will be used for per-cpu timer
[    0.229016] hpet0: at MMIO 0xfed00000, IRQs 2, 8, 0
[    0.230080] hpet0: 3 comparators, 64-bit 100.000000 MHz counter
[    0.234079] Switched to clocksource kvm-clock
[    0.265138] pnp: PnP ACPI init
[    0.266332] pnp: PnP ACPI: found 5 devices
[    0.278469] NET: Registered protocol family 2
[    0.279371] TCP established hash table entries: 16384 (order: 5, 131072 bytes)
[    0.280566] TCP bind hash table entries: 16384 (order: 8, 1048576 bytes)
[    0.282091] TCP: Hash tables configured (established 16384 bind 16384)
[    0.282771] TCP: reno registered
[    0.283128] UDP hash table entries: 1024 (order: 5, 163840 bytes)
[    0.283840] UDP-Lite hash table entries: 1024 (order: 5, 163840 bytes)
[    0.284719] NET: Registered protocol family 1
[    0.285175] pci 0000:00:00.0: Limiting direct PCI/PCI transfers
[    0.285764] pci 0000:00:01.0: PIIX3: Enabling Passive Release
[    0.286338] pci 0000:00:01.0: Activating ISA DMA hang workarounds
[    0.287459] ACPI: PCI Interrupt Link [LNKD] enabled at IRQ 11
[    0.289171] Unpacking initramfs...
[    0.448445] Freeing initrd memory: 9500K (ffff880036d62000 - ffff8800376a9000)
[    0.451876] futex hash table entries: 1024 (order: 5, 131072 bytes)
[    0.452560] Initialise system trusted keyring
[    0.453095] audit: initializing netlink subsys (disabled)
[    0.453665] audit: type=2000 audit(1424867087.915:1): initialized
[    0.454804] HugeTLB registered 2 MB page size, pre-allocated 0 pages
[    0.460277] zpool: loaded
[    0.460562] zbud: loaded
[    0.461694] VFS: Disk quotas dquot_6.5.2
[    0.462218] VFS: Dquot-cache hash table entries: 512 (order 0, 4096 bytes)
[    0.464532] Key type big_key registered
[    0.467597] alg: No test for stdrng (krng)
[    0.468064] NET: Registered protocol family 38
[    0.468538] Key type asymmetric registered
[    0.469071] Asymmetric key parser 'x509' registered
[    0.469900] Block layer SCSI generic (bsg) driver version 0.4 loaded (major 252)
[    0.471073] io scheduler noop registered
[    0.471674] io scheduler deadline registered
[    0.472483] io scheduler cfq registered (default)
[    0.473344] pci_hotplug: PCI Hot Plug PCI Core version: 0.5
[    0.473994] pciehp: PCI Express Hot Plug Controller Driver version: 0.4
[    0.474883] input: Power Button as /devices/LNXSYSTM:00/LNXPWRBN:00/input/input0
[    0.475635] ACPI: Power Button [PWRF]
[    0.476706] GHES: HEST is not enabled!
[    0.477762] ACPI: PCI Interrupt Link [LNKC] enabled at IRQ 10
[    0.480284] ACPI: PCI Interrupt Link [LNKA] enabled at IRQ 10
[    0.482318] ACPI: PCI Interrupt Link [LNKB] enabled at IRQ 11
[    0.484051] Serial: 8250/16550 driver, 4 ports, IRQ sharing enabled
[    0.507927] 00:04: ttyS0 at I/O 0x3f8 (irq = 4, base_baud = 115200) is a 16550A
[    0.514033] Non-volatile memory driver v1.3
[    0.516820]  vda: vda1
[    0.519861] scsi host0: ata_piix
[    0.520703] scsi host1: ata_piix
[    0.521145] ata1: PATA max MWDMA2 cmd 0x1f0 ctl 0x3f6 bmdma 0xc0c0 irq 14
[    0.522065] ata2: PATA max MWDMA2 cmd 0x170 ctl 0x376 bmdma 0xc0c8 irq 15
[    0.522971] libphy: Fixed MDIO Bus: probed
[    0.523884] ehci_hcd: USB 2.0 'Enhanced' Host Controller (EHCI) Driver
[    0.524528] ehci-pci: EHCI PCI platform driver
[    0.524986] ohci_hcd: USB 1.1 'Open' Host Controller (OHCI) Driver
[    0.525592] ohci-pci: OHCI PCI platform driver
[    0.526058] uhci_hcd: USB Universal Host Controller Interface driver
[    0.528212] uhci_hcd 0000:00:01.2: UHCI Host Controller
[    0.529181] uhci_hcd 0000:00:01.2: new USB bus registered, assigned bus number 1
[    0.529969] uhci_hcd 0000:00:01.2: detected 2 ports
[    0.530568] uhci_hcd 0000:00:01.2: irq 11, io base 0x0000c040
[    0.531556] usb usb1: New USB device found, idVendor=1d6b, idProduct=0001
[    0.532245] usb usb1: New USB device strings: Mfr=3, Product=2, SerialNumber=1
[    0.532959] usb usb1: Product: UHCI Host Controller
[    0.533472] usb usb1: Manufacturer: Linux 3.19.0+ uhci_hcd
[    0.533994] usb usb1: SerialNumber: 0000:00:01.2
[    0.535167] hub 1-0:1.0: USB hub found
[    0.535615] hub 1-0:1.0: 2 ports detected
[    0.536752] usbcore: registered new interface driver usbserial
[    0.537341] usbcore: registered new interface driver usbserial_generic
[    0.538084] usbserial: USB Serial support registered for generic
[    0.538758] i8042: PNP: PS/2 Controller [PNP0303:KBD,PNP0f13:MOU] at 0x60,0x64 irq 1,12
[    0.540327] serio: i8042 KBD port at 0x60,0x64 irq 1
[    0.540978] serio: i8042 AUX port at 0x60,0x64 irq 12
[    0.542002] mousedev: PS/2 mouse device common for all mice
[    0.543550] rtc_cmos 00:00: RTC can wake from S4
[    0.544435] input: AT Translated Set 2 keyboard as /devices/platform/i8042/serio0/input/input1
[    0.544803] rtc_cmos 00:00: rtc core: registered rtc_cmos as rtc0
[    0.545089] rtc_cmos 00:00: alarms up to one day, 114 bytes nvram, hpet irqs
[    0.545376] device-mapper: uevent: version 1.0.3
[    0.545686] device-mapper: ioctl: 4.29.0-ioctl (2014-10-28) initialised: dm-devel@redhat.com
[    0.548525] hidraw: raw HID events driver (C) Jiri Kosina
[    0.548919] usbcore: registered new interface driver usbhid
[    0.548920] usbhid: USB HID core driver
[    0.549024] drop_monitor: Initializing network drop monitor service
[    0.549257] ip_tables: (C) 2000-2006 Netfilter Core Team
[    0.551065] TCP: cubic registered
[    0.551075] Initializing XFRM netlink socket
[    0.551611] NET: Registered protocol family 10
[    0.552453] mip6: Mobile IPv6
[    0.552476] NET: Registered protocol family 17
[    0.553735] Loading compiled-in X.509 certificates
[    0.555047] Loaded X.509 cert 'Magrathea: Glacier signing key: 186426f37a9cad9c2f80b301b81beedeabae47f5'
[    0.555097] registered taskstats version 1
[    0.556205]   Magic number: 3:500:431
[    0.556206]   hash matches drivers/base/power/main.c:480
[    0.556953] rtc_cmos 00:00: setting system clock to 2015-02-25 12:24:47 UTC (1424867087)
[    0.675840] ata1.00: ATA-7: QEMU HARDDISK, 2.1.3, max UDMA/100
[    0.677452] ata1.00: 41943040 sectors, multi 16: LBA48 
[    0.679849] ata1.00: configured for MWDMA2
[    0.682567] scsi 0:0:0:0: Direct-Access     ATA      QEMU HARDDISK    3    PQ: 0 ANSI: 5
[    0.688494] sd 0:0:0:0: [sda] 41943040 512-byte logical blocks: (21.4 GB/20.0 GiB)
[    0.688554] sd 0:0:0:0: Attached scsi generic sg0 type 0
[    0.691699] sd 0:0:0:0: [sda] Write Protect is off
[    0.692939] sd 0:0:0:0: [sda] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
[    0.698177]  sda: sda1 sda2 sda3
[    0.702710] sd 0:0:0:0: [sda] Attached SCSI disk
[    0.705634] Freeing unused kernel memory: 1468K (ffffffff81d48000 - ffffffff81eb7000)
[    0.707104] Write protecting the kernel read-only data: 12288k
[    0.709092] Freeing unused kernel memory: 240K (ffff8800017c4000 - ffff880001800000)
[    0.711252] Freeing unused kernel memory: 856K (ffff880001b2a000 - ffff880001c00000)
[    0.724818] systemd[1]: [/etc/systemd/system.conf:28] Unknown lvalue 'DefaultCPUAccounting' in section 'Manager'
[    0.732475] systemd[1]: systemd 208 running in system mode. (+PAM +LIBWRAP +AUDIT +SELINUX +IMA +SYSVINIT +LIBCRYPTSETUP +GCRYPT +ACL +XZ)
[    0.735573] systemd[1]: Detected virtualization 'kvm'.
[    0.736854] systemd[1]: Running in initial RAM disk.
[    0.741564] systemd[1]: Set hostname to <avagin-fc19-cr>.
[    0.753272] random: systemd urandom read with 15 bits of entropy available
[    0.813534] systemd[1]: Expecting device dev-disk-by\x2duuid-92dcf04a\x2d50ae\x2d458c\x2d851c\x2d6aa51a139db7.device...
[    0.815907] systemd[1]: Starting -.slice.
[    0.818175] systemd[1]: Created slice -.slice.
[    0.818670] systemd[1]: Starting System Slice.
[    0.820111] systemd[1]: Created slice System Slice.
[    0.820602] systemd[1]: Starting Slices.
[    0.821682] systemd[1]: Reached target Slices.
[    0.822223] systemd[1]: Starting Timers.
[    0.823261] systemd[1]: Reached target Timers.
[    0.823714] systemd[1]: Starting Journal Socket.
[    0.825068] systemd[1]: Listening on Journal Socket.
[    0.825895] systemd[1]: Starting dracut cmdline hook...
[    0.828338] systemd[1]: Starting Create list of required static device nodes for the current kernel...
[    0.831483] systemd[1]: Starting Setup Virtual Console...
[    0.833838] systemd[1]: Starting Journal Service...
[    0.839129] systemd[1]: Started Journal Service.
[    0.923438] systemd-udevd[151]: starting version 208
[    1.185038] PM: Starting manual resume from disk
[    1.213086] EXT4-fs (sda3): INFO: recovery required on readonly filesystem
[    1.213766] EXT4-fs (sda3): write access will be enabled during recovery
[    1.241453] EXT4-fs (sda3): recovery complete
[    1.242972] EXT4-fs (sda3): mounted filesystem with ordered data mode. Opts: (null)
[    1.416228] input: ImExPS/2 Generic Explorer Mouse as /devices/platform/i8042/serio1/input/input3
[    1.451294] tsc: Refined TSC clocksource calibration: 2491.847 MHz
[    1.473032] systemd-journald[108]: Received SIGTERM
[    1.557945] SELinux:  Disabled at runtime.
[    1.582139] audit: type=1404 audit(1424867088.525:2): selinux=0 auid=4294967295 ses=4294967295
[    1.595957] systemd[1]: [/etc/systemd/system.conf:28] Unknown lvalue 'DefaultCPUAccounting' in section 'Manager'
[    1.892650] random: nonblocking pool is initialized
[    1.939923] systemd-fsck[283]: /dev/sda3: clean, 280790/1022000 files, 3712499/4082432 blocks\r
[    1.955153] systemd-udevd[298]: starting version 208
[    1.978584] EXT4-fs (sda3): re-mounted. Opts: (null)
[    2.219244] piix4_smbus 0000:00:01.3: SMBus Host Controller at 0xb100, revision 0
[    2.245492] microcode: CPU0 sig=0x623, pf=0x0, revision=0x1
[    2.248544] microcode: CPU1 sig=0x623, pf=0x0, revision=0x1
[    2.249514] microcode: CPU2 sig=0x623, pf=0x0, revision=0x1
[    2.250631] microcode: CPU3 sig=0x623, pf=0x0, revision=0x1
[    2.252063] microcode: Microcode Update Driver: v2.00 <tigran@aivazian.fsnet.co.uk>, Peter Oruba
[    2.344453] systemd-fsck[348]: /dev/sda1: recovering journal\r
[    2.384577] Adding 4128764k swap on /dev/sda2.  Priority:-1 extents:1 across:4128764k FS
[    2.393586] systemd-fsck[348]: /dev/sda1: clean, 384/128016 files, 399662/512000 blocks\r
[    2.415224] EXT4-fs (sda1): mounted filesystem with ordered data mode. Opts: (null)
[    2.428124] systemd-journald[284]: Received request to flush runtime journal from PID 1
[    2.508252] traps: systemd-cgroups[380] general protection ip:7f68ad096028 sp:7fffba298af8 error:0 in ld-2.18.so[7f68ad07e000+20000][^[[32m  OK  ^[[0m
[    2.600179] traps: systemd-cgroups[384] general protection ip:7f11b9a9c028 sp:7fff4420f978 error:0 in ld-2.18.so[7f11b9a84000+20000]
[    2.743790] traps: systemd-cgroups[392] general protection ip:7f7f40a44028 sp:7fffe1c1b8b8 error:0 in ld-2.18.so[7f7f40a2c000+20000]
[    2.754576] traps: systemd-cgroups[393] general protection ip:7fd1314bd028 sp:7ffff76ecc88 error:0 in ld-2.18.so[7fd1314a5000+20000]
[    2.765343] traps: systemd-cgroups[396] general protection ip:7ff4537b7028 sp:7fff05902378 error:0 in ld-2.18.so[7ff45379f000+20000]
[    2.798782] traps: systemd-cgroups[399] general protection ip:7f4d5bc9c028 sp:7fff35cb3a48 error:0 in ld-2.18.so[7f4d5bc84000+20000]
[    3.376298] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
[    3.376298] 
[    3.377199] CPU: 2 PID: 1 Comm: systemd Not tainted 3.19.0+ #169
[    3.377199] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
[    3.377199]  0000000000000000 00000000302f3f16 ffff88007c88bc48 ffffffff817b2f1a
[    3.377199]  0000000000000000 ffffffff81a348f8 ffff88007c88bcc8 ffffffff817b14d8
[    3.377199]  ffff880000000010 ffff88007c88bcd8 ffff88007c88bc78 00000000302f3f16
[    3.377199] Call Trace:
[    3.377199]  [<ffffffff817b2f1a>] dump_stack+0x45/0x57
[    3.377199]  [<ffffffff817b14d8>] panic+0xd5/0x20e
[    3.377199]  [<ffffffff8109c855>] do_exit+0xb15/0xb20
[    3.377199]  [<ffffffff8109c8fe>] do_group_exit+0x4e/0xc0
[    3.377199]  [<ffffffff810aa751>] get_signal+0x271/0x860
[    3.377199]  [<ffffffff81015547>] do_signal+0x37/0x760
[    3.377199]  [<ffffffff810cc850>] ? wake_up_state+0x20/0x20
[    3.377199]  [<ffffffff817bbb2c>] ? int_very_careful+0x5/0xd
[    3.377199]  [<ffffffff810ee08d>] ? trace_hardirqs_on_caller+0x13d/0x1e0
[    3.377199]  [<ffffffff81015cd0>] do_notify_resume+0x60/0x70
[    3.377199]  [<ffffffff817bbb7f>] int_signal+0x12/0x17
[    3.377199] Kernel Offset: 0x0 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffff9fffffff)
[    3.377199] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
[    3.377199] 

^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH 2/3 v3] x86: entry_64.S: always allocate complete "struct pt_regs"
  2015-02-25 12:37   ` Andrey Wagin
@ 2015-02-25 13:55     ` Denys Vlasenko
  2015-02-25 14:48       ` Sabrina Dubroca
  2015-02-25 16:52     ` Denys Vlasenko
  2015-02-25 18:42     ` Denys Vlasenko
  2 siblings, 1 reply; 130+ messages in thread
From: Denys Vlasenko @ 2015-02-25 13:55 UTC (permalink / raw)
  To: Andrey Wagin
  Cc: Andy Lutomirski, Linus Torvalds, Oleg Nesterov, Borislav Petkov,
	H. Peter Anvin, Frederic Weisbecker, X86 ML, Alexei Starovoitov,
	Will Drewry, Kees Cook, LKML

On 02/25/2015 01:37 PM, Andrey Wagin wrote:
> 2015-02-13 0:54 GMT+03:00 Denys Vlasenko <dvlasenk@redhat.com>:
> My test vm doesn't boot with this patch. Could you help to investigate
> this issue?

Hi Andrey, thanks for testing!

> I have attached a kernel config and console log.

Looking at the logs, it seems that regular syscalls do work:
systemd managed to function for some time, even spawned
a few children.

It might be that the bug is somewhere in signal delivery code.
This would explain why oops got delayed.


I am trying to reproduce it. My gcc seems to be a bit old -
it can't digest CONFIG_CC_STACKPROTECTOR_STRONG=y in your .config.

I switched to using "only" CONFIG_CC_STACKPROTECTOR_REGULAR=y:

CONFIG_CC_STACKPROTECTOR=y
# CONFIG_CC_STACKPROTECTOR_NONE is not set
CONFIG_CC_STACKPROTECTOR_REGULAR=y
# CONFIG_CC_STACKPROTECTOR_STRONG is not set

and resulting kernel works for me.

Can you send me your bzImage (off-list, of course)?

I'll send you my qemu test setup, can you check whether crash
is happening for you when you run your kernel in it?

-- 
vda


^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH 2/3 v3] x86: entry_64.S: always allocate complete "struct pt_regs"
  2015-02-25 13:55     ` Denys Vlasenko
@ 2015-02-25 14:48       ` Sabrina Dubroca
  0 siblings, 0 replies; 130+ messages in thread
From: Sabrina Dubroca @ 2015-02-25 14:48 UTC (permalink / raw)
  To: Denys Vlasenko
  Cc: Andrey Wagin, Andy Lutomirski, Linus Torvalds, Oleg Nesterov,
	Borislav Petkov, H. Peter Anvin, Frederic Weisbecker, X86 ML,
	Alexei Starovoitov, Will Drewry, Kees Cook, LKML

Hello,

I'm seeing the same symptoms on next-2015022{4,5}, also with systemd in a VM:

traps: fsck[99] general protection ip:7fccb2401270 sp:7fffea3b8938 error:0 in libc-2.21.so[7fccb2349000+199000]
traps: systemd-cgroups[100] general protection ip:7fdd8ff784f8 sp:7ffcf6e27ad8 error:0 in ld-2.21.so[7fdd8ff60000+22000]
traps: systemd-cgroups[94] general protection ip:7f9f23bd24f8 sp:7ffff4fc5578 error:0 in ld-2.21.so[7f9f23bba000+22000]
traps: systemd-cgroups[102] general protection ip:7f211e6574f8 sp:7ffdb8e0d538 error:0 in ld-2.21.so[7f211e63f000+22000]
traps: systemd-cgroups[103] general protection ip:7f80627c34f8 sp:7ffc7fa4cff8 error:0 in ld-2.21.so[7f80627ab000+22000]


2015-02-25, 14:55:34 +0100, Denys Vlasenko wrote:
> On 02/25/2015 01:37 PM, Andrey Wagin wrote:
> > 2015-02-13 0:54 GMT+03:00 Denys Vlasenko <dvlasenk@redhat.com>:
> > My test vm doesn't boot with this patch. Could you help to investigate
> > this issue?
> 
> Hi Andrey, thanks for testing!
> 
> > I have attached a kernel config and console log.
> 
> Looking at the logs, it seems that regular syscalls do work:
> systemd managed to function for some time, even spawned
> a few children.
> 
> It might be that the bug is somewhere in signal delivery code.
> This would explain why oops got delayed.

It doesn't oops here, it just tries to load other bits of systemd and hangs.
I've noticed that "ip:" - "the address after ld-2.21.so[" is always
the same value, I don't know if that's expected or relevant.

(full log below)

> I am trying to reproduce it. My gcc seems to be a bit old -
> it can't digest CONFIG_CC_STACKPROTECTOR_STRONG=y in your .config.
> 
> I switched to using "only" CONFIG_CC_STACKPROTECTOR_REGULAR=y:
> 
> CONFIG_CC_STACKPROTECTOR=y
> # CONFIG_CC_STACKPROTECTOR_NONE is not set
> CONFIG_CC_STACKPROTECTOR_REGULAR=y
> # CONFIG_CC_STACKPROTECTOR_STRONG is not set
> 
> and resulting kernel works for me.

I don't have any STACKPROTECTOR in my config:

# CONFIG_CC_STACKPROTECTOR is not set
CONFIG_CC_STACKPROTECTOR_NONE=y
# CONFIG_CC_STACKPROTECTOR_REGULAR is not set
# CONFIG_CC_STACKPROTECTOR_STRONG is not set

(full config after the log)


I can start systemd's emergency shell (systemd.unit=emergency.target),
if running test programs helps.


Thanks,
Sabrina

[    0.000000] Initializing cgroup subsys cpuset
[    0.000000] Initializing cgroup subsys cpu
[    0.000000] Initializing cgroup subsys cpuacct
[    0.000000] Linux version 4.0.0-rc1-next-20150225 (zappy@kria) (gcc version 4.9.2 20150204 (prerelease) (GCC) ) #636 SMP PREEMPT Wed Feb 25 13:45:23 CET 2015
[    0.000000] Command line: root=/dev/sda1 netconsole=@10.0.1.23/,6666@10.0.1.10/ console=ttyS0
[    0.000000] e820: BIOS-provided physical RAM map:
[    0.000000] BIOS-e820: [mem 0x0000000000000000-0x000000000009fbff] usable
[    0.000000] BIOS-e820: [mem 0x000000000009fc00-0x000000000009ffff] reserved
[    0.000000] BIOS-e820: [mem 0x00000000000f0000-0x00000000000fffff] reserved
[    0.000000] BIOS-e820: [mem 0x0000000000100000-0x000000001ffdffff] usable
[    0.000000] BIOS-e820: [mem 0x000000001ffe0000-0x000000001fffffff] reserved
[    0.000000] BIOS-e820: [mem 0x00000000feffc000-0x00000000feffffff] reserved
[    0.000000] BIOS-e820: [mem 0x00000000fffc0000-0x00000000ffffffff] reserved
[    0.000000] NX (Execute Disable) protection: active
[    0.000000] SMBIOS 2.8 present.
[    0.000000] Hypervisor detected: KVM
[    0.000000] AGP: No AGP bridge found
[    0.000000] e820: last_pfn = 0x1ffe0 max_arch_pfn = 0x400000000
[    0.000000] PAT configuration [0-7]: WB  WC  UC- UC  WB  WC  UC- UC  
[    0.000000] found SMP MP-table at [mem 0x000f1010-0x000f101f] mapped at [ffff8800000f1010]
[    0.000000] Scanning 1 areas for low memory corruption
[    0.000000] init_memory_mapping: [mem 0x00000000-0x000fffff]
[    0.000000] init_memory_mapping: [mem 0x1fc00000-0x1fdfffff]
[    0.000000] init_memory_mapping: [mem 0x00100000-0x1fbfffff]
[    0.000000] init_memory_mapping: [mem 0x1fe00000-0x1ffdffff]
[    0.000000] ACPI: Early table checksum verification disabled
[    0.000000] ACPI: RSDP 0x00000000000F0DD0 000014 (v00 BOCHS )
[    0.000000] ACPI: RSDT 0x000000001FFE18BC 000034 (v01 BOCHS  BXPCRSDT 00000001 BXPC 00000001)
[    0.000000] ACPI: FACP 0x000000001FFE0E48 000074 (v01 BOCHS  BXPCFACP 00000001 BXPC 00000001)
[    0.000000] ACPI: DSDT 0x000000001FFE0040 000E08 (v01 BOCHS  BXPCDSDT 00000001 BXPC 00000001)
[    0.000000] ACPI: FACS 0x000000001FFE0000 000040
[    0.000000] ACPI: SSDT 0x000000001FFE0EBC 000948 (v01 BOCHS  BXPCSSDT 00000001 BXPC 00000001)
[    0.000000] ACPI: APIC 0x000000001FFE1804 000080 (v01 BOCHS  BXPCAPIC 00000001 BXPC 00000001)
[    0.000000] ACPI: HPET 0x000000001FFE1884 000038 (v01 BOCHS  BXPCHPET 00000001 BXPC 00000001)
[    0.000000] kvm-clock: Using msrs 4b564d01 and 4b564d00
[    0.000000] kvm-clock: cpu 0, msr 0:1ffdf001, primary cpu clock
[    0.000000] Zone ranges:
[    0.000000]   DMA      [mem 0x0000000000001000-0x0000000000ffffff]
[    0.000000]   DMA32    [mem 0x0000000001000000-0x000000001ffdffff]
[    0.000000]   Normal   empty
[    0.000000] Movable zone start for each node
[    0.000000] Early memory node ranges
[    0.000000]   node   0: [mem 0x0000000000001000-0x000000000009efff]
[    0.000000]   node   0: [mem 0x0000000000100000-0x000000001ffdffff]
[    0.000000] Initmem setup node 0 [mem 0x0000000000001000-0x000000001ffdffff]
[    0.000000] ACPI: PM-Timer IO Port: 0x608
[    0.000000] ACPI: LAPIC (acpi_id[0x00] lapic_id[0x00] enabled)
[    0.000000] ACPI: LAPIC (acpi_id[0x01] lapic_id[0x01] enabled)
[    0.000000] ACPI: LAPIC_NMI (acpi_id[0xff] dfl dfl lint[0x1])
[    0.000000] ACPI: IOAPIC (id[0x00] address[0xfec00000] gsi_base[0])
[    0.000000] IOAPIC[0]: apic_id 0, version 17, address 0xfec00000, GSI 0-23
[    0.000000] ACPI: INT_SRC_OVR (bus 0 bus_irq 0 global_irq 2 dfl dfl)
[    0.000000] ACPI: INT_SRC_OVR (bus 0 bus_irq 5 global_irq 5 high level)
[    0.000000] ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 9 high level)
[    0.000000] ACPI: INT_SRC_OVR (bus 0 bus_irq 10 global_irq 10 high level)
[    0.000000] ACPI: INT_SRC_OVR (bus 0 bus_irq 11 global_irq 11 high level)
[    0.000000] Using ACPI (MADT) for SMP configuration information
[    0.000000] ACPI: HPET id: 0x8086a201 base: 0xfed00000
[    0.000000] smpboot: Allowing 2 CPUs, 0 hotplug CPUs
[    0.000000] e820: [mem 0x20000000-0xfeffbfff] available for PCI devices
[    0.000000] Booting paravirtualized kernel on KVM
[    0.000000] setup_percpu: NR_CPUS:16 nr_cpumask_bits:16 nr_cpu_ids:2 nr_node_ids:1
[    0.000000] PERCPU: Embedded 29 pages/cpu @ffff88001fc00000 s79704 r8192 d30888 u1048576
[    0.000000] KVM setup async PF for cpu 0
[    0.000000] kvm-stealtime: cpu 0, msr 1fc0cc80
[    0.000000] Built 1 zonelists in Zone order, mobility grouping on.  Total pages: 128873
[    0.000000] Kernel command line: root=/dev/sda1 netconsole=@10.0.1.23/,6666@10.0.1.10/ console=ttyS0
[    0.000000] PID hash table entries: 2048 (order: 2, 16384 bytes)
[    0.000000] Dentry cache hash table entries: 65536 (order: 7, 524288 bytes)
[    0.000000] Inode-cache hash table entries: 32768 (order: 6, 262144 bytes)
[    0.000000] xsave: enabled xstate_bv 0x7, cntxt size 0x340 using standard form
[    0.000000] AGP: Checking aperture...
[    0.000000] AGP: No AGP bridge found
[    0.000000] Memory: 500320K/523768K available (5643K kernel code, 487K rwdata, 2464K rodata, 848K init, 2488K bss, 23448K reserved, 0K cma-reserved)
[    0.000000] SLUB: HWalign=64, Order=0-3, MinObjects=0, CPUs=2, Nodes=1
[    0.000000] Preemptible hierarchical RCU implementation.
[    0.000000] 	RCU debugfs-based tracing is enabled.
[    0.000000] 	RCU dyntick-idle grace-period acceleration is enabled.
[    0.000000] 	Additional per-CPU info printed with stalls.
[    0.000000] 	RCU restricting CPUs from NR_CPUS=16 to nr_cpu_ids=2.
[    0.000000] RCU: Adjusting geometry for rcu_fanout_leaf=16, nr_cpu_ids=2
[    0.000000] NR_IRQS:4352 nr_irqs:440 16
[    0.000000] Console: colour VGA+ 80x25
[    0.000000] console [ttyS0] enabled
[    0.000000] tsc: Detected 3303.246 MHz processor
[    0.006666] Calibrating delay loop (skipped) preset value.. 6609.76 BogoMIPS (lpj=11010820)
[    0.006666] pid_max: default: 32768 minimum: 301
[    0.006666] ACPI: Core revision 20150204
[    0.006666] ACPI: All ACPI Tables successfully acquired
[    0.007099] Mount-cache hash table entries: 1024 (order: 1, 8192 bytes)
[    0.007791] Mountpoint-cache hash table entries: 1024 (order: 1, 8192 bytes)
[    0.008731] Initializing cgroup subsys blkio
[    0.009195] Initializing cgroup subsys memory
[    0.009671] Initializing cgroup subsys devices
[    0.010016] Initializing cgroup subsys freezer
[    0.010865] Last level iTLB entries: 4KB 512, 2MB 8, 4MB 8
[    0.011728] Last level dTLB entries: 4KB 512, 2MB 32, 4MB 32, 1GB 0
[    0.012687] Freeing SMP alternatives memory: 24K (ffffffff81b4f000 - ffffffff81b55000)
[    0.015801] ..TIMER: vector=0x30 apic1=0 pin1=2 apic2=-1 pin2=-1
[    0.016707] smpboot: CPU0: Intel(R) Core(TM) i3-3225 CPU @ 3.30GHz (fam: 06, model: 3a, stepping: 09)
[    0.018654] Performance Events: 16-deep LBR, IvyBridge events, Intel PMU driver.
[    0.020009] ... version:                2
[    0.020721] ... bit width:              48
[    0.021444] ... generic registers:      4
[    0.022152] ... value mask:             0000ffffffffffff
[    0.023096] ... max period:             000000007fffffff
[    0.023338] ... fixed-purpose events:   3
[    0.024060] ... event mask:             000000070000000f
[    0.025152] KVM setup paravirtual spinlock
[    0.036809] x86: Booting SMP configuration:
[    0.037358] .... node  #0, CPUs:      #1
[    0.006666] kvm-clock: cpu 1, msr 0:1ffdf041, secondary cpu clock
[    0.053379] x86: Booted up 1 node, 2 CPUs
[    0.053364] KVM setup async PF for cpu 1
[    0.053364] kvm-stealtime: cpu 1, msr 1fd0cc80
[    0.054692] smpboot: Total of 2 processors activated (13218.52 BogoMIPS)
[    0.055879] devtmpfs: initialized
[    0.056814] RTC time: 14:04:53, date: 02/25/15
[    0.057377] NET: Registered protocol family 16
[    0.073360] cpuidle: using governor ladder
[    0.086678] cpuidle: using governor menu
[    0.087369] ACPI: bus type PCI registered
[    0.087854] PCI: Using configuration type 1 for base access
[    0.106821] ACPI: Added _OSI(Module Device)
[    0.107204] ACPI: Added _OSI(Processor Device)
[    0.107660] ACPI: Added _OSI(3.0 _SCP Extensions)
[    0.108143] ACPI: Added _OSI(Processor Aggregator Device)
[    0.110222] ACPI: Interpreter enabled
[    0.110616] ACPI Exception: AE_NOT_FOUND, While evaluating Sleep State [\_S1_] (20150204/hwxface-580)
[    0.111759] ACPI Exception: AE_NOT_FOUND, While evaluating Sleep State [\_S2_] (20150204/hwxface-580)
[    0.112742] ACPI: (supports S0 S3 S5)
[    0.113122] ACPI: Using IOAPIC for interrupt routing
[    0.113368] PCI: Using host bridge windows from ACPI; if necessary, use "pci=nocrs" and report a bug
[    0.116319] ACPI: PCI Root Bridge [PCI0] (domain 0000 [bus 00-ff])
[    0.116675] acpi PNP0A03:00: _OSC: OS supports [ASPM ClockPM Segments MSI]
[    0.117382] acpi PNP0A03:00: _OSC failed (AE_NOT_FOUND); disabling ASPM
[    0.118417] acpi PNP0A03:00: fail to add MMCONFIG information, can't access extended PCI configuration space under this bridge.
[    0.119848] PCI host bridge to bus 0000:00
[    0.120009] pci_bus 0000:00: root bus resource [bus 00-ff]
[    0.120604] pci_bus 0000:00: root bus resource [io  0x0cf8-0x0cff]
[    0.121681] pci_bus 0000:00: root bus resource [io  0x0000-0x0cf7 window]
[    0.123016] pci_bus 0000:00: root bus resource [io  0x0d00-0xadff window]
[    0.123344] pci_bus 0000:00: root bus resource [io  0xae0f-0xaeff window]
[    0.124066] pci_bus 0000:00: root bus resource [io  0xaf20-0xafdf window]
[    0.124848] pci_bus 0000:00: root bus resource [io  0xafe4-0xffff window]
[    0.125636] pci_bus 0000:00: root bus resource [mem 0x000a0000-0x000bffff window]
[    0.126426] pci_bus 0000:00: root bus resource [mem 0x20000000-0xfebfffff window]
[    0.129923] pci 0000:00:01.1: legacy IDE quirk: reg 0x10: [io  0x01f0-0x01f7]
[    0.130005] pci 0000:00:01.1: legacy IDE quirk: reg 0x14: [io  0x03f6]
[    0.130716] pci 0000:00:01.1: legacy IDE quirk: reg 0x18: [io  0x0170-0x0177]
[    0.131440] pci 0000:00:01.1: legacy IDE quirk: reg 0x1c: [io  0x0376]
[    0.132606] pci 0000:00:01.3: quirk: [io  0x0600-0x063f] claimed by PIIX4 ACPI
[    0.133347] pci 0000:00:01.3: quirk: [io  0x0700-0x070f] claimed by PIIX4 SMB
[    0.151020] ACPI: PCI Interrupt Link [LNKA] (IRQs 5 *10 11)
[    0.151771] ACPI: PCI Interrupt Link [LNKB] (IRQs 5 *10 11)
[    0.152507] ACPI: PCI Interrupt Link [LNKC] (IRQs 5 10 *11)
[    0.153378] ACPI: PCI Interrupt Link [LNKD] (IRQs 5 10 *11)
[    0.154091] ACPI: PCI Interrupt Link [LNKS] (IRQs *9)
[    0.155005] ACPI: Enabled 16 GPEs in block 00 to 0F
[    0.155722] vgaarb: setting as boot device: PCI:0000:00:02.0
[    0.155722] vgaarb: device added: PCI:0000:00:02.0,decodes=io+mem,owns=io+mem,locks=none
[    0.156674] vgaarb: loaded
[    0.157046] vgaarb: bridge control possible 0000:00:02.0
[    0.157757] SCSI subsystem initialized
[    0.158267] PCI: Using ACPI for IRQ routing
[    0.158267] HPET: 3 timers in total, 0 timers will be used for per-cpu timer
[    0.158275] hpet0: at MMIO 0xfed00000, IRQs 2, 8, 0
[    0.160151] hpet0: 3 comparators, 64-bit 100.000000 MHz counter
[    0.163423] Switched to clocksource kvm-clock
[    0.164015] pnp: PnP ACPI init
[    0.164825] pnp: PnP ACPI: found 6 devices
[    0.174524] NET: Registered protocol family 2
[    0.175189] TCP established hash table entries: 4096 (order: 3, 32768 bytes)
[    0.175944] TCP bind hash table entries: 4096 (order: 4, 65536 bytes)
[    0.176704] TCP: Hash tables configured (established 4096 bind 4096)
[    0.177466] UDP hash table entries: 256 (order: 1, 8192 bytes)
[    0.178088] UDP-Lite hash table entries: 256 (order: 1, 8192 bytes)
[    0.178781] NET: Registered protocol family 1
[    0.179426] RPC: Registered named UNIX socket transport module.
[    0.180051] RPC: Registered udp transport module.
[    0.180542] RPC: Registered tcp transport module.
[    0.181038] RPC: Registered tcp NFSv4.1 backchannel transport module.
[    0.181705] pci 0000:00:00.0: Limiting direct PCI/PCI transfers
[    0.182330] pci 0000:00:01.0: PIIX3: Enabling Passive Release
[    0.182951] pci 0000:00:01.0: Activating ISA DMA hang workarounds
[    0.184030] Scanning for low memory corruption every 60 seconds
[    0.185022] futex hash table entries: 512 (order: 3, 32768 bytes)
[    0.185730] Initialise system trusted keyring
[    0.186263] audit: initializing netlink subsys (disabled)
[    0.186907] audit: type=2000 audit(1424873094.406:1): initialized
[    0.187810] HugeTLB registered 2 MB page size, pre-allocated 0 pages
[    0.189736] VFS: Disk quotas dquot_6.5.2
[    0.190200] VFS: Dquot-cache hash table entries: 512 (order 0, 4096 bytes)
[    0.191445] NFS: Registering the id_resolver key type
[    0.191982] Key type id_resolver registered
[    0.192419] Key type id_legacy registered
[    0.192842] nfs4filelayout_init: NFSv4 File Layout Driver Registering...
[    0.193623] Installing knfsd (copyright (C) 1996 okir@monad.swb.de).
[    0.195328] Key type asymmetric registered
[    0.195764] Asymmetric key parser 'x509' registered
[    0.196299] Block layer SCSI generic (bsg) driver version 0.4 loaded (major 252)
[    0.197126] io scheduler noop registered
[    0.197550] io scheduler deadline registered
[    0.198100] io scheduler cfq registered (default)
[    0.198717] input: Power Button as /devices/LNXSYSTM:00/LNXPWRBN:00/input/input0
[    0.199482] ACPI: Power Button [PWRF]
[    0.199974] GHES: HEST is not enabled!
[    0.221823] ACPI: PCI Interrupt Link [LNKD] enabled at IRQ 11
[    0.222494] virtio-pci 0000:00:04.0: virtio_pci: leaving for legacy driver
[    0.223778] Serial: 8250/16550 driver, 4 ports, IRQ sharing disabled
[    0.248977] 00:05: ttyS0 at I/O 0x3f8 (irq = 4, base_baud = 115200) is a 16550A
[    0.251008] Linux agpgart interface v0.103
[    0.251524] [drm] Initialized drm 1.1.0 20060810
[    0.252374] [drm] Found bochs VGA, ID 0xb0c0.
[    0.252828] [drm] Framebuffer size 16384 kB @ 0xfd000000, mmio @ 0xfebf0000.
[    0.253658] [TTM] Zone  kernel: Available graphics memory: 250172 kiB
[    0.254333] [TTM] Initializing pool allocator
[    0.254789] [TTM] Initializing DMA pool allocator
[    0.257711] fbcon: bochsdrmfb (fb0) is primary device
[    0.268962] Console: switching to colour frame buffer device 128x48
[    0.272028] bochs-drm 0000:00:02.0: fb0: bochsdrmfb frame buffer device
[    0.272704] bochs-drm 0000:00:02.0: registered panic notifier
[    0.303459] [drm] Initialized bochs-drm 1.0.0 20130925 for 0000:00:02.0 on minor 0
[    0.307712] brd: module loaded
[    0.310603] loop: module loaded
[    0.311198] nbd: registered device at major 43
[    0.313883] null: module loaded
[    0.315439] scsi host0: ata_piix
[    0.316084] scsi host1: ata_piix
[    0.316535] ata1: PATA max MWDMA2 cmd 0x1f0 ctl 0x3f6 bmdma 0xc080 irq 14
[    0.317270] ata2: PATA max MWDMA2 cmd 0x170 ctl 0x376 bmdma 0xc088 irq 15
[    0.318296] e1000: Intel(R) PRO/1000 Network Driver - version 7.3.21-k8-NAPI
[    0.319729] e1000: Copyright (c) 1999-2006 Intel Corporation.
[    0.341897] ACPI: PCI Interrupt Link [LNKC] enabled at IRQ 10
[    0.470599] ata1.00: ATA-7: QEMU HARDDISK, 2.2.0, max UDMA/100
[    0.471278] ata1.00: 8388608 sectors, multi 16: LBA48 
[    0.472247] ata1.00: configured for MWDMA2
[    0.473923] ata2.00: ATAPI: QEMU DVD-ROM, 2.2.0, max UDMA/100
[    0.474999] ata2.00: configured for MWDMA2
[    0.483530] scsi 0:0:0:0: Direct-Access     ATA      QEMU HARDDISK    0    PQ: 0 ANSI: 5
[    0.485106] sd 0:0:0:0: [sda] 8388608 512-byte logical blocks: (4.29 GB/4.00 GiB)
[    0.486606] scsi 1:0:0:0: CD-ROM            QEMU     QEMU DVD-ROM     2.2. PQ: 0 ANSI: 5
[    0.488182] sd 0:0:0:0: [sda] Write Protect is off
[    0.489108] sd 0:0:0:0: [sda] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
[    0.491243]  sda: sda1
[    0.492002] sd 0:0:0:0: [sda] Attached SCSI disk
[    0.664416] e1000 0000:00:03.0 eth0: (PCI:33MHz:32-bit) 52:54:00:12:34:56
[    0.665618] e1000 0000:00:03.0 eth0: Intel(R) PRO/1000 Network Connection
[    0.666898] i8042: PNP: PS/2 Controller [PNP0303:KBD,PNP0f13:MOU] at 0x60,0x64 irq 1,12
[    0.669187] serio: i8042 KBD port at 0x60,0x64 irq 1
[    0.669801] serio: i8042 AUX port at 0x60,0x64 irq 12
[    0.670823] rtc_cmos 00:00: RTC can wake from S4
[    0.671939] input: AT Translated Set 2 keyboard as /devices/platform/i8042/serio0/input/input1
[    0.674552] rtc_cmos 00:00: rtc core: registered rtc_cmos as rtc0
[    0.676142] rtc_cmos 00:00: alarms up to one day, 114 bytes nvram, hpet irqs
[    0.677836] piix4_smbus 0000:00:01.3: SMBus Host Controller at 0x700, revision 0
[    0.679807] NET: Registered protocol family 10
[    0.681373] NET: Registered protocol family 17
[    0.682574] Loading compiled-in X.509 certificates
[    0.683479] registered taskstats version 1
[    0.684451]   Magic number: 3:850:81
[    0.685405] rtc_cmos 00:00: setting system clock to 2015-02-25 14:04:54 UTC (1424873094)
[    0.691664] EXT4-fs (sda1): mounted filesystem with ordered data mode. Opts: (null)
[    0.693081] VFS: Mounted root (ext4 filesystem) readonly on device 8:1.
[    0.694794] Freeing unused kernel memory: 848K (ffffffff81a7b000 - ffffffff81b4f000)
[    0.696106] Write protecting the kernel read-only data: 10240k
[    0.697715] Freeing unused kernel memory: 496K (ffff880001584000 - ffff880001600000)
[    0.700123] Freeing unused kernel memory: 1632K (ffff880001868000 - ffff880001a00000)
[    0.751134] random: systemd urandom read with 17 bits of entropy available
[    0.755187] systemd[1]: systemd 218 running in system mode. (+PAM -AUDIT -SELINUX -IMA -APPARMOR +SMACK -SYSVINIT +UTMP +LIBCRYPTSETUP +GCRYPT +GNUTLS +ACL +XZ +LZ4 +SECCOMP +BLKID -ELFUTILS +KMOD +IDN)
[    0.759120] systemd[1]: Detected virtualization 'kvm'.
[    0.760283] systemd[1]: Detected architecture 'x86-64'.

Welcome to Arch Linux!

[    0.765183] systemd[1]: Set hostname to <earth>.
[    0.856809] systemd[1]: Cannot add dependency job for unit display-manager.service, ignoring: Unit display-manager.service failed to load: No such file or directory.
[    0.858913] systemd[1]: Starting Encrypted Volumes.
[  OK  ] Reached target Encrypted Volumes.
[    0.860182] systemd[1]: Reached target Encrypted Volumes.
[    0.860822] systemd[1]: Expecting device dev-ttyS0.device...
         Expecting device dev-ttyS0.device...
[    0.861939] systemd[1]: Starting Forward Password Requests to Wall Directory Watch.
[    0.862898] systemd[1]: Started Forward Password Requests to Wall Directory Watch.
[    0.864005] systemd[1]: Starting Dispatch Password Requests to Console Directory Watch.
[    0.864958] systemd[1]: Started Dispatch Password Requests to Console Directory Watch.
[    0.865841] systemd[1]: Starting Root Slice.
[  OK  ] Created slice Root Slice.
[    0.868745] systemd[1]: Created slice Root Slice.
[    0.869689] systemd[1]: Starting User and Session Slice.
[  OK  ] Created slice User and Session Slice.
[    0.872316] systemd[1]: Created slice User and Session Slice.
[    0.873483] systemd[1]: Starting Journal Audit Socket.
[  OK  ] Listening on Journal Audit Socket.
[    0.875217] systemd[1]: Listening on Journal Audit Socket.
[    0.876123] systemd[1]: Starting /dev/initctl Compatibility Named Pipe.
[  OK  ] Listening on /dev/initctl Compatibility Named Pi[    0.877381] systemd[1]: Listening on /dev/initctl Compatibility Named Pipe.
pe.
[    0.878235] systemd[1]: Starting Device-mapper event daemon FIFOs.
[  OK  ] Listening on Device-mapper event daemon FIFOs.
[    0.879439] systemd[1]: Listening on Device-mapper event daemon FIFOs.
[    0.880258] systemd[1]: Starting Journal Socket.
[  OK  ] Listening on Journal Socket.
[    0.881446] systemd[1]: Listening on Journal Socket.
[    0.882012] systemd[1]: Starting udev Kernel Socket.
[  OK  ] Listening on udev Kernel Socket.
[    0.883116] systemd[1]: Listening on udev Kernel Socket.
[    0.883721] systemd[1]: Starting System Slice.
[  OK  ] Created slice System Slice.
[    0.884798] systemd[1]: Created slice System Slice.
[    0.885339] systemd[1]: Starting File System Check on Root Device...
         Starting File System Check on Root Device...
[    0.886745] systemd[1]: Starting system-getty.slice.
[  OK  ] Created slice system-getty.slice.
[    0.888157] systemd[1]: Created slice system-getty.slice.
[    0.888848] systemd[1]: Mounting Huge Pages File System...
         Mounting Huge Pages File System...
[    0.890415] systemd[1]: Mounting Debug File System...
         Mounting Debug File System...
[    0.901145] systemd[1]: Started Create list of required static device nodes for the current kernel.
[    0.902808] systemd[1]: Starting Load Kernel Modules...
         Starting Load Kernel Modules...
[    0.904330] systemd[1]: Starting Swap.
[  OK  ] Reached target Swap.
[    0.905247] systemd[1]: Reached target Swap.
[    0.905738] systemd[1]: Mounting Temporary Directory...
         Mounting Temporary Directory...
[    0.908021] systemd[1]: tmp.mount: Directory /tmp to mount over is not empty, mounting anyway.
[    0.909365] systemd[1]: Starting udev Control Socket.
[  OK  ] Listening on udev Control Socket.
[    0.910450] systemd[1]: Listening on udev Control Socket.
[    0.911081] systemd[1]: Starting udev Coldplug all Devices...
         Starting udev Coldplug all Devices...
[    0.913090] systemd[1]: Starting system-serial\x2dgetty.slice.
[  OK  ] Created slice system-serial\x2dgetty.slice.
[    0.914538] systemd[1]: Created slice system-serial\x2dgetty.slice.
[    0.915480] systemd[1]: Starting Slices.
[  OK  ] Reached target Slices.
[    0.916716] systemd[1]: Reached target Slices.
[    0.917616] systemd[1]: Mounting POSIX Message Queue File System...
         Mounting POSIX Message Queue File System...
[    0.919987] systemd[1]: Starting Setup Virtual Console...
         Starting Setup Virtual Console...
[    0.921739] systemd[1]: Starting Delayed Shutdown Socket.
[  OK  ] Listening on Delayed Shutdown Socket.
[    0.923290] systemd[1]: Listening on Delayed Shutdown Socket.
[    0.924091] systemd[1]: Set up automount Arbitrary Executable File Formats File System Automount Point.
[    0.925109] systemd[1]: Starting Paths.
[  OK  ] Reached target Paths.
[    0.926263] systemd[1]: Reached target Paths.
[    0.928087] systemd[1]: Started Set Up Additional Binary Formats.
[    0.928741] systemd[1]: Starting LVM2 metadata daemon socket.
[  OK  ] Listening on LVM2 metadata daemon socket.
[    0.929866] systemd[1]: Listening on LVM2 metadata daemon socket.
[    0.930580] systemd[1]: Starting Journal Socket (/dev/log).
[  OK  ] Listening on Journal Socket (/dev/log).
[    0.931663] systemd[1]: Listening on Journal Socket (/dev/log).
[    0.932331] systemd[1]: Starting Journal Service...
         Starting Journal Service...
[    0.933827] systemd[1]: Starting Remote File Systems.
[  OK  ] Reached target Remote File Systems.
[    0.935718] systemd[1]: Reached target Remote File Systems.
[  OK  ] Started udev Coldplug all Devices.
[    0.946364] systemd[1]: Started udev Coldplug all Devices.
[    0.957709] systemd[1]: systemd-modules-load.service: main process exited, code=exited, status=1/FAILURE
[FAILED] Failed to start Load Kernel Modules.
See "systemctl status systemd-modules-load.service" for details.[    0.960191] systemd[1]: Failed to start Load Kernel Modules.

[    0.961008] systemd[1]: Unit systemd-modules-load.service entered failed state.
[    0.961811] systemd[1]: systemd-modules-load.service failed.
[    0.962467] systemd[1]: Mounted FUSE Control File System.
[    0.963118] systemd[1]: Mounting Configuration File System...
         Mounting Configuration File System...
[    0.964745] systemd[1]: Starting Apply Kernel Variables...
         Starting Apply Kernel Variables...
[  OK  ] Mounted POSIX Message Queue File System.
[    0.970549] systemd[1]: Mounted POSIX Message Queue File System.
[  OK  ] Mounted Huge Pages File System.
[    0.973734] systemd[1]: Mounted Huge Pages File System.
[  OK  ] Mounted Temporary Directory.
[    0.975662] systemd[1]: Mounted Temporary Directory.
[  OK  ] Mounted Debug File System.
[    0.977821] systemd[1]: Mounted Debug File System.
[  OK  ] Mounted Configuration File System.
[    0.979517] systemd[1]: Mounted Configuration File System.
[  OK  ] Started Apply Kernel Variables.
[    0.981848] systemd[1]: Started Apply Kernel Variables.
[  OK  ] Started File System Check on Root Device.
[    1.084912] systemd[1]: Started File System Check on Root Device.
[    1.086690] systemd[1]: Starting Remount Root and Kernel File Systems...
         Starting Remount Root and Kernel File Systems...
[    1.110518] EXT4-fs (sda1): re-mounted. Opts: data=ordered
[  OK  ] Started Remount Root and Kernel File Systems.
[    1.112478] systemd[1]: Started Remount Root and Kernel File Systems.
[    1.113464] systemd[1]: Started Rebuild Dynamic Linker Cache.
[    1.115590] systemd[1]: Started Rebuild Hardware Database.
[    1.116440] systemd[1]: Starting Load/Save Random Seed...
         Starting Load/Save Random Seed...
[    1.121153] systemd[1]: Started First Boot Wizard.
[    1.122079] systemd[1]: Started Create System Users.
[    1.122877] systemd[1]: Starting Create Static Device Nodes in /dev...
[    1.123730] traps: systemd-cgroups[104] general protection ip:7f0f04f8e4f8 sp:7fff4f13bd68 error:0 in ld-2.21.so[7f0f04f76000+22000]
[    1.125056] audit: type=1701 audit(1424873094.936:2): auid=4294967295 uid=0 gid=0 ses=4294967295 pid=104 comm="systemd-cgroups" exe="/usr/lib/systemd/systemd-cgroups-agent" sig=11
         Starting Create Static Device Nodes in /dev...
[  OK  ] Started Setup Virtual Console.
[    1.128543] systemd[1]: Started Setup Virtual Console.
[    1.142519] traps: systemd-coredum[106] general protection ip:7f32d48a24f8 sp:7fff256536f8 error:0 in ld-2.21.so[7f32d488a000+22000]
[    1.144059] audit: type=1701 audit(1424873094.956:3): auid=4294967295 uid=0 gid=0 ses=4294967295 pid=106 comm="systemd-coredum" exe="/usr/lib/systemd/systemd-coredump" sig=11
[    1.145696] Process 106(systemd-coredum) has RLIMIT_CORE set to 1
[    1.146338] Aborting core
[    1.154921] audit: type=1701 audit(1424873094.966:4): auid=4294967295 uid=0 gid=0 ses=4294967295 pid=107 comm="systemd" exe="/usr/lib/systemd/systemd" sig=11
[    1.155022] audit: type=1701 audit(1424873094.966:5): auid=4294967295 uid=0 gid=0 ses=4294967295 pid=105 comm="systemd" exe="/usr/lib/systemd/systemd" sig=11
[    1.158295] traps: systemd-coredum[108] general protection ip:7fe34425e4f8 sp:7fff2ea90e38 error:0[    1.158787] traps: systemd-coredum[109] general protection ip:7f579fdb84f8 sp:7ffc0c18adf8 error:0 in ld-2.21.so[7f579fda0000+22000]
[    1.158800] audit: type=1701 audit(1424873094.969:6): auid=4294967295 uid=0 gid=0 ses=4294967295 pid=109 comm="systemd-coredum" exe="/usr/lib/systemd/systemd-coredump" sig=11
[    1.158804] Process 109(systemd-coredum) has RLIMIT_CORE set to 1
[    1.158804] Aborting core

[    1.163200] systemd[1]: systemd-tmpfiles-setup-dev.service: main process exited, code=dumped, status=11/SEGV
[    1.164275]  in ld-2.21.so[7fe344246000+22000]
[    1.164763] Process 108(systemd-coredum) has RLIMIT_CORE set to 1
[    1.165407] Aborting core
[[    1.165939] Process 111(systemd-coredum) has RLIMIT_CORE set to 1
[    1.166820] Aborting core
FAILED] Failed to start Create Static Device Nodes in /dev.
See "systemctl status syste[    1.167836] systemd[1]: Failed to start Create Static Device Nodes in /dev.
md-tmpfiles-setu[    1.168653] systemd[1]: Unit systemd-tmpfiles-setup-dev.service entered failed state.
p-dev.service" f[    1.169543] systemd[1]: systemd-tmpfiles-setup-dev.service failed.
or details.
[    1.171063] systemd[1]: Starting Local File Systems (Pre).
[    1.171334] Process 113(systemd-coredum) has RLIMIT_CORE set to 1
[    1.171335] Aborting core
[  OK  ] Reached target Local File Systems (Pre).
[    1.173818] systemd[1]: Reached target Local File Systems (Pre).
[    1.174538] systemd[1]: Starting Local File Systems.
[  OK  ] Reached target Local File Systems.
[    1.175563] systemd[1]: Reached target Local File Systems.
[    1.176190] systemd[1]: Started Commit a transient machine-id on disk.
[    1.176920] systemd[1]: Started Rebuild Journal Catalog.
[    1.177522] systemd[1]: Started Update is Completed.
[    1.178072] systemd[1]: Starting udev Kernel Device Manager...
         Starting udev Kernel Device Manager...
[    1.179744] systemd[1]: systemd-random-seed.service: main process exited, code=dumped, status=11/SEGV
[FAILED] Failed to start Load/Save Random Seed.
See "systemctl status systemd-random-seed.servic[    1.181647] systemd[1]: Failed to start Load/Save Random Seed.
e" for details.
[    1.182361] systemd[1]: Unit systemd-random-seed.service entered failed state.

[    1.183194] systemd[1]: systemd-random-seed.service failed.
[    1.186759] tsc: Refined TSC clocksource calibration: 3303.211 MHz
[    1.194867] Process 115(systemd-coredum) has RLIMIT_CORE set to 1
[    1.195890] Aborting core
[    1.197116] systemd[1]: systemd-udevd.service: main process exited, code=dumped, status=11/SEGV
[FAILED] Failed to start udev Kernel Device Manager.
See "systemctl status systemd-udevd.service" for[    1.199241] systemd[1]: Failed to start udev Kernel Device Manager.
 details.
[    1.200415] Process 117(systemd-coredum) has RLIMIT_CORE set to 1
[    1.200677] systemd[1]: Unit systemd-udevd.service entered failed state.
[    1.200681] systemd[1]: systemd-udevd.service failed.
[    1.200789] systemd[1]: systemd-udevd.service has no holdoff time, scheduling restart.
[    1.200850] systemd[1]: Stopping udev Kernel Device Manager...
[    1.203814] Aborting core
         Stopping udev Kernel Device Manager...
[  OK  ] Stopped udev Kernel Device Manager.
[    1.205070] systemd[1]: Starting udev Kernel Device Manager...
         Starting udev Kernel Device Manager...
[    1.229558] Process 119(systemd-coredum) has RLIMIT_CORE set to 1
[    1.230257] Aborting core
[    1.231001] systemd[1]: systemd-udevd.service: main process exited, code=dumped, status=11/SEGV
[FAILED] Failed to start udev Kernel Device Manager.
See "systemctl status systemd-udevd.service" for[    1.232858] systemd[1]: Failed to start udev Kernel Device Manager.
 details.
[    1.233631] systemd[1]: Unit systemd-udevd.service entered failed state.
[    1.234441] systemd[1]: systemd-udevd.service failed.
[    1.235153] systemd[1]: systemd-udevd.service has no holdoff time, scheduling restart.
[    1.236290] systemd[1]: Stopping udev Kernel Device Manager...
         Stopping udev Kernel Device Manager...
[  OK  ] Stopped udev Kernel Device Manager.
[    1.238375] systemd[1]: Starting udev Kernel Device Manager...
         Starting udev Kernel Device Manager...
[  OK  ] Started Journal Service.
[    1.241442] systemd[1]: Started Journal Service.
         Starting Flush Journal to Persistent Storage...
[    1.244569] systemd-fsck[65]: /dev/sda1: clean, 70884/262144 files, 876258/1048568 blocks
[    1.250220] Process 123(systemd-coredum) has RLIMIT_CORE set to 1
[    1.251324] Aborting core
[    1.253913] Process 124(systemd-coredum) has RLIMIT_CORE set to 1
[    1.254683] Aborting core
[FAILED] Failed to start udev Kernel Device Manager.
See "systemctl status systemd-udevd.service" for details.
         Stopping udev Kernel Device Manager...
[  OK  ] Stopped udev Kernel Device Manager.
         Starting udev Kernel Device Manager...
[    1.261778] Process 127(systemd-coredum) has RLIMIT_CORE set to 1
[    1.262452] Aborting core
[FAILED] Failed to start Flush Journal to Persistent Storage.
See "systemctl status systemd-journal-flush.service" for details.
         Starting Create Volatile Files and Directories...
[    1.266957] Process 128(systemd-coredum) has RLIMIT_CORE set to 1
[    1.268100] Aborting core
[    1.273805] Process 131(systemd-coredum) has RLIMIT_CORE set to 1
[    1.274515] Aborting core
[    1.274975] Process 132(systemd-coredum) has RLIMIT_CORE set to 1
[    1.275629] Aborting core
[FAILED] Failed to start udev Kernel Device Manager.
See "systemctl status systemd-udevd.service" for details.
[    1.278465] Process 135(systemd-coredum) has RLIMIT_CORE set to 1
[    1.278922] Process 134(systemd-coredum) has RLIMIT_CORE set to 1
[    1.278923] Aborting core
[    1.280062] Aborting core
         Stopping udev Kernel Device Manager...
[  OK  ] Stopped udev Kernel Device Manager.
         Starting udev Kernel Device Manager...
[FAILED] Failed to start Create Volatile Files and Directories.
See "systemctl status systemd-tmpfiles-setup.service" for details.
         Starting Update UTMP about System Boot/Shutdown...
[    1.295320] Process 139(systemd-coredum) has RLIMIT_CORE set to 1
[    1.296025] Aborting core
[    1.296605] Process 140(systemd-coredum) has RLIMIT_CORE set to 1
[    1.297389] Aborting core
[    1.298160] Process 141(systemd-coredum) has RLIMIT_CORE set to 1
[    1.299066] Process 143(systemd-coredum) has RLIMIT_CORE set to 1
[    1.299080] Aborting core
[    1.299646] Process 145(systemd-coredum) has RLIMIT_CORE set to 1
[    1.299646] Aborting core
[    1.301004] Aborting core
[FAILED] Failed to start udev Kernel Device Manager.
See "systemctl status systemd-udevd.service" for details.
[FAILED] Failed to start Update UTMP about System Boot/Shutdown.
See "systemctl status systemd-update-utmp.service" for details.
         Stopping udev Kernel Device Manager...
[  OK  ] Stopped udev Kernel Device Manager.
         Starting udev Kernel Device Manager...
[FAILED] Failed to start udev Kernel Device Manager.
See "systemctl status systemd-udevd.service" for details.
[  OK  ] Reached target System Initialization.
[  OK  ] Listening on D-Bus System Message Bus Socket.
[  OK  ] Reached target Sockets.
[  OK  ] Reached target Basic System.
         Starting Network settings...
         Starting D-Bus System Message Bus...
[  OK  ] Started D-Bus System Message Bus.
[    1.323204] Process 148(systemd-coredum) has RLIMIT_CORE set to 1
[    1.323921] Aborting core
[    1.324885] Process 150(systemd-coredum) has RLIMIT_CORE set to 1
[    1.325536] Aborting core
[    1.326527] Process 151(systemd-coredum) has RLIMIT_CORE set to 1
[    1.327199] Aborting core
[    1.328246] Process 153(systemd-coredum) has RLIMIT_CORE set to 1
[    1.328988] Aborting core


#
# Automatically generated file; DO NOT EDIT.
# Linux/x86 4.0.0-rc1 Kernel Configuration
#
CONFIG_64BIT=y
CONFIG_X86_64=y
CONFIG_X86=y
CONFIG_INSTRUCTION_DECODER=y
CONFIG_PERF_EVENTS_INTEL_UNCORE=y
CONFIG_OUTPUT_FORMAT="elf64-x86-64"
CONFIG_ARCH_DEFCONFIG="arch/x86/configs/x86_64_defconfig"
CONFIG_LOCKDEP_SUPPORT=y
CONFIG_STACKTRACE_SUPPORT=y
CONFIG_HAVE_LATENCYTOP_SUPPORT=y
CONFIG_MMU=y
CONFIG_NEED_DMA_MAP_STATE=y
CONFIG_NEED_SG_DMA_LENGTH=y
CONFIG_GENERIC_ISA_DMA=y
CONFIG_GENERIC_BUG=y
CONFIG_GENERIC_BUG_RELATIVE_POINTERS=y
CONFIG_GENERIC_HWEIGHT=y
CONFIG_ARCH_MAY_HAVE_PC_FDC=y
CONFIG_RWSEM_XCHGADD_ALGORITHM=y
CONFIG_GENERIC_CALIBRATE_DELAY=y
CONFIG_ARCH_HAS_CPU_RELAX=y
CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y
CONFIG_HAVE_SETUP_PER_CPU_AREA=y
CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK=y
CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK=y
CONFIG_ARCH_HIBERNATION_POSSIBLE=y
CONFIG_ARCH_SUSPEND_POSSIBLE=y
CONFIG_ARCH_WANT_HUGE_PMD_SHARE=y
CONFIG_ARCH_WANT_GENERAL_HUGETLB=y
CONFIG_ZONE_DMA32=y
CONFIG_AUDIT_ARCH=y
CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING=y
CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y
CONFIG_HAVE_INTEL_TXT=y
CONFIG_X86_64_SMP=y
CONFIG_X86_HT=y
CONFIG_ARCH_HWEIGHT_CFLAGS="-fcall-saved-rdi -fcall-saved-rsi -fcall-saved-rdx -fcall-saved-rcx -fcall-saved-r8 -fcall-saved-r9 -fcall-saved-r10 -fcall-saved-r11"
CONFIG_ARCH_SUPPORTS_UPROBES=y
CONFIG_FIX_EARLYCON_MEM=y
CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
CONFIG_IRQ_WORK=y
CONFIG_BUILDTIME_EXTABLE_SORT=y

#
# General setup
#
CONFIG_INIT_ENV_ARG_LIMIT=32
CONFIG_CROSS_COMPILE=""
# CONFIG_COMPILE_TEST is not set
CONFIG_LOCALVERSION=""
CONFIG_LOCALVERSION_AUTO=y
CONFIG_HAVE_KERNEL_GZIP=y
CONFIG_HAVE_KERNEL_BZIP2=y
CONFIG_HAVE_KERNEL_LZMA=y
CONFIG_HAVE_KERNEL_XZ=y
CONFIG_HAVE_KERNEL_LZO=y
CONFIG_HAVE_KERNEL_LZ4=y
CONFIG_KERNEL_GZIP=y
# CONFIG_KERNEL_BZIP2 is not set
# CONFIG_KERNEL_LZMA is not set
# CONFIG_KERNEL_XZ is not set
# CONFIG_KERNEL_LZO is not set
# CONFIG_KERNEL_LZ4 is not set
CONFIG_DEFAULT_HOSTNAME="earth"
CONFIG_SWAP=y
CONFIG_SYSVIPC=y
CONFIG_SYSVIPC_SYSCTL=y
CONFIG_POSIX_MQUEUE=y
CONFIG_POSIX_MQUEUE_SYSCTL=y
CONFIG_CROSS_MEMORY_ATTACH=y
CONFIG_FHANDLE=y
CONFIG_USELIB=y
CONFIG_AUDIT=y
CONFIG_HAVE_ARCH_AUDITSYSCALL=y
CONFIG_AUDITSYSCALL=y
CONFIG_AUDIT_WATCH=y
CONFIG_AUDIT_TREE=y

#
# IRQ subsystem
#
CONFIG_GENERIC_IRQ_PROBE=y
CONFIG_GENERIC_IRQ_SHOW=y
CONFIG_GENERIC_IRQ_LEGACY_ALLOC_HWIRQ=y
CONFIG_GENERIC_PENDING_IRQ=y
CONFIG_IRQ_DOMAIN=y
CONFIG_GENERIC_MSI_IRQ=y
# CONFIG_IRQ_DOMAIN_DEBUG is not set
CONFIG_IRQ_FORCED_THREADING=y
CONFIG_SPARSE_IRQ=y
CONFIG_CLOCKSOURCE_WATCHDOG=y
CONFIG_ARCH_CLOCKSOURCE_DATA=y
CONFIG_CLOCKSOURCE_VALIDATE_LAST_CYCLE=y
CONFIG_GENERIC_TIME_VSYSCALL=y
CONFIG_GENERIC_CLOCKEVENTS=y
CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y
CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST=y
CONFIG_GENERIC_CMOS_UPDATE=y

#
# Timers subsystem
#
CONFIG_TICK_ONESHOT=y
CONFIG_NO_HZ_COMMON=y
# CONFIG_HZ_PERIODIC is not set
CONFIG_NO_HZ_IDLE=y
# CONFIG_NO_HZ_FULL is not set
CONFIG_NO_HZ=y
CONFIG_HIGH_RES_TIMERS=y

#
# CPU/Task time and stats accounting
#
CONFIG_TICK_CPU_ACCOUNTING=y
# CONFIG_VIRT_CPU_ACCOUNTING_GEN is not set
# CONFIG_IRQ_TIME_ACCOUNTING is not set
CONFIG_BSD_PROCESS_ACCT=y
CONFIG_BSD_PROCESS_ACCT_V3=y
CONFIG_TASKSTATS=y
CONFIG_TASK_DELAY_ACCT=y
CONFIG_TASK_XACCT=y
CONFIG_TASK_IO_ACCOUNTING=y

#
# RCU Subsystem
#
CONFIG_PREEMPT_RCU=y
CONFIG_SRCU=y
CONFIG_TASKS_RCU=y
CONFIG_RCU_STALL_COMMON=y
# CONFIG_RCU_USER_QS is not set
CONFIG_RCU_FANOUT=64
CONFIG_RCU_FANOUT_LEAF=16
# CONFIG_RCU_FANOUT_EXACT is not set
CONFIG_RCU_FAST_NO_HZ=y
CONFIG_TREE_RCU_TRACE=y
# CONFIG_RCU_BOOST is not set
CONFIG_RCU_KTHREAD_PRIO=0
CONFIG_RCU_NOCB_CPU=y
CONFIG_RCU_NOCB_CPU_NONE=y
# CONFIG_RCU_NOCB_CPU_ZERO is not set
# CONFIG_RCU_NOCB_CPU_ALL is not set
# CONFIG_RCU_EXPEDITE_BOOT is not set
CONFIG_BUILD_BIN2C=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_LOG_BUF_SHIFT=21
CONFIG_LOG_CPU_MAX_BUF_SHIFT=12
CONFIG_HAVE_UNSTABLE_SCHED_CLOCK=y
CONFIG_ARCH_SUPPORTS_NUMA_BALANCING=y
CONFIG_ARCH_SUPPORTS_INT128=y
CONFIG_CGROUPS=y
# CONFIG_CGROUP_DEBUG is not set
CONFIG_CGROUP_FREEZER=y
CONFIG_CGROUP_DEVICE=y
CONFIG_CPUSETS=y
CONFIG_PROC_PID_CPUSET=y
CONFIG_CGROUP_CPUACCT=y
CONFIG_PAGE_COUNTER=y
CONFIG_MEMCG=y
CONFIG_MEMCG_SWAP=y
# CONFIG_MEMCG_SWAP_ENABLED is not set
CONFIG_MEMCG_KMEM=y
# CONFIG_CGROUP_HUGETLB is not set
# CONFIG_CGROUP_PERF is not set
CONFIG_CGROUP_SCHED=y
CONFIG_FAIR_GROUP_SCHED=y
CONFIG_CFS_BANDWIDTH=y
# CONFIG_RT_GROUP_SCHED is not set
CONFIG_BLK_CGROUP=y
# CONFIG_DEBUG_BLK_CGROUP is not set
# CONFIG_CHECKPOINT_RESTORE is not set
CONFIG_NAMESPACES=y
CONFIG_UTS_NS=y
CONFIG_IPC_NS=y
CONFIG_USER_NS=y
CONFIG_PID_NS=y
CONFIG_NET_NS=y
CONFIG_SCHED_AUTOGROUP=y
# CONFIG_SYSFS_DEPRECATED is not set
CONFIG_RELAY=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_INITRAMFS_SOURCE=""
CONFIG_RD_GZIP=y
CONFIG_RD_BZIP2=y
CONFIG_RD_LZMA=y
CONFIG_RD_XZ=y
CONFIG_RD_LZO=y
CONFIG_RD_LZ4=y
# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
# CONFIG_LTO_MENU is not set
CONFIG_SYSCTL=y
CONFIG_ANON_INODES=y
CONFIG_SYSCTL_EXCEPTION_TRACE=y
CONFIG_HAVE_PCSPKR_PLATFORM=y
CONFIG_BPF=y
# CONFIG_EXPERT is not set
CONFIG_SGETMASK_SYSCALL=y
CONFIG_SYSFS_SYSCALL=y
# CONFIG_SYSCTL_SYSCALL is not set
CONFIG_KALLSYMS=y
CONFIG_KALLSYMS_ALL=y
CONFIG_PRINTK=y
CONFIG_BUG=y
CONFIG_ELF_CORE=y
CONFIG_PCSPKR_PLATFORM=y
CONFIG_BASE_FULL=y
CONFIG_FUTEX=y
CONFIG_EPOLL=y
CONFIG_SIGNALFD=y
CONFIG_TIMERFD=y
CONFIG_EVENTFD=y
# CONFIG_BPF_SYSCALL is not set
CONFIG_SHMEM=y
CONFIG_AIO=y
CONFIG_ADVISE_SYSCALLS=y
CONFIG_PCI_QUIRKS=y
# CONFIG_EMBEDDED is not set
CONFIG_HAVE_PERF_EVENTS=y

#
# Kernel Performance Events And Counters
#
CONFIG_PERF_EVENTS=y
# CONFIG_DEBUG_PERF_USE_VMALLOC is not set
CONFIG_VM_EVENT_COUNTERS=y
CONFIG_SLUB_DEBUG=y
# CONFIG_COMPAT_BRK is not set
# CONFIG_SLAB is not set
CONFIG_SLUB=y
CONFIG_SLUB_CPU_PARTIAL=y
CONFIG_SYSTEM_TRUSTED_KEYRING=y
# CONFIG_PROFILING is not set
CONFIG_HAVE_OPROFILE=y
CONFIG_OPROFILE_NMI_TIMER=y
# CONFIG_KPROBES is not set
CONFIG_JUMP_LABEL=y
# CONFIG_UPROBES is not set
# CONFIG_HAVE_64BIT_ALIGNED_ACCESS is not set
CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y
CONFIG_ARCH_USE_BUILTIN_BSWAP=y
CONFIG_HAVE_IOREMAP_PROT=y
CONFIG_HAVE_KPROBES=y
CONFIG_HAVE_KRETPROBES=y
CONFIG_HAVE_OPTPROBES=y
CONFIG_HAVE_KPROBES_ON_FTRACE=y
CONFIG_HAVE_ARCH_TRACEHOOK=y
CONFIG_HAVE_DMA_ATTRS=y
CONFIG_HAVE_DMA_CONTIGUOUS=y
CONFIG_GENERIC_SMP_IDLE_THREAD=y
CONFIG_HAVE_REGS_AND_STACK_ACCESS_API=y
CONFIG_HAVE_DMA_API_DEBUG=y
CONFIG_HAVE_HW_BREAKPOINT=y
CONFIG_HAVE_MIXED_BREAKPOINTS_REGS=y
CONFIG_HAVE_USER_RETURN_NOTIFIER=y
CONFIG_HAVE_PERF_EVENTS_NMI=y
CONFIG_HAVE_PERF_REGS=y
CONFIG_HAVE_PERF_USER_STACK_DUMP=y
CONFIG_HAVE_ARCH_JUMP_LABEL=y
CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG=y
CONFIG_HAVE_ALIGNED_STRUCT_PAGE=y
CONFIG_HAVE_CMPXCHG_LOCAL=y
CONFIG_HAVE_CMPXCHG_DOUBLE=y
CONFIG_HAVE_ARCH_SECCOMP_FILTER=y
CONFIG_SECCOMP_FILTER=y
CONFIG_HAVE_CC_STACKPROTECTOR=y
# CONFIG_CC_STACKPROTECTOR is not set
CONFIG_CC_STACKPROTECTOR_NONE=y
# CONFIG_CC_STACKPROTECTOR_REGULAR is not set
# CONFIG_CC_STACKPROTECTOR_STRONG is not set
CONFIG_HAVE_CONTEXT_TRACKING=y
CONFIG_HAVE_VIRT_CPU_ACCOUNTING_GEN=y
CONFIG_HAVE_IRQ_TIME_ACCOUNTING=y
CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE=y
CONFIG_HAVE_ARCH_SOFT_DIRTY=y
CONFIG_MODULES_USE_ELF_RELA=y
CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK=y

#
# GCOV-based kernel profiling
#
# CONFIG_GCOV_KERNEL is not set
CONFIG_ARCH_HAS_GCOV_PROFILE_ALL=y
# CONFIG_HAVE_GENERIC_DMA_COHERENT is not set
CONFIG_SLABINFO=y
CONFIG_RT_MUTEXES=y
CONFIG_BASE_SMALL=0
CONFIG_MODULES=y
CONFIG_MODULE_FORCE_LOAD=y
CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_FORCE_UNLOAD=y
CONFIG_MODVERSIONS=y
# CONFIG_MODULE_SRCVERSION_ALL is not set
# CONFIG_MODULE_SIG is not set
# CONFIG_MODULE_COMPRESS is not set
CONFIG_STOP_MACHINE=y
CONFIG_BLOCK=y
CONFIG_BLK_DEV_BSG=y
CONFIG_BLK_DEV_BSGLIB=y
# CONFIG_BLK_DEV_INTEGRITY is not set
CONFIG_BLK_DEV_THROTTLING=y
# CONFIG_BLK_CMDLINE_PARSER is not set

#
# Partition Types
#
# CONFIG_PARTITION_ADVANCED is not set
CONFIG_MSDOS_PARTITION=y
CONFIG_EFI_PARTITION=y

#
# IO Schedulers
#
CONFIG_IOSCHED_NOOP=y
CONFIG_IOSCHED_DEADLINE=y
CONFIG_IOSCHED_CFQ=y
CONFIG_CFQ_GROUP_IOSCHED=y
# CONFIG_DEFAULT_DEADLINE is not set
CONFIG_DEFAULT_CFQ=y
# CONFIG_DEFAULT_NOOP is not set
CONFIG_DEFAULT_IOSCHED="cfq"
CONFIG_ASN1=y
CONFIG_UNINLINE_SPIN_UNLOCK=y
CONFIG_ARCH_SUPPORTS_ATOMIC_RMW=y
CONFIG_MUTEX_SPIN_ON_OWNER=y
CONFIG_RWSEM_SPIN_ON_OWNER=y
CONFIG_LOCK_SPIN_ON_OWNER=y
CONFIG_ARCH_USE_QUEUE_RWLOCK=y
CONFIG_QUEUE_RWLOCK=y
CONFIG_FREEZER=y

#
# Processor type and features
#
CONFIG_ZONE_DMA=y
CONFIG_SMP=y
CONFIG_X86_FEATURE_NAMES=y
# CONFIG_X86_X2APIC is not set
CONFIG_X86_MPPARSE=y
# CONFIG_X86_EXTENDED_PLATFORM is not set
# CONFIG_X86_INTEL_LPSS is not set
# CONFIG_X86_AMD_PLATFORM_DEVICE is not set
# CONFIG_IOSF_MBI is not set
CONFIG_SCHED_OMIT_FRAME_POINTER=y
CONFIG_HYPERVISOR_GUEST=y
CONFIG_PARAVIRT=y
CONFIG_PARAVIRT_DEBUG=y
CONFIG_PARAVIRT_SPINLOCKS=y
# CONFIG_XEN is not set
CONFIG_KVM_GUEST=y
CONFIG_KVM_DEBUG_FS=y
CONFIG_PARAVIRT_TIME_ACCOUNTING=y
CONFIG_PARAVIRT_CLOCK=y
CONFIG_NO_BOOTMEM=y
# CONFIG_MEMTEST is not set
# CONFIG_MK8 is not set
# CONFIG_MPSC is not set
# CONFIG_MCORE2 is not set
# CONFIG_MATOM is not set
CONFIG_GENERIC_CPU=y
CONFIG_X86_INTERNODE_CACHE_SHIFT=6
CONFIG_X86_L1_CACHE_SHIFT=6
CONFIG_X86_TSC=y
CONFIG_X86_CMPXCHG64=y
CONFIG_X86_CMOV=y
CONFIG_X86_MINIMUM_CPU_FAMILY=64
CONFIG_X86_DEBUGCTLMSR=y
CONFIG_CPU_SUP_INTEL=y
CONFIG_CPU_SUP_AMD=y
CONFIG_CPU_SUP_CENTAUR=y
CONFIG_HPET_TIMER=y
CONFIG_HPET_EMULATE_RTC=y
CONFIG_DMI=y
CONFIG_GART_IOMMU=y
CONFIG_CALGARY_IOMMU=y
CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT=y
CONFIG_SWIOTLB=y
CONFIG_IOMMU_HELPER=y
# CONFIG_MAXSMP is not set
CONFIG_NR_CPUS=16
CONFIG_SCHED_SMT=y
CONFIG_SCHED_MC=y
# CONFIG_PREEMPT_NONE is not set
# CONFIG_PREEMPT_VOLUNTARY is not set
CONFIG_PREEMPT=y
CONFIG_PREEMPT_COUNT=y
CONFIG_X86_LOCAL_APIC=y
CONFIG_X86_IO_APIC=y
CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y
# CONFIG_X86_MCE is not set
CONFIG_X86_16BIT=y
CONFIG_X86_ESPFIX64=y
CONFIG_X86_VSYSCALL_EMULATION=y
# CONFIG_I8K is not set
# CONFIG_MICROCODE is not set
# CONFIG_X86_MSR is not set
# CONFIG_X86_CPUID is not set
CONFIG_ARCH_PHYS_ADDR_T_64BIT=y
CONFIG_ARCH_DMA_ADDR_T_64BIT=y
CONFIG_DIRECT_GBPAGES=y
# CONFIG_NUMA is not set
CONFIG_ARCH_SPARSEMEM_ENABLE=y
CONFIG_ARCH_SPARSEMEM_DEFAULT=y
CONFIG_ARCH_SELECT_MEMORY_MODEL=y
CONFIG_ARCH_PROC_KCORE_TEXT=y
CONFIG_ILLEGAL_POINTER_VALUE=0xdead000000000000
CONFIG_SELECT_MEMORY_MODEL=y
CONFIG_SPARSEMEM_MANUAL=y
CONFIG_SPARSEMEM=y
CONFIG_HAVE_MEMORY_PRESENT=y
CONFIG_SPARSEMEM_EXTREME=y
CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y
CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER=y
# CONFIG_SPARSEMEM_VMEMMAP is not set
CONFIG_HAVE_MEMBLOCK=y
CONFIG_HAVE_MEMBLOCK_NODE_MAP=y
CONFIG_ARCH_DISCARD_MEMBLOCK=y
# CONFIG_HAVE_BOOTMEM_INFO_NODE is not set
# CONFIG_MEMORY_HOTPLUG is not set
CONFIG_PAGEFLAGS_EXTENDED=y
CONFIG_SPLIT_PTLOCK_CPUS=4
CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK=y
CONFIG_MEMORY_BALLOON=y
# CONFIG_COMPACTION is not set
CONFIG_PHYS_ADDR_T_64BIT=y
CONFIG_ZONE_DMA_FLAG=1
# CONFIG_BOUNCE is not set
CONFIG_VIRT_TO_BUS=y
# CONFIG_KSM is not set
CONFIG_DEFAULT_MMAP_MIN_ADDR=4096
# CONFIG_TRANSPARENT_HUGEPAGE is not set
# CONFIG_CLEANCACHE is not set
# CONFIG_FRONTSWAP is not set
# CONFIG_CMA is not set
# CONFIG_ZPOOL is not set
# CONFIG_ZBUD is not set
# CONFIG_ZSMALLOC is not set
CONFIG_GENERIC_EARLY_IOREMAP=y
CONFIG_X86_CHECK_BIOS_CORRUPTION=y
CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK=y
CONFIG_X86_RESERVE_LOW=64
CONFIG_MTRR=y
# CONFIG_MTRR_SANITIZER is not set
CONFIG_X86_PAT=y
CONFIG_ARCH_USES_PG_UNCACHED=y
CONFIG_ARCH_RANDOM=y
CONFIG_X86_SMAP=y
# CONFIG_X86_INTEL_MPX is not set
# CONFIG_EFI is not set
CONFIG_SECCOMP=y
# CONFIG_HZ_100 is not set
# CONFIG_HZ_250 is not set
CONFIG_HZ_300=y
# CONFIG_HZ_1000 is not set
CONFIG_HZ=300
CONFIG_SCHED_HRTICK=y
CONFIG_KEXEC=y
# CONFIG_KEXEC_FILE is not set
# CONFIG_CRASH_DUMP is not set
CONFIG_PHYSICAL_START=0x1000000
CONFIG_RELOCATABLE=y
# CONFIG_RANDOMIZE_BASE is not set
CONFIG_PHYSICAL_ALIGN=0x1000000
CONFIG_HOTPLUG_CPU=y
# CONFIG_BOOTPARAM_HOTPLUG_CPU0 is not set
# CONFIG_DEBUG_HOTPLUG_CPU0 is not set
# CONFIG_CMDLINE_BOOL is not set
CONFIG_HAVE_LIVEPATCH=y
CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y

#
# Power management and ACPI options
#
CONFIG_SUSPEND=y
CONFIG_SUSPEND_FREEZER=y
# CONFIG_HIBERNATION is not set
CONFIG_PM_SLEEP=y
CONFIG_PM_SLEEP_SMP=y
CONFIG_PM_AUTOSLEEP=y
CONFIG_PM_WAKELOCKS=y
CONFIG_PM_WAKELOCKS_LIMIT=100
CONFIG_PM_WAKELOCKS_GC=y
CONFIG_PM=y
CONFIG_PM_DEBUG=y
CONFIG_PM_ADVANCED_DEBUG=y
# CONFIG_PM_TEST_SUSPEND is not set
CONFIG_PM_SLEEP_DEBUG=y
# CONFIG_DPM_WATCHDOG is not set
CONFIG_PM_TRACE=y
CONFIG_PM_TRACE_RTC=y
# CONFIG_WQ_POWER_EFFICIENT_DEFAULT is not set
CONFIG_ACPI=y
CONFIG_ACPI_LEGACY_TABLES_LOOKUP=y
CONFIG_ARCH_MIGHT_HAVE_ACPI_PDC=y
CONFIG_ACPI_SLEEP=y
# CONFIG_ACPI_PROCFS_POWER is not set
# CONFIG_ACPI_EC_DEBUGFS is not set
# CONFIG_ACPI_AC is not set
# CONFIG_ACPI_BATTERY is not set
CONFIG_ACPI_BUTTON=y
# CONFIG_ACPI_VIDEO is not set
# CONFIG_ACPI_FAN is not set
CONFIG_ACPI_DOCK=y
CONFIG_ACPI_PROCESSOR=y
CONFIG_ACPI_HOTPLUG_CPU=y
# CONFIG_ACPI_PROCESSOR_AGGREGATOR is not set
# CONFIG_ACPI_THERMAL is not set
# CONFIG_ACPI_CUSTOM_DSDT is not set
CONFIG_ACPI_INITRD_TABLE_OVERRIDE=y
# CONFIG_ACPI_DEBUG is not set
CONFIG_ACPI_PCI_SLOT=y
CONFIG_X86_PM_TIMER=y
CONFIG_ACPI_CONTAINER=y
CONFIG_ACPI_HOTPLUG_IOAPIC=y
# CONFIG_ACPI_SBS is not set
CONFIG_ACPI_HED=y
# CONFIG_ACPI_CUSTOM_METHOD is not set
# CONFIG_ACPI_REDUCED_HARDWARE_ONLY is not set
CONFIG_HAVE_ACPI_APEI=y
CONFIG_HAVE_ACPI_APEI_NMI=y
CONFIG_ACPI_APEI=y
CONFIG_ACPI_APEI_GHES=y
CONFIG_ACPI_APEI_PCIEAER=y
# CONFIG_ACPI_APEI_EINJ is not set
# CONFIG_ACPI_APEI_ERST_DEBUG is not set
# CONFIG_PMIC_OPREGION is not set
CONFIG_SFI=y

#
# CPU Frequency scaling
#
CONFIG_CPU_FREQ=y
CONFIG_CPU_FREQ_GOV_COMMON=y
# CONFIG_CPU_FREQ_STAT is not set
# CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE is not set
# CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE is not set
# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set
CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
# CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE is not set
CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
# CONFIG_CPU_FREQ_GOV_POWERSAVE is not set
# CONFIG_CPU_FREQ_GOV_USERSPACE is not set
CONFIG_CPU_FREQ_GOV_ONDEMAND=y
# CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set

#
# CPU frequency scaling drivers
#
CONFIG_X86_INTEL_PSTATE=y
# CONFIG_X86_PCC_CPUFREQ is not set
# CONFIG_X86_ACPI_CPUFREQ is not set
# CONFIG_X86_SPEEDSTEP_CENTRINO is not set
# CONFIG_X86_P4_CLOCKMOD is not set

#
# shared options
#
# CONFIG_X86_SPEEDSTEP_LIB is not set

#
# CPU Idle
#
CONFIG_CPU_IDLE=y
CONFIG_CPU_IDLE_GOV_LADDER=y
CONFIG_CPU_IDLE_GOV_MENU=y
# CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED is not set
CONFIG_INTEL_IDLE=y

#
# Memory power savings
#
# CONFIG_I7300_IDLE is not set

#
# Bus options (PCI etc.)
#
CONFIG_PCI=y
CONFIG_PCI_DIRECT=y
CONFIG_PCI_MMCONFIG=y
CONFIG_PCI_DOMAINS=y
CONFIG_PCIEPORTBUS=y
CONFIG_PCIEAER=y
# CONFIG_PCIE_ECRC is not set
# CONFIG_PCIEAER_INJECT is not set
CONFIG_PCIEASPM=y
# CONFIG_PCIEASPM_DEBUG is not set
CONFIG_PCIEASPM_DEFAULT=y
# CONFIG_PCIEASPM_POWERSAVE is not set
# CONFIG_PCIEASPM_PERFORMANCE is not set
CONFIG_PCIE_PME=y
CONFIG_PCI_MSI=y
# CONFIG_PCI_DEBUG is not set
CONFIG_PCI_REALLOC_ENABLE_AUTO=y
# CONFIG_PCI_STUB is not set
CONFIG_HT_IRQ=y
CONFIG_PCI_ATS=y
CONFIG_PCI_IOV=y
CONFIG_PCI_PRI=y
CONFIG_PCI_PASID=y
CONFIG_PCI_LABEL=y

#
# PCI host controller drivers
#
CONFIG_ISA_DMA_API=y
CONFIG_AMD_NB=y
# CONFIG_PCCARD is not set
# CONFIG_HOTPLUG_PCI is not set
# CONFIG_RAPIDIO is not set
# CONFIG_X86_SYSFB is not set

#
# Executable file formats / Emulations
#
CONFIG_BINFMT_ELF=y
CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE=y
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
CONFIG_BINFMT_SCRIPT=y
# CONFIG_HAVE_AOUT is not set
# CONFIG_BINFMT_MISC is not set
CONFIG_COREDUMP=y
# CONFIG_IA32_EMULATION is not set
CONFIG_X86_DEV_DMA_OPS=y
CONFIG_PMC_ATOM=y
CONFIG_NET=y

#
# Networking options
#
CONFIG_PACKET=y
# CONFIG_PACKET_DIAG is not set
CONFIG_UNIX=y
# CONFIG_UNIX_DIAG is not set
# CONFIG_XFRM_USER is not set
# CONFIG_NET_KEY is not set
CONFIG_INET=y
CONFIG_IP_MULTICAST=y
# CONFIG_IP_ADVANCED_ROUTER is not set
# CONFIG_IP_PNP is not set
# CONFIG_NET_IPIP is not set
# CONFIG_NET_IPGRE_DEMUX is not set
# CONFIG_NET_IP_TUNNEL is not set
# CONFIG_IP_MROUTE is not set
# CONFIG_SYN_COOKIES is not set
# CONFIG_NET_UDP_TUNNEL is not set
# CONFIG_NET_FOU is not set
# CONFIG_GENEVE is not set
# CONFIG_INET_AH is not set
# CONFIG_INET_ESP is not set
# CONFIG_INET_IPCOMP is not set
# CONFIG_INET_XFRM_TUNNEL is not set
# CONFIG_INET_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_INET_LRO is not set
# CONFIG_INET_DIAG is not set
# CONFIG_TCP_CONG_ADVANCED is not set
CONFIG_TCP_CONG_CUBIC=y
CONFIG_DEFAULT_TCP_CONG="cubic"
# CONFIG_TCP_MD5SIG is not set
CONFIG_IPV6=y
CONFIG_IPV6_ROUTER_PREF=y
CONFIG_IPV6_ROUTE_INFO=y
CONFIG_IPV6_OPTIMISTIC_DAD=y
# CONFIG_INET6_AH is not set
# CONFIG_INET6_ESP is not set
# CONFIG_INET6_IPCOMP is not set
# CONFIG_IPV6_MIP6 is not set
# CONFIG_INET6_XFRM_TUNNEL is not set
# CONFIG_INET6_TUNNEL is not set
# CONFIG_INET6_XFRM_MODE_TRANSPORT is not set
# CONFIG_INET6_XFRM_MODE_TUNNEL is not set
# CONFIG_INET6_XFRM_MODE_BEET is not set
# CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set
# CONFIG_IPV6_SIT is not set
# CONFIG_IPV6_TUNNEL is not set
# CONFIG_IPV6_GRE is not set
# CONFIG_IPV6_MULTIPLE_TABLES is not set
# CONFIG_IPV6_MROUTE is not set
# CONFIG_NETWORK_SECMARK is not set
CONFIG_NET_PTP_CLASSIFY=y
# CONFIG_NETWORK_PHY_TIMESTAMPING is not set
# CONFIG_NETFILTER is not set
# CONFIG_IP_DCCP is not set
# CONFIG_IP_SCTP is not set
# CONFIG_RDS is not set
# CONFIG_TIPC is not set
# CONFIG_ATM is not set
# CONFIG_L2TP is not set
# CONFIG_BRIDGE is not set
CONFIG_HAVE_NET_DSA=y
# CONFIG_VLAN_8021Q is not set
# CONFIG_DECNET is not set
# CONFIG_LLC2 is not set
# CONFIG_IPX is not set
# CONFIG_ATALK is not set
# CONFIG_X25 is not set
# CONFIG_LAPB is not set
# CONFIG_PHONET is not set
# CONFIG_6LOWPAN is not set
# CONFIG_IEEE802154 is not set
# CONFIG_NET_SCHED is not set
# CONFIG_DCB is not set
# CONFIG_DNS_RESOLVER is not set
# CONFIG_BATMAN_ADV is not set
# CONFIG_OPENVSWITCH is not set
# CONFIG_VSOCKETS is not set
# CONFIG_NETLINK_MMAP is not set
# CONFIG_NETLINK_DIAG is not set
# CONFIG_NET_MPLS_GSO is not set
# CONFIG_HSR is not set
# CONFIG_NET_SWITCHDEV is not set
CONFIG_RPS=y
CONFIG_RFS_ACCEL=y
CONFIG_XPS=y
# CONFIG_CGROUP_NET_PRIO is not set
# CONFIG_CGROUP_NET_CLASSID is not set
CONFIG_NET_RX_BUSY_POLL=y
CONFIG_BQL=y
# CONFIG_BPF_JIT is not set
CONFIG_NET_FLOW_LIMIT=y

#
# Network testing
#
# CONFIG_NET_PKTGEN is not set
# CONFIG_HAMRADIO is not set
# CONFIG_CAN is not set
# CONFIG_IRDA is not set
# CONFIG_BT is not set
# CONFIG_AF_RXRPC is not set
# CONFIG_WIRELESS is not set
# CONFIG_WIMAX is not set
# CONFIG_RFKILL is not set
# CONFIG_RFKILL_REGULATOR is not set
# CONFIG_NET_9P is not set
# CONFIG_CAIF is not set
# CONFIG_CEPH_LIB is not set
# CONFIG_NFC is not set
CONFIG_HAVE_BPF_JIT=y

#
# Device Drivers
#

#
# Generic Driver Options
#
CONFIG_UEVENT_HELPER=y
CONFIG_UEVENT_HELPER_PATH=""
CONFIG_DEVTMPFS=y
# CONFIG_DEVTMPFS_MOUNT is not set
CONFIG_STANDALONE=y
CONFIG_PREVENT_FIRMWARE_BUILD=y
CONFIG_FW_LOADER=y
CONFIG_FIRMWARE_IN_KERNEL=y
CONFIG_EXTRA_FIRMWARE=""
# CONFIG_FW_LOADER_USER_HELPER_FALLBACK is not set
CONFIG_ALLOW_DEV_COREDUMP=y
# CONFIG_DEBUG_DRIVER is not set
# CONFIG_DEBUG_DEVRES is not set
# CONFIG_SYS_HYPERVISOR is not set
# CONFIG_GENERIC_CPU_DEVICES is not set
CONFIG_GENERIC_CPU_AUTOPROBE=y
CONFIG_REGMAP=y
CONFIG_REGMAP_I2C=y
CONFIG_REGMAP_IRQ=y
CONFIG_DMA_SHARED_BUFFER=y
# CONFIG_FENCE_TRACE is not set

#
# Bus devices
#
CONFIG_CONNECTOR=y
CONFIG_PROC_EVENTS=y
# CONFIG_MTD is not set
CONFIG_ARCH_MIGHT_HAVE_PC_PARPORT=y
# CONFIG_PARPORT is not set
CONFIG_PNP=y
# CONFIG_PNP_DEBUG_MESSAGES is not set

#
# Protocols
#
CONFIG_PNPACPI=y
CONFIG_BLK_DEV=y
CONFIG_BLK_DEV_NULL_BLK=y
# CONFIG_BLK_DEV_FD is not set
# CONFIG_BLK_DEV_PCIESSD_MTIP32XX is not set
# CONFIG_BLK_CPQ_CISS_DA is not set
# CONFIG_BLK_DEV_DAC960 is not set
# CONFIG_BLK_DEV_UMEM is not set
# CONFIG_BLK_DEV_COW_COMMON is not set
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_LOOP_MIN_COUNT=8
# CONFIG_BLK_DEV_CRYPTOLOOP is not set
# CONFIG_BLK_DEV_DRBD is not set
CONFIG_BLK_DEV_NBD=y
# CONFIG_BLK_DEV_NVME is not set
# CONFIG_BLK_DEV_SKD is not set
# CONFIG_BLK_DEV_SX8 is not set
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_COUNT=16
CONFIG_BLK_DEV_RAM_SIZE=4096
# CONFIG_CDROM_PKTCDVD is not set
# CONFIG_ATA_OVER_ETH is not set
CONFIG_VIRTIO_BLK=y
# CONFIG_BLK_DEV_HD is not set
# CONFIG_BLK_DEV_RBD is not set
# CONFIG_BLK_DEV_RSXX is not set

#
# Misc devices
#
# CONFIG_SENSORS_LIS3LV02D is not set
# CONFIG_AD525X_DPOT is not set
# CONFIG_DUMMY_IRQ is not set
# CONFIG_IBM_ASM is not set
# CONFIG_PHANTOM is not set
# CONFIG_SGI_IOC4 is not set
# CONFIG_TIFM_CORE is not set
# CONFIG_ICS932S401 is not set
# CONFIG_ENCLOSURE_SERVICES is not set
# CONFIG_HP_ILO is not set
# CONFIG_APDS9802ALS is not set
# CONFIG_ISL29003 is not set
# CONFIG_ISL29020 is not set
# CONFIG_SENSORS_TSL2550 is not set
# CONFIG_SENSORS_BH1780 is not set
# CONFIG_SENSORS_BH1770 is not set
# CONFIG_SENSORS_APDS990X is not set
# CONFIG_HMC6352 is not set
# CONFIG_DS1682 is not set
# CONFIG_VMWARE_BALLOON is not set
# CONFIG_BMP085_I2C is not set
# CONFIG_USB_SWITCH_FSA9480 is not set
# CONFIG_SRAM is not set
# CONFIG_C2PORT is not set

#
# EEPROM support
#
# CONFIG_EEPROM_AT24 is not set
# CONFIG_EEPROM_LEGACY is not set
# CONFIG_EEPROM_MAX6875 is not set
# CONFIG_EEPROM_93CX6 is not set
# CONFIG_CB710_CORE is not set

#
# Texas Instruments shared transport line discipline
#
# CONFIG_SENSORS_LIS3_I2C is not set

#
# Altera FPGA firmware download module
#
# CONFIG_ALTERA_STAPL is not set
# CONFIG_INTEL_MEI is not set
# CONFIG_INTEL_MEI_ME is not set
# CONFIG_INTEL_MEI_TXE is not set
# CONFIG_VMWARE_VMCI is not set

#
# Intel MIC Bus Driver
#
# CONFIG_INTEL_MIC_BUS is not set

#
# Intel MIC Host Driver
#

#
# Intel MIC Card Driver
#
# CONFIG_GENWQE is not set
# CONFIG_ECHO is not set
# CONFIG_CXL_BASE is not set
CONFIG_HAVE_IDE=y
# CONFIG_IDE is not set

#
# SCSI device support
#
CONFIG_SCSI_MOD=y
# CONFIG_RAID_ATTRS is not set
CONFIG_SCSI=y
CONFIG_SCSI_DMA=y
# CONFIG_SCSI_NETLINK is not set
# CONFIG_SCSI_MQ_DEFAULT is not set
CONFIG_SCSI_PROC_FS=y

#
# SCSI support type (disk, tape, CD-ROM)
#
CONFIG_BLK_DEV_SD=y
# CONFIG_CHR_DEV_ST is not set
# CONFIG_CHR_DEV_OSST is not set
# CONFIG_BLK_DEV_SR is not set
# CONFIG_CHR_DEV_SG is not set
# CONFIG_CHR_DEV_SCH is not set
# CONFIG_SCSI_CONSTANTS is not set
# CONFIG_SCSI_LOGGING is not set
CONFIG_SCSI_SCAN_ASYNC=y

#
# SCSI Transports
#
# CONFIG_SCSI_SPI_ATTRS is not set
# CONFIG_SCSI_FC_ATTRS is not set
# CONFIG_SCSI_ISCSI_ATTRS is not set
# CONFIG_SCSI_SAS_ATTRS is not set
# CONFIG_SCSI_SAS_LIBSAS is not set
# CONFIG_SCSI_SRP_ATTRS is not set
CONFIG_SCSI_LOWLEVEL=y
# CONFIG_ISCSI_TCP is not set
# CONFIG_ISCSI_BOOT_SYSFS is not set
# CONFIG_SCSI_CXGB3_ISCSI is not set
# CONFIG_SCSI_CXGB4_ISCSI is not set
# CONFIG_SCSI_BNX2_ISCSI is not set
# CONFIG_BE2ISCSI is not set
# CONFIG_BLK_DEV_3W_XXXX_RAID is not set
# CONFIG_SCSI_HPSA is not set
# CONFIG_SCSI_3W_9XXX is not set
# CONFIG_SCSI_3W_SAS is not set
# CONFIG_SCSI_ACARD is not set
# CONFIG_SCSI_AACRAID is not set
# CONFIG_SCSI_AIC7XXX is not set
# CONFIG_SCSI_AIC79XX is not set
# CONFIG_SCSI_AIC94XX is not set
# CONFIG_SCSI_MVSAS is not set
# CONFIG_SCSI_MVUMI is not set
# CONFIG_SCSI_DPT_I2O is not set
# CONFIG_SCSI_ADVANSYS is not set
# CONFIG_SCSI_ARCMSR is not set
# CONFIG_SCSI_ESAS2R is not set
CONFIG_MEGARAID_NEWGEN=y
# CONFIG_MEGARAID_MM is not set
# CONFIG_MEGARAID_LEGACY is not set
# CONFIG_MEGARAID_SAS is not set
# CONFIG_SCSI_MPT2SAS is not set
# CONFIG_SCSI_MPT3SAS is not set
# CONFIG_SCSI_UFSHCD is not set
# CONFIG_SCSI_HPTIOP is not set
# CONFIG_SCSI_BUSLOGIC is not set
# CONFIG_VMWARE_PVSCSI is not set
# CONFIG_SCSI_DMX3191D is not set
# CONFIG_SCSI_EATA is not set
# CONFIG_SCSI_FUTURE_DOMAIN is not set
# CONFIG_SCSI_GDTH is not set
# CONFIG_SCSI_ISCI is not set
# CONFIG_SCSI_IPS is not set
# CONFIG_SCSI_INITIO is not set
# CONFIG_SCSI_INIA100 is not set
# CONFIG_SCSI_STEX is not set
# CONFIG_SCSI_SYM53C8XX_2 is not set
# CONFIG_SCSI_IPR is not set
# CONFIG_SCSI_QLOGIC_1280 is not set
# CONFIG_SCSI_QLA_ISCSI is not set
# CONFIG_SCSI_DC395x is not set
# CONFIG_SCSI_AM53C974 is not set
# CONFIG_SCSI_WD719X is not set
# CONFIG_SCSI_DEBUG is not set
# CONFIG_SCSI_PMCRAID is not set
# CONFIG_SCSI_PM8001 is not set
# CONFIG_SCSI_VIRTIO is not set
# CONFIG_SCSI_DH is not set
# CONFIG_SCSI_OSD_INITIATOR is not set
CONFIG_ATA=y
# CONFIG_ATA_NONSTANDARD is not set
CONFIG_ATA_VERBOSE_ERROR=y
CONFIG_ATA_ACPI=y
CONFIG_SATA_ZPODD=y
CONFIG_SATA_PMP=y

#
# Controllers with non-SFF native interface
#
# CONFIG_SATA_AHCI is not set
# CONFIG_SATA_AHCI_PLATFORM is not set
# CONFIG_SATA_INIC162X is not set
# CONFIG_SATA_ACARD_AHCI is not set
# CONFIG_SATA_SIL24 is not set
CONFIG_ATA_SFF=y

#
# SFF controllers with custom DMA interface
#
# CONFIG_PDC_ADMA is not set
# CONFIG_SATA_QSTOR is not set
# CONFIG_SATA_SX4 is not set
CONFIG_ATA_BMDMA=y

#
# SATA SFF controllers with BMDMA
#
CONFIG_ATA_PIIX=y
# CONFIG_SATA_MV is not set
# CONFIG_SATA_NV is not set
# CONFIG_SATA_PROMISE is not set
# CONFIG_SATA_SIL is not set
# CONFIG_SATA_SIS is not set
# CONFIG_SATA_SVW is not set
# CONFIG_SATA_ULI is not set
# CONFIG_SATA_VIA is not set
# CONFIG_SATA_VITESSE is not set

#
# PATA SFF controllers with BMDMA
#
# CONFIG_PATA_ALI is not set
# CONFIG_PATA_AMD is not set
# CONFIG_PATA_ARTOP is not set
# CONFIG_PATA_ATIIXP is not set
# CONFIG_PATA_ATP867X is not set
# CONFIG_PATA_CMD64X is not set
# CONFIG_PATA_CYPRESS is not set
# CONFIG_PATA_EFAR is not set
# CONFIG_PATA_HPT366 is not set
# CONFIG_PATA_HPT37X is not set
# CONFIG_PATA_HPT3X2N is not set
# CONFIG_PATA_HPT3X3 is not set
# CONFIG_PATA_IT8213 is not set
# CONFIG_PATA_IT821X is not set
# CONFIG_PATA_JMICRON is not set
# CONFIG_PATA_MARVELL is not set
# CONFIG_PATA_NETCELL is not set
# CONFIG_PATA_NINJA32 is not set
# CONFIG_PATA_NS87415 is not set
# CONFIG_PATA_OLDPIIX is not set
# CONFIG_PATA_OPTIDMA is not set
# CONFIG_PATA_PDC2027X is not set
# CONFIG_PATA_PDC_OLD is not set
# CONFIG_PATA_RADISYS is not set
# CONFIG_PATA_RDC is not set
# CONFIG_PATA_SCH is not set
# CONFIG_PATA_SERVERWORKS is not set
# CONFIG_PATA_SIL680 is not set
# CONFIG_PATA_SIS is not set
# CONFIG_PATA_TOSHIBA is not set
# CONFIG_PATA_TRIFLEX is not set
# CONFIG_PATA_VIA is not set
# CONFIG_PATA_WINBOND is not set

#
# PIO-only SFF controllers
#
# CONFIG_PATA_CMD640_PCI is not set
# CONFIG_PATA_MPIIX is not set
# CONFIG_PATA_NS87410 is not set
# CONFIG_PATA_OPTI is not set
# CONFIG_PATA_RZ1000 is not set

#
# Generic fallback / legacy drivers
#
CONFIG_PATA_ACPI=y
CONFIG_ATA_GENERIC=y
# CONFIG_PATA_LEGACY is not set
CONFIG_MD=y
# CONFIG_BLK_DEV_MD is not set
# CONFIG_BCACHE is not set
# CONFIG_BLK_DEV_DM is not set
# CONFIG_TARGET_CORE is not set
# CONFIG_FUSION is not set

#
# IEEE 1394 (FireWire) support
#
# CONFIG_FIREWIRE is not set
# CONFIG_FIREWIRE_NOSY is not set
# CONFIG_MACINTOSH_DRIVERS is not set
CONFIG_NETDEVICES=y
CONFIG_NET_CORE=y
# CONFIG_BONDING is not set
# CONFIG_DUMMY is not set
# CONFIG_EQUALIZER is not set
# CONFIG_NET_FC is not set
# CONFIG_NET_TEAM is not set
# CONFIG_MACVLAN is not set
# CONFIG_IPVLAN is not set
# CONFIG_VXLAN is not set
# CONFIG_NETCONSOLE is not set
# CONFIG_NETPOLL is not set
# CONFIG_NET_POLL_CONTROLLER is not set
# CONFIG_TUN is not set
# CONFIG_VETH is not set
# CONFIG_VIRTIO_NET is not set
# CONFIG_NLMON is not set
# CONFIG_ARCNET is not set

#
# CAIF transport drivers
#
CONFIG_VHOST_NET=m
CONFIG_VHOST_RING=m
CONFIG_VHOST=m

#
# Distributed Switch Architecture drivers
#
# CONFIG_NET_DSA_MV88E6XXX is not set
# CONFIG_NET_DSA_MV88E6060 is not set
# CONFIG_NET_DSA_MV88E6XXX_NEED_PPU is not set
# CONFIG_NET_DSA_MV88E6131 is not set
# CONFIG_NET_DSA_MV88E6123_61_65 is not set
# CONFIG_NET_DSA_MV88E6171 is not set
# CONFIG_NET_DSA_MV88E6352 is not set
# CONFIG_NET_DSA_BCM_SF2 is not set
CONFIG_ETHERNET=y
# CONFIG_NET_VENDOR_3COM is not set
# CONFIG_NET_VENDOR_ADAPTEC is not set
# CONFIG_NET_VENDOR_AGERE is not set
# CONFIG_NET_VENDOR_ALTEON is not set
# CONFIG_ALTERA_TSE is not set
# CONFIG_NET_VENDOR_AMD is not set
# CONFIG_NET_XGENE is not set
# CONFIG_NET_VENDOR_ARC is not set
# CONFIG_NET_VENDOR_ATHEROS is not set
# CONFIG_NET_VENDOR_BROADCOM is not set
# CONFIG_NET_VENDOR_BROCADE is not set
# CONFIG_NET_VENDOR_CHELSIO is not set
# CONFIG_NET_VENDOR_CISCO is not set
# CONFIG_CX_ECAT is not set
# CONFIG_DNET is not set
# CONFIG_NET_VENDOR_DEC is not set
# CONFIG_NET_VENDOR_DLINK is not set
# CONFIG_NET_VENDOR_EMULEX is not set
# CONFIG_NET_VENDOR_EXAR is not set
# CONFIG_NET_VENDOR_HP is not set
CONFIG_NET_VENDOR_INTEL=y
# CONFIG_E100 is not set
CONFIG_E1000=y
# CONFIG_E1000E is not set
# CONFIG_IGB is not set
# CONFIG_IGBVF is not set
# CONFIG_IXGB is not set
# CONFIG_IXGBE is not set
# CONFIG_IXGBEVF is not set
# CONFIG_I40E is not set
# CONFIG_I40EVF is not set
# CONFIG_FM10K is not set
# CONFIG_NET_VENDOR_I825XX is not set
# CONFIG_IP1000 is not set
# CONFIG_JME is not set
# CONFIG_NET_VENDOR_MARVELL is not set
# CONFIG_NET_VENDOR_MELLANOX is not set
# CONFIG_NET_VENDOR_MICREL is not set
# CONFIG_NET_VENDOR_MYRI is not set
# CONFIG_FEALNX is not set
# CONFIG_NET_VENDOR_NATSEMI is not set
# CONFIG_NET_VENDOR_NVIDIA is not set
# CONFIG_NET_VENDOR_OKI is not set
# CONFIG_ETHOC is not set
# CONFIG_NET_PACKET_ENGINE is not set
# CONFIG_NET_VENDOR_QLOGIC is not set
# CONFIG_NET_VENDOR_QUALCOMM is not set
# CONFIG_NET_VENDOR_REALTEK is not set
# CONFIG_NET_VENDOR_RDC is not set
# CONFIG_NET_VENDOR_ROCKER is not set
# CONFIG_NET_VENDOR_SAMSUNG is not set
# CONFIG_NET_VENDOR_SEEQ is not set
# CONFIG_NET_VENDOR_SILAN is not set
# CONFIG_NET_VENDOR_SIS is not set
# CONFIG_SFC is not set
# CONFIG_NET_VENDOR_SMSC is not set
# CONFIG_NET_VENDOR_STMICRO is not set
# CONFIG_NET_VENDOR_SUN is not set
# CONFIG_NET_VENDOR_TEHUTI is not set
# CONFIG_NET_VENDOR_TI is not set
# CONFIG_NET_VENDOR_VIA is not set
# CONFIG_NET_VENDOR_WIZNET is not set
# CONFIG_FDDI is not set
# CONFIG_HIPPI is not set
# CONFIG_NET_SB1000 is not set
# CONFIG_PHYLIB is not set
# CONFIG_PPP is not set
# CONFIG_SLIP is not set

#
# Host-side USB support is needed for USB Network Adapter support
#
# CONFIG_WLAN is not set

#
# Enable WiMAX (Networking options) to see the WiMAX drivers
#
# CONFIG_WAN is not set
# CONFIG_VMXNET3 is not set
# CONFIG_ISDN is not set

#
# Input device support
#
CONFIG_INPUT=y
# CONFIG_INPUT_FF_MEMLESS is not set
# CONFIG_INPUT_POLLDEV is not set
# CONFIG_INPUT_SPARSEKMAP is not set
# CONFIG_INPUT_MATRIXKMAP is not set

#
# Userland interfaces
#
# CONFIG_INPUT_MOUSEDEV is not set
# CONFIG_INPUT_JOYDEV is not set
CONFIG_INPUT_EVDEV=y
# CONFIG_INPUT_EVBUG is not set

#
# Input Device Drivers
#
CONFIG_INPUT_KEYBOARD=y
# CONFIG_KEYBOARD_ADP5588 is not set
# CONFIG_KEYBOARD_ADP5589 is not set
CONFIG_KEYBOARD_ATKBD=y
# CONFIG_KEYBOARD_QT1070 is not set
# CONFIG_KEYBOARD_QT2160 is not set
# CONFIG_KEYBOARD_LKKBD is not set
# CONFIG_KEYBOARD_TCA6416 is not set
# CONFIG_KEYBOARD_TCA8418 is not set
# CONFIG_KEYBOARD_LM8323 is not set
# CONFIG_KEYBOARD_LM8333 is not set
# CONFIG_KEYBOARD_MAX7359 is not set
# CONFIG_KEYBOARD_MCS is not set
# CONFIG_KEYBOARD_MPR121 is not set
# CONFIG_KEYBOARD_NEWTON is not set
# CONFIG_KEYBOARD_OPENCORES is not set
# CONFIG_KEYBOARD_STOWAWAY is not set
# CONFIG_KEYBOARD_SUNKBD is not set
# CONFIG_KEYBOARD_XTKBD is not set
CONFIG_INPUT_LEDS=y
# CONFIG_INPUT_MOUSE is not set
# CONFIG_INPUT_JOYSTICK is not set
# CONFIG_INPUT_TABLET is not set
# CONFIG_INPUT_TOUCHSCREEN is not set
# CONFIG_INPUT_MISC is not set

#
# Hardware I/O ports
#
CONFIG_SERIO=y
CONFIG_ARCH_MIGHT_HAVE_PC_SERIO=y
CONFIG_SERIO_I8042=y
# CONFIG_SERIO_SERPORT is not set
# CONFIG_SERIO_CT82C710 is not set
# CONFIG_SERIO_PCIPS2 is not set
CONFIG_SERIO_LIBPS2=y
CONFIG_SERIO_RAW=y
# CONFIG_SERIO_ALTERA_PS2 is not set
# CONFIG_SERIO_PS2MULT is not set
# CONFIG_SERIO_ARC_PS2 is not set
# CONFIG_GAMEPORT is not set

#
# Character devices
#
CONFIG_TTY=y
CONFIG_VT=y
CONFIG_CONSOLE_TRANSLATIONS=y
CONFIG_VT_CONSOLE=y
CONFIG_VT_CONSOLE_SLEEP=y
CONFIG_HW_CONSOLE=y
CONFIG_VT_HW_CONSOLE_BINDING=y
CONFIG_UNIX98_PTYS=y
CONFIG_DEVPTS_MULTIPLE_INSTANCES=y
# CONFIG_LEGACY_PTYS is not set
CONFIG_SERIAL_NONSTANDARD=y
# CONFIG_ROCKETPORT is not set
# CONFIG_CYCLADES is not set
# CONFIG_MOXA_INTELLIO is not set
# CONFIG_MOXA_SMARTIO is not set
# CONFIG_SYNCLINK is not set
# CONFIG_SYNCLINKMP is not set
# CONFIG_SYNCLINK_GT is not set
# CONFIG_NOZOMI is not set
# CONFIG_ISI is not set
# CONFIG_N_HDLC is not set
# CONFIG_N_GSM is not set
# CONFIG_TRACE_SINK is not set
CONFIG_DEVMEM=y
# CONFIG_DEVKMEM is not set

#
# Serial drivers
#
CONFIG_SERIAL_EARLYCON=y
CONFIG_SERIAL_8250=y
# CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set
CONFIG_SERIAL_8250_PNP=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_SERIAL_8250_PCI=y
CONFIG_SERIAL_8250_NR_UARTS=32
CONFIG_SERIAL_8250_RUNTIME_UARTS=4
# CONFIG_SERIAL_8250_EXTENDED is not set
# CONFIG_SERIAL_8250_DW is not set
# CONFIG_SERIAL_8250_FINTEK is not set

#
# Non-8250 serial port support
#
# CONFIG_SERIAL_MFD_HSU is not set
CONFIG_SERIAL_CORE=y
CONFIG_SERIAL_CORE_CONSOLE=y
# CONFIG_SERIAL_JSM is not set
# CONFIG_SERIAL_SCCNXP is not set
# CONFIG_SERIAL_SC16IS7XX is not set
# CONFIG_SERIAL_ALTERA_JTAGUART is not set
# CONFIG_SERIAL_ALTERA_UART is not set
# CONFIG_SERIAL_ARC is not set
# CONFIG_SERIAL_RP2 is not set
# CONFIG_SERIAL_FSL_LPUART is not set
CONFIG_HVC_DRIVER=y
CONFIG_VIRTIO_CONSOLE=m
# CONFIG_IPMI_HANDLER is not set
# CONFIG_HW_RANDOM is not set
# CONFIG_NVRAM is not set
# CONFIG_R3964 is not set
# CONFIG_APPLICOM is not set
# CONFIG_MWAVE is not set
# CONFIG_RAW_DRIVER is not set
CONFIG_HPET=y
CONFIG_HPET_MMAP=y
CONFIG_HPET_MMAP_DEFAULT=y
# CONFIG_HANGCHECK_TIMER is not set
# CONFIG_TCG_TPM is not set
# CONFIG_TELCLOCK is not set
CONFIG_DEVPORT=y
# CONFIG_XILLYBUS is not set

#
# I2C support
#
CONFIG_I2C=y
CONFIG_ACPI_I2C_OPREGION=y
CONFIG_I2C_BOARDINFO=y
CONFIG_I2C_COMPAT=y
# CONFIG_I2C_CHARDEV is not set
# CONFIG_I2C_MUX is not set
CONFIG_I2C_HELPER_AUTO=y
CONFIG_I2C_ALGOBIT=y

#
# I2C Hardware Bus support
#

#
# PC SMBus host controller drivers
#
# CONFIG_I2C_ALI1535 is not set
# CONFIG_I2C_ALI1563 is not set
# CONFIG_I2C_ALI15X3 is not set
# CONFIG_I2C_AMD756 is not set
# CONFIG_I2C_AMD8111 is not set
# CONFIG_I2C_I801 is not set
# CONFIG_I2C_ISCH is not set
# CONFIG_I2C_ISMT is not set
CONFIG_I2C_PIIX4=y
# CONFIG_I2C_NFORCE2 is not set
# CONFIG_I2C_SIS5595 is not set
# CONFIG_I2C_SIS630 is not set
# CONFIG_I2C_SIS96X is not set
# CONFIG_I2C_VIA is not set
# CONFIG_I2C_VIAPRO is not set

#
# ACPI drivers
#
# CONFIG_I2C_SCMI is not set

#
# I2C system bus drivers (mostly embedded / system-on-chip)
#
# CONFIG_I2C_DESIGNWARE_PCI is not set
# CONFIG_I2C_OCORES is not set
# CONFIG_I2C_PCA_PLATFORM is not set
# CONFIG_I2C_PXA_PCI is not set
# CONFIG_I2C_SIMTEC is not set
# CONFIG_I2C_XILINX is not set

#
# External I2C/SMBus adapter drivers
#
# CONFIG_I2C_PARPORT_LIGHT is not set
# CONFIG_I2C_TAOS_EVM is not set

#
# Other I2C/SMBus bus drivers
#
# CONFIG_I2C_STUB is not set
# CONFIG_I2C_SLAVE is not set
# CONFIG_I2C_DEBUG_CORE is not set
# CONFIG_I2C_DEBUG_ALGO is not set
# CONFIG_I2C_DEBUG_BUS is not set
# CONFIG_SPI is not set
# CONFIG_SPMI is not set
# CONFIG_HSI is not set

#
# PPS support
#
CONFIG_PPS=m
# CONFIG_PPS_DEBUG is not set

#
# PPS clients support
#
# CONFIG_PPS_CLIENT_KTIMER is not set
# CONFIG_PPS_CLIENT_LDISC is not set
# CONFIG_PPS_CLIENT_GPIO is not set

#
# PPS generators support
#

#
# PTP clock support
#
CONFIG_PTP_1588_CLOCK=m

#
# Enable PHYLIB and NETWORK_PHY_TIMESTAMPING to see the additional clocks.
#
CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y
# CONFIG_GPIOLIB is not set
# CONFIG_W1 is not set
CONFIG_POWER_SUPPLY=y
# CONFIG_POWER_SUPPLY_DEBUG is not set
# CONFIG_PDA_POWER is not set
# CONFIG_TEST_POWER is not set
# CONFIG_BATTERY_DS2780 is not set
# CONFIG_BATTERY_DS2781 is not set
# CONFIG_BATTERY_DS2782 is not set
# CONFIG_BATTERY_SBS is not set
# CONFIG_BATTERY_BQ27x00 is not set
# CONFIG_BATTERY_MAX17040 is not set
# CONFIG_BATTERY_MAX17042 is not set
# CONFIG_CHARGER_MAX8903 is not set
# CONFIG_CHARGER_LP8727 is not set
# CONFIG_CHARGER_MANAGER is not set
# CONFIG_CHARGER_BQ2415X is not set
# CONFIG_CHARGER_SMB347 is not set
# CONFIG_BATTERY_GAUGE_LTC2941 is not set
CONFIG_POWER_RESET=y
# CONFIG_POWER_RESET_RESTART is not set
CONFIG_POWER_AVS=y
CONFIG_HWMON=y
# CONFIG_HWMON_VID is not set
# CONFIG_HWMON_DEBUG_CHIP is not set

#
# Native drivers
#
# CONFIG_SENSORS_ABITUGURU is not set
# CONFIG_SENSORS_ABITUGURU3 is not set
# CONFIG_SENSORS_AD7414 is not set
# CONFIG_SENSORS_AD7418 is not set
# CONFIG_SENSORS_ADM1021 is not set
# CONFIG_SENSORS_ADM1025 is not set
# CONFIG_SENSORS_ADM1026 is not set
# CONFIG_SENSORS_ADM1029 is not set
# CONFIG_SENSORS_ADM1031 is not set
# CONFIG_SENSORS_ADM9240 is not set
# CONFIG_SENSORS_ADT7410 is not set
# CONFIG_SENSORS_ADT7411 is not set
# CONFIG_SENSORS_ADT7462 is not set
# CONFIG_SENSORS_ADT7470 is not set
# CONFIG_SENSORS_ADT7475 is not set
# CONFIG_SENSORS_ASC7621 is not set
# CONFIG_SENSORS_K8TEMP is not set
# CONFIG_SENSORS_K10TEMP is not set
# CONFIG_SENSORS_FAM15H_POWER is not set
# CONFIG_SENSORS_APPLESMC is not set
# CONFIG_SENSORS_ASB100 is not set
# CONFIG_SENSORS_ATXP1 is not set
# CONFIG_SENSORS_DS620 is not set
# CONFIG_SENSORS_DS1621 is not set
# CONFIG_SENSORS_I5K_AMB is not set
# CONFIG_SENSORS_F71805F is not set
# CONFIG_SENSORS_F71882FG is not set
# CONFIG_SENSORS_F75375S is not set
# CONFIG_SENSORS_FSCHMD is not set
# CONFIG_SENSORS_GL518SM is not set
# CONFIG_SENSORS_GL520SM is not set
# CONFIG_SENSORS_G760A is not set
# CONFIG_SENSORS_G762 is not set
# CONFIG_SENSORS_HIH6130 is not set
# CONFIG_SENSORS_I5500 is not set
# CONFIG_SENSORS_CORETEMP is not set
# CONFIG_SENSORS_IT87 is not set
# CONFIG_SENSORS_JC42 is not set
# CONFIG_SENSORS_POWR1220 is not set
# CONFIG_SENSORS_LINEAGE is not set
# CONFIG_SENSORS_LTC2945 is not set
# CONFIG_SENSORS_LTC4151 is not set
# CONFIG_SENSORS_LTC4215 is not set
# CONFIG_SENSORS_LTC4222 is not set
# CONFIG_SENSORS_LTC4245 is not set
# CONFIG_SENSORS_LTC4260 is not set
# CONFIG_SENSORS_LTC4261 is not set
# CONFIG_SENSORS_MAX16065 is not set
# CONFIG_SENSORS_MAX1619 is not set
# CONFIG_SENSORS_MAX1668 is not set
# CONFIG_SENSORS_MAX197 is not set
# CONFIG_SENSORS_MAX6639 is not set
# CONFIG_SENSORS_MAX6642 is not set
# CONFIG_SENSORS_MAX6650 is not set
# CONFIG_SENSORS_MAX6697 is not set
# CONFIG_SENSORS_HTU21 is not set
# CONFIG_SENSORS_MCP3021 is not set
# CONFIG_SENSORS_LM63 is not set
# CONFIG_SENSORS_LM73 is not set
# CONFIG_SENSORS_LM75 is not set
# CONFIG_SENSORS_LM77 is not set
# CONFIG_SENSORS_LM78 is not set
# CONFIG_SENSORS_LM80 is not set
# CONFIG_SENSORS_LM83 is not set
# CONFIG_SENSORS_LM85 is not set
# CONFIG_SENSORS_LM87 is not set
# CONFIG_SENSORS_LM90 is not set
# CONFIG_SENSORS_LM92 is not set
# CONFIG_SENSORS_LM93 is not set
# CONFIG_SENSORS_LM95234 is not set
# CONFIG_SENSORS_LM95241 is not set
# CONFIG_SENSORS_LM95245 is not set
# CONFIG_SENSORS_PC87360 is not set
# CONFIG_SENSORS_PC87427 is not set
# CONFIG_SENSORS_NTC_THERMISTOR is not set
# CONFIG_SENSORS_NCT6683 is not set
# CONFIG_SENSORS_NCT6775 is not set
# CONFIG_SENSORS_NCT7802 is not set
# CONFIG_SENSORS_PCF8591 is not set
# CONFIG_PMBUS is not set
# CONFIG_SENSORS_SHT21 is not set
# CONFIG_SENSORS_SHTC1 is not set
# CONFIG_SENSORS_SIS5595 is not set
# CONFIG_SENSORS_DME1737 is not set
# CONFIG_SENSORS_EMC1403 is not set
# CONFIG_SENSORS_EMC2103 is not set
# CONFIG_SENSORS_EMC6W201 is not set
# CONFIG_SENSORS_SMSC47M1 is not set
# CONFIG_SENSORS_SMSC47M192 is not set
# CONFIG_SENSORS_SMSC47B397 is not set
# CONFIG_SENSORS_SCH56XX_COMMON is not set
# CONFIG_SENSORS_SCH5627 is not set
# CONFIG_SENSORS_SCH5636 is not set
# CONFIG_SENSORS_SMM665 is not set
# CONFIG_SENSORS_ADC128D818 is not set
# CONFIG_SENSORS_ADS1015 is not set
# CONFIG_SENSORS_ADS7828 is not set
# CONFIG_SENSORS_AMC6821 is not set
# CONFIG_SENSORS_INA209 is not set
# CONFIG_SENSORS_INA2XX is not set
# CONFIG_SENSORS_THMC50 is not set
# CONFIG_SENSORS_TMP102 is not set
# CONFIG_SENSORS_TMP103 is not set
# CONFIG_SENSORS_TMP401 is not set
# CONFIG_SENSORS_TMP421 is not set
# CONFIG_SENSORS_VIA_CPUTEMP is not set
# CONFIG_SENSORS_VIA686A is not set
# CONFIG_SENSORS_VT1211 is not set
# CONFIG_SENSORS_VT8231 is not set
# CONFIG_SENSORS_W83781D is not set
# CONFIG_SENSORS_W83791D is not set
# CONFIG_SENSORS_W83792D is not set
# CONFIG_SENSORS_W83793 is not set
# CONFIG_SENSORS_W83795 is not set
# CONFIG_SENSORS_W83L785TS is not set
# CONFIG_SENSORS_W83L786NG is not set
# CONFIG_SENSORS_W83627HF is not set
# CONFIG_SENSORS_W83627EHF is not set

#
# ACPI drivers
#
# CONFIG_SENSORS_ACPI_POWER is not set
# CONFIG_SENSORS_ATK0110 is not set
CONFIG_THERMAL=y
CONFIG_THERMAL_HWMON=y
CONFIG_THERMAL_DEFAULT_GOV_STEP_WISE=y
# CONFIG_THERMAL_DEFAULT_GOV_FAIR_SHARE is not set
# CONFIG_THERMAL_DEFAULT_GOV_USER_SPACE is not set
CONFIG_THERMAL_GOV_FAIR_SHARE=y
CONFIG_THERMAL_GOV_STEP_WISE=y
# CONFIG_THERMAL_GOV_BANG_BANG is not set
CONFIG_THERMAL_GOV_USER_SPACE=y
# CONFIG_THERMAL_EMULATION is not set
# CONFIG_INTEL_POWERCLAMP is not set
# CONFIG_INT340X_THERMAL is not set

#
# Texas Instruments thermal drivers
#
CONFIG_WATCHDOG=y
CONFIG_WATCHDOG_CORE=y
# CONFIG_WATCHDOG_NOWAYOUT is not set

#
# Watchdog Device Drivers
#
# CONFIG_SOFT_WATCHDOG is not set
# CONFIG_XILINX_WATCHDOG is not set
# CONFIG_DW_WATCHDOG is not set
# CONFIG_ACQUIRE_WDT is not set
# CONFIG_ADVANTECH_WDT is not set
# CONFIG_ALIM1535_WDT is not set
# CONFIG_ALIM7101_WDT is not set
# CONFIG_F71808E_WDT is not set
# CONFIG_SP5100_TCO is not set
# CONFIG_SBC_FITPC2_WATCHDOG is not set
# CONFIG_EUROTECH_WDT is not set
# CONFIG_IB700_WDT is not set
# CONFIG_IBMASR is not set
# CONFIG_WAFER_WDT is not set
# CONFIG_I6300ESB_WDT is not set
# CONFIG_IE6XX_WDT is not set
# CONFIG_ITCO_WDT is not set
# CONFIG_IT8712F_WDT is not set
# CONFIG_IT87_WDT is not set
# CONFIG_HP_WATCHDOG is not set
# CONFIG_SC1200_WDT is not set
# CONFIG_PC87413_WDT is not set
# CONFIG_NV_TCO is not set
# CONFIG_60XX_WDT is not set
# CONFIG_CPU5_WDT is not set
# CONFIG_SMSC_SCH311X_WDT is not set
# CONFIG_SMSC37B787_WDT is not set
# CONFIG_VIA_WDT is not set
# CONFIG_W83627HF_WDT is not set
# CONFIG_W83877F_WDT is not set
# CONFIG_W83977F_WDT is not set
# CONFIG_MACHZ_WDT is not set
# CONFIG_SBC_EPX_C3_WATCHDOG is not set

#
# PCI-based Watchdog Cards
#
# CONFIG_PCIPCWATCHDOG is not set
# CONFIG_WDTPCI is not set
CONFIG_SSB_POSSIBLE=y

#
# Sonics Silicon Backplane
#
# CONFIG_SSB is not set
CONFIG_BCMA_POSSIBLE=y

#
# Broadcom specific AMBA
#
# CONFIG_BCMA is not set

#
# Multifunction device drivers
#
CONFIG_MFD_CORE=y
# CONFIG_MFD_AS3711 is not set
# CONFIG_PMIC_ADP5520 is not set
# CONFIG_MFD_BCM590XX is not set
# CONFIG_MFD_AXP20X is not set
# CONFIG_MFD_CROS_EC is not set
# CONFIG_PMIC_DA903X is not set
# CONFIG_MFD_DA9052_I2C is not set
# CONFIG_MFD_DA9055 is not set
# CONFIG_MFD_DA9063 is not set
# CONFIG_MFD_DA9150 is not set
# CONFIG_MFD_MC13XXX_I2C is not set
# CONFIG_HTC_PASIC3 is not set
# CONFIG_LPC_ICH is not set
# CONFIG_LPC_SCH is not set
# CONFIG_INTEL_SOC_PMIC is not set
# CONFIG_MFD_JANZ_CMODIO is not set
# CONFIG_MFD_KEMPLD is not set
# CONFIG_MFD_88PM800 is not set
# CONFIG_MFD_88PM805 is not set
# CONFIG_MFD_88PM860X is not set
# CONFIG_MFD_MAX14577 is not set
# CONFIG_MFD_MAX77693 is not set
# CONFIG_MFD_MAX8907 is not set
# CONFIG_MFD_MAX8925 is not set
# CONFIG_MFD_MAX8997 is not set
# CONFIG_MFD_MAX8998 is not set
# CONFIG_MFD_MENF21BMC is not set
# CONFIG_MFD_RETU is not set
# CONFIG_MFD_PCF50633 is not set
# CONFIG_MFD_RDC321X is not set
# CONFIG_MFD_RTSX_PCI is not set
# CONFIG_MFD_RT5033 is not set
# CONFIG_MFD_RC5T583 is not set
# CONFIG_MFD_RN5T618 is not set
CONFIG_MFD_SEC_CORE=y
# CONFIG_MFD_SI476X_CORE is not set
# CONFIG_MFD_SM501 is not set
# CONFIG_MFD_SMSC is not set
CONFIG_ABX500_CORE=y
# CONFIG_AB3100_CORE is not set
# CONFIG_MFD_SYSCON is not set
# CONFIG_MFD_TI_AM335X_TSCADC is not set
# CONFIG_MFD_LP3943 is not set
# CONFIG_MFD_LP8788 is not set
# CONFIG_MFD_PALMAS is not set
# CONFIG_TPS6105X is not set
# CONFIG_TPS6507X is not set
# CONFIG_MFD_TPS65090 is not set
# CONFIG_MFD_TPS65217 is not set
# CONFIG_MFD_TPS65218 is not set
# CONFIG_MFD_TPS6586X is not set
# CONFIG_MFD_TPS80031 is not set
# CONFIG_TWL4030_CORE is not set
# CONFIG_TWL6040_CORE is not set
# CONFIG_MFD_WL1273_CORE is not set
# CONFIG_MFD_LM3533 is not set
# CONFIG_MFD_TC3589X is not set
# CONFIG_MFD_TMIO is not set
# CONFIG_MFD_VX855 is not set
# CONFIG_MFD_ARIZONA_I2C is not set
# CONFIG_MFD_WM8400 is not set
# CONFIG_MFD_WM831X_I2C is not set
# CONFIG_MFD_WM8350_I2C is not set
# CONFIG_MFD_WM8994 is not set
CONFIG_REGULATOR=y
# CONFIG_REGULATOR_DEBUG is not set
# CONFIG_REGULATOR_FIXED_VOLTAGE is not set
# CONFIG_REGULATOR_VIRTUAL_CONSUMER is not set
# CONFIG_REGULATOR_USERSPACE_CONSUMER is not set
# CONFIG_REGULATOR_ACT8865 is not set
# CONFIG_REGULATOR_AD5398 is not set
# CONFIG_REGULATOR_DA9210 is not set
# CONFIG_REGULATOR_DA9211 is not set
# CONFIG_REGULATOR_FAN53555 is not set
# CONFIG_REGULATOR_ISL9305 is not set
# CONFIG_REGULATOR_ISL6271A is not set
# CONFIG_REGULATOR_LP3971 is not set
# CONFIG_REGULATOR_LP3972 is not set
# CONFIG_REGULATOR_LP872X is not set
# CONFIG_REGULATOR_LP8755 is not set
# CONFIG_REGULATOR_LTC3589 is not set
# CONFIG_REGULATOR_MAX1586 is not set
# CONFIG_REGULATOR_MAX8649 is not set
# CONFIG_REGULATOR_MAX8660 is not set
# CONFIG_REGULATOR_MAX8952 is not set
# CONFIG_REGULATOR_MAX8973 is not set
# CONFIG_REGULATOR_PFUZE100 is not set
# CONFIG_REGULATOR_S2MPA01 is not set
# CONFIG_REGULATOR_S2MPS11 is not set
# CONFIG_REGULATOR_S5M8767 is not set
# CONFIG_REGULATOR_TPS51632 is not set
# CONFIG_REGULATOR_TPS62360 is not set
# CONFIG_REGULATOR_TPS65023 is not set
# CONFIG_REGULATOR_TPS6507X is not set
# CONFIG_MEDIA_SUPPORT is not set

#
# Graphics support
#
CONFIG_AGP=y
CONFIG_AGP_AMD64=y
CONFIG_AGP_INTEL=y
# CONFIG_AGP_SIS is not set
# CONFIG_AGP_VIA is not set
CONFIG_INTEL_GTT=y
CONFIG_VGA_ARB=y
CONFIG_VGA_ARB_MAX_GPUS=16
CONFIG_VGA_SWITCHEROO=y

#
# Direct Rendering Manager
#
CONFIG_DRM=y
CONFIG_DRM_KMS_HELPER=y
CONFIG_DRM_KMS_FB_HELPER=y
CONFIG_DRM_LOAD_EDID_FIRMWARE=y
CONFIG_DRM_TTM=y

#
# I2C encoder or helper chips
#
# CONFIG_DRM_I2C_ADV7511 is not set
# CONFIG_DRM_I2C_CH7006 is not set
# CONFIG_DRM_I2C_SIL164 is not set
# CONFIG_DRM_I2C_NXP_TDA998X is not set
# CONFIG_DRM_TDFX is not set
# CONFIG_DRM_R128 is not set
# CONFIG_DRM_RADEON is not set
# CONFIG_DRM_NOUVEAU is not set
# CONFIG_DRM_I915 is not set
# CONFIG_DRM_MGA is not set
# CONFIG_DRM_SIS is not set
# CONFIG_DRM_VIA is not set
# CONFIG_DRM_SAVAGE is not set
# CONFIG_DRM_VMWGFX is not set
# CONFIG_DRM_GMA500 is not set
# CONFIG_DRM_UDL is not set
# CONFIG_DRM_AST is not set
# CONFIG_DRM_MGAG200 is not set
# CONFIG_DRM_CIRRUS_QEMU is not set
# CONFIG_DRM_QXL is not set
CONFIG_DRM_BOCHS=y

#
# Frame buffer Devices
#
CONFIG_FB=y
CONFIG_FIRMWARE_EDID=y
CONFIG_FB_CMDLINE=y
# CONFIG_FB_DDC is not set
CONFIG_FB_BOOT_VESA_SUPPORT=y
CONFIG_FB_CFB_FILLRECT=y
CONFIG_FB_CFB_COPYAREA=y
CONFIG_FB_CFB_IMAGEBLIT=y
# CONFIG_FB_CFB_REV_PIXELS_IN_BYTE is not set
CONFIG_FB_SYS_FILLRECT=y
CONFIG_FB_SYS_COPYAREA=y
CONFIG_FB_SYS_IMAGEBLIT=y
# CONFIG_FB_FOREIGN_ENDIAN is not set
# CONFIG_FB_SYS_FOPS is not set
# CONFIG_FB_SVGALIB is not set
# CONFIG_FB_MACMODES is not set
# CONFIG_FB_BACKLIGHT is not set
CONFIG_FB_MODE_HELPERS=y
CONFIG_FB_TILEBLITTING=y

#
# Frame buffer hardware drivers
#
# CONFIG_FB_CIRRUS is not set
# CONFIG_FB_PM2 is not set
# CONFIG_FB_CYBER2000 is not set
# CONFIG_FB_ARC is not set
# CONFIG_FB_ASILIANT is not set
# CONFIG_FB_IMSTT is not set
# CONFIG_FB_VGA16 is not set
# CONFIG_FB_UVESA is not set
CONFIG_FB_VESA=y
# CONFIG_FB_N411 is not set
# CONFIG_FB_HGA is not set
# CONFIG_FB_OPENCORES is not set
# CONFIG_FB_S1D13XXX is not set
# CONFIG_FB_NVIDIA is not set
# CONFIG_FB_RIVA is not set
# CONFIG_FB_I740 is not set
# CONFIG_FB_LE80578 is not set
# CONFIG_FB_MATROX is not set
# CONFIG_FB_RADEON is not set
# CONFIG_FB_ATY128 is not set
# CONFIG_FB_ATY is not set
# CONFIG_FB_S3 is not set
# CONFIG_FB_SAVAGE is not set
# CONFIG_FB_SIS is not set
# CONFIG_FB_NEOMAGIC is not set
# CONFIG_FB_KYRO is not set
# CONFIG_FB_3DFX is not set
# CONFIG_FB_VOODOO1 is not set
# CONFIG_FB_VT8623 is not set
# CONFIG_FB_TRIDENT is not set
# CONFIG_FB_ARK is not set
# CONFIG_FB_PM3 is not set
# CONFIG_FB_CARMINE is not set
# CONFIG_FB_VIRTUAL is not set
# CONFIG_FB_METRONOME is not set
# CONFIG_FB_MB862XX is not set
# CONFIG_FB_BROADSHEET is not set
# CONFIG_FB_AUO_K190X is not set
# CONFIG_FB_SIMPLE is not set
CONFIG_BACKLIGHT_LCD_SUPPORT=y
# CONFIG_LCD_CLASS_DEVICE is not set
CONFIG_BACKLIGHT_CLASS_DEVICE=y
# CONFIG_BACKLIGHT_GENERIC is not set
# CONFIG_BACKLIGHT_APPLE is not set
# CONFIG_BACKLIGHT_SAHARA is not set
# CONFIG_BACKLIGHT_ADP8860 is not set
# CONFIG_BACKLIGHT_ADP8870 is not set
# CONFIG_BACKLIGHT_LM3639 is not set
# CONFIG_BACKLIGHT_LV5207LP is not set
# CONFIG_BACKLIGHT_BD6107 is not set
# CONFIG_VGASTATE is not set
CONFIG_HDMI=y

#
# Console display driver support
#
CONFIG_VGA_CONSOLE=y
# CONFIG_VGACON_SOFT_SCROLLBACK is not set
CONFIG_DUMMY_CONSOLE=y
CONFIG_DUMMY_CONSOLE_COLUMNS=80
CONFIG_DUMMY_CONSOLE_ROWS=25
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y
# CONFIG_LOGO is not set
# CONFIG_SOUND is not set

#
# HID support
#
# CONFIG_HID is not set

#
# I2C HID support
#
# CONFIG_I2C_HID is not set
CONFIG_USB_OHCI_LITTLE_ENDIAN=y
CONFIG_USB_SUPPORT=y
CONFIG_USB_ARCH_HAS_HCD=y
# CONFIG_USB is not set

#
# USB port drivers
#

#
# USB Physical Layer drivers
#
# CONFIG_USB_PHY is not set
# CONFIG_NOP_USB_XCEIV is not set
# CONFIG_USB_GADGET is not set
# CONFIG_UWB is not set
# CONFIG_MMC is not set
# CONFIG_MEMSTICK is not set
CONFIG_NEW_LEDS=y
CONFIG_LEDS_CLASS=y
# CONFIG_LEDS_CLASS_FLASH is not set

#
# LED drivers
#
# CONFIG_LEDS_LM3530 is not set
# CONFIG_LEDS_LM3642 is not set
# CONFIG_LEDS_PCA9532 is not set
# CONFIG_LEDS_LP3944 is not set
# CONFIG_LEDS_LP5521 is not set
# CONFIG_LEDS_LP5523 is not set
# CONFIG_LEDS_LP5562 is not set
# CONFIG_LEDS_LP8501 is not set
# CONFIG_LEDS_LP8860 is not set
# CONFIG_LEDS_CLEVO_MAIL is not set
# CONFIG_LEDS_PCA955X is not set
# CONFIG_LEDS_PCA963X is not set
# CONFIG_LEDS_REGULATOR is not set
# CONFIG_LEDS_BD2802 is not set
# CONFIG_LEDS_INTEL_SS4200 is not set
# CONFIG_LEDS_TCA6507 is not set
# CONFIG_LEDS_LM355x is not set

#
# LED driver for blink(1) USB RGB LED is under Special HID drivers (HID_THINGM)
#
# CONFIG_LEDS_BLINKM is not set

#
# LED Triggers
#
CONFIG_LEDS_TRIGGERS=y
# CONFIG_LEDS_TRIGGER_TIMER is not set
# CONFIG_LEDS_TRIGGER_ONESHOT is not set
# CONFIG_LEDS_TRIGGER_HEARTBEAT is not set
# CONFIG_LEDS_TRIGGER_BACKLIGHT is not set
# CONFIG_LEDS_TRIGGER_CPU is not set
# CONFIG_LEDS_TRIGGER_DEFAULT_ON is not set

#
# iptables trigger is under Netfilter config (LED target)
#
# CONFIG_LEDS_TRIGGER_TRANSIENT is not set
# CONFIG_LEDS_TRIGGER_CAMERA is not set
# CONFIG_ACCESSIBILITY is not set
# CONFIG_INFINIBAND is not set
CONFIG_EDAC=y
CONFIG_EDAC_LEGACY_SYSFS=y
# CONFIG_EDAC_DEBUG is not set
# CONFIG_EDAC_MM_EDAC is not set
CONFIG_RTC_LIB=y
CONFIG_RTC_CLASS=y
CONFIG_RTC_HCTOSYS=y
CONFIG_RTC_SYSTOHC=y
CONFIG_RTC_HCTOSYS_DEVICE="rtc0"
# CONFIG_RTC_DEBUG is not set

#
# RTC interfaces
#
CONFIG_RTC_INTF_SYSFS=y
CONFIG_RTC_INTF_PROC=y
CONFIG_RTC_INTF_DEV=y
CONFIG_RTC_INTF_DEV_UIE_EMUL=y
# CONFIG_RTC_DRV_TEST is not set

#
# I2C RTC drivers
#
# CONFIG_RTC_DRV_ABB5ZES3 is not set
# CONFIG_RTC_DRV_DS1307 is not set
# CONFIG_RTC_DRV_DS1374 is not set
# CONFIG_RTC_DRV_DS1672 is not set
# CONFIG_RTC_DRV_DS3232 is not set
# CONFIG_RTC_DRV_MAX6900 is not set
# CONFIG_RTC_DRV_RS5C372 is not set
# CONFIG_RTC_DRV_ISL1208 is not set
# CONFIG_RTC_DRV_ISL12022 is not set
# CONFIG_RTC_DRV_ISL12057 is not set
# CONFIG_RTC_DRV_X1205 is not set
# CONFIG_RTC_DRV_PCF2127 is not set
# CONFIG_RTC_DRV_PCF8523 is not set
# CONFIG_RTC_DRV_PCF8563 is not set
# CONFIG_RTC_DRV_PCF85063 is not set
# CONFIG_RTC_DRV_PCF8583 is not set
# CONFIG_RTC_DRV_M41T80 is not set
# CONFIG_RTC_DRV_BQ32K is not set
# CONFIG_RTC_DRV_S35390A is not set
# CONFIG_RTC_DRV_FM3130 is not set
# CONFIG_RTC_DRV_RX8581 is not set
# CONFIG_RTC_DRV_RX8025 is not set
# CONFIG_RTC_DRV_EM3027 is not set
# CONFIG_RTC_DRV_RV3029C2 is not set
# CONFIG_RTC_DRV_S5M is not set

#
# SPI RTC drivers
#

#
# Platform RTC drivers
#
CONFIG_RTC_DRV_CMOS=y
# CONFIG_RTC_DRV_DS1286 is not set
# CONFIG_RTC_DRV_DS1511 is not set
# CONFIG_RTC_DRV_DS1553 is not set
# CONFIG_RTC_DRV_DS1685_FAMILY is not set
# CONFIG_RTC_DRV_DS1742 is not set
# CONFIG_RTC_DRV_DS2404 is not set
# CONFIG_RTC_DRV_STK17TA8 is not set
# CONFIG_RTC_DRV_M48T86 is not set
# CONFIG_RTC_DRV_M48T35 is not set
# CONFIG_RTC_DRV_M48T59 is not set
# CONFIG_RTC_DRV_MSM6242 is not set
# CONFIG_RTC_DRV_BQ4802 is not set
# CONFIG_RTC_DRV_RP5C01 is not set
# CONFIG_RTC_DRV_V3020 is not set

#
# on-CPU RTC drivers
#
# CONFIG_RTC_DRV_XGENE is not set

#
# HID Sensor RTC drivers
#
# CONFIG_DMADEVICES is not set
# CONFIG_AUXDISPLAY is not set
# CONFIG_UIO is not set
# CONFIG_VFIO is not set
# CONFIG_VIRT_DRIVERS is not set
CONFIG_VIRTIO=y

#
# Virtio drivers
#
CONFIG_VIRTIO_PCI=y
CONFIG_VIRTIO_PCI_LEGACY=y
CONFIG_VIRTIO_BALLOON=y
CONFIG_VIRTIO_MMIO=y
CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y

#
# Microsoft Hyper-V guest support
#
# CONFIG_HYPERV is not set
# CONFIG_STAGING is not set
# CONFIG_X86_PLATFORM_DEVICES is not set
# CONFIG_CHROME_PLATFORMS is not set

#
# Hardware Spinlock drivers
#

#
# Clock Source drivers
#
CONFIG_CLKEVT_I8253=y
CONFIG_I8253_LOCK=y
CONFIG_CLKBLD_I8253=y
# CONFIG_ATMEL_PIT is not set
# CONFIG_SH_TIMER_CMT is not set
# CONFIG_SH_TIMER_MTU2 is not set
# CONFIG_SH_TIMER_TMU is not set
# CONFIG_EM_TIMER_STI is not set
# CONFIG_ASM9260_TIMER is not set
# CONFIG_MAILBOX is not set
CONFIG_IOMMU_API=y
CONFIG_IOMMU_SUPPORT=y

#
# Generic IOMMU Pagetable Support
#
# CONFIG_IOMMU_IO_PGTABLE_LPAE is not set
CONFIG_IOMMU_IOVA=y
CONFIG_AMD_IOMMU=y
# CONFIG_AMD_IOMMU_STATS is not set
# CONFIG_AMD_IOMMU_V2 is not set
CONFIG_DMAR_TABLE=y
CONFIG_INTEL_IOMMU=y
# CONFIG_INTEL_IOMMU_DEFAULT_ON is not set
CONFIG_INTEL_IOMMU_FLOPPY_WA=y
CONFIG_IRQ_REMAP=y

#
# Remoteproc drivers
#
# CONFIG_STE_MODEM_RPROC is not set

#
# Rpmsg drivers
#

#
# SOC (System On Chip) specific Drivers
#
# CONFIG_SOC_TI is not set
# CONFIG_PM_DEVFREQ is not set
# CONFIG_EXTCON is not set
CONFIG_MEMORY=y
# CONFIG_IIO is not set
# CONFIG_NTB is not set
# CONFIG_VME_BUS is not set
# CONFIG_PWM is not set
# CONFIG_IPACK_BUS is not set
# CONFIG_RESET_CONTROLLER is not set
# CONFIG_FMC is not set

#
# PHY Subsystem
#
# CONFIG_GENERIC_PHY is not set
# CONFIG_BCM_KONA_USB2_PHY is not set
# CONFIG_POWERCAP is not set
# CONFIG_MCB is not set
CONFIG_RAS=y
# CONFIG_THUNDERBOLT is not set

#
# Android
#
# CONFIG_ANDROID is not set

#
# Firmware Drivers
#
# CONFIG_EDD is not set
CONFIG_FIRMWARE_MEMMAP=y
# CONFIG_DELL_RBU is not set
# CONFIG_DCDBAS is not set
# CONFIG_DMIID is not set
# CONFIG_DMI_SYSFS is not set
CONFIG_DMI_SCAN_MACHINE_NON_EFI_FALLBACK=y
# CONFIG_ISCSI_IBFT_FIND is not set
# CONFIG_GOOGLE_FIRMWARE is not set
CONFIG_UEFI_CPER=y

#
# File systems
#
CONFIG_DCACHE_WORD_ACCESS=y
# CONFIG_EXT2_FS is not set
# CONFIG_EXT3_FS is not set
CONFIG_EXT4_FS=y
# CONFIG_EXT4_USE_FOR_EXT23 is not set
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_EXT4_FS_SECURITY=y
# CONFIG_EXT4_DEBUG is not set
CONFIG_JBD2=y
# CONFIG_JBD2_DEBUG is not set
CONFIG_FS_MBCACHE=y
# CONFIG_REISERFS_FS is not set
# CONFIG_JFS_FS is not set
# CONFIG_XFS_FS is not set
# CONFIG_GFS2_FS is not set
# CONFIG_OCFS2_FS is not set
# CONFIG_BTRFS_FS is not set
# CONFIG_NILFS2_FS is not set
# CONFIG_FS_DAX is not set
CONFIG_FS_POSIX_ACL=y
CONFIG_EXPORTFS=y
CONFIG_FILE_LOCKING=y
CONFIG_FSNOTIFY=y
CONFIG_DNOTIFY=y
CONFIG_INOTIFY_USER=y
CONFIG_FANOTIFY=y
CONFIG_QUOTA=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
# CONFIG_PRINT_QUOTA_WARNING is not set
# CONFIG_QUOTA_DEBUG is not set
# CONFIG_QFMT_V1 is not set
# CONFIG_QFMT_V2 is not set
CONFIG_QUOTACTL=y
CONFIG_AUTOFS4_FS=y
# CONFIG_FUSE_FS is not set
# CONFIG_OVERLAY_FS is not set

#
# Caches
#
# CONFIG_FSCACHE is not set

#
# CD-ROM/DVD Filesystems
#
# CONFIG_ISO9660_FS is not set
# CONFIG_UDF_FS is not set

#
# DOS/FAT/NT Filesystems
#
# CONFIG_MSDOS_FS is not set
# CONFIG_VFAT_FS is not set
# CONFIG_NTFS_FS is not set

#
# Pseudo filesystems
#
CONFIG_PROC_FS=y
CONFIG_PROC_KCORE=y
CONFIG_PROC_SYSCTL=y
CONFIG_PROC_PAGE_MONITOR=y
CONFIG_KERNFS=y
CONFIG_SYSFS=y
CONFIG_TMPFS=y
CONFIG_TMPFS_POSIX_ACL=y
CONFIG_TMPFS_XATTR=y
CONFIG_HUGETLBFS=y
CONFIG_HUGETLB_PAGE=y
CONFIG_CONFIGFS_FS=y
CONFIG_MISC_FILESYSTEMS=y
# CONFIG_ADFS_FS is not set
# CONFIG_AFFS_FS is not set
# CONFIG_ECRYPT_FS is not set
# CONFIG_HFS_FS is not set
# CONFIG_HFSPLUS_FS is not set
# CONFIG_BEFS_FS is not set
# CONFIG_BFS_FS is not set
# CONFIG_EFS_FS is not set
# CONFIG_LOGFS is not set
# CONFIG_CRAMFS is not set
# CONFIG_SQUASHFS is not set
# CONFIG_VXFS_FS is not set
# CONFIG_MINIX_FS is not set
# CONFIG_OMFS_FS is not set
# CONFIG_HPFS_FS is not set
# CONFIG_QNX4FS_FS is not set
# CONFIG_QNX6FS_FS is not set
# CONFIG_ROMFS_FS is not set
CONFIG_PSTORE=y
# CONFIG_PSTORE_CONSOLE is not set
# CONFIG_PSTORE_PMSG is not set
# CONFIG_PSTORE_RAM is not set
# CONFIG_SYSV_FS is not set
# CONFIG_UFS_FS is not set
# CONFIG_F2FS_FS is not set
CONFIG_NETWORK_FILESYSTEMS=y
CONFIG_NFS_FS=y
CONFIG_NFS_V2=y
CONFIG_NFS_V3=y
CONFIG_NFS_V3_ACL=y
CONFIG_NFS_V4=y
CONFIG_NFS_SWAP=y
CONFIG_NFS_V4_1=y
CONFIG_NFS_V4_2=y
CONFIG_PNFS_FILE_LAYOUT=y
CONFIG_PNFS_FLEXFILE_LAYOUT=m
CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN="kernel.org"
CONFIG_NFS_V4_1_MIGRATION=y
CONFIG_NFS_USE_LEGACY_DNS=y
CONFIG_NFSD=y
CONFIG_NFSD_V2_ACL=y
CONFIG_NFSD_V3=y
CONFIG_NFSD_V3_ACL=y
CONFIG_NFSD_V4=y
# CONFIG_NFSD_PNFS is not set
CONFIG_NFSD_FAULT_INJECTION=y
CONFIG_GRACE_PERIOD=y
CONFIG_LOCKD=y
CONFIG_LOCKD_V4=y
CONFIG_NFS_ACL_SUPPORT=y
CONFIG_NFS_COMMON=y
CONFIG_SUNRPC=y
CONFIG_SUNRPC_GSS=y
CONFIG_SUNRPC_BACKCHANNEL=y
CONFIG_SUNRPC_SWAP=y
# CONFIG_SUNRPC_DEBUG is not set
# CONFIG_CEPH_FS is not set
# CONFIG_CIFS is not set
# CONFIG_NCP_FS is not set
# CONFIG_CODA_FS is not set
# CONFIG_AFS_FS is not set
CONFIG_NLS=y
CONFIG_NLS_DEFAULT="utf8"
# CONFIG_NLS_CODEPAGE_437 is not set
# CONFIG_NLS_CODEPAGE_737 is not set
# CONFIG_NLS_CODEPAGE_775 is not set
# CONFIG_NLS_CODEPAGE_850 is not set
# CONFIG_NLS_CODEPAGE_852 is not set
# CONFIG_NLS_CODEPAGE_855 is not set
# CONFIG_NLS_CODEPAGE_857 is not set
# CONFIG_NLS_CODEPAGE_860 is not set
# CONFIG_NLS_CODEPAGE_861 is not set
# CONFIG_NLS_CODEPAGE_862 is not set
# CONFIG_NLS_CODEPAGE_863 is not set
# CONFIG_NLS_CODEPAGE_864 is not set
# CONFIG_NLS_CODEPAGE_865 is not set
# CONFIG_NLS_CODEPAGE_866 is not set
# CONFIG_NLS_CODEPAGE_869 is not set
# CONFIG_NLS_CODEPAGE_936 is not set
# CONFIG_NLS_CODEPAGE_950 is not set
# CONFIG_NLS_CODEPAGE_932 is not set
# CONFIG_NLS_CODEPAGE_949 is not set
# CONFIG_NLS_CODEPAGE_874 is not set
# CONFIG_NLS_ISO8859_8 is not set
# CONFIG_NLS_CODEPAGE_1250 is not set
# CONFIG_NLS_CODEPAGE_1251 is not set
# CONFIG_NLS_ASCII is not set
CONFIG_NLS_ISO8859_1=y
# CONFIG_NLS_ISO8859_2 is not set
# CONFIG_NLS_ISO8859_3 is not set
# CONFIG_NLS_ISO8859_4 is not set
# CONFIG_NLS_ISO8859_5 is not set
# CONFIG_NLS_ISO8859_6 is not set
# CONFIG_NLS_ISO8859_7 is not set
# CONFIG_NLS_ISO8859_9 is not set
# CONFIG_NLS_ISO8859_13 is not set
# CONFIG_NLS_ISO8859_14 is not set
# CONFIG_NLS_ISO8859_15 is not set
# CONFIG_NLS_KOI8_R is not set
# CONFIG_NLS_KOI8_U is not set
# CONFIG_NLS_MAC_ROMAN is not set
# CONFIG_NLS_MAC_CELTIC is not set
# CONFIG_NLS_MAC_CENTEURO is not set
# CONFIG_NLS_MAC_CROATIAN is not set
# CONFIG_NLS_MAC_CYRILLIC is not set
# CONFIG_NLS_MAC_GAELIC is not set
# CONFIG_NLS_MAC_GREEK is not set
# CONFIG_NLS_MAC_ICELAND is not set
# CONFIG_NLS_MAC_INUIT is not set
# CONFIG_NLS_MAC_ROMANIAN is not set
# CONFIG_NLS_MAC_TURKISH is not set
# CONFIG_NLS_UTF8 is not set
# CONFIG_DLM is not set

#
# Kernel hacking
#
CONFIG_TRACE_IRQFLAGS_SUPPORT=y

#
# printk and dmesg options
#
CONFIG_PRINTK_TIME=y
CONFIG_MESSAGE_LOGLEVEL_DEFAULT=4
# CONFIG_BOOT_PRINTK_DELAY is not set
CONFIG_DYNAMIC_DEBUG=y

#
# Compile-time checks and compiler options
#
# CONFIG_DEBUG_INFO is not set
CONFIG_ENABLE_WARN_DEPRECATED=y
CONFIG_ENABLE_MUST_CHECK=y
CONFIG_FRAME_WARN=2048
CONFIG_STRIP_ASM_SYMS=y
# CONFIG_READABLE_ASM is not set
CONFIG_UNUSED_SYMBOLS=y
# CONFIG_PAGE_OWNER is not set
CONFIG_DEBUG_FS=y
# CONFIG_HEADERS_CHECK is not set
# CONFIG_DEBUG_SECTION_MISMATCH is not set
CONFIG_ARCH_WANT_FRAME_POINTERS=y
CONFIG_FRAME_POINTER=y
# CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set
CONFIG_MAGIC_SYSRQ=y
CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE=0x1
CONFIG_DEBUG_KERNEL=y

#
# Memory Debugging
#
# CONFIG_PAGE_EXTENSION is not set
# CONFIG_DEBUG_PAGEALLOC is not set
# CONFIG_DEBUG_OBJECTS is not set
# CONFIG_SLUB_DEBUG_ON is not set
# CONFIG_SLUB_STATS is not set
CONFIG_HAVE_DEBUG_KMEMLEAK=y
# CONFIG_DEBUG_KMEMLEAK is not set
# CONFIG_DEBUG_STACK_USAGE is not set
# CONFIG_DEBUG_VM is not set
# CONFIG_DEBUG_VIRTUAL is not set
CONFIG_DEBUG_MEMORY_INIT=y
# CONFIG_DEBUG_PER_CPU_MAPS is not set
CONFIG_HAVE_DEBUG_STACKOVERFLOW=y
# CONFIG_DEBUG_STACKOVERFLOW is not set
CONFIG_HAVE_ARCH_KMEMCHECK=y
# CONFIG_KMEMCHECK is not set
# CONFIG_DEBUG_SHIRQ is not set

#
# Debug Lockups and Hangs
#
# CONFIG_LOCKUP_DETECTOR is not set
# CONFIG_DETECT_HUNG_TASK is not set
# CONFIG_PANIC_ON_OOPS is not set
CONFIG_PANIC_ON_OOPS_VALUE=0
CONFIG_PANIC_TIMEOUT=0
# CONFIG_SCHED_DEBUG is not set
# CONFIG_SCHEDSTATS is not set
# CONFIG_SCHED_STACK_END_CHECK is not set
# CONFIG_TIMER_STATS is not set
# CONFIG_DEBUG_PREEMPT is not set

#
# Lock Debugging (spinlocks, mutexes, etc...)
#
# CONFIG_DEBUG_RT_MUTEXES is not set
# CONFIG_DEBUG_SPINLOCK is not set
# CONFIG_DEBUG_MUTEXES is not set
# CONFIG_DEBUG_WW_MUTEX_SLOWPATH is not set
# CONFIG_DEBUG_LOCK_ALLOC is not set
# CONFIG_PROVE_LOCKING is not set
# CONFIG_LOCK_STAT is not set
# CONFIG_DEBUG_ATOMIC_SLEEP is not set
# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
# CONFIG_LOCK_TORTURE_TEST is not set
# CONFIG_STACKTRACE is not set
# CONFIG_DEBUG_KOBJECT is not set
CONFIG_DEBUG_BUGVERBOSE=y
# CONFIG_DEBUG_LIST is not set
# CONFIG_DEBUG_PI_LIST is not set
# CONFIG_DEBUG_SG is not set
# CONFIG_DEBUG_NOTIFIERS is not set
# CONFIG_DEBUG_CREDENTIALS is not set

#
# RCU Debugging
#
# CONFIG_PROVE_RCU is not set
CONFIG_SPARSE_RCU_POINTER=y
# CONFIG_TORTURE_TEST is not set
# CONFIG_RCU_TORTURE_TEST is not set
CONFIG_RCU_TORTURE_TEST_SLOW_INIT_DELAY=3
CONFIG_RCU_CPU_STALL_TIMEOUT=60
CONFIG_RCU_CPU_STALL_INFO=y
CONFIG_RCU_TRACE=y
# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
# CONFIG_NOTIFIER_ERROR_INJECTION is not set
# CONFIG_FAULT_INJECTION is not set
# CONFIG_LATENCYTOP is not set
CONFIG_ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS=y
# CONFIG_DEBUG_STRICT_USER_COPY_CHECKS is not set
CONFIG_USER_STACKTRACE_SUPPORT=y
CONFIG_HAVE_FUNCTION_TRACER=y
CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y
CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST=y
CONFIG_HAVE_DYNAMIC_FTRACE=y
CONFIG_HAVE_DYNAMIC_FTRACE_WITH_REGS=y
CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
CONFIG_HAVE_SYSCALL_TRACEPOINTS=y
CONFIG_HAVE_FENTRY=y
CONFIG_HAVE_C_RECORDMCOUNT=y
CONFIG_TRACE_CLOCK=y
CONFIG_TRACING_SUPPORT=y
# CONFIG_FTRACE is not set

#
# Runtime Testing
#
# CONFIG_LKDTM is not set
# CONFIG_TEST_LIST_SORT is not set
# CONFIG_BACKTRACE_SELF_TEST is not set
# CONFIG_RBTREE_TEST is not set
# CONFIG_INTERVAL_TREE_TEST is not set
# CONFIG_PERCPU_TEST is not set
# CONFIG_ATOMIC64_SELFTEST is not set
# CONFIG_TEST_HEXDUMP is not set
# CONFIG_TEST_STRING_HELPERS is not set
# CONFIG_TEST_KSTRTOX is not set
# CONFIG_TEST_RHASHTABLE is not set
# CONFIG_PROVIDE_OHCI1394_DMA_INIT is not set
# CONFIG_DMA_API_DEBUG is not set
# CONFIG_TEST_LKM is not set
# CONFIG_TEST_USER_COPY is not set
# CONFIG_TEST_BPF is not set
# CONFIG_TEST_FIRMWARE is not set
# CONFIG_TEST_UDELAY is not set
# CONFIG_SAMPLES is not set
CONFIG_HAVE_ARCH_KGDB=y
# CONFIG_KGDB is not set
# CONFIG_STRICT_DEVMEM is not set
CONFIG_X86_VERBOSE_BOOTUP=y
CONFIG_EARLY_PRINTK=y
# CONFIG_EARLY_PRINTK_DBGP is not set
# CONFIG_X86_PTDUMP is not set
CONFIG_DEBUG_RODATA=y
# CONFIG_DEBUG_RODATA_TEST is not set
# CONFIG_DEBUG_SET_MODULE_RONX is not set
# CONFIG_DEBUG_NX_TEST is not set
CONFIG_DOUBLEFAULT=y
# CONFIG_DEBUG_TLBFLUSH is not set
# CONFIG_IOMMU_DEBUG is not set
# CONFIG_IOMMU_STRESS is not set
CONFIG_HAVE_MMIOTRACE_SUPPORT=y
CONFIG_IO_DELAY_TYPE_0X80=0
CONFIG_IO_DELAY_TYPE_0XED=1
CONFIG_IO_DELAY_TYPE_UDELAY=2
CONFIG_IO_DELAY_TYPE_NONE=3
CONFIG_IO_DELAY_0X80=y
# CONFIG_IO_DELAY_0XED is not set
# CONFIG_IO_DELAY_UDELAY is not set
# CONFIG_IO_DELAY_NONE is not set
CONFIG_DEFAULT_IO_DELAY_TYPE=0
# CONFIG_DEBUG_BOOT_PARAMS is not set
# CONFIG_CPA_DEBUG is not set
# CONFIG_OPTIMIZE_INLINING is not set
# CONFIG_DEBUG_NMI_SELFTEST is not set
# CONFIG_X86_DEBUG_STATIC_CPU_HAS is not set

#
# Security options
#
CONFIG_KEYS=y
# CONFIG_PERSISTENT_KEYRINGS is not set
# CONFIG_BIG_KEYS is not set
# CONFIG_ENCRYPTED_KEYS is not set
# CONFIG_SECURITY_DMESG_RESTRICT is not set
# CONFIG_SECURITY is not set
# CONFIG_SECURITYFS is not set
# CONFIG_INTEL_TXT is not set
CONFIG_DEFAULT_SECURITY_DAC=y
CONFIG_DEFAULT_SECURITY=""
CONFIG_CRYPTO=y

#
# Crypto core or helper
#
CONFIG_CRYPTO_ALGAPI=y
CONFIG_CRYPTO_ALGAPI2=y
CONFIG_CRYPTO_AEAD=m
CONFIG_CRYPTO_AEAD2=y
CONFIG_CRYPTO_BLKCIPHER=y
CONFIG_CRYPTO_BLKCIPHER2=y
CONFIG_CRYPTO_HASH=y
CONFIG_CRYPTO_HASH2=y
CONFIG_CRYPTO_RNG2=y
CONFIG_CRYPTO_PCOMP2=y
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_MANAGER2=y
# CONFIG_CRYPTO_USER is not set
CONFIG_CRYPTO_MANAGER_DISABLE_TESTS=y
# CONFIG_CRYPTO_GF128MUL is not set
# CONFIG_CRYPTO_NULL is not set
# CONFIG_CRYPTO_PCRYPT is not set
CONFIG_CRYPTO_WORKQUEUE=y
# CONFIG_CRYPTO_CRYPTD is not set
# CONFIG_CRYPTO_MCRYPTD is not set
CONFIG_CRYPTO_AUTHENC=m
# CONFIG_CRYPTO_TEST is not set

#
# Authenticated Encryption with Associated Data
#
# CONFIG_CRYPTO_CCM is not set
# CONFIG_CRYPTO_GCM is not set
# CONFIG_CRYPTO_SEQIV is not set

#
# Block modes
#
CONFIG_CRYPTO_CBC=y
# CONFIG_CRYPTO_CTR is not set
# CONFIG_CRYPTO_CTS is not set
# CONFIG_CRYPTO_ECB is not set
# CONFIG_CRYPTO_LRW is not set
# CONFIG_CRYPTO_PCBC is not set
# CONFIG_CRYPTO_XTS is not set

#
# Hash modes
#
# CONFIG_CRYPTO_CMAC is not set
CONFIG_CRYPTO_HMAC=y
# CONFIG_CRYPTO_XCBC is not set
# CONFIG_CRYPTO_VMAC is not set

#
# Digest
#
CONFIG_CRYPTO_CRC32C=y
# CONFIG_CRYPTO_CRC32C_INTEL is not set
# CONFIG_CRYPTO_CRC32 is not set
# CONFIG_CRYPTO_CRC32_PCLMUL is not set
# CONFIG_CRYPTO_CRCT10DIF is not set
# CONFIG_CRYPTO_GHASH is not set
# CONFIG_CRYPTO_MD4 is not set
CONFIG_CRYPTO_MD5=y
# CONFIG_CRYPTO_MICHAEL_MIC is not set
# CONFIG_CRYPTO_RMD128 is not set
# CONFIG_CRYPTO_RMD160 is not set
# CONFIG_CRYPTO_RMD256 is not set
# CONFIG_CRYPTO_RMD320 is not set
CONFIG_CRYPTO_SHA1=y
# CONFIG_CRYPTO_SHA1_SSSE3 is not set
# CONFIG_CRYPTO_SHA256_SSSE3 is not set
# CONFIG_CRYPTO_SHA512_SSSE3 is not set
# CONFIG_CRYPTO_SHA1_MB is not set
CONFIG_CRYPTO_SHA256=y
# CONFIG_CRYPTO_SHA512 is not set
# CONFIG_CRYPTO_TGR192 is not set
# CONFIG_CRYPTO_WP512 is not set
# CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL is not set

#
# Ciphers
#
CONFIG_CRYPTO_AES=y
# CONFIG_CRYPTO_AES_X86_64 is not set
# CONFIG_CRYPTO_AES_NI_INTEL is not set
# CONFIG_CRYPTO_ANUBIS is not set
# CONFIG_CRYPTO_ARC4 is not set
# CONFIG_CRYPTO_BLOWFISH is not set
# CONFIG_CRYPTO_BLOWFISH_X86_64 is not set
# CONFIG_CRYPTO_CAMELLIA is not set
# CONFIG_CRYPTO_CAMELLIA_X86_64 is not set
# CONFIG_CRYPTO_CAMELLIA_AESNI_AVX_X86_64 is not set
# CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64 is not set
# CONFIG_CRYPTO_CAST5 is not set
# CONFIG_CRYPTO_CAST5_AVX_X86_64 is not set
# CONFIG_CRYPTO_CAST6 is not set
# CONFIG_CRYPTO_CAST6_AVX_X86_64 is not set
CONFIG_CRYPTO_DES=m
# CONFIG_CRYPTO_DES3_EDE_X86_64 is not set
# CONFIG_CRYPTO_FCRYPT is not set
# CONFIG_CRYPTO_KHAZAD is not set
# CONFIG_CRYPTO_SALSA20 is not set
# CONFIG_CRYPTO_SALSA20_X86_64 is not set
# CONFIG_CRYPTO_SEED is not set
# CONFIG_CRYPTO_SERPENT is not set
# CONFIG_CRYPTO_SERPENT_SSE2_X86_64 is not set
# CONFIG_CRYPTO_SERPENT_AVX_X86_64 is not set
# CONFIG_CRYPTO_SERPENT_AVX2_X86_64 is not set
# CONFIG_CRYPTO_TEA is not set
# CONFIG_CRYPTO_TWOFISH is not set
# CONFIG_CRYPTO_TWOFISH_X86_64 is not set
# CONFIG_CRYPTO_TWOFISH_X86_64_3WAY is not set
# CONFIG_CRYPTO_TWOFISH_AVX_X86_64 is not set

#
# Compression
#
CONFIG_CRYPTO_DEFLATE=m
# CONFIG_CRYPTO_ZLIB is not set
CONFIG_CRYPTO_LZO=y
# CONFIG_CRYPTO_LZ4 is not set
# CONFIG_CRYPTO_LZ4HC is not set

#
# Random Number Generation
#
# CONFIG_CRYPTO_ANSI_CPRNG is not set
# CONFIG_CRYPTO_DRBG_MENU is not set
# CONFIG_CRYPTO_USER_API_HASH is not set
# CONFIG_CRYPTO_USER_API_SKCIPHER is not set
# CONFIG_CRYPTO_USER_API_RNG is not set
CONFIG_CRYPTO_HASH_INFO=y
CONFIG_CRYPTO_HW=y
# CONFIG_CRYPTO_DEV_PADLOCK is not set
# CONFIG_CRYPTO_DEV_CCP is not set
# CONFIG_CRYPTO_DEV_QAT_DH895xCC is not set
CONFIG_ASYMMETRIC_KEY_TYPE=y
CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=y
CONFIG_PUBLIC_KEY_ALGO_RSA=y
CONFIG_X509_CERTIFICATE_PARSER=y
# CONFIG_PKCS7_MESSAGE_PARSER is not set
CONFIG_HAVE_KVM=y
CONFIG_VIRTUALIZATION=y
# CONFIG_KVM is not set
# CONFIG_BINARY_PRINTF is not set

#
# Library routines
#
CONFIG_BITREVERSE=y
# CONFIG_HAVE_ARCH_BITREVERSE is not set
CONFIG_GENERIC_STRNCPY_FROM_USER=y
CONFIG_GENERIC_STRNLEN_USER=y
CONFIG_GENERIC_NET_UTILS=y
CONFIG_GENERIC_FIND_FIRST_BIT=y
CONFIG_GENERIC_PCI_IOMAP=y
CONFIG_GENERIC_IOMAP=y
CONFIG_GENERIC_IO=y
CONFIG_ARCH_USE_CMPXCHG_LOCKREF=y
CONFIG_ARCH_HAS_FAST_MULTIPLIER=y
# CONFIG_CRC_CCITT is not set
CONFIG_CRC16=y
# CONFIG_CRC_T10DIF is not set
# CONFIG_CRC_ITU_T is not set
CONFIG_CRC32=y
# CONFIG_CRC32_SELFTEST is not set
CONFIG_CRC32_SLICEBY8=y
# CONFIG_CRC32_SLICEBY4 is not set
# CONFIG_CRC32_SARWATE is not set
# CONFIG_CRC32_BIT is not set
# CONFIG_CRC7 is not set
# CONFIG_LIBCRC32C is not set
# CONFIG_CRC8 is not set
# CONFIG_CRC64_ECMA is not set
# CONFIG_AUDIT_ARCH_COMPAT_GENERIC is not set
# CONFIG_RANDOM32_SELFTEST is not set
CONFIG_ZLIB_INFLATE=y
CONFIG_ZLIB_DEFLATE=y
CONFIG_LZO_COMPRESS=y
CONFIG_LZO_DECOMPRESS=y
CONFIG_LZ4_DECOMPRESS=y
CONFIG_XZ_DEC=y
CONFIG_XZ_DEC_X86=y
CONFIG_XZ_DEC_POWERPC=y
CONFIG_XZ_DEC_IA64=y
CONFIG_XZ_DEC_ARM=y
CONFIG_XZ_DEC_ARMTHUMB=y
CONFIG_XZ_DEC_SPARC=y
CONFIG_XZ_DEC_BCJ=y
# CONFIG_XZ_DEC_TEST is not set
CONFIG_DECOMPRESS_GZIP=y
CONFIG_DECOMPRESS_BZIP2=y
CONFIG_DECOMPRESS_LZMA=y
CONFIG_DECOMPRESS_XZ=y
CONFIG_DECOMPRESS_LZO=y
CONFIG_DECOMPRESS_LZ4=y
CONFIG_GENERIC_ALLOCATOR=y
CONFIG_ASSOCIATIVE_ARRAY=y
CONFIG_HAS_IOMEM=y
CONFIG_HAS_IOPORT_MAP=y
CONFIG_HAS_DMA=y
CONFIG_CPU_RMAP=y
CONFIG_DQL=y
CONFIG_GLOB=y
# CONFIG_GLOB_SELFTEST is not set
CONFIG_NLATTR=y
CONFIG_ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE=y
# CONFIG_AVERAGE is not set
CONFIG_CLZ_TAB=y
# CONFIG_CORDIC is not set
# CONFIG_DDR is not set
CONFIG_MPILIB=y
CONFIG_OID_REGISTRY=y
CONFIG_FONT_SUPPORT=y
# CONFIG_FONTS is not set
CONFIG_FONT_8x8=y
CONFIG_FONT_8x16=y
CONFIG_ARCH_HAS_SG_CHAIN=y

^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH 2/3 v3] x86: entry_64.S: always allocate complete "struct pt_regs"
  2015-02-25 12:37   ` Andrey Wagin
  2015-02-25 13:55     ` Denys Vlasenko
@ 2015-02-25 16:52     ` Denys Vlasenko
  2015-02-25 18:42     ` Denys Vlasenko
  2 siblings, 0 replies; 130+ messages in thread
From: Denys Vlasenko @ 2015-02-25 16:52 UTC (permalink / raw)
  To: Andrey Wagin
  Cc: Andy Lutomirski, Linus Torvalds, Oleg Nesterov, Borislav Petkov,
	H. Peter Anvin, Frederic Weisbecker, X86 ML, Alexei Starovoitov,
	Will Drewry, Kees Cook, LKML

On 02/25/2015 01:37 PM, Andrey Wagin wrote:
> Hello Denys,
> 
> My test vm doesn't boot with this patch. Could you help to investigate
> this issue?
> 
> I have attached a kernel config and console log.
> 
> [    2.508252] traps: systemd-cgroups[380] general protection ip:7f68ad096028 sp:7fffba298af8 error:0 in ld-2.18.so[7f68ad07e000+20000][  OK
> [    2.600179] traps: systemd-cgroups[384] general protection ip:7f11b9a9c028 sp:7fff4420f978 error:0 in ld-2.18.so[7f11b9a84000+20000]
> [    2.743790] traps: systemd-cgroups[392] general protection ip:7f7f40a44028 sp:7fffe1c1b8b8 error:0 in ld-2.18.so[7f7f40a2c000+20000]
> [    2.754576] traps: systemd-cgroups[393] general protection ip:7fd1314bd028 sp:7ffff76ecc88 error:0 in ld-2.18.so[7fd1314a5000+20000]
> [    2.765343] traps: systemd-cgroups[396] general protection ip:7ff4537b7028 sp:7fff05902378 error:0 in ld-2.18.so[7ff45379f000+20000]
> [    2.798782] traps: systemd-cgroups[399] general protection ip:7f4d5bc9c028 sp:7fff35cb3a48 error:0 in ld-2.18.so[7f4d5bc84000+20000]

These SEGVs are always at the same location within ld-2.18.so:
(ip - load_address) is always 0x18028.

For Sabrine's case, SEGVing address in ld-2.21.so also the same
all the time: 0x184f8.

Please send me your ld-2.18.so / ld-2.21.so files.

^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH 2/3 v3] x86: entry_64.S: always allocate complete "struct pt_regs"
  2015-02-25 12:37   ` Andrey Wagin
  2015-02-25 13:55     ` Denys Vlasenko
  2015-02-25 16:52     ` Denys Vlasenko
@ 2015-02-25 18:42     ` Denys Vlasenko
  2015-02-25 19:59       ` Andrey Wagin
  2 siblings, 1 reply; 130+ messages in thread
From: Denys Vlasenko @ 2015-02-25 18:42 UTC (permalink / raw)
  To: Andrey Wagin
  Cc: Andy Lutomirski, Linus Torvalds, Oleg Nesterov, Borislav Petkov,
	H. Peter Anvin, Frederic Weisbecker, X86 ML, Alexei Starovoitov,
	Will Drewry, Kees Cook, LKML

[-- Attachment #1: Type: text/plain, Size: 2865 bytes --]

On 02/25/2015 01:37 PM, Andrey Wagin wrote:
> 2015-02-13 0:54 GMT+03:00 Denys Vlasenko <dvlasenk@redhat.com>:
>> 64-bit code was using six stack slots less by not saving/restoring
>> registers which are callee-preserved according to C ABI,
>> and not allocating space for them.
>> Only when syscall needed a complete "struct pt_regs",
>> the complete area was allocated and filled in.
>> As an additional twist, on interrupt entry a "slightly less truncated pt_regs"
>> trick is used, to make nested interrupt stacks easier to unwind.
>>
>> This proved to be a source of significant obfuscation and subtle bugs.
>> For example, stub_fork had to pop the return address,
>> extend the struct, save registers, and push return address back. Ugly.
>> ia32_ptregs_common pops return address and "returns" via jmp insn,
>> throwing a wrench into CPU return stack cache.
>>
>> This patch changes code to always allocate a complete "struct pt_regs".
>> The saving of registers is still done lazily.
>>
>> "Partial pt_regs" trick on interrupt stack is retained.
>>
>> Macros which manipulate "struct pt_regs" on stack are reworked:
>> ALLOC_PT_GPREGS_ON_STACK allocates the structure.
>> SAVE_C_REGS saves to it those registers which are clobbered by C code.
>> SAVE_EXTRA_REGS saves to it all other registers.
>> Corresponding RESTORE_* and REMOVE_PT_GPREGS_FROM_STACK macros reverse it.
>>
>> ia32_ptregs_common, stub_fork and friends lost their ugly dance with
>> return pointer.
>>
>> LOAD_ARGS32 in ia32entry.S now uses symbolic stack offsets
>> instead of magic numbers.
>>
>> error_entry and save_paranoid now use SAVE_C_REGS + SAVE_EXTRA_REGS
>> instead of having it open-coded yet again.
>>
>> Patch was run-tested: 64-bit executables, 32-bit executables,
>> strace works.
>> Timing tests did not show measurable difference in 32-bit
>> and 64-bit syscalls.
> 
> Hello Denys,
> 
> My test vm doesn't boot with this patch. Could you help to investigate
> this issue?

I think I found it. This part of my patch is possibly wrong:

@@ -171,9 +171,9 @@ static inline int arch_irqs_disabled(void)
 #define ARCH_LOCKDEP_SYS_EXIT_IRQ      \
        TRACE_IRQS_ON; \
        sti; \
-       SAVE_REST; \
+       SAVE_EXTRA_REGS; \
        LOCKDEP_SYS_EXIT; \
-       RESTORE_REST; \
+       RESTORE_EXTRA_REGS; \
        cli; \
        TRACE_IRQS_OFF;

The "SAVE_REST" here is intended to really *push* extra regs on stack,
but the patch changed it so that they are written to existing stack
slots above.

>From code inspection it should work in almost all cases, but some
locations where it is used are really obscure.

If there are places where *pushing* regs is really necessary,
this can corrupt rbp,rbx,r12-15 registers.

Your config has CONFIG_LOCKDEP=y, I think it's worth trying whether the bug
was here.
Please find updated patch attached. Can you try it?

-- 
vda


[-- Attachment #2: 0001-x86-entry_64.S-always-allocate-complete-struct-pt_re.patch --]
[-- Type: text/x-patch, Size: 26935 bytes --]

>From 80c82370dcb8cd85c3b981ce62f63f74eff0df2c Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <dvlasenk@redhat.com>
Date: Wed, 25 Feb 2015 19:32:14 +0100
Subject: [PATCH v2] x86: entry_64.S: always allocate complete "struct pt_regs"

64-bit code was using six stack slots less by not saving/restoring
registers which are callee-preserved according to C ABI,
and not allocating space for them.
Only when syscall needed a complete "struct pt_regs",
the complete area was allocated and filled in.
As an additional twist, on interrupt entry a "slightly less truncated pt_regs"
trick is used, to make nested interrupt stacks easier to unwind.

This proved to be a source of significant obfuscation and subtle bugs.
For example, stub_fork had to pop the return address,
extend the struct, save registers, and push return address back. Ugly.
ia32_ptregs_common pops return address and "returns" via jmp insn,
throwing a wrench into CPU return stack cache.

This patch changes code to always allocate a complete "struct pt_regs".
The saving of registers is still done lazily.

"Partial pt_regs" trick on interrupt stack is retained.

Macros which manipulate "struct pt_regs" on stack are reworked:
ALLOC_PT_GPREGS_ON_STACK allocates the structure.
SAVE_C_REGS saves to it those registers which are clobbered by C code.
SAVE_EXTRA_REGS saves to it all other registers.
Corresponding RESTORE_* and REMOVE_PT_GPREGS_FROM_STACK macros reverse it.

ia32_ptregs_common, stub_fork and friends lost their ugly dance with
return pointer.

LOAD_ARGS32 in ia32entry.S now uses symbolic stack offsets
instead of magic numbers.

error_entry and save_paranoid now use SAVE_C_REGS + SAVE_EXTRA_REGS
instead of having it open-coded yet again.

Patch was run-tested: 64-bit executables, 32-bit executables,
strace works.
Timing tests did not show measurable difference in 32-bit
and 64-bit syscalls.

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
CC: Linus Torvalds <torvalds@linux-foundation.org>
CC: Oleg Nesterov <oleg@redhat.com>
CC: Borislav Petkov <bp@alien8.de>
CC: "H. Peter Anvin" <hpa@zytor.com>
CC: Andy Lutomirski <luto@amacapital.net>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: X86 ML <x86@kernel.org>
CC: Alexei Starovoitov <ast@plumgrid.com>
CC: Will Drewry <wad@chromium.org>
CC: Kees Cook <keescook@chromium.org>
CC: linux-kernel@vger.kernel.org
---

Changes since v1: fix a thinko in LOCKDEP_SYS_EXIT_IRQ

 arch/x86/ia32/ia32entry.S              |  47 +++----
 arch/x86/include/asm/calling.h         | 222 ++++++++++++++++-----------------
 arch/x86/include/asm/irqflags.h        |  18 ++-
 arch/x86/include/uapi/asm/ptrace-abi.h |   1 -
 arch/x86/kernel/entry_64.S             | 195 +++++++++++------------------
 5 files changed, 223 insertions(+), 260 deletions(-)

diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 156ebca..f4bed49 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -62,12 +62,12 @@
 	 */
 	.macro LOAD_ARGS32 offset, _r9=0
 	.if \_r9
-	movl \offset+16(%rsp),%r9d
+	movl \offset+R9(%rsp),%r9d
 	.endif
-	movl \offset+40(%rsp),%ecx
-	movl \offset+48(%rsp),%edx
-	movl \offset+56(%rsp),%esi
-	movl \offset+64(%rsp),%edi
+	movl \offset+RCX(%rsp),%ecx
+	movl \offset+RDX(%rsp),%edx
+	movl \offset+RSI(%rsp),%esi
+	movl \offset+RDI(%rsp),%edi
 	movl %eax,%eax			/* zero extension */
 	.endm
 	
@@ -144,7 +144,8 @@ ENTRY(ia32_sysenter_target)
 	CFI_REL_OFFSET rip,0
 	pushq_cfi %rax
 	cld
-	SAVE_ARGS 0,1,0
+	ALLOC_PT_GPREGS_ON_STACK
+	SAVE_C_REGS_EXCEPT_R891011
  	/* no need to do an access_ok check here because rbp has been
  	   32bit zero extended */ 
 	ASM_STAC
@@ -182,7 +183,8 @@ sysexit_from_sys_call:
 	andl	$~0x200,EFLAGS-ARGOFFSET(%rsp)
 	movl	RIP-ARGOFFSET(%rsp),%edx		/* User %eip */
 	CFI_REGISTER rip,rdx
-	RESTORE_ARGS 0,24,0,0,0,0
+	RESTORE_RSI_RDI
+	REMOVE_PT_GPREGS_FROM_STACK 3*8
 	xorq	%r8,%r8
 	xorq	%r9,%r9
 	xorq	%r10,%r10
@@ -256,13 +258,13 @@ sysenter_tracesys:
 	testl	$(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	jz	sysenter_auditsys
 #endif
-	SAVE_REST
+	SAVE_EXTRA_REGS
 	CLEAR_RREGS
 	movq	$-ENOSYS,RAX(%rsp)/* ptrace can change this for a bad syscall */
 	movq	%rsp,%rdi        /* &pt_regs -> arg1 */
 	call	syscall_trace_enter
 	LOAD_ARGS32 ARGOFFSET  /* reload args from stack in case ptrace changed it */
-	RESTORE_REST
+	RESTORE_EXTRA_REGS
 	cmpq	$(IA32_NR_syscalls-1),%rax
 	ja	int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */
 	jmp	sysenter_do_call
@@ -304,7 +306,8 @@ ENTRY(ia32_cstar_target)
 	 * disabled irqs and here we enable it straight after entry:
 	 */
 	ENABLE_INTERRUPTS(CLBR_NONE)
-	SAVE_ARGS 8,0,0
+	ALLOC_PT_GPREGS_ON_STACK 8
+	SAVE_C_REGS_EXCEPT_RCX_R891011
 	movl 	%eax,%eax	/* zero extension */
 	movq	%rax,ORIG_RAX-ARGOFFSET(%rsp)
 	movq	%rcx,RIP-ARGOFFSET(%rsp)
@@ -341,7 +344,7 @@ cstar_dispatch:
 	jnz sysretl_audit
 sysretl_from_sys_call:
 	andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
-	RESTORE_ARGS 0,-ARG_SKIP,0,0,0
+	RESTORE_RSI_RDI_RDX
 	movl RIP-ARGOFFSET(%rsp),%ecx
 	CFI_REGISTER rip,rcx
 	movl EFLAGS-ARGOFFSET(%rsp),%r11d	
@@ -372,13 +375,13 @@ cstar_tracesys:
 	jz cstar_auditsys
 #endif
 	xchgl %r9d,%ebp
-	SAVE_REST
+	SAVE_EXTRA_REGS
 	CLEAR_RREGS 0, r9
 	movq $-ENOSYS,RAX(%rsp)	/* ptrace can change this for a bad syscall */
 	movq %rsp,%rdi        /* &pt_regs -> arg1 */
 	call syscall_trace_enter
 	LOAD_ARGS32 ARGOFFSET, 1  /* reload args from stack in case ptrace changed it */
-	RESTORE_REST
+	RESTORE_EXTRA_REGS
 	xchgl %ebp,%r9d
 	cmpq $(IA32_NR_syscalls-1),%rax
 	ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */
@@ -433,7 +436,8 @@ ENTRY(ia32_syscall)
 	cld
 	/* note the registers are not zero extended to the sf.
 	   this could be a problem. */
-	SAVE_ARGS 0,1,0
+	ALLOC_PT_GPREGS_ON_STACK
+	SAVE_C_REGS_EXCEPT_R891011
 	orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	jnz ia32_tracesys
@@ -446,16 +450,16 @@ ia32_sysret:
 	movq %rax,RAX-ARGOFFSET(%rsp)
 ia32_ret_from_sys_call:
 	CLEAR_RREGS -ARGOFFSET
-	jmp int_ret_from_sys_call 
+	jmp int_ret_from_sys_call
 
-ia32_tracesys:			 
-	SAVE_REST
+ia32_tracesys:
+	SAVE_EXTRA_REGS
 	CLEAR_RREGS
 	movq $-ENOSYS,RAX(%rsp)	/* ptrace can change this for a bad syscall */
 	movq %rsp,%rdi        /* &pt_regs -> arg1 */
 	call syscall_trace_enter
 	LOAD_ARGS32 ARGOFFSET  /* reload args from stack in case ptrace changed it */
-	RESTORE_REST
+	RESTORE_EXTRA_REGS
 	cmpq $(IA32_NR_syscalls-1),%rax
 	ja  int_ret_from_sys_call	/* ia32_tracesys has set RAX(%rsp) */
 	jmp ia32_do_call
@@ -492,7 +496,6 @@ GLOBAL(stub32_clone)
 
 	ALIGN
 ia32_ptregs_common:
-	popq %r11
 	CFI_ENDPROC
 	CFI_STARTPROC32	simple
 	CFI_SIGNAL_FRAME
@@ -507,9 +510,9 @@ ia32_ptregs_common:
 /*	CFI_REL_OFFSET	rflags,EFLAGS-ARGOFFSET*/
 	CFI_REL_OFFSET	rsp,RSP-ARGOFFSET
 /*	CFI_REL_OFFSET	ss,SS-ARGOFFSET*/
-	SAVE_REST
+	SAVE_EXTRA_REGS 8
 	call *%rax
-	RESTORE_REST
-	jmp  ia32_sysret	/* misbalances the return cache */
+	RESTORE_EXTRA_REGS 8
+	ret
 	CFI_ENDPROC
 END(ia32_ptregs_common)
diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h
index 3c711f2a..3835647 100644
--- a/arch/x86/include/asm/calling.h
+++ b/arch/x86/include/asm/calling.h
@@ -55,143 +55,137 @@ For 32-bit we have the following conventions - kernel is built with
  * for assembly code:
  */
 
-#define R15		  0
-#define R14		  8
-#define R13		 16
-#define R12		 24
-#define RBP		 32
-#define RBX		 40
-
-/* arguments: interrupts/non tracing syscalls only save up to here: */
-#define R11		 48
-#define R10		 56
-#define R9		 64
-#define R8		 72
-#define RAX		 80
-#define RCX		 88
-#define RDX		 96
-#define RSI		104
-#define RDI		112
-#define ORIG_RAX	120       /* + error_code */
-/* end of arguments */
-
-/* cpu exception frame or undefined in case of fast syscall: */
-#define RIP		128
-#define CS		136
-#define EFLAGS		144
-#define RSP		152
-#define SS		160
-
-#define ARGOFFSET	R11
-
-	.macro SAVE_ARGS addskip=0, save_rcx=1, save_r891011=1, rax_enosys=0
-	subq  $9*8+\addskip, %rsp
-	CFI_ADJUST_CFA_OFFSET	9*8+\addskip
-	movq_cfi rdi, 8*8
-	movq_cfi rsi, 7*8
-	movq_cfi rdx, 6*8
-
-	.if \save_rcx
-	movq_cfi rcx, 5*8
-	.endif
+/* The layout forms the "struct pt_regs" on the stack: */
+/*
+ * C ABI says these regs are callee-preserved. They aren't saved on kernel entry
+ * unless syscall needs a complete, fully filled "struct pt_regs".
+ */
+#define R15		0*8
+#define R14		1*8
+#define R13		2*8
+#define R12		3*8
+#define RBP		4*8
+#define RBX		5*8
+/* These regs are callee-clobbered. Always saved on kernel entry. */
+#define R11		6*8
+#define R10		7*8
+#define R9		8*8
+#define R8		9*8
+#define RAX		10*8
+#define RCX		11*8
+#define RDX		12*8
+#define RSI		13*8
+#define RDI		14*8
+/*
+ * On syscall entry, this is syscall#. On CPU exception, this is error code.
+ * On hw interrupt, it's IRQ number:
+ */
+#define ORIG_RAX	15*8
+/* Return frame for iretq */
+#define RIP		16*8
+#define CS		17*8
+#define EFLAGS		18*8
+#define RSP		19*8
+#define SS		20*8
+
+#define ARGOFFSET	0
+
+	.macro ALLOC_PT_GPREGS_ON_STACK addskip=0
+	subq	$15*8+\addskip, %rsp
+	CFI_ADJUST_CFA_OFFSET 15*8+\addskip
+	.endm
 
-	.if \rax_enosys
-	movq $-ENOSYS, 4*8(%rsp)
-	.else
-	movq_cfi rax, 4*8
+	.macro SAVE_C_REGS_HELPER offset=0 rax=1 rcx=1 r8plus=1
+	.if \r8plus
+	movq_cfi r11, 6*8+\offset
+	movq_cfi r10, 7*8+\offset
+	movq_cfi r9,  8*8+\offset
+	movq_cfi r8,  9*8+\offset
 	.endif
-
-	.if \save_r891011
-	movq_cfi r8,  3*8
-	movq_cfi r9,  2*8
-	movq_cfi r10, 1*8
-	movq_cfi r11, 0*8
+	.if \rax
+	movq_cfi rax, 10*8+\offset
+	.endif
+	.if \rcx
+	movq_cfi rcx, 11*8+\offset
 	.endif
+	movq_cfi rdx, 12*8+\offset
+	movq_cfi rsi, 13*8+\offset
+	movq_cfi rdi, 14*8+\offset
+	.endm
+	.macro SAVE_C_REGS offset=0
+	SAVE_C_REGS_HELPER \offset, 1, 1, 1
+	.endm
+	.macro SAVE_C_REGS_EXCEPT_RAX_RCX offset=0
+	SAVE_C_REGS_HELPER \offset, 0, 0, 1
+	.endm
+	.macro SAVE_C_REGS_EXCEPT_R891011
+	SAVE_C_REGS_HELPER 0, 1, 1, 0
+	.endm
+	.macro SAVE_C_REGS_EXCEPT_RCX_R891011
+	SAVE_C_REGS_HELPER 0, 1, 0, 0
+	.endm
 
+	.macro SAVE_EXTRA_REGS offset=0
+	movq_cfi r15, 0*8+\offset
+	movq_cfi r14, 1*8+\offset
+	movq_cfi r13, 2*8+\offset
+	movq_cfi r12, 3*8+\offset
+	movq_cfi rbp, 4*8+\offset
+	movq_cfi rbx, 5*8+\offset
+	.endm
+	.macro SAVE_EXTRA_REGS_RBP offset=0
+	movq_cfi rbp, 4*8+\offset
 	.endm
 
-#define ARG_SKIP	(9*8)
+	.macro RESTORE_EXTRA_REGS offset=0
+	movq_cfi_restore 0*8+\offset, r15
+	movq_cfi_restore 1*8+\offset, r14
+	movq_cfi_restore 2*8+\offset, r13
+	movq_cfi_restore 3*8+\offset, r12
+	movq_cfi_restore 4*8+\offset, rbp
+	movq_cfi_restore 5*8+\offset, rbx
+	.endm
 
-	.macro RESTORE_ARGS rstor_rax=1, addskip=0, rstor_rcx=1, rstor_r11=1, \
-			    rstor_r8910=1, rstor_rdx=1
+	.macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1
 	.if \rstor_r11
-	movq_cfi_restore 0*8, r11
+	movq_cfi_restore 6*8, r11
 	.endif
-
 	.if \rstor_r8910
-	movq_cfi_restore 1*8, r10
-	movq_cfi_restore 2*8, r9
-	movq_cfi_restore 3*8, r8
+	movq_cfi_restore 7*8, r10
+	movq_cfi_restore 8*8, r9
+	movq_cfi_restore 9*8, r8
 	.endif
-
 	.if \rstor_rax
-	movq_cfi_restore 4*8, rax
+	movq_cfi_restore 10*8, rax
 	.endif
-
 	.if \rstor_rcx
-	movq_cfi_restore 5*8, rcx
+	movq_cfi_restore 11*8, rcx
 	.endif
-
 	.if \rstor_rdx
-	movq_cfi_restore 6*8, rdx
-	.endif
-
-	movq_cfi_restore 7*8, rsi
-	movq_cfi_restore 8*8, rdi
-
-	.if ARG_SKIP+\addskip > 0
-	addq $ARG_SKIP+\addskip, %rsp
-	CFI_ADJUST_CFA_OFFSET	-(ARG_SKIP+\addskip)
+	movq_cfi_restore 12*8, rdx
 	.endif
+	movq_cfi_restore 13*8, rsi
+	movq_cfi_restore 14*8, rdi
 	.endm
-
-	.macro LOAD_ARGS offset, skiprax=0
-	movq \offset(%rsp),    %r11
-	movq \offset+8(%rsp),  %r10
-	movq \offset+16(%rsp), %r9
-	movq \offset+24(%rsp), %r8
-	movq \offset+40(%rsp), %rcx
-	movq \offset+48(%rsp), %rdx
-	movq \offset+56(%rsp), %rsi
-	movq \offset+64(%rsp), %rdi
-	.if \skiprax
-	.else
-	movq \offset+72(%rsp), %rax
-	.endif
+	.macro RESTORE_C_REGS
+	RESTORE_C_REGS_HELPER 1,1,1,1,1
 	.endm
-
-#define REST_SKIP	(6*8)
-
-	.macro SAVE_REST
-	subq $REST_SKIP, %rsp
-	CFI_ADJUST_CFA_OFFSET	REST_SKIP
-	movq_cfi rbx, 5*8
-	movq_cfi rbp, 4*8
-	movq_cfi r12, 3*8
-	movq_cfi r13, 2*8
-	movq_cfi r14, 1*8
-	movq_cfi r15, 0*8
+	.macro RESTORE_C_REGS_EXCEPT_RAX
+	RESTORE_C_REGS_HELPER 0,1,1,1,1
 	.endm
-
-	.macro RESTORE_REST
-	movq_cfi_restore 0*8, r15
-	movq_cfi_restore 1*8, r14
-	movq_cfi_restore 2*8, r13
-	movq_cfi_restore 3*8, r12
-	movq_cfi_restore 4*8, rbp
-	movq_cfi_restore 5*8, rbx
-	addq $REST_SKIP, %rsp
-	CFI_ADJUST_CFA_OFFSET	-(REST_SKIP)
+	.macro RESTORE_C_REGS_EXCEPT_RCX
+	RESTORE_C_REGS_HELPER 1,0,1,1,1
 	.endm
-
-	.macro SAVE_ALL
-	SAVE_ARGS
-	SAVE_REST
+	.macro RESTORE_RSI_RDI
+	RESTORE_C_REGS_HELPER 0,0,0,0,0
+	.endm
+	.macro RESTORE_RSI_RDI_RDX
+	RESTORE_C_REGS_HELPER 0,0,0,0,1
 	.endm
 
-	.macro RESTORE_ALL addskip=0
-	RESTORE_REST
-	RESTORE_ARGS 1, \addskip
+	.macro REMOVE_PT_GPREGS_FROM_STACK addskip=0
+	addq $15*8+\addskip, %rsp
+	CFI_ADJUST_CFA_OFFSET -(15*8+\addskip)
 	.endm
 
 	.macro icebp
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index 0a8b519..58bb6a7 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -171,9 +171,23 @@ static inline int arch_irqs_disabled(void)
 #define ARCH_LOCKDEP_SYS_EXIT_IRQ	\
 	TRACE_IRQS_ON; \
 	sti; \
-	SAVE_REST; \
+	subq $6*8, %rsp; \
+	CFI_ADJUST_CFA_OFFSET 6*8; \
+	movq_cfi rbx, 5*8; \
+	movq_cfi rbp, 4*8; \
+	movq_cfi r12, 3*8; \
+	movq_cfi r13, 2*8; \
+	movq_cfi r14, 1*8; \
+	movq_cfi r15, 0*8; \
 	LOCKDEP_SYS_EXIT; \
-	RESTORE_REST; \
+	movq_cfi_restore 0*8, r15; \
+	movq_cfi_restore 1*8, r14; \
+	movq_cfi_restore 2*8, r13; \
+	movq_cfi_restore 3*8, r12; \
+	movq_cfi_restore 4*8, rbp; \
+	movq_cfi_restore 5*8, rbx; \
+	addq $6*8, %rsp; \
+	CFI_ADJUST_CFA_OFFSET -(6*8); \
 	cli; \
 	TRACE_IRQS_OFF;
 
diff --git a/arch/x86/include/uapi/asm/ptrace-abi.h b/arch/x86/include/uapi/asm/ptrace-abi.h
index 7b0a55a..ad115bf 100644
--- a/arch/x86/include/uapi/asm/ptrace-abi.h
+++ b/arch/x86/include/uapi/asm/ptrace-abi.h
@@ -49,7 +49,6 @@
 #define EFLAGS 144
 #define RSP 152
 #define SS 160
-#define ARGOFFSET R11
 #endif /* __ASSEMBLY__ */
 
 /* top of stack page */
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index ac542ac..45bdd26 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -26,12 +26,6 @@
  * Some macro usage:
  * - CFI macros are used to generate dwarf2 unwind information for better
  * backtraces. They don't change any code.
- * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
- * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
- * There are unfortunately lots of special cases where some registers
- * not touched. The macro is a big mess that should be cleaned up.
- * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
- * Gives a full stack frame.
  * - ENTRY/END Define functions in the symbol table.
  * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
  * frame that is otherwise undefined after a SYSCALL
@@ -190,9 +184,9 @@ ENDPROC(native_usergs_sysret64)
 	.endm
 
 /*
- * frame that enables calling into C.
+ * frame that enables passing a complete pt_regs to a C function.
  */
-	.macro PARTIAL_FRAME start=1 offset=0
+	.macro DEFAULT_FRAME start=1 offset=0
 	XCPT_FRAME \start, ORIG_RAX+\offset-ARGOFFSET
 	CFI_REL_OFFSET rdi, RDI+\offset-ARGOFFSET
 	CFI_REL_OFFSET rsi, RSI+\offset-ARGOFFSET
@@ -203,13 +197,6 @@ ENDPROC(native_usergs_sysret64)
 	CFI_REL_OFFSET r9, R9+\offset-ARGOFFSET
 	CFI_REL_OFFSET r10, R10+\offset-ARGOFFSET
 	CFI_REL_OFFSET r11, R11+\offset-ARGOFFSET
-	.endm
-
-/*
- * frame that enables passing a complete pt_regs to a C function.
- */
-	.macro DEFAULT_FRAME start=1 offset=0
-	PARTIAL_FRAME \start, R11+\offset-R15
 	CFI_REL_OFFSET rbx, RBX+\offset
 	CFI_REL_OFFSET rbp, RBP+\offset
 	CFI_REL_OFFSET r12, R12+\offset
@@ -221,21 +208,8 @@ ENDPROC(native_usergs_sysret64)
 ENTRY(save_paranoid)
 	XCPT_FRAME 1 RDI+8
 	cld
-	movq %rdi, RDI+8(%rsp)
-	movq %rsi, RSI+8(%rsp)
-	movq_cfi rdx, RDX+8
-	movq_cfi rcx, RCX+8
-	movq_cfi rax, RAX+8
-	movq %r8, R8+8(%rsp)
-	movq %r9, R9+8(%rsp)
-	movq %r10, R10+8(%rsp)
-	movq %r11, R11+8(%rsp)
-	movq_cfi rbx, RBX+8
-	movq %rbp, RBP+8(%rsp)
-	movq %r12, R12+8(%rsp)
-	movq %r13, R13+8(%rsp)
-	movq %r14, R14+8(%rsp)
-	movq %r15, R15+8(%rsp)
+	SAVE_C_REGS 8
+	SAVE_EXTRA_REGS 8
 	movl $1,%ebx
 	movl $MSR_GS_BASE,%ecx
 	rdmsr
@@ -264,7 +238,7 @@ ENTRY(ret_from_fork)
 
 	GET_THREAD_INFO(%rcx)
 
-	RESTORE_REST
+	RESTORE_EXTRA_REGS
 
 	testl $3, CS-ARGOFFSET(%rsp)		# from kernel_thread?
 	jz   1f
@@ -276,12 +250,10 @@ ENTRY(ret_from_fork)
 	jmp ret_from_sys_call			# go to the SYSRET fastpath
 
 1:
-	subq $REST_SKIP, %rsp	# leave space for volatiles
-	CFI_ADJUST_CFA_OFFSET	REST_SKIP
 	movq %rbp, %rdi
 	call *%rbx
 	movl $0, RAX(%rsp)
-	RESTORE_REST
+	RESTORE_EXTRA_REGS
 	jmp int_ret_from_sys_call
 	CFI_ENDPROC
 END(ret_from_fork)
@@ -339,9 +311,11 @@ GLOBAL(system_call_after_swapgs)
 	 * and short:
 	 */
 	ENABLE_INTERRUPTS(CLBR_NONE)
-	SAVE_ARGS 8, 0, rax_enosys=1
+	ALLOC_PT_GPREGS_ON_STACK 8
+	SAVE_C_REGS_EXCEPT_RAX_RCX
+	movq	$-ENOSYS,RAX-ARGOFFSET(%rsp)
 	movq_cfi rax,(ORIG_RAX-ARGOFFSET)
-	movq  %rcx,RIP-ARGOFFSET(%rsp)
+	movq	%rcx,RIP-ARGOFFSET(%rsp)
 	CFI_REL_OFFSET rip,RIP-ARGOFFSET
 	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	jnz tracesys
@@ -372,9 +346,9 @@ ret_from_sys_call:
 	 * sysretq will re-enable interrupts:
 	 */
 	TRACE_IRQS_ON
+	RESTORE_C_REGS_EXCEPT_RCX
 	movq RIP-ARGOFFSET(%rsp),%rcx
 	CFI_REGISTER	rip,rcx
-	RESTORE_ARGS 1,-ARG_SKIP,0
 	/*CFI_REGISTER	rflags,r11*/
 	movq	PER_CPU_VAR(old_rsp), %rsp
 	USERGS_SYSRET64
@@ -387,16 +361,16 @@ int_ret_from_sys_call_fixup:
 
 	/* Do syscall tracing */
 tracesys:
-	leaq -REST_SKIP(%rsp), %rdi
+	movq %rsp, %rdi
 	movq $AUDIT_ARCH_X86_64, %rsi
 	call syscall_trace_enter_phase1
 	test %rax, %rax
 	jnz tracesys_phase2		/* if needed, run the slow path */
-	LOAD_ARGS 0			/* else restore clobbered regs */
+	RESTORE_C_REGS			/* else restore clobbered regs */
 	jmp system_call_fastpath	/*      and return to the fast path */
 
 tracesys_phase2:
-	SAVE_REST
+	SAVE_EXTRA_REGS
 	FIXUP_TOP_OF_STACK %rdi
 	movq %rsp, %rdi
 	movq $AUDIT_ARCH_X86_64, %rsi
@@ -408,8 +382,8 @@ tracesys_phase2:
 	 * We don't reload %rax because syscall_trace_entry_phase2() returned
 	 * the value it wants us to use in the table lookup.
 	 */
-	LOAD_ARGS ARGOFFSET, 1
-	RESTORE_REST
+	RESTORE_C_REGS_EXCEPT_RAX
+	RESTORE_EXTRA_REGS
 #if __SYSCALL_MASK == ~0
 	cmpq $__NR_syscall_max,%rax
 #else
@@ -460,7 +434,7 @@ int_very_careful:
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_NONE)
 int_check_syscall_exit_work:
-	SAVE_REST
+	SAVE_EXTRA_REGS
 	/* Check for syscall exit trace */
 	testl $_TIF_WORK_SYSCALL_EXIT,%edx
 	jz int_signal
@@ -479,7 +453,7 @@ int_signal:
 	call do_notify_resume
 1:	movl $_TIF_WORK_MASK,%edi
 int_restore_rest:
-	RESTORE_REST
+	RESTORE_EXTRA_REGS
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
 	jmp int_with_check
@@ -489,15 +463,12 @@ END(system_call)
 	.macro FORK_LIKE func
 ENTRY(stub_\func)
 	CFI_STARTPROC
-	popq	%r11			/* save return address */
-	PARTIAL_FRAME 0
-	SAVE_REST
-	pushq	%r11			/* put it back on stack */
+	DEFAULT_FRAME 0, 8		/* offset 8: return address */
+	SAVE_EXTRA_REGS 8
 	FIXUP_TOP_OF_STACK %r11, 8
-	DEFAULT_FRAME 0 8		/* offset 8: return address */
 	call sys_\func
 	RESTORE_TOP_OF_STACK %r11, 8
-	ret $REST_SKIP		/* pop extended registers */
+	ret
 	CFI_ENDPROC
 END(stub_\func)
 	.endm
@@ -505,7 +476,7 @@ END(stub_\func)
 	.macro FIXED_FRAME label,func
 ENTRY(\label)
 	CFI_STARTPROC
-	PARTIAL_FRAME 0 8		/* offset 8: return address */
+	DEFAULT_FRAME 0, 8		/* offset 8: return address */
 	FIXUP_TOP_OF_STACK %r11, 8-ARGOFFSET
 	call \func
 	RESTORE_TOP_OF_STACK %r11, 8-ARGOFFSET
@@ -522,12 +493,12 @@ END(\label)
 ENTRY(stub_execve)
 	CFI_STARTPROC
 	addq $8, %rsp
-	PARTIAL_FRAME 0
-	SAVE_REST
+	DEFAULT_FRAME 0
+	SAVE_EXTRA_REGS
 	FIXUP_TOP_OF_STACK %r11
 	call sys_execve
 	movq %rax,RAX(%rsp)
-	RESTORE_REST
+	RESTORE_EXTRA_REGS
 	jmp int_ret_from_sys_call
 	CFI_ENDPROC
 END(stub_execve)
@@ -535,13 +506,13 @@ END(stub_execve)
 ENTRY(stub_execveat)
 	CFI_STARTPROC
 	addq $8, %rsp
-	PARTIAL_FRAME 0
-	SAVE_REST
+	DEFAULT_FRAME 0
+	SAVE_EXTRA_REGS
 	FIXUP_TOP_OF_STACK %r11
 	call sys_execveat
 	RESTORE_TOP_OF_STACK %r11
 	movq %rax,RAX(%rsp)
-	RESTORE_REST
+	RESTORE_EXTRA_REGS
 	jmp int_ret_from_sys_call
 	CFI_ENDPROC
 END(stub_execveat)
@@ -553,12 +524,12 @@ END(stub_execveat)
 ENTRY(stub_rt_sigreturn)
 	CFI_STARTPROC
 	addq $8, %rsp
-	PARTIAL_FRAME 0
-	SAVE_REST
+	DEFAULT_FRAME 0
+	SAVE_EXTRA_REGS
 	FIXUP_TOP_OF_STACK %r11
 	call sys_rt_sigreturn
 	movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
-	RESTORE_REST
+	RESTORE_EXTRA_REGS
 	jmp int_ret_from_sys_call
 	CFI_ENDPROC
 END(stub_rt_sigreturn)
@@ -567,12 +538,12 @@ END(stub_rt_sigreturn)
 ENTRY(stub_x32_rt_sigreturn)
 	CFI_STARTPROC
 	addq $8, %rsp
-	PARTIAL_FRAME 0
-	SAVE_REST
+	DEFAULT_FRAME 0
+	SAVE_EXTRA_REGS
 	FIXUP_TOP_OF_STACK %r11
 	call sys32_x32_rt_sigreturn
 	movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
-	RESTORE_REST
+	RESTORE_EXTRA_REGS
 	jmp int_ret_from_sys_call
 	CFI_ENDPROC
 END(stub_x32_rt_sigreturn)
@@ -580,13 +551,13 @@ END(stub_x32_rt_sigreturn)
 ENTRY(stub_x32_execve)
 	CFI_STARTPROC
 	addq $8, %rsp
-	PARTIAL_FRAME 0
-	SAVE_REST
+	DEFAULT_FRAME 0
+	SAVE_EXTRA_REGS
 	FIXUP_TOP_OF_STACK %r11
 	call compat_sys_execve
 	RESTORE_TOP_OF_STACK %r11
 	movq %rax,RAX(%rsp)
-	RESTORE_REST
+	RESTORE_EXTRA_REGS
 	jmp int_ret_from_sys_call
 	CFI_ENDPROC
 END(stub_x32_execve)
@@ -594,13 +565,13 @@ END(stub_x32_execve)
 ENTRY(stub_x32_execveat)
 	CFI_STARTPROC
 	addq $8, %rsp
-	PARTIAL_FRAME 0
-	SAVE_REST
+	DEFAULT_FRAME 0
+	SAVE_EXTRA_REGS
 	FIXUP_TOP_OF_STACK %r11
 	call compat_sys_execveat
 	RESTORE_TOP_OF_STACK %r11
 	movq %rax,RAX(%rsp)
-	RESTORE_REST
+	RESTORE_EXTRA_REGS
 	jmp int_ret_from_sys_call
 	CFI_ENDPROC
 END(stub_x32_execveat)
@@ -656,42 +627,28 @@ END(interrupt)
 
 /* 0(%rsp): ~(interrupt number) */
 	.macro interrupt func
-	/* reserve pt_regs for scratch regs and rbp */
-	subq $ORIG_RAX-RBP, %rsp
-	CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP
 	cld
-	/* start from rbp in pt_regs and jump over */
-	movq_cfi rdi, (RDI-RBP)
-	movq_cfi rsi, (RSI-RBP)
-	movq_cfi rdx, (RDX-RBP)
-	movq_cfi rcx, (RCX-RBP)
-	movq_cfi rax, (RAX-RBP)
-	movq_cfi  r8,  (R8-RBP)
-	movq_cfi  r9,  (R9-RBP)
-	movq_cfi r10, (R10-RBP)
-	movq_cfi r11, (R11-RBP)
-
-	/* Save rbp so that we can unwind from get_irq_regs() */
-	movq_cfi rbp, 0
-
-	/* Save previous stack value */
-	movq %rsp, %rsi
+	ALLOC_PT_GPREGS_ON_STACK -RBP
+	SAVE_C_REGS -RBP
+	/* this goes to 0(%rsp) for unwinder, not for saving the value: */
+	SAVE_EXTRA_REGS_RBP -RBP
+
+	leaq -RBP(%rsp),%rdi	/* arg1 for \func (pointer to pt_regs) */
 
-	leaq -RBP(%rsp),%rdi	/* arg1 for handler */
-	testl $3, CS-RBP(%rsi)
+	testl $3, CS-RBP(%rsp)
 	je 1f
 	SWAPGS
+1:
 	/*
 	 * irq_count is used to check if a CPU is already on an interrupt stack
 	 * or not. While this is essentially redundant with preempt_count it is
 	 * a little cheaper to use a separate counter in the PDA (short of
 	 * moving irq_enter into assembly, which would be too much work)
 	 */
-1:	incl PER_CPU_VAR(irq_count)
+	movq %rsp, %rsi
+	incl PER_CPU_VAR(irq_count)
 	cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
 	CFI_DEF_CFA_REGISTER	rsi
-
-	/* Store previous stack value */
 	pushq %rsi
 	CFI_ESCAPE	0x0f /* DW_CFA_def_cfa_expression */, 6, \
 			0x77 /* DW_OP_breg7 */, 0, \
@@ -800,7 +757,8 @@ retint_swapgs:		/* return to user-space */
 	 */
 irq_return_via_sysret:
 	CFI_REMEMBER_STATE
-	RESTORE_ARGS 1,8,1
+	RESTORE_C_REGS
+	REMOVE_PT_GPREGS_FROM_STACK 8
 	movq (RSP-RIP)(%rsp),%rsp
 	USERGS_SYSRET64
 	CFI_RESTORE_STATE
@@ -816,7 +774,8 @@ retint_restore_args:	/* return to kernel space */
 	 */
 	TRACE_IRQS_IRETQ
 restore_args:
-	RESTORE_ARGS 1,8,1
+	RESTORE_C_REGS
+	REMOVE_PT_GPREGS_FROM_STACK 8
 
 irq_return:
 	INTERRUPT_RETURN
@@ -887,12 +846,12 @@ retint_signal:
 	jz    retint_swapgs
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_NONE)
-	SAVE_REST
+	SAVE_EXTRA_REGS
 	movq $-1,ORIG_RAX(%rsp)
 	xorl %esi,%esi		# oldset
 	movq %rsp,%rdi		# &pt_regs
 	call do_notify_resume
-	RESTORE_REST
+	RESTORE_EXTRA_REGS
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
 	GET_THREAD_INFO(%rcx)
@@ -1019,8 +978,7 @@ ENTRY(\sym)
 	pushq_cfi $-1			/* ORIG_RAX: no syscall to restart */
 	.endif
 
-	subq $ORIG_RAX-R15, %rsp
-	CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
+	ALLOC_PT_GPREGS_ON_STACK
 
 	.if \paranoid
 	.if \paranoid == 1
@@ -1266,7 +1224,9 @@ ENTRY(xen_failsafe_callback)
 	addq $0x30,%rsp
 	CFI_ADJUST_CFA_OFFSET -0x30
 	pushq_cfi $-1 /* orig_ax = -1 => not a system call */
-	SAVE_ALL
+	ALLOC_PT_GPREGS_ON_STACK
+	SAVE_C_REGS
+	SAVE_EXTRA_REGS
 	jmp error_exit
 	CFI_ENDPROC
 END(xen_failsafe_callback)
@@ -1318,11 +1278,15 @@ ENTRY(paranoid_exit)
 	jnz paranoid_restore
 	TRACE_IRQS_IRETQ 0
 	SWAPGS_UNSAFE_STACK
-	RESTORE_ALL 8
+	RESTORE_EXTRA_REGS
+	RESTORE_C_REGS
+	REMOVE_PT_GPREGS_FROM_STACK 8
 	INTERRUPT_RETURN
 paranoid_restore:
 	TRACE_IRQS_IRETQ_DEBUG 0
-	RESTORE_ALL 8
+	RESTORE_EXTRA_REGS
+	RESTORE_C_REGS
+	REMOVE_PT_GPREGS_FROM_STACK 8
 	INTERRUPT_RETURN
 	CFI_ENDPROC
 END(paranoid_exit)
@@ -1336,21 +1300,8 @@ ENTRY(error_entry)
 	CFI_ADJUST_CFA_OFFSET 15*8
 	/* oldrax contains error code */
 	cld
-	movq %rdi, RDI+8(%rsp)
-	movq %rsi, RSI+8(%rsp)
-	movq %rdx, RDX+8(%rsp)
-	movq %rcx, RCX+8(%rsp)
-	movq %rax, RAX+8(%rsp)
-	movq  %r8,  R8+8(%rsp)
-	movq  %r9,  R9+8(%rsp)
-	movq %r10, R10+8(%rsp)
-	movq %r11, R11+8(%rsp)
-	movq_cfi rbx, RBX+8
-	movq %rbp, RBP+8(%rsp)
-	movq %r12, R12+8(%rsp)
-	movq %r13, R13+8(%rsp)
-	movq %r14, R14+8(%rsp)
-	movq %r15, R15+8(%rsp)
+	SAVE_C_REGS 8
+	SAVE_EXTRA_REGS 8
 	xorl %ebx,%ebx
 	testl $3,CS+8(%rsp)
 	je error_kernelspace
@@ -1399,7 +1350,7 @@ END(error_entry)
 ENTRY(error_exit)
 	DEFAULT_FRAME
 	movl %ebx,%eax
-	RESTORE_REST
+	RESTORE_EXTRA_REGS
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
 	GET_THREAD_INFO(%rcx)
@@ -1618,8 +1569,8 @@ end_repeat_nmi:
 	 * so that we repeat another NMI.
 	 */
 	pushq_cfi $-1		/* ORIG_RAX: no syscall to restart */
-	subq $ORIG_RAX-R15, %rsp
-	CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
+	ALLOC_PT_GPREGS_ON_STACK
+
 	/*
 	 * Use save_paranoid to handle SWAPGS, but no need to use paranoid_exit
 	 * as we should not be calling schedule in NMI context.
@@ -1658,8 +1609,10 @@ end_repeat_nmi:
 nmi_swapgs:
 	SWAPGS_UNSAFE_STACK
 nmi_restore:
+	RESTORE_EXTRA_REGS
+	RESTORE_C_REGS
 	/* Pop the extra iret frame at once */
-	RESTORE_ALL 6*8
+	REMOVE_PT_GPREGS_FROM_STACK 6*8
 
 	/* Clear the NMI executing stack variable */
 	movq $0, 5*8(%rsp)
-- 
1.8.1.4


^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH 2/3 v3] x86: entry_64.S: always allocate complete "struct pt_regs"
  2015-02-25 18:42     ` Denys Vlasenko
@ 2015-02-25 19:59       ` Andrey Wagin
  2015-02-25 20:10         ` Andy Lutomirski
  0 siblings, 1 reply; 130+ messages in thread
From: Andrey Wagin @ 2015-02-25 19:59 UTC (permalink / raw)
  To: Denys Vlasenko
  Cc: Andy Lutomirski, Linus Torvalds, Oleg Nesterov, Borislav Petkov,
	H. Peter Anvin, Frederic Weisbecker, X86 ML, Alexei Starovoitov,
	Will Drewry, Kees Cook, LKML

2015-02-25 21:42 GMT+03:00 Denys Vlasenko <dvlasenk@redhat.com>:
> On 02/25/2015 01:37 PM, Andrey Wagin wrote:
>> 2015-02-13 0:54 GMT+03:00 Denys Vlasenko <dvlasenk@redhat.com>:
>>> 64-bit code was using six stack slots less by not saving/restoring
>>> registers which are callee-preserved according to C ABI,
>>> and not allocating space for them.
>>> Only when syscall needed a complete "struct pt_regs",
>>> the complete area was allocated and filled in.
>>> As an additional twist, on interrupt entry a "slightly less truncated pt_regs"
>>> trick is used, to make nested interrupt stacks easier to unwind.
>>>
>>> This proved to be a source of significant obfuscation and subtle bugs.
>>> For example, stub_fork had to pop the return address,
>>> extend the struct, save registers, and push return address back. Ugly.
>>> ia32_ptregs_common pops return address and "returns" via jmp insn,
>>> throwing a wrench into CPU return stack cache.
>>>
>>> This patch changes code to always allocate a complete "struct pt_regs".
>>> The saving of registers is still done lazily.
>>>
>>> "Partial pt_regs" trick on interrupt stack is retained.
>>>
>>> Macros which manipulate "struct pt_regs" on stack are reworked:
>>> ALLOC_PT_GPREGS_ON_STACK allocates the structure.
>>> SAVE_C_REGS saves to it those registers which are clobbered by C code.
>>> SAVE_EXTRA_REGS saves to it all other registers.
>>> Corresponding RESTORE_* and REMOVE_PT_GPREGS_FROM_STACK macros reverse it.
>>>
>>> ia32_ptregs_common, stub_fork and friends lost their ugly dance with
>>> return pointer.
>>>
>>> LOAD_ARGS32 in ia32entry.S now uses symbolic stack offsets
>>> instead of magic numbers.
>>>
>>> error_entry and save_paranoid now use SAVE_C_REGS + SAVE_EXTRA_REGS
>>> instead of having it open-coded yet again.
>>>
>>> Patch was run-tested: 64-bit executables, 32-bit executables,
>>> strace works.
>>> Timing tests did not show measurable difference in 32-bit
>>> and 64-bit syscalls.
>>
>> Hello Denys,
>>
>> My test vm doesn't boot with this patch. Could you help to investigate
>> this issue?
>
> I think I found it. This part of my patch is possibly wrong:
>
> @@ -171,9 +171,9 @@ static inline int arch_irqs_disabled(void)
>  #define ARCH_LOCKDEP_SYS_EXIT_IRQ      \
>         TRACE_IRQS_ON; \
>         sti; \
> -       SAVE_REST; \
> +       SAVE_EXTRA_REGS; \
>         LOCKDEP_SYS_EXIT; \
> -       RESTORE_REST; \
> +       RESTORE_EXTRA_REGS; \
>         cli; \
>         TRACE_IRQS_OFF;
>
> The "SAVE_REST" here is intended to really *push* extra regs on stack,
> but the patch changed it so that they are written to existing stack
> slots above.
>
> From code inspection it should work in almost all cases, but some
> locations where it is used are really obscure.
>
> If there are places where *pushing* regs is really necessary,
> this can corrupt rbp,rbx,r12-15 registers.
>
> Your config has CONFIG_LOCKDEP=y, I think it's worth trying whether the bug
> was here.
> Please find updated patch attached. Can you try it?

It doesn't work

[    2.282198] microcode: CPU0 sig=0x623, pf=0x0, revision=0x1
[    2.288321] microcode: CPU1 sig=0x623, pf=0x0, revision=0x1
[    2.289139] microcode: CPU2 sig=0x623, pf=0x0, revision=0x1
[    2.290618] microcode: CPU3 sig=0x623, pf=0x0, revision=0x1
[    2.292417] microcode: Microcode Update Driver: v2.00
<tigran@aivazian.fsnet.co.uk>, Peter Oruba
[    2.392592] piix4_smbus 0000:00:01.3: SMBus Host Controller at
0xb100, revision 0
[    2.510882] systemd-fsck[349]: /dev/sda1: clean, 343/128016 files,
166911/512000 blocks
[    2.521463] Adding 4128764k swap on /dev/sda2.  Priority:-1
extents:1 across:4128764k FS
[    2.573597] EXT4-fs (sda1): mounted filesystem with ordered data
mode. Opts: (null)
[    2.597771] systemd-journald[283]: Received request to flush
runtime journal from PID 1
[    2.802288] audit: type=1305 audit(1424892361.629:3): audit_pid=369
old=0 auid=4294967295 ses=4294967295 res=1
[    2.819660] traps: systemd-cgroups[380] general protection
ip:7fb227939028 sp:7fff6bac59c8 error:0 in
ld-2.18.so[7fb227921000+20000]
[    3.016262] traps: systemd-cgroups[390] general protection
ip:7f456f7b6028 sp:7fffdc059718 error:0 in
ld-2.18.so[7f456f79e000+20000]

>
> --
> vda
>

^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH 2/3 v3] x86: entry_64.S: always allocate complete "struct pt_regs"
  2015-02-25 19:59       ` Andrey Wagin
@ 2015-02-25 20:10         ` Andy Lutomirski
  2015-02-25 21:28           ` Denys Vlasenko
  0 siblings, 1 reply; 130+ messages in thread
From: Andy Lutomirski @ 2015-02-25 20:10 UTC (permalink / raw)
  To: Andrey Wagin, Ingo Molnar
  Cc: Denys Vlasenko, Linus Torvalds, Oleg Nesterov, Borislav Petkov,
	H. Peter Anvin, Frederic Weisbecker, X86 ML, Alexei Starovoitov,
	Will Drewry, Kees Cook, LKML

On Wed, Feb 25, 2015 at 11:59 AM, Andrey Wagin <avagin@gmail.com> wrote:
> 2015-02-25 21:42 GMT+03:00 Denys Vlasenko <dvlasenk@redhat.com>:
>> On 02/25/2015 01:37 PM, Andrey Wagin wrote:
>>> 2015-02-13 0:54 GMT+03:00 Denys Vlasenko <dvlasenk@redhat.com>:
>>>> 64-bit code was using six stack slots less by not saving/restoring
>>>> registers which are callee-preserved according to C ABI,
>>>> and not allocating space for them.
>>>> Only when syscall needed a complete "struct pt_regs",
>>>> the complete area was allocated and filled in.
>>>> As an additional twist, on interrupt entry a "slightly less truncated pt_regs"
>>>> trick is used, to make nested interrupt stacks easier to unwind.
>>>>
>>>> This proved to be a source of significant obfuscation and subtle bugs.
>>>> For example, stub_fork had to pop the return address,
>>>> extend the struct, save registers, and push return address back. Ugly.
>>>> ia32_ptregs_common pops return address and "returns" via jmp insn,
>>>> throwing a wrench into CPU return stack cache.
>>>>
>>>> This patch changes code to always allocate a complete "struct pt_regs".
>>>> The saving of registers is still done lazily.
>>>>
>>>> "Partial pt_regs" trick on interrupt stack is retained.
>>>>
>>>> Macros which manipulate "struct pt_regs" on stack are reworked:
>>>> ALLOC_PT_GPREGS_ON_STACK allocates the structure.
>>>> SAVE_C_REGS saves to it those registers which are clobbered by C code.
>>>> SAVE_EXTRA_REGS saves to it all other registers.
>>>> Corresponding RESTORE_* and REMOVE_PT_GPREGS_FROM_STACK macros reverse it.
>>>>
>>>> ia32_ptregs_common, stub_fork and friends lost their ugly dance with
>>>> return pointer.
>>>>
>>>> LOAD_ARGS32 in ia32entry.S now uses symbolic stack offsets
>>>> instead of magic numbers.
>>>>
>>>> error_entry and save_paranoid now use SAVE_C_REGS + SAVE_EXTRA_REGS
>>>> instead of having it open-coded yet again.
>>>>
>>>> Patch was run-tested: 64-bit executables, 32-bit executables,
>>>> strace works.
>>>> Timing tests did not show measurable difference in 32-bit
>>>> and 64-bit syscalls.
>>>
>>> Hello Denys,
>>>
>>> My test vm doesn't boot with this patch. Could you help to investigate
>>> this issue?
>>
>> I think I found it. This part of my patch is possibly wrong:
>>
>> @@ -171,9 +171,9 @@ static inline int arch_irqs_disabled(void)
>>  #define ARCH_LOCKDEP_SYS_EXIT_IRQ      \
>>         TRACE_IRQS_ON; \
>>         sti; \
>> -       SAVE_REST; \
>> +       SAVE_EXTRA_REGS; \
>>         LOCKDEP_SYS_EXIT; \
>> -       RESTORE_REST; \
>> +       RESTORE_EXTRA_REGS; \
>>         cli; \
>>         TRACE_IRQS_OFF;
>>
>> The "SAVE_REST" here is intended to really *push* extra regs on stack,
>> but the patch changed it so that they are written to existing stack
>> slots above.
>>
>> From code inspection it should work in almost all cases, but some
>> locations where it is used are really obscure.
>>
>> If there are places where *pushing* regs is really necessary,
>> this can corrupt rbp,rbx,r12-15 registers.
>>
>> Your config has CONFIG_LOCKDEP=y, I think it's worth trying whether the bug
>> was here.
>> Please find updated patch attached. Can you try it?
>
> It doesn't work
>
> [    2.282198] microcode: CPU0 sig=0x623, pf=0x0, revision=0x1
> [    2.288321] microcode: CPU1 sig=0x623, pf=0x0, revision=0x1
> [    2.289139] microcode: CPU2 sig=0x623, pf=0x0, revision=0x1
> [    2.290618] microcode: CPU3 sig=0x623, pf=0x0, revision=0x1
> [    2.292417] microcode: Microcode Update Driver: v2.00
> <tigran@aivazian.fsnet.co.uk>, Peter Oruba
> [    2.392592] piix4_smbus 0000:00:01.3: SMBus Host Controller at
> 0xb100, revision 0
> [    2.510882] systemd-fsck[349]: /dev/sda1: clean, 343/128016 files,
> 166911/512000 blocks
> [    2.521463] Adding 4128764k swap on /dev/sda2.  Priority:-1
> extents:1 across:4128764k FS
> [    2.573597] EXT4-fs (sda1): mounted filesystem with ordered data
> mode. Opts: (null)
> [    2.597771] systemd-journald[283]: Received request to flush
> runtime journal from PID 1
> [    2.802288] audit: type=1305 audit(1424892361.629:3): audit_pid=369
> old=0 auid=4294967295 ses=4294967295 res=1
> [    2.819660] traps: systemd-cgroups[380] general protection
> ip:7fb227939028 sp:7fff6bac59c8 error:0 in
> ld-2.18.so[7fb227921000+20000]
> [    3.016262] traps: systemd-cgroups[390] general protection
> ip:7f456f7b6028 sp:7fffdc059718 error:0 in
> ld-2.18.so[7f456f79e000+20000]

The change to stub_\func looks wrong to me.  It saves and restores
regs, but those regs might already have been saved if we're on the
slow path.  (Yes, all that code is quite buggy even without all these
patches.)  So is execve.

This means that, for example, execve called in the slow path will
save/restore regs twice.  If the values in the regs after the first
save and before the second save are different, then we corrupt user
state.

I think that the changes to all the stub-like things should be reverted.

--Andy

>
>>
>> --
>> vda
>>



-- 
Andy Lutomirski
AMA Capital Management, LLC

^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH 2/3 v3] x86: entry_64.S: always allocate complete "struct pt_regs"
  2015-02-25 20:10         ` Andy Lutomirski
@ 2015-02-25 21:28           ` Denys Vlasenko
  2015-02-25 21:59             ` Andy Lutomirski
  0 siblings, 1 reply; 130+ messages in thread
From: Denys Vlasenko @ 2015-02-25 21:28 UTC (permalink / raw)
  To: Andy Lutomirski, Andrey Wagin, Ingo Molnar
  Cc: Linus Torvalds, Oleg Nesterov, Borislav Petkov, H. Peter Anvin,
	Frederic Weisbecker, X86 ML, Alexei Starovoitov, Will Drewry,
	Kees Cook, LKML

On 02/25/2015 09:10 PM, Andy Lutomirski wrote:
> On Wed, Feb 25, 2015 at 11:59 AM, Andrey Wagin <avagin@gmail.com> wrote:
>> 2015-02-25 21:42 GMT+03:00 Denys Vlasenko <dvlasenk@redhat.com>:
>>> On 02/25/2015 01:37 PM, Andrey Wagin wrote:
>>>> 2015-02-13 0:54 GMT+03:00 Denys Vlasenko <dvlasenk@redhat.com>:
>>>>> 64-bit code was using six stack slots less by not saving/restoring
>>>>> registers which are callee-preserved according to C ABI,
>>>>> and not allocating space for them.
>>>>> Only when syscall needed a complete "struct pt_regs",
>>>>> the complete area was allocated and filled in.
>>>>> As an additional twist, on interrupt entry a "slightly less truncated pt_regs"
>>>>> trick is used, to make nested interrupt stacks easier to unwind.
>>>>>
>>>>> This proved to be a source of significant obfuscation and subtle bugs.
>>>>> For example, stub_fork had to pop the return address,
>>>>> extend the struct, save registers, and push return address back. Ugly.
>>>>> ia32_ptregs_common pops return address and "returns" via jmp insn,
>>>>> throwing a wrench into CPU return stack cache.
>>>>>
>>>>> This patch changes code to always allocate a complete "struct pt_regs".
>>>>> The saving of registers is still done lazily.
>>>>>
>>>>> "Partial pt_regs" trick on interrupt stack is retained.
>>>>>
>>>>> Macros which manipulate "struct pt_regs" on stack are reworked:
>>>>> ALLOC_PT_GPREGS_ON_STACK allocates the structure.
>>>>> SAVE_C_REGS saves to it those registers which are clobbered by C code.
>>>>> SAVE_EXTRA_REGS saves to it all other registers.
>>>>> Corresponding RESTORE_* and REMOVE_PT_GPREGS_FROM_STACK macros reverse it.
>>>>>
>>>>> ia32_ptregs_common, stub_fork and friends lost their ugly dance with
>>>>> return pointer.
>>>>>
>>>>> LOAD_ARGS32 in ia32entry.S now uses symbolic stack offsets
>>>>> instead of magic numbers.
>>>>>
>>>>> error_entry and save_paranoid now use SAVE_C_REGS + SAVE_EXTRA_REGS
>>>>> instead of having it open-coded yet again.
>>>>>
>>>>> Patch was run-tested: 64-bit executables, 32-bit executables,
>>>>> strace works.
>>>>> Timing tests did not show measurable difference in 32-bit
>>>>> and 64-bit syscalls.
>>>>
>>>> Hello Denys,
>>>>
>>>> My test vm doesn't boot with this patch. Could you help to investigate
>>>> this issue?
>>>
>>> I think I found it. This part of my patch is possibly wrong:
>>>
>>> @@ -171,9 +171,9 @@ static inline int arch_irqs_disabled(void)
>>>  #define ARCH_LOCKDEP_SYS_EXIT_IRQ      \
>>>         TRACE_IRQS_ON; \
>>>         sti; \
>>> -       SAVE_REST; \
>>> +       SAVE_EXTRA_REGS; \
>>>         LOCKDEP_SYS_EXIT; \
>>> -       RESTORE_REST; \
>>> +       RESTORE_EXTRA_REGS; \
>>>         cli; \
>>>         TRACE_IRQS_OFF;
>>>
>>> The "SAVE_REST" here is intended to really *push* extra regs on stack,
>>> but the patch changed it so that they are written to existing stack
>>> slots above.
>>>
>>> From code inspection it should work in almost all cases, but some
>>> locations where it is used are really obscure.
>>>
>>> If there are places where *pushing* regs is really necessary,
>>> this can corrupt rbp,rbx,r12-15 registers.
>>>
>>> Your config has CONFIG_LOCKDEP=y, I think it's worth trying whether the bug
>>> was here.
>>> Please find updated patch attached. Can you try it?
>>
>> It doesn't work

Thanks for testing it anyway.


>> [    3.016262] traps: systemd-cgroups[390] general protection
>> ip:7f456f7b6028 sp:7fffdc059718 error:0 in
>> ld-2.18.so[7f456f79e000+20000]

This is what I know about these crashes. The SEGV itself is caused by
HLT instruction executed by dynamic loader, ld-2.NN.so.
The instruction is in _exit function, and is only reachable if
exit_group and exit syscalls fail to terminate the process.
So it seems that syscall execution is getting badly broken somehow
at some point.

This happens to both reporters.

My theory that it is related to lockdep seems to be wrong, because
Sabrina's kernel is not lockdep-enabled, yet it sees the same failure.

Both kernels are paravirtualized, both are booted under KVM,
Andrey runs it with four virtual CPUs, Sabrina runs with two.

My next theory is that I missed something related to paravirt.
I am looking at that code, so far I don't see anything suspicious.

Unfortunately, it doesn't happen to me: I have Sabrina's bzImage,
I run it under "qemu-system-x86_64 -enable-kvm -smp 2",
I see in dmesg that kernel does detect that it is being run under KVM,
but it works for me. No mysterious segfaults.

Andrey, can you send me your bzImage? Maybe it will trigger
the problem for me.


> The change to stub_\func looks wrong to me.  It saves and restores
> regs, but those regs might already have been saved if we're on the
> slow path.  (Yes, all that code is quite buggy even without all these
> patches.)  So is execve.
> 
> This means that, for example, execve called in the slow path will
> save/restore regs twice.  If the values in the regs after the first
> save and before the second save are different, then we corrupt user
> state.

This part?

        .macro FORK_LIKE func
 ENTRY(stub_\func)
        CFI_STARTPROC
-       popq    %r11                    /* save return address */
-       PARTIAL_FRAME 0
-       SAVE_REST
-       pushq   %r11                    /* put it back on stack */
+       DEFAULT_FRAME 0, 8              /* offset 8: return address */
+       SAVE_EXTRA_REGS 8
        FIXUP_TOP_OF_STACK %r11, 8
-       DEFAULT_FRAME 0 8               /* offset 8: return address */
        call sys_\func
        RESTORE_TOP_OF_STACK %r11, 8
-       ret $REST_SKIP          /* pop extended registers */
+       ret
        CFI_ENDPROC
 END(stub_\func)
        .endm

        FORK_LIKE  clone
        FORK_LIKE  fork
        FORK_LIKE  vfork

But the old code (SAVE_REST thing) was also saving registers here.
It had to jump through hoops (pop return address, SAVE_REST,
push return address) to do that.
After the patch, "SAVE_EXTRA_REGS 8" does the same, just without
pop/push pair.

I just don't see what's wrong with it. Can you elaborate?

And this area of code has no paravirt gunk, so if the bug is here,
why it doesn't fail for people running this natively?

-- 
vda


^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH 2/3 v3] x86: entry_64.S: always allocate complete "struct pt_regs"
  2015-02-25 21:28           ` Denys Vlasenko
@ 2015-02-25 21:59             ` Andy Lutomirski
  2015-02-25 22:40               ` Sabrina Dubroca
  2015-02-26  9:55               ` Denys Vlasenko
  0 siblings, 2 replies; 130+ messages in thread
From: Andy Lutomirski @ 2015-02-25 21:59 UTC (permalink / raw)
  To: Denys Vlasenko
  Cc: Andrey Wagin, Ingo Molnar, Linus Torvalds, Oleg Nesterov,
	Borislav Petkov, H. Peter Anvin, Frederic Weisbecker, X86 ML,
	Alexei Starovoitov, Will Drewry, Kees Cook, LKML

On Wed, Feb 25, 2015 at 1:28 PM, Denys Vlasenko <dvlasenk@redhat.com> wrote:
> On 02/25/2015 09:10 PM, Andy Lutomirski wrote:
>> On Wed, Feb 25, 2015 at 11:59 AM, Andrey Wagin <avagin@gmail.com> wrote:
>>> 2015-02-25 21:42 GMT+03:00 Denys Vlasenko <dvlasenk@redhat.com>:
>>>> On 02/25/2015 01:37 PM, Andrey Wagin wrote:
>>>>> 2015-02-13 0:54 GMT+03:00 Denys Vlasenko <dvlasenk@redhat.com>:
>>>>>> 64-bit code was using six stack slots less by not saving/restoring
>>>>>> registers which are callee-preserved according to C ABI,
>>>>>> and not allocating space for them.
>>>>>> Only when syscall needed a complete "struct pt_regs",
>>>>>> the complete area was allocated and filled in.
>>>>>> As an additional twist, on interrupt entry a "slightly less truncated pt_regs"
>>>>>> trick is used, to make nested interrupt stacks easier to unwind.
>>>>>>
>>>>>> This proved to be a source of significant obfuscation and subtle bugs.
>>>>>> For example, stub_fork had to pop the return address,
>>>>>> extend the struct, save registers, and push return address back. Ugly.
>>>>>> ia32_ptregs_common pops return address and "returns" via jmp insn,
>>>>>> throwing a wrench into CPU return stack cache.
>>>>>>
>>>>>> This patch changes code to always allocate a complete "struct pt_regs".
>>>>>> The saving of registers is still done lazily.
>>>>>>
>>>>>> "Partial pt_regs" trick on interrupt stack is retained.
>>>>>>
>>>>>> Macros which manipulate "struct pt_regs" on stack are reworked:
>>>>>> ALLOC_PT_GPREGS_ON_STACK allocates the structure.
>>>>>> SAVE_C_REGS saves to it those registers which are clobbered by C code.
>>>>>> SAVE_EXTRA_REGS saves to it all other registers.
>>>>>> Corresponding RESTORE_* and REMOVE_PT_GPREGS_FROM_STACK macros reverse it.
>>>>>>
>>>>>> ia32_ptregs_common, stub_fork and friends lost their ugly dance with
>>>>>> return pointer.
>>>>>>
>>>>>> LOAD_ARGS32 in ia32entry.S now uses symbolic stack offsets
>>>>>> instead of magic numbers.
>>>>>>
>>>>>> error_entry and save_paranoid now use SAVE_C_REGS + SAVE_EXTRA_REGS
>>>>>> instead of having it open-coded yet again.
>>>>>>
>>>>>> Patch was run-tested: 64-bit executables, 32-bit executables,
>>>>>> strace works.
>>>>>> Timing tests did not show measurable difference in 32-bit
>>>>>> and 64-bit syscalls.
>>>>>
>>>>> Hello Denys,
>>>>>
>>>>> My test vm doesn't boot with this patch. Could you help to investigate
>>>>> this issue?
>>>>
>>>> I think I found it. This part of my patch is possibly wrong:
>>>>
>>>> @@ -171,9 +171,9 @@ static inline int arch_irqs_disabled(void)
>>>>  #define ARCH_LOCKDEP_SYS_EXIT_IRQ      \
>>>>         TRACE_IRQS_ON; \
>>>>         sti; \
>>>> -       SAVE_REST; \
>>>> +       SAVE_EXTRA_REGS; \
>>>>         LOCKDEP_SYS_EXIT; \
>>>> -       RESTORE_REST; \
>>>> +       RESTORE_EXTRA_REGS; \
>>>>         cli; \
>>>>         TRACE_IRQS_OFF;
>>>>
>>>> The "SAVE_REST" here is intended to really *push* extra regs on stack,
>>>> but the patch changed it so that they are written to existing stack
>>>> slots above.
>>>>
>>>> From code inspection it should work in almost all cases, but some
>>>> locations where it is used are really obscure.
>>>>
>>>> If there are places where *pushing* regs is really necessary,
>>>> this can corrupt rbp,rbx,r12-15 registers.
>>>>
>>>> Your config has CONFIG_LOCKDEP=y, I think it's worth trying whether the bug
>>>> was here.
>>>> Please find updated patch attached. Can you try it?
>>>
>>> It doesn't work
>
> Thanks for testing it anyway.
>
>
>>> [    3.016262] traps: systemd-cgroups[390] general protection
>>> ip:7f456f7b6028 sp:7fffdc059718 error:0 in
>>> ld-2.18.so[7f456f79e000+20000]
>
> This is what I know about these crashes. The SEGV itself is caused by
> HLT instruction executed by dynamic loader, ld-2.NN.so.
> The instruction is in _exit function, and is only reachable if
> exit_group and exit syscalls fail to terminate the process.
> So it seems that syscall execution is getting badly broken somehow
> at some point.
>
> This happens to both reporters.
>
> My theory that it is related to lockdep seems to be wrong, because
> Sabrina's kernel is not lockdep-enabled, yet it sees the same failure.
>
> Both kernels are paravirtualized, both are booted under KVM,
> Andrey runs it with four virtual CPUs, Sabrina runs with two.
>
> My next theory is that I missed something related to paravirt.
> I am looking at that code, so far I don't see anything suspicious.
>
> Unfortunately, it doesn't happen to me: I have Sabrina's bzImage,
> I run it under "qemu-system-x86_64 -enable-kvm -smp 2",
> I see in dmesg that kernel does detect that it is being run under KVM,
> but it works for me. No mysterious segfaults.
>
> Andrey, can you send me your bzImage? Maybe it will trigger
> the problem for me.
>
>
>> The change to stub_\func looks wrong to me.  It saves and restores
>> regs, but those regs might already have been saved if we're on the
>> slow path.  (Yes, all that code is quite buggy even without all these
>> patches.)  So is execve.
>>
>> This means that, for example, execve called in the slow path will
>> save/restore regs twice.  If the values in the regs after the first
>> save and before the second save are different, then we corrupt user
>> state.
>
> This part?
>
>         .macro FORK_LIKE func
>  ENTRY(stub_\func)
>         CFI_STARTPROC
> -       popq    %r11                    /* save return address */
> -       PARTIAL_FRAME 0
> -       SAVE_REST
> -       pushq   %r11                    /* put it back on stack */
> +       DEFAULT_FRAME 0, 8              /* offset 8: return address */
> +       SAVE_EXTRA_REGS 8
>         FIXUP_TOP_OF_STACK %r11, 8
> -       DEFAULT_FRAME 0 8               /* offset 8: return address */
>         call sys_\func
>         RESTORE_TOP_OF_STACK %r11, 8
> -       ret $REST_SKIP          /* pop extended registers */
> +       ret
>         CFI_ENDPROC
>  END(stub_\func)
>         .endm
>
>         FORK_LIKE  clone
>         FORK_LIKE  fork
>         FORK_LIKE  vfork
>
> But the old code (SAVE_REST thing) was also saving registers here.
> It had to jump through hoops (pop return address, SAVE_REST,
> push return address) to do that.
> After the patch, "SAVE_EXTRA_REGS 8" does the same, just without
> pop/push pair.
>
> I just don't see what's wrong with it. Can you elaborate?

SAVE_REST pushed the regs onto the stack, whereas SAVE_EXTRA_REGS just
writes them in place.  It's possible for this to be called when the
regs have already been saved.

>
> And this area of code has no paravirt gunk, so if the bug is here,
> why it doesn't fail for people running this natively?

I don't know whether paravirt is involved.  It could be something else.

--Andy

>
> --
> vda
>



-- 
Andy Lutomirski
AMA Capital Management, LLC

^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH 2/3 v3] x86: entry_64.S: always allocate complete "struct pt_regs"
  2015-02-25 21:59             ` Andy Lutomirski
@ 2015-02-25 22:40               ` Sabrina Dubroca
  2015-02-25 23:34                 ` Sabrina Dubroca
  2015-02-26  9:55               ` Denys Vlasenko
  1 sibling, 1 reply; 130+ messages in thread
From: Sabrina Dubroca @ 2015-02-25 22:40 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Denys Vlasenko, Andrey Wagin, Ingo Molnar, Linus Torvalds,
	Oleg Nesterov, Borislav Petkov, H. Peter Anvin,
	Frederic Weisbecker, X86 ML, Alexei Starovoitov, Will Drewry,
	Kees Cook, LKML

2015-02-25, 13:59:06 -0800, Andy Lutomirski wrote:
> On Wed, Feb 25, 2015 at 1:28 PM, Denys Vlasenko <dvlasenk@redhat.com> wrote:
> > On 02/25/2015 09:10 PM, Andy Lutomirski wrote:
> >> On Wed, Feb 25, 2015 at 11:59 AM, Andrey Wagin <avagin@gmail.com> wrote:
> >>> 2015-02-25 21:42 GMT+03:00 Denys Vlasenko <dvlasenk@redhat.com>:
> >>>> On 02/25/2015 01:37 PM, Andrey Wagin wrote:
> >>>>> 2015-02-13 0:54 GMT+03:00 Denys Vlasenko <dvlasenk@redhat.com>:
> >>>>>> 64-bit code was using six stack slots less by not saving/restoring
> >>>>>> registers which are callee-preserved according to C ABI,
> >>>>>> and not allocating space for them.
> >>>>>> Only when syscall needed a complete "struct pt_regs",
> >>>>>> the complete area was allocated and filled in.
> >>>>>> As an additional twist, on interrupt entry a "slightly less truncated pt_regs"
> >>>>>> trick is used, to make nested interrupt stacks easier to unwind.
> >>>>>>
> >>>>>> This proved to be a source of significant obfuscation and subtle bugs.
> >>>>>> For example, stub_fork had to pop the return address,
> >>>>>> extend the struct, save registers, and push return address back. Ugly.
> >>>>>> ia32_ptregs_common pops return address and "returns" via jmp insn,
> >>>>>> throwing a wrench into CPU return stack cache.
> >>>>>>
> >>>>>> This patch changes code to always allocate a complete "struct pt_regs".
> >>>>>> The saving of registers is still done lazily.
> >>>>>>
> >>>>>> "Partial pt_regs" trick on interrupt stack is retained.
> >>>>>>
> >>>>>> Macros which manipulate "struct pt_regs" on stack are reworked:
> >>>>>> ALLOC_PT_GPREGS_ON_STACK allocates the structure.
> >>>>>> SAVE_C_REGS saves to it those registers which are clobbered by C code.
> >>>>>> SAVE_EXTRA_REGS saves to it all other registers.
> >>>>>> Corresponding RESTORE_* and REMOVE_PT_GPREGS_FROM_STACK macros reverse it.
> >>>>>>
> >>>>>> ia32_ptregs_common, stub_fork and friends lost their ugly dance with
> >>>>>> return pointer.
> >>>>>>
> >>>>>> LOAD_ARGS32 in ia32entry.S now uses symbolic stack offsets
> >>>>>> instead of magic numbers.
> >>>>>>
> >>>>>> error_entry and save_paranoid now use SAVE_C_REGS + SAVE_EXTRA_REGS
> >>>>>> instead of having it open-coded yet again.
> >>>>>>
> >>>>>> Patch was run-tested: 64-bit executables, 32-bit executables,
> >>>>>> strace works.
> >>>>>> Timing tests did not show measurable difference in 32-bit
> >>>>>> and 64-bit syscalls.
> >>>>>
> >>>>> Hello Denys,
> >>>>>
> >>>>> My test vm doesn't boot with this patch. Could you help to investigate
> >>>>> this issue?
> >>>>
> >>>> I think I found it. This part of my patch is possibly wrong:
> >>>>
> >>>> @@ -171,9 +171,9 @@ static inline int arch_irqs_disabled(void)
> >>>>  #define ARCH_LOCKDEP_SYS_EXIT_IRQ      \
> >>>>         TRACE_IRQS_ON; \
> >>>>         sti; \
> >>>> -       SAVE_REST; \
> >>>> +       SAVE_EXTRA_REGS; \
> >>>>         LOCKDEP_SYS_EXIT; \
> >>>> -       RESTORE_REST; \
> >>>> +       RESTORE_EXTRA_REGS; \
> >>>>         cli; \
> >>>>         TRACE_IRQS_OFF;
> >>>>
> >>>> The "SAVE_REST" here is intended to really *push* extra regs on stack,
> >>>> but the patch changed it so that they are written to existing stack
> >>>> slots above.
> >>>>
> >>>> From code inspection it should work in almost all cases, but some
> >>>> locations where it is used are really obscure.
> >>>>
> >>>> If there are places where *pushing* regs is really necessary,
> >>>> this can corrupt rbp,rbx,r12-15 registers.
> >>>>
> >>>> Your config has CONFIG_LOCKDEP=y, I think it's worth trying whether the bug
> >>>> was here.
> >>>> Please find updated patch attached. Can you try it?
> >>>
> >>> It doesn't work
> >
> > Thanks for testing it anyway.
> >
> >
> >>> [    3.016262] traps: systemd-cgroups[390] general protection
> >>> ip:7f456f7b6028 sp:7fffdc059718 error:0 in
> >>> ld-2.18.so[7f456f79e000+20000]
> >
> > This is what I know about these crashes. The SEGV itself is caused by
> > HLT instruction executed by dynamic loader, ld-2.NN.so.
> > The instruction is in _exit function, and is only reachable if
> > exit_group and exit syscalls fail to terminate the process.
> > So it seems that syscall execution is getting badly broken somehow
> > at some point.
> >
> > This happens to both reporters.
> >
> > My theory that it is related to lockdep seems to be wrong, because
> > Sabrina's kernel is not lockdep-enabled, yet it sees the same failure.
> >
> > Both kernels are paravirtualized, both are booted under KVM,
> > Andrey runs it with four virtual CPUs, Sabrina runs with two.
> >
> > My next theory is that I missed something related to paravirt.
> > I am looking at that code, so far I don't see anything suspicious.
> >
> > Unfortunately, it doesn't happen to me: I have Sabrina's bzImage,
> > I run it under "qemu-system-x86_64 -enable-kvm -smp 2",
> > I see in dmesg that kernel does detect that it is being run under KVM,
> > but it works for me. No mysterious segfaults.
> >
> > Andrey, can you send me your bzImage? Maybe it will trigger
> > the problem for me.
> >
> >
> >> The change to stub_\func looks wrong to me.  It saves and restores
> >> regs, but those regs might already have been saved if we're on the
> >> slow path.  (Yes, all that code is quite buggy even without all these
> >> patches.)  So is execve.
> >>
> >> This means that, for example, execve called in the slow path will
> >> save/restore regs twice.  If the values in the regs after the first
> >> save and before the second save are different, then we corrupt user
> >> state.
> >
> > This part?
> >
> >         .macro FORK_LIKE func
> >  ENTRY(stub_\func)
> >         CFI_STARTPROC
> > -       popq    %r11                    /* save return address */
> > -       PARTIAL_FRAME 0
> > -       SAVE_REST
> > -       pushq   %r11                    /* put it back on stack */
> > +       DEFAULT_FRAME 0, 8              /* offset 8: return address */
> > +       SAVE_EXTRA_REGS 8
> >         FIXUP_TOP_OF_STACK %r11, 8
> > -       DEFAULT_FRAME 0 8               /* offset 8: return address */
> >         call sys_\func
> >         RESTORE_TOP_OF_STACK %r11, 8
> > -       ret $REST_SKIP          /* pop extended registers */
> > +       ret
> >         CFI_ENDPROC
> >  END(stub_\func)
> >         .endm
> >
> >         FORK_LIKE  clone
> >         FORK_LIKE  fork
> >         FORK_LIKE  vfork
> >
> > But the old code (SAVE_REST thing) was also saving registers here.
> > It had to jump through hoops (pop return address, SAVE_REST,
> > push return address) to do that.
> > After the patch, "SAVE_EXTRA_REGS 8" does the same, just without
> > pop/push pair.
> >
> > I just don't see what's wrong with it. Can you elaborate?
> 
> SAVE_REST pushed the regs onto the stack, whereas SAVE_EXTRA_REGS just
> writes them in place.  It's possible for this to be called when the
> regs have already been saved.
> 
> >
> > And this area of code has no paravirt gunk, so if the bug is here,
> > why it doesn't fail for people running this natively?
> 
> I don't know whether paravirt is involved.  It could be something else.

After reading Denys's last mail, I tried booting the same VM
 - with 1 cpu
 - without CONFIG_PARAVIRT
 - with x86_64_defconfig

and I still get the same traps in all 3 cases.

I can run some userspace programs, but I have no idea what would be
helpful.
I can also try booting a real machine with archlinux/systemd tomorrow.

-- 
Sabrina

^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH 2/3 v3] x86: entry_64.S: always allocate complete "struct pt_regs"
  2015-02-25 22:40               ` Sabrina Dubroca
@ 2015-02-25 23:34                 ` Sabrina Dubroca
  2015-02-26  1:12                   ` Denys Vlasenko
  0 siblings, 1 reply; 130+ messages in thread
From: Sabrina Dubroca @ 2015-02-25 23:34 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Denys Vlasenko, Andrey Wagin, Ingo Molnar, Linus Torvalds,
	Oleg Nesterov, Borislav Petkov, H. Peter Anvin,
	Frederic Weisbecker, X86 ML, Alexei Starovoitov, Will Drewry,
	Kees Cook, LKML

2015-02-25, 23:40:55 +0100, Sabrina Dubroca wrote:
> I can run some userspace programs, but I have no idea what would be
> helpful.
> I can also try booting a real machine with archlinux/systemd tomorrow.

I got a good boot out of kernels that normally fail.  I booted
systemd's emergency shell and enabled a few services, in the same
order they normally start.  journald started cleanly, but after that,
every single command produced a "traps:" output and an "audit:" line.

I disabled systemd-journald (chmod -x, because `systemctl disable`
didn't really disable it), and now it boots, no "traps:" in the log.
If I run it, everything fails again (zsh has traps for simply pressing
enter on an empty cmd).

-- 
Sabrina

^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH 2/3 v3] x86: entry_64.S: always allocate complete "struct pt_regs"
  2015-02-25 23:34                 ` Sabrina Dubroca
@ 2015-02-26  1:12                   ` Denys Vlasenko
  2015-02-26  5:18                     ` Andrew Morton
  0 siblings, 1 reply; 130+ messages in thread
From: Denys Vlasenko @ 2015-02-26  1:12 UTC (permalink / raw)
  To: Sabrina Dubroca
  Cc: Andy Lutomirski, Denys Vlasenko, Andrey Wagin, Ingo Molnar,
	Linus Torvalds, Oleg Nesterov, Borislav Petkov, H. Peter Anvin,
	Frederic Weisbecker, X86 ML, Alexei Starovoitov, Will Drewry,
	Kees Cook, LKML

On Thu, Feb 26, 2015 at 12:34 AM, Sabrina Dubroca <sd@queasysnail.net> wrote:
> 2015-02-25, 23:40:55 +0100, Sabrina Dubroca wrote:
>> I can run some userspace programs, but I have no idea what would be
>> helpful.
>> I can also try booting a real machine with archlinux/systemd tomorrow.
>
> I got a good boot out of kernels that normally fail.  I booted
> systemd's emergency shell and enabled a few services, in the same
> order they normally start.  journald started cleanly, but after that,
> every single command produced a "traps:" output and an "audit:" line.
>
> I disabled systemd-journald (chmod -x, because `systemctl disable`
> didn't really disable it), and now it boots, no "traps:" in the log.
> If I run it, everything fails again (zsh has traps for simply pressing
> enter on an empty cmd).

That's some progress!

It's strange how one process manages to affect everything else.

"If I run it, everything fails again". How do you run it? Directly,
or via systemd services mechanism?
If you just run it directly, can you try running it under
"strace -f -tt -oLOG"? Does it have the same effect? What's in the LOG?

^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH 2/3 v3] x86: entry_64.S: always allocate complete "struct pt_regs"
  2015-02-26  1:12                   ` Denys Vlasenko
@ 2015-02-26  5:18                     ` Andrew Morton
  2015-02-26  6:25                       ` Stephen Rothwell
  0 siblings, 1 reply; 130+ messages in thread
From: Andrew Morton @ 2015-02-26  5:18 UTC (permalink / raw)
  To: Denys Vlasenko
  Cc: Sabrina Dubroca, Andy Lutomirski, Denys Vlasenko, Andrey Wagin,
	Ingo Molnar, Linus Torvalds, Oleg Nesterov, Borislav Petkov,
	H. Peter Anvin, Frederic Weisbecker, X86 ML, Alexei Starovoitov,
	Will Drewry, Kees Cook, LKML

On Thu, 26 Feb 2015 02:12:57 +0100 Denys Vlasenko <vda.linux@googlemail.com> wrote:

> On Thu, Feb 26, 2015 at 12:34 AM, Sabrina Dubroca <sd@queasysnail.net> wrote:
> > 2015-02-25, 23:40:55 +0100, Sabrina Dubroca wrote:
> >> I can run some userspace programs, but I have no idea what would be
> >> helpful.
> >> I can also try booting a real machine with archlinux/systemd tomorrow.
> >
> > I got a good boot out of kernels that normally fail.  I booted
> > systemd's emergency shell and enabled a few services, in the same
> > order they normally start.  journald started cleanly, but after that,
> > every single command produced a "traps:" output and an "audit:" line.
> >
> > I disabled systemd-journald (chmod -x, because `systemctl disable`
> > didn't really disable it), and now it boots, no "traps:" in the log.
> > If I run it, everything fails again (zsh has traps for simply pressing
> > enter on an empty cmd).
> 
> That's some progress!
> 
> It's strange how one process manages to affect everything else.
> 
> "If I run it, everything fails again". How do you run it? Directly,
> or via systemd services mechanism?
> If you just run it directly, can you try running it under
> "strace -f -tt -oLOG"? Does it have the same effect? What's in the LOG?

I'm hitting this bug as well, bisected to this commit.  On an old
x64_64 box, no vms, paravirt, etc.  Running FC6 userspace (heh).

Quite late in initscripts, binaries start getting segmentation faults
and init gives up.  Seems to only affect /usr/bin/rhgb-client.  There's
one instance where /bin/rm is said to segfault, but I suspect that's
init lying to me.



^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH 2/3 v3] x86: entry_64.S: always allocate complete "struct pt_regs"
  2015-02-26  5:18                     ` Andrew Morton
@ 2015-02-26  6:25                       ` Stephen Rothwell
  0 siblings, 0 replies; 130+ messages in thread
From: Stephen Rothwell @ 2015-02-26  6:25 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Denys Vlasenko, Sabrina Dubroca, Andy Lutomirski, Denys Vlasenko,
	Andrey Wagin, Ingo Molnar, Linus Torvalds, Oleg Nesterov,
	Borislav Petkov, H. Peter Anvin, Frederic Weisbecker, X86 ML,
	Alexei Starovoitov, Will Drewry, Kees Cook, LKML

[-- Attachment #1: Type: text/plain, Size: 2009 bytes --]

Hi all,

On Wed, 25 Feb 2015 21:18:52 -0800 Andrew Morton <akpm@linux-foundation.org> wrote:
>
> On Thu, 26 Feb 2015 02:12:57 +0100 Denys Vlasenko <vda.linux@googlemail.com> wrote:
> 
> > On Thu, Feb 26, 2015 at 12:34 AM, Sabrina Dubroca <sd@queasysnail.net> wrote:
> > > 2015-02-25, 23:40:55 +0100, Sabrina Dubroca wrote:
> > >> I can run some userspace programs, but I have no idea what would be
> > >> helpful.
> > >> I can also try booting a real machine with archlinux/systemd tomorrow.
> > >
> > > I got a good boot out of kernels that normally fail.  I booted
> > > systemd's emergency shell and enabled a few services, in the same
> > > order they normally start.  journald started cleanly, but after that,
> > > every single command produced a "traps:" output and an "audit:" line.
> > >
> > > I disabled systemd-journald (chmod -x, because `systemctl disable`
> > > didn't really disable it), and now it boots, no "traps:" in the log.
> > > If I run it, everything fails again (zsh has traps for simply pressing
> > > enter on an empty cmd).
> > 
> > That's some progress!
> > 
> > It's strange how one process manages to affect everything else.
> > 
> > "If I run it, everything fails again". How do you run it? Directly,
> > or via systemd services mechanism?
> > If you just run it directly, can you try running it under
> > "strace -f -tt -oLOG"? Does it have the same effect? What's in the LOG?
> 
> I'm hitting this bug as well, bisected to this commit.  On an old
> x64_64 box, no vms, paravirt, etc.  Running FC6 userspace (heh).
> 
> Quite late in initscripts, binaries start getting segmentation faults
> and init gives up.  Seems to only affect /usr/bin/rhgb-client.  There's
> one instance where /bin/rm is said to segfault, but I suspect that's
> init lying to me.

I note that that commit has been removed from today's version of the
luto-misc tree and thus linux-next.

-- 
Cheers,
Stephen Rothwell                    sfr@canb.auug.org.au

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH 2/3 v3] x86: entry_64.S: always allocate complete "struct pt_regs"
  2015-02-25 21:59             ` Andy Lutomirski
  2015-02-25 22:40               ` Sabrina Dubroca
@ 2015-02-26  9:55               ` Denys Vlasenko
  2015-02-26 12:11                 ` Denys Vlasenko
  2015-02-26 15:14                 ` Andy Lutomirski
  1 sibling, 2 replies; 130+ messages in thread
From: Denys Vlasenko @ 2015-02-26  9:55 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Denys Vlasenko, Andrey Wagin, Ingo Molnar, Linus Torvalds,
	Oleg Nesterov, Borislav Petkov, H. Peter Anvin,
	Frederic Weisbecker, X86 ML, Alexei Starovoitov, Will Drewry,
	Kees Cook, LKML

On Wed, Feb 25, 2015 at 10:59 PM, Andy Lutomirski <luto@amacapital.net> wrote:
> On Wed, Feb 25, 2015 at 1:28 PM, Denys Vlasenko <dvlasenk@redhat.com> wrote:
>> On 02/25/2015 09:10 PM, Andy Lutomirski wrote:
>> This part?
>>
>>         .macro FORK_LIKE func
>>  ENTRY(stub_\func)
>>         CFI_STARTPROC
>> -       popq    %r11                    /* save return address */
>> -       PARTIAL_FRAME 0
>> -       SAVE_REST
>> -       pushq   %r11                    /* put it back on stack */
>> +       DEFAULT_FRAME 0, 8              /* offset 8: return address */
>> +       SAVE_EXTRA_REGS 8
>>         FIXUP_TOP_OF_STACK %r11, 8
>> -       DEFAULT_FRAME 0 8               /* offset 8: return address */
>>         call sys_\func
>>         RESTORE_TOP_OF_STACK %r11, 8
>> -       ret $REST_SKIP          /* pop extended registers */
>> +       ret
>>         CFI_ENDPROC
>>  END(stub_\func)
>>         .endm
>>
>>         FORK_LIKE  clone
>>         FORK_LIKE  fork
>>         FORK_LIKE  vfork
>>
>> But the old code (SAVE_REST thing) was also saving registers here.
>> It had to jump through hoops (pop return address, SAVE_REST,
>> push return address) to do that.
>> After the patch, "SAVE_EXTRA_REGS 8" does the same, just without
>> pop/push pair.
>>
>> I just don't see what's wrong with it. Can you elaborate?
>
> SAVE_REST pushed the regs onto the stack, whereas SAVE_EXTRA_REGS just
> writes them in place.  It's possible for this to be called when the
> regs have already been saved.

If that would be the case - that is, if SAVE_REST was saving extra copy
of registers on stack, then FIXUP_TOP_OF_STACK %r11, 8 would be working
on wrong locations. The "8" there says "we have full pt_regs on stack,
plus extra 8 bytes (the return address)". Your conjecture would mean
that in fact there would be more bytes on stack, and FIXUP_TOP_OF_STACK
would corrupt iret stack. Evidently, since old code was not crashing,
this wasn't happening. SAVE_REST was really creating the "tail" of pt_regs.

In addition to my previous tests, I ran my home machine with
patched kernel. Unfortunately, it works for me :(

Will try on yet another machine.

-- 
vda

^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH 2/3 v3] x86: entry_64.S: always allocate complete "struct pt_regs"
  2015-02-26  9:55               ` Denys Vlasenko
@ 2015-02-26 12:11                 ` Denys Vlasenko
  2015-02-26 13:54                   ` Denys Vlasenko
  2015-02-26 15:14                 ` Andy Lutomirski
  1 sibling, 1 reply; 130+ messages in thread
From: Denys Vlasenko @ 2015-02-26 12:11 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Denys Vlasenko, Andrey Wagin, Ingo Molnar, Linus Torvalds,
	Oleg Nesterov, Borislav Petkov, H. Peter Anvin,
	Frederic Weisbecker, X86 ML, Alexei Starovoitov, Will Drewry,
	Kees Cook, LKML

On Thu, Feb 26, 2015 at 10:55 AM, Denys Vlasenko
<vda.linux@googlemail.com> wrote:
> On Wed, Feb 25, 2015 at 10:59 PM, Andy Lutomirski <luto@amacapital.net> wrote:
> In addition to my previous tests, I ran my home machine with
> patched kernel. Unfortunately, it works for me :(
>
> Will try on yet another machine.

And voila, it does happen on another machine :)

I'm debugging it right now. Looks like 64-bit syscalls just stop working
at some point in new processes. That is, existing process is alive and well,
but children get SEGV after fork (most likely on any syscall64 they do,
not after fork per se. They eventually manage to kill themselves -
not trivial when exit syscall isn't working either - by tripping on HLT insn).

32-bit syscalls (int 80) continue to work. Fork, exec, whatever you want.
I have static 32-bit busybox binary and everything works there.

Also, any 64-bit process which was under strace continues to work correctly,
including forks and execs.

This points towards some bug on fast path sysret64 code. Looking for it.

^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH 2/3 v3] x86: entry_64.S: always allocate complete "struct pt_regs"
  2015-02-26 12:11                 ` Denys Vlasenko
@ 2015-02-26 13:54                   ` Denys Vlasenko
  2015-02-26 14:26                     ` Sabrina Dubroca
  0 siblings, 1 reply; 130+ messages in thread
From: Denys Vlasenko @ 2015-02-26 13:54 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Denys Vlasenko, Andrey Wagin, Ingo Molnar, Linus Torvalds,
	Oleg Nesterov, Borislav Petkov, H. Peter Anvin,
	Frederic Weisbecker, X86 ML, Alexei Starovoitov, Will Drewry,
	Kees Cook, LKML

On Thu, Feb 26, 2015 at 1:11 PM, Denys Vlasenko
<vda.linux@googlemail.com> wrote:
> On Thu, Feb 26, 2015 at 10:55 AM, Denys Vlasenko
> <vda.linux@googlemail.com> wrote:
>> On Wed, Feb 25, 2015 at 10:59 PM, Andy Lutomirski <luto@amacapital.net> wrote:
>> In addition to my previous tests, I ran my home machine with
>> patched kernel. Unfortunately, it works for me :(
>>
>> Will try on yet another machine.
>
> And voila, it does happen on another machine :)
>
> I'm debugging it right now. Looks like 64-bit syscalls just stop working
> at some point in new processes. That is, existing process is alive and well,
> but children get SEGV after fork (most likely on any syscall64 they do,
> not after fork per se. They eventually manage to kill themselves -
> not trivial when exit syscall isn't working either - by tripping on HLT insn).
>
> 32-bit syscalls (int 80) continue to work. Fork, exec, whatever you want.
> I have static 32-bit busybox binary and everything works there.
>
> Also, any 64-bit process which was under strace continues to work correctly,
> including forks and execs.
>
> This points towards some bug on fast path sysret64 code. Looking for it.

audit=0 makes crashes disappear.

I found the problem. If syscall_trace_enter_phase1 returns 0,
I restore %rax from pt_regs->ax, but should restore it from
pt_regs->orig_ax:

        call syscall_trace_enter_phase1
        test %rax, %rax
        jnz tracesys_phase2             /* if needed, run the slow path */
-       RESTORE_C_REGS                  /* else restore clobbered regs */
+       RESTORE_C_REGS_EXCEPT_RAX       /* else restore clobbered regs */
+       movq ORIG_RAX-ARGOFFSET(%rsp),%rax
        jmp system_call_fastpath        /*      and return to the fast path */

^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH 2/3 v3] x86: entry_64.S: always allocate complete "struct pt_regs"
  2015-02-26 13:54                   ` Denys Vlasenko
@ 2015-02-26 14:26                     ` Sabrina Dubroca
  0 siblings, 0 replies; 130+ messages in thread
From: Sabrina Dubroca @ 2015-02-26 14:26 UTC (permalink / raw)
  To: Denys Vlasenko
  Cc: Andy Lutomirski, Denys Vlasenko, Andrey Wagin, Ingo Molnar,
	Linus Torvalds, Oleg Nesterov, Borislav Petkov, H. Peter Anvin,
	Frederic Weisbecker, X86 ML, Alexei Starovoitov, Will Drewry,
	Kees Cook, LKML

2015-02-26, 14:54:33 +0100, Denys Vlasenko wrote:
> On Thu, Feb 26, 2015 at 1:11 PM, Denys Vlasenko
> <vda.linux@googlemail.com> wrote:
> > On Thu, Feb 26, 2015 at 10:55 AM, Denys Vlasenko
> > <vda.linux@googlemail.com> wrote:
> >> On Wed, Feb 25, 2015 at 10:59 PM, Andy Lutomirski <luto@amacapital.net> wrote:
> >> In addition to my previous tests, I ran my home machine with
> >> patched kernel. Unfortunately, it works for me :(
> >>
> >> Will try on yet another machine.
> >
> > And voila, it does happen on another machine :)
> >
> > I'm debugging it right now. Looks like 64-bit syscalls just stop working
> > at some point in new processes. That is, existing process is alive and well,
> > but children get SEGV after fork (most likely on any syscall64 they do,
> > not after fork per se. They eventually manage to kill themselves -
> > not trivial when exit syscall isn't working either - by tripping on HLT insn).
> >
> > 32-bit syscalls (int 80) continue to work. Fork, exec, whatever you want.
> > I have static 32-bit busybox binary and everything works there.
> >
> > Also, any 64-bit process which was under strace continues to work correctly,
> > including forks and execs.
> >
> > This points towards some bug on fast path sysret64 code. Looking for it.
> 
> audit=0 makes crashes disappear.

Ah, yes.

> I found the problem. If syscall_trace_enter_phase1 returns 0,
> I restore %rax from pt_regs->ax, but should restore it from
> pt_regs->orig_ax:
> 
>         call syscall_trace_enter_phase1
>         test %rax, %rax
>         jnz tracesys_phase2             /* if needed, run the slow path */
> -       RESTORE_C_REGS                  /* else restore clobbered regs */
> +       RESTORE_C_REGS_EXCEPT_RAX       /* else restore clobbered regs */
> +       movq ORIG_RAX-ARGOFFSET(%rsp),%rax
>         jmp system_call_fastpath        /*      and return to the fast path */

with s/-ARGOFFSET// on top of next-20150224, that works.

Thanks, Denys.

-- 
Sabrina

^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH 2/3 v3] x86: entry_64.S: always allocate complete "struct pt_regs"
  2015-02-26  9:55               ` Denys Vlasenko
  2015-02-26 12:11                 ` Denys Vlasenko
@ 2015-02-26 15:14                 ` Andy Lutomirski
  1 sibling, 0 replies; 130+ messages in thread
From: Andy Lutomirski @ 2015-02-26 15:14 UTC (permalink / raw)
  To: Denys Vlasenko
  Cc: Borislav Petkov, X86 ML, Linus Torvalds, Andrey Wagin,
	Frederic Weisbecker, Denys Vlasenko, Oleg Nesterov, Ingo Molnar,
	LKML, H. Peter Anvin, Alexei Starovoitov, Kees Cook, Will Drewry

On Feb 26, 2015 1:55 AM, "Denys Vlasenko" <vda.linux@googlemail.com> wrote:
>
> On Wed, Feb 25, 2015 at 10:59 PM, Andy Lutomirski <luto@amacapital.net> wrote:
> > On Wed, Feb 25, 2015 at 1:28 PM, Denys Vlasenko <dvlasenk@redhat.com> wrote:
> >> On 02/25/2015 09:10 PM, Andy Lutomirski wrote:
> >> This part?
> >>
> >>         .macro FORK_LIKE func
> >>  ENTRY(stub_\func)
> >>         CFI_STARTPROC
> >> -       popq    %r11                    /* save return address */
> >> -       PARTIAL_FRAME 0
> >> -       SAVE_REST
> >> -       pushq   %r11                    /* put it back on stack */
> >> +       DEFAULT_FRAME 0, 8              /* offset 8: return address */
> >> +       SAVE_EXTRA_REGS 8
> >>         FIXUP_TOP_OF_STACK %r11, 8
> >> -       DEFAULT_FRAME 0 8               /* offset 8: return address */
> >>         call sys_\func
> >>         RESTORE_TOP_OF_STACK %r11, 8
> >> -       ret $REST_SKIP          /* pop extended registers */
> >> +       ret
> >>         CFI_ENDPROC
> >>  END(stub_\func)
> >>         .endm
> >>
> >>         FORK_LIKE  clone
> >>         FORK_LIKE  fork
> >>         FORK_LIKE  vfork
> >>
> >> But the old code (SAVE_REST thing) was also saving registers here.
> >> It had to jump through hoops (pop return address, SAVE_REST,
> >> push return address) to do that.
> >> After the patch, "SAVE_EXTRA_REGS 8" does the same, just without
> >> pop/push pair.
> >>
> >> I just don't see what's wrong with it. Can you elaborate?
> >
> > SAVE_REST pushed the regs onto the stack, whereas SAVE_EXTRA_REGS just
> > writes them in place.  It's possible for this to be called when the
> > regs have already been saved.
>
> If that would be the case - that is, if SAVE_REST was saving extra copy
> of registers on stack, then FIXUP_TOP_OF_STACK %r11, 8 would be working
> on wrong locations. The "8" there says "we have full pt_regs on stack,
> plus extra 8 bytes (the return address)". Your conjecture would mean
> that in fact there would be more bytes on stack, and FIXUP_TOP_OF_STACK
> would corrupt iret stack. Evidently, since old code was not crashing,
> this wasn't happening. SAVE_REST was really creating the "tail" of pt_regs

Ugh, you're right.

The FIXUP_TOP_OF_STACK indeed looks duplicated, bit t that's less
harmful and was already the case.

--Andy
.
>
> In addition to my previous tests, I ran my home machine with
> patched kernel. Unfortunately, it works for me :(
>
> Will try on yet another machine.
>
> --
> vda

^ permalink raw reply	[flat|nested] 130+ messages in thread

* [PATCH 00/16] x86/asm changes for 4.1 for review
@ 2015-02-26 22:40 Andy Lutomirski
  2015-02-26 22:40 ` [PATCH 01/16] x86: open-code register save/restore in trace_hardirqs thunks Andy Lutomirski
                   ` (16 more replies)
  0 siblings, 17 replies; 130+ messages in thread
From: Andy Lutomirski @ 2015-02-26 22:40 UTC (permalink / raw)
  To: x86, linux-kernel
  Cc: Borislav Petkov, Oleg Nesterov, Denys Vlasenko, Andy Lutomirski

Hi all-

After much handwringing, painful review, and curation, here's round 1 of
the x86/asm changes.  (This is not intended to imply that there will or
will not be a round 2.)

For ease of review for git users:

The following changes since commit 7453311d68f16a5c587c3cbf19563c9a4fbbd41a:

  Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip (2015-02-09 17:16:44 -0800)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/luto/linux.git 

for you to fetch changes up to 4710be56d76ef994ddf59087aad98c000fbab9a4:

  x86_64, entry: Remove a bogus ret_from_fork optimization (2015-02-26 07:09:36 -0800)

Ingo, if you like these, feel free to pull.  I will try to review the scary
change *again* this evening if I have time.  In the mean time, these are
queued as is for -next.  I will remove them from my -next branch if they
turn out to be buggy or if they end up in -tip in any form.

Andy Lutomirski (2):
  x86, entry: Remove int_check_syscall_exit_work
  x86_64, entry: Remove a bogus ret_from_fork optimization

Denys Vlasenko (14):
  x86: open-code register save/restore in trace_hardirqs thunks
  x86: introduce push/pop macros which generate CFI_REL_OFFSET and
    CFI_RESTORE
  x86: entry_64.S: fix wrong symbolic constant usage: R11->ARGOFFSET
  x86: entry_64.S: always allocate complete "struct pt_regs"
  x86: entry_64.S: fix comments. No code changes
  x86: code shrink in paranoid_exit
  x86: mass removal of ARGOFFSET. No code changes
  x86: add comments about various syscall instructions, no code changes
  x86: entry_64.S: move save_paranoid and ret_from_fork closer to their
    users
  x86: entry_64.S: rename save_paranoid to paranoid_entry, no code
    changes
  x86: ia32entry.S: fold IA32_ARG_FIXUP macro into its callers
  x86: entry_64.S: use more understandable constants
  x86: ia32entry.S: use more understandable constant
  x86: entry.S: simplify optimistic SYSRET

 arch/x86/ia32/ia32entry.S              | 342 ++++++++++++----------
 arch/x86/include/asm/calling.h         | 267 ++++++++---------
 arch/x86/include/asm/dwarf2.h          |  24 ++
 arch/x86/include/asm/irqflags.h        |   4 +-
 arch/x86/include/asm/ptrace.h          |  13 +-
 arch/x86/include/uapi/asm/ptrace-abi.h |  16 +-
 arch/x86/include/uapi/asm/ptrace.h     |  13 +-
 arch/x86/kernel/entry_32.S             |  21 +-
 arch/x86/kernel/entry_64.S             | 510 ++++++++++++++++-----------------
 arch/x86/lib/atomic64_cx8_32.S         |  50 ++--
 arch/x86/lib/checksum_32.S             |  60 ++--
 arch/x86/lib/msr-reg.S                 |  24 +-
 arch/x86/lib/rwsem.S                   |  44 ++-
 arch/x86/lib/thunk_32.S                |  18 +-
 arch/x86/lib/thunk_64.S                |  28 +-
 15 files changed, 724 insertions(+), 710 deletions(-)

-- 
2.1.0


^ permalink raw reply	[flat|nested] 130+ messages in thread

* [PATCH 01/16] x86: open-code register save/restore in trace_hardirqs thunks
  2015-02-26 22:40 [PATCH 00/16] x86/asm changes for 4.1 for review Andy Lutomirski
@ 2015-02-26 22:40 ` Andy Lutomirski
  2015-03-04 22:52   ` [tip:x86/asm] x86/asm/64: Open-code register save/ restore in trace_hardirqs*() thunks tip-bot for Denys Vlasenko
  2015-02-26 22:40 ` [PATCH 02/16] x86: introduce push/pop macros which generate CFI_REL_OFFSET and CFI_RESTORE Andy Lutomirski
                   ` (15 subsequent siblings)
  16 siblings, 1 reply; 130+ messages in thread
From: Andy Lutomirski @ 2015-02-26 22:40 UTC (permalink / raw)
  To: x86, linux-kernel
  Cc: Borislav Petkov, Oleg Nesterov, Denys Vlasenko, Linus Torvalds,
	H. Peter Anvin, Andy Lutomirski, Frederic Weisbecker,
	Alexei Starovoitov, Will Drewry, Kees Cook

From: Denys Vlasenko <dvlasenk@redhat.com>

This is a preparatory patch for change in "struct pt_regs"
handling in entry_64.S.

trace_hardirqs thunks were (ab)using a part of pt_regs
handling code, namely SAVE_ARGS/RESTORE_ARGS macros,
to save/restore registers across C function calls.

Since SAVE_ARGS is going to be changed, open-code
register saving/restoring here.

Incidentally, this removes a bit of dead code:
one SAVE_ARGS was used just to emit a CFI annotation,
but it also generated unreachable assembly insns.

Take a page from thunk_32.S and use push/pop insns
instead of movq, they are far shorter:
1 or 2 bytes versus 5, and no need for insns to adjust %rsp:

   text	   data	    bss	    dec	    hex	filename
    333	     40	      0	    373	    175	thunk_64_movq.o
    104	     40	      0	    144	     90	thunk_64_push_pop.o

[This is ugly as sin, but we'll fix up the ugliness in the next patch.
 I see no point in reordering patches just to avoid an ugly intermediate
 state.  --Andy]

Link: http://lkml.kernel.org/r/1420927210-19738-4-git-send-email-dvlasenk@redhat.com
CC: Linus Torvalds <torvalds@linux-foundation.org>
CC: Oleg Nesterov <oleg@redhat.com>
CC: "H. Peter Anvin" <hpa@zytor.com>
CC: Borislav Petkov <bp@alien8.de>
CC: Andy Lutomirski <luto@amacapital.net>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: X86 ML <x86@kernel.org>
CC: Alexei Starovoitov <ast@plumgrid.com>
CC: Will Drewry <wad@chromium.org>
CC: Kees Cook <keescook@chromium.org>
CC: linux-kernel@vger.kernel.org
Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
Signed-off-by: Andy Lutomirski <luto@amacapital.net>
---
 arch/x86/lib/thunk_64.S | 46 ++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 42 insertions(+), 4 deletions(-)

diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S
index b30b5ebd614a..8ec443a0777b 100644
--- a/arch/x86/lib/thunk_64.S
+++ b/arch/x86/lib/thunk_64.S
@@ -17,9 +17,27 @@
 	CFI_STARTPROC
 
 	/* this one pushes 9 elems, the next one would be %rIP */
-	SAVE_ARGS
+	pushq_cfi %rdi
+	CFI_REL_OFFSET rdi, 0
+	pushq_cfi %rsi
+	CFI_REL_OFFSET rsi, 0
+	pushq_cfi %rdx
+	CFI_REL_OFFSET rdx, 0
+	pushq_cfi %rcx
+	CFI_REL_OFFSET rcx, 0
+	pushq_cfi %rax
+	CFI_REL_OFFSET rax, 0
+	pushq_cfi %r8
+	CFI_REL_OFFSET r8, 0
+	pushq_cfi %r9
+	CFI_REL_OFFSET r9, 0
+	pushq_cfi %r10
+	CFI_REL_OFFSET r10, 0
+	pushq_cfi %r11
+	CFI_REL_OFFSET r11, 0
 
 	.if \put_ret_addr_in_rdi
+	/* 9*8(%rsp) is return addr on stack */
 	movq_cfi_restore 9*8, rdi
 	.endif
 
@@ -45,11 +63,31 @@
 #endif
 #endif
 
-	/* SAVE_ARGS below is used only for the .cfi directives it contains. */
+#if defined(CONFIG_TRACE_IRQFLAGS) \
+ || defined(CONFIG_DEBUG_LOCK_ALLOC) \
+ || defined(CONFIG_PREEMPT)
 	CFI_STARTPROC
-	SAVE_ARGS
+	CFI_ADJUST_CFA_OFFSET 9*8
 restore:
-	RESTORE_ARGS
+	popq_cfi %r11
+	CFI_RESTORE r11
+	popq_cfi %r10
+	CFI_RESTORE r10
+	popq_cfi %r9
+	CFI_RESTORE r9
+	popq_cfi %r8
+	CFI_RESTORE r8
+	popq_cfi %rax
+	CFI_RESTORE rax
+	popq_cfi %rcx
+	CFI_RESTORE rcx
+	popq_cfi %rdx
+	CFI_RESTORE rdx
+	popq_cfi %rsi
+	CFI_RESTORE rsi
+	popq_cfi %rdi
+	CFI_RESTORE rdi
 	ret
 	CFI_ENDPROC
 	_ASM_NOKPROBE(restore)
+#endif
-- 
2.1.0


^ permalink raw reply	[flat|nested] 130+ messages in thread

* [PATCH 02/16] x86: introduce push/pop macros which generate CFI_REL_OFFSET and CFI_RESTORE
  2015-02-26 22:40 [PATCH 00/16] x86/asm changes for 4.1 for review Andy Lutomirski
  2015-02-26 22:40 ` [PATCH 01/16] x86: open-code register save/restore in trace_hardirqs thunks Andy Lutomirski
@ 2015-02-26 22:40 ` Andy Lutomirski
  2015-03-04 22:52   ` [tip:x86/asm] x86/asm: Introduce push/ pop " tip-bot for Denys Vlasenko
  2015-02-26 22:40 ` [PATCH 03/16] x86: entry_64.S: fix wrong symbolic constant usage: R11->ARGOFFSET Andy Lutomirski
                   ` (14 subsequent siblings)
  16 siblings, 1 reply; 130+ messages in thread
From: Andy Lutomirski @ 2015-02-26 22:40 UTC (permalink / raw)
  To: x86, linux-kernel
  Cc: Borislav Petkov, Oleg Nesterov, Denys Vlasenko, Linus Torvalds,
	H. Peter Anvin, Andy Lutomirski, Frederic Weisbecker,
	Alexei Starovoitov, Will Drewry, Kees Cook

From: Denys Vlasenko <dvlasenk@redhat.com>

Sequences
        pushl_cfi %reg
        CFI_REL_OFFSET reg, 0
and
        popl_cfi %reg
        CFI_RESTORE reg
happen quite often. This patch adds macros which generate them.

No assembly changes (verified with objdump -dr vmlinux.o).

Link: http://lkml.kernel.org/r/1421017655-25561-1-git-send-email-dvlasenk@redhat.com
CC: Linus Torvalds <torvalds@linux-foundation.org>
CC: Oleg Nesterov <oleg@redhat.com>
CC: "H. Peter Anvin" <hpa@zytor.com>
CC: Borislav Petkov <bp@alien8.de>
CC: Andy Lutomirski <luto@amacapital.net>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: X86 ML <x86@kernel.org>
CC: Alexei Starovoitov <ast@plumgrid.com>
CC: Will Drewry <wad@chromium.org>
CC: Kees Cook <keescook@chromium.org>
CC: linux-kernel@vger.kernel.org
Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
Signed-off-by: Andy Lutomirski <luto@amacapital.net>
---
 arch/x86/include/asm/calling.h | 42 ++++++++++-------------------
 arch/x86/include/asm/dwarf2.h  | 24 +++++++++++++++++
 arch/x86/kernel/entry_32.S     | 21 +++++----------
 arch/x86/lib/atomic64_cx8_32.S | 50 ++++++++++++++---------------------
 arch/x86/lib/checksum_32.S     | 60 ++++++++++++++----------------------------
 arch/x86/lib/msr-reg.S         | 24 ++++++++---------
 arch/x86/lib/rwsem.S           | 44 ++++++++++++++-----------------
 arch/x86/lib/thunk_32.S        | 18 +++++--------
 arch/x86/lib/thunk_64.S        | 54 +++++++++++++------------------------
 9 files changed, 141 insertions(+), 196 deletions(-)

diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h
index 1f1297b46f83..3c711f2ab236 100644
--- a/arch/x86/include/asm/calling.h
+++ b/arch/x86/include/asm/calling.h
@@ -210,37 +210,23 @@ For 32-bit we have the following conventions - kernel is built with
  */
 
 	.macro SAVE_ALL
-	pushl_cfi %eax
-	CFI_REL_OFFSET eax, 0
-	pushl_cfi %ebp
-	CFI_REL_OFFSET ebp, 0
-	pushl_cfi %edi
-	CFI_REL_OFFSET edi, 0
-	pushl_cfi %esi
-	CFI_REL_OFFSET esi, 0
-	pushl_cfi %edx
-	CFI_REL_OFFSET edx, 0
-	pushl_cfi %ecx
-	CFI_REL_OFFSET ecx, 0
-	pushl_cfi %ebx
-	CFI_REL_OFFSET ebx, 0
+	pushl_cfi_reg eax
+	pushl_cfi_reg ebp
+	pushl_cfi_reg edi
+	pushl_cfi_reg esi
+	pushl_cfi_reg edx
+	pushl_cfi_reg ecx
+	pushl_cfi_reg ebx
 	.endm
 
 	.macro RESTORE_ALL
-	popl_cfi %ebx
-	CFI_RESTORE ebx
-	popl_cfi %ecx
-	CFI_RESTORE ecx
-	popl_cfi %edx
-	CFI_RESTORE edx
-	popl_cfi %esi
-	CFI_RESTORE esi
-	popl_cfi %edi
-	CFI_RESTORE edi
-	popl_cfi %ebp
-	CFI_RESTORE ebp
-	popl_cfi %eax
-	CFI_RESTORE eax
+	popl_cfi_reg ebx
+	popl_cfi_reg ecx
+	popl_cfi_reg edx
+	popl_cfi_reg esi
+	popl_cfi_reg edi
+	popl_cfi_reg ebp
+	popl_cfi_reg eax
 	.endm
 
 #endif /* CONFIG_X86_64 */
diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h
index f6f15986df6c..de1cdaf4d743 100644
--- a/arch/x86/include/asm/dwarf2.h
+++ b/arch/x86/include/asm/dwarf2.h
@@ -86,11 +86,23 @@
 	CFI_ADJUST_CFA_OFFSET 8
 	.endm
 
+	.macro pushq_cfi_reg reg
+	pushq %\reg
+	CFI_ADJUST_CFA_OFFSET 8
+	CFI_REL_OFFSET \reg, 0
+	.endm
+
 	.macro popq_cfi reg
 	popq \reg
 	CFI_ADJUST_CFA_OFFSET -8
 	.endm
 
+	.macro popq_cfi_reg reg
+	popq %\reg
+	CFI_ADJUST_CFA_OFFSET -8
+	CFI_RESTORE \reg
+	.endm
+
 	.macro pushfq_cfi
 	pushfq
 	CFI_ADJUST_CFA_OFFSET 8
@@ -116,11 +128,23 @@
 	CFI_ADJUST_CFA_OFFSET 4
 	.endm
 
+	.macro pushl_cfi_reg reg
+	pushl %\reg
+	CFI_ADJUST_CFA_OFFSET 4
+	CFI_REL_OFFSET \reg, 0
+	.endm
+
 	.macro popl_cfi reg
 	popl \reg
 	CFI_ADJUST_CFA_OFFSET -4
 	.endm
 
+	.macro popl_cfi_reg reg
+	popl %\reg
+	CFI_ADJUST_CFA_OFFSET -4
+	CFI_RESTORE \reg
+	.endm
+
 	.macro pushfl_cfi
 	pushfl
 	CFI_ADJUST_CFA_OFFSET 4
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 000d4199b03e..a4a2eaa604a5 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1237,20 +1237,13 @@ error_code:
 	/*CFI_REL_OFFSET es, 0*/
 	pushl_cfi %ds
 	/*CFI_REL_OFFSET ds, 0*/
-	pushl_cfi %eax
-	CFI_REL_OFFSET eax, 0
-	pushl_cfi %ebp
-	CFI_REL_OFFSET ebp, 0
-	pushl_cfi %edi
-	CFI_REL_OFFSET edi, 0
-	pushl_cfi %esi
-	CFI_REL_OFFSET esi, 0
-	pushl_cfi %edx
-	CFI_REL_OFFSET edx, 0
-	pushl_cfi %ecx
-	CFI_REL_OFFSET ecx, 0
-	pushl_cfi %ebx
-	CFI_REL_OFFSET ebx, 0
+	pushl_cfi_reg eax
+	pushl_cfi_reg ebp
+	pushl_cfi_reg edi
+	pushl_cfi_reg esi
+	pushl_cfi_reg edx
+	pushl_cfi_reg ecx
+	pushl_cfi_reg ebx
 	cld
 	movl $(__KERNEL_PERCPU), %ecx
 	movl %ecx, %fs
diff --git a/arch/x86/lib/atomic64_cx8_32.S b/arch/x86/lib/atomic64_cx8_32.S
index f5cc9eb1d51b..082a85167a5b 100644
--- a/arch/x86/lib/atomic64_cx8_32.S
+++ b/arch/x86/lib/atomic64_cx8_32.S
@@ -13,16 +13,6 @@
 #include <asm/alternative-asm.h>
 #include <asm/dwarf2.h>
 
-.macro SAVE reg
-	pushl_cfi %\reg
-	CFI_REL_OFFSET \reg, 0
-.endm
-
-.macro RESTORE reg
-	popl_cfi %\reg
-	CFI_RESTORE \reg
-.endm
-
 .macro read64 reg
 	movl %ebx, %eax
 	movl %ecx, %edx
@@ -67,10 +57,10 @@ ENDPROC(atomic64_xchg_cx8)
 .macro addsub_return func ins insc
 ENTRY(atomic64_\func\()_return_cx8)
 	CFI_STARTPROC
-	SAVE ebp
-	SAVE ebx
-	SAVE esi
-	SAVE edi
+	pushl_cfi_reg ebp
+	pushl_cfi_reg ebx
+	pushl_cfi_reg esi
+	pushl_cfi_reg edi
 
 	movl %eax, %esi
 	movl %edx, %edi
@@ -89,10 +79,10 @@ ENTRY(atomic64_\func\()_return_cx8)
 10:
 	movl %ebx, %eax
 	movl %ecx, %edx
-	RESTORE edi
-	RESTORE esi
-	RESTORE ebx
-	RESTORE ebp
+	popl_cfi_reg edi
+	popl_cfi_reg esi
+	popl_cfi_reg ebx
+	popl_cfi_reg ebp
 	ret
 	CFI_ENDPROC
 ENDPROC(atomic64_\func\()_return_cx8)
@@ -104,7 +94,7 @@ addsub_return sub sub sbb
 .macro incdec_return func ins insc
 ENTRY(atomic64_\func\()_return_cx8)
 	CFI_STARTPROC
-	SAVE ebx
+	pushl_cfi_reg ebx
 
 	read64 %esi
 1:
@@ -119,7 +109,7 @@ ENTRY(atomic64_\func\()_return_cx8)
 10:
 	movl %ebx, %eax
 	movl %ecx, %edx
-	RESTORE ebx
+	popl_cfi_reg ebx
 	ret
 	CFI_ENDPROC
 ENDPROC(atomic64_\func\()_return_cx8)
@@ -130,7 +120,7 @@ incdec_return dec sub sbb
 
 ENTRY(atomic64_dec_if_positive_cx8)
 	CFI_STARTPROC
-	SAVE ebx
+	pushl_cfi_reg ebx
 
 	read64 %esi
 1:
@@ -146,18 +136,18 @@ ENTRY(atomic64_dec_if_positive_cx8)
 2:
 	movl %ebx, %eax
 	movl %ecx, %edx
-	RESTORE ebx
+	popl_cfi_reg ebx
 	ret
 	CFI_ENDPROC
 ENDPROC(atomic64_dec_if_positive_cx8)
 
 ENTRY(atomic64_add_unless_cx8)
 	CFI_STARTPROC
-	SAVE ebp
-	SAVE ebx
+	pushl_cfi_reg ebp
+	pushl_cfi_reg ebx
 /* these just push these two parameters on the stack */
-	SAVE edi
-	SAVE ecx
+	pushl_cfi_reg edi
+	pushl_cfi_reg ecx
 
 	movl %eax, %ebp
 	movl %edx, %edi
@@ -179,8 +169,8 @@ ENTRY(atomic64_add_unless_cx8)
 3:
 	addl $8, %esp
 	CFI_ADJUST_CFA_OFFSET -8
-	RESTORE ebx
-	RESTORE ebp
+	popl_cfi_reg ebx
+	popl_cfi_reg ebp
 	ret
 4:
 	cmpl %edx, 4(%esp)
@@ -192,7 +182,7 @@ ENDPROC(atomic64_add_unless_cx8)
 
 ENTRY(atomic64_inc_not_zero_cx8)
 	CFI_STARTPROC
-	SAVE ebx
+	pushl_cfi_reg ebx
 
 	read64 %esi
 1:
@@ -209,7 +199,7 @@ ENTRY(atomic64_inc_not_zero_cx8)
 
 	movl $1, %eax
 3:
-	RESTORE ebx
+	popl_cfi_reg ebx
 	ret
 	CFI_ENDPROC
 ENDPROC(atomic64_inc_not_zero_cx8)
diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S
index e78b8eee6615..c3b9953d3fa0 100644
--- a/arch/x86/lib/checksum_32.S
+++ b/arch/x86/lib/checksum_32.S
@@ -51,10 +51,8 @@ unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
 	   */		
 ENTRY(csum_partial)
 	CFI_STARTPROC
-	pushl_cfi %esi
-	CFI_REL_OFFSET esi, 0
-	pushl_cfi %ebx
-	CFI_REL_OFFSET ebx, 0
+	pushl_cfi_reg esi
+	pushl_cfi_reg ebx
 	movl 20(%esp),%eax	# Function arg: unsigned int sum
 	movl 16(%esp),%ecx	# Function arg: int len
 	movl 12(%esp),%esi	# Function arg: unsigned char *buff
@@ -131,10 +129,8 @@ ENTRY(csum_partial)
 	jz 8f
 	roll $8, %eax
 8:
-	popl_cfi %ebx
-	CFI_RESTORE ebx
-	popl_cfi %esi
-	CFI_RESTORE esi
+	popl_cfi_reg ebx
+	popl_cfi_reg esi
 	ret
 	CFI_ENDPROC
 ENDPROC(csum_partial)
@@ -145,10 +141,8 @@ ENDPROC(csum_partial)
 
 ENTRY(csum_partial)
 	CFI_STARTPROC
-	pushl_cfi %esi
-	CFI_REL_OFFSET esi, 0
-	pushl_cfi %ebx
-	CFI_REL_OFFSET ebx, 0
+	pushl_cfi_reg esi
+	pushl_cfi_reg ebx
 	movl 20(%esp),%eax	# Function arg: unsigned int sum
 	movl 16(%esp),%ecx	# Function arg: int len
 	movl 12(%esp),%esi	# Function arg:	const unsigned char *buf
@@ -255,10 +249,8 @@ ENTRY(csum_partial)
 	jz 90f
 	roll $8, %eax
 90: 
-	popl_cfi %ebx
-	CFI_RESTORE ebx
-	popl_cfi %esi
-	CFI_RESTORE esi
+	popl_cfi_reg ebx
+	popl_cfi_reg esi
 	ret
 	CFI_ENDPROC
 ENDPROC(csum_partial)
@@ -298,12 +290,9 @@ ENTRY(csum_partial_copy_generic)
 	CFI_STARTPROC
 	subl  $4,%esp	
 	CFI_ADJUST_CFA_OFFSET 4
-	pushl_cfi %edi
-	CFI_REL_OFFSET edi, 0
-	pushl_cfi %esi
-	CFI_REL_OFFSET esi, 0
-	pushl_cfi %ebx
-	CFI_REL_OFFSET ebx, 0
+	pushl_cfi_reg edi
+	pushl_cfi_reg esi
+	pushl_cfi_reg ebx
 	movl ARGBASE+16(%esp),%eax	# sum
 	movl ARGBASE+12(%esp),%ecx	# len
 	movl ARGBASE+4(%esp),%esi	# src
@@ -412,12 +401,9 @@ DST(	movb %cl, (%edi)	)
 
 .previous
 
-	popl_cfi %ebx
-	CFI_RESTORE ebx
-	popl_cfi %esi
-	CFI_RESTORE esi
-	popl_cfi %edi
-	CFI_RESTORE edi
+	popl_cfi_reg ebx
+	popl_cfi_reg esi
+	popl_cfi_reg edi
 	popl_cfi %ecx			# equivalent to addl $4,%esp
 	ret	
 	CFI_ENDPROC
@@ -441,12 +427,9 @@ ENDPROC(csum_partial_copy_generic)
 		
 ENTRY(csum_partial_copy_generic)
 	CFI_STARTPROC
-	pushl_cfi %ebx
-	CFI_REL_OFFSET ebx, 0
-	pushl_cfi %edi
-	CFI_REL_OFFSET edi, 0
-	pushl_cfi %esi
-	CFI_REL_OFFSET esi, 0
+	pushl_cfi_reg ebx
+	pushl_cfi_reg edi
+	pushl_cfi_reg esi
 	movl ARGBASE+4(%esp),%esi	#src
 	movl ARGBASE+8(%esp),%edi	#dst	
 	movl ARGBASE+12(%esp),%ecx	#len
@@ -506,12 +489,9 @@ DST(	movb %dl, (%edi)         )
 	jmp  7b			
 .previous				
 
-	popl_cfi %esi
-	CFI_RESTORE esi
-	popl_cfi %edi
-	CFI_RESTORE edi
-	popl_cfi %ebx
-	CFI_RESTORE ebx
+	popl_cfi_reg esi
+	popl_cfi_reg edi
+	popl_cfi_reg ebx
 	ret
 	CFI_ENDPROC
 ENDPROC(csum_partial_copy_generic)
diff --git a/arch/x86/lib/msr-reg.S b/arch/x86/lib/msr-reg.S
index f6d13eefad10..3ca5218fbece 100644
--- a/arch/x86/lib/msr-reg.S
+++ b/arch/x86/lib/msr-reg.S
@@ -14,8 +14,8 @@
 .macro op_safe_regs op
 ENTRY(\op\()_safe_regs)
 	CFI_STARTPROC
-	pushq_cfi %rbx
-	pushq_cfi %rbp
+	pushq_cfi_reg rbx
+	pushq_cfi_reg rbp
 	movq	%rdi, %r10	/* Save pointer */
 	xorl	%r11d, %r11d	/* Return value */
 	movl    (%rdi), %eax
@@ -35,8 +35,8 @@ ENTRY(\op\()_safe_regs)
 	movl    %ebp, 20(%r10)
 	movl    %esi, 24(%r10)
 	movl    %edi, 28(%r10)
-	popq_cfi %rbp
-	popq_cfi %rbx
+	popq_cfi_reg rbp
+	popq_cfi_reg rbx
 	ret
 3:
 	CFI_RESTORE_STATE
@@ -53,10 +53,10 @@ ENDPROC(\op\()_safe_regs)
 .macro op_safe_regs op
 ENTRY(\op\()_safe_regs)
 	CFI_STARTPROC
-	pushl_cfi %ebx
-	pushl_cfi %ebp
-	pushl_cfi %esi
-	pushl_cfi %edi
+	pushl_cfi_reg ebx
+	pushl_cfi_reg ebp
+	pushl_cfi_reg esi
+	pushl_cfi_reg edi
 	pushl_cfi $0              /* Return value */
 	pushl_cfi %eax
 	movl    4(%eax), %ecx
@@ -80,10 +80,10 @@ ENTRY(\op\()_safe_regs)
 	movl    %esi, 24(%eax)
 	movl    %edi, 28(%eax)
 	popl_cfi %eax
-	popl_cfi %edi
-	popl_cfi %esi
-	popl_cfi %ebp
-	popl_cfi %ebx
+	popl_cfi_reg edi
+	popl_cfi_reg esi
+	popl_cfi_reg ebp
+	popl_cfi_reg ebx
 	ret
 3:
 	CFI_RESTORE_STATE
diff --git a/arch/x86/lib/rwsem.S b/arch/x86/lib/rwsem.S
index 5dff5f042468..2322abe4da3b 100644
--- a/arch/x86/lib/rwsem.S
+++ b/arch/x86/lib/rwsem.S
@@ -34,10 +34,10 @@
  */
 
 #define save_common_regs \
-	pushl_cfi %ecx; CFI_REL_OFFSET ecx, 0
+	pushl_cfi_reg ecx
 
 #define restore_common_regs \
-	popl_cfi %ecx; CFI_RESTORE ecx
+	popl_cfi_reg ecx
 
 	/* Avoid uglifying the argument copying x86-64 needs to do. */
 	.macro movq src, dst
@@ -64,22 +64,22 @@
  */
 
 #define save_common_regs \
-	pushq_cfi %rdi; CFI_REL_OFFSET rdi, 0; \
-	pushq_cfi %rsi; CFI_REL_OFFSET rsi, 0; \
-	pushq_cfi %rcx; CFI_REL_OFFSET rcx, 0; \
-	pushq_cfi %r8;  CFI_REL_OFFSET r8,  0; \
-	pushq_cfi %r9;  CFI_REL_OFFSET r9,  0; \
-	pushq_cfi %r10; CFI_REL_OFFSET r10, 0; \
-	pushq_cfi %r11; CFI_REL_OFFSET r11, 0
+	pushq_cfi_reg rdi; \
+	pushq_cfi_reg rsi; \
+	pushq_cfi_reg rcx; \
+	pushq_cfi_reg r8;  \
+	pushq_cfi_reg r9;  \
+	pushq_cfi_reg r10; \
+	pushq_cfi_reg r11
 
 #define restore_common_regs \
-	popq_cfi %r11; CFI_RESTORE r11; \
-	popq_cfi %r10; CFI_RESTORE r10; \
-	popq_cfi %r9;  CFI_RESTORE r9; \
-	popq_cfi %r8;  CFI_RESTORE r8; \
-	popq_cfi %rcx; CFI_RESTORE rcx; \
-	popq_cfi %rsi; CFI_RESTORE rsi; \
-	popq_cfi %rdi; CFI_RESTORE rdi
+	popq_cfi_reg r11; \
+	popq_cfi_reg r10; \
+	popq_cfi_reg r9; \
+	popq_cfi_reg r8; \
+	popq_cfi_reg rcx; \
+	popq_cfi_reg rsi; \
+	popq_cfi_reg rdi
 
 #endif
 
@@ -87,12 +87,10 @@
 ENTRY(call_rwsem_down_read_failed)
 	CFI_STARTPROC
 	save_common_regs
-	__ASM_SIZE(push,_cfi) %__ASM_REG(dx)
-	CFI_REL_OFFSET __ASM_REG(dx), 0
+	__ASM_SIZE(push,_cfi_reg) __ASM_REG(dx)
 	movq %rax,%rdi
 	call rwsem_down_read_failed
-	__ASM_SIZE(pop,_cfi) %__ASM_REG(dx)
-	CFI_RESTORE __ASM_REG(dx)
+	__ASM_SIZE(pop,_cfi_reg) __ASM_REG(dx)
 	restore_common_regs
 	ret
 	CFI_ENDPROC
@@ -124,12 +122,10 @@ ENDPROC(call_rwsem_wake)
 ENTRY(call_rwsem_downgrade_wake)
 	CFI_STARTPROC
 	save_common_regs
-	__ASM_SIZE(push,_cfi) %__ASM_REG(dx)
-	CFI_REL_OFFSET __ASM_REG(dx), 0
+	__ASM_SIZE(push,_cfi_reg) __ASM_REG(dx)
 	movq %rax,%rdi
 	call rwsem_downgrade_wake
-	__ASM_SIZE(pop,_cfi) %__ASM_REG(dx)
-	CFI_RESTORE __ASM_REG(dx)
+	__ASM_SIZE(pop,_cfi_reg) __ASM_REG(dx)
 	restore_common_regs
 	ret
 	CFI_ENDPROC
diff --git a/arch/x86/lib/thunk_32.S b/arch/x86/lib/thunk_32.S
index e28cdaf5ac2c..5eb715087b80 100644
--- a/arch/x86/lib/thunk_32.S
+++ b/arch/x86/lib/thunk_32.S
@@ -13,12 +13,9 @@
 	.globl \name
 \name:
 	CFI_STARTPROC
-	pushl_cfi %eax
-	CFI_REL_OFFSET eax, 0
-	pushl_cfi %ecx
-	CFI_REL_OFFSET ecx, 0
-	pushl_cfi %edx
-	CFI_REL_OFFSET edx, 0
+	pushl_cfi_reg eax
+	pushl_cfi_reg ecx
+	pushl_cfi_reg edx
 
 	.if \put_ret_addr_in_eax
 	/* Place EIP in the arg1 */
@@ -26,12 +23,9 @@
 	.endif
 
 	call \func
-	popl_cfi %edx
-	CFI_RESTORE edx
-	popl_cfi %ecx
-	CFI_RESTORE ecx
-	popl_cfi %eax
-	CFI_RESTORE eax
+	popl_cfi_reg edx
+	popl_cfi_reg ecx
+	popl_cfi_reg eax
 	ret
 	CFI_ENDPROC
 	_ASM_NOKPROBE(\name)
diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S
index 8ec443a0777b..f89ba4e93025 100644
--- a/arch/x86/lib/thunk_64.S
+++ b/arch/x86/lib/thunk_64.S
@@ -17,24 +17,15 @@
 	CFI_STARTPROC
 
 	/* this one pushes 9 elems, the next one would be %rIP */
-	pushq_cfi %rdi
-	CFI_REL_OFFSET rdi, 0
-	pushq_cfi %rsi
-	CFI_REL_OFFSET rsi, 0
-	pushq_cfi %rdx
-	CFI_REL_OFFSET rdx, 0
-	pushq_cfi %rcx
-	CFI_REL_OFFSET rcx, 0
-	pushq_cfi %rax
-	CFI_REL_OFFSET rax, 0
-	pushq_cfi %r8
-	CFI_REL_OFFSET r8, 0
-	pushq_cfi %r9
-	CFI_REL_OFFSET r9, 0
-	pushq_cfi %r10
-	CFI_REL_OFFSET r10, 0
-	pushq_cfi %r11
-	CFI_REL_OFFSET r11, 0
+	pushq_cfi_reg rdi
+	pushq_cfi_reg rsi
+	pushq_cfi_reg rdx
+	pushq_cfi_reg rcx
+	pushq_cfi_reg rax
+	pushq_cfi_reg r8
+	pushq_cfi_reg r9
+	pushq_cfi_reg r10
+	pushq_cfi_reg r11
 
 	.if \put_ret_addr_in_rdi
 	/* 9*8(%rsp) is return addr on stack */
@@ -69,24 +60,15 @@
 	CFI_STARTPROC
 	CFI_ADJUST_CFA_OFFSET 9*8
 restore:
-	popq_cfi %r11
-	CFI_RESTORE r11
-	popq_cfi %r10
-	CFI_RESTORE r10
-	popq_cfi %r9
-	CFI_RESTORE r9
-	popq_cfi %r8
-	CFI_RESTORE r8
-	popq_cfi %rax
-	CFI_RESTORE rax
-	popq_cfi %rcx
-	CFI_RESTORE rcx
-	popq_cfi %rdx
-	CFI_RESTORE rdx
-	popq_cfi %rsi
-	CFI_RESTORE rsi
-	popq_cfi %rdi
-	CFI_RESTORE rdi
+	popq_cfi_reg r11
+	popq_cfi_reg r10
+	popq_cfi_reg r9
+	popq_cfi_reg r8
+	popq_cfi_reg rax
+	popq_cfi_reg rcx
+	popq_cfi_reg rdx
+	popq_cfi_reg rsi
+	popq_cfi_reg rdi
 	ret
 	CFI_ENDPROC
 	_ASM_NOKPROBE(restore)
-- 
2.1.0


^ permalink raw reply	[flat|nested] 130+ messages in thread

* [PATCH 03/16] x86: entry_64.S: fix wrong symbolic constant usage: R11->ARGOFFSET
  2015-02-26 22:40 [PATCH 00/16] x86/asm changes for 4.1 for review Andy Lutomirski
  2015-02-26 22:40 ` [PATCH 01/16] x86: open-code register save/restore in trace_hardirqs thunks Andy Lutomirski
  2015-02-26 22:40 ` [PATCH 02/16] x86: introduce push/pop macros which generate CFI_REL_OFFSET and CFI_RESTORE Andy Lutomirski
@ 2015-02-26 22:40 ` Andy Lutomirski
  2015-03-04 22:53   ` [tip:x86/asm] x86/asm/entry/64: Fix incorrect " tip-bot for Denys Vlasenko
  2015-02-26 22:40 ` [PATCH 04/16] x86: entry_64.S: always allocate complete "struct pt_regs" Andy Lutomirski
                   ` (13 subsequent siblings)
  16 siblings, 1 reply; 130+ messages in thread
From: Andy Lutomirski @ 2015-02-26 22:40 UTC (permalink / raw)
  To: x86, linux-kernel
  Cc: Borislav Petkov, Oleg Nesterov, Denys Vlasenko, Linus Torvalds,
	H. Peter Anvin, Andy Lutomirski, Frederic Weisbecker,
	Alexei Starovoitov, Will Drewry, Kees Cook

From: Denys Vlasenko <dvlasenk@redhat.com>

Since the last fix of this nature, few more instances have crept in.
Fix them up. No object code changes (constants have the same value).

[mea culpa --Andy]

Link: http://lkml.kernel.org/r/1423778052-21038-1-git-send-email-dvlasenk@redhat.com
Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
CC: Linus Torvalds <torvalds@linux-foundation.org>
CC: Oleg Nesterov <oleg@redhat.com>
CC: Borislav Petkov <bp@alien8.de>
CC: "H. Peter Anvin" <hpa@zytor.com>
CC: Andy Lutomirski <luto@amacapital.net>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: X86 ML <x86@kernel.org>
CC: Alexei Starovoitov <ast@plumgrid.com>
CC: Will Drewry <wad@chromium.org>
CC: Kees Cook <keescook@chromium.org>
CC: linux-kernel@vger.kernel.org
Signed-off-by: Andy Lutomirski <luto@amacapital.net>
---
 arch/x86/kernel/entry_64.S | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index db13655c3a2a..ac542acb53db 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -757,8 +757,8 @@ retint_swapgs:		/* return to user-space */
 	 * Try to use SYSRET instead of IRET if we're returning to
 	 * a completely clean 64-bit userspace context.
 	 */
-	movq (RCX-R11)(%rsp), %rcx
-	cmpq %rcx,(RIP-R11)(%rsp)		/* RCX == RIP */
+	movq (RCX-ARGOFFSET)(%rsp), %rcx
+	cmpq %rcx,(RIP-ARGOFFSET)(%rsp)		/* RCX == RIP */
 	jne opportunistic_sysret_failed
 
 	/*
@@ -779,7 +779,7 @@ retint_swapgs:		/* return to user-space */
 	shr $__VIRTUAL_MASK_SHIFT, %rcx
 	jnz opportunistic_sysret_failed
 
-	cmpq $__USER_CS,(CS-R11)(%rsp)		/* CS must match SYSRET */
+	cmpq $__USER_CS,(CS-ARGOFFSET)(%rsp)	/* CS must match SYSRET */
 	jne opportunistic_sysret_failed
 
 	movq (R11-ARGOFFSET)(%rsp), %r11
-- 
2.1.0


^ permalink raw reply	[flat|nested] 130+ messages in thread

* [PATCH 04/16] x86: entry_64.S: always allocate complete "struct pt_regs"
  2015-02-26 22:40 [PATCH 00/16] x86/asm changes for 4.1 for review Andy Lutomirski
                   ` (2 preceding siblings ...)
  2015-02-26 22:40 ` [PATCH 03/16] x86: entry_64.S: fix wrong symbolic constant usage: R11->ARGOFFSET Andy Lutomirski
@ 2015-02-26 22:40 ` Andy Lutomirski
  2015-03-04 22:53   ` [tip:x86/asm] x86/asm/entry/64: Always allocate a complete " struct pt_regs" on the kernel stack tip-bot for Denys Vlasenko
  2015-02-26 22:40 ` [PATCH 05/16] x86: entry_64.S: fix comments. No code changes Andy Lutomirski
                   ` (12 subsequent siblings)
  16 siblings, 1 reply; 130+ messages in thread
From: Andy Lutomirski @ 2015-02-26 22:40 UTC (permalink / raw)
  To: x86, linux-kernel
  Cc: Borislav Petkov, Oleg Nesterov, Denys Vlasenko, Linus Torvalds,
	H. Peter Anvin, Andy Lutomirski, Frederic Weisbecker,
	Alexei Starovoitov, Will Drewry, Kees Cook

From: Denys Vlasenko <dvlasenk@redhat.com>

64-bit code was using six stack slots less by not saving/restoring
registers which are callee-preserved according to C ABI,
and not allocating space for them.
Only when syscall needed a complete "struct pt_regs",
the complete area was allocated and filled in.
As an additional twist, on interrupt entry a "slightly less truncated pt_regs"
trick is used, to make nested interrupt stacks easier to unwind.

This proved to be a source of significant obfuscation and subtle bugs.
For example, stub_fork had to pop the return address,
extend the struct, save registers, and push return address back. Ugly.
ia32_ptregs_common pops return address and "returns" via jmp insn,
throwing a wrench into CPU return stack cache.

This patch changes code to always allocate a complete "struct pt_regs".
The saving of registers is still done lazily.

"Partial pt_regs" trick on interrupt stack is retained.

Macros which manipulate "struct pt_regs" on stack are reworked:
ALLOC_PT_GPREGS_ON_STACK allocates the structure.
SAVE_C_REGS saves to it those registers which are clobbered by C code.
SAVE_EXTRA_REGS saves to it all other registers.
Corresponding RESTORE_* and REMOVE_PT_GPREGS_FROM_STACK macros reverse it.

ia32_ptregs_common, stub_fork and friends lost their ugly dance with
return pointer.

LOAD_ARGS32 in ia32entry.S now uses symbolic stack offsets
instead of magic numbers.

error_entry and save_paranoid now use SAVE_C_REGS + SAVE_EXTRA_REGS
instead of having it open-coded yet again.

Patch was run-tested: 64-bit executables, 32-bit executables,
strace works.
Timing tests did not show measurable difference in 32-bit
and 64-bit syscalls.

Link: http://lkml.kernel.org/r/1423778052-21038-2-git-send-email-dvlasenk@redhat.com
Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
CC: Linus Torvalds <torvalds@linux-foundation.org>
CC: Oleg Nesterov <oleg@redhat.com>
CC: Borislav Petkov <bp@alien8.de>
CC: "H. Peter Anvin" <hpa@zytor.com>
CC: Andy Lutomirski <luto@amacapital.net>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: X86 ML <x86@kernel.org>
CC: Alexei Starovoitov <ast@plumgrid.com>
CC: Will Drewry <wad@chromium.org>
CC: Kees Cook <keescook@chromium.org>
CC: linux-kernel@vger.kernel.org
Signed-off-by: Andy Lutomirski <luto@amacapital.net>
---
 arch/x86/ia32/ia32entry.S              |  47 +++----
 arch/x86/include/asm/calling.h         | 222 ++++++++++++++++-----------------
 arch/x86/include/asm/irqflags.h        |   4 +-
 arch/x86/include/uapi/asm/ptrace-abi.h |   1 -
 arch/x86/kernel/entry_64.S             | 196 +++++++++++------------------
 5 files changed, 210 insertions(+), 260 deletions(-)

diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 156ebcab4ada..f4bed4971673 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -62,12 +62,12 @@
 	 */
 	.macro LOAD_ARGS32 offset, _r9=0
 	.if \_r9
-	movl \offset+16(%rsp),%r9d
+	movl \offset+R9(%rsp),%r9d
 	.endif
-	movl \offset+40(%rsp),%ecx
-	movl \offset+48(%rsp),%edx
-	movl \offset+56(%rsp),%esi
-	movl \offset+64(%rsp),%edi
+	movl \offset+RCX(%rsp),%ecx
+	movl \offset+RDX(%rsp),%edx
+	movl \offset+RSI(%rsp),%esi
+	movl \offset+RDI(%rsp),%edi
 	movl %eax,%eax			/* zero extension */
 	.endm
 	
@@ -144,7 +144,8 @@ ENTRY(ia32_sysenter_target)
 	CFI_REL_OFFSET rip,0
 	pushq_cfi %rax
 	cld
-	SAVE_ARGS 0,1,0
+	ALLOC_PT_GPREGS_ON_STACK
+	SAVE_C_REGS_EXCEPT_R891011
  	/* no need to do an access_ok check here because rbp has been
  	   32bit zero extended */ 
 	ASM_STAC
@@ -182,7 +183,8 @@ sysexit_from_sys_call:
 	andl	$~0x200,EFLAGS-ARGOFFSET(%rsp)
 	movl	RIP-ARGOFFSET(%rsp),%edx		/* User %eip */
 	CFI_REGISTER rip,rdx
-	RESTORE_ARGS 0,24,0,0,0,0
+	RESTORE_RSI_RDI
+	REMOVE_PT_GPREGS_FROM_STACK 3*8
 	xorq	%r8,%r8
 	xorq	%r9,%r9
 	xorq	%r10,%r10
@@ -256,13 +258,13 @@ sysenter_tracesys:
 	testl	$(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	jz	sysenter_auditsys
 #endif
-	SAVE_REST
+	SAVE_EXTRA_REGS
 	CLEAR_RREGS
 	movq	$-ENOSYS,RAX(%rsp)/* ptrace can change this for a bad syscall */
 	movq	%rsp,%rdi        /* &pt_regs -> arg1 */
 	call	syscall_trace_enter
 	LOAD_ARGS32 ARGOFFSET  /* reload args from stack in case ptrace changed it */
-	RESTORE_REST
+	RESTORE_EXTRA_REGS
 	cmpq	$(IA32_NR_syscalls-1),%rax
 	ja	int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */
 	jmp	sysenter_do_call
@@ -304,7 +306,8 @@ ENTRY(ia32_cstar_target)
 	 * disabled irqs and here we enable it straight after entry:
 	 */
 	ENABLE_INTERRUPTS(CLBR_NONE)
-	SAVE_ARGS 8,0,0
+	ALLOC_PT_GPREGS_ON_STACK 8
+	SAVE_C_REGS_EXCEPT_RCX_R891011
 	movl 	%eax,%eax	/* zero extension */
 	movq	%rax,ORIG_RAX-ARGOFFSET(%rsp)
 	movq	%rcx,RIP-ARGOFFSET(%rsp)
@@ -341,7 +344,7 @@ cstar_dispatch:
 	jnz sysretl_audit
 sysretl_from_sys_call:
 	andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
-	RESTORE_ARGS 0,-ARG_SKIP,0,0,0
+	RESTORE_RSI_RDI_RDX
 	movl RIP-ARGOFFSET(%rsp),%ecx
 	CFI_REGISTER rip,rcx
 	movl EFLAGS-ARGOFFSET(%rsp),%r11d	
@@ -372,13 +375,13 @@ cstar_tracesys:
 	jz cstar_auditsys
 #endif
 	xchgl %r9d,%ebp
-	SAVE_REST
+	SAVE_EXTRA_REGS
 	CLEAR_RREGS 0, r9
 	movq $-ENOSYS,RAX(%rsp)	/* ptrace can change this for a bad syscall */
 	movq %rsp,%rdi        /* &pt_regs -> arg1 */
 	call syscall_trace_enter
 	LOAD_ARGS32 ARGOFFSET, 1  /* reload args from stack in case ptrace changed it */
-	RESTORE_REST
+	RESTORE_EXTRA_REGS
 	xchgl %ebp,%r9d
 	cmpq $(IA32_NR_syscalls-1),%rax
 	ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */
@@ -433,7 +436,8 @@ ENTRY(ia32_syscall)
 	cld
 	/* note the registers are not zero extended to the sf.
 	   this could be a problem. */
-	SAVE_ARGS 0,1,0
+	ALLOC_PT_GPREGS_ON_STACK
+	SAVE_C_REGS_EXCEPT_R891011
 	orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	jnz ia32_tracesys
@@ -446,16 +450,16 @@ ia32_sysret:
 	movq %rax,RAX-ARGOFFSET(%rsp)
 ia32_ret_from_sys_call:
 	CLEAR_RREGS -ARGOFFSET
-	jmp int_ret_from_sys_call 
+	jmp int_ret_from_sys_call
 
-ia32_tracesys:			 
-	SAVE_REST
+ia32_tracesys:
+	SAVE_EXTRA_REGS
 	CLEAR_RREGS
 	movq $-ENOSYS,RAX(%rsp)	/* ptrace can change this for a bad syscall */
 	movq %rsp,%rdi        /* &pt_regs -> arg1 */
 	call syscall_trace_enter
 	LOAD_ARGS32 ARGOFFSET  /* reload args from stack in case ptrace changed it */
-	RESTORE_REST
+	RESTORE_EXTRA_REGS
 	cmpq $(IA32_NR_syscalls-1),%rax
 	ja  int_ret_from_sys_call	/* ia32_tracesys has set RAX(%rsp) */
 	jmp ia32_do_call
@@ -492,7 +496,6 @@ GLOBAL(stub32_clone)
 
 	ALIGN
 ia32_ptregs_common:
-	popq %r11
 	CFI_ENDPROC
 	CFI_STARTPROC32	simple
 	CFI_SIGNAL_FRAME
@@ -507,9 +510,9 @@ ia32_ptregs_common:
 /*	CFI_REL_OFFSET	rflags,EFLAGS-ARGOFFSET*/
 	CFI_REL_OFFSET	rsp,RSP-ARGOFFSET
 /*	CFI_REL_OFFSET	ss,SS-ARGOFFSET*/
-	SAVE_REST
+	SAVE_EXTRA_REGS 8
 	call *%rax
-	RESTORE_REST
-	jmp  ia32_sysret	/* misbalances the return cache */
+	RESTORE_EXTRA_REGS 8
+	ret
 	CFI_ENDPROC
 END(ia32_ptregs_common)
diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h
index 3c711f2ab236..38356476b131 100644
--- a/arch/x86/include/asm/calling.h
+++ b/arch/x86/include/asm/calling.h
@@ -55,143 +55,137 @@ For 32-bit we have the following conventions - kernel is built with
  * for assembly code:
  */
 
-#define R15		  0
-#define R14		  8
-#define R13		 16
-#define R12		 24
-#define RBP		 32
-#define RBX		 40
-
-/* arguments: interrupts/non tracing syscalls only save up to here: */
-#define R11		 48
-#define R10		 56
-#define R9		 64
-#define R8		 72
-#define RAX		 80
-#define RCX		 88
-#define RDX		 96
-#define RSI		104
-#define RDI		112
-#define ORIG_RAX	120       /* + error_code */
-/* end of arguments */
-
-/* cpu exception frame or undefined in case of fast syscall: */
-#define RIP		128
-#define CS		136
-#define EFLAGS		144
-#define RSP		152
-#define SS		160
-
-#define ARGOFFSET	R11
-
-	.macro SAVE_ARGS addskip=0, save_rcx=1, save_r891011=1, rax_enosys=0
-	subq  $9*8+\addskip, %rsp
-	CFI_ADJUST_CFA_OFFSET	9*8+\addskip
-	movq_cfi rdi, 8*8
-	movq_cfi rsi, 7*8
-	movq_cfi rdx, 6*8
-
-	.if \save_rcx
-	movq_cfi rcx, 5*8
-	.endif
+/* The layout forms the "struct pt_regs" on the stack: */
+/*
+ * C ABI says these regs are callee-preserved. They aren't saved on kernel entry
+ * unless syscall needs a complete, fully filled "struct pt_regs".
+ */
+#define R15		0*8
+#define R14		1*8
+#define R13		2*8
+#define R12		3*8
+#define RBP		4*8
+#define RBX		5*8
+/* These regs are callee-clobbered. Always saved on kernel entry. */
+#define R11		6*8
+#define R10		7*8
+#define R9		8*8
+#define R8		9*8
+#define RAX		10*8
+#define RCX		11*8
+#define RDX		12*8
+#define RSI		13*8
+#define RDI		14*8
+/*
+ * On syscall entry, this is syscall#. On CPU exception, this is error code.
+ * On hw interrupt, it's IRQ number:
+ */
+#define ORIG_RAX	15*8
+/* Return frame for iretq */
+#define RIP		16*8
+#define CS		17*8
+#define EFLAGS		18*8
+#define RSP		19*8
+#define SS		20*8
+
+#define ARGOFFSET	0
+
+	.macro ALLOC_PT_GPREGS_ON_STACK addskip=0
+	subq	$15*8+\addskip, %rsp
+	CFI_ADJUST_CFA_OFFSET 15*8+\addskip
+	.endm
 
-	.if \rax_enosys
-	movq $-ENOSYS, 4*8(%rsp)
-	.else
-	movq_cfi rax, 4*8
+	.macro SAVE_C_REGS_HELPER offset=0 rax=1 rcx=1 r8plus=1
+	.if \r8plus
+	movq_cfi r11, 6*8+\offset
+	movq_cfi r10, 7*8+\offset
+	movq_cfi r9,  8*8+\offset
+	movq_cfi r8,  9*8+\offset
 	.endif
-
-	.if \save_r891011
-	movq_cfi r8,  3*8
-	movq_cfi r9,  2*8
-	movq_cfi r10, 1*8
-	movq_cfi r11, 0*8
+	.if \rax
+	movq_cfi rax, 10*8+\offset
+	.endif
+	.if \rcx
+	movq_cfi rcx, 11*8+\offset
 	.endif
+	movq_cfi rdx, 12*8+\offset
+	movq_cfi rsi, 13*8+\offset
+	movq_cfi rdi, 14*8+\offset
+	.endm
+	.macro SAVE_C_REGS offset=0
+	SAVE_C_REGS_HELPER \offset, 1, 1, 1
+	.endm
+	.macro SAVE_C_REGS_EXCEPT_RAX_RCX offset=0
+	SAVE_C_REGS_HELPER \offset, 0, 0, 1
+	.endm
+	.macro SAVE_C_REGS_EXCEPT_R891011
+	SAVE_C_REGS_HELPER 0, 1, 1, 0
+	.endm
+	.macro SAVE_C_REGS_EXCEPT_RCX_R891011
+	SAVE_C_REGS_HELPER 0, 1, 0, 0
+	.endm
 
+	.macro SAVE_EXTRA_REGS offset=0
+	movq_cfi r15, 0*8+\offset
+	movq_cfi r14, 1*8+\offset
+	movq_cfi r13, 2*8+\offset
+	movq_cfi r12, 3*8+\offset
+	movq_cfi rbp, 4*8+\offset
+	movq_cfi rbx, 5*8+\offset
+	.endm
+	.macro SAVE_EXTRA_REGS_RBP offset=0
+	movq_cfi rbp, 4*8+\offset
 	.endm
 
-#define ARG_SKIP	(9*8)
+	.macro RESTORE_EXTRA_REGS offset=0
+	movq_cfi_restore 0*8+\offset, r15
+	movq_cfi_restore 1*8+\offset, r14
+	movq_cfi_restore 2*8+\offset, r13
+	movq_cfi_restore 3*8+\offset, r12
+	movq_cfi_restore 4*8+\offset, rbp
+	movq_cfi_restore 5*8+\offset, rbx
+	.endm
 
-	.macro RESTORE_ARGS rstor_rax=1, addskip=0, rstor_rcx=1, rstor_r11=1, \
-			    rstor_r8910=1, rstor_rdx=1
+	.macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1
 	.if \rstor_r11
-	movq_cfi_restore 0*8, r11
+	movq_cfi_restore 6*8, r11
 	.endif
-
 	.if \rstor_r8910
-	movq_cfi_restore 1*8, r10
-	movq_cfi_restore 2*8, r9
-	movq_cfi_restore 3*8, r8
+	movq_cfi_restore 7*8, r10
+	movq_cfi_restore 8*8, r9
+	movq_cfi_restore 9*8, r8
 	.endif
-
 	.if \rstor_rax
-	movq_cfi_restore 4*8, rax
+	movq_cfi_restore 10*8, rax
 	.endif
-
 	.if \rstor_rcx
-	movq_cfi_restore 5*8, rcx
+	movq_cfi_restore 11*8, rcx
 	.endif
-
 	.if \rstor_rdx
-	movq_cfi_restore 6*8, rdx
-	.endif
-
-	movq_cfi_restore 7*8, rsi
-	movq_cfi_restore 8*8, rdi
-
-	.if ARG_SKIP+\addskip > 0
-	addq $ARG_SKIP+\addskip, %rsp
-	CFI_ADJUST_CFA_OFFSET	-(ARG_SKIP+\addskip)
+	movq_cfi_restore 12*8, rdx
 	.endif
+	movq_cfi_restore 13*8, rsi
+	movq_cfi_restore 14*8, rdi
 	.endm
-
-	.macro LOAD_ARGS offset, skiprax=0
-	movq \offset(%rsp),    %r11
-	movq \offset+8(%rsp),  %r10
-	movq \offset+16(%rsp), %r9
-	movq \offset+24(%rsp), %r8
-	movq \offset+40(%rsp), %rcx
-	movq \offset+48(%rsp), %rdx
-	movq \offset+56(%rsp), %rsi
-	movq \offset+64(%rsp), %rdi
-	.if \skiprax
-	.else
-	movq \offset+72(%rsp), %rax
-	.endif
+	.macro RESTORE_C_REGS
+	RESTORE_C_REGS_HELPER 1,1,1,1,1
 	.endm
-
-#define REST_SKIP	(6*8)
-
-	.macro SAVE_REST
-	subq $REST_SKIP, %rsp
-	CFI_ADJUST_CFA_OFFSET	REST_SKIP
-	movq_cfi rbx, 5*8
-	movq_cfi rbp, 4*8
-	movq_cfi r12, 3*8
-	movq_cfi r13, 2*8
-	movq_cfi r14, 1*8
-	movq_cfi r15, 0*8
+	.macro RESTORE_C_REGS_EXCEPT_RAX
+	RESTORE_C_REGS_HELPER 0,1,1,1,1
 	.endm
-
-	.macro RESTORE_REST
-	movq_cfi_restore 0*8, r15
-	movq_cfi_restore 1*8, r14
-	movq_cfi_restore 2*8, r13
-	movq_cfi_restore 3*8, r12
-	movq_cfi_restore 4*8, rbp
-	movq_cfi_restore 5*8, rbx
-	addq $REST_SKIP, %rsp
-	CFI_ADJUST_CFA_OFFSET	-(REST_SKIP)
+	.macro RESTORE_C_REGS_EXCEPT_RCX
+	RESTORE_C_REGS_HELPER 1,0,1,1,1
 	.endm
-
-	.macro SAVE_ALL
-	SAVE_ARGS
-	SAVE_REST
+	.macro RESTORE_RSI_RDI
+	RESTORE_C_REGS_HELPER 0,0,0,0,0
+	.endm
+	.macro RESTORE_RSI_RDI_RDX
+	RESTORE_C_REGS_HELPER 0,0,0,0,1
 	.endm
 
-	.macro RESTORE_ALL addskip=0
-	RESTORE_REST
-	RESTORE_ARGS 1, \addskip
+	.macro REMOVE_PT_GPREGS_FROM_STACK addskip=0
+	addq $15*8+\addskip, %rsp
+	CFI_ADJUST_CFA_OFFSET -(15*8+\addskip)
 	.endm
 
 	.macro icebp
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index 0a8b519226b8..021bee9b86b6 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -171,9 +171,9 @@ static inline int arch_irqs_disabled(void)
 #define ARCH_LOCKDEP_SYS_EXIT_IRQ	\
 	TRACE_IRQS_ON; \
 	sti; \
-	SAVE_REST; \
+	SAVE_EXTRA_REGS; \
 	LOCKDEP_SYS_EXIT; \
-	RESTORE_REST; \
+	RESTORE_EXTRA_REGS; \
 	cli; \
 	TRACE_IRQS_OFF;
 
diff --git a/arch/x86/include/uapi/asm/ptrace-abi.h b/arch/x86/include/uapi/asm/ptrace-abi.h
index 7b0a55a88851..ad115bf779f3 100644
--- a/arch/x86/include/uapi/asm/ptrace-abi.h
+++ b/arch/x86/include/uapi/asm/ptrace-abi.h
@@ -49,7 +49,6 @@
 #define EFLAGS 144
 #define RSP 152
 #define SS 160
-#define ARGOFFSET R11
 #endif /* __ASSEMBLY__ */
 
 /* top of stack page */
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index ac542acb53db..e25e901e5d57 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -26,12 +26,6 @@
  * Some macro usage:
  * - CFI macros are used to generate dwarf2 unwind information for better
  * backtraces. They don't change any code.
- * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
- * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
- * There are unfortunately lots of special cases where some registers
- * not touched. The macro is a big mess that should be cleaned up.
- * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
- * Gives a full stack frame.
  * - ENTRY/END Define functions in the symbol table.
  * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
  * frame that is otherwise undefined after a SYSCALL
@@ -190,9 +184,9 @@ ENDPROC(native_usergs_sysret64)
 	.endm
 
 /*
- * frame that enables calling into C.
+ * frame that enables passing a complete pt_regs to a C function.
  */
-	.macro PARTIAL_FRAME start=1 offset=0
+	.macro DEFAULT_FRAME start=1 offset=0
 	XCPT_FRAME \start, ORIG_RAX+\offset-ARGOFFSET
 	CFI_REL_OFFSET rdi, RDI+\offset-ARGOFFSET
 	CFI_REL_OFFSET rsi, RSI+\offset-ARGOFFSET
@@ -203,13 +197,6 @@ ENDPROC(native_usergs_sysret64)
 	CFI_REL_OFFSET r9, R9+\offset-ARGOFFSET
 	CFI_REL_OFFSET r10, R10+\offset-ARGOFFSET
 	CFI_REL_OFFSET r11, R11+\offset-ARGOFFSET
-	.endm
-
-/*
- * frame that enables passing a complete pt_regs to a C function.
- */
-	.macro DEFAULT_FRAME start=1 offset=0
-	PARTIAL_FRAME \start, R11+\offset-R15
 	CFI_REL_OFFSET rbx, RBX+\offset
 	CFI_REL_OFFSET rbp, RBP+\offset
 	CFI_REL_OFFSET r12, R12+\offset
@@ -221,21 +208,8 @@ ENDPROC(native_usergs_sysret64)
 ENTRY(save_paranoid)
 	XCPT_FRAME 1 RDI+8
 	cld
-	movq %rdi, RDI+8(%rsp)
-	movq %rsi, RSI+8(%rsp)
-	movq_cfi rdx, RDX+8
-	movq_cfi rcx, RCX+8
-	movq_cfi rax, RAX+8
-	movq %r8, R8+8(%rsp)
-	movq %r9, R9+8(%rsp)
-	movq %r10, R10+8(%rsp)
-	movq %r11, R11+8(%rsp)
-	movq_cfi rbx, RBX+8
-	movq %rbp, RBP+8(%rsp)
-	movq %r12, R12+8(%rsp)
-	movq %r13, R13+8(%rsp)
-	movq %r14, R14+8(%rsp)
-	movq %r15, R15+8(%rsp)
+	SAVE_C_REGS 8
+	SAVE_EXTRA_REGS 8
 	movl $1,%ebx
 	movl $MSR_GS_BASE,%ecx
 	rdmsr
@@ -264,7 +238,7 @@ ENTRY(ret_from_fork)
 
 	GET_THREAD_INFO(%rcx)
 
-	RESTORE_REST
+	RESTORE_EXTRA_REGS
 
 	testl $3, CS-ARGOFFSET(%rsp)		# from kernel_thread?
 	jz   1f
@@ -276,12 +250,10 @@ ENTRY(ret_from_fork)
 	jmp ret_from_sys_call			# go to the SYSRET fastpath
 
 1:
-	subq $REST_SKIP, %rsp	# leave space for volatiles
-	CFI_ADJUST_CFA_OFFSET	REST_SKIP
 	movq %rbp, %rdi
 	call *%rbx
 	movl $0, RAX(%rsp)
-	RESTORE_REST
+	RESTORE_EXTRA_REGS
 	jmp int_ret_from_sys_call
 	CFI_ENDPROC
 END(ret_from_fork)
@@ -339,9 +311,11 @@ GLOBAL(system_call_after_swapgs)
 	 * and short:
 	 */
 	ENABLE_INTERRUPTS(CLBR_NONE)
-	SAVE_ARGS 8, 0, rax_enosys=1
+	ALLOC_PT_GPREGS_ON_STACK 8
+	SAVE_C_REGS_EXCEPT_RAX_RCX
+	movq	$-ENOSYS,RAX-ARGOFFSET(%rsp)
 	movq_cfi rax,(ORIG_RAX-ARGOFFSET)
-	movq  %rcx,RIP-ARGOFFSET(%rsp)
+	movq	%rcx,RIP-ARGOFFSET(%rsp)
 	CFI_REL_OFFSET rip,RIP-ARGOFFSET
 	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	jnz tracesys
@@ -372,9 +346,9 @@ ret_from_sys_call:
 	 * sysretq will re-enable interrupts:
 	 */
 	TRACE_IRQS_ON
+	RESTORE_C_REGS_EXCEPT_RCX
 	movq RIP-ARGOFFSET(%rsp),%rcx
 	CFI_REGISTER	rip,rcx
-	RESTORE_ARGS 1,-ARG_SKIP,0
 	/*CFI_REGISTER	rflags,r11*/
 	movq	PER_CPU_VAR(old_rsp), %rsp
 	USERGS_SYSRET64
@@ -387,16 +361,17 @@ int_ret_from_sys_call_fixup:
 
 	/* Do syscall tracing */
 tracesys:
-	leaq -REST_SKIP(%rsp), %rdi
+	movq %rsp, %rdi
 	movq $AUDIT_ARCH_X86_64, %rsi
 	call syscall_trace_enter_phase1
 	test %rax, %rax
 	jnz tracesys_phase2		/* if needed, run the slow path */
-	LOAD_ARGS 0			/* else restore clobbered regs */
+	RESTORE_C_REGS_EXCEPT_RAX	/* else restore clobbered regs */
+	movq ORIG_RAX-ARGOFFSET(%rsp), %rax
 	jmp system_call_fastpath	/*      and return to the fast path */
 
 tracesys_phase2:
-	SAVE_REST
+	SAVE_EXTRA_REGS
 	FIXUP_TOP_OF_STACK %rdi
 	movq %rsp, %rdi
 	movq $AUDIT_ARCH_X86_64, %rsi
@@ -408,8 +383,8 @@ tracesys_phase2:
 	 * We don't reload %rax because syscall_trace_entry_phase2() returned
 	 * the value it wants us to use in the table lookup.
 	 */
-	LOAD_ARGS ARGOFFSET, 1
-	RESTORE_REST
+	RESTORE_C_REGS_EXCEPT_RAX
+	RESTORE_EXTRA_REGS
 #if __SYSCALL_MASK == ~0
 	cmpq $__NR_syscall_max,%rax
 #else
@@ -460,7 +435,7 @@ int_very_careful:
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_NONE)
 int_check_syscall_exit_work:
-	SAVE_REST
+	SAVE_EXTRA_REGS
 	/* Check for syscall exit trace */
 	testl $_TIF_WORK_SYSCALL_EXIT,%edx
 	jz int_signal
@@ -479,7 +454,7 @@ int_signal:
 	call do_notify_resume
 1:	movl $_TIF_WORK_MASK,%edi
 int_restore_rest:
-	RESTORE_REST
+	RESTORE_EXTRA_REGS
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
 	jmp int_with_check
@@ -489,15 +464,12 @@ END(system_call)
 	.macro FORK_LIKE func
 ENTRY(stub_\func)
 	CFI_STARTPROC
-	popq	%r11			/* save return address */
-	PARTIAL_FRAME 0
-	SAVE_REST
-	pushq	%r11			/* put it back on stack */
+	DEFAULT_FRAME 0, 8		/* offset 8: return address */
+	SAVE_EXTRA_REGS 8
 	FIXUP_TOP_OF_STACK %r11, 8
-	DEFAULT_FRAME 0 8		/* offset 8: return address */
 	call sys_\func
 	RESTORE_TOP_OF_STACK %r11, 8
-	ret $REST_SKIP		/* pop extended registers */
+	ret
 	CFI_ENDPROC
 END(stub_\func)
 	.endm
@@ -505,7 +477,7 @@ END(stub_\func)
 	.macro FIXED_FRAME label,func
 ENTRY(\label)
 	CFI_STARTPROC
-	PARTIAL_FRAME 0 8		/* offset 8: return address */
+	DEFAULT_FRAME 0, 8		/* offset 8: return address */
 	FIXUP_TOP_OF_STACK %r11, 8-ARGOFFSET
 	call \func
 	RESTORE_TOP_OF_STACK %r11, 8-ARGOFFSET
@@ -522,12 +494,12 @@ END(\label)
 ENTRY(stub_execve)
 	CFI_STARTPROC
 	addq $8, %rsp
-	PARTIAL_FRAME 0
-	SAVE_REST
+	DEFAULT_FRAME 0
+	SAVE_EXTRA_REGS
 	FIXUP_TOP_OF_STACK %r11
 	call sys_execve
 	movq %rax,RAX(%rsp)
-	RESTORE_REST
+	RESTORE_EXTRA_REGS
 	jmp int_ret_from_sys_call
 	CFI_ENDPROC
 END(stub_execve)
@@ -535,13 +507,13 @@ END(stub_execve)
 ENTRY(stub_execveat)
 	CFI_STARTPROC
 	addq $8, %rsp
-	PARTIAL_FRAME 0
-	SAVE_REST
+	DEFAULT_FRAME 0
+	SAVE_EXTRA_REGS
 	FIXUP_TOP_OF_STACK %r11
 	call sys_execveat
 	RESTORE_TOP_OF_STACK %r11
 	movq %rax,RAX(%rsp)
-	RESTORE_REST
+	RESTORE_EXTRA_REGS
 	jmp int_ret_from_sys_call
 	CFI_ENDPROC
 END(stub_execveat)
@@ -553,12 +525,12 @@ END(stub_execveat)
 ENTRY(stub_rt_sigreturn)
 	CFI_STARTPROC
 	addq $8, %rsp
-	PARTIAL_FRAME 0
-	SAVE_REST
+	DEFAULT_FRAME 0
+	SAVE_EXTRA_REGS
 	FIXUP_TOP_OF_STACK %r11
 	call sys_rt_sigreturn
 	movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
-	RESTORE_REST
+	RESTORE_EXTRA_REGS
 	jmp int_ret_from_sys_call
 	CFI_ENDPROC
 END(stub_rt_sigreturn)
@@ -567,12 +539,12 @@ END(stub_rt_sigreturn)
 ENTRY(stub_x32_rt_sigreturn)
 	CFI_STARTPROC
 	addq $8, %rsp
-	PARTIAL_FRAME 0
-	SAVE_REST
+	DEFAULT_FRAME 0
+	SAVE_EXTRA_REGS
 	FIXUP_TOP_OF_STACK %r11
 	call sys32_x32_rt_sigreturn
 	movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
-	RESTORE_REST
+	RESTORE_EXTRA_REGS
 	jmp int_ret_from_sys_call
 	CFI_ENDPROC
 END(stub_x32_rt_sigreturn)
@@ -580,13 +552,13 @@ END(stub_x32_rt_sigreturn)
 ENTRY(stub_x32_execve)
 	CFI_STARTPROC
 	addq $8, %rsp
-	PARTIAL_FRAME 0
-	SAVE_REST
+	DEFAULT_FRAME 0
+	SAVE_EXTRA_REGS
 	FIXUP_TOP_OF_STACK %r11
 	call compat_sys_execve
 	RESTORE_TOP_OF_STACK %r11
 	movq %rax,RAX(%rsp)
-	RESTORE_REST
+	RESTORE_EXTRA_REGS
 	jmp int_ret_from_sys_call
 	CFI_ENDPROC
 END(stub_x32_execve)
@@ -594,13 +566,13 @@ END(stub_x32_execve)
 ENTRY(stub_x32_execveat)
 	CFI_STARTPROC
 	addq $8, %rsp
-	PARTIAL_FRAME 0
-	SAVE_REST
+	DEFAULT_FRAME 0
+	SAVE_EXTRA_REGS
 	FIXUP_TOP_OF_STACK %r11
 	call compat_sys_execveat
 	RESTORE_TOP_OF_STACK %r11
 	movq %rax,RAX(%rsp)
-	RESTORE_REST
+	RESTORE_EXTRA_REGS
 	jmp int_ret_from_sys_call
 	CFI_ENDPROC
 END(stub_x32_execveat)
@@ -656,42 +628,28 @@ END(interrupt)
 
 /* 0(%rsp): ~(interrupt number) */
 	.macro interrupt func
-	/* reserve pt_regs for scratch regs and rbp */
-	subq $ORIG_RAX-RBP, %rsp
-	CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP
 	cld
-	/* start from rbp in pt_regs and jump over */
-	movq_cfi rdi, (RDI-RBP)
-	movq_cfi rsi, (RSI-RBP)
-	movq_cfi rdx, (RDX-RBP)
-	movq_cfi rcx, (RCX-RBP)
-	movq_cfi rax, (RAX-RBP)
-	movq_cfi  r8,  (R8-RBP)
-	movq_cfi  r9,  (R9-RBP)
-	movq_cfi r10, (R10-RBP)
-	movq_cfi r11, (R11-RBP)
-
-	/* Save rbp so that we can unwind from get_irq_regs() */
-	movq_cfi rbp, 0
-
-	/* Save previous stack value */
-	movq %rsp, %rsi
+	ALLOC_PT_GPREGS_ON_STACK -RBP
+	SAVE_C_REGS -RBP
+	/* this goes to 0(%rsp) for unwinder, not for saving the value: */
+	SAVE_EXTRA_REGS_RBP -RBP
+
+	leaq -RBP(%rsp),%rdi	/* arg1 for \func (pointer to pt_regs) */
 
-	leaq -RBP(%rsp),%rdi	/* arg1 for handler */
-	testl $3, CS-RBP(%rsi)
+	testl $3, CS-RBP(%rsp)
 	je 1f
 	SWAPGS
+1:
 	/*
 	 * irq_count is used to check if a CPU is already on an interrupt stack
 	 * or not. While this is essentially redundant with preempt_count it is
 	 * a little cheaper to use a separate counter in the PDA (short of
 	 * moving irq_enter into assembly, which would be too much work)
 	 */
-1:	incl PER_CPU_VAR(irq_count)
+	movq %rsp, %rsi
+	incl PER_CPU_VAR(irq_count)
 	cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
 	CFI_DEF_CFA_REGISTER	rsi
-
-	/* Store previous stack value */
 	pushq %rsi
 	CFI_ESCAPE	0x0f /* DW_CFA_def_cfa_expression */, 6, \
 			0x77 /* DW_OP_breg7 */, 0, \
@@ -800,7 +758,8 @@ retint_swapgs:		/* return to user-space */
 	 */
 irq_return_via_sysret:
 	CFI_REMEMBER_STATE
-	RESTORE_ARGS 1,8,1
+	RESTORE_C_REGS
+	REMOVE_PT_GPREGS_FROM_STACK 8
 	movq (RSP-RIP)(%rsp),%rsp
 	USERGS_SYSRET64
 	CFI_RESTORE_STATE
@@ -816,7 +775,8 @@ retint_restore_args:	/* return to kernel space */
 	 */
 	TRACE_IRQS_IRETQ
 restore_args:
-	RESTORE_ARGS 1,8,1
+	RESTORE_C_REGS
+	REMOVE_PT_GPREGS_FROM_STACK 8
 
 irq_return:
 	INTERRUPT_RETURN
@@ -887,12 +847,12 @@ retint_signal:
 	jz    retint_swapgs
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_NONE)
-	SAVE_REST
+	SAVE_EXTRA_REGS
 	movq $-1,ORIG_RAX(%rsp)
 	xorl %esi,%esi		# oldset
 	movq %rsp,%rdi		# &pt_regs
 	call do_notify_resume
-	RESTORE_REST
+	RESTORE_EXTRA_REGS
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
 	GET_THREAD_INFO(%rcx)
@@ -1019,8 +979,7 @@ ENTRY(\sym)
 	pushq_cfi $-1			/* ORIG_RAX: no syscall to restart */
 	.endif
 
-	subq $ORIG_RAX-R15, %rsp
-	CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
+	ALLOC_PT_GPREGS_ON_STACK
 
 	.if \paranoid
 	.if \paranoid == 1
@@ -1266,7 +1225,9 @@ ENTRY(xen_failsafe_callback)
 	addq $0x30,%rsp
 	CFI_ADJUST_CFA_OFFSET -0x30
 	pushq_cfi $-1 /* orig_ax = -1 => not a system call */
-	SAVE_ALL
+	ALLOC_PT_GPREGS_ON_STACK
+	SAVE_C_REGS
+	SAVE_EXTRA_REGS
 	jmp error_exit
 	CFI_ENDPROC
 END(xen_failsafe_callback)
@@ -1318,11 +1279,15 @@ ENTRY(paranoid_exit)
 	jnz paranoid_restore
 	TRACE_IRQS_IRETQ 0
 	SWAPGS_UNSAFE_STACK
-	RESTORE_ALL 8
+	RESTORE_EXTRA_REGS
+	RESTORE_C_REGS
+	REMOVE_PT_GPREGS_FROM_STACK 8
 	INTERRUPT_RETURN
 paranoid_restore:
 	TRACE_IRQS_IRETQ_DEBUG 0
-	RESTORE_ALL 8
+	RESTORE_EXTRA_REGS
+	RESTORE_C_REGS
+	REMOVE_PT_GPREGS_FROM_STACK 8
 	INTERRUPT_RETURN
 	CFI_ENDPROC
 END(paranoid_exit)
@@ -1336,21 +1301,8 @@ ENTRY(error_entry)
 	CFI_ADJUST_CFA_OFFSET 15*8
 	/* oldrax contains error code */
 	cld
-	movq %rdi, RDI+8(%rsp)
-	movq %rsi, RSI+8(%rsp)
-	movq %rdx, RDX+8(%rsp)
-	movq %rcx, RCX+8(%rsp)
-	movq %rax, RAX+8(%rsp)
-	movq  %r8,  R8+8(%rsp)
-	movq  %r9,  R9+8(%rsp)
-	movq %r10, R10+8(%rsp)
-	movq %r11, R11+8(%rsp)
-	movq_cfi rbx, RBX+8
-	movq %rbp, RBP+8(%rsp)
-	movq %r12, R12+8(%rsp)
-	movq %r13, R13+8(%rsp)
-	movq %r14, R14+8(%rsp)
-	movq %r15, R15+8(%rsp)
+	SAVE_C_REGS 8
+	SAVE_EXTRA_REGS 8
 	xorl %ebx,%ebx
 	testl $3,CS+8(%rsp)
 	je error_kernelspace
@@ -1399,7 +1351,7 @@ END(error_entry)
 ENTRY(error_exit)
 	DEFAULT_FRAME
 	movl %ebx,%eax
-	RESTORE_REST
+	RESTORE_EXTRA_REGS
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
 	GET_THREAD_INFO(%rcx)
@@ -1618,8 +1570,8 @@ end_repeat_nmi:
 	 * so that we repeat another NMI.
 	 */
 	pushq_cfi $-1		/* ORIG_RAX: no syscall to restart */
-	subq $ORIG_RAX-R15, %rsp
-	CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
+	ALLOC_PT_GPREGS_ON_STACK
+
 	/*
 	 * Use save_paranoid to handle SWAPGS, but no need to use paranoid_exit
 	 * as we should not be calling schedule in NMI context.
@@ -1658,8 +1610,10 @@ end_repeat_nmi:
 nmi_swapgs:
 	SWAPGS_UNSAFE_STACK
 nmi_restore:
+	RESTORE_EXTRA_REGS
+	RESTORE_C_REGS
 	/* Pop the extra iret frame at once */
-	RESTORE_ALL 6*8
+	REMOVE_PT_GPREGS_FROM_STACK 6*8
 
 	/* Clear the NMI executing stack variable */
 	movq $0, 5*8(%rsp)
-- 
2.1.0


^ permalink raw reply	[flat|nested] 130+ messages in thread

* [PATCH 05/16] x86: entry_64.S: fix comments. No code changes
  2015-02-26 22:40 [PATCH 00/16] x86/asm changes for 4.1 for review Andy Lutomirski
                   ` (3 preceding siblings ...)
  2015-02-26 22:40 ` [PATCH 04/16] x86: entry_64.S: always allocate complete "struct pt_regs" Andy Lutomirski
@ 2015-02-26 22:40 ` Andy Lutomirski
  2015-03-04 22:53   ` [tip:x86/asm] x86/asm/entry/64: Fix comments tip-bot for Denys Vlasenko
  2015-02-26 22:40 ` [PATCH 06/16] x86: code shrink in paranoid_exit Andy Lutomirski
                   ` (11 subsequent siblings)
  16 siblings, 1 reply; 130+ messages in thread
From: Andy Lutomirski @ 2015-02-26 22:40 UTC (permalink / raw)
  To: x86, linux-kernel
  Cc: Borislav Petkov, Oleg Nesterov, Denys Vlasenko, Linus Torvalds,
	H. Peter Anvin, Andy Lutomirski, Frederic Weisbecker,
	Alexei Starovoitov, Will Drewry, Kees Cook

From: Denys Vlasenko <dvlasenk@redhat.com>

Misleading and slightly wrong comments in "struct pt_regs" are fixed
(four instances).

Fix wrong comment atop EMPTY_FRAME macro.

Explain in more details what we do with stack layout during hw interrupt.

Correct comments about "partial stack frame" which are no longer true.

Link: http://lkml.kernel.org/r/1423778052-21038-3-git-send-email-dvlasenk@redhat.com
Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
CC: Linus Torvalds <torvalds@linux-foundation.org>
CC: Oleg Nesterov <oleg@redhat.com>
CC: Borislav Petkov <bp@alien8.de>
CC: "H. Peter Anvin" <hpa@zytor.com>
CC: Andy Lutomirski <luto@amacapital.net>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: X86 ML <x86@kernel.org>
CC: Alexei Starovoitov <ast@plumgrid.com>
CC: Will Drewry <wad@chromium.org>
CC: Kees Cook <keescook@chromium.org>
CC: linux-kernel@vger.kernel.org
Signed-off-by: Andy Lutomirski <luto@amacapital.net>
---
 arch/x86/include/asm/ptrace.h          | 13 ++++++++++---
 arch/x86/include/uapi/asm/ptrace-abi.h | 15 +++++++++++----
 arch/x86/include/uapi/asm/ptrace.h     | 13 ++++++++++---
 arch/x86/kernel/entry_64.S             | 18 ++++++++++++------
 4 files changed, 43 insertions(+), 16 deletions(-)

diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 86fc2bb82287..4077d963a1a0 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -31,13 +31,17 @@ struct pt_regs {
 #else /* __i386__ */
 
 struct pt_regs {
+/*
+ * C ABI says these regs are callee-preserved. They aren't saved on kernel entry
+ * unless syscall needs a complete, fully filled "struct pt_regs".
+ */
 	unsigned long r15;
 	unsigned long r14;
 	unsigned long r13;
 	unsigned long r12;
 	unsigned long bp;
 	unsigned long bx;
-/* arguments: non interrupts/non tracing syscalls only save up to here*/
+/* These regs are callee-clobbered. Always saved on kernel entry. */
 	unsigned long r11;
 	unsigned long r10;
 	unsigned long r9;
@@ -47,9 +51,12 @@ struct pt_regs {
 	unsigned long dx;
 	unsigned long si;
 	unsigned long di;
+/*
+ * On syscall entry, this is syscall#. On CPU exception, this is error code.
+ * On hw interrupt, it's IRQ number:
+ */
 	unsigned long orig_ax;
-/* end of arguments */
-/* cpu exception frame or undefined */
+/* Return frame for iretq */
 	unsigned long ip;
 	unsigned long cs;
 	unsigned long flags;
diff --git a/arch/x86/include/uapi/asm/ptrace-abi.h b/arch/x86/include/uapi/asm/ptrace-abi.h
index ad115bf779f3..580aee3072e0 100644
--- a/arch/x86/include/uapi/asm/ptrace-abi.h
+++ b/arch/x86/include/uapi/asm/ptrace-abi.h
@@ -25,13 +25,17 @@
 #else /* __i386__ */
 
 #if defined(__ASSEMBLY__) || defined(__FRAME_OFFSETS)
+/*
+ * C ABI says these regs are callee-preserved. They aren't saved on kernel entry
+ * unless syscall needs a complete, fully filled "struct pt_regs".
+ */
 #define R15 0
 #define R14 8
 #define R13 16
 #define R12 24
 #define RBP 32
 #define RBX 40
-/* arguments: interrupts/non tracing syscalls only save up to here*/
+/* These regs are callee-clobbered. Always saved on kernel entry. */
 #define R11 48
 #define R10 56
 #define R9 64
@@ -41,9 +45,12 @@
 #define RDX 96
 #define RSI 104
 #define RDI 112
-#define ORIG_RAX 120       /* = ERROR */
-/* end of arguments */
-/* cpu exception frame or undefined in case of fast syscall. */
+/*
+ * On syscall entry, this is syscall#. On CPU exception, this is error code.
+ * On hw interrupt, it's IRQ number:
+ */
+#define ORIG_RAX 120
+/* Return frame for iretq */
 #define RIP 128
 #define CS 136
 #define EFLAGS 144
diff --git a/arch/x86/include/uapi/asm/ptrace.h b/arch/x86/include/uapi/asm/ptrace.h
index ac4b9aa4d999..bc16115af39b 100644
--- a/arch/x86/include/uapi/asm/ptrace.h
+++ b/arch/x86/include/uapi/asm/ptrace.h
@@ -41,13 +41,17 @@ struct pt_regs {
 #ifndef __KERNEL__
 
 struct pt_regs {
+/*
+ * C ABI says these regs are callee-preserved. They aren't saved on kernel entry
+ * unless syscall needs a complete, fully filled "struct pt_regs".
+ */
 	unsigned long r15;
 	unsigned long r14;
 	unsigned long r13;
 	unsigned long r12;
 	unsigned long rbp;
 	unsigned long rbx;
-/* arguments: non interrupts/non tracing syscalls only save up to here*/
+/* These regs are callee-clobbered. Always saved on kernel entry. */
 	unsigned long r11;
 	unsigned long r10;
 	unsigned long r9;
@@ -57,9 +61,12 @@ struct pt_regs {
 	unsigned long rdx;
 	unsigned long rsi;
 	unsigned long rdi;
+/*
+ * On syscall entry, this is syscall#. On CPU exception, this is error code.
+ * On hw interrupt, it's IRQ number:
+ */
 	unsigned long orig_rax;
-/* end of arguments */
-/* cpu exception frame or undefined */
+/* Return frame for iretq */
 	unsigned long rip;
 	unsigned long cs;
 	unsigned long eflags;
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index e25e901e5d57..b4469ed2b30f 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -14,9 +14,6 @@
  * NOTE: This code handles signal-recognition, which happens every time
  * after an interrupt and after each system call.
  *
- * Normal syscalls and interrupts don't save a full stack frame, this is
- * only done for syscall tracing, signals or fork/exec et.al.
- *
  * A note on terminology:
  * - top of stack: Architecture defined interrupt frame from SS to RIP
  * at the top of the kernel process stack.
@@ -151,7 +148,7 @@ ENDPROC(native_usergs_sysret64)
 	.endm
 
 /*
- * initial frame state for interrupts (and exceptions without error code)
+ * empty frame
  */
 	.macro EMPTY_FRAME start=1 offset=0
 	.if \start
@@ -379,7 +376,7 @@ tracesys_phase2:
 	call syscall_trace_enter_phase2
 
 	/*
-	 * Reload arg registers from stack in case ptrace changed them.
+	 * Reload registers from stack in case ptrace changed them.
 	 * We don't reload %rax because syscall_trace_entry_phase2() returned
 	 * the value it wants us to use in the table lookup.
 	 */
@@ -629,6 +626,13 @@ END(interrupt)
 /* 0(%rsp): ~(interrupt number) */
 	.macro interrupt func
 	cld
+	/*
+	 * Since nothing in interrupt handling code touches r12...r15 members
+	 * of "struct pt_regs", and since interrupts can nest, we can save
+	 * four stack slots and simultaneously provide
+	 * an unwind-friendly stack layout by saving "truncated" pt_regs
+	 * exactly up to rbp slot, without these members.
+	 */
 	ALLOC_PT_GPREGS_ON_STACK -RBP
 	SAVE_C_REGS -RBP
 	/* this goes to 0(%rsp) for unwinder, not for saving the value: */
@@ -641,6 +645,7 @@ END(interrupt)
 	SWAPGS
 1:
 	/*
+	 * Save previous stack pointer, optionally switch to interrupt stack.
 	 * irq_count is used to check if a CPU is already on an interrupt stack
 	 * or not. While this is essentially redundant with preempt_count it is
 	 * a little cheaper to use a separate counter in the PDA (short of
@@ -681,6 +686,7 @@ ret_from_intr:
 	/* Restore saved previous stack */
 	popq %rsi
 	CFI_DEF_CFA rsi,SS+8-RBP	/* reg/off reset after def_cfa_expr */
+	/* return code expects complete pt_regs - adjust rsp accordingly: */
 	leaq ARGOFFSET-RBP(%rsi), %rsp
 	CFI_DEF_CFA_REGISTER	rsp
 	CFI_ADJUST_CFA_OFFSET	RBP-ARGOFFSET
@@ -692,7 +698,7 @@ exit_intr:
 
 	/* Interrupt came from user space */
 	/*
-	 * Has a correct top of stack, but a partial stack frame
+	 * Has a correct top of stack.
 	 * %rcx: thread info. Interrupts off.
 	 */
 retint_with_reschedule:
-- 
2.1.0


^ permalink raw reply	[flat|nested] 130+ messages in thread

* [PATCH 06/16] x86: code shrink in paranoid_exit
  2015-02-26 22:40 [PATCH 00/16] x86/asm changes for 4.1 for review Andy Lutomirski
                   ` (4 preceding siblings ...)
  2015-02-26 22:40 ` [PATCH 05/16] x86: entry_64.S: fix comments. No code changes Andy Lutomirski
@ 2015-02-26 22:40 ` Andy Lutomirski
  2015-03-04 22:53   ` [tip:x86/asm] x86/asm/entry/64: Shrink code in 'paranoid_exit' tip-bot for Denys Vlasenko
  2015-02-26 22:40 ` [PATCH 07/16] x86: mass removal of ARGOFFSET. No code changes Andy Lutomirski
                   ` (10 subsequent siblings)
  16 siblings, 1 reply; 130+ messages in thread
From: Andy Lutomirski @ 2015-02-26 22:40 UTC (permalink / raw)
  To: x86, linux-kernel
  Cc: Borislav Petkov, Oleg Nesterov, Denys Vlasenko, Linus Torvalds,
	H. Peter Anvin, Andy Lutomirski, Frederic Weisbecker,
	Alexei Starovoitov, Will Drewry, Kees Cook

From: Denys Vlasenko <dvlasenk@redhat.com>

RESTORE_EXTRA_REGS + RESTORE_C_REGS looks small, but it's
a lot of instructions (fourteen). Let's reuse them.

[I changed the labels.  In vda's version of this patch, paranoid_exit_restore
 was paranoid_restore1 and paranoid_exit_no_swapgs was paranoid_restore.
 This still isn't great, but I think it's a lot better.  --Andy]

Link: http://lkml.kernel.org/r/1421272101-16847-2-git-send-email-dvlasenk@redhat.com
Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
CC: Linus Torvalds <torvalds@linux-foundation.org>
CC: Oleg Nesterov <oleg@redhat.com>
CC: Borislav Petkov <bp@alien8.de>
CC: "H. Peter Anvin" <hpa@zytor.com>
CC: Andy Lutomirski <luto@amacapital.net>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: X86 ML <x86@kernel.org>
CC: Alexei Starovoitov <ast@plumgrid.com>
CC: Will Drewry <wad@chromium.org>
CC: Kees Cook <keescook@chromium.org>
CC: linux-kernel@vger.kernel.org
Signed-off-by: Andy Lutomirski <luto@amacapital.net>
---
 arch/x86/kernel/entry_64.S | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index b4469ed2b30f..e5cad016cb60 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1282,15 +1282,13 @@ ENTRY(paranoid_exit)
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF_DEBUG
 	testl %ebx,%ebx				/* swapgs needed? */
-	jnz paranoid_restore
+	jnz paranoid_exit_no_swapgs
 	TRACE_IRQS_IRETQ 0
 	SWAPGS_UNSAFE_STACK
-	RESTORE_EXTRA_REGS
-	RESTORE_C_REGS
-	REMOVE_PT_GPREGS_FROM_STACK 8
-	INTERRUPT_RETURN
-paranoid_restore:
+	jmp paranoid_exit_restore
+paranoid_exit_no_swapgs:
 	TRACE_IRQS_IRETQ_DEBUG 0
+paranoid_exit_restore:
 	RESTORE_EXTRA_REGS
 	RESTORE_C_REGS
 	REMOVE_PT_GPREGS_FROM_STACK 8
-- 
2.1.0


^ permalink raw reply	[flat|nested] 130+ messages in thread

* [PATCH 07/16] x86: mass removal of ARGOFFSET. No code changes
  2015-02-26 22:40 [PATCH 00/16] x86/asm changes for 4.1 for review Andy Lutomirski
                   ` (5 preceding siblings ...)
  2015-02-26 22:40 ` [PATCH 06/16] x86: code shrink in paranoid_exit Andy Lutomirski
@ 2015-02-26 22:40 ` Andy Lutomirski
  2015-03-04 22:54   ` [tip:x86/asm] x86/asm/entry: Do mass removal of 'ARGOFFSET' tip-bot for Denys Vlasenko
  2015-02-26 22:40 ` [PATCH 08/16] x86, entry: Remove int_check_syscall_exit_work Andy Lutomirski
                   ` (9 subsequent siblings)
  16 siblings, 1 reply; 130+ messages in thread
From: Andy Lutomirski @ 2015-02-26 22:40 UTC (permalink / raw)
  To: x86, linux-kernel
  Cc: Borislav Petkov, Oleg Nesterov, Denys Vlasenko, Linus Torvalds,
	H. Peter Anvin, Andy Lutomirski, Frederic Weisbecker,
	Alexei Starovoitov, Will Drewry, Kees Cook

From: Denys Vlasenko <dvlasenk@redhat.com>

ARGOFFSET is zero now, removing it changes no code.
A few macros lost "offset" parameter, since it is always zero now too.

No code changes - verified with objdump.

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
CC: Linus Torvalds <torvalds@linux-foundation.org>
CC: Oleg Nesterov <oleg@redhat.com>
CC: Borislav Petkov <bp@alien8.de>
CC: "H. Peter Anvin" <hpa@zytor.com>
CC: Andy Lutomirski <luto@amacapital.net>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: X86 ML <x86@kernel.org>
CC: Alexei Starovoitov <ast@plumgrid.com>
CC: Will Drewry <wad@chromium.org>
CC: Kees Cook <keescook@chromium.org>
CC: linux-kernel@vger.kernel.org
Message-Id: <1424549716-14619-1-git-send-email-dvlasenk@redhat.com>
Signed-off-by: Andy Lutomirski <luto@amacapital.net>
---
 arch/x86/ia32/ia32entry.S      | 142 ++++++++++++++++++++---------------------
 arch/x86/include/asm/calling.h |   2 -
 arch/x86/kernel/entry_64.S     |  86 ++++++++++++-------------
 3 files changed, 114 insertions(+), 116 deletions(-)

diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index f4bed4971673..e99f8a5be2df 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -41,13 +41,13 @@
 	movl	%edx,%edx	/* zero extension */
 	.endm 
 
-	/* clobbers %eax */	
-	.macro  CLEAR_RREGS offset=0, _r9=rax
+	/* clobbers %rax */
+	.macro  CLEAR_RREGS _r9=rax
 	xorl 	%eax,%eax
-	movq	%rax,\offset+R11(%rsp)
-	movq	%rax,\offset+R10(%rsp)
-	movq	%\_r9,\offset+R9(%rsp)
-	movq	%rax,\offset+R8(%rsp)
+	movq	%rax,R11(%rsp)
+	movq	%rax,R10(%rsp)
+	movq	%\_r9,R9(%rsp)
+	movq	%rax,R8(%rsp)
 	.endm
 
 	/*
@@ -60,14 +60,14 @@
 	 * If it's -1 to make us punt the syscall, then (u32)-1 is still
 	 * an appropriately invalid value.
 	 */
-	.macro LOAD_ARGS32 offset, _r9=0
+	.macro LOAD_ARGS32 _r9=0
 	.if \_r9
-	movl \offset+R9(%rsp),%r9d
+	movl R9(%rsp),%r9d
 	.endif
-	movl \offset+RCX(%rsp),%ecx
-	movl \offset+RDX(%rsp),%edx
-	movl \offset+RSI(%rsp),%esi
-	movl \offset+RDI(%rsp),%edi
+	movl RCX(%rsp),%ecx
+	movl RDX(%rsp),%edx
+	movl RSI(%rsp),%esi
+	movl RDI(%rsp),%edi
 	movl %eax,%eax			/* zero extension */
 	.endm
 	
@@ -158,12 +158,12 @@ ENTRY(ia32_sysenter_target)
 	 * ourselves.  To save a few cycles, we can check whether
 	 * NT was set instead of doing an unconditional popfq.
 	 */
-	testl $X86_EFLAGS_NT,EFLAGS-ARGOFFSET(%rsp)
+	testl $X86_EFLAGS_NT,EFLAGS(%rsp)
 	jnz sysenter_fix_flags
 sysenter_flags_fixed:
 
-	orl     $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
-	testl   $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	orl     $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP)
+	testl   $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP)
 	CFI_REMEMBER_STATE
 	jnz  sysenter_tracesys
 	cmpq	$(IA32_NR_syscalls-1),%rax
@@ -172,16 +172,16 @@ sysenter_do_call:
 	IA32_ARG_FIXUP
 sysenter_dispatch:
 	call	*ia32_sys_call_table(,%rax,8)
-	movq	%rax,RAX-ARGOFFSET(%rsp)
+	movq	%rax,RAX(%rsp)
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
-	testl	$_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	testl	$_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP)
 	jnz	sysexit_audit
 sysexit_from_sys_call:
-	andl    $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	andl    $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP)
 	/* clear IF, that popfq doesn't enable interrupts early */
-	andl	$~0x200,EFLAGS-ARGOFFSET(%rsp)
-	movl	RIP-ARGOFFSET(%rsp),%edx		/* User %eip */
+	andl	$~0x200,EFLAGS(%rsp)
+	movl	RIP(%rsp),%edx		/* User %eip */
 	CFI_REGISTER rip,rdx
 	RESTORE_RSI_RDI
 	REMOVE_PT_GPREGS_FROM_STACK 3*8
@@ -207,18 +207,18 @@ sysexit_from_sys_call:
 	movl %ebx,%esi			/* 2nd arg: 1st syscall arg */
 	movl %eax,%edi			/* 1st arg: syscall number */
 	call __audit_syscall_entry
-	movl RAX-ARGOFFSET(%rsp),%eax	/* reload syscall number */
+	movl RAX(%rsp),%eax	/* reload syscall number */
 	cmpq $(IA32_NR_syscalls-1),%rax
 	ja ia32_badsys
 	movl %ebx,%edi			/* reload 1st syscall arg */
-	movl RCX-ARGOFFSET(%rsp),%esi	/* reload 2nd syscall arg */
-	movl RDX-ARGOFFSET(%rsp),%edx	/* reload 3rd syscall arg */
-	movl RSI-ARGOFFSET(%rsp),%ecx	/* reload 4th syscall arg */
-	movl RDI-ARGOFFSET(%rsp),%r8d	/* reload 5th syscall arg */
+	movl RCX(%rsp),%esi	/* reload 2nd syscall arg */
+	movl RDX(%rsp),%edx	/* reload 3rd syscall arg */
+	movl RSI(%rsp),%ecx	/* reload 4th syscall arg */
+	movl RDI(%rsp),%r8d	/* reload 5th syscall arg */
 	.endm
 
 	.macro auditsys_exit exit
-	testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP)
 	jnz ia32_ret_from_sys_call
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_NONE)
@@ -229,13 +229,13 @@ sysexit_from_sys_call:
 1:	setbe %al		/* 1 if error, 0 if not */
 	movzbl %al,%edi		/* zero-extend that into %edi */
 	call __audit_syscall_exit
-	movq RAX-ARGOFFSET(%rsp),%rax	/* reload syscall return value */
+	movq RAX(%rsp),%rax	/* reload syscall return value */
 	movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
-	testl %edi,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	testl %edi,TI_flags+THREAD_INFO(%rsp,RIP)
 	jz \exit
-	CLEAR_RREGS -ARGOFFSET
+	CLEAR_RREGS
 	jmp int_with_check
 	.endm
 
@@ -255,7 +255,7 @@ sysenter_fix_flags:
 
 sysenter_tracesys:
 #ifdef CONFIG_AUDITSYSCALL
-	testl	$(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	testl	$(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP)
 	jz	sysenter_auditsys
 #endif
 	SAVE_EXTRA_REGS
@@ -263,7 +263,7 @@ sysenter_tracesys:
 	movq	$-ENOSYS,RAX(%rsp)/* ptrace can change this for a bad syscall */
 	movq	%rsp,%rdi        /* &pt_regs -> arg1 */
 	call	syscall_trace_enter
-	LOAD_ARGS32 ARGOFFSET  /* reload args from stack in case ptrace changed it */
+	LOAD_ARGS32  /* reload args from stack in case ptrace changed it */
 	RESTORE_EXTRA_REGS
 	cmpq	$(IA32_NR_syscalls-1),%rax
 	ja	int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */
@@ -309,17 +309,17 @@ ENTRY(ia32_cstar_target)
 	ALLOC_PT_GPREGS_ON_STACK 8
 	SAVE_C_REGS_EXCEPT_RCX_R891011
 	movl 	%eax,%eax	/* zero extension */
-	movq	%rax,ORIG_RAX-ARGOFFSET(%rsp)
-	movq	%rcx,RIP-ARGOFFSET(%rsp)
-	CFI_REL_OFFSET rip,RIP-ARGOFFSET
-	movq	%rbp,RCX-ARGOFFSET(%rsp) /* this lies slightly to ptrace */
+	movq	%rax,ORIG_RAX(%rsp)
+	movq	%rcx,RIP(%rsp)
+	CFI_REL_OFFSET rip,RIP
+	movq	%rbp,RCX(%rsp) /* this lies slightly to ptrace */
 	movl	%ebp,%ecx
-	movq	$__USER32_CS,CS-ARGOFFSET(%rsp)
-	movq	$__USER32_DS,SS-ARGOFFSET(%rsp)
-	movq	%r11,EFLAGS-ARGOFFSET(%rsp)
-	/*CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/
-	movq	%r8,RSP-ARGOFFSET(%rsp)	
-	CFI_REL_OFFSET rsp,RSP-ARGOFFSET
+	movq	$__USER32_CS,CS(%rsp)
+	movq	$__USER32_DS,SS(%rsp)
+	movq	%r11,EFLAGS(%rsp)
+	/*CFI_REL_OFFSET rflags,EFLAGS*/
+	movq	%r8,RSP(%rsp)
+	CFI_REL_OFFSET rsp,RSP
 	/* no need to do an access_ok check here because r8 has been
 	   32bit zero extended */ 
 	/* hardware stack frame is complete now */	
@@ -327,8 +327,8 @@ ENTRY(ia32_cstar_target)
 1:	movl	(%r8),%r9d
 	_ASM_EXTABLE(1b,ia32_badarg)
 	ASM_CLAC
-	orl     $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
-	testl   $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	orl     $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP)
+	testl   $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP)
 	CFI_REMEMBER_STATE
 	jnz   cstar_tracesys
 	cmpq $IA32_NR_syscalls-1,%rax
@@ -337,32 +337,32 @@ cstar_do_call:
 	IA32_ARG_FIXUP 1
 cstar_dispatch:
 	call *ia32_sys_call_table(,%rax,8)
-	movq %rax,RAX-ARGOFFSET(%rsp)
+	movq %rax,RAX(%rsp)
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
-	testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP)
 	jnz sysretl_audit
 sysretl_from_sys_call:
-	andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP)
 	RESTORE_RSI_RDI_RDX
-	movl RIP-ARGOFFSET(%rsp),%ecx
+	movl RIP(%rsp),%ecx
 	CFI_REGISTER rip,rcx
-	movl EFLAGS-ARGOFFSET(%rsp),%r11d	
+	movl EFLAGS(%rsp),%r11d
 	/*CFI_REGISTER rflags,r11*/
 	xorq	%r10,%r10
 	xorq	%r9,%r9
 	xorq	%r8,%r8
 	TRACE_IRQS_ON
-	movl RSP-ARGOFFSET(%rsp),%esp
+	movl RSP(%rsp),%esp
 	CFI_RESTORE rsp
 	USERGS_SYSRET32
 	
 #ifdef CONFIG_AUDITSYSCALL
 cstar_auditsys:
 	CFI_RESTORE_STATE
-	movl %r9d,R9-ARGOFFSET(%rsp)	/* register to be clobbered by call */
+	movl %r9d,R9(%rsp)	/* register to be clobbered by call */
 	auditsys_entry_common
-	movl R9-ARGOFFSET(%rsp),%r9d	/* reload 6th syscall arg */
+	movl R9(%rsp),%r9d	/* reload 6th syscall arg */
 	jmp cstar_dispatch
 
 sysretl_audit:
@@ -371,16 +371,16 @@ sysretl_audit:
 
 cstar_tracesys:
 #ifdef CONFIG_AUDITSYSCALL
-	testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP)
 	jz cstar_auditsys
 #endif
 	xchgl %r9d,%ebp
 	SAVE_EXTRA_REGS
-	CLEAR_RREGS 0, r9
+	CLEAR_RREGS r9
 	movq $-ENOSYS,RAX(%rsp)	/* ptrace can change this for a bad syscall */
 	movq %rsp,%rdi        /* &pt_regs -> arg1 */
 	call syscall_trace_enter
-	LOAD_ARGS32 ARGOFFSET, 1  /* reload args from stack in case ptrace changed it */
+	LOAD_ARGS32 1	/* reload args from stack in case ptrace changed it */
 	RESTORE_EXTRA_REGS
 	xchgl %ebp,%r9d
 	cmpq $(IA32_NR_syscalls-1),%rax
@@ -438,8 +438,8 @@ ENTRY(ia32_syscall)
 	   this could be a problem. */
 	ALLOC_PT_GPREGS_ON_STACK
 	SAVE_C_REGS_EXCEPT_R891011
-	orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
-	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP)
+	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP)
 	jnz ia32_tracesys
 	cmpq $(IA32_NR_syscalls-1),%rax
 	ja ia32_badsys
@@ -447,9 +447,9 @@ ia32_do_call:
 	IA32_ARG_FIXUP
 	call *ia32_sys_call_table(,%rax,8) # xxx: rip relative
 ia32_sysret:
-	movq %rax,RAX-ARGOFFSET(%rsp)
+	movq %rax,RAX(%rsp)
 ia32_ret_from_sys_call:
-	CLEAR_RREGS -ARGOFFSET
+	CLEAR_RREGS
 	jmp int_ret_from_sys_call
 
 ia32_tracesys:
@@ -458,7 +458,7 @@ ia32_tracesys:
 	movq $-ENOSYS,RAX(%rsp)	/* ptrace can change this for a bad syscall */
 	movq %rsp,%rdi        /* &pt_regs -> arg1 */
 	call syscall_trace_enter
-	LOAD_ARGS32 ARGOFFSET  /* reload args from stack in case ptrace changed it */
+	LOAD_ARGS32	/* reload args from stack in case ptrace changed it */
 	RESTORE_EXTRA_REGS
 	cmpq $(IA32_NR_syscalls-1),%rax
 	ja  int_ret_from_sys_call	/* ia32_tracesys has set RAX(%rsp) */
@@ -466,7 +466,7 @@ ia32_tracesys:
 END(ia32_syscall)
 
 ia32_badsys:
-	movq $0,ORIG_RAX-ARGOFFSET(%rsp)
+	movq $0,ORIG_RAX(%rsp)
 	movq $-ENOSYS,%rax
 	jmp ia32_sysret
 
@@ -499,17 +499,17 @@ ia32_ptregs_common:
 	CFI_ENDPROC
 	CFI_STARTPROC32	simple
 	CFI_SIGNAL_FRAME
-	CFI_DEF_CFA	rsp,SS+8-ARGOFFSET
-	CFI_REL_OFFSET	rax,RAX-ARGOFFSET
-	CFI_REL_OFFSET	rcx,RCX-ARGOFFSET
-	CFI_REL_OFFSET	rdx,RDX-ARGOFFSET
-	CFI_REL_OFFSET	rsi,RSI-ARGOFFSET
-	CFI_REL_OFFSET	rdi,RDI-ARGOFFSET
-	CFI_REL_OFFSET	rip,RIP-ARGOFFSET
-/*	CFI_REL_OFFSET	cs,CS-ARGOFFSET*/
-/*	CFI_REL_OFFSET	rflags,EFLAGS-ARGOFFSET*/
-	CFI_REL_OFFSET	rsp,RSP-ARGOFFSET
-/*	CFI_REL_OFFSET	ss,SS-ARGOFFSET*/
+	CFI_DEF_CFA	rsp,SS+8
+	CFI_REL_OFFSET	rax,RAX
+	CFI_REL_OFFSET	rcx,RCX
+	CFI_REL_OFFSET	rdx,RDX
+	CFI_REL_OFFSET	rsi,RSI
+	CFI_REL_OFFSET	rdi,RDI
+	CFI_REL_OFFSET	rip,RIP
+/*	CFI_REL_OFFSET	cs,CS*/
+/*	CFI_REL_OFFSET	rflags,EFLAGS*/
+	CFI_REL_OFFSET	rsp,RSP
+/*	CFI_REL_OFFSET	ss,SS*/
 	SAVE_EXTRA_REGS 8
 	call *%rax
 	RESTORE_EXTRA_REGS 8
diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h
index 38356476b131..4a7ceb9789a5 100644
--- a/arch/x86/include/asm/calling.h
+++ b/arch/x86/include/asm/calling.h
@@ -88,8 +88,6 @@ For 32-bit we have the following conventions - kernel is built with
 #define RSP		19*8
 #define SS		20*8
 
-#define ARGOFFSET	0
-
 	.macro ALLOC_PT_GPREGS_ON_STACK addskip=0
 	subq	$15*8+\addskip, %rsp
 	CFI_ADJUST_CFA_OFFSET 15*8+\addskip
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index e5cad016cb60..5fe186e12cf1 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -73,9 +73,9 @@ ENDPROC(native_usergs_sysret64)
 #endif /* CONFIG_PARAVIRT */
 
 
-.macro TRACE_IRQS_IRETQ offset=ARGOFFSET
+.macro TRACE_IRQS_IRETQ
 #ifdef CONFIG_TRACE_IRQFLAGS
-	bt   $9,EFLAGS-\offset(%rsp)	/* interrupts off? */
+	bt   $9,EFLAGS(%rsp)	/* interrupts off? */
 	jnc  1f
 	TRACE_IRQS_ON
 1:
@@ -107,8 +107,8 @@ ENDPROC(native_usergs_sysret64)
 	call debug_stack_reset
 .endm
 
-.macro TRACE_IRQS_IRETQ_DEBUG offset=ARGOFFSET
-	bt   $9,EFLAGS-\offset(%rsp)	/* interrupts off? */
+.macro TRACE_IRQS_IRETQ_DEBUG
+	bt   $9,EFLAGS(%rsp)	/* interrupts off? */
 	jnc  1f
 	TRACE_IRQS_ON_DEBUG
 1:
@@ -184,16 +184,16 @@ ENDPROC(native_usergs_sysret64)
  * frame that enables passing a complete pt_regs to a C function.
  */
 	.macro DEFAULT_FRAME start=1 offset=0
-	XCPT_FRAME \start, ORIG_RAX+\offset-ARGOFFSET
-	CFI_REL_OFFSET rdi, RDI+\offset-ARGOFFSET
-	CFI_REL_OFFSET rsi, RSI+\offset-ARGOFFSET
-	CFI_REL_OFFSET rdx, RDX+\offset-ARGOFFSET
-	CFI_REL_OFFSET rcx, RCX+\offset-ARGOFFSET
-	CFI_REL_OFFSET rax, RAX+\offset-ARGOFFSET
-	CFI_REL_OFFSET r8, R8+\offset-ARGOFFSET
-	CFI_REL_OFFSET r9, R9+\offset-ARGOFFSET
-	CFI_REL_OFFSET r10, R10+\offset-ARGOFFSET
-	CFI_REL_OFFSET r11, R11+\offset-ARGOFFSET
+	XCPT_FRAME \start, ORIG_RAX+\offset
+	CFI_REL_OFFSET rdi, RDI+\offset
+	CFI_REL_OFFSET rsi, RSI+\offset
+	CFI_REL_OFFSET rdx, RDX+\offset
+	CFI_REL_OFFSET rcx, RCX+\offset
+	CFI_REL_OFFSET rax, RAX+\offset
+	CFI_REL_OFFSET r8, R8+\offset
+	CFI_REL_OFFSET r9, R9+\offset
+	CFI_REL_OFFSET r10, R10+\offset
+	CFI_REL_OFFSET r11, R11+\offset
 	CFI_REL_OFFSET rbx, RBX+\offset
 	CFI_REL_OFFSET rbp, RBP+\offset
 	CFI_REL_OFFSET r12, R12+\offset
@@ -237,13 +237,13 @@ ENTRY(ret_from_fork)
 
 	RESTORE_EXTRA_REGS
 
-	testl $3, CS-ARGOFFSET(%rsp)		# from kernel_thread?
+	testl $3,CS(%rsp)			# from kernel_thread?
 	jz   1f
 
 	testl $_TIF_IA32, TI_flags(%rcx)	# 32-bit compat task needs IRET
 	jnz  int_ret_from_sys_call
 
-	RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET
+	RESTORE_TOP_OF_STACK %rdi
 	jmp ret_from_sys_call			# go to the SYSRET fastpath
 
 1:
@@ -310,11 +310,11 @@ GLOBAL(system_call_after_swapgs)
 	ENABLE_INTERRUPTS(CLBR_NONE)
 	ALLOC_PT_GPREGS_ON_STACK 8
 	SAVE_C_REGS_EXCEPT_RAX_RCX
-	movq	$-ENOSYS,RAX-ARGOFFSET(%rsp)
-	movq_cfi rax,(ORIG_RAX-ARGOFFSET)
-	movq	%rcx,RIP-ARGOFFSET(%rsp)
-	CFI_REL_OFFSET rip,RIP-ARGOFFSET
-	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	movq	$-ENOSYS,RAX(%rsp)
+	movq_cfi rax,ORIG_RAX
+	movq	%rcx,RIP(%rsp)
+	CFI_REL_OFFSET rip,RIP
+	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP)
 	jnz tracesys
 system_call_fastpath:
 #if __SYSCALL_MASK == ~0
@@ -326,13 +326,13 @@ system_call_fastpath:
 	ja ret_from_sys_call  /* and return regs->ax */
 	movq %r10,%rcx
 	call *sys_call_table(,%rax,8)  # XXX:	 rip relative
-	movq %rax,RAX-ARGOFFSET(%rsp)
+	movq %rax,RAX(%rsp)
 /*
  * Syscall return path ending with SYSRET (fast path)
  * Has incomplete stack frame and undefined top of stack.
  */
 ret_from_sys_call:
-	testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP)
 	jnz int_ret_from_sys_call_fixup	/* Go the the slow path */
 
 	LOCKDEP_SYS_EXIT
@@ -344,7 +344,7 @@ ret_from_sys_call:
 	 */
 	TRACE_IRQS_ON
 	RESTORE_C_REGS_EXCEPT_RCX
-	movq RIP-ARGOFFSET(%rsp),%rcx
+	movq RIP(%rsp),%rcx
 	CFI_REGISTER	rip,rcx
 	/*CFI_REGISTER	rflags,r11*/
 	movq	PER_CPU_VAR(old_rsp), %rsp
@@ -353,7 +353,7 @@ ret_from_sys_call:
 	CFI_RESTORE_STATE
 
 int_ret_from_sys_call_fixup:
-	FIXUP_TOP_OF_STACK %r11, -ARGOFFSET
+	FIXUP_TOP_OF_STACK %r11
 	jmp int_ret_from_sys_call
 
 	/* Do syscall tracing */
@@ -364,7 +364,7 @@ tracesys:
 	test %rax, %rax
 	jnz tracesys_phase2		/* if needed, run the slow path */
 	RESTORE_C_REGS_EXCEPT_RAX	/* else restore clobbered regs */
-	movq ORIG_RAX-ARGOFFSET(%rsp), %rax
+	movq ORIG_RAX(%rsp), %rax
 	jmp system_call_fastpath	/*      and return to the fast path */
 
 tracesys_phase2:
@@ -391,7 +391,7 @@ tracesys_phase2:
 	ja   int_ret_from_sys_call	/* RAX(%rsp) is already set */
 	movq %r10,%rcx	/* fixup for C */
 	call *sys_call_table(,%rax,8)
-	movq %rax,RAX-ARGOFFSET(%rsp)
+	movq %rax,RAX(%rsp)
 	/* Use IRET because user could have changed frame */
 
 /*
@@ -475,9 +475,9 @@ END(stub_\func)
 ENTRY(\label)
 	CFI_STARTPROC
 	DEFAULT_FRAME 0, 8		/* offset 8: return address */
-	FIXUP_TOP_OF_STACK %r11, 8-ARGOFFSET
+	FIXUP_TOP_OF_STACK %r11, 8
 	call \func
-	RESTORE_TOP_OF_STACK %r11, 8-ARGOFFSET
+	RESTORE_TOP_OF_STACK %r11, 8
 	ret
 	CFI_ENDPROC
 END(\label)
@@ -677,7 +677,7 @@ common_interrupt:
 	ASM_CLAC
 	addq $-0x80,(%rsp)		/* Adjust vector to [-256,-1] range */
 	interrupt do_IRQ
-	/* 0(%rsp): old_rsp-ARGOFFSET */
+	/* 0(%rsp): old_rsp */
 ret_from_intr:
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
@@ -687,13 +687,13 @@ ret_from_intr:
 	popq %rsi
 	CFI_DEF_CFA rsi,SS+8-RBP	/* reg/off reset after def_cfa_expr */
 	/* return code expects complete pt_regs - adjust rsp accordingly: */
-	leaq ARGOFFSET-RBP(%rsi), %rsp
+	leaq -RBP(%rsi),%rsp
 	CFI_DEF_CFA_REGISTER	rsp
-	CFI_ADJUST_CFA_OFFSET	RBP-ARGOFFSET
+	CFI_ADJUST_CFA_OFFSET	RBP
 
 exit_intr:
 	GET_THREAD_INFO(%rcx)
-	testl $3,CS-ARGOFFSET(%rsp)
+	testl $3,CS(%rsp)
 	je retint_kernel
 
 	/* Interrupt came from user space */
@@ -721,8 +721,8 @@ retint_swapgs:		/* return to user-space */
 	 * Try to use SYSRET instead of IRET if we're returning to
 	 * a completely clean 64-bit userspace context.
 	 */
-	movq (RCX-ARGOFFSET)(%rsp), %rcx
-	cmpq %rcx,(RIP-ARGOFFSET)(%rsp)		/* RCX == RIP */
+	movq RCX(%rsp),%rcx
+	cmpq %rcx,RIP(%rsp)		/* RCX == RIP */
 	jne opportunistic_sysret_failed
 
 	/*
@@ -743,19 +743,19 @@ retint_swapgs:		/* return to user-space */
 	shr $__VIRTUAL_MASK_SHIFT, %rcx
 	jnz opportunistic_sysret_failed
 
-	cmpq $__USER_CS,(CS-ARGOFFSET)(%rsp)	/* CS must match SYSRET */
+	cmpq $__USER_CS,CS(%rsp)	/* CS must match SYSRET */
 	jne opportunistic_sysret_failed
 
-	movq (R11-ARGOFFSET)(%rsp), %r11
-	cmpq %r11,(EFLAGS-ARGOFFSET)(%rsp)	/* R11 == RFLAGS */
+	movq R11(%rsp),%r11
+	cmpq %r11,EFLAGS(%rsp)		/* R11 == RFLAGS */
 	jne opportunistic_sysret_failed
 
-	testq $X86_EFLAGS_RF,%r11		/* sysret can't restore RF */
+	testq $X86_EFLAGS_RF,%r11	/* sysret can't restore RF */
 	jnz opportunistic_sysret_failed
 
 	/* nothing to check for RSP */
 
-	cmpq $__USER_DS,(SS-ARGOFFSET)(%rsp)	/* SS must match SYSRET */
+	cmpq $__USER_DS,SS(%rsp)	/* SS must match SYSRET */
 	jne opportunistic_sysret_failed
 
 	/*
@@ -870,7 +870,7 @@ retint_signal:
 ENTRY(retint_kernel)
 	cmpl $0,PER_CPU_VAR(__preempt_count)
 	jnz  retint_restore_args
-	bt   $9,EFLAGS-ARGOFFSET(%rsp)	/* interrupts off? */
+	bt   $9,EFLAGS(%rsp)	/* interrupts off? */
 	jnc  retint_restore_args
 	call preempt_schedule_irq
 	jmp exit_intr
@@ -1283,11 +1283,11 @@ ENTRY(paranoid_exit)
 	TRACE_IRQS_OFF_DEBUG
 	testl %ebx,%ebx				/* swapgs needed? */
 	jnz paranoid_exit_no_swapgs
-	TRACE_IRQS_IRETQ 0
+	TRACE_IRQS_IRETQ
 	SWAPGS_UNSAFE_STACK
 	jmp paranoid_exit_restore
 paranoid_exit_no_swapgs:
-	TRACE_IRQS_IRETQ_DEBUG 0
+	TRACE_IRQS_IRETQ_DEBUG
 paranoid_exit_restore:
 	RESTORE_EXTRA_REGS
 	RESTORE_C_REGS
-- 
2.1.0


^ permalink raw reply	[flat|nested] 130+ messages in thread

* [PATCH 08/16] x86, entry: Remove int_check_syscall_exit_work
  2015-02-26 22:40 [PATCH 00/16] x86/asm changes for 4.1 for review Andy Lutomirski
                   ` (6 preceding siblings ...)
  2015-02-26 22:40 ` [PATCH 07/16] x86: mass removal of ARGOFFSET. No code changes Andy Lutomirski
@ 2015-02-26 22:40 ` Andy Lutomirski
  2015-03-04 22:54   ` [tip:x86/asm] x86/asm/entry/64: Remove ' int_check_syscall_exit_work' tip-bot for Andy Lutomirski
  2015-02-26 22:40 ` [PATCH 09/16] x86: add comments about various syscall instructions, no code changes Andy Lutomirski
                   ` (8 subsequent siblings)
  16 siblings, 1 reply; 130+ messages in thread
From: Andy Lutomirski @ 2015-02-26 22:40 UTC (permalink / raw)
  To: x86, linux-kernel
  Cc: Borislav Petkov, Oleg Nesterov, Denys Vlasenko, Andy Lutomirski

Nothing references it any more.

Fixes: 96b6352c1271 x86_64, entry: Remove the syscall exit audit and schedule optimizations
Reported-by: Denys Vlasenko <vda.linux@googlemail.com>
Signed-off-by: Andy Lutomirski <luto@amacapital.net>
---
 arch/x86/kernel/entry_64.S | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 5fe186e12cf1..990793a811f4 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -431,7 +431,6 @@ int_careful:
 int_very_careful:
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_NONE)
-int_check_syscall_exit_work:
 	SAVE_EXTRA_REGS
 	/* Check for syscall exit trace */
 	testl $_TIF_WORK_SYSCALL_EXIT,%edx
-- 
2.1.0


^ permalink raw reply	[flat|nested] 130+ messages in thread

* [PATCH 09/16] x86: add comments about various syscall instructions, no code changes
  2015-02-26 22:40 [PATCH 00/16] x86/asm changes for 4.1 for review Andy Lutomirski
                   ` (7 preceding siblings ...)
  2015-02-26 22:40 ` [PATCH 08/16] x86, entry: Remove int_check_syscall_exit_work Andy Lutomirski
@ 2015-02-26 22:40 ` Andy Lutomirski
  2015-03-04 22:54   ` [tip:x86/asm] x86/asm/entry: Add comments about various syscall instructions tip-bot for Denys Vlasenko
  2015-02-26 22:40 ` [PATCH 10/16] x86: entry_64.S: move save_paranoid and ret_from_fork closer to their users Andy Lutomirski
                   ` (7 subsequent siblings)
  16 siblings, 1 reply; 130+ messages in thread
From: Andy Lutomirski @ 2015-02-26 22:40 UTC (permalink / raw)
  To: x86, linux-kernel
  Cc: Borislav Petkov, Oleg Nesterov, Denys Vlasenko, Linus Torvalds,
	H. Peter Anvin, Andy Lutomirski, Frederic Weisbecker,
	Alexei Starovoitov, Will Drewry, Kees Cook

From: Denys Vlasenko <dvlasenk@redhat.com>

SYSCALL/SYSRET and SYSENTER/SYSEXIT have weird semantics.
Moreover, they differ in 32- and 64-bit mode.
What is saved? What is not? Is rsp set? Are interrupts disabled?
People tend to not remember these details well enough.

This patch adds comments which explain in detail
what registers are modified by each of these instructions.
The comments are placed immediately before corresponding
entry and exit points.

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
CC: Linus Torvalds <torvalds@linux-foundation.org>
CC: Oleg Nesterov <oleg@redhat.com>
CC: Borislav Petkov <bp@alien8.de>
CC: "H. Peter Anvin" <hpa@zytor.com>
CC: Andy Lutomirski <luto@amacapital.net>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: X86 ML <x86@kernel.org>
CC: Alexei Starovoitov <ast@plumgrid.com>
CC: Will Drewry <wad@chromium.org>
CC: Kees Cook <keescook@chromium.org>
CC: linux-kernel@vger.kernel.org
Message-Id: <1424736744-13414-1-git-send-email-dvlasenk@redhat.com>
Signed-off-by: Andy Lutomirski <luto@amacapital.net>
---
 arch/x86/ia32/ia32entry.S  | 133 ++++++++++++++++++++++++++++-----------------
 arch/x86/kernel/entry_64.S |  32 ++++++-----
 2 files changed, 102 insertions(+), 63 deletions(-)

diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index e99f8a5be2df..b5670564a1fb 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -99,22 +99,25 @@ ENDPROC(native_irq_enable_sysexit)
 /*
  * 32bit SYSENTER instruction entry.
  *
+ * SYSENTER loads ss, rsp, cs, and rip from previously programmed MSRs.
+ * IF and VM in rflags are cleared (IOW: interrupts are off).
+ * SYSENTER does not save anything on the stack,
+ * and does not save old rip (!!!) and rflags.
+ *
  * Arguments:
- * %eax	System call number.
- * %ebx Arg1
- * %ecx Arg2
- * %edx Arg3
- * %esi Arg4
- * %edi Arg5
- * %ebp user stack
- * 0(%ebp) Arg6	
- * 	
- * Interrupts off.
- *	
+ * eax  system call number
+ * ebx  arg1
+ * ecx  arg2
+ * edx  arg3
+ * esi  arg4
+ * edi  arg5
+ * ebp  user stack
+ * 0(%ebp) arg6
+ *
  * This is purely a fast path. For anything complicated we use the int 0x80
- * path below.	Set up a complete hardware stack frame to share code
+ * path below. We set up a complete hardware stack frame to share code
  * with the int 0x80 path.
- */ 	
+ */
 ENTRY(ia32_sysenter_target)
 	CFI_STARTPROC32	simple
 	CFI_SIGNAL_FRAME
@@ -128,6 +131,7 @@ ENTRY(ia32_sysenter_target)
 	 * disabled irqs, here we enable it straight after entry:
 	 */
 	ENABLE_INTERRUPTS(CLBR_NONE)
+	/* Construct iret frame (ss,rsp,rflags,cs,rip) */
  	movl	%ebp,%ebp		/* zero extension */
 	pushq_cfi $__USER32_DS
 	/*CFI_REL_OFFSET ss,0*/
@@ -140,14 +144,19 @@ ENTRY(ia32_sysenter_target)
 	pushq_cfi $__USER32_CS
 	/*CFI_REL_OFFSET cs,0*/
 	movl	%eax, %eax
+	/* Store thread_info->sysenter_return in rip stack slot */
 	pushq_cfi %r10
 	CFI_REL_OFFSET rip,0
+	/* Store orig_ax */
 	pushq_cfi %rax
+	/* Construct the rest of "struct pt_regs" */
 	cld
 	ALLOC_PT_GPREGS_ON_STACK
 	SAVE_C_REGS_EXCEPT_R891011
- 	/* no need to do an access_ok check here because rbp has been
- 	   32bit zero extended */ 
+	/*
+	 * no need to do an access_ok check here because rbp has been
+	 * 32bit zero extended
+	 */
 	ASM_STAC
 1:	movl	(%rbp),%ebp
 	_ASM_EXTABLE(1b,ia32_badarg)
@@ -184,6 +193,7 @@ sysexit_from_sys_call:
 	movl	RIP(%rsp),%edx		/* User %eip */
 	CFI_REGISTER rip,rdx
 	RESTORE_RSI_RDI
+	/* pop everything except ss,rsp,rflags slots */
 	REMOVE_PT_GPREGS_FROM_STACK 3*8
 	xorq	%r8,%r8
 	xorq	%r9,%r9
@@ -194,6 +204,10 @@ sysexit_from_sys_call:
 	popq_cfi %rcx				/* User %esp */
 	CFI_REGISTER rsp,rcx
 	TRACE_IRQS_ON
+	/*
+	 * 32bit SYSEXIT restores eip from edx, esp from ecx.
+	 * cs and ss are loaded from MSRs.
+	 */
 	ENABLE_INTERRUPTS_SYSEXIT32
 
 	CFI_RESTORE_STATE
@@ -274,23 +288,33 @@ ENDPROC(ia32_sysenter_target)
 /*
  * 32bit SYSCALL instruction entry.
  *
+ * 32bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11,
+ * then loads new ss, cs, and rip from previously programmed MSRs.
+ * rflags gets masked by a value from another MSR (so CLD and CLAC
+ * are not needed). SYSCALL does not save anything on the stack
+ * and does not change rsp.
+ *
+ * Note: rflags saving+masking-with-MSR happens only in Long mode
+ * (in legacy 32bit mode, IF, RF and VM bits are cleared and that's it).
+ * Don't get confused: rflags saving+masking depends on Long Mode Active bit
+ * (EFER.LMA=1), NOT on bitness of userspace where SYSCALL executes
+ * or target CS descriptor's L bit (SYSCALL does not read segment descriptors).
+ *
  * Arguments:
- * %eax	System call number.
- * %ebx Arg1
- * %ecx return EIP 
- * %edx Arg3
- * %esi Arg4
- * %edi Arg5
- * %ebp Arg2    [note: not saved in the stack frame, should not be touched]
- * %esp user stack 
- * 0(%esp) Arg6
- * 	
- * Interrupts off.
- *	
+ * eax  system call number
+ * ecx  return address
+ * ebx  arg1
+ * ebp  arg2	(note: not saved in the stack frame, should not be touched)
+ * edx  arg3
+ * esi  arg4
+ * edi  arg5
+ * esp  user stack
+ * 0(%esp) arg6
+ *
  * This is purely a fast path. For anything complicated we use the int 0x80
- * path below.	Set up a complete hardware stack frame to share code
- * with the int 0x80 path.	
- */ 	
+ * path below. We set up a complete hardware stack frame to share code
+ * with the int 0x80 path.
+ */
 ENTRY(ia32_cstar_target)
 	CFI_STARTPROC32	simple
 	CFI_SIGNAL_FRAME
@@ -306,7 +330,7 @@ ENTRY(ia32_cstar_target)
 	 * disabled irqs and here we enable it straight after entry:
 	 */
 	ENABLE_INTERRUPTS(CLBR_NONE)
-	ALLOC_PT_GPREGS_ON_STACK 8
+	ALLOC_PT_GPREGS_ON_STACK 8	/* +8: space for orig_ax */
 	SAVE_C_REGS_EXCEPT_RCX_R891011
 	movl 	%eax,%eax	/* zero extension */
 	movq	%rax,ORIG_RAX(%rsp)
@@ -320,9 +344,11 @@ ENTRY(ia32_cstar_target)
 	/*CFI_REL_OFFSET rflags,EFLAGS*/
 	movq	%r8,RSP(%rsp)
 	CFI_REL_OFFSET rsp,RSP
-	/* no need to do an access_ok check here because r8 has been
-	   32bit zero extended */ 
-	/* hardware stack frame is complete now */	
+	/* iret stack frame is complete now */
+	/*
+	 * no need to do an access_ok check here because r8 has been
+	 * 32bit zero extended
+	 */
 	ASM_STAC
 1:	movl	(%r8),%r9d
 	_ASM_EXTABLE(1b,ia32_badarg)
@@ -355,8 +381,15 @@ sysretl_from_sys_call:
 	TRACE_IRQS_ON
 	movl RSP(%rsp),%esp
 	CFI_RESTORE rsp
+	/*
+	 * 64bit->32bit SYSRET restores eip from ecx,
+	 * eflags from r11 (but RF and VM bits are forced to 0),
+	 * cs and ss are loaded from MSRs.
+	 * (Note: 32bit->32bit SYSRET is different: since r11
+	 * does not exist, it merely sets eflags.IF=1).
+	 */
 	USERGS_SYSRET32
-	
+
 #ifdef CONFIG_AUDITSYSCALL
 cstar_auditsys:
 	CFI_RESTORE_STATE
@@ -394,26 +427,26 @@ ia32_badarg:
 	jmp ia32_sysret
 	CFI_ENDPROC
 
-/* 
- * Emulated IA32 system calls via int 0x80. 
+/*
+ * Emulated IA32 system calls via int 0x80.
  *
- * Arguments:	 
- * %eax	System call number.
- * %ebx Arg1
- * %ecx Arg2
- * %edx Arg3
- * %esi Arg4
- * %edi Arg5
- * %ebp Arg6    [note: not saved in the stack frame, should not be touched]
+ * Arguments:
+ * eax  system call number
+ * ebx  arg1
+ * ecx  arg2
+ * edx  arg3
+ * esi  arg4
+ * edi  arg5
+ * ebp  arg6	(note: not saved in the stack frame, should not be touched)
  *
  * Notes:
- * Uses the same stack frame as the x86-64 version.	
- * All registers except %eax must be saved (but ptrace may violate that)
+ * Uses the same stack frame as the x86-64 version.
+ * All registers except eax must be saved (but ptrace may violate that).
  * Arguments are zero extended. For system calls that want sign extension and
  * take long arguments a wrapper is needed. Most calls can just be called
  * directly.
- * Assumes it is only called from user space and entered with interrupts off.	
- */ 				
+ * Assumes it is only called from user space and entered with interrupts off.
+ */
 
 ENTRY(ia32_syscall)
 	CFI_STARTPROC32	simple
@@ -432,7 +465,7 @@ ENTRY(ia32_syscall)
 	 */
 	ENABLE_INTERRUPTS(CLBR_NONE)
 	movl %eax,%eax
-	pushq_cfi %rax
+	pushq_cfi %rax		/* store orig_ax */
 	cld
 	/* note the registers are not zero extended to the sf.
 	   this could be a problem. */
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 990793a811f4..73c314a6e436 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -256,25 +256,25 @@ ENTRY(ret_from_fork)
 END(ret_from_fork)
 
 /*
- * System call entry. Up to 6 arguments in registers are supported.
+ * 64bit SYSCALL instruction entry. Up to 6 arguments in registers.
  *
- * SYSCALL does not save anything on the stack and does not change the
- * stack pointer.  However, it does mask the flags register for us, so
- * CLD and CLAC are not needed.
- */
-
-/*
- * Register setup:
+ * 64bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11,
+ * then loads new ss, cs, and rip from previously programmed MSRs.
+ * rflags gets masked by a value from another MSR (so CLD and CLAC
+ * are not needed). SYSCALL does not save anything on the stack
+ * and does not change rsp.
+ *
+ * Registers on entry:
  * rax  system call number
+ * rcx  return address
+ * r11  saved rflags (note: r11 is callee-clobbered register in C ABI)
  * rdi  arg0
- * rcx  return address for syscall/sysret, C arg3
  * rsi  arg1
  * rdx  arg2
- * r10  arg3 	(--> moved to rcx for C)
+ * r10  arg3 (needs to be moved to rcx to conform to C ABI)
  * r8   arg4
  * r9   arg5
- * r11  eflags for syscall/sysret, temporary for C
- * r12-r15,rbp,rbx saved by C code, not touched.
+ * (note: r12-r15,rbp,rbx are callee-preserved in C ABI)
  *
  * Interrupts are off on entry.
  * Only called from user space.
@@ -302,13 +302,14 @@ ENTRY(system_call)
 GLOBAL(system_call_after_swapgs)
 
 	movq	%rsp,PER_CPU_VAR(old_rsp)
+	/* kernel_stack is set so that 5 slots (iret frame) are preallocated */
 	movq	PER_CPU_VAR(kernel_stack),%rsp
 	/*
 	 * No need to follow this irqs off/on section - it's straight
 	 * and short:
 	 */
 	ENABLE_INTERRUPTS(CLBR_NONE)
-	ALLOC_PT_GPREGS_ON_STACK 8
+	ALLOC_PT_GPREGS_ON_STACK 8		/* +8: space for orig_ax */
 	SAVE_C_REGS_EXCEPT_RAX_RCX
 	movq	$-ENOSYS,RAX(%rsp)
 	movq_cfi rax,ORIG_RAX
@@ -348,6 +349,11 @@ ret_from_sys_call:
 	CFI_REGISTER	rip,rcx
 	/*CFI_REGISTER	rflags,r11*/
 	movq	PER_CPU_VAR(old_rsp), %rsp
+	/*
+	 * 64bit SYSRET restores rip from rcx,
+	 * rflags from r11 (but RF and VM bits are forced to 0),
+	 * cs and ss are loaded from MSRs.
+	 */
 	USERGS_SYSRET64
 
 	CFI_RESTORE_STATE
-- 
2.1.0


^ permalink raw reply	[flat|nested] 130+ messages in thread

* [PATCH 10/16] x86: entry_64.S: move save_paranoid and ret_from_fork closer to their users
  2015-02-26 22:40 [PATCH 00/16] x86/asm changes for 4.1 for review Andy Lutomirski
                   ` (8 preceding siblings ...)
  2015-02-26 22:40 ` [PATCH 09/16] x86: add comments about various syscall instructions, no code changes Andy Lutomirski
@ 2015-02-26 22:40 ` Andy Lutomirski
  2015-03-04 22:55   ` [tip:x86/asm] x86/asm/entry/64: Move 'save_paranoid' and ' ret_from_fork' " tip-bot for Denys Vlasenko
  2015-02-26 22:40 ` [PATCH 11/16] x86: entry_64.S: rename save_paranoid to paranoid_entry, no code changes Andy Lutomirski
                   ` (6 subsequent siblings)
  16 siblings, 1 reply; 130+ messages in thread
From: Andy Lutomirski @ 2015-02-26 22:40 UTC (permalink / raw)
  To: x86, linux-kernel
  Cc: Borislav Petkov, Oleg Nesterov, Denys Vlasenko, Linus Torvalds,
	H. Peter Anvin, Andy Lutomirski, Frederic Weisbecker,
	Alexei Starovoitov, Will Drewry, Kees Cook

From: Denys Vlasenko <dvlasenk@redhat.com>

For some odd reason, these two functions are at the very top of the file.
save_paranoid's caller is approximately in the middle of it, move it there.
Move ret_from_fork to be right after fork/exec helpers.

This is a pure block move, nothing is changed in the function bodies.

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
CC: Linus Torvalds <torvalds@linux-foundation.org>
CC: Oleg Nesterov <oleg@redhat.com>
CC: Borislav Petkov <bp@alien8.de>
CC: "H. Peter Anvin" <hpa@zytor.com>
CC: Andy Lutomirski <luto@amacapital.net>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: X86 ML <x86@kernel.org>
CC: Alexei Starovoitov <ast@plumgrid.com>
CC: Will Drewry <wad@chromium.org>
CC: Kees Cook <keescook@chromium.org>
CC: linux-kernel@vger.kernel.org
Message-Id: <1424736744-13414-2-git-send-email-dvlasenk@redhat.com>
Signed-off-by: Andy Lutomirski <luto@amacapital.net>
---
 arch/x86/kernel/entry_64.S | 106 ++++++++++++++++++++++-----------------------
 1 file changed, 53 insertions(+), 53 deletions(-)

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 73c314a6e436..cd1d62b3200a 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -202,59 +202,6 @@ ENDPROC(native_usergs_sysret64)
 	CFI_REL_OFFSET r15, R15+\offset
 	.endm
 
-ENTRY(save_paranoid)
-	XCPT_FRAME 1 RDI+8
-	cld
-	SAVE_C_REGS 8
-	SAVE_EXTRA_REGS 8
-	movl $1,%ebx
-	movl $MSR_GS_BASE,%ecx
-	rdmsr
-	testl %edx,%edx
-	js 1f	/* negative -> in kernel */
-	SWAPGS
-	xorl %ebx,%ebx
-1:	ret
-	CFI_ENDPROC
-END(save_paranoid)
-
-/*
- * A newly forked process directly context switches into this address.
- *
- * rdi: prev task we switched from
- */
-ENTRY(ret_from_fork)
-	DEFAULT_FRAME
-
-	LOCK ; btr $TIF_FORK,TI_flags(%r8)
-
-	pushq_cfi $0x0002
-	popfq_cfi				# reset kernel eflags
-
-	call schedule_tail			# rdi: 'prev' task parameter
-
-	GET_THREAD_INFO(%rcx)
-
-	RESTORE_EXTRA_REGS
-
-	testl $3,CS(%rsp)			# from kernel_thread?
-	jz   1f
-
-	testl $_TIF_IA32, TI_flags(%rcx)	# 32-bit compat task needs IRET
-	jnz  int_ret_from_sys_call
-
-	RESTORE_TOP_OF_STACK %rdi
-	jmp ret_from_sys_call			# go to the SYSRET fastpath
-
-1:
-	movq %rbp, %rdi
-	call *%rbx
-	movl $0, RAX(%rsp)
-	RESTORE_EXTRA_REGS
-	jmp int_ret_from_sys_call
-	CFI_ENDPROC
-END(ret_from_fork)
-
 /*
  * 64bit SYSCALL instruction entry. Up to 6 arguments in registers.
  *
@@ -582,6 +529,43 @@ END(stub_x32_execveat)
 #endif
 
 /*
+ * A newly forked process directly context switches into this address.
+ *
+ * rdi: prev task we switched from
+ */
+ENTRY(ret_from_fork)
+	DEFAULT_FRAME
+
+	LOCK ; btr $TIF_FORK,TI_flags(%r8)
+
+	pushq_cfi $0x0002
+	popfq_cfi				# reset kernel eflags
+
+	call schedule_tail			# rdi: 'prev' task parameter
+
+	GET_THREAD_INFO(%rcx)
+
+	RESTORE_EXTRA_REGS
+
+	testl $3,CS(%rsp)			# from kernel_thread?
+	jz   1f
+
+	testl $_TIF_IA32, TI_flags(%rcx)	# 32-bit compat task needs IRET
+	jnz  int_ret_from_sys_call
+
+	RESTORE_TOP_OF_STACK %rdi
+	jmp ret_from_sys_call			# go to the SYSRET fastpath
+
+1:
+	movq %rbp, %rdi
+	call *%rbx
+	movl $0, RAX(%rsp)
+	RESTORE_EXTRA_REGS
+	jmp int_ret_from_sys_call
+	CFI_ENDPROC
+END(ret_from_fork)
+
+/*
  * Build the entry stubs and pointer table with some assembler magic.
  * We pack 7 stubs into a single 32-byte chunk, which will fit in a
  * single cache line on all modern x86 implementations.
@@ -1270,6 +1254,22 @@ idtentry async_page_fault do_async_page_fault has_error_code=1
 idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vector(%rip)
 #endif
 
+ENTRY(save_paranoid)
+	XCPT_FRAME 1 RDI+8
+	cld
+	SAVE_C_REGS 8
+	SAVE_EXTRA_REGS 8
+	movl $1,%ebx
+	movl $MSR_GS_BASE,%ecx
+	rdmsr
+	testl %edx,%edx
+	js 1f	/* negative -> in kernel */
+	SWAPGS
+	xorl %ebx,%ebx
+1:	ret
+	CFI_ENDPROC
+END(save_paranoid)
+
 	/*
 	 * "Paranoid" exit path from exception stack.  This is invoked
 	 * only on return from non-NMI IST interrupts that came
-- 
2.1.0


^ permalink raw reply	[flat|nested] 130+ messages in thread

* [PATCH 11/16] x86: entry_64.S: rename save_paranoid to paranoid_entry, no code changes
  2015-02-26 22:40 [PATCH 00/16] x86/asm changes for 4.1 for review Andy Lutomirski
                   ` (9 preceding siblings ...)
  2015-02-26 22:40 ` [PATCH 10/16] x86: entry_64.S: move save_paranoid and ret_from_fork closer to their users Andy Lutomirski
@ 2015-02-26 22:40 ` Andy Lutomirski
  2015-03-04 22:55   ` [tip:x86/asm] x86/asm/entry/64: Clean up and document various entry code details tip-bot for Denys Vlasenko
  2015-02-26 22:40 ` [PATCH 12/16] x86: ia32entry.S: fold IA32_ARG_FIXUP macro into its callers Andy Lutomirski
                   ` (5 subsequent siblings)
  16 siblings, 1 reply; 130+ messages in thread
From: Andy Lutomirski @ 2015-02-26 22:40 UTC (permalink / raw)
  To: x86, linux-kernel
  Cc: Borislav Petkov, Oleg Nesterov, Denys Vlasenko, Linus Torvalds,
	H. Peter Anvin, Andy Lutomirski, Frederic Weisbecker,
	Alexei Starovoitov, Will Drewry, Kees Cook

From: Denys Vlasenko <dvlasenk@redhat.com>

This patch does a lot of cleanup in comments and formatting,
but it does not change any code.

Rename save_paranoid to paranoid_entry: this makes naming
similar to its "non-paranoid" sibling, error_entry,
and to its counterpart, paranoid_exit.

Use the same CFI annotation atop paranoid_entry and error_entry.

Fix irregular indentation of assembler operands.

Add/fix comments on top of paranoid_entry and error_entry.
Remove stale comment about "oldrax".
Make comments about "no swapgs" flag in ebx more prominent.
Deindent wrongly indented top-level comment atop paranoid_exit.
Indent wrongly deindented comment inside error_entry.

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
CC: Linus Torvalds <torvalds@linux-foundation.org>
CC: Oleg Nesterov <oleg@redhat.com>
CC: Borislav Petkov <bp@alien8.de>
CC: "H. Peter Anvin" <hpa@zytor.com>
CC: Andy Lutomirski <luto@amacapital.net>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: X86 ML <x86@kernel.org>
CC: Alexei Starovoitov <ast@plumgrid.com>
CC: Will Drewry <wad@chromium.org>
CC: Kees Cook <keescook@chromium.org>
CC: linux-kernel@vger.kernel.org
Message-Id: <1424736744-13414-3-git-send-email-dvlasenk@redhat.com>
Signed-off-by: Andy Lutomirski <luto@amacapital.net>
---
 arch/x86/kernel/entry_64.S | 68 ++++++++++++++++++++++++----------------------
 1 file changed, 36 insertions(+), 32 deletions(-)

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index cd1d62b3200a..89efcb65030d 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -982,10 +982,11 @@ ENTRY(\sym)
 	testl $3, CS(%rsp)		/* If coming from userspace, switch */
 	jnz 1f				/* stacks. */
 	.endif
-	call save_paranoid
+	call paranoid_entry
 	.else
 	call error_entry
 	.endif
+	/* returned flag: ebx=0: need swapgs on exit, ebx=1: don't need it */
 
 	DEFAULT_FRAME 0
 
@@ -1016,10 +1017,11 @@ ENTRY(\sym)
 	addq $EXCEPTION_STKSZ, INIT_TSS_IST(\shift_ist)
 	.endif
 
+	/* these procedures expect "no swapgs" flag in ebx */
 	.if \paranoid
-	jmp paranoid_exit		/* %ebx: no swapgs flag */
+	jmp paranoid_exit
 	.else
-	jmp error_exit			/* %ebx: no swapgs flag */
+	jmp error_exit
 	.endif
 
 	.if \paranoid == 1
@@ -1254,8 +1256,13 @@ idtentry async_page_fault do_async_page_fault has_error_code=1
 idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vector(%rip)
 #endif
 
-ENTRY(save_paranoid)
-	XCPT_FRAME 1 RDI+8
+/*
+ * Save all registers in pt_regs, and switch gs if needed.
+ * Use slow, but surefire "are we in kernel?" check.
+ * Return: ebx=0: need swapgs on exit, ebx=1: otherwise
+ */
+ENTRY(paranoid_entry)
+	XCPT_FRAME 1 15*8
 	cld
 	SAVE_C_REGS 8
 	SAVE_EXTRA_REGS 8
@@ -1268,20 +1275,19 @@ ENTRY(save_paranoid)
 	xorl %ebx,%ebx
 1:	ret
 	CFI_ENDPROC
-END(save_paranoid)
-
-	/*
-	 * "Paranoid" exit path from exception stack.  This is invoked
-	 * only on return from non-NMI IST interrupts that came
-	 * from kernel space.
-	 *
-	 * We may be returning to very strange contexts (e.g. very early
-	 * in syscall entry), so checking for preemption here would
-	 * be complicated.  Fortunately, we there's no good reason
-	 * to try to handle preemption here.
-	 */
+END(paranoid_entry)
 
-	/* ebx:	no swapgs flag */
+/*
+ * "Paranoid" exit path from exception stack.  This is invoked
+ * only on return from non-NMI IST interrupts that came
+ * from kernel space.
+ *
+ * We may be returning to very strange contexts (e.g. very early
+ * in syscall entry), so checking for preemption here would
+ * be complicated.  Fortunately, we there's no good reason
+ * to try to handle preemption here.
+ */
+/* On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it) */
 ENTRY(paranoid_exit)
 	DEFAULT_FRAME
 	DISABLE_INTERRUPTS(CLBR_NONE)
@@ -1302,13 +1308,11 @@ paranoid_exit_restore:
 END(paranoid_exit)
 
 /*
- * Exception entry point. This expects an error code/orig_rax on the stack.
- * returns in "no swapgs flag" in %ebx.
+ * Save all registers in pt_regs, and switch gs if needed.
+ * Return: ebx=0: need swapgs on exit, ebx=1: otherwise
  */
 ENTRY(error_entry)
-	XCPT_FRAME
-	CFI_ADJUST_CFA_OFFSET 15*8
-	/* oldrax contains error code */
+	XCPT_FRAME 1 15*8
 	cld
 	SAVE_C_REGS 8
 	SAVE_EXTRA_REGS 8
@@ -1321,12 +1325,12 @@ error_sti:
 	TRACE_IRQS_OFF
 	ret
 
-/*
- * There are two places in the kernel that can potentially fault with
- * usergs. Handle them here.  B stepping K8s sometimes report a
- * truncated RIP for IRET exceptions returning to compat mode. Check
- * for these here too.
- */
+	/*
+	 * There are two places in the kernel that can potentially fault with
+	 * usergs. Handle them here.  B stepping K8s sometimes report a
+	 * truncated RIP for IRET exceptions returning to compat mode. Check
+	 * for these here too.
+	 */
 error_kernelspace:
 	CFI_REL_OFFSET rcx, RCX+8
 	incl %ebx
@@ -1356,7 +1360,7 @@ error_bad_iret:
 END(error_entry)
 
 
-/* ebx:	no swapgs flag (1: don't need swapgs, 0: need it) */
+/* On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it) */
 ENTRY(error_exit)
 	DEFAULT_FRAME
 	movl %ebx,%eax
@@ -1582,13 +1586,13 @@ end_repeat_nmi:
 	ALLOC_PT_GPREGS_ON_STACK
 
 	/*
-	 * Use save_paranoid to handle SWAPGS, but no need to use paranoid_exit
+	 * Use paranoid_entry to handle SWAPGS, but no need to use paranoid_exit
 	 * as we should not be calling schedule in NMI context.
 	 * Even with normal interrupts enabled. An NMI should not be
 	 * setting NEED_RESCHED or anything that normal interrupts and
 	 * exceptions might do.
 	 */
-	call save_paranoid
+	call paranoid_entry
 	DEFAULT_FRAME 0
 
 	/*
-- 
2.1.0


^ permalink raw reply	[flat|nested] 130+ messages in thread

* [PATCH 12/16] x86: ia32entry.S: fold IA32_ARG_FIXUP macro into its callers
  2015-02-26 22:40 [PATCH 00/16] x86/asm changes for 4.1 for review Andy Lutomirski
                   ` (10 preceding siblings ...)
  2015-02-26 22:40 ` [PATCH 11/16] x86: entry_64.S: rename save_paranoid to paranoid_entry, no code changes Andy Lutomirski
@ 2015-02-26 22:40 ` Andy Lutomirski
  2015-03-04 22:55   ` [tip:x86/asm] x86/asm/entry/64/compat: Fold the " tip-bot for Denys Vlasenko
  2015-02-26 22:40 ` [PATCH 13/16] x86: entry_64.S: use more understandable constants Andy Lutomirski
                   ` (4 subsequent siblings)
  16 siblings, 1 reply; 130+ messages in thread
From: Andy Lutomirski @ 2015-02-26 22:40 UTC (permalink / raw)
  To: x86, linux-kernel
  Cc: Borislav Petkov, Oleg Nesterov, Denys Vlasenko, Linus Torvalds,
	H. Peter Anvin, Andy Lutomirski, Frederic Weisbecker,
	Alexei Starovoitov, Will Drewry, Kees Cook

From: Denys Vlasenko <dvlasenk@redhat.com>

Use of a small macro - one with conditional expansion - does more harm
than good. It obfuscates code, with minimal code reuse.

For example, because of obfuscation it's not obvious that
in ia32_sysenter_target, we can optimize loading of r9 -
currently it is loaded with a detour through ebp.

This patch folds IA32_ARG_FIXUP macro into its callers.

No code changes.

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
CC: Linus Torvalds <torvalds@linux-foundation.org>
CC: Oleg Nesterov <oleg@redhat.com>
CC: Borislav Petkov <bp@alien8.de>
CC: "H. Peter Anvin" <hpa@zytor.com>
CC: Andy Lutomirski <luto@amacapital.net>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: X86 ML <x86@kernel.org>
CC: Alexei Starovoitov <ast@plumgrid.com>
CC: Will Drewry <wad@chromium.org>
CC: Kees Cook <keescook@chromium.org>
CC: linux-kernel@vger.kernel.org
Message-Id: <1424736744-13414-5-git-send-email-dvlasenk@redhat.com>
Signed-off-by: Andy Lutomirski <luto@amacapital.net>
---
 arch/x86/ia32/ia32entry.S | 32 ++++++++++++++++++--------------
 1 file changed, 18 insertions(+), 14 deletions(-)

diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index b5670564a1fb..6dcd37256979 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -30,17 +30,6 @@
 
 	.section .entry.text, "ax"
 
-	.macro IA32_ARG_FIXUP noebp=0
-	movl	%edi,%r8d
-	.if \noebp
-	.else
-	movl	%ebp,%r9d
-	.endif
-	xchg	%ecx,%esi
-	movl	%ebx,%edi
-	movl	%edx,%edx	/* zero extension */
-	.endm 
-
 	/* clobbers %rax */
 	.macro  CLEAR_RREGS _r9=rax
 	xorl 	%eax,%eax
@@ -178,7 +167,12 @@ sysenter_flags_fixed:
 	cmpq	$(IA32_NR_syscalls-1),%rax
 	ja	ia32_badsys
 sysenter_do_call:
-	IA32_ARG_FIXUP
+	/* 32bit syscall -> 64bit C ABI argument conversion */
+	movl	%edi,%r8d	/* arg5 */
+	movl	%ebp,%r9d	/* arg6 */
+	xchg	%ecx,%esi	/* rsi:arg2, rcx:arg4 */
+	movl	%ebx,%edi	/* arg1 */
+	movl	%edx,%edx	/* arg3 (zero extension) */
 sysenter_dispatch:
 	call	*ia32_sys_call_table(,%rax,8)
 	movq	%rax,RAX(%rsp)
@@ -360,7 +354,12 @@ ENTRY(ia32_cstar_target)
 	cmpq $IA32_NR_syscalls-1,%rax
 	ja  ia32_badsys
 cstar_do_call:
-	IA32_ARG_FIXUP 1
+	/* 32bit syscall -> 64bit C ABI argument conversion */
+	movl	%edi,%r8d	/* arg5 */
+	/* r9 already loaded */	/* arg6 */
+	xchg	%ecx,%esi	/* rsi:arg2, rcx:arg4 */
+	movl	%ebx,%edi	/* arg1 */
+	movl	%edx,%edx	/* arg3 (zero extension) */
 cstar_dispatch:
 	call *ia32_sys_call_table(,%rax,8)
 	movq %rax,RAX(%rsp)
@@ -477,7 +476,12 @@ ENTRY(ia32_syscall)
 	cmpq $(IA32_NR_syscalls-1),%rax
 	ja ia32_badsys
 ia32_do_call:
-	IA32_ARG_FIXUP
+	/* 32bit syscall -> 64bit C ABI argument conversion */
+	movl %edi,%r8d	/* arg5 */
+	movl %ebp,%r9d	/* arg6 */
+	xchg %ecx,%esi	/* rsi:arg2, rcx:arg4 */
+	movl %ebx,%edi	/* arg1 */
+	movl %edx,%edx	/* arg3 (zero extension) */
 	call *ia32_sys_call_table(,%rax,8) # xxx: rip relative
 ia32_sysret:
 	movq %rax,RAX(%rsp)
-- 
2.1.0


^ permalink raw reply	[flat|nested] 130+ messages in thread

* [PATCH 13/16] x86: entry_64.S: use more understandable constants
  2015-02-26 22:40 [PATCH 00/16] x86/asm changes for 4.1 for review Andy Lutomirski
                   ` (11 preceding siblings ...)
  2015-02-26 22:40 ` [PATCH 12/16] x86: ia32entry.S: fold IA32_ARG_FIXUP macro into its callers Andy Lutomirski
@ 2015-02-26 22:40 ` Andy Lutomirski
  2015-03-04 22:56   ` [tip:x86/asm] x86/asm/entry/64: Use more readable constants tip-bot for Denys Vlasenko
  2015-02-26 22:40 ` [PATCH 14/16] x86: ia32entry.S: use more understandable constant Andy Lutomirski
                   ` (3 subsequent siblings)
  16 siblings, 1 reply; 130+ messages in thread
From: Andy Lutomirski @ 2015-02-26 22:40 UTC (permalink / raw)
  To: x86, linux-kernel
  Cc: Borislav Petkov, Oleg Nesterov, Denys Vlasenko, Linus Torvalds,
	H. Peter Anvin, Andy Lutomirski, Frederic Weisbecker,
	Alexei Starovoitov, Will Drewry, Kees Cook

From: Denys Vlasenko <dvlasenk@redhat.com>

Constants such as SS+8 or SS+8-RIP are mysterious.
In most cases, SS+8 is just meant to be SIZEOF_PTREGS,
SS+8-RIP is RIP's offset in iret frame.

This patch changes some of these constants to be less mysterious.

No code changes (verified with objdump).

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
CC: Linus Torvalds <torvalds@linux-foundation.org>
CC: Oleg Nesterov <oleg@redhat.com>
CC: Borislav Petkov <bp@alien8.de>
CC: "H. Peter Anvin" <hpa@zytor.com>
CC: Andy Lutomirski <luto@amacapital.net>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: X86 ML <x86@kernel.org>
CC: Alexei Starovoitov <ast@plumgrid.com>
CC: Will Drewry <wad@chromium.org>
CC: Kees Cook <keescook@chromium.org>
CC: linux-kernel@vger.kernel.org
Message-Id: <1424736744-13414-6-git-send-email-dvlasenk@redhat.com>
Signed-off-by: Andy Lutomirski <luto@amacapital.net>
---
 arch/x86/include/asm/calling.h |  2 ++
 arch/x86/kernel/entry_64.S     | 28 ++++++++++++++++------------
 2 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h
index 4a7ceb9789a5..337423590b08 100644
--- a/arch/x86/include/asm/calling.h
+++ b/arch/x86/include/asm/calling.h
@@ -88,6 +88,8 @@ For 32-bit we have the following conventions - kernel is built with
 #define RSP		19*8
 #define SS		20*8
 
+#define SIZEOF_PTREGS	21*8
+
 	.macro ALLOC_PT_GPREGS_ON_STACK addskip=0
 	subq	$15*8+\addskip, %rsp
 	CFI_ADJUST_CFA_OFFSET 15*8+\addskip
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 89efcb65030d..6fcf5d1e36c9 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -164,12 +164,12 @@ ENDPROC(native_usergs_sysret64)
  * initial frame state for interrupts (and exceptions without error code)
  */
 	.macro INTR_FRAME start=1 offset=0
-	EMPTY_FRAME \start, SS+8+\offset-RIP
-	/*CFI_REL_OFFSET ss, SS+\offset-RIP*/
-	CFI_REL_OFFSET rsp, RSP+\offset-RIP
-	/*CFI_REL_OFFSET rflags, EFLAGS+\offset-RIP*/
-	/*CFI_REL_OFFSET cs, CS+\offset-RIP*/
-	CFI_REL_OFFSET rip, RIP+\offset-RIP
+	EMPTY_FRAME \start, 5*8+\offset
+	/*CFI_REL_OFFSET ss, 4*8+\offset*/
+	CFI_REL_OFFSET rsp, 3*8+\offset
+	/*CFI_REL_OFFSET rflags, 2*8+\offset*/
+	/*CFI_REL_OFFSET cs, 1*8+\offset*/
+	CFI_REL_OFFSET rip, 0*8+\offset
 	.endm
 
 /*
@@ -177,7 +177,7 @@ ENDPROC(native_usergs_sysret64)
  * with vector already pushed)
  */
 	.macro XCPT_FRAME start=1 offset=0
-	INTR_FRAME \start, RIP+\offset-ORIG_RAX
+	INTR_FRAME \start, 1*8+\offset
 	.endm
 
 /*
@@ -645,10 +645,14 @@ END(interrupt)
 	cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
 	CFI_DEF_CFA_REGISTER	rsi
 	pushq %rsi
+	/*
+	 * For debugger:
+	 * "CFA (Current Frame Address) is the value on stack + offset"
+	 */
 	CFI_ESCAPE	0x0f /* DW_CFA_def_cfa_expression */, 6, \
-			0x77 /* DW_OP_breg7 */, 0, \
+			0x77 /* DW_OP_breg7 (rsp) */, 0, \
 			0x06 /* DW_OP_deref */, \
-			0x08 /* DW_OP_const1u */, SS+8-RBP, \
+			0x08 /* DW_OP_const1u */, SIZEOF_PTREGS-RBP, \
 			0x22 /* DW_OP_plus */
 	/* We entered an interrupt context - irqs are off: */
 	TRACE_IRQS_OFF
@@ -674,7 +678,7 @@ ret_from_intr:
 
 	/* Restore saved previous stack */
 	popq %rsi
-	CFI_DEF_CFA rsi,SS+8-RBP	/* reg/off reset after def_cfa_expr */
+	CFI_DEF_CFA rsi,SIZEOF_PTREGS-RBP /* reg/off reset after def_cfa_expr */
 	/* return code expects complete pt_regs - adjust rsp accordingly: */
 	leaq -RBP(%rsi),%rsp
 	CFI_DEF_CFA_REGISTER	rsp
@@ -1546,7 +1550,7 @@ first_nmi:
 	.rept 5
 	pushq_cfi 11*8(%rsp)
 	.endr
-	CFI_DEF_CFA_OFFSET SS+8-RIP
+	CFI_DEF_CFA_OFFSET 5*8
 
 	/* Everything up to here is safe from nested NMIs */
 
@@ -1574,7 +1578,7 @@ repeat_nmi:
 	pushq_cfi -6*8(%rsp)
 	.endr
 	subq $(5*8), %rsp
-	CFI_DEF_CFA_OFFSET SS+8-RIP
+	CFI_DEF_CFA_OFFSET 5*8
 end_repeat_nmi:
 
 	/*
-- 
2.1.0


^ permalink raw reply	[flat|nested] 130+ messages in thread

* [PATCH 14/16] x86: ia32entry.S: use more understandable constant
  2015-02-26 22:40 [PATCH 00/16] x86/asm changes for 4.1 for review Andy Lutomirski
                   ` (12 preceding siblings ...)
  2015-02-26 22:40 ` [PATCH 13/16] x86: entry_64.S: use more understandable constants Andy Lutomirski
@ 2015-02-26 22:40 ` Andy Lutomirski
  2015-03-04 22:56   ` [tip:x86/asm] x86/asm/entry/64/compat: Use more readable constant tip-bot for Denys Vlasenko
  2015-02-26 22:40 ` [PATCH 15/16] x86: entry.S: simplify optimistic SYSRET Andy Lutomirski
                   ` (2 subsequent siblings)
  16 siblings, 1 reply; 130+ messages in thread
From: Andy Lutomirski @ 2015-02-26 22:40 UTC (permalink / raw)
  To: x86, linux-kernel
  Cc: Borislav Petkov, Oleg Nesterov, Denys Vlasenko, Linus Torvalds,
	Steven Rostedt, Ingo Molnar, H. Peter Anvin, Andy Lutomirski,
	Frederic Weisbecker, Alexei Starovoitov, Will Drewry, Kees Cook

From: Denys Vlasenko <dvlasenk@redhat.com>

The last instance of "mysterious" SS+8 constant is replaced by SIZEOF_PTREGS.

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
CC: Linus Torvalds <torvalds@linux-foundation.org>
CC: Steven Rostedt <rostedt@goodmis.org>
CC: Ingo Molnar <mingo@kernel.org>
CC: Borislav Petkov <bp@alien8.de>
CC: "H. Peter Anvin" <hpa@zytor.com>
CC: Andy Lutomirski <luto@amacapital.net>
CC: Oleg Nesterov <oleg@redhat.com>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: Alexei Starovoitov <ast@plumgrid.com>
CC: Will Drewry <wad@chromium.org>
CC: Kees Cook <keescook@chromium.org>
CC: x86@kernel.org
CC: linux-kernel@vger.kernel.org
Message-Id: <1424822419-10267-1-git-send-email-dvlasenk@redhat.com>
Signed-off-by: Andy Lutomirski <luto@amacapital.net>
---
 arch/x86/ia32/ia32entry.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 6dcd37256979..ed9746340363 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -536,7 +536,7 @@ ia32_ptregs_common:
 	CFI_ENDPROC
 	CFI_STARTPROC32	simple
 	CFI_SIGNAL_FRAME
-	CFI_DEF_CFA	rsp,SS+8
+	CFI_DEF_CFA	rsp,SIZEOF_PTREGS
 	CFI_REL_OFFSET	rax,RAX
 	CFI_REL_OFFSET	rcx,RCX
 	CFI_REL_OFFSET	rdx,RDX
-- 
2.1.0


^ permalink raw reply	[flat|nested] 130+ messages in thread

* [PATCH 15/16] x86: entry.S: simplify optimistic SYSRET
  2015-02-26 22:40 [PATCH 00/16] x86/asm changes for 4.1 for review Andy Lutomirski
                   ` (13 preceding siblings ...)
  2015-02-26 22:40 ` [PATCH 14/16] x86: ia32entry.S: use more understandable constant Andy Lutomirski
@ 2015-02-26 22:40 ` Andy Lutomirski
  2015-03-04 21:40   ` Ingo Molnar
  2015-03-04 22:56   ` [tip:x86/asm] x86/asm/entry/64: Simplify " tip-bot for Denys Vlasenko
  2015-02-26 22:40 ` [PATCH 16/16] x86_64, entry: Remove a bogus ret_from_fork optimization Andy Lutomirski
  2015-03-04 21:50 ` [PATCH 00/16] x86/asm changes for 4.1 for review Ingo Molnar
  16 siblings, 2 replies; 130+ messages in thread
From: Andy Lutomirski @ 2015-02-26 22:40 UTC (permalink / raw)
  To: x86, linux-kernel
  Cc: Borislav Petkov, Oleg Nesterov, Denys Vlasenko, Linus Torvalds,
	Steven Rostedt, Ingo Molnar, H. Peter Anvin, Andy Lutomirski,
	Frederic Weisbecker, Alexei Starovoitov, Will Drewry, Kees Cook

From: Denys Vlasenko <dvlasenk@redhat.com>

Avoid redundant load of %r11 (it is already loaded a few instructions before).
Do not needlessly increment %rsp - we are going to return to userspace
via SYSRET, this insn doesn't use stack for return.

Changes since v1: added a comment

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
CC: Linus Torvalds <torvalds@linux-foundation.org>
CC: Steven Rostedt <rostedt@goodmis.org>
CC: Ingo Molnar <mingo@kernel.org>
CC: Borislav Petkov <bp@alien8.de>
CC: "H. Peter Anvin" <hpa@zytor.com>
CC: Andy Lutomirski <luto@amacapital.net>
CC: Oleg Nesterov <oleg@redhat.com>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: Alexei Starovoitov <ast@plumgrid.com>
CC: Will Drewry <wad@chromium.org>
CC: Kees Cook <keescook@chromium.org>
CC: x86@kernel.org
CC: linux-kernel@vger.kernel.org
Message-Id: <1424824222-10856-1-git-send-email-dvlasenk@redhat.com>
Signed-off-by: Andy Lutomirski <luto@amacapital.net>
---
 arch/x86/include/asm/calling.h | 3 +++
 arch/x86/kernel/entry_64.S     | 6 +++---
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h
index 337423590b08..f1a962ff7ddf 100644
--- a/arch/x86/include/asm/calling.h
+++ b/arch/x86/include/asm/calling.h
@@ -176,6 +176,9 @@ For 32-bit we have the following conventions - kernel is built with
 	.macro RESTORE_C_REGS_EXCEPT_RCX
 	RESTORE_C_REGS_HELPER 1,0,1,1,1
 	.endm
+	.macro RESTORE_C_REGS_EXCEPT_R11
+	RESTORE_C_REGS_HELPER 1,1,0,1,1
+	.endm
 	.macro RESTORE_RSI_RDI
 	RESTORE_C_REGS_HELPER 0,0,0,0,0
 	.endm
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 6fcf5d1e36c9..4670fcdb5150 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -757,9 +757,9 @@ retint_swapgs:		/* return to user-space */
 	 */
 irq_return_via_sysret:
 	CFI_REMEMBER_STATE
-	RESTORE_C_REGS
-	REMOVE_PT_GPREGS_FROM_STACK 8
-	movq (RSP-RIP)(%rsp),%rsp
+	/* r11 is already restored (see code above) */
+	RESTORE_C_REGS_EXCEPT_R11
+	movq RSP(%rsp),%rsp
 	USERGS_SYSRET64
 	CFI_RESTORE_STATE
 
-- 
2.1.0


^ permalink raw reply	[flat|nested] 130+ messages in thread

* [PATCH 16/16] x86_64, entry: Remove a bogus ret_from_fork optimization
  2015-02-26 22:40 [PATCH 00/16] x86/asm changes for 4.1 for review Andy Lutomirski
                   ` (14 preceding siblings ...)
  2015-02-26 22:40 ` [PATCH 15/16] x86: entry.S: simplify optimistic SYSRET Andy Lutomirski
@ 2015-02-26 22:40 ` Andy Lutomirski
  2015-03-04 22:57   ` [tip:x86/asm] x86/asm/entry/64: Remove a bogus 'ret_from_fork' optimization tip-bot for Andy Lutomirski
  2015-03-05 11:49   ` [tip:x86/urgent] " tip-bot for Andy Lutomirski
  2015-03-04 21:50 ` [PATCH 00/16] x86/asm changes for 4.1 for review Ingo Molnar
  16 siblings, 2 replies; 130+ messages in thread
From: Andy Lutomirski @ 2015-02-26 22:40 UTC (permalink / raw)
  To: x86, linux-kernel
  Cc: Borislav Petkov, Oleg Nesterov, Denys Vlasenko, Andy Lutomirski

ret_from_fork checks TIF_IA32 to determine whether pt_regs and the
related state make sense for ret_from_sys_call.  This is entirely
the wrong check.  TS_COMPAT would make a little more sense, but
there's really no point in keeping this optimization at all.

This fixes a return to the wrong user CS if we came from int 0x80 in
a 64-bit task.

Acked-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Andy Lutomirski <luto@amacapital.net>
---
 arch/x86/kernel/entry_64.S | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 4670fcdb5150..519498d1c8ec 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -550,11 +550,14 @@ ENTRY(ret_from_fork)
 	testl $3,CS(%rsp)			# from kernel_thread?
 	jz   1f
 
-	testl $_TIF_IA32, TI_flags(%rcx)	# 32-bit compat task needs IRET
-	jnz  int_ret_from_sys_call
-
-	RESTORE_TOP_OF_STACK %rdi
-	jmp ret_from_sys_call			# go to the SYSRET fastpath
+	/*
+	 * By the time we get here, we have no idea whether our pt_regs,
+	 * ti flags, and ti status came from the 64-bit SYSCALL fast path,
+	 * the slow path, or one of the ia32entry paths.
+	 * Use int_ret_from_sys_call to return, since it can safely handle
+	 * all of the above.
+	 */
+	jmp  int_ret_from_sys_call
 
 1:
 	movq %rbp, %rdi
-- 
2.1.0


^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH 15/16] x86: entry.S: simplify optimistic SYSRET
  2015-02-26 22:40 ` [PATCH 15/16] x86: entry.S: simplify optimistic SYSRET Andy Lutomirski
@ 2015-03-04 21:40   ` Ingo Molnar
  2015-03-04 22:56   ` [tip:x86/asm] x86/asm/entry/64: Simplify " tip-bot for Denys Vlasenko
  1 sibling, 0 replies; 130+ messages in thread
From: Ingo Molnar @ 2015-03-04 21:40 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: x86, linux-kernel, Borislav Petkov, Oleg Nesterov,
	Denys Vlasenko, Linus Torvalds, Steven Rostedt, H. Peter Anvin,
	Frederic Weisbecker, Alexei Starovoitov, Will Drewry, Kees Cook


* Andy Lutomirski <luto@amacapital.net> wrote:

> From: Denys Vlasenko <dvlasenk@redhat.com>
> 
> Avoid redundant load of %r11 (it is already loaded a few instructions before).

Note, this comment is incorrect:

> Do not needlessly increment %rsp - we are going to return to 
> userspace via SYSRET, this insn doesn't use stack for return.

The reason we don't need to increment %rsp because in the very next 
instruction we are restoring it, and then do we execute a SYSRET:

So instead of:

	add $0x80, %rsp
	mov 0x18(%rsp), %rsp

we can do a simplified:

	mov 0x98(%rsp), %rsp

to get the same result.

(I've clarified the changelog with this when applying the patch.)

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH 00/16] x86/asm changes for 4.1 for review
  2015-02-26 22:40 [PATCH 00/16] x86/asm changes for 4.1 for review Andy Lutomirski
                   ` (15 preceding siblings ...)
  2015-02-26 22:40 ` [PATCH 16/16] x86_64, entry: Remove a bogus ret_from_fork optimization Andy Lutomirski
@ 2015-03-04 21:50 ` Ingo Molnar
  2015-03-04 21:55   ` Andy Lutomirski
  16 siblings, 1 reply; 130+ messages in thread
From: Ingo Molnar @ 2015-03-04 21:50 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: x86, linux-kernel, Borislav Petkov, Oleg Nesterov,
	Denys Vlasenko, Linus Torvalds, Thomas Gleixner, H. Peter Anvin


* Andy Lutomirski <luto@amacapital.net> wrote:

> Hi all-
> 
> After much handwringing, painful review, and curation, here's round 
> 1 of the x86/asm changes.  (This is not intended to imply that there 
> will or will not be a round 2.)
> 
> For ease of review for git users:
> 
> The following changes since commit 7453311d68f16a5c587c3cbf19563c9a4fbbd41a:
> 
>   Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip (2015-02-09 17:16:44 -0800)
> 
> are available in the git repository at:
> 
>   git://git.kernel.org/pub/scm/linux/kernel/git/luto/linux.git 
> 
> for you to fetch changes up to 4710be56d76ef994ddf59087aad98c000fbab9a4:
> 
>   x86_64, entry: Remove a bogus ret_from_fork optimization (2015-02-26 07:09:36 -0800)
> 
> Ingo, if you like these, feel free to pull.  I will try to review 
> the scary change *again* this evening if I have time.  In the mean 
> time, these are queued as is for -next.  I will remove them from my 
> -next branch if they turn out to be buggy or if they end up in -tip 
> in any form.
> 
> Andy Lutomirski (2):
>   x86, entry: Remove int_check_syscall_exit_work
>   x86_64, entry: Remove a bogus ret_from_fork optimization
> 
> Denys Vlasenko (14):
>   x86: open-code register save/restore in trace_hardirqs thunks
>   x86: introduce push/pop macros which generate CFI_REL_OFFSET and
>     CFI_RESTORE
>   x86: entry_64.S: fix wrong symbolic constant usage: R11->ARGOFFSET
>   x86: entry_64.S: always allocate complete "struct pt_regs"
>   x86: entry_64.S: fix comments. No code changes
>   x86: code shrink in paranoid_exit
>   x86: mass removal of ARGOFFSET. No code changes
>   x86: add comments about various syscall instructions, no code changes
>   x86: entry_64.S: move save_paranoid and ret_from_fork closer to their
>     users
>   x86: entry_64.S: rename save_paranoid to paranoid_entry, no code
>     changes
>   x86: ia32entry.S: fold IA32_ARG_FIXUP macro into its callers
>   x86: entry_64.S: use more understandable constants
>   x86: ia32entry.S: use more understandable constant
>   x86: entry.S: simplify optimistic SYSRET
> 
>  arch/x86/ia32/ia32entry.S              | 342 ++++++++++++----------
>  arch/x86/include/asm/calling.h         | 267 ++++++++---------
>  arch/x86/include/asm/dwarf2.h          |  24 ++
>  arch/x86/include/asm/irqflags.h        |   4 +-
>  arch/x86/include/asm/ptrace.h          |  13 +-
>  arch/x86/include/uapi/asm/ptrace-abi.h |  16 +-
>  arch/x86/include/uapi/asm/ptrace.h     |  13 +-
>  arch/x86/kernel/entry_32.S             |  21 +-
>  arch/x86/kernel/entry_64.S             | 510 ++++++++++++++++-----------------
>  arch/x86/lib/atomic64_cx8_32.S         |  50 ++--
>  arch/x86/lib/checksum_32.S             |  60 ++--
>  arch/x86/lib/msr-reg.S                 |  24 +-
>  arch/x86/lib/rwsem.S                   |  44 ++-
>  arch/x86/lib/thunk_32.S                |  18 +-
>  arch/x86/lib/thunk_64.S                |  28 +-
>  15 files changed, 724 insertions(+), 710 deletions(-)

Ok, I had a really close look, and these changes are all looking good 
to me.

While reviewing the patches I also fixed up the patch titles and tons 
of small details in the changelogs, so I didn't pull but applied the 
patches out of email, to tip:x86/asm.

Thanks guys, this is really good progress and a big step forward for 
the x86 entry code's readability and maintainability!

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 130+ messages in thread

* Re: [PATCH 00/16] x86/asm changes for 4.1 for review
  2015-03-04 21:50 ` [PATCH 00/16] x86/asm changes for 4.1 for review Ingo Molnar
@ 2015-03-04 21:55   ` Andy Lutomirski
  0 siblings, 0 replies; 130+ messages in thread
From: Andy Lutomirski @ 2015-03-04 21:55 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: X86 ML, linux-kernel, Borislav Petkov, Oleg Nesterov,
	Denys Vlasenko, Linus Torvalds, Thomas Gleixner, H. Peter Anvin

On Wed, Mar 4, 2015 at 1:50 PM, Ingo Molnar <mingo@kernel.org> wrote:
>
> * Andy Lutomirski <luto@amacapital.net> wrote:
>
>> Hi all-
>>
>> After much handwringing, painful review, and curation, here's round
>> 1 of the x86/asm changes.  (This is not intended to imply that there
>> will or will not be a round 2.)
>>
>> For ease of review for git users:
>>
>> The following changes since commit 7453311d68f16a5c587c3cbf19563c9a4fbbd41a:
>>
>>   Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip (2015-02-09 17:16:44 -0800)
>>
>> are available in the git repository at:
>>
>>   git://git.kernel.org/pub/scm/linux/kernel/git/luto/linux.git
>>
>> for you to fetch changes up to 4710be56d76ef994ddf59087aad98c000fbab9a4:
>>
>>   x86_64, entry: Remove a bogus ret_from_fork optimization (2015-02-26 07:09:36 -0800)
>>
>> Ingo, if you like these, feel free to pull.  I will try to review
>> the scary change *again* this evening if I have time.  In the mean
>> time, these are queued as is for -next.  I will remove them from my
>> -next branch if they turn out to be buggy or if they end up in -tip
>> in any form.
>>
>> Andy Lutomirski (2):
>>   x86, entry: Remove int_check_syscall_exit_work
>>   x86_64, entry: Remove a bogus ret_from_fork optimization
>>
>> Denys Vlasenko (14):
>>   x86: open-code register save/restore in trace_hardirqs thunks
>>   x86: introduce push/pop macros which generate CFI_REL_OFFSET and
>>     CFI_RESTORE
>>   x86: entry_64.S: fix wrong symbolic constant usage: R11->ARGOFFSET
>>   x86: entry_64.S: always allocate complete "struct pt_regs"
>>   x86: entry_64.S: fix comments. No code changes
>>   x86: code shrink in paranoid_exit
>>   x86: mass removal of ARGOFFSET. No code changes
>>   x86: add comments about various syscall instructions, no code changes
>>   x86: entry_64.S: move save_paranoid and ret_from_fork closer to their
>>     users
>>   x86: entry_64.S: rename save_paranoid to paranoid_entry, no code
>>     changes
>>   x86: ia32entry.S: fold IA32_ARG_FIXUP macro into its callers
>>   x86: entry_64.S: use more understandable constants
>>   x86: ia32entry.S: use more understandable constant
>>   x86: entry.S: simplify optimistic SYSRET
>>
>>  arch/x86/ia32/ia32entry.S              | 342 ++++++++++++----------
>>  arch/x86/include/asm/calling.h         | 267 ++++++++---------
>>  arch/x86/include/asm/dwarf2.h          |  24 ++
>>  arch/x86/include/asm/irqflags.h        |   4 +-
>>  arch/x86/include/asm/ptrace.h          |  13 +-
>>  arch/x86/include/uapi/asm/ptrace-abi.h |  16 +-
>>  arch/x86/include/uapi/asm/ptrace.h     |  13 +-
>>  arch/x86/kernel/entry_32.S             |  21 +-
>>  arch/x86/kernel/entry_64.S             | 510 ++++++++++++++++-----------------
>>  arch/x86/lib/atomic64_cx8_32.S         |  50 ++--
>>  arch/x86/lib/checksum_32.S             |  60 ++--
>>  arch/x86/lib/msr-reg.S                 |  24 +-
>>  arch/x86/lib/rwsem.S                   |  44 ++-
>>  arch/x86/lib/thunk_32.S                |  18 +-
>>  arch/x86/lib/thunk_64.S                |  28 +-
>>  15 files changed, 724 insertions(+), 710 deletions(-)
>
> Ok, I had a really close look, and these changes are all looking good
> to me.
>
> While reviewing the patches I also fixed up the patch titles and tons
> of small details in the changelogs, so I didn't pull but applied the
> patches out of email, to tip:x86/asm.
>
> Thanks guys, this is really good progress and a big step forward for
> the x86 entry code's readability and maintainability!

Thanks!

I removed my version from -next to avoid merge confusion.

--Andy

>
> Thanks,
>
>         Ingo



-- 
Andy Lutomirski
AMA Capital Management, LLC

^ permalink raw reply	[flat|nested] 130+ messages in thread

* [tip:x86/asm] x86/asm/64: Open-code register save/ restore in trace_hardirqs*() thunks
  2015-02-26 22:40 ` [PATCH 01/16] x86: open-code register save/restore in trace_hardirqs thunks Andy Lutomirski
@ 2015-03-04 22:52   ` tip-bot for Denys Vlasenko
  0 siblings, 0 replies; 130+ messages in thread
From: tip-bot for Denys Vlasenko @ 2015-03-04 22:52 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: bp, ast, luto, dvlasenk, keescook, hpa, linux-kernel, tglx, oleg,
	wad, fweisbec, mingo, torvalds

Commit-ID:  69e8544cd0056e02965ffb5e8414fb7501a2ee2e
Gitweb:     http://git.kernel.org/tip/69e8544cd0056e02965ffb5e8414fb7501a2ee2e
Author:     Denys Vlasenko <dvlasenk@redhat.com>
AuthorDate: Thu, 26 Feb 2015 14:40:24 -0800
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Wed, 4 Mar 2015 22:50:48 +0100

x86/asm/64: Open-code register save/restore in trace_hardirqs*() thunks

This is a preparatory patch for change in "struct pt_regs"
handling in entry_64.S.

trace_hardirqs*() thunks were (ab)using a part of the
'pt_regs' handling code, namely the SAVE_ARGS/RESTORE_ARGS
macros, to save/restore registers across C function calls.

Since SAVE_ARGS is going to be changed, open-code
register saving/restoring here.

Incidentally, this removes a bit of dead code:
one SAVE_ARGS was used just to emit a CFI annotation,
but it also generated unreachable assembly instructions.

Take a page from thunk_32.S and use push/pop instructions
instead of movq, they are far shorter:
1 or 2 bytes versus 5, and no need for instructions to adjust %rsp:

   text	   data	    bss	    dec	    hex	filename
    333	     40	      0	    373	    175	thunk_64_movq.o
    104	     40	      0	    144	     90	thunk_64_push_pop.o

[ This is ugly as sin, but we'll fix up the ugliness in the next
  patch. I see no point in reordering patches just to avoid an
  ugly intermediate state.  --Andy ]

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Will Drewry <wad@chromium.org>
Link: http://lkml.kernel.org/r/1420927210-19738-4-git-send-email-dvlasenk@redhat.com
Link: http://lkml.kernel.org/r/4c979ad604f0f02c5ade3b3da308b53eabd5e198.1424989793.git.luto@amacapital.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/lib/thunk_64.S | 46 ++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 42 insertions(+), 4 deletions(-)

diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S
index b30b5eb..8ec443a 100644
--- a/arch/x86/lib/thunk_64.S
+++ b/arch/x86/lib/thunk_64.S
@@ -17,9 +17,27 @@
 	CFI_STARTPROC
 
 	/* this one pushes 9 elems, the next one would be %rIP */
-	SAVE_ARGS
+	pushq_cfi %rdi
+	CFI_REL_OFFSET rdi, 0
+	pushq_cfi %rsi
+	CFI_REL_OFFSET rsi, 0
+	pushq_cfi %rdx
+	CFI_REL_OFFSET rdx, 0
+	pushq_cfi %rcx
+	CFI_REL_OFFSET rcx, 0
+	pushq_cfi %rax
+	CFI_REL_OFFSET rax, 0
+	pushq_cfi %r8
+	CFI_REL_OFFSET r8, 0
+	pushq_cfi %r9
+	CFI_REL_OFFSET r9, 0
+	pushq_cfi %r10
+	CFI_REL_OFFSET r10, 0
+	pushq_cfi %r11
+	CFI_REL_OFFSET r11, 0
 
 	.if \put_ret_addr_in_rdi
+	/* 9*8(%rsp) is return addr on stack */
 	movq_cfi_restore 9*8, rdi
 	.endif
 
@@ -45,11 +63,31 @@
 #endif
 #endif
 
-	/* SAVE_ARGS below is used only for the .cfi directives it contains. */
+#if defined(CONFIG_TRACE_IRQFLAGS) \
+ || defined(CONFIG_DEBUG_LOCK_ALLOC) \
+ || defined(CONFIG_PREEMPT)
 	CFI_STARTPROC
-	SAVE_ARGS
+	CFI_ADJUST_CFA_OFFSET 9*8
 restore:
-	RESTORE_ARGS
+	popq_cfi %r11
+	CFI_RESTORE r11
+	popq_cfi %r10
+	CFI_RESTORE r10
+	popq_cfi %r9
+	CFI_RESTORE r9
+	popq_cfi %r8
+	CFI_RESTORE r8
+	popq_cfi %rax
+	CFI_RESTORE rax
+	popq_cfi %rcx
+	CFI_RESTORE rcx
+	popq_cfi %rdx
+	CFI_RESTORE rdx
+	popq_cfi %rsi
+	CFI_RESTORE rsi
+	popq_cfi %rdi
+	CFI_RESTORE rdi
 	ret
 	CFI_ENDPROC
 	_ASM_NOKPROBE(restore)
+#endif

^ permalink raw reply	[flat|nested] 130+ messages in thread

* [tip:x86/asm] x86/asm: Introduce push/ pop macros which generate CFI_REL_OFFSET and CFI_RESTORE
  2015-02-26 22:40 ` [PATCH 02/16] x86: introduce push/pop macros which generate CFI_REL_OFFSET and CFI_RESTORE Andy Lutomirski
@ 2015-03-04 22:52   ` tip-bot for Denys Vlasenko
  0 siblings, 0 replies; 130+ messages in thread
From: tip-bot for Denys Vlasenko @ 2015-03-04 22:52 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: oleg, bp, tglx, dvlasenk, keescook, ast, hpa, wad, fweisbec,
	mingo, luto, torvalds, linux-kernel

Commit-ID:  49db46a67bec9ca9e29ece4729a876195877af50
Gitweb:     http://git.kernel.org/tip/49db46a67bec9ca9e29ece4729a876195877af50
Author:     Denys Vlasenko <dvlasenk@redhat.com>
AuthorDate: Thu, 26 Feb 2015 14:40:25 -0800
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Wed, 4 Mar 2015 22:50:49 +0100

x86/asm: Introduce push/pop macros which generate CFI_REL_OFFSET and CFI_RESTORE

Sequences:

        pushl_cfi %reg
        CFI_REL_OFFSET reg, 0

and:

        popl_cfi %reg
        CFI_RESTORE reg

happen quite often. This patch adds macros which generate them.

No assembly changes (verified with objdump -dr vmlinux.o).

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Will Drewry <wad@chromium.org>
Link: http://lkml.kernel.org/r/1421017655-25561-1-git-send-email-dvlasenk@redhat.com
Link: http://lkml.kernel.org/r/2202eb90f175cf45d1b2d1c64dbb5676a8ad07ad.1424989793.git.luto@amacapital.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/calling.h | 42 ++++++++++-------------------
 arch/x86/include/asm/dwarf2.h  | 24 +++++++++++++++++
 arch/x86/kernel/entry_32.S     | 21 +++++----------
 arch/x86/lib/atomic64_cx8_32.S | 50 ++++++++++++++---------------------
 arch/x86/lib/checksum_32.S     | 60 ++++++++++++++----------------------------
 arch/x86/lib/msr-reg.S         | 24 ++++++++---------
 arch/x86/lib/rwsem.S           | 44 ++++++++++++++-----------------
 arch/x86/lib/thunk_32.S        | 18 +++++--------
 arch/x86/lib/thunk_64.S        | 54 +++++++++++++------------------------
 9 files changed, 141 insertions(+), 196 deletions(-)

diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h
index 1f1297b..3c711f2a 100644
--- a/arch/x86/include/asm/calling.h
+++ b/arch/x86/include/asm/calling.h
@@ -210,37 +210,23 @@ For 32-bit we have the following conventions - kernel is built with
  */
 
 	.macro SAVE_ALL
-	pushl_cfi %eax
-	CFI_REL_OFFSET eax, 0
-	pushl_cfi %ebp
-	CFI_REL_OFFSET ebp, 0
-	pushl_cfi %edi
-	CFI_REL_OFFSET edi, 0
-	pushl_cfi %esi
-	CFI_REL_OFFSET esi, 0
-	pushl_cfi %edx
-	CFI_REL_OFFSET edx, 0
-	pushl_cfi %ecx
-	CFI_REL_OFFSET ecx, 0
-	pushl_cfi %ebx
-	CFI_REL_OFFSET ebx, 0
+	pushl_cfi_reg eax
+	pushl_cfi_reg ebp
+	pushl_cfi_reg edi
+	pushl_cfi_reg esi
+	pushl_cfi_reg edx
+	pushl_cfi_reg ecx
+	pushl_cfi_reg ebx
 	.endm
 
 	.macro RESTORE_ALL
-	popl_cfi %ebx
-	CFI_RESTORE ebx
-	popl_cfi %ecx
-	CFI_RESTORE ecx
-	popl_cfi %edx
-	CFI_RESTORE edx
-	popl_cfi %esi
-	CFI_RESTORE esi
-	popl_cfi %edi
-	CFI_RESTORE edi
-	popl_cfi %ebp
-	CFI_RESTORE ebp
-	popl_cfi %eax
-	CFI_RESTORE eax
+	popl_cfi_reg ebx
+	popl_cfi_reg ecx
+	popl_cfi_reg edx
+	popl_cfi_reg esi
+	popl_cfi_reg edi
+	popl_cfi_reg ebp
+	popl_cfi_reg eax
 	.endm
 
 #endif /* CONFIG_X86_64 */
diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h
index f6f1598..de1cdaf 100644
--- a/arch/x86/include/asm/dwarf2.h
+++ b/arch/x86/include/asm/dwarf2.h
@@ -86,11 +86,23 @@
 	CFI_ADJUST_CFA_OFFSET 8
 	.endm
 
+	.macro pushq_cfi_reg reg
+	pushq %\reg
+	CFI_ADJUST_CFA_OFFSET 8
+	CFI_REL_OFFSET \reg, 0
+	.endm
+
 	.macro popq_cfi reg
 	popq \reg
 	CFI_ADJUST_CFA_OFFSET -8
 	.endm
 
+	.macro popq_cfi_reg reg
+	popq %\reg
+	CFI_ADJUST_CFA_OFFSET -8
+	CFI_RESTORE \reg
+	.endm
+
 	.macro pushfq_cfi
 	pushfq
 	CFI_ADJUST_CFA_OFFSET 8
@@ -116,11 +128,23 @@
 	CFI_ADJUST_CFA_OFFSET 4
 	.endm
 
+	.macro pushl_cfi_reg reg
+	pushl %\reg
+	CFI_ADJUST_CFA_OFFSET 4
+	CFI_REL_OFFSET \reg, 0
+